slide-narrator 0.2.1__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of slide-narrator might be problematic.

@@ -0,0 +1,535 @@
+ """File storage implementation"""
+ from abc import ABC, abstractmethod
+ from typing import Optional, Dict, Any, Set, List, Tuple
+ from pathlib import Path
+ import os
+ import uuid
+ import shutil
+ import hashlib
+ import asyncio
+ import mimetypes
+ from datetime import datetime, UTC
+ from sqlalchemy import select
+ from ..utils.logging import get_logger
+ import magic
+ import base64
+
+ # Get configured logger
+ logger = get_logger(__name__)
+
+ class FileStoreError(Exception):
+     """Base exception for file store errors"""
+     pass
+
+ class FileNotFoundError(FileStoreError):
+     """Raised when a file is not found in storage"""
+     pass
+
+ class StorageFullError(FileStoreError):
+     """Raised when storage capacity is exceeded"""
+     pass
+
+ class UnsupportedFileTypeError(FileStoreError):
+     """Raised when file type is not allowed"""
+     pass
+
+ class FileTooLargeError(FileStoreError):
+     """Raised when file exceeds size limit"""
+     pass
+
+ class FileStore:
+     """
+     File storage implementation
+
+     The recommended way to create a FileStore is using the factory method:
+
+     ```python
+     # Create with default settings
+     store = await FileStore.create()
+
+     # Specify custom path
+     store = await FileStore.create("/path/to/files")
+
+     # Customize all settings
+     store = await FileStore.create(
+         base_path="/path/to/files",
+         max_file_size=100*1024*1024,  # 100MB
+         allowed_mime_types={"image/jpeg", "image/png"},
+         max_storage_size=10*1024*1024*1024  # 10GB
+     )
+     ```
+
+     The factory method validates storage configuration immediately, allowing
+     early detection of storage access issues.
+     """
+
+     # Default configuration
+     DEFAULT_MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB
+     DEFAULT_MAX_STORAGE_SIZE = 5 * 1024 * 1024 * 1024  # 5GB
+     DEFAULT_ALLOWED_MIME_TYPES = {
+         # Documents
+         'application/pdf',
+         'application/msword',
+         'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+         'text/plain',
+         'text/csv',
+         'application/json',
+         # Images
+         'image/jpeg',
+         'image/png',
+         'image/gif',
+         'image/webp',
+         'image/svg+xml',
+         # Archives
+         'application/zip',
+         'application/x-tar',
+         'application/gzip',
+         # Audio formats
+         'audio/mpeg',
+         'audio/mp3',
+         'audio/mp4',
+         'audio/opus',
+         'audio/ogg',
+         'audio/wav',
+         'audio/webm',
+         'audio/aac',
+         'audio/flac',
+         'audio/x-m4a',
+     }
+
+     @classmethod
+     async def create(cls, base_path: Optional[str] = None, max_file_size: Optional[int] = None,
+                      allowed_mime_types: Optional[Set[str]] = None, max_storage_size: Optional[int] = None) -> 'FileStore':
+         """
+         Factory method to create and validate a FileStore instance.
+
+         This method validates storage configuration immediately, allowing
+         early detection of storage access issues.
+
+         Args:
+             base_path: Base directory for file storage
+             max_file_size: Maximum allowed file size in bytes
+             allowed_mime_types: Set of allowed MIME types
+             max_storage_size: Maximum total storage size in bytes
+
+         Returns:
+             Initialized FileStore instance
+
+         Raises:
+             FileStoreError: If storage directory cannot be created or accessed
+         """
+         # Create instance
+         store = cls(base_path, max_file_size, allowed_mime_types, max_storage_size)
+
+         try:
+             # Validate storage by writing and reading a test file
+             test_id = str(uuid.uuid4())
+             test_path = store._get_file_path(test_id)
+             test_path.parent.mkdir(parents=True, exist_ok=True)
+             test_content = b"storage validation"
+             test_path.write_bytes(test_content)
+             read_content = test_path.read_bytes()
+             test_path.unlink()  # Clean up
+
+             # Ensure content matches what we wrote
+             if read_content != test_content:
+                 raise FileStoreError("Storage validation failed: content mismatch")
+
+             # Check that we can create storage stats
+             storage_size = await store.get_storage_size()
+             logger.debug(f"Storage validation successful. Current storage size: {storage_size} bytes")
+
+             return store
+         except Exception as e:
+             logger.error(f"Storage validation failed: {e}")
+             raise FileStoreError(f"Storage validation failed: {e}")
+
+     def __init__(self, base_path: Optional[str] = None, max_file_size: Optional[int] = None,
+                  allowed_mime_types: Optional[Set[str]] = None, max_storage_size: Optional[int] = None):
+         """Initialize file store
+
+         Args:
+             base_path: Base directory for file storage. If not provided,
+                 uses NARRATOR_FILE_STORAGE_PATH env var or defaults to ~/.narrator/files
+             max_file_size: Maximum allowed file size in bytes. If not provided,
+                 uses NARRATOR_MAX_FILE_SIZE env var or defaults to 50MB
+             allowed_mime_types: Set of allowed MIME types
+             max_storage_size: Maximum total storage size in bytes. If not provided,
+                 uses NARRATOR_MAX_STORAGE_SIZE env var or defaults to 5GB
+         """
+         # Set storage backend type
+         self.storage_backend = "local"
+
+         # Get max file size from env var or default
+         env_max_file_size = os.getenv('NARRATOR_MAX_FILE_SIZE')
+         if max_file_size is not None:
+             self.max_file_size = max_file_size
+         elif env_max_file_size is not None:
+             try:
+                 self.max_file_size = int(env_max_file_size)
+             except ValueError:
+                 logger.warning(f"Invalid NARRATOR_MAX_FILE_SIZE value: {env_max_file_size}. Using default.")
+                 self.max_file_size = self.DEFAULT_MAX_FILE_SIZE
+         else:
+             self.max_file_size = self.DEFAULT_MAX_FILE_SIZE
+
+         # Get max storage size from env var or default
+         env_max_storage_size = os.getenv('NARRATOR_MAX_STORAGE_SIZE')
+         if max_storage_size is not None:
+             self.max_storage_size = max_storage_size
+         elif env_max_storage_size is not None:
+             try:
+                 self.max_storage_size = int(env_max_storage_size)
+             except ValueError:
+                 logger.warning(f"Invalid NARRATOR_MAX_STORAGE_SIZE value: {env_max_storage_size}. Using default.")
+                 self.max_storage_size = self.DEFAULT_MAX_STORAGE_SIZE
+         else:
+             self.max_storage_size = self.DEFAULT_MAX_STORAGE_SIZE
+
+         # Get allowed MIME types from env var or default
+         env_mime_types = os.getenv('NARRATOR_ALLOWED_MIME_TYPES')
+         if allowed_mime_types is not None:
+             self.allowed_mime_types = allowed_mime_types
+         elif env_mime_types is not None:
+             try:
+                 # Split comma-separated list and strip whitespace
+                 mime_types = {mime.strip() for mime in env_mime_types.split(',')}
+                 # Validate each MIME type
+                 invalid_types = [mime for mime in mime_types if '/' not in mime]
+                 if invalid_types:
+                     logger.warning(f"Invalid MIME types in NARRATOR_ALLOWED_MIME_TYPES: {invalid_types}. Using default.")
+                     self.allowed_mime_types = self.DEFAULT_ALLOWED_MIME_TYPES
+                 else:
+                     self.allowed_mime_types = mime_types
+             except Exception as e:
+                 logger.warning(f"Error parsing NARRATOR_ALLOWED_MIME_TYPES: {e}. Using default.")
+                 self.allowed_mime_types = self.DEFAULT_ALLOWED_MIME_TYPES
+         else:
+             self.allowed_mime_types = self.DEFAULT_ALLOWED_MIME_TYPES
+
+         if base_path:
+             self.base_path = Path(base_path)
+         else:
+             env_path = os.getenv('NARRATOR_FILE_STORAGE_PATH')
+             if env_path:
+                 self.base_path = Path(env_path).expanduser()
+             else:
+                 # Default to ~/.narrator/files
+                 self.base_path = Path.home() / '.narrator' / 'files'
+
+         # Ensure base directory exists with proper permissions
+         self._ensure_directory()
+         logger.debug(
+             f"Initialized FileStore at {self.base_path} ("
+             f"max_file_size={self.max_file_size}, "
+             f"max_storage_size={self.max_storage_size}, "
+             f"allowed_mime_types={sorted(self.allowed_mime_types)})"
+         )
+
+     def _ensure_directory(self) -> None:
+         """Ensure the storage directory exists with proper permissions"""
+         try:
+             self.base_path.mkdir(parents=True, exist_ok=True)
+             # Set directory permissions to 755 (rwxr-xr-x)
+             self.base_path.chmod(0o755)
+         except Exception as e:
+             logger.error(f"Failed to create or set permissions on storage directory {self.base_path}: {e}")
+             raise FileStoreError(f"Storage directory initialization failed: {e}")
+
+     @classmethod
+     def get_default_path(cls) -> Path:
+         """Get the default file storage path based on environment or defaults"""
+         env_path = os.getenv('NARRATOR_FILE_STORAGE_PATH')
+         if env_path:
+             return Path(env_path).expanduser()
+         return Path.home() / '.narrator' / 'files'
+
+     @classmethod
+     def initialize_storage(cls) -> Path:
+         """Initialize the file storage directory
+
+         This can be called during application setup to ensure the storage
+         directory exists before the FileStore is instantiated.
+
+         Returns:
+             Path to the initialized storage directory
+         """
+         storage_path = cls.get_default_path()
+         storage_path.mkdir(parents=True, exist_ok=True)
+         storage_path.chmod(0o755)
+         return storage_path
+
+     async def validate_file(self, content: bytes, filename: str, mime_type: Optional[str] = None) -> str:
+         """Validate file content and type
+
+         Args:
+             content: File content as bytes
+             filename: Original filename
+             mime_type: Optional MIME type (will be detected if not provided)
+
+         Returns:
+             Validated MIME type
+
+         Raises:
+             UnsupportedFileTypeError: If file type is not allowed
+             FileTooLargeError: If file exceeds size limit
+         """
+         # Check file size
+         if len(content) > self.max_file_size:
+             raise FileTooLargeError(
+                 f"File too large: {len(content)} bytes. Maximum allowed: {self.max_file_size} bytes"
+             )
+
+         # Detect or validate MIME type
+         if not mime_type:
+             mime_type = mimetypes.guess_type(filename)[0]
+             if not mime_type:
+                 # Try to detect from content
+                 mime_type = magic.from_buffer(content, mime=True)
+             logger.debug(f"Detected MIME type for {filename}: {mime_type}")
+
+         if mime_type not in self.allowed_mime_types:
+             raise UnsupportedFileTypeError(f"Unsupported file type: {mime_type}")
+
+         return mime_type
+
+     def _get_file_path(self, file_id: str, extension: Optional[str] = None) -> Path:
+         """Get full path for file ID using sharded directory structure"""
+         # Use first 2 chars of ID as subdirectory to avoid too many files in one dir
+         filename = file_id[2:]
+         if extension:
+             filename = f"{filename}.{extension.lstrip('.')}"
+         return self.base_path / file_id[:2] / filename
+
+     async def save(self, content: bytes, filename: str, mime_type: Optional[str] = None) -> Dict[str, Any]:
+         """Save file to storage"""
+         # Validate file
+         mime_type = await self.validate_file(content, filename, mime_type)
+
+         # Check storage capacity if limit set
+         if self.max_storage_size:
+             current_size = await self.get_storage_size()
+             if len(content) + current_size > self.max_storage_size:
+                 raise StorageFullError(
+                     f"Storage full: {current_size} bytes used, {len(content)} bytes needed, "
+                     f"{self.max_storage_size} bytes maximum"
+                 )
+
+         # Generate unique ID
+         file_id = str(uuid.uuid4())
+
+         # Get file extension from original filename
+         extension = Path(filename).suffix.lstrip('.')
+
+         # Get sharded path with extension
+         file_path = self._get_file_path(file_id, extension)
+         file_path.parent.mkdir(parents=True, exist_ok=True)
+
+         # Write content
+         file_path.write_bytes(content)
+
+         metadata = {
+             'id': file_id,
+             'filename': filename,
+             'mime_type': mime_type,
+             'storage_path': str(file_path.relative_to(self.base_path)),
+             'storage_backend': 'local',
+             'created_at': datetime.now(UTC),
+             'metadata': {
+                 'size': len(content)
+             }
+         }
+
+         logger.debug(f"Saved file {filename} ({len(content)} bytes) to {file_path}")
+         logger.debug(f"Successfully stored attachment {filename} with MIME type {mime_type}")
+         return metadata
+
+     async def get(self, file_id: str, storage_path: Optional[str] = None) -> bytes:
+         """Get file content from storage
+
+         Args:
+             file_id: The unique file identifier
+             storage_path: Optional storage path from metadata (preferred if available)
+
+         Returns:
+             File content as bytes
+
+         Raises:
+             FileNotFoundError: If file cannot be found
+         """
+         if storage_path:
+             # Use the exact path from metadata if available
+             file_path = self.base_path / storage_path
+         else:
+             # Fallback to constructing path from ID (legacy support)
+             file_path = self._get_file_path(file_id)
+
+         if not file_path.exists():
+             raise FileNotFoundError(f"File {file_id} not found at {file_path}")
+
+         return file_path.read_bytes()
+
+     async def delete(self, file_id: str, storage_path: Optional[str] = None) -> None:
+         """Delete file from storage
+
+         Args:
+             file_id: The unique file identifier
+             storage_path: Optional storage path from metadata (preferred if available)
+         """
+         if storage_path:
+             # Use the exact path from metadata if available
+             file_path = self.base_path / storage_path
+         else:
+             # Fallback to constructing path from ID (legacy support)
+             file_path = self._get_file_path(file_id)
+
+         if not file_path.exists():
+             raise FileNotFoundError(f"File {file_id} not found at {file_path}")
+
+         file_path.unlink()
+
+         # Try to remove parent directory if empty
+         try:
+             file_path.parent.rmdir()
+         except OSError:
+             # Directory not empty, ignore
+             pass
+
+     async def get_storage_size(self) -> int:
+         """Get total storage size in bytes"""
+         total = 0
+         for path in self.base_path.rglob('*'):
+             if path.is_file():
+                 total += path.stat().st_size
+         return total
+
+     async def get_file_count(self) -> int:
+         """Get total number of files (excluding directories)"""
+         count = 0
+         for path in self.base_path.rglob('*'):
+             if path.is_file():
+                 count += 1
+         return count
+
+     async def check_health(self) -> Dict[str, Any]:
+         """Check storage health and return metrics"""
+         try:
+             total_size = await self.get_storage_size()
+             file_count = await self.get_file_count()
+             return {
+                 'healthy': True,
+                 'total_size': total_size,
+                 'file_count': file_count,
+                 'errors': []
+             }
+         except Exception as e:
+             return {
+                 'healthy': False,
+                 'errors': [str(e)]
+             }
+
+     async def batch_save(self, files: List[Tuple[bytes, str, str]]) -> List[Dict[str, Any]]:
+         """Save multiple files in one operation
+
+         Args:
+             files: List of tuples (content, filename, mime_type)
+
+         Returns:
+             List of file metadata dictionaries
+         """
+         return [await self.save(content, filename, mime_type)
+                 for content, filename, mime_type in files]
+
+     async def batch_delete(self, file_ids: List[str]) -> None:
+         """Delete multiple files in one operation"""
+         await asyncio.gather(*[self.delete(file_id) for file_id in file_ids])
+
+     async def cleanup_orphaned_files(self, session) -> Tuple[int, List[str]]:
+         """Clean up files that aren't referenced in the database
+
+         Args:
+             session: SQLAlchemy async session
+
+         Returns:
+             Tuple of (number of files deleted, list of errors)
+         """
+         # Import here to avoid circular dependency
+         from ..database.models import MessageRecord
+
+         # Get all file IDs from messages
+         query = select(MessageRecord.attachments)
+         result = await session.execute(query)
+         db_files = set()
+         for row in result.scalars():
+             if row:
+                 for attachment in row:
+                     if attachment.get('file_id'):
+                         db_files.add(attachment['file_id'])
+
+         # Get all files in storage
+         stored_files = await self.list_files()
+
+         # Find orphaned files
+         orphaned = set(stored_files) - db_files
+
+         # Delete orphaned files
+         errors = []
+         deleted = 0
+         for file_id in orphaned:
+             try:
+                 await self.delete(file_id)
+                 deleted += 1
+             except Exception as e:
+                 errors.append(f"Failed to delete {file_id}: {str(e)}")
+
+         return deleted, errors
+
+     async def list_files(self) -> List[str]:
+         """List all file IDs in storage"""
+         files = []
+         for path in self.base_path.rglob('*'):
+             if path.is_file():
+                 # Reconstruct file ID from path: shard directory name + filename stem (extension dropped)
+                 file_id = path.parent.name + path.stem
+                 files.append(file_id)
+         return files
+
+     def _handle_data_url(self, content: bytes) -> bytes:
+         """Decode base64 data URLs into raw bytes; return other content unchanged"""
+         if content.startswith(b'data:'):
+             # Split the "data:<mime>;base64," header from the encoded payload
+             header, encoded = content.split(b",", 1)
+             return base64.b64decode(encoded)
+         return content
+
+     @classmethod
+     def get_base_path(cls) -> str:
+         """Get the base storage path from environment variable"""
+         env_path = os.getenv('NARRATOR_FILE_STORAGE_PATH')
+         if not env_path:
+             # For the extracted package, we'll use a default instead of raising an error
+             return str(Path.home() / '.narrator' / 'files')
+         return env_path
+
+     @classmethod
+     def get_file_url(cls, relative_path: str) -> str:
+         """
+         Get the full URL for a file based on its relative path.
+
+         Args:
+             relative_path: The path relative to the base storage path
+
+         Returns:
+             The full URL for the file
+         """
+         # Get the base path
+         base_path = cls.get_base_path()
+
+         # Construct the full URL by combining the base path and relative path
+         # Make sure there's exactly one slash between them
+         if base_path.endswith('/'):
+             base_path = base_path[:-1]
+         if relative_path.startswith('/'):
+             relative_path = relative_path[1:]
+
+         return f"{base_path}/{relative_path}"
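For orientation, here is a minimal usage sketch of the `FileStore` API shown in the hunk above. The import path `slide_narrator.storage.file_store` and the temporary storage path are assumptions for illustration; the hunk does not reveal where the module lives inside the wheel.

```python
import asyncio
import os

# Assumed import path for illustration; the diff does not show the module's
# location inside the slide-narrator wheel.
from slide_narrator.storage.file_store import FileStore

async def main():
    # Environment variables are read when no explicit arguments are passed.
    os.environ["NARRATOR_FILE_STORAGE_PATH"] = "/tmp/narrator-files"  # hypothetical path

    # The factory method validates the storage directory up front.
    store = await FileStore.create(
        max_file_size=10 * 1024 * 1024,                        # 10MB per file
        allowed_mime_types={"text/plain", "application/pdf"},
    )

    # save() returns a metadata dict containing the generated id and storage_path.
    meta = await store.save(b"hello world", "hello.txt", "text/plain")

    # Passing storage_path avoids the extension-less legacy path lookup.
    data = await store.get(meta["id"], storage_path=meta["storage_path"])
    assert data == b"hello world"

    print(await store.check_health())                          # {'healthy': True, ...}
    await store.delete(meta["id"], storage_path=meta["storage_path"])

asyncio.run(main())
```

Note that `save()` appends the original file extension to the sharded on-disk name, so callers that keep the returned `storage_path` get exact lookups; `get()` and `delete()` with only a file id fall back to the extension-less legacy path.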
@@ -0,0 +1,9 @@
+ """
+ Utilities package for Tyler Stores
+ """
+
+ from .logging import get_logger
+
+ __all__ = [
+     "get_logger",
+ ]
@@ -0,0 +1,58 @@
+ """Logging configuration for narrator package."""
+ import os
+ import logging
+ from typing import Optional
+
+ _is_configured = False
+
+ def _ensure_logging_configured():
+     """Internal function to configure logging if not already configured."""
+     global _is_configured
+     if _is_configured:
+         return
+
+     # Get log level from environment and convert to uppercase
+     log_level_str = os.getenv('NARRATOR_LOG_LEVEL', os.getenv('LOG_LEVEL', 'INFO')).upper()
+
+     # Convert string to logging level constant
+     try:
+         log_level = getattr(logging, log_level_str)
+     except AttributeError:
+         print(f"Invalid LOG_LEVEL: {log_level_str}. Defaulting to INFO.")
+         log_level = logging.INFO
+
+     # Configure the root logger with our format
+     logging.basicConfig(
+         level=log_level,
+         format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+         datefmt='%H:%M:%S',
+         force=True  # Ensure we override any existing configuration
+     )
+
+     # Get the root logger and set its level
+     root_logger = logging.getLogger()
+     root_logger.setLevel(log_level)
+
+     _is_configured = True
+
+ def get_logger(name: Optional[str] = None) -> logging.Logger:
+     """Get a configured logger.
+
+     This function ensures logging is configured with the appropriate level from
+     the NARRATOR_LOG_LEVEL (or LOG_LEVEL) environment variable before returning a logger.
+     Configuration happens automatically the first time this function is called.
+
+     Args:
+         name: The name for the logger. If None, falls back to this module's logger.
+
+     Returns:
+         A configured logger instance.
+
+     Usage:
+         # In any file:
+         from narrator.utils.logging import get_logger
+         logger = get_logger(__name__)  # Automatically configures logging
+         logger.debug("Debug message")  # Will respect NARRATOR_LOG_LEVEL from .env
+     """
+     _ensure_logging_configured()
+     # Fall back to this module's logger when no name is given
+     return logging.getLogger(name or __name__)
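And a short sketch of the logging helper: set `NARRATOR_LOG_LEVEL` before the first `get_logger()` call, since configuration is applied once and then cached. The `slide_narrator.utils` import path is again an assumption based on the relative imports in the first hunk.

```python
import os

# Must be set before the first get_logger() call; configuration happens once, lazily.
os.environ["NARRATOR_LOG_LEVEL"] = "DEBUG"

# Assumed import path; the utils package re-exports get_logger (see the second hunk).
from slide_narrator.utils import get_logger

logger = get_logger(__name__)
logger.debug("file store initialised")          # emitted because the level is DEBUG
logger.info("saved attachment %s", "hello.txt")
```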