deepdiver 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,402 @@
1
+ """
2
+ Podcast Manager Module
3
+ Part of DeepDiver - NotebookLM Podcast Automation System
4
+
5
+ This module handles podcast file management, organization, and metadata
6
+ for generated audio files from NotebookLM.
7
+
8
+ Assembly Team: Jerry ⚡, Nyro ♠️, Aureon 🌿, JamAI 🎸, Synth 🧵
9
+ """
10
+
11
+ import json
12
+ import logging
13
+ import os
14
+ import shutil
15
+ from datetime import datetime
16
+ from pathlib import Path
17
+ from typing import Dict, List, Optional, Any
18
+
19
+ import yaml
20
+
21
+
22
class PodcastManager:
    """
    Manages podcast files and metadata.

    This class handles the organization, storage, and metadata management
    of generated podcast files from NotebookLM. Each saved ``<name>.mp3`` is
    accompanied by a ``<name>_metadata.json`` sidecar in the same directory.
    """

    # Characters replaced with "_" when a title is turned into a filename.
    _INVALID_FILENAME_CHARS = '<>:"/\\|?*'
    # Upper bound on the cleaned title length (extension/timestamp excluded).
    _MAX_FILENAME_LENGTH = 100

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Initialize the podcast manager with configuration.

        Args:
            config (Dict[str, Any], optional): Configuration mapping. Only the
                'AUDIO_SETTINGS' section is read (keys: 'output_dir',
                'naming_pattern', 'metadata_embed', 'quality_check').
        """
        self.config = config or {}
        self.logger = self._setup_logging()

        # Audio settings. `or {}` also covers a section that is present but
        # empty (None), which would otherwise fail on `.get`.
        audio_settings = self.config.get('AUDIO_SETTINGS') or {}
        self.output_dir = audio_settings.get('output_dir', './output/podcasts')
        self.naming_pattern = audio_settings.get(
            'naming_pattern', '{title}_{timestamp}'
        )
        self.metadata_embed = audio_settings.get('metadata_embed', True)
        self.quality_check = audio_settings.get('quality_check', True)

        # Ensure output directory exists
        os.makedirs(self.output_dir, exist_ok=True)

        self.logger.info("♠️🌿🎸🧵 PodcastManager initialized")

    def _setup_logging(self) -> logging.Logger:
        """Return the shared 'PodcastManager' logger, configuring it once."""
        logger = logging.getLogger('PodcastManager')
        logger.setLevel(logging.INFO)

        # The logger is process-wide; only attach a handler the first time so
        # repeated instantiation does not duplicate every log line.
        if not logger.handlers:
            handler = logging.StreamHandler()
            handler.setFormatter(logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            ))
            logger.addHandler(handler)

        return logger

    def generate_filename(self, title: str, timestamp: Optional[datetime] = None) -> str:
        """
        Generate a filename for a podcast based on the naming pattern.

        The pattern may use the placeholders ``{title}``, ``{timestamp}``
        (``YYYYMMDD_HHMMSS``), ``{date}`` and ``{time}``.

        Args:
            title (str): Title of the podcast
            timestamp (datetime, optional): Timestamp for the podcast;
                defaults to the current local time

        Returns:
            str: Generated filename, always ending in ``.mp3``
        """
        if timestamp is None:
            timestamp = datetime.now()

        filename = self.naming_pattern.format(
            title=self._clean_filename(title),
            timestamp=timestamp.strftime('%Y%m%d_%H%M%S'),
            date=timestamp.strftime('%Y%m%d'),
            time=timestamp.strftime('%H%M%S'),
        )

        return f"{filename}.mp3"

    def _clean_filename(self, filename: str) -> str:
        """Clean a string to be safe for use as a filename."""
        # Replace every invalid character with "_" in a single pass.
        table = str.maketrans(
            self._INVALID_FILENAME_CHARS,
            '_' * len(self._INVALID_FILENAME_CHARS),
        )
        filename = filename.translate(table)

        # Collapse whitespace runs into single underscores and limit length
        filename = '_'.join(filename.split())
        return filename[:self._MAX_FILENAME_LENGTH]

    def save_podcast(self, source_path: str, title: str,
                     metadata: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Save a podcast file with metadata.

        The source file is copied into the output directory under a generated
        name and a JSON sidecar with the metadata is written next to it.

        Args:
            source_path (str): Path to the source audio file
            title (str): Title of the podcast
            metadata (Dict[str, Any], optional): Additional metadata. The
                mapping is copied; the caller's object is not modified.

        Returns:
            Dict[str, Any]: Save operation results with the keys 'success',
            'source_path', 'saved_path', 'filename', 'metadata_path' and
            'errors'. Failures are reported through 'errors' rather than
            raised.
        """
        result = {
            'success': False,
            'source_path': source_path,
            'saved_path': None,
            'filename': None,
            'metadata_path': None,
            'errors': []
        }

        try:
            # Check if source file exists
            if not os.path.exists(source_path):
                result['errors'].append(f"Source file not found: {source_path}")
                return result

            # Generate filename
            filename = self.generate_filename(title)
            saved_path = os.path.join(self.output_dir, filename)

            # Copy file to output directory
            shutil.copy2(source_path, saved_path)
            result['saved_path'] = saved_path
            result['filename'] = filename

            # Work on a copy so the caller's dict is never mutated.
            record = dict(metadata) if metadata else {}

            # Add default metadata (these keys override caller-supplied ones)
            record.update({
                'title': title,
                'created_at': datetime.now().isoformat(),
                'source_file': source_path,
                'saved_file': saved_path,
                'file_size': os.path.getsize(saved_path),
                'assembly_team': ['Jerry ⚡', 'Nyro ♠️', 'Aureon 🌿', 'JamAI 🎸', 'Synth 🧵']
            })

            # Run the quality check BEFORE writing the sidecar so its result
            # is actually persisted with the rest of the metadata.
            if self.quality_check:
                record['quality_check'] = self._check_audio_quality(saved_path)

            # Save metadata
            metadata_path = self._metadata_path_for(filename)
            with open(metadata_path, 'w', encoding='utf-8') as f:
                json.dump(record, f, indent=2, ensure_ascii=False)

            result['metadata_path'] = metadata_path

            # Embed metadata in audio file if requested
            if self.metadata_embed:
                self._embed_metadata(saved_path, record)

            result['success'] = True
            self.logger.info("✅ Podcast saved successfully: %s", saved_path)

        except Exception as e:
            # Best-effort API: report the failure in the result instead of
            # propagating it to the caller.
            result['errors'].append(f"Save error: {e}")
            self.logger.error("❌ Failed to save podcast: %s", e)

        return result

    def _embed_metadata(self, audio_path: str, metadata: Dict[str, Any]) -> None:
        """Embed metadata in audio file (placeholder: only logs the request)."""
        try:
            # This would require a library like mutagen for MP3 metadata
            # For now, we'll just log that metadata embedding is requested
            self.logger.info("Metadata embedding requested for: %s", audio_path)
            self.logger.info("Note: Metadata embedding requires additional audio processing library")
        except Exception as e:
            self.logger.warning("Metadata embedding failed: %s", e)

    @staticmethod
    def _looks_like_mp3(header: bytes) -> bool:
        """Return True if *header* starts with an ID3 tag or an MPEG Layer III frame."""
        if header.startswith(b'ID3'):
            return True
        if len(header) < 2 or header[0] != 0xFF:
            return False
        second = header[1]
        return (
            (second & 0xE0) == 0xE0        # remaining 3 bits of the 11-bit frame sync
            and (second & 0x18) != 0x08    # MPEG version field is not the reserved value
            and (second & 0x06) == 0x02    # layer field == Layer III
        )

    def _check_audio_quality(self, audio_path: str) -> Dict[str, Any]:
        """
        Check audio file quality.

        Args:
            audio_path (str): Path of the audio file to inspect

        Returns:
            Dict[str, Any]: 'file_size', 'file_size_mb', 'has_content',
            'timestamp' and 'format_valid'; or 'error' and 'timestamp' when
            the file could not be inspected.
        """
        try:
            file_size = os.path.getsize(audio_path)

            # Basic quality checks
            quality_result = {
                'file_size': file_size,
                'file_size_mb': round(file_size / (1024 * 1024), 2),
                'has_content': file_size > 1024,  # At least 1KB
                'timestamp': datetime.now().isoformat()
            }

            # Check if file is likely a valid audio file: only the first
            # bytes are needed to recognise an ID3 tag or an MPEG frame header.
            with open(audio_path, 'rb') as f:
                header = f.read(10)
            quality_result['format_valid'] = self._looks_like_mp3(header)

            return quality_result

        except Exception as e:
            self.logger.warning("Quality check failed: %s", e)
            return {'error': str(e), 'timestamp': datetime.now().isoformat()}

    def _metadata_path_for(self, filename: str) -> str:
        """Return the path of the JSON sidecar belonging to *filename*."""
        return os.path.join(
            self.output_dir, f"{Path(filename).stem}_metadata.json"
        )

    def _load_metadata(self, filename: str) -> Optional[Dict[str, Any]]:
        """Load the sidecar metadata for *filename*; None if missing or unreadable."""
        metadata_path = self._metadata_path_for(filename)
        if not os.path.exists(metadata_path):
            return None
        try:
            with open(metadata_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both malformed JSON and undecodable bytes.
            # A broken sidecar must not hide the podcast itself.
            self.logger.warning("Could not read metadata %s: %s", metadata_path, e)
            return None

    def _describe_file(self, filename: str) -> Dict[str, Any]:
        """Build the info dict (stat data plus metadata) for one podcast file."""
        file_path = os.path.join(self.output_dir, filename)
        return {
            'filename': filename,
            'path': file_path,
            'size': os.path.getsize(file_path),
            'created': datetime.fromtimestamp(
                os.path.getctime(file_path)
            ).isoformat(),
            'modified': datetime.fromtimestamp(
                os.path.getmtime(file_path)
            ).isoformat(),
            'metadata': self._load_metadata(filename),
        }

    def list_podcasts(self) -> List[Dict[str, Any]]:
        """
        List all podcasts in the output directory.

        Returns:
            List[Dict[str, Any]]: One entry per ``.mp3`` file with the keys
            'filename', 'path', 'size', 'created', 'modified' and 'metadata'
            (None when no readable sidecar exists), newest first. On a
            filesystem error the entries collected so far are returned.
        """
        podcasts = []

        try:
            for file in os.listdir(self.output_dir):
                if file.endswith('.mp3'):
                    podcasts.append(self._describe_file(file))

            # Sort by creation time (newest first)
            podcasts.sort(key=lambda x: x['created'], reverse=True)

        except Exception as e:
            self.logger.error("Error listing podcasts: %s", e)

        return podcasts

    def get_podcast_info(self, filename: str) -> Optional[Dict[str, Any]]:
        """
        Get detailed information about a specific podcast.

        Args:
            filename (str): Name of the podcast file

        Returns:
            Optional[Dict[str, Any]]: Podcast information (same shape as the
            entries of ``list_podcasts``) or None if the file is not found or
            cannot be inspected
        """
        try:
            if not os.path.exists(os.path.join(self.output_dir, filename)):
                return None

            return self._describe_file(filename)

        except Exception as e:
            self.logger.error("Error getting podcast info: %s", e)
            return None

    def delete_podcast(self, filename: str) -> bool:
        """
        Delete a podcast and its metadata.

        Args:
            filename (str): Name of the podcast file to delete

        Returns:
            bool: True if deletion successful, False otherwise
        """
        try:
            file_path = os.path.join(self.output_dir, filename)

            if not os.path.exists(file_path):
                self.logger.warning("Podcast file not found: %s", filename)
                return False

            # Delete audio file
            os.remove(file_path)

            # Delete metadata file
            metadata_path = self._metadata_path_for(filename)
            if os.path.exists(metadata_path):
                os.remove(metadata_path)

            self.logger.info("✅ Podcast deleted: %s", filename)
            return True

        except Exception as e:
            self.logger.error("Error deleting podcast: %s", e)
            return False

    def cleanup_old_podcasts(self, days: int = 30) -> int:
        """
        Clean up podcasts older than specified days.

        Age is judged by ``os.path.getctime`` of each ``.mp3`` file.

        Args:
            days (int): Number of days to keep podcasts

        Returns:
            int: Number of podcasts deleted
        """
        deleted_count = 0
        cutoff_time = datetime.now().timestamp() - (days * 24 * 60 * 60)

        try:
            for file in os.listdir(self.output_dir):
                if not file.endswith('.mp3'):
                    continue

                file_path = os.path.join(self.output_dir, file)
                if os.path.getctime(file_path) < cutoff_time and self.delete_podcast(file):
                    deleted_count += 1

            self.logger.info("✅ Cleaned up %s old podcasts", deleted_count)

        except Exception as e:
            self.logger.error("Error during cleanup: %s", e)

        return deleted_count
382
+
383
+
384
+ # Example usage and testing
385
def test_podcast_manager():
    """Smoke-test the manager: build one filename, then print the stored podcasts."""
    pm = PodcastManager()

    # Filename generation with the configured naming pattern
    print(f"Generated filename: {pm.generate_filename('Test Podcast')}")

    # Enumerate whatever already lives in the output directory
    found = pm.list_podcasts()
    print(f"Found {len(found)} podcasts")
    for entry in found:
        print(f"- {entry['filename']} ({entry['size']} bytes)")
399
+
400
+
401
# Script entry point: run the smoke test only when this module is executed
# directly, never as a side effect of importing it.
if __name__ == "__main__":
    test_podcast_manager()