deepdiver 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepdiver/__init__.py +38 -0
- deepdiver/content_processor.py +343 -0
- deepdiver/deepdive.py +801 -0
- deepdiver/deepdiver.yaml +79 -0
- deepdiver/notebooklm_automator.py +1441 -0
- deepdiver/podcast_manager.py +402 -0
- deepdiver/session_tracker.py +723 -0
- deepdiver-0.1.0.dist-info/METADATA +455 -0
- deepdiver-0.1.0.dist-info/RECORD +13 -0
- deepdiver-0.1.0.dist-info/WHEEL +5 -0
- deepdiver-0.1.0.dist-info/entry_points.txt +2 -0
- deepdiver-0.1.0.dist-info/licenses/LICENSE +21 -0
- deepdiver-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Podcast Manager Module
|
|
3
|
+
Part of DeepDiver - NotebookLM Podcast Automation System
|
|
4
|
+
|
|
5
|
+
This module handles podcast file management, organization, and metadata
|
|
6
|
+
for generated audio files from NotebookLM.
|
|
7
|
+
|
|
8
|
+
Assembly Team: Jerry ⚡, Nyro ♠️, Aureon 🌿, JamAI 🎸, Synth 🧵
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import logging
|
|
13
|
+
import os
|
|
14
|
+
import shutil
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Dict, List, Optional, Any
|
|
18
|
+
|
|
19
|
+
import yaml
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class PodcastManager:
    """
    Manages podcast files and metadata.

    This class handles the organization, storage, and metadata management
    of generated podcast files from NotebookLM. Every saved podcast is an
    ``.mp3`` file in ``output_dir`` accompanied by a JSON sidecar named
    ``<stem>_metadata.json`` in the same directory.
    """

    # Characters replaced by "_" when a title is turned into a filename.
    _FILENAME_TRANSLATION = str.maketrans(dict.fromkeys('<>:"/\\|?*', '_'))

    # Maximum number of characters kept from a cleaned title.
    _MAX_CLEAN_LENGTH = 100

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Initialize the podcast manager with configuration.

        Args:
            config (Dict[str, Any], optional): Configuration mapping. Only the
                ``AUDIO_SETTINGS`` section is read (keys ``output_dir``,
                ``naming_pattern``, ``metadata_embed``, ``quality_check``).

        Side effects:
            Creates ``output_dir`` if it does not exist yet.
        """
        self.config = config or {}
        self.logger = self._setup_logging()

        # Audio settings. ``or {}`` also covers a section that is present but
        # empty (None), which would otherwise fail on ``.get``.
        audio_settings = self.config.get('AUDIO_SETTINGS') or {}
        self.output_dir = audio_settings.get('output_dir', './output/podcasts')
        self.naming_pattern = audio_settings.get('naming_pattern', '{title}_{timestamp}')
        self.metadata_embed = audio_settings.get('metadata_embed', True)
        self.quality_check = audio_settings.get('quality_check', True)

        # Ensure output directory exists
        os.makedirs(self.output_dir, exist_ok=True)

        self.logger.info("♠️🌿🎸🧵 PodcastManager initialized")

    def _setup_logging(self) -> logging.Logger:
        """
        Set up logging configuration.

        Returns:
            logging.Logger: The shared ``'PodcastManager'`` logger at INFO level.
                A stream handler is attached only when the logger has none, so
                creating several managers does not duplicate output.
        """
        logger = logging.getLogger('PodcastManager')
        logger.setLevel(logging.INFO)

        if not logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            )
            handler.setFormatter(formatter)
            logger.addHandler(handler)

        return logger

    def generate_filename(self, title: str, timestamp: Optional[datetime] = None) -> str:
        """
        Generate a filename for a podcast based on the naming pattern.

        The pattern may use the placeholders ``{title}``, ``{timestamp}``
        (``YYYYMMDD_HHMMSS``), ``{date}`` (``YYYYMMDD``) and ``{time}``
        (``HHMMSS``).

        Args:
            title (str): Title of the podcast
            timestamp (datetime, optional): Timestamp for the podcast;
                defaults to the current local time

        Returns:
            str: Generated filename, always ending in ``.mp3``
        """
        if timestamp is None:
            timestamp = datetime.now()

        filename = self.naming_pattern.format(
            title=self._clean_filename(title),
            timestamp=timestamp.strftime('%Y%m%d_%H%M%S'),
            date=timestamp.strftime('%Y%m%d'),
            time=timestamp.strftime('%H%M%S'),
        )

        return f"{filename}.mp3"

    def _clean_filename(self, filename: str) -> str:
        """
        Clean a string to be safe for use as a filename.

        Invalid characters become ``_``, runs of whitespace collapse to a
        single ``_`` and the result is truncated to 100 characters.
        """
        cleaned = filename.translate(self._FILENAME_TRANSLATION)
        cleaned = '_'.join(cleaned.split())
        return cleaned[:self._MAX_CLEAN_LENGTH]

    def _metadata_path_for(self, filename: str) -> str:
        """Return the path of the JSON sidecar that belongs to ``filename``."""
        return os.path.join(self.output_dir, f"{Path(filename).stem}_metadata.json")

    def _load_metadata(self, filename: str) -> Optional[Dict[str, Any]]:
        """
        Load the JSON sidecar for ``filename``.

        Returns:
            Optional[Dict[str, Any]]: Parsed metadata, or None when the sidecar
                is missing, unreadable or not valid JSON.
        """
        metadata_path = self._metadata_path_for(filename)
        try:
            with open(metadata_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            # A podcast without a sidecar is a normal situation, not an error.
            return None
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            self.logger.warning(f"Could not read metadata {metadata_path}: {e}")
            return None

    def _describe_file(self, filename: str) -> Dict[str, Any]:
        """
        Build the information record for one file in the output directory.

        Raises:
            OSError: If the file cannot be stat'ed (e.g. it does not exist).
        """
        file_path = os.path.join(self.output_dir, filename)
        stats = os.stat(file_path)
        return {
            'filename': filename,
            'path': file_path,
            'size': stats.st_size,
            'created': datetime.fromtimestamp(stats.st_ctime).isoformat(),
            'modified': datetime.fromtimestamp(stats.st_mtime).isoformat(),
            'metadata': self._load_metadata(filename),
        }

    def save_podcast(self, source_path: str, title: str,
                     metadata: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Save a podcast file with metadata.

        The source file is copied into the output directory under a generated
        name and a JSON sidecar with the metadata is written next to it.

        Args:
            source_path (str): Path to the source audio file
            title (str): Title of the podcast
            metadata (Dict[str, Any], optional): Additional metadata. The
                dictionary is copied; the caller's object is not modified.

        Returns:
            Dict[str, Any]: Save operation results with the keys ``success``,
                ``source_path``, ``saved_path``, ``filename``,
                ``metadata_path`` and ``errors``. Failures are reported through
                ``errors`` instead of being raised.
        """
        result = {
            'success': False,
            'source_path': source_path,
            'saved_path': None,
            'filename': None,
            'metadata_path': None,
            'errors': []
        }

        try:
            # Check if source file exists
            if not os.path.exists(source_path):
                result['errors'].append(f"Source file not found: {source_path}")
                return result

            # Copy file to output directory under the generated name
            filename = self.generate_filename(title)
            saved_path = os.path.join(self.output_dir, filename)
            shutil.copy2(source_path, saved_path)
            result['saved_path'] = saved_path
            result['filename'] = filename

            # Work on a copy so the caller's dictionary is never mutated.
            record = dict(metadata) if metadata else {}
            record.update({
                'title': title,
                'created_at': datetime.now().isoformat(),
                'source_file': source_path,
                'saved_file': saved_path,
                'file_size': os.path.getsize(saved_path),
                'assembly_team': ['Jerry ⚡', 'Nyro ♠️', 'Aureon 🌿', 'JamAI 🎸', 'Synth 🧵']
            })

            # Run the quality check BEFORE writing the sidecar so that its
            # result is actually persisted with the rest of the metadata.
            if self.quality_check:
                record['quality_check'] = self._check_audio_quality(saved_path)

            # Save metadata
            metadata_path = self._metadata_path_for(filename)
            with open(metadata_path, 'w', encoding='utf-8') as f:
                json.dump(record, f, indent=2, ensure_ascii=False)
            result['metadata_path'] = metadata_path

            # Embed metadata in audio file if requested
            if self.metadata_embed:
                self._embed_metadata(saved_path, record)

            result['success'] = True
            self.logger.info(f"✅ Podcast saved successfully: {saved_path}")

        except Exception as e:
            # Boundary handler: callers inspect result['errors'] instead of
            # catching exceptions.
            result['errors'].append(f"Save error: {e}")
            self.logger.error(f"❌ Failed to save podcast: {e}")

        return result

    def _embed_metadata(self, audio_path: str, metadata: Dict[str, Any]):
        """
        Embed metadata in audio file.

        Placeholder: nothing is written to the audio file. The request is only
        logged, because tag writing needs an audio library (e.g. mutagen) that
        this module does not depend on.
        """
        try:
            self.logger.info(f"Metadata embedding requested for: {audio_path}")
            self.logger.info("Note: Metadata embedding requires additional audio processing library")
        except Exception as e:
            self.logger.warning(f"Metadata embedding failed: {e}")

    @staticmethod
    def _looks_like_mp3(header: bytes) -> bool:
        """
        Return True when ``header`` starts like an MP3 file.

        Accepted are an ID3v2 tag or an MPEG audio Layer III frame header:
        11 sync bits set, a non-reserved version field and layer bits ``01``.
        This covers MPEG-1/2/2.5 with or without CRC (0xFFFB, 0xFFFA, 0xFFF3,
        0xFFF2, 0xFFE3, 0xFFE2), not only 0xFFFB.
        """
        if header.startswith(b'ID3'):
            return True
        if len(header) < 2:
            return False

        first, second = header[0], header[1]
        has_sync = first == 0xFF and (second & 0xE0) == 0xE0
        version_valid = (second & 0x18) != 0x08  # 01 is the reserved version id
        is_layer3 = (second & 0x06) == 0x02
        return has_sync and version_valid and is_layer3

    def _check_audio_quality(self, audio_path: str) -> Dict[str, Any]:
        """
        Check audio file quality.

        Only cheap heuristics are applied: file size and the first bytes of
        the file. The audio stream itself is not decoded.

        Args:
            audio_path (str): Path of the audio file to inspect

        Returns:
            Dict[str, Any]: ``file_size``, ``file_size_mb``, ``has_content``,
                ``timestamp`` and ``format_valid``; on failure a dictionary
                with ``error`` and ``timestamp`` instead.
        """
        try:
            file_size = os.path.getsize(audio_path)

            # Basic quality checks
            quality_result = {
                'file_size': file_size,
                'file_size_mb': round(file_size / (1024 * 1024), 2),
                'has_content': file_size > 1024,  # At least 1KB
                'timestamp': datetime.now().isoformat()
            }

            # Check if file is likely a valid audio file
            with open(audio_path, 'rb') as f:
                header = f.read(10)
            quality_result['format_valid'] = self._looks_like_mp3(header)

            return quality_result

        except Exception as e:
            self.logger.warning(f"Quality check failed: {e}")
            return {'error': str(e), 'timestamp': datetime.now().isoformat()}

    def list_podcasts(self) -> List[Dict[str, Any]]:
        """
        List all podcasts in the output directory.

        A file that cannot be inspected (for example because it disappeared
        while the directory was being scanned) is skipped with a warning
        instead of aborting the whole listing.

        Returns:
            List[Dict[str, Any]]: List of podcast information, newest first
                (ordered by the ``created`` field)
        """
        podcasts = []

        try:
            names = os.listdir(self.output_dir)
        except OSError as e:
            self.logger.error(f"Error listing podcasts: {e}")
            return podcasts

        for name in names:
            if not name.endswith('.mp3'):
                continue
            try:
                podcasts.append(self._describe_file(name))
            except (OSError, ValueError, OverflowError) as e:
                self.logger.warning(f"Skipping unreadable podcast {name}: {e}")

        # Sort by creation time (newest first)
        podcasts.sort(key=lambda item: item['created'], reverse=True)

        return podcasts

    def get_podcast_info(self, filename: str) -> Optional[Dict[str, Any]]:
        """
        Get detailed information about a specific podcast.

        Args:
            filename (str): Name of the podcast file

        Returns:
            Optional[Dict[str, Any]]: Podcast information or None if not found
        """
        try:
            return self._describe_file(filename)
        except FileNotFoundError:
            return None
        except Exception as e:
            self.logger.error(f"Error getting podcast info: {e}")
            return None

    def delete_podcast(self, filename: str) -> bool:
        """
        Delete a podcast and its metadata.

        Args:
            filename (str): Name of the podcast file to delete

        Returns:
            bool: True if deletion successful, False otherwise
        """
        try:
            file_path = os.path.join(self.output_dir, filename)

            if not os.path.exists(file_path):
                self.logger.warning(f"Podcast file not found: {filename}")
                return False

            # Delete audio file
            os.remove(file_path)

            # Delete metadata file
            metadata_path = self._metadata_path_for(filename)
            if os.path.exists(metadata_path):
                os.remove(metadata_path)

            self.logger.info(f"✅ Podcast deleted: {filename}")
            return True

        except Exception as e:
            self.logger.error(f"Error deleting podcast: {e}")
            return False

    def cleanup_old_podcasts(self, days: int = 30) -> int:
        """
        Clean up podcasts older than specified days.

        Age is taken from ``os.path.getctime``. NOTE: on Unix this is the time
        of the last metadata change, not the creation time.

        Args:
            days (int): Number of days to keep podcasts

        Returns:
            int: Number of podcasts deleted
        """
        deleted_count = 0
        cutoff_time = datetime.now().timestamp() - (days * 24 * 60 * 60)

        try:
            for name in os.listdir(self.output_dir):
                if not name.endswith('.mp3'):
                    continue

                file_path = os.path.join(self.output_dir, name)
                try:
                    is_stale = os.path.getctime(file_path) < cutoff_time
                except OSError as e:
                    # One unreadable file must not stop the remaining cleanup.
                    self.logger.warning(f"Skipping {name} during cleanup: {e}")
                    continue

                if is_stale and self.delete_podcast(name):
                    deleted_count += 1

            self.logger.info(f"✅ Cleaned up {deleted_count} old podcasts")

        except Exception as e:
            self.logger.error(f"Error during cleanup: {e}")

        return deleted_count
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
# Example usage and testing
|
|
385
|
+
def test_podcast_manager():
    """Smoke-test the podcast manager: print one generated filename and the stored podcasts."""
    pm = PodcastManager()

    # Filename generation with the default configuration
    filename = pm.generate_filename("Test Podcast")
    print(f"Generated filename: {filename}")

    # Enumerate whatever already lives in the output directory
    podcasts = pm.list_podcasts()
    total = len(podcasts)
    print(f"Found {total} podcasts")

    for podcast in podcasts:
        name = podcast['filename']
        size = podcast['size']
        print(f"- {name} ({size} bytes)")
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
if __name__ == "__main__":
|
|
402
|
+
test_podcast_manager()
|