TonieToolbox 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,637 @@
1
+ """
2
+ Media tag processing functionality for the TonieToolbox package
3
+
4
+ This module handles reading and processing metadata tags from audio files,
5
+ which can be used to enhance Tonie file creation with proper track information.
6
+ """
7
+
8
+ import os
9
+ from typing import Dict, Any, Optional, List
10
+ import logging
11
+ import tempfile
12
+ import base64
13
+ from mutagen.flac import Picture
14
+ from .logger import get_logger
15
+ from .dependency_manager import is_mutagen_available, ensure_mutagen
16
+
17
# Module-level state for the optional mutagen dependency.
# MUTAGEN_AVAILABLE records whether the mutagen modules were imported
# successfully. The remaining names start out as None placeholders and are
# rebound by _import_mutagen() to the mutagen package and its tag classes.
MUTAGEN_AVAILABLE = False
mutagen = None
ID3 = None
FLAC = None
MP4 = None
OggOpus = None
OggVorbis = None
25
+
26
+ def _import_mutagen():
27
+ """
28
+ Import the mutagen modules and update global variables.
29
+
30
+ Returns:
31
+ bool: True if import was successful, False otherwise
32
+ """
33
+ global MUTAGEN_AVAILABLE, mutagen, ID3, FLAC, MP4, OggOpus, OggVorbis
34
+
35
+ try:
36
+ import mutagen as _mutagen
37
+ from mutagen.id3 import ID3 as _ID3
38
+ from mutagen.flac import FLAC as _FLAC
39
+ from mutagen.mp4 import MP4 as _MP4
40
+ from mutagen.oggopus import OggOpus as _OggOpus
41
+ from mutagen.oggvorbis import OggVorbis as _OggVorbis
42
+
43
+ # Assign to global variables
44
+ mutagen = _mutagen
45
+ ID3 = _ID3
46
+ FLAC = _FLAC
47
+ MP4 = _MP4
48
+ OggOpus = _OggOpus
49
+ OggVorbis = _OggVorbis
50
+ MUTAGEN_AVAILABLE = True
51
+ return True
52
+ except ImportError:
53
+ MUTAGEN_AVAILABLE = False
54
+ return False
55
+
56
# Import mutagen eagerly at module load when the dependency manager reports
# it as available; otherwise the globals keep their None placeholders.
if is_mutagen_available():
    _import_mutagen()

# Logger used by every function in this module
logger = get_logger('media_tags')
61
+
62
# Define tag mapping for different formats to standardized names
# This helps normalize tags across different audio formats.
# Keys are the format-specific tag identifiers, values are the standardized
# names used as keys in the dictionaries returned by get_file_tags().
# get_file_tags() looks up ID3 keys after cutting them at the first ':',
# Vorbis/generic keys after lower-casing them, and MP4 keys unchanged.
TAG_MAPPING = {
    # ID3 (MP3) tags
    'TIT2': 'title',
    'TALB': 'album',
    'TPE1': 'artist',
    'TPE2': 'albumartist',
    'TCOM': 'composer',
    'TRCK': 'tracknumber',
    'TPOS': 'discnumber',
    'TDRC': 'date',
    'TCON': 'genre',
    'TPUB': 'publisher',
    'TCOP': 'copyright',
    'COMM': 'comment',

    # Vorbis tags (FLAC, OGG)
    'title': 'title',
    'album': 'album',
    'artist': 'artist',
    'albumartist': 'albumartist',
    'composer': 'composer',
    'tracknumber': 'tracknumber',
    'discnumber': 'discnumber',
    'date': 'date',
    'genre': 'genre',
    'publisher': 'publisher',
    'copyright': 'copyright',
    'comment': 'comment',

    # MP4 (M4A, AAC) tags
    '©nam': 'title',
    '©alb': 'album',
    '©ART': 'artist',
    'aART': 'albumartist',
    '©wrt': 'composer',
    'trkn': 'tracknumber',
    'disk': 'discnumber',
    '©day': 'date',
    '©gen': 'genre',
    '©pub': 'publisher',
    'cprt': 'copyright',
    '©cmt': 'comment',

    # Additional tags some files might have
    'album_artist': 'albumartist',
    'track': 'tracknumber',
    'track_number': 'tracknumber',
    'disc': 'discnumber',
    'disc_number': 'discnumber',
    'year': 'date',
    'albuminterpret': 'albumartist',  # German tag name
    'interpret': 'artist',  # German tag name
}
117
+
118
# Define replacements for special tag values.
# normalize_tag_value() first looks for an exact match of the whole value and
# otherwise applies every entry as a substring replacement, in the order the
# entries are listed here.
TAG_VALUE_REPLACEMENTS = {
    "Die drei ???": "Die drei Fragezeichen",
    "Die Drei ???": "Die drei Fragezeichen",
    "DIE DREI ???": "Die drei Fragezeichen",
    "Die drei !!!": "Die drei Ausrufezeichen",
    "Die Drei !!!": "Die drei Ausrufezeichen",
    "DIE DREI !!!": "Die drei Ausrufezeichen",
    "TKKG™": "TKKG",
    "Die drei ??? Kids": "Die drei Fragezeichen Kids",
    "Die Drei ??? Kids": "Die drei Fragezeichen Kids",
    "Bibi & Tina": "Bibi und Tina",
    "Benjamin Blümchen™": "Benjamin Blümchen",
    # Generic fallbacks for the bare punctuation runs
    "???": "Fragezeichen",
    "!!!": "Ausrufezeichen",
}
134
+
135
def normalize_tag_value(value: str) -> str:
    """
    Rewrite known special spellings in a tag value to file-system-friendly text.

    A value that exactly matches a key of TAG_VALUE_REPLACEMENTS is swapped
    for its replacement. Otherwise every key found as a substring is
    replaced in table order, and any remaining "???" becomes "Fragezeichen".

    Args:
        value: The original tag value

    Returns:
        Normalized tag value; falsy input is returned unchanged
    """
    if not value:
        return value

    # Whole-value match wins and skips the substring pass entirely
    exact = TAG_VALUE_REPLACEMENTS.get(value)
    if exact is not None:
        logger.debug("Direct tag replacement: '%s' -> '%s'", value, exact)
        return exact

    normalized = value
    for needle, substitute in TAG_VALUE_REPLACEMENTS.items():
        if needle not in normalized:
            continue
        before = normalized
        normalized = before.replace(needle, substitute)
        logger.debug("Partial tag replacement: '%s' -> '%s'", before, normalized)

    # Final safety net for "???" runs the table pass did not rewrite
    return normalized.replace("???", "Fragezeichen")
166
+
167
def is_available() -> bool:
    """
    Report whether media tags can be read.

    Returns:
        bool: True when mutagen has already been imported by this module,
              otherwise whatever is_mutagen_available() reports
    """
    if MUTAGEN_AVAILABLE:
        return True
    return is_mutagen_available()
175
+
176
def _first_tag_value(raw: Any) -> str:
    """Return the first entry of a list-valued tag, or the value itself, as text."""
    if isinstance(raw, list):
        return str(raw[0]) if raw else ''
    return str(raw)


def get_file_tags(file_path: str) -> Dict[str, Any]:
    """
    Extract metadata tags from an audio file.

    If mutagen has not been imported yet, the function first asks the
    dependency manager to provide it (``ensure_mutagen(auto_install=True)``)
    and imports it. Tag keys are translated through TAG_MAPPING and values
    are passed through normalize_tag_value(); tags without a mapping are
    ignored.

    Args:
        file_path: Path to the audio file

    Returns:
        Dictionary containing standardized tag names and values. Empty when
        mutagen is unavailable or the format is not recognised; possibly
        partial when an error occurs while reading (the error is logged).
    """
    if not MUTAGEN_AVAILABLE:
        # Lazily obtain and import mutagen on first use
        if not (ensure_mutagen(auto_install=True) and _import_mutagen()):
            logger.warning("Mutagen library not available. Cannot read media tags.")
            return {}

    logger.debug("Reading tags from file: %s", file_path)
    tags: Dict[str, Any] = {}

    try:
        # Use mutagen to identify and load the file
        audio = mutagen.File(file_path)
        if audio is None:
            logger.warning("Could not identify file format: %s", file_path)
            return tags

        # The parsed ID3 frames of a file object live in its `tags`
        # attribute. The `ID3` attribute of such objects is the tag *class*,
        # so iterating it fails; always go through the instance.
        id3_tags = audio if isinstance(audio, ID3) else getattr(audio, 'tags', None)

        if isinstance(id3_tags, ID3):
            # MP3 and other ID3-tagged files
            for tag_key, tag_value in id3_tags.items():
                tag_name = tag_key.split(':')[0]  # Handle ID3 tags with colons
                if tag_name in TAG_MAPPING:
                    tags[TAG_MAPPING[tag_name]] = normalize_tag_value(str(tag_value))
        elif isinstance(audio, (FLAC, OggOpus, OggVorbis)):
            # FLAC and OGG files; tags may hold several values, use the first
            for tag_key, tag_values in audio.items():
                tag_key_lower = tag_key.lower()
                if tag_key_lower in TAG_MAPPING:
                    tags[TAG_MAPPING[tag_key_lower]] = normalize_tag_value(
                        _first_tag_value(tag_values)
                    )
        elif isinstance(audio, MP4):
            # MP4 files
            for tag_key, tag_value in audio.items():
                if tag_key not in TAG_MAPPING:
                    continue
                if isinstance(tag_value, list) and tag_key in ('trkn', 'disk'):
                    # Track and disc numbers are tuples; keep the first
                    # element of the first tuple
                    if tag_value and isinstance(tag_value[0], tuple) and len(tag_value[0]) >= 1:
                        tags[TAG_MAPPING[tag_key]] = str(tag_value[0][0])
                else:
                    tags[TAG_MAPPING[tag_key]] = normalize_tag_value(
                        _first_tag_value(tag_value)
                    )
        else:
            # Generic audio file - try to read any available tags
            for tag_key, tag_value in audio.items():
                tag_key_lower = tag_key.lower()
                if tag_key_lower in TAG_MAPPING:
                    tags[TAG_MAPPING[tag_key_lower]] = normalize_tag_value(
                        _first_tag_value(tag_value)
                    )

        logger.debug("Successfully read %d tags from file", len(tags))
        return tags
    except Exception as e:
        # Best effort: report the problem and hand back what was collected
        logger.error("Error reading tags from file %s: %s", file_path, str(e))
        return tags
258
+
259
def extract_first_audio_file_tags(folder_path: str) -> Dict[str, str]:
    """
    Read the tags of the first file that filter_directories() keeps for a folder.

    Args:
        folder_path: Path to folder containing audio files

    Returns:
        Dictionary containing standardized tag names and values, or an empty
        dictionary when the folder yields no files
    """
    from .audio_conversion import filter_directories
    import glob

    logger.debug("Looking for audio files in %s", folder_path)
    everything = glob.glob(os.path.join(folder_path, "*"))
    candidates = filter_directories(everything)

    if candidates:
        chosen = candidates[0]
        logger.debug("Using first audio file for tags: %s", chosen)
        return get_file_tags(chosen)

    logger.debug("No audio files found in folder")
    return {}
284
+
285
def extract_album_info(folder_path: str) -> Dict[str, str]:
    """
    Derive album-level metadata from the tags of all files in a folder.

    For each of album, albumartist, artist, date and genre the value that
    occurs in the most files is chosen; on a tie the value seen first wins.

    Args:
        folder_path: Path to folder containing audio files

    Returns:
        Dictionary with extracted metadata (album, albumartist, etc.), or an
        empty dictionary when no file yields any tags
    """
    from .audio_conversion import filter_directories
    import glob

    logger.debug("Extracting album information from folder: %s", folder_path)

    candidates = filter_directories(glob.glob(os.path.join(folder_path, "*")))
    if not candidates:
        logger.debug("No audio files found in folder")
        return {}

    # Keep only the files that actually produced tags
    per_file_tags = [found for found in map(get_file_tags, candidates) if found]
    if not per_file_tags:
        logger.debug("Could not read tags from any files in folder")
        return {}

    album_info = {}
    for field in ('album', 'albumartist', 'artist', 'date', 'genre'):
        tally = {}
        for file_tags in per_file_tags:
            if field in file_tags:
                seen = file_tags[field]
                tally[seen] = tally.get(seen, 0) + 1

        if tally:
            # max() returns the earliest-inserted key among equal counts
            album_info[field] = max(tally, key=tally.get)

    logger.debug("Extracted album info: %s", str(album_info))
    return album_info
340
+
341
def get_file_metadata(file_path: str) -> Dict[str, str]:
    """
    Collect file-system details and tags for a single audio file.

    The result holds 'filename' (base name without extension), 'extension'
    (lower-case, dots removed), 'path', 'filesize' (as returned by
    os.path.getsize) and every tag returned by get_file_tags().

    Args:
        file_path: Path to the audio file

    Returns:
        Dictionary containing metadata information; if an error occurs it
        is logged and the entries gathered up to that point are returned
    """
    info = {}

    try:
        stem, suffix = os.path.splitext(os.path.basename(file_path))

        info['filename'] = stem
        info['extension'] = suffix.lower().replace('.', '')
        info['path'] = file_path
        info['filesize'] = os.path.getsize(file_path)

        # Tags are merged last, on top of the basic entries
        info.update(get_file_tags(file_path))
    except Exception as e:
        logger.error("Error getting file metadata for %s: %s", file_path, str(e))

    return info
374
+
375
def get_folder_metadata(folder_path: str) -> Dict[str, Any]:
    """
    Get comprehensive metadata about a folder of audio files.

    Combines three sources: the folder name and path, album information
    taken from the files' tags (extract_album_info), and metadata derived
    from the folder name (extract_folder_meta). Tag-based values take
    precedence; folder-name values only fill keys that are missing or empty.

    Args:
        folder_path: Path to folder containing audio files

    Returns:
        Dictionary containing the folder metadata plus 'files' (a list with
        the get_file_metadata() result for each file) and 'file_count'
    """
    from .audio_conversion import filter_directories
    from .recursive_processor import extract_folder_meta
    import glob

    # basename() yields '' for a path ending in a separator ("album/"), so
    # normalise first; an empty path keeps its empty name.
    folder_name = os.path.basename(os.path.normpath(folder_path)) if folder_path else ''

    folder_metadata: Dict[str, Any] = {
        'folder_name': folder_name,
        'folder_path': folder_path,
    }

    # Try to extract album info from the tags
    folder_metadata.update(extract_album_info(folder_path))

    # Combine the metadata, prioritizing tag-based over folder name based
    for key, value in extract_folder_meta(folder_path).items():
        if not folder_metadata.get(key):
            folder_metadata[key] = value

    # Get list of audio files with their metadata
    audio_files = filter_directories(glob.glob(os.path.join(folder_path, "*")))
    files_metadata = [get_file_metadata(audio_file) for audio_file in audio_files]

    folder_metadata['files'] = files_metadata
    folder_metadata['file_count'] = len(files_metadata)

    return folder_metadata
419
+
420
def format_metadata_filename(metadata: Dict[str, str], template: str = "{tracknumber} - {title}") -> str:
    """
    Format a filename using metadata and a template string.

    Track and disc numbers are reduced to the part before a '/' and
    zero-padded to two digits when they are plain integers. Placeholders
    without a matching metadata key are removed. Afterwards whitespace and
    dash runs are tidied and the characters ``< > : " / \\ | ? *`` are
    replaced by '-'.

    The passed-in dictionary is not modified, and placeholder-like text
    inside a metadata value is kept verbatim (it is neither expanded nor
    stripped).

    Args:
        metadata: Dictionary of metadata tags
        template: Template string with placeholders matching metadata keys

    Returns:
        Formatted string, or empty string if formatting fails
    """
    import re

    try:
        # Work on a copy so the caller's dictionary stays untouched
        values = dict(metadata)

        # Format track/disc numbers correctly (e.g., "1" -> "01", "1/10" -> "01")
        for number_key in ('tracknumber', 'discnumber'):
            if number_key not in values:
                continue
            leading_part = str(values[number_key]).split('/')[0]
            try:
                values[number_key] = f"{int(leading_part):02d}"
            except (ValueError, TypeError):
                pass  # Keep original value if not a simple number

        def _fill(match):
            key = match.group(1)
            # Placeholders for missing metadata are dropped
            return str(values[key]) if key in values else ''

        # Single pass: inserted values are never re-scanned for placeholders
        result = re.sub(r'\{([^}]+)\}', _fill, template)

        # Clean up consecutive spaces, dashes, etc.
        result = re.sub(r'\s+', ' ', result)
        result = re.sub(r'[-_\s]*-[-_\s]*', ' - ', result)
        result = result.strip()

        # Replace characters that aren't allowed in filenames
        result = re.sub(r'[<>:"/\\|?*]', '-', result)

        return result
    except Exception as e:
        logger.error("Error formatting metadata: %s", str(e))
        return ""
475
+
476
def extract_artwork(file_path: str, output_path: Optional[str] = None) -> Optional[str]:
    """
    Extract artwork from an audio file.

    The container is chosen by file extension: '.mp3' (first APIC frame),
    '.flac' (first picture), '.m4a'/'.mp4'/'.aac' (first 'covr' entry) and
    '.ogg'/'.oga'/'.opus' (first 'metadata_block_picture' entry).

    Args:
        file_path: Path to the audio file
        output_path: Path where to save the extracted artwork.
                     If None, a temporary file will be created. If the path
                     has no extension, one matching the image type is added.

    Returns:
        Path to the extracted artwork file, or None if no artwork was found,
        mutagen is unavailable, the file is missing or an error occurred
    """
    if not MUTAGEN_AVAILABLE:
        logger.debug("Mutagen not available - cannot extract artwork")
        return None

    if not os.path.exists(file_path):
        logger.error("File not found: %s", file_path)
        return None

    try:
        file_ext = os.path.splitext(file_path.lower())[1]
        artwork_data = None
        mime_type = None

        # Extract artwork based on file type
        if file_ext == '.mp3':
            audio = mutagen.File(file_path)

            # mutagen.File() returns None when it cannot identify the content
            if audio is not None and audio.tags:
                for frame in audio.tags.values():
                    # Not every tag value is guaranteed to be an ID3 frame
                    if getattr(frame, 'FrameID', None) == 'APIC':
                        artwork_data = frame.data
                        mime_type = frame.mime
                        break

        elif file_ext == '.flac':
            audio = FLAC(file_path)

            if audio.pictures:
                artwork_data = audio.pictures[0].data
                mime_type = audio.pictures[0].mime

        elif file_ext in ('.m4a', '.mp4', '.aac'):
            audio = MP4(file_path)

            # Check 'covr' atom
            if 'covr' in audio and audio['covr']:
                artwork_data = audio['covr'][0]
                # Determine mime type based on data format
                if isinstance(artwork_data, mutagen.mp4.MP4Cover):
                    if artwork_data.format == mutagen.mp4.MP4Cover.FORMAT_JPEG:
                        mime_type = 'image/jpeg'
                    elif artwork_data.format == mutagen.mp4.MP4Cover.FORMAT_PNG:
                        mime_type = 'image/png'
                    else:
                        mime_type = 'image/jpeg'  # Default guess

        elif file_ext in ('.ogg', '.oga', '.opus'):
            # The extension does not say which codec is inside; try both
            try:
                audio = OggVorbis(file_path)
            except Exception:
                try:
                    audio = OggOpus(file_path)
                except Exception:
                    logger.debug("Could not determine OGG type for %s", file_path)
                    return None

            # OGG stores the cover as a base64-encoded FLAC picture block
            if 'metadata_block_picture' in audio:
                picture_data = base64.b64decode(audio['metadata_block_picture'][0])
                flac_picture = Picture(data=picture_data)
                artwork_data = flac_picture.data
                mime_type = flac_picture.mime

        if not artwork_data:
            logger.debug("No artwork found in file: %s", file_path)
            return None

        # Determine file extension from mime type; unknown types fall back to jpg
        extension_by_mime = {
            'image/jpeg': '.jpg',
            'image/jpg': '.jpg',
            'image/png': '.png',
            'image/gif': '.gif',
            'image/bmp': '.bmp',
        }
        ext = extension_by_mime.get((mime_type or '').lower(), '.jpg')

        if not output_path:
            # No target given: keep the artwork in a persistent temporary file
            with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as temp_file:
                temp_file.write(artwork_data)
                output_path = temp_file.name
        else:
            if not os.path.splitext(output_path)[1]:
                # Add extension if not in the output path
                output_path += ext
            with open(output_path, 'wb') as f:
                f.write(artwork_data)

        logger.info("Extracted artwork saved to %s", output_path)
        return output_path

    except Exception as e:
        # Best effort: artwork is optional, so failures are only logged
        logger.debug("Error extracting artwork: %s", e)
        return None
586
+
587
def find_cover_image(source_dir: str) -> Optional[str]:
    """
    Find a cover image in the source directory.

    Candidates are tried in priority order: for each common cover name and
    image extension first the exact file name, then a case-insensitive
    match. If none of those exist, the first image file whose name contains
    one of the cover names is used.

    Args:
        source_dir: Path to the directory to search for cover images

    Returns:
        str: Path to the found cover image, or None if not found or if
             source_dir is not a directory
    """
    if not os.path.isdir(source_dir):
        return None

    # Common cover image file names
    cover_names = [
        'cover', 'folder', 'album', 'front', 'artwork', 'image',
        'albumart', 'albumartwork', 'booklet'
    ]

    # Common image extensions
    image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.gif']

    # Read the directory once instead of once per name/extension pair.
    # For names differing only in case, the first listed entry is kept.
    entries = os.listdir(source_dir)
    entry_by_lower = {}
    for entry in entries:
        entry_by_lower.setdefault(entry.lower(), entry)

    for name in cover_names:
        for ext in image_extensions:
            candidate = name + ext

            # Try exact name match
            cover_path = os.path.join(source_dir, candidate)
            if os.path.exists(cover_path):
                logger.debug("Found cover image: %s", cover_path)
                return cover_path

            # Try case-insensitive match
            actual_name = entry_by_lower.get(candidate.lower())
            if actual_name is not None:
                cover_path = os.path.join(source_dir, actual_name)
                logger.debug("Found cover image: %s", cover_path)
                return cover_path

    # If no exact matches, try finding any image file containing a cover name
    for entry in entries:
        entry_lower = entry.lower()
        if os.path.splitext(entry_lower)[1] not in image_extensions:
            continue
        if any(name in entry_lower for name in cover_names):
            cover_path = os.path.join(source_dir, entry)
            logger.debug("Found cover image: %s", cover_path)
            return cover_path

    logger.debug("No cover image found in directory: %s", source_dir)
    return None