TonieToolbox 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,471 @@
1
+ """
2
+ Media tag processing functionality for the TonieToolbox package
3
+
4
+ This module handles reading and processing metadata tags from audio files,
5
+ which can be used to enhance Tonie file creation with proper track information.
6
+ """
7
+
8
+ import os
9
+ from typing import Dict, Any, Optional, List
10
+ import logging
11
+ from .logger import get_logger
12
+ from .dependency_manager import is_mutagen_available, ensure_mutagen
13
+
14
# Dependency state shared by the whole module. The mutagen module and its
# format classes stay None until _import_mutagen() has loaded them.
MUTAGEN_AVAILABLE = False
mutagen = ID3 = FLAC = MP4 = OggOpus = OggVorbis = None
22
+
23
def _import_mutagen():
    """
    Load mutagen and its format classes into the module-level globals.

    The globals are only touched once every import has succeeded, so a
    partially installed mutagen never leaves them half-populated.

    Returns:
        bool: True if import was successful, False otherwise
    """
    global MUTAGEN_AVAILABLE, mutagen, ID3, FLAC, MP4, OggOpus, OggVorbis

    try:
        import mutagen as mutagen_module
        from mutagen.id3 import ID3 as id3_class
        from mutagen.flac import FLAC as flac_class
        from mutagen.mp4 import MP4 as mp4_class
        from mutagen.oggopus import OggOpus as opus_class
        from mutagen.oggvorbis import OggVorbis as vorbis_class
    except ImportError:
        MUTAGEN_AVAILABLE = False
        return False

    # Publish the imported objects under their module-level names
    mutagen, ID3, FLAC = mutagen_module, id3_class, flac_class
    MP4, OggOpus, OggVorbis = mp4_class, opus_class, vorbis_class
    MUTAGEN_AVAILABLE = True
    return True
52
+
53
# Load mutagen eagerly at import time when it is already installed;
# otherwise get_file_tags() attempts the import on first use.
if is_mutagen_available():
    _import_mutagen()

# Logger used by every function in this module
logger = get_logger('media_tags')
58
+
59
# Lookup table translating format-specific tag keys into one standardized
# vocabulary, so callers never need to know which container a tag came from.
TAG_MAPPING = {
    # --- ID3 frames (MP3) ---
    'TIT2': 'title',
    'TALB': 'album',
    'TPE1': 'artist',
    'TPE2': 'albumartist',
    'TCOM': 'composer',
    'TRCK': 'tracknumber',
    'TPOS': 'discnumber',
    'TDRC': 'date',
    'TCON': 'genre',
    'TPUB': 'publisher',
    'TCOP': 'copyright',
    'COMM': 'comment',

    # --- Vorbis comments (FLAC, OGG) ---
    'title': 'title',
    'album': 'album',
    'artist': 'artist',
    'albumartist': 'albumartist',
    'composer': 'composer',
    'tracknumber': 'tracknumber',
    'discnumber': 'discnumber',
    'date': 'date',
    'genre': 'genre',
    'publisher': 'publisher',
    'copyright': 'copyright',
    'comment': 'comment',

    # --- MP4 atoms (M4A, AAC) ---
    '©nam': 'title',
    '©alb': 'album',
    '©ART': 'artist',
    'aART': 'albumartist',
    '©wrt': 'composer',
    'trkn': 'tracknumber',
    'disk': 'discnumber',
    '©day': 'date',
    '©gen': 'genre',
    '©pub': 'publisher',
    'cprt': 'copyright',
    '©cmt': 'comment',

    # --- Alternative spellings seen in some files ---
    'album_artist': 'albumartist',
    'track': 'tracknumber',
    'track_number': 'tracknumber',
    'disc': 'discnumber',
    'disc_number': 'discnumber',
    'year': 'date',
    'albuminterpret': 'albumartist',  # German tag name
    'interpret': 'artist',  # German tag name
}
114
+
115
# Known tag values (or fragments of values) and the spelling that should be
# used instead. normalize_tag_value() tries an exact match first and then
# applies the entries as substring replacements in the order listed here.
TAG_VALUE_REPLACEMENTS = {
    "Die drei ???": "Die drei Fragezeichen",
    "Die Drei ???": "Die drei Fragezeichen",
    "DIE DREI ???": "Die drei Fragezeichen",
    "Die drei !!!": "Die drei Ausrufezeichen",
    "Die Drei !!!": "Die drei Ausrufezeichen",
    "DIE DREI !!!": "Die drei Ausrufezeichen",
    "TKKG™": "TKKG",
    "Die drei ??? Kids": "Die drei Fragezeichen Kids",
    "Die Drei ??? Kids": "Die drei Fragezeichen Kids",
    "Bibi & Tina": "Bibi und Tina",
    "Benjamin Blümchen™": "Benjamin Blümchen",
    "???": "Fragezeichen",
    "!!!": "Ausrufezeichen",
}
131
+
132
def normalize_tag_value(value: str) -> str:
    """
    Rewrite a tag value using the TAG_VALUE_REPLACEMENTS table.

    An exact match against the table wins outright. Otherwise every table
    entry that occurs as a substring is replaced in table order, followed by
    a final sweep for any remaining "???".

    Args:
        value: The original tag value

    Returns:
        Normalized tag value (falsy input is returned unchanged)
    """
    if not value:
        return value

    # Whole-value match: no further processing needed
    try:
        direct = TAG_VALUE_REPLACEMENTS[value]
    except KeyError:
        pass
    else:
        logger.debug("Direct tag replacement: '%s' -> '%s'", value, direct)
        return direct

    # Substring matches, applied cumulatively in table order
    normalized = value
    for needle in TAG_VALUE_REPLACEMENTS:
        if needle not in normalized:
            continue
        before = normalized
        normalized = before.replace(needle, TAG_VALUE_REPLACEMENTS[needle])
        logger.debug("Partial tag replacement: '%s' -> '%s'", before, normalized)

    # Catch any "???" that survived the table pass
    return normalized.replace("???", "Fragezeichen")
163
+
164
def is_available() -> bool:
    """
    Report whether media tags can be read.

    Returns:
        bool: True if mutagen has been imported already or is reported as
        installed by is_mutagen_available(), False otherwise
    """
    if MUTAGEN_AVAILABLE:
        return MUTAGEN_AVAILABLE
    return is_mutagen_available()
172
+
173
def get_file_tags(file_path: str) -> Dict[str, Any]:
    """
    Extract metadata tags from an audio file.

    Mutagen is loaded on demand (ensure_mutagen is asked to install it when
    it is missing). Tag keys listed in TAG_MAPPING are translated to their
    standardized names and text values are passed through
    normalize_tag_value.

    Args:
        file_path: Path to the audio file

    Returns:
        Dictionary containing standardized tag names and values. Empty when
        mutagen is unavailable or the file format cannot be identified; may
        be partial if an error occurs while the tags are being read.
    """
    if not MUTAGEN_AVAILABLE:
        # Install (if necessary) and import mutagen lazily on first use
        if not (ensure_mutagen(auto_install=True) and _import_mutagen()):
            logger.warning("Mutagen library not available. Cannot read media tags.")
            return {}

    logger.debug("Reading tags from file: %s", file_path)
    tags = {}

    try:
        # Use mutagen to identify and load the file
        audio = mutagen.File(file_path)
        if audio is None:
            logger.warning("Could not identify file format: %s", file_path)
            return tags

        # ID3-based file types keep their loaded frames in `audio.tags`.
        # `audio.ID3` is the ID3 *class* used to parse the file, not the tag
        # instance, so iterating it would raise and no tags would be read.
        id3_tags = audio if isinstance(audio, ID3) else getattr(audio, 'tags', None)

        if isinstance(id3_tags, ID3):
            # MP3 and other ID3-tagged files
            for tag_key, tag_value in id3_tags.items():
                # Frame keys may carry a suffix, e.g. "COMM::eng"
                tag_name = tag_key.split(':')[0]
                if tag_name in TAG_MAPPING:
                    tags[TAG_MAPPING[tag_name]] = normalize_tag_value(str(tag_value))
        elif isinstance(audio, (FLAC, OggOpus, OggVorbis)):
            # FLAC and OGG files: keys are case-insensitive Vorbis comments
            for tag_key, tag_values in audio.items():
                tag_key_lower = tag_key.lower()
                if tag_key_lower in TAG_MAPPING:
                    # Some tags might have multiple values; take the first
                    tag_value = tag_values[0] if tag_values else ''
                    tags[TAG_MAPPING[tag_key_lower]] = normalize_tag_value(tag_value)
        elif isinstance(audio, MP4):
            # MP4 files
            for tag_key, tag_value in audio.items():
                if tag_key not in TAG_MAPPING:
                    continue
                if not isinstance(tag_value, list):
                    tags[TAG_MAPPING[tag_key]] = normalize_tag_value(str(tag_value))
                elif tag_key in ('trkn', 'disk'):
                    # Track/disc numbers are (number, total) tuples; keep the number
                    if tag_value and isinstance(tag_value[0], tuple) and len(tag_value[0]) >= 1:
                        tags[TAG_MAPPING[tag_key]] = str(tag_value[0][0])
                else:
                    tag_value_str = str(tag_value[0]) if tag_value else ''
                    tags[TAG_MAPPING[tag_key]] = normalize_tag_value(tag_value_str)
        else:
            # Generic audio file - try to read any available tags
            for tag_key, tag_value in audio.items():
                tag_key_lower = tag_key.lower()
                if tag_key_lower not in TAG_MAPPING:
                    continue
                if isinstance(tag_value, list):
                    tag_value_str = str(tag_value[0]) if tag_value else ''
                else:
                    tag_value_str = str(tag_value)
                tags[TAG_MAPPING[tag_key_lower]] = normalize_tag_value(tag_value_str)

        logger.debug("Successfully read %d tags from file", len(tags))
        return tags
    except Exception as e:
        # Best effort: report the problem and return whatever was collected
        logger.error("Error reading tags from file %s: %s", file_path, str(e))
        return tags
255
+
256
def extract_first_audio_file_tags(folder_path: str) -> Dict[str, str]:
    """
    Read the tags of the first entry that filter_directories keeps from a
    folder listing.

    Args:
        folder_path: Path to folder containing audio files

    Returns:
        Dictionary containing standardized tag names and values, or an empty
        dictionary when the folder yields no files
    """
    from .audio_conversion import filter_directories
    import glob

    logger.debug("Looking for audio files in %s", folder_path)
    listing = glob.glob(os.path.join(folder_path, "*"))
    candidates = filter_directories(listing)

    if candidates:
        # Only the first file is consulted
        chosen = candidates[0]
        logger.debug("Using first audio file for tags: %s", chosen)
        return get_file_tags(chosen)

    logger.debug("No audio files found in folder")
    return {}
281
+
282
def extract_album_info(folder_path: str) -> Dict[str, str]:
    """
    Derive album-level metadata from the audio files in a folder.

    For each of album, albumartist, artist, date and genre the value that
    occurs in the most files is chosen; on a tie the value seen first wins.

    Args:
        folder_path: Path to folder containing audio files

    Returns:
        Dictionary with extracted metadata (album, albumartist, etc.)
    """
    from .audio_conversion import filter_directories
    import glob

    logger.debug("Extracting album information from folder: %s", folder_path)

    # Every file that filter_directories keeps is a candidate
    audio_files = filter_directories(glob.glob(os.path.join(folder_path, "*")))
    if not audio_files:
        logger.debug("No audio files found in folder")
        return {}

    # Keep only the files that actually produced tags
    per_file_tags = [
        file_tags
        for file_tags in (get_file_tags(path) for path in audio_files)
        if file_tags
    ]
    if not per_file_tags:
        logger.debug("Could not read tags from any files in folder")
        return {}

    result = {}
    for tag_name in ('album', 'albumartist', 'artist', 'date', 'genre'):
        # Tally how often each distinct value appears for this tag
        tally = {}
        for file_tags in per_file_tags:
            if tag_name in file_tags:
                seen = file_tags[tag_name]
                tally[seen] = tally.get(seen, 0) + 1

        # max() returns the first maximal entry, so ties go to the value
        # that was encountered first
        if tally:
            result[tag_name] = max(tally, key=tally.get)

    logger.debug("Extracted album info: %s", str(result))
    return result
337
+
338
def get_file_metadata(file_path: str) -> Dict[str, str]:
    """
    Collect file-system details and media tags for one audio file.

    Args:
        file_path: Path to the audio file

    Returns:
        Dictionary containing metadata information: filename (without
        extension), extension (lower case, no dot), path, filesize and every
        tag returned by get_file_tags. If an error occurs, whatever was
        gathered up to that point is returned.
    """
    metadata = {}

    try:
        # Basic file information
        stem, suffix = os.path.splitext(os.path.basename(file_path))
        metadata['filename'] = stem
        metadata['extension'] = suffix.lower().replace('.', '')
        metadata['path'] = file_path
        metadata['filesize'] = os.path.getsize(file_path)

        # Tags read from the file itself
        metadata.update(get_file_tags(file_path))
    except Exception as e:
        logger.error("Error getting file metadata for %s: %s", file_path, str(e))

    return metadata
371
+
372
def get_folder_metadata(folder_path: str) -> Dict[str, Any]:
    """
    Collect metadata for a folder of audio files.

    Tag-based album information takes priority; values parsed from the
    folder name only fill keys that are missing or empty.

    Args:
        folder_path: Path to folder containing audio files

    Returns:
        Dictionary containing metadata information and list of files
    """
    # Basic folder information
    folder_metadata = {
        'folder_name': os.path.basename(folder_path),
        'folder_path': folder_path,
    }

    # Album information derived from the files' tags
    folder_metadata.update(extract_album_info(folder_path))

    # Folder-name metadata acts as a fallback only
    from .recursive_processor import extract_folder_meta
    for key, value in extract_folder_meta(folder_path).items():
        if not folder_metadata.get(key):
            folder_metadata[key] = value

    # Per-file metadata for every file in the folder
    from .audio_conversion import filter_directories
    import glob

    files_metadata = [
        get_file_metadata(path)
        for path in filter_directories(glob.glob(os.path.join(folder_path, "*")))
    ]
    folder_metadata['files'] = files_metadata
    folder_metadata['file_count'] = len(files_metadata)

    return folder_metadata
416
+
417
def format_metadata_filename(metadata: Dict[str, str], template: str = "{tracknumber} - {title}") -> str:
    """
    Format a filename using metadata and a template string.

    Track and disc numbers are zero-padded to two digits ("1" or "1/10"
    becomes "01"). Placeholders without a matching metadata value are
    removed, whitespace and dashes are tidied up, and characters that are
    not allowed in filenames are replaced with "-".

    The caller's metadata dictionary is never modified.

    Args:
        metadata: Dictionary of metadata tags
        template: Template string with placeholders matching metadata keys

    Returns:
        Formatted string, or empty string if formatting fails
    """
    import re

    try:
        # Work on a copy so the padded numbers do not leak back to the caller
        values = dict(metadata)

        # Format track and disc numbers correctly (e.g., "1" -> "01")
        for number_key in ('tracknumber', 'discnumber'):
            if values.get(number_key) is None:
                continue
            # str() tolerates numeric values; split handles the "1/10" form
            leading = str(values[number_key]).split('/')[0]
            try:
                values[number_key] = f"{int(leading):02d}"
            except (ValueError, TypeError):
                pass  # Keep original value if not a simple number

        # Substitute keys in template
        result = template
        for key, value in values.items():
            if value is None:
                continue  # Treated as missing; the placeholder is removed below
            placeholder = "{" + key + "}"
            if placeholder in result:
                result = result.replace(placeholder, str(value))

        # Clean up any remaining placeholders for missing metadata
        result = re.sub(r'\{[^}]+\}', '', result)

        # Clean up consecutive spaces, dashes, etc.
        result = re.sub(r'\s+', ' ', result)
        result = re.sub(r'[-_\s]*-[-_\s]*', ' - ', result)
        result = result.strip()

        # Replace characters that aren't allowed in filenames
        result = re.sub(r'[<>:"/\\|?*]', '-', result)

        return result
    except Exception as e:
        logger.error("Error formatting metadata: %s", str(e))
        return ""
@@ -204,12 +204,101 @@ def extract_folder_meta(folder_path: str) -> Dict[str, str]:
204
204
  return meta
205
205
 
206
206
 
207
- def process_recursive_folders(root_path: str) -> List[Tuple[str, str, List[str]]]:
207
def get_folder_name_from_metadata(folder_path: str, use_media_tags: bool = False, template: str = None) -> str:
    """
    Generate a suitable output filename for a folder based on folder name
    and optionally audio file metadata.

    When use_media_tags is set and tag reading is available, the name is
    built from the album information of the contained files, with values
    parsed from the folder name as fallback. If that yields nothing (or
    fails), the name is built from the folder name alone.

    Args:
        folder_path: Path to folder
        use_media_tags: Whether to use media tags from audio files if available
        template: Optional template for formatting output name using media tags

    Returns:
        String with cleaned output name
    """
    # Start with folder name metadata
    folder_meta = extract_folder_meta(folder_path)
    number = folder_meta.get('number')
    title = folder_meta.get('title')
    output_name = None

    # Try to get metadata from audio files if requested
    if use_media_tags:
        try:
            # Import here to avoid circular imports
            from .media_tags import extract_album_info, format_metadata_filename, is_available, normalize_tag_value

            if is_available():
                logger.debug("Using media tags to generate folder name for: %s", folder_path)

                # Get album metadata from the files
                album_info = extract_album_info(folder_path)

                if album_info:
                    # Normalize all tag values to handle special characters
                    album_info = {key: normalize_tag_value(value) for key, value in album_info.items()}

                    # Add folder metadata as fallback values
                    if number and not album_info.get('tracknumber'):
                        album_info['tracknumber'] = number
                    if title and not album_info.get('album'):
                        album_info['album'] = normalize_tag_value(title)

                    # Use template or default format
                    # NOTE(review): the artist suffix and number prefix are also
                    # added to a caller-supplied template - confirm this is intended.
                    format_template = template or "{album}"
                    if album_info.get('artist'):
                        format_template = format_template + " - {artist}"
                    if number:
                        format_template = "{tracknumber} - " + format_template

                    formatted_name = format_metadata_filename(album_info, format_template)

                    if formatted_name:
                        logger.debug("Generated name from media tags: %s", formatted_name)
                        output_name = formatted_name
        except Exception as e:
            logger.warning("Error using media tags for folder naming: %s", str(e))

    # Fall back to folder name parsing if no media tags or if media tag extraction failed
    if not output_name:
        if number and title:
            prefix, text = f"{number} - ", title
        else:
            prefix, text = "", os.path.basename(folder_path)

        # Normalization is best effort: keep the raw text if it cannot be applied
        try:
            from .media_tags import normalize_tag_value
            text = normalize_tag_value(text)
        except Exception as e:
            logger.debug("Could not normalize folder name '%s': %s", text, str(e))

        output_name = prefix + text

    # Spell out "???" / "!!!" first: the invalid-character pass below turns
    # every "?" into "_", after which "???" could no longer be matched.
    output_name = output_name.replace("???", "Fragezeichen")
    output_name = output_name.replace("!!!", "Ausrufezeichen")

    # Clean up the output name (remove invalid filename characters)
    output_name = re.sub(r'[<>:"/\\|?*]', '_', output_name)

    logger.debug("Final generated output name: %s", output_name)
    return output_name
291
+
292
+
293
+ def process_recursive_folders(root_path: str, use_media_tags: bool = False,
294
+ name_template: str = None) -> List[Tuple[str, str, List[str]]]:
208
295
  """
209
296
  Process folders recursively and prepare data for conversion.
210
297
 
211
298
  Args:
212
299
  root_path: Root directory to start processing from
300
+ use_media_tags: Whether to use media tags from audio files for naming
301
+ name_template: Optional template for formatting output names using media tags
213
302
 
214
303
  Returns:
215
304
  List of tuples: (output_filename, folder_path, list_of_audio_files)
@@ -230,17 +319,13 @@ def process_recursive_folders(root_path: str) -> List[Tuple[str, str, List[str]]
230
319
  # Use natural sort order to ensure consistent results
231
320
  audio_files = natural_sort(audio_files)
232
321
 
233
- meta = extract_folder_meta(folder_path)
234
-
235
322
  if audio_files:
236
- # Create output filename from metadata
237
- if meta['number'] and meta['title']:
238
- output_name = f"{meta['number']} - {meta['title']}"
239
- else:
240
- output_name = os.path.basename(folder_path)
241
-
242
- # Clean up the output name (remove invalid filename characters)
243
- output_name = re.sub(r'[<>:"/\\|?*]', '_', output_name)
323
+ # Generate output filename using metadata
324
+ output_name = get_folder_name_from_metadata(
325
+ folder_path,
326
+ use_media_tags=use_media_tags,
327
+ template=name_template
328
+ )
244
329
 
245
330
  results.append((output_name, folder_path, audio_files))
246
331
  logger.debug("Created processing task: %s -> %s (%d files)",