lattifai 0.4.5__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. lattifai/__init__.py +61 -47
  2. lattifai/alignment/__init__.py +6 -0
  3. lattifai/alignment/lattice1_aligner.py +119 -0
  4. lattifai/alignment/lattice1_worker.py +185 -0
  5. lattifai/{tokenizer → alignment}/phonemizer.py +4 -4
  6. lattifai/alignment/segmenter.py +166 -0
  7. lattifai/{tokenizer → alignment}/tokenizer.py +244 -169
  8. lattifai/audio2.py +211 -0
  9. lattifai/caption/__init__.py +20 -0
  10. lattifai/caption/caption.py +1275 -0
  11. lattifai/{io → caption}/gemini_reader.py +30 -30
  12. lattifai/{io → caption}/gemini_writer.py +17 -17
  13. lattifai/{io → caption}/supervision.py +4 -3
  14. lattifai/caption/text_parser.py +145 -0
  15. lattifai/cli/__init__.py +17 -0
  16. lattifai/cli/alignment.py +153 -0
  17. lattifai/cli/caption.py +204 -0
  18. lattifai/cli/server.py +19 -0
  19. lattifai/cli/transcribe.py +197 -0
  20. lattifai/cli/youtube.py +128 -0
  21. lattifai/client.py +460 -251
  22. lattifai/config/__init__.py +20 -0
  23. lattifai/config/alignment.py +73 -0
  24. lattifai/config/caption.py +178 -0
  25. lattifai/config/client.py +46 -0
  26. lattifai/config/diarization.py +67 -0
  27. lattifai/config/media.py +335 -0
  28. lattifai/config/transcription.py +84 -0
  29. lattifai/diarization/__init__.py +5 -0
  30. lattifai/diarization/lattifai.py +89 -0
  31. lattifai/errors.py +98 -91
  32. lattifai/logging.py +116 -0
  33. lattifai/mixin.py +552 -0
  34. lattifai/server/app.py +420 -0
  35. lattifai/transcription/__init__.py +76 -0
  36. lattifai/transcription/base.py +108 -0
  37. lattifai/transcription/gemini.py +219 -0
  38. lattifai/transcription/lattifai.py +103 -0
  39. lattifai/{workflows → transcription}/prompts/__init__.py +4 -4
  40. lattifai/types.py +30 -0
  41. lattifai/utils.py +16 -44
  42. lattifai/workflow/__init__.py +22 -0
  43. lattifai/workflow/agents.py +6 -0
  44. lattifai/{workflows → workflow}/base.py +22 -22
  45. lattifai/{workflows → workflow}/file_manager.py +239 -215
  46. lattifai/workflow/youtube.py +564 -0
  47. lattifai-1.0.0.dist-info/METADATA +736 -0
  48. lattifai-1.0.0.dist-info/RECORD +52 -0
  49. {lattifai-0.4.5.dist-info → lattifai-1.0.0.dist-info}/WHEEL +1 -1
  50. lattifai-1.0.0.dist-info/entry_points.txt +13 -0
  51. {lattifai-0.4.5.dist-info → lattifai-1.0.0.dist-info}/licenses/LICENSE +1 -1
  52. lattifai/base_client.py +0 -126
  53. lattifai/bin/__init__.py +0 -3
  54. lattifai/bin/agent.py +0 -325
  55. lattifai/bin/align.py +0 -296
  56. lattifai/bin/cli_base.py +0 -25
  57. lattifai/bin/subtitle.py +0 -210
  58. lattifai/io/__init__.py +0 -42
  59. lattifai/io/reader.py +0 -85
  60. lattifai/io/text_parser.py +0 -75
  61. lattifai/io/utils.py +0 -15
  62. lattifai/io/writer.py +0 -90
  63. lattifai/tokenizer/__init__.py +0 -3
  64. lattifai/workers/__init__.py +0 -3
  65. lattifai/workers/lattice1_alpha.py +0 -284
  66. lattifai/workflows/__init__.py +0 -34
  67. lattifai/workflows/agents.py +0 -10
  68. lattifai/workflows/gemini.py +0 -167
  69. lattifai/workflows/prompts/README.md +0 -22
  70. lattifai/workflows/prompts/gemini/README.md +0 -24
  71. lattifai/workflows/prompts/gemini/transcription_gem.txt +0 -81
  72. lattifai/workflows/youtube.py +0 -931
  73. lattifai-0.4.5.dist-info/METADATA +0 -808
  74. lattifai-0.4.5.dist-info/RECORD +0 -39
  75. lattifai-0.4.5.dist-info/entry_points.txt +0 -3
  76. {lattifai-0.4.5.dist-info → lattifai-1.0.0.dist-info}/top_level.txt +0 -0
@@ -1,931 +0,0 @@
1
- """
2
- YouTube downloader module using yt-dlp and Agent
3
- """
4
-
5
- import asyncio
6
- import os
7
- import re
8
- import subprocess
9
- import tempfile
10
- from pathlib import Path
11
- from typing import Any, Dict, List, Optional
12
-
13
- from ..client import AsyncLattifAI
14
- from ..io import SUBTITLE_FORMATS, GeminiReader, GeminiWriter, SubtitleIO
15
- from .base import WorkflowAgent, WorkflowStep, setup_workflow_logger
16
- from .file_manager import FileExistenceManager
17
- from .gemini import GeminiTranscriber
18
-
19
-
20
- class YouTubeDownloader:
21
- """YouTube video/audio downloader using yt-dlp
22
-
23
- Configuration (in __init__):
24
- - None (stateless downloader)
25
-
26
- Runtime parameters (in __call__ or methods):
27
- - url: YouTube URL to download
28
- - output_dir: Where to save files
29
- - media_format: Format to download (mp3, mp4, etc.)
30
- - force_overwrite: Whether to overwrite existing files
31
- """
32
-
33
- def __init__(self):
34
- self.logger = setup_workflow_logger('youtube')
35
- # Check if yt-dlp is available
36
- self._check_ytdlp()
37
-
38
- @staticmethod
39
- def extract_video_id(url: str) -> str:
40
- """
41
- Extract video ID from YouTube URL
42
-
43
- Supports various YouTube URL formats:
44
- - https://www.youtube.com/watch?v=VIDEO_ID
45
- - https://youtu.be/VIDEO_ID
46
- - https://www.youtube.com/shorts/VIDEO_ID
47
- - https://m.youtube.com/watch?v=VIDEO_ID
48
-
49
- Returns:
50
- Video ID (e.g., 'cprOj8PWepY')
51
- """
52
- patterns = [
53
- r'(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/shorts/)([a-zA-Z0-9_-]{11})',
54
- r'youtube\.com/embed/([a-zA-Z0-9_-]{11})',
55
- r'youtube\.com/v/([a-zA-Z0-9_-]{11})',
56
- ]
57
-
58
- for pattern in patterns:
59
- match = re.search(pattern, url)
60
- if match:
61
- return match.group(1)
62
- return 'youtube_media'
63
-
64
- def _check_ytdlp(self):
65
- """Check if yt-dlp is installed"""
66
- try:
67
- result = subprocess.run(['yt-dlp', '--version'], capture_output=True, text=True, check=True)
68
- self.logger.info(f'yt-dlp version: {result.stdout.strip()}')
69
- except (subprocess.CalledProcessError, FileNotFoundError):
70
- raise RuntimeError(
71
- 'yt-dlp is not installed or not found in PATH. Please install it with: pip install yt-dlp'
72
- )
73
-
74
- async def get_video_info(self, url: str) -> Dict[str, Any]:
75
- """Get video metadata without downloading"""
76
- self.logger.info(f'🔍 Extracting video info for: {url}')
77
-
78
- cmd = ['yt-dlp', '--dump-json', '--no-download', url]
79
-
80
- try:
81
- # Run in thread pool to avoid blocking
82
- loop = asyncio.get_event_loop()
83
- result = await loop.run_in_executor(
84
- None, lambda: subprocess.run(cmd, capture_output=True, text=True, check=True)
85
- )
86
-
87
- import json
88
-
89
- metadata = json.loads(result.stdout)
90
-
91
- # Extract relevant info
92
- info = {
93
- 'title': metadata.get('title', 'Unknown'),
94
- 'duration': metadata.get('duration', 0),
95
- 'uploader': metadata.get('uploader', 'Unknown'),
96
- 'upload_date': metadata.get('upload_date', 'Unknown'),
97
- 'view_count': metadata.get('view_count', 0),
98
- 'description': metadata.get('description', ''),
99
- 'thumbnail': metadata.get('thumbnail', ''),
100
- 'webpage_url': metadata.get('webpage_url', url),
101
- }
102
-
103
- self.logger.info(f'✅ Video info extracted: {info["title"]}')
104
- return info
105
-
106
- except subprocess.CalledProcessError as e:
107
- self.logger.error(f'Failed to extract video info: {e.stderr}')
108
- raise RuntimeError(f'Failed to extract video info: {e.stderr}')
109
- except json.JSONDecodeError as e:
110
- self.logger.error(f'Failed to parse video metadata: {e}')
111
- raise RuntimeError(f'Failed to parse video metadata: {e}')
112
-
113
- async def download_media(
114
- self,
115
- url: str,
116
- output_dir: Optional[str] = None,
117
- media_format: Optional[str] = None,
118
- force_overwrite: bool = False,
119
- ) -> str:
120
- """
121
- Download media (audio or video) from YouTube URL based on format
122
-
123
- This is a unified method that automatically selects between audio and video
124
- download based on the media format extension.
125
-
126
- Args:
127
- url: YouTube URL
128
- output_dir: Output directory (default: temp directory)
129
- media_format: Media format - audio (mp3, wav, m4a, aac, opus, ogg, flac, aiff)
130
- or video (mp4, webm, mkv, avi, mov, etc.) (default: instance format)
131
- force_overwrite: Skip user confirmation and overwrite existing files
132
-
133
- Returns:
134
- Path to downloaded media file
135
- """
136
- media_format = media_format or self.media_format
137
-
138
- # Determine if format is audio or video
139
- audio_formats = ['mp3', 'wav', 'm4a', 'aac', 'opus', 'ogg', 'flac', 'aiff']
140
- is_audio = media_format.lower() in audio_formats
141
-
142
- if is_audio:
143
- self.logger.info(f'🎵 Detected audio format: {media_format}')
144
- return await self.download_audio(
145
- url=url, output_dir=output_dir, media_format=media_format, force_overwrite=force_overwrite
146
- )
147
- else:
148
- self.logger.info(f'🎬 Detected video format: {media_format}')
149
- return await self.download_video(
150
- url=url, output_dir=output_dir, video_format=media_format, force_overwrite=force_overwrite
151
- )
152
-
153
- async def _download_media_internal(
154
- self,
155
- url: str,
156
- output_dir: str,
157
- media_format: str,
158
- is_audio: bool,
159
- force_overwrite: bool = False,
160
- ) -> str:
161
- """
162
- Internal unified method for downloading audio or video from YouTube
163
-
164
- Args:
165
- url: YouTube URL
166
- output_dir: Output directory
167
- media_format: Media format (audio or video extension)
168
- is_audio: True for audio download, False for video download
169
- force_overwrite: Skip user confirmation and overwrite existing files
170
-
171
- Returns:
172
- Path to downloaded media file
173
- """
174
- target_dir = Path(output_dir).expanduser()
175
- media_type = 'audio' if is_audio else 'video'
176
- emoji = '🎵' if is_audio else '🎬'
177
-
178
- self.logger.info(f'{emoji} Downloading {media_type} from: {url}')
179
- self.logger.info(f'📁 Output directory: {target_dir}')
180
- self.logger.info(f'{"🎶" if is_audio else "🎥"} Media format: {media_format}')
181
-
182
- # Create output directory if it doesn't exist
183
- target_dir.mkdir(parents=True, exist_ok=True)
184
-
185
- # Extract video ID and check for existing files
186
- video_id = self.extract_video_id(url)
187
- existing_files = FileExistenceManager.check_existing_files(video_id, str(target_dir), [media_format])
188
-
189
- # Handle existing files
190
- if existing_files['media'] and not force_overwrite:
191
- if FileExistenceManager.is_interactive_mode():
192
- user_choice = FileExistenceManager.prompt_user_confirmation(
193
- {'media': existing_files['media']}, 'media download'
194
- )
195
-
196
- if user_choice == 'cancel':
197
- raise RuntimeError('Media download cancelled by user')
198
- elif user_choice == 'overwrite':
199
- # Continue with download
200
- pass
201
- elif user_choice in existing_files['media']:
202
- # User selected a specific file
203
- self.logger.info(f'✅ Using selected media file: {user_choice}')
204
- return user_choice
205
- else:
206
- # Fallback: use first file
207
- self.logger.info(f'✅ Using existing media file: {existing_files["media"][0]}')
208
- return existing_files['media'][0]
209
- else:
210
- # Non-interactive mode: use existing file
211
- self.logger.info(f'✅ Using existing media file: {existing_files["media"][0]}')
212
- return existing_files['media'][0]
213
-
214
- # Generate output filename template
215
- output_template = str(target_dir / f'{video_id}.%(ext)s')
216
-
217
- # Build yt-dlp command based on media type
218
- if is_audio:
219
- cmd = [
220
- 'yt-dlp',
221
- '--extract-audio',
222
- '--audio-format',
223
- media_format,
224
- '--audio-quality',
225
- '0', # Best quality
226
- '--output',
227
- output_template,
228
- '--no-playlist',
229
- url,
230
- ]
231
- else:
232
- cmd = [
233
- 'yt-dlp',
234
- '--format',
235
- 'bestvideo*+bestaudio/best',
236
- '--merge-output-format',
237
- media_format,
238
- '--output',
239
- output_template,
240
- '--no-playlist',
241
- url,
242
- ]
243
-
244
- try:
245
- # Run in thread pool to avoid blocking
246
- loop = asyncio.get_event_loop()
247
- result = await loop.run_in_executor(
248
- None, lambda: subprocess.run(cmd, capture_output=True, text=True, check=True)
249
- )
250
-
251
- self.logger.info(f'✅ {media_type.capitalize()} download completed')
252
-
253
- # Find the downloaded file
254
- # Try to parse from yt-dlp output first
255
- if is_audio:
256
- output_lines = result.stderr.strip().split('\n')
257
- for line in reversed(output_lines):
258
- if 'Destination:' in line or 'has already been downloaded' in line:
259
- parts = line.split()
260
- filename = ' '.join(parts[1:]) if 'Destination:' in line else parts[0]
261
- file_path = target_dir / filename
262
- if file_path.exists():
263
- self.logger.info(f'{emoji} Downloaded {media_type} file: {file_path}')
264
- return str(file_path)
265
-
266
- # Check for expected file format
267
- expected_file = target_dir / f'{video_id}.{media_format}'
268
- if expected_file.exists():
269
- self.logger.info(f'{emoji} Downloaded {media_type}: {expected_file}')
270
- return str(expected_file)
271
-
272
- # Fallback: search for media files with this video_id
273
- if is_audio:
274
- fallback_extensions = [media_format, 'mp3', 'wav', 'm4a', 'aac']
275
- else:
276
- fallback_extensions = [media_format, 'mp4', 'webm', 'mkv']
277
-
278
- for ext in fallback_extensions:
279
- files = list(target_dir.glob(f'{video_id}*.{ext}'))
280
- if files:
281
- latest_file = max(files, key=os.path.getctime)
282
- self.logger.info(f'{emoji} Found {media_type} file: {latest_file}')
283
- return str(latest_file)
284
-
285
- raise RuntimeError(f'Downloaded {media_type} file not found')
286
-
287
- except subprocess.CalledProcessError as e:
288
- self.logger.error(f'Failed to download {media_type}: {e.stderr}')
289
- raise RuntimeError(f'Failed to download {media_type}: {e.stderr}')
290
-
291
- async def download_audio(
292
- self,
293
- url: str,
294
- output_dir: Optional[str] = None,
295
- media_format: Optional[str] = None,
296
- force_overwrite: bool = False,
297
- ) -> str:
298
- """
299
- Download audio from YouTube URL
300
-
301
- Args:
302
- url: YouTube URL
303
- output_dir: Output directory (default: temp directory)
304
- media_format: Audio format (default: instance format)
305
- force_overwrite: Skip user confirmation and overwrite existing files
306
-
307
- Returns:
308
- Path to downloaded audio file
309
- """
310
- target_dir = output_dir or tempfile.gettempdir()
311
- media_format = media_format or self.media_format
312
- return await self._download_media_internal(
313
- url, target_dir, media_format, is_audio=True, force_overwrite=force_overwrite
314
- )
315
-
316
- async def download_video(
317
- self, url: str, output_dir: Optional[str] = None, video_format: str = 'mp4', force_overwrite: bool = False
318
- ) -> str:
319
- """
320
- Download video from YouTube URL
321
-
322
- Args:
323
- url: YouTube URL
324
- output_dir: Output directory (default: temp directory)
325
- video_format: Video format
326
- force_overwrite: Skip user confirmation and overwrite existing files
327
-
328
- Returns:
329
- Path to downloaded video file
330
- """
331
- target_dir = output_dir or tempfile.gettempdir()
332
- return await self._download_media_internal(
333
- url, target_dir, video_format, is_audio=False, force_overwrite=force_overwrite
334
- )
335
-
336
- async def download_subtitles(
337
- self,
338
- url: str,
339
- output_dir: str,
340
- force_overwrite: bool = False,
341
- subtitle_lang: Optional[str] = None,
342
- enable_gemini_option: bool = False,
343
- ) -> Optional[str]:
344
- """
345
- Download video subtitles using yt-dlp
346
-
347
- Args:
348
- url: YouTube URL
349
- output_dir: Output directory
350
- force_overwrite: Skip user confirmation and overwrite existing files
351
- subtitle_lang: Specific subtitle language/track to download (e.g., 'en')
352
- If None, downloads all available subtitles
353
- enable_gemini_option: Whether to show Gemini transcription as an option in interactive mode
354
-
355
- Returns:
356
- Path to downloaded transcript file or None if not available
357
- """
358
- target_dir = Path(output_dir).expanduser()
359
-
360
- # Create output directory if it doesn't exist
361
- target_dir.mkdir(parents=True, exist_ok=True)
362
-
363
- # Extract video ID and check for existing subtitle files
364
- video_id = self.extract_video_id(url)
365
- if not force_overwrite:
366
- existing_files = FileExistenceManager.check_existing_files(
367
- video_id, str(target_dir), subtitle_formats=SUBTITLE_FORMATS
368
- )
369
-
370
- # Handle existing subtitle files
371
- if existing_files['subtitle'] and not force_overwrite:
372
- if FileExistenceManager.is_interactive_mode():
373
- user_choice = FileExistenceManager.prompt_user_confirmation(
374
- {'subtitle': existing_files['subtitle']}, 'subtitle download'
375
- )
376
-
377
- if user_choice == 'cancel':
378
- raise RuntimeError('Subtitle download cancelled by user')
379
- elif user_choice == 'overwrite':
380
- # Continue with download
381
- pass
382
- elif user_choice in existing_files['subtitle']:
383
- # User selected a specific file
384
- subtitle_file = Path(user_choice)
385
- self.logger.info(f'✅ Using selected subtitle file: {subtitle_file}')
386
- return str(subtitle_file)
387
- else:
388
- # Fallback: use first file
389
- subtitle_file = Path(existing_files['subtitle'][0])
390
- self.logger.info(f'✅ Using existing subtitle file: {subtitle_file}')
391
- return str(subtitle_file)
392
- else:
393
- subtitle_file = Path(existing_files['subtitle'][0])
394
- self.logger.info(f'🔍 Found existing subtitle: {subtitle_file}')
395
- return str(subtitle_file)
396
-
397
- self.logger.info(f'📥 Downloading subtitle for: {url}')
398
- if subtitle_lang:
399
- self.logger.info(f'🎯 Targeting specific subtitle track: {subtitle_lang}')
400
-
401
- output_template = str(target_dir / f'{video_id}.%(ext)s')
402
-
403
- # Configure yt-dlp options for subtitle download
404
- ytdlp_options = [
405
- 'yt-dlp',
406
- '--skip-download', # Don't download video/audio
407
- '--output',
408
- output_template,
409
- '--sub-format',
410
- 'best', # Prefer best available format
411
- ]
412
-
413
- # Add subtitle language selection if specified
414
- if subtitle_lang:
415
- ytdlp_options.extend(['--write-sub', '--write-auto-sub', '--sub-langs', f'{subtitle_lang}.*'])
416
- else:
417
- # Download only manual subtitles (not auto-generated) in English to avoid rate limiting
418
- ytdlp_options.extend(['--write-sub', '--write-auto-sub'])
419
-
420
- ytdlp_options.append(url)
421
-
422
- try:
423
- # Run in thread pool to avoid blocking
424
- loop = asyncio.get_event_loop()
425
- result = await loop.run_in_executor(
426
- None, lambda: subprocess.run(ytdlp_options, capture_output=True, text=True, check=True)
427
- )
428
-
429
- self.logger.info(f'yt-dlp transcript output: {result.stdout.strip()}')
430
-
431
- # Find the downloaded transcript file
432
- subtitle_patterns = [
433
- f'{video_id}.*vtt',
434
- f'{video_id}.*srt',
435
- f'{video_id}.*sub',
436
- f'{video_id}.*sbv',
437
- f'{video_id}.*ssa',
438
- f'{video_id}.*ass',
439
- ]
440
-
441
- subtitle_files = []
442
- for pattern in subtitle_patterns:
443
- _subtitle_files = list(target_dir.glob(pattern))
444
- for subtitle_file in _subtitle_files:
445
- self.logger.info(f'📥 Downloaded subtitle: {subtitle_file}')
446
- subtitle_files.extend(_subtitle_files)
447
-
448
- if not subtitle_files:
449
- self.logger.warning('No subtitle available for this video')
450
- return None
451
-
452
- # If only one subtitle file, return it directly
453
- if len(subtitle_files) == 1:
454
- self.logger.info(f'✅ Using subtitle: {subtitle_files[0]}')
455
- return str(subtitle_files[0])
456
-
457
- # Multiple subtitle files found, let user choose
458
- if FileExistenceManager.is_interactive_mode():
459
- self.logger.info(f'📋 Found {len(subtitle_files)} subtitle files')
460
- # Use the enable_gemini_option parameter passed by caller
461
- subtitle_choice = FileExistenceManager.prompt_file_selection(
462
- file_type='subtitle',
463
- files=[str(f) for f in subtitle_files],
464
- operation='use',
465
- enable_gemini=enable_gemini_option,
466
- )
467
-
468
- if subtitle_choice == 'cancel':
469
- raise RuntimeError('Subtitle selection cancelled by user')
470
- elif subtitle_choice == 'gemini':
471
- # User chose to transcribe with Gemini instead of using downloaded subtitles
472
- self.logger.info('✨ User selected Gemini transcription')
473
- return 'gemini' # Return special value to indicate Gemini transcription
474
- elif subtitle_choice:
475
- self.logger.info(f'✅ Selected subtitle: {subtitle_choice}')
476
- return subtitle_choice
477
- else:
478
- # Fallback to first file
479
- self.logger.info(f'✅ Using first subtitle: {subtitle_files[0]}')
480
- return str(subtitle_files[0])
481
- else:
482
- # Non-interactive mode: use first file
483
- self.logger.info(f'✅ Using first subtitle: {subtitle_files[0]}')
484
- return str(subtitle_files[0])
485
-
486
- except subprocess.CalledProcessError as e:
487
- error_msg = e.stderr.strip() if e.stderr else str(e)
488
- if 'No automatic or manual subtitles found' in error_msg:
489
- self.logger.warning('No subtitles available for this video')
490
- return None
491
- else:
492
- self.logger.error(f'Failed to download transcript: {error_msg}')
493
- raise RuntimeError(f'Failed to download transcript: {error_msg}')
494
-
495
- async def list_available_subtitles(self, url: str) -> List[Dict[str, Any]]:
496
- """
497
- List all available subtitle tracks for a YouTube video
498
-
499
- Args:
500
- url: YouTube URL
501
-
502
- Returns:
503
- List of subtitle track information dictionaries
504
- """
505
- self.logger.info(f'📋 Listing available subtitles for: {url}')
506
-
507
- cmd = ['yt-dlp', '--list-subs', '--no-download', url]
508
-
509
- try:
510
- # Run in thread pool to avoid blocking
511
- loop = asyncio.get_event_loop()
512
- result = await loop.run_in_executor(
513
- None, lambda: subprocess.run(cmd, capture_output=True, text=True, check=True)
514
- )
515
-
516
- # Parse the subtitle list output
517
- subtitle_info = []
518
- lines = result.stdout.strip().split('\n')
519
-
520
- # Look for the subtitle section (not automatic captions)
521
- in_subtitle_section = False
522
- for line in lines:
523
- if 'Available subtitles for' in line:
524
- in_subtitle_section = True
525
- continue
526
- elif 'Available automatic captions for' in line:
527
- in_subtitle_section = False
528
- continue
529
- elif in_subtitle_section and line.strip():
530
- # Skip header lines
531
- if 'Language' in line and 'Name' in line and 'Formats' in line:
532
- continue
533
-
534
- # Parse subtitle information
535
- # Format: "Language Name Formats" where formats are comma-separated
536
- # Example: "en-uYU-mmqFLq8 English - CC1 vtt, srt, ttml, srv3, srv2, srv1, json3"
537
-
538
- if line.strip() and not line.startswith('['):
539
- # Split by multiple spaces to separate language, name, and formats
540
- import re
541
-
542
- parts = re.split(r'\s{2,}', line.strip())
543
-
544
- if len(parts) >= 2:
545
- # First part is language, last part is formats
546
- language_and_name = parts[0]
547
- formats_str = parts[-1]
548
-
549
- # Split language and name - language is first word
550
- lang_name_parts = language_and_name.split(' ', 1)
551
- language = lang_name_parts[0]
552
- name = lang_name_parts[1] if len(lang_name_parts) > 1 else ''
553
-
554
- # If there are more than 2 parts, middle parts are also part of name
555
- if len(parts) > 2:
556
- name = ' '.join([name] + parts[1:-1]).strip()
557
-
558
- # Parse formats - they are comma-separated
559
- formats = [f.strip() for f in formats_str.split(',') if f.strip()]
560
-
561
- subtitle_info.append({'language': language, 'name': name, 'formats': formats})
562
-
563
- self.logger.info(f'✅ Found {len(subtitle_info)} subtitle tracks')
564
- return subtitle_info
565
-
566
- except subprocess.CalledProcessError as e:
567
- self.logger.error(f'Failed to list subtitles: {e.stderr}')
568
- raise RuntimeError(f'Failed to list subtitles: {e.stderr}')
569
-
570
-
571
- class YouTubeSubtitleAgent(WorkflowAgent):
572
- """Agent for YouTube URL to aligned subtitles workflow
573
-
574
- Configuration (in __init__):
575
- - downloader, transcriber, aligner: Component instances (dependency injection)
576
- - max_retries: Max retry attempts for workflow steps
577
-
578
- Runtime parameters (in __call__ or process_youtube_url):
579
- - url: YouTube URL to process
580
- - output_dir: Where to save files
581
- - media_format: Video/audio format (mp3, mp4, etc.)
582
- - force_overwrite: Whether to overwrite existing files
583
- - output_format: Subtitle output format (srt, vtt, etc.)
584
- - split_sentence: Re-segment subtitles semantically
585
- - word_level: Include word-level timestamps
586
- """
587
-
588
- def __init__(
589
- self,
590
- downloader: YouTubeDownloader,
591
- transcriber: GeminiTranscriber,
592
- aligner: AsyncLattifAI,
593
- max_retries: int = 0,
594
- ):
595
- super().__init__('YouTube Subtitle Agent', max_retries)
596
-
597
- # Components (injected)
598
- self.downloader = downloader
599
- self.transcriber = transcriber
600
- self.aligner = aligner
601
-
602
- def define_steps(self) -> List[WorkflowStep]:
603
- """Define the workflow steps"""
604
- return [
605
- WorkflowStep(
606
- name='Process YouTube URL', description='Extract video info and download video/audio', required=True
607
- ),
608
- WorkflowStep(
609
- name='Transcribe Media',
610
- description='Download subtitle if available or transcribe the media file',
611
- required=True,
612
- ),
613
- WorkflowStep(name='Align Subtitle', description='Align Subtitle with media using LattifAI', required=True),
614
- WorkflowStep(
615
- name='Export Results', description='Export aligned subtitles in specified formats', required=True
616
- ),
617
- ]
618
-
619
- async def execute_step(self, step: WorkflowStep, context: Dict[str, Any]) -> Any:
620
- """Execute a single workflow step"""
621
-
622
- if step.name == 'Process YouTube URL':
623
- return await self._process_youtube_url(context)
624
-
625
- elif step.name == 'Transcribe Media':
626
- return await self._transcribe_media(context)
627
-
628
- elif step.name == 'Align Subtitle':
629
- return await self._align_subtitle(context)
630
-
631
- elif step.name == 'Export Results':
632
- return await self._export_results(context)
633
-
634
- else:
635
- raise ValueError(f'Unknown step: {step.name}')
636
-
637
- async def _process_youtube_url(self, context: Dict[str, Any]) -> Dict[str, Any]:
638
- """Step 1: Process YouTube URL and download video"""
639
- url = context.get('url')
640
- if not url:
641
- raise ValueError('YouTube URL is required')
642
-
643
- output_dir = context.get('output_dir') or tempfile.gettempdir()
644
- output_dir = Path(output_dir).expanduser()
645
- output_dir.mkdir(parents=True, exist_ok=True)
646
-
647
- media_format = context.get('media_format', 'mp4')
648
- force_overwrite = context.get('force_overwrite', False)
649
-
650
- self.logger.info(f'🎥 Processing YouTube URL: {url}')
651
- self.logger.info(f'📦 Media format: {media_format}')
652
-
653
- # Download media (audio or video) with runtime parameters
654
- media_path = await self.downloader.download_media(
655
- url=url,
656
- output_dir=str(output_dir),
657
- media_format=media_format,
658
- force_overwrite=force_overwrite,
659
- )
660
-
661
- # Try to download subtitles if available
662
- subtitle_path = None
663
- try:
664
- subtitle_path = await self.downloader.download_subtitles(
665
- url=url,
666
- output_dir=str(output_dir),
667
- force_overwrite=force_overwrite,
668
- enable_gemini_option=bool(self.transcriber.api_key),
669
- )
670
- if subtitle_path:
671
- self.logger.info(f'✅ Subtitle downloaded: {subtitle_path}')
672
- else:
673
- self.logger.info('ℹ️ No subtitles available for this video')
674
- except Exception as e:
675
- self.logger.warning(f'⚠️ Failed to download subtitles: {e}')
676
- # Continue without subtitles - will transcribe later if needed
677
-
678
- # Get video metadata
679
- metadata = await self.downloader.get_video_info(url)
680
-
681
- result = {
682
- 'url': url,
683
- 'video_path': media_path, # Keep 'video_path' key for backward compatibility
684
- 'audio_path': media_path, # Also add 'audio_path' for clarity
685
- 'metadata': metadata,
686
- 'video_format': media_format,
687
- 'output_dir': output_dir,
688
- 'force_overwrite': force_overwrite,
689
- 'downloaded_subtitle_path': subtitle_path, # Store downloaded subtitle path
690
- }
691
-
692
- self.logger.info(f'✅ Media downloaded: {media_path}')
693
- return result
694
-
695
- async def _transcribe_media(self, context: Dict[str, Any]) -> Dict[str, Any]:
696
- """Step 2: Transcribe video using Gemini 2.5 Pro or use downloaded subtitle"""
697
- url = context.get('url')
698
- result = context.get('process_youtube_url_result', {})
699
- video_path = result.get('video_path')
700
- output_dir = result.get('output_dir')
701
- force_overwrite = result.get('force_overwrite', False)
702
- downloaded_subtitle_path = result.get('downloaded_subtitle_path')
703
-
704
- if not url or not video_path:
705
- raise ValueError('URL and video path not found in context')
706
-
707
- video_id = self.downloader.extract_video_id(url)
708
-
709
- # If subtitle was already downloaded in step 1 and user selected it, use it directly
710
- if downloaded_subtitle_path and downloaded_subtitle_path != 'gemini':
711
- self.logger.info(f'📥 Using subtitle: {downloaded_subtitle_path}')
712
- return {'subtitle_path': downloaded_subtitle_path}
713
-
714
- # Check for existing subtitles if subtitle was not downloaded yet
715
- self.logger.info('📥 Checking for existing subtitles...')
716
-
717
- # Check for existing subtitle files (all formats including Gemini transcripts)
718
- existing_files = FileExistenceManager.check_existing_files(
719
- video_id,
720
- str(output_dir),
721
- subtitle_formats=SUBTITLE_FORMATS, # Check all subtitle formats including Markdown
722
- )
723
-
724
- # Prompt user if subtitle exists and force_overwrite is not set
725
- if existing_files['subtitle'] and not force_overwrite:
726
- # Let user choose which subtitle file to use
727
- # Enable Gemini option if API key is available (check transcriber's api_key)
728
- has_gemini_key = bool(self.transcriber.api_key)
729
- subtitle_choice = FileExistenceManager.prompt_file_selection(
730
- file_type='subtitle',
731
- files=existing_files['subtitle'],
732
- operation='transcribe',
733
- enable_gemini=has_gemini_key,
734
- )
735
-
736
- if subtitle_choice == 'cancel':
737
- raise RuntimeError('Transcription cancelled by user')
738
- elif subtitle_choice in ('overwrite', 'gemini'):
739
- # Continue to transcription below
740
- # For 'gemini', user explicitly chose to transcribe with Gemini
741
- pass
742
- elif subtitle_choice == 'use':
743
- # User chose to use existing subtitle files (use first one)
744
- subtitle_path = Path(existing_files['subtitle'][0])
745
- self.logger.info(f'🔁 Using existing subtitle: {subtitle_path}')
746
- return {'subtitle_path': str(subtitle_path)}
747
- elif subtitle_choice: # User selected a specific file path
748
- # Use selected subtitle
749
- subtitle_path = Path(subtitle_choice)
750
- self.logger.info(f'🔁 Using existing subtitle: {subtitle_path}')
751
- return {'subtitle_path': str(subtitle_path)}
752
- # If user_choice == 'overwrite' or 'gemini', continue to transcription below
753
-
754
- # TODO: support other Transcriber options
755
- self.logger.info('✨ Transcribing URL with Gemini 2.5 Pro...')
756
- transcript = await self.transcriber.transcribe_url(url)
757
- subtitle_path = output_dir / f'{video_id}_Gemini.md'
758
- with open(subtitle_path, 'w', encoding='utf-8') as f:
759
- f.write(transcript)
760
- result = {'subtitle_path': str(subtitle_path)}
761
- self.logger.info(f'✅ Transcript generated: {len(transcript)} characters')
762
- return result
763
-
764
- async def _align_subtitle(self, context: Dict[str, Any]) -> Dict[str, Any]:
765
- """Step 3: Align transcript with video using LattifAI"""
766
- result = context['process_youtube_url_result']
767
- media_path = result.get('video_path', result.get('audio_path'))
768
- subtitle_path = context.get('transcribe_media_result', {}).get('subtitle_path')
769
-
770
- if not media_path or not subtitle_path:
771
- raise ValueError('Video path and subtitle path are required')
772
-
773
- self.logger.info('🎯 Aligning subtitle with video...')
774
-
775
- if subtitle_path.endswith('_Gemini.md'):
776
- is_gemini_format = True
777
- else:
778
- is_gemini_format = False
779
- subtitle_path = Path(subtitle_path)
780
-
781
- self.logger.info(f'📄 Subtitle format: {"Gemini" if is_gemini_format else f"{subtitle_path.suffix}"}')
782
-
783
- original_subtitle_path = subtitle_path
784
- output_dir = result.get('output_dir')
785
- split_sentence = context.get('split_sentence', False)
786
- word_level = context.get('word_level', False)
787
- output_path = output_dir / f'{Path(media_path).stem}_aligned.ass'
788
-
789
- # Perform alignment with LattifAI (split_sentence and word_level passed as function parameters)
790
- aligned_result = await self.aligner.alignment(
791
- audio=media_path,
792
- subtitle=str(subtitle_path), # Use dialogue text for YouTube format, original for plain text
793
- format='gemini' if is_gemini_format else 'auto',
794
- split_sentence=split_sentence,
795
- return_details=word_level,
796
- output_subtitle_path=str(output_path),
797
- )
798
-
799
- result = {
800
- 'aligned_path': output_path,
801
- 'alignment_result': aligned_result,
802
- 'original_subtitle_path': original_subtitle_path,
803
- 'is_gemini_format': is_gemini_format,
804
- }
805
-
806
- self.logger.info('✅ Alignment completed')
807
- return result
808
-
809
- async def _export_results(self, context: Dict[str, Any]) -> Dict[str, Any]:
810
- """Step 4: Export results in specified format and update subtitle file"""
811
- align_result = context.get('align_subtitle_result', {})
812
- aligned_path = align_result.get('aligned_path')
813
- original_subtitle_path = align_result.get('original_subtitle_path')
814
- is_gemini_format = align_result.get('is_gemini_format', False)
815
- metadata = context.get('process_youtube_url_result', {}).get('metadata', {})
816
-
817
- if not aligned_path:
818
- raise ValueError('Aligned subtitle path not found')
819
-
820
- output_format = context.get('output_format', 'srt')
821
- self.logger.info(f'📤 Exporting results in format: {output_format}')
822
-
823
- supervisions = SubtitleIO.read(aligned_path, format='ass')
824
- exported_files = {}
825
-
826
- # Update original transcript file with aligned timestamps if YouTube format
827
- if is_gemini_format:
828
- assert Path(original_subtitle_path).exists(), 'Original subtitle path not found'
829
- self.logger.info('📝 Updating original transcript with aligned timestamps...')
830
-
831
- try:
832
- # Generate updated transcript file path
833
- original_path = Path(original_subtitle_path)
834
- updated_subtitle_path = original_path.parent / f'{original_path.stem}_LattifAI.md'
835
-
836
- # Update timestamps in original transcript
837
- GeminiWriter.update_timestamps(
838
- original_transcript=original_subtitle_path,
839
- aligned_supervisions=supervisions,
840
- output_path=str(updated_subtitle_path),
841
- )
842
-
843
- exported_files['updated_transcript'] = str(updated_subtitle_path)
844
- self.logger.info(f'✅ Updated transcript: {updated_subtitle_path}')
845
-
846
- except Exception as e:
847
- self.logger.warning(f'⚠️ Failed to update transcript timestamps: {e}')
848
-
849
- # Export to requested subtitle format
850
- output_path = str(aligned_path).replace(
851
- '_aligned.ass', f'{"_Gemini" if is_gemini_format else ""}_LattifAI.{output_format}'
852
- )
853
- SubtitleIO.write(supervisions, output_path=output_path)
854
- exported_files[output_format] = output_path
855
- self.logger.info(f'✅ Exported {output_format.upper()}: {output_path}')
856
-
857
- result = {
858
- 'exported_files': exported_files,
859
- 'metadata': metadata,
860
- 'subtitle_count': len(supervisions),
861
- 'is_gemini_format': is_gemini_format,
862
- 'original_subtitle_path': original_subtitle_path,
863
- }
864
-
865
- return result
866
-
867
- async def __call__(
868
- self,
869
- url: str,
870
- output_dir: Optional[str] = None,
871
- media_format: str = 'mp4',
872
- force_overwrite: bool = False,
873
- output_format: str = 'srt',
874
- split_sentence: bool = False,
875
- word_level: bool = False,
876
- ) -> Dict[str, Any]:
877
- """Main entry point - callable interface"""
878
- return await self.process_youtube_url(
879
- url=url,
880
- output_dir=output_dir,
881
- media_format=media_format,
882
- force_overwrite=force_overwrite,
883
- output_format=output_format,
884
- split_sentence=split_sentence,
885
- word_level=word_level,
886
- )
887
-
888
- async def process_youtube_url(
889
- self,
890
- url: str,
891
- output_dir: Optional[str] = None,
892
- media_format: str = 'mp4',
893
- force_overwrite: bool = False,
894
- output_format: str = 'srt',
895
- split_sentence: bool = False,
896
- word_level: bool = False,
897
- ) -> Dict[str, Any]:
898
- """
899
- Main entry point for processing a YouTube URL
900
-
901
- Args:
902
- url: YouTube URL to process
903
- output_dir: Directory to save output files
904
- media_format: Media format for download (mp3, mp4, etc.)
905
- force_overwrite: Force overwrite existing files
906
- output_format: Subtitle output format (srt, vtt, ass, etc.)
907
- split_sentence: Re-segment subtitles by semantics
908
- word_level: Include word-level alignment timestamps
909
-
910
- Returns:
911
- Dictionary containing results and exported file paths
912
- """
913
- # Execute the workflow with parameters
914
- result = await self.execute(
915
- url=url,
916
- output_dir=output_dir,
917
- media_format=media_format,
918
- force_overwrite=force_overwrite,
919
- output_format=output_format,
920
- split_sentence=split_sentence,
921
- word_level=word_level,
922
- )
923
-
924
- if result.is_success:
925
- return result.data.get('export_results_result', {})
926
- else:
927
- # Re-raise the original exception if available to preserve error type and context
928
- if result.exception:
929
- raise result.exception
930
- else:
931
- raise Exception(f'Workflow failed: {result.error}')