lattifai 0.4.5__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. lattifai/__init__.py +61 -47
  2. lattifai/alignment/__init__.py +6 -0
  3. lattifai/alignment/lattice1_aligner.py +119 -0
  4. lattifai/alignment/lattice1_worker.py +185 -0
  5. lattifai/{tokenizer → alignment}/phonemizer.py +4 -4
  6. lattifai/alignment/segmenter.py +166 -0
  7. lattifai/{tokenizer → alignment}/tokenizer.py +244 -169
  8. lattifai/audio2.py +211 -0
  9. lattifai/caption/__init__.py +20 -0
  10. lattifai/caption/caption.py +1275 -0
  11. lattifai/{io → caption}/gemini_reader.py +30 -30
  12. lattifai/{io → caption}/gemini_writer.py +17 -17
  13. lattifai/{io → caption}/supervision.py +4 -3
  14. lattifai/caption/text_parser.py +145 -0
  15. lattifai/cli/__init__.py +17 -0
  16. lattifai/cli/alignment.py +153 -0
  17. lattifai/cli/caption.py +204 -0
  18. lattifai/cli/server.py +19 -0
  19. lattifai/cli/transcribe.py +197 -0
  20. lattifai/cli/youtube.py +128 -0
  21. lattifai/client.py +460 -251
  22. lattifai/config/__init__.py +20 -0
  23. lattifai/config/alignment.py +73 -0
  24. lattifai/config/caption.py +178 -0
  25. lattifai/config/client.py +46 -0
  26. lattifai/config/diarization.py +67 -0
  27. lattifai/config/media.py +335 -0
  28. lattifai/config/transcription.py +84 -0
  29. lattifai/diarization/__init__.py +5 -0
  30. lattifai/diarization/lattifai.py +89 -0
  31. lattifai/errors.py +98 -91
  32. lattifai/logging.py +116 -0
  33. lattifai/mixin.py +552 -0
  34. lattifai/server/app.py +420 -0
  35. lattifai/transcription/__init__.py +76 -0
  36. lattifai/transcription/base.py +108 -0
  37. lattifai/transcription/gemini.py +219 -0
  38. lattifai/transcription/lattifai.py +103 -0
  39. lattifai/{workflows → transcription}/prompts/__init__.py +4 -4
  40. lattifai/types.py +30 -0
  41. lattifai/utils.py +16 -44
  42. lattifai/workflow/__init__.py +22 -0
  43. lattifai/workflow/agents.py +6 -0
  44. lattifai/{workflows → workflow}/base.py +22 -22
  45. lattifai/{workflows → workflow}/file_manager.py +239 -215
  46. lattifai/workflow/youtube.py +564 -0
  47. lattifai-1.0.0.dist-info/METADATA +736 -0
  48. lattifai-1.0.0.dist-info/RECORD +52 -0
  49. {lattifai-0.4.5.dist-info → lattifai-1.0.0.dist-info}/WHEEL +1 -1
  50. lattifai-1.0.0.dist-info/entry_points.txt +13 -0
  51. {lattifai-0.4.5.dist-info → lattifai-1.0.0.dist-info}/licenses/LICENSE +1 -1
  52. lattifai/base_client.py +0 -126
  53. lattifai/bin/__init__.py +0 -3
  54. lattifai/bin/agent.py +0 -325
  55. lattifai/bin/align.py +0 -296
  56. lattifai/bin/cli_base.py +0 -25
  57. lattifai/bin/subtitle.py +0 -210
  58. lattifai/io/__init__.py +0 -42
  59. lattifai/io/reader.py +0 -85
  60. lattifai/io/text_parser.py +0 -75
  61. lattifai/io/utils.py +0 -15
  62. lattifai/io/writer.py +0 -90
  63. lattifai/tokenizer/__init__.py +0 -3
  64. lattifai/workers/__init__.py +0 -3
  65. lattifai/workers/lattice1_alpha.py +0 -284
  66. lattifai/workflows/__init__.py +0 -34
  67. lattifai/workflows/agents.py +0 -10
  68. lattifai/workflows/gemini.py +0 -167
  69. lattifai/workflows/prompts/README.md +0 -22
  70. lattifai/workflows/prompts/gemini/README.md +0 -24
  71. lattifai/workflows/prompts/gemini/transcription_gem.txt +0 -81
  72. lattifai/workflows/youtube.py +0 -931
  73. lattifai-0.4.5.dist-info/METADATA +0 -808
  74. lattifai-0.4.5.dist-info/RECORD +0 -39
  75. lattifai-0.4.5.dist-info/entry_points.txt +0 -3
  76. {lattifai-0.4.5.dist-info → lattifai-1.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,204 @@
1
+ """Caption CLI entry point with nemo_run."""
2
+
3
+ from typing import Optional
4
+
5
+ import nemo_run as run
6
+ from lhotse.utils import Pathlike
7
+ from typing_extensions import Annotated
8
+
9
+ from lattifai.config import CaptionConfig
10
+
11
+
12
@run.cli.entrypoint(name="convert", namespace="caption")
def convert(
    input_path: Pathlike,
    output_path: Pathlike,
    include_speaker_in_text: bool = True,
    normalize_text: bool = False,
):
    """
    Convert a caption file from one format to another.

    Reads the input caption file and rewrites it in the format implied by the
    output file's extension, keeping timing, text, and speaker labels intact.
    Common formats such as SRT, VTT, JSON, and Praat TextGrid are supported.

    Shortcut: invoking ``laisub-convert`` is equivalent to running ``lai caption convert``.

    Args:
        input_path: Input caption file (SRT, VTT, JSON, TextGrid, ...).
        output_path: Output caption file; its extension selects the target format.
        include_speaker_in_text: Keep speaker labels inside the caption text.
        normalize_text: Clean the text while converting (strip HTML tags,
            decode entities, collapse whitespace, standardize punctuation).

    Examples:
        # Basic format conversion (positional arguments)
        lai caption convert input.srt output.vtt

        # Convert with text normalization
        lai caption convert input.srt output.json normalize_text=true

        # Mixing positional and keyword arguments
        lai caption convert input.srt output.vtt \\
            include_speaker_in_text=false \\
            normalize_text=true

        # Using keyword arguments (traditional syntax)
        lai caption convert \\
            input_path=input.srt \\
            output_path=output.TextGrid
    """
    from lattifai.caption import Caption

    # Read (optionally normalizing text), then write in the target format.
    parsed = Caption.read(input_path, normalize_text=normalize_text)
    parsed.write(output_path, include_speaker_in_text=include_speaker_in_text)

    print(f"✅ Converted {input_path} -> {output_path}")
    return output_path
60
+
61
+
62
@run.cli.entrypoint(name="normalize", namespace="caption")
def normalize(
    input_path: Pathlike,
    output_path: Pathlike,
    # NOTE(review): accepted for CLI-interface compatibility but not consulted
    # by the implementation below — reading always uses normalize_text=True.
    caption: Annotated[Optional[CaptionConfig], run.Config[CaptionConfig]] = None,
):
    """
    Normalize caption text by cleaning HTML entities and whitespace.

    This command reads a caption file and normalizes all text content by applying
    the following transformations:
    - Decode common HTML entities (&, <, >, ", ',  )
    - Remove HTML tags (e.g., <i>, <font>, <b>, <br>)
    - Collapse multiple whitespace characters into single spaces
    - Convert curly apostrophes to straight ones in contractions
    - Strip leading and trailing whitespace from each segment

    Shortcut: invoking ``laisub-normalize`` is equivalent to running ``lai caption normalize``.

    Args:
        input_path: Path to input caption file to normalize
        output_path: Path to output caption file. Required; pass the same path
            as ``input_path`` to normalize the file in place.
        caption: Caption configuration (input_format, output_format,
            normalize_text, encoding). Currently unused by this command.

    Examples:
        # Normalize and save to new file (positional arguments)
        lai caption normalize input.srt output.srt

        # Normalize with format conversion
        lai caption normalize input.vtt output.srt

        # Normalize with custom caption config
        lai caption normalize input.srt output.srt \\
            caption.encoding=utf-8

        # Using keyword arguments (traditional syntax)
        lai caption normalize \\
            input_path=input.srt \\
            output_path=output.srt
    """
    from pathlib import Path

    from lattifai.caption import Caption

    input_path = Path(input_path).expanduser()
    output_path = Path(output_path).expanduser()

    # Normalization is applied at read time; writing keeps speaker labels.
    caption_obj = Caption.read(input_path, normalize_text=True)
    caption_obj.write(output_path, include_speaker_in_text=True)

    if output_path == input_path:
        print(f"✅ Normalized {input_path} (in-place)")
    else:
        print(f"✅ Normalized {input_path} -> {output_path}")

    return output_path
120
+
121
+
122
@run.cli.entrypoint(name="shift", namespace="caption")
def shift(
    input_path: Pathlike,
    output_path: Pathlike,
    seconds: float,
    caption: Annotated[Optional[CaptionConfig], run.Config[CaptionConfig]] = None,
):
    """
    Shift every caption timestamp by a fixed number of seconds.

    Positive ``seconds`` delays the captions; negative values make them appear
    earlier. Input and output paths may be identical for in-place edits.

    Shortcut: invoking ``laisub-shift`` is equivalent to running ``lai caption shift``.

    Args:
        input_path: Path to input caption file
        output_path: Path to output caption file (can be same as input for in-place modification)
        seconds: Offset to apply to all timestamps; positive delays,
            negative advances.
        caption: Caption configuration for reading/writing
            (input_format, output_format, encoding).

    Examples:
        # Delay captions by 2 seconds (positional arguments)
        lai caption shift input.srt output.srt 2.0

        # Make captions appear 1.5 seconds earlier
        lai caption shift input.srt output.srt -1.5

        # Shift and convert format
        lai caption shift input.vtt output.srt seconds=0.5

        # Using keyword arguments (traditional syntax)
        lai caption shift \\
            input_path=input.srt \\
            output_path=output.srt \\
            seconds=3.0
    """
    from pathlib import Path

    from lattifai.caption import Caption

    input_path = Path(input_path).expanduser()
    output_path = Path(output_path).expanduser()

    # Read, shift, and write back in one pass.
    shifted_caption = Caption.read(input_path).shift_time(seconds)
    shifted_caption.write(output_path, include_speaker_in_text=True)

    direction = f"delayed by {seconds}s" if seconds >= 0 else f"advanced by {abs(seconds)}s"

    if output_path == input_path:
        print(f"✅ Shifted timestamps {direction} in {input_path} (in-place)")
    else:
        print(f"✅ Shifted timestamps {direction}: {input_path} -> {output_path}")

    return output_path
189
+
190
+
191
def main_convert():
    """Console-script entry point for ``laisub-convert``."""
    run.cli.main(convert)


def main_normalize():
    """Console-script entry point for ``laisub-normalize``."""
    run.cli.main(normalize)


def main_shift():
    """Console-script entry point for ``laisub-shift``."""
    run.cli.main(shift)


if __name__ == "__main__":
    # Direct module execution defaults to the convert command.
    main_convert()
lattifai/cli/server.py ADDED
@@ -0,0 +1,19 @@
1
+ import os
2
+
3
+ import colorful
4
+ import uvicorn
5
+
6
+
7
def main():
    """Launch the LattifAI Web Interface."""
    print(colorful.bold_green("🚀 Launching LattifAI Web Interface..."))
    print(colorful.cyan("See http://localhost:8001"))

    # The ASGI app is resolved from the installed package import path;
    # no directory changes or sys.path tweaks are needed here.
    uvicorn.run(
        "lattifai.server.app:app",
        host="0.0.0.0",  # NOTE(review): binds all interfaces — confirm intended for local tooling
        port=8001,
        reload=True,
        log_level="info",
    )


if __name__ == "__main__":
    main()
@@ -0,0 +1,197 @@
1
+ """Transcription CLI entry point with nemo_run."""
2
+
3
+ from typing import Optional
4
+
5
+ import nemo_run as run
6
+ from lhotse.utils import Pathlike
7
+ from typing_extensions import Annotated
8
+
9
+ from lattifai.audio2 import AudioLoader, ChannelSelectorType
10
+ from lattifai.cli.alignment import align as alignment_align
11
+ from lattifai.config import (
12
+ AlignmentConfig,
13
+ CaptionConfig,
14
+ ClientConfig,
15
+ DiarizationConfig,
16
+ MediaConfig,
17
+ TranscriptionConfig,
18
+ )
19
+ from lattifai.utils import _resolve_model_path
20
+
21
+
22
@run.cli.entrypoint(name="run", namespace="transcribe")
def transcribe(
    # NOTE(review): parameter name shadows the builtin `input`; kept because it
    # is part of the public CLI interface.
    input: Optional[str] = None,
    output_caption: Optional[str] = None,
    output_dir: Optional[Pathlike] = None,
    media_format: str = "mp3",
    channel_selector: Optional[ChannelSelectorType] = "average",
    transcription: Annotated[Optional[TranscriptionConfig], run.Config[TranscriptionConfig]] = None,
):
    """
    Transcribe audio/video file or YouTube URL to caption.

    This command performs automatic speech recognition (ASR) on audio/video files
    or YouTube videos, generating timestamped transcriptions in various caption formats.

    Shortcut: invoking ``lai-transcribe`` is equivalent to running ``lai transcribe run``.

    Args:
        input: Path to input audio/video file or YouTube URL (can be provided as positional argument)
        output_caption: Path for output caption file (can be provided as positional argument)
        output_dir: Directory for output files when using YouTube URL
        media_format: Media format for YouTube downloads (default: mp3);
            only used when the input is a URL that must be downloaded first.
        channel_selector: Audio channel selection strategy (default: average)
            Options: average, left, right, or an integer channel index.
            Note: Ignored when input is a URL and the transcriber supports URLs directly.
        transcription: Transcription service configuration.
            Fields: model_name, device, language, gemini_api_key

    Examples:
        # Transcribe local file with positional arguments
        lai transcribe run audio.wav output.srt

        # Transcribe YouTube video
        lai transcribe run "https://www.youtube.com/watch?v=VIDEO_ID" ./output

        # Using specific transcription model
        lai transcribe run audio.mp4 output.ass \\
            transcription.model_name=nvidia/parakeet-tdt-0.6b-v3

        # Using Gemini transcription (requires API key)
        lai transcribe run audio.wav output.srt \\
            transcription.model_name=gemini-2.5-pro \\
            transcription.gemini_api_key=YOUR_KEY

        # Specify language for transcription
        lai transcribe run audio.wav output.srt \\
            transcription.language=zh

        # Full configuration with keyword arguments
        lai transcribe run \\
            input=audio.wav \\
            output_caption=output.srt \\
            transcription.device=cuda \\
            transcription.model_name=iic/SenseVoiceSmall
    """
    # Heavy imports are deferred so `--help` stays fast.
    import asyncio
    from pathlib import Path

    import colorful

    from lattifai.transcription import create_transcriber

    # Initialize transcription config with defaults
    transcription_config = transcription or TranscriptionConfig()

    # Validate input is required
    if not input:
        raise ValueError("Input is required. Provide input as positional argument (file path or URL).")

    # Detect if input is a URL (simple scheme check; no URL parsing)
    is_url = input.startswith(("http://", "https://"))

    # Prepare output paths
    if is_url:
        # For URLs, use output_dir (created if missing); default to CWD
        if output_dir:
            output_path = Path(str(output_dir)).expanduser()
            output_path.mkdir(parents=True, exist_ok=True)
        else:
            output_path = Path.cwd()
    else:
        # For files, use input path directory
        input_path = Path(str(input))
        output_path = input_path.parent

    # Create transcriber; fill in the lattice model path lazily when unset
    if not transcription_config.lattice_model_path:
        transcription_config.lattice_model_path = _resolve_model_path("Lattifai/Lattice-1")
    transcriber = create_transcriber(transcription_config=transcription_config)

    print(colorful.cyan(f"🎤 Starting transcription with {transcriber.name}..."))
    print(colorful.cyan(f" Input: {input}"))

    # Perform transcription
    if is_url and transcriber.supports_url:
        # Check if transcriber supports URL directly (no local download needed)
        print(colorful.cyan(" Transcribing from URL directly..."))
        transcript = asyncio.run(transcriber.transcribe(input))
    else:
        if is_url:
            # Download media first, then transcribe
            print(colorful.cyan(" Downloading media from URL..."))
            from lattifai.workflow.youtube import YouTubeDownloader

            downloader = YouTubeDownloader()
            input_path = asyncio.run(
                downloader.download_media(
                    url=input,
                    output_dir=str(output_path),
                    media_format=media_format,
                    force_overwrite=False,
                )
            )
            print(colorful.cyan(f" Media downloaded to: {input_path}"))
        else:
            input_path = Path(str(input))

        print(colorful.cyan(" Loading audio..."))
        # For files, load audio first; channel_selector applies only here
        audio_loader = AudioLoader(device=transcription_config.device)
        media_audio = audio_loader(input_path, channel_selector=channel_selector)
        transcript = asyncio.run(transcriber.transcribe(media_audio))

    # Determine output caption path
    if output_caption:
        final_output = Path(str(output_caption))
        final_output.parent.mkdir(parents=True, exist_ok=True)
    else:
        if is_url:
            # For URLs, generate output filename based on transcriber
            output_format = transcriber.file_suffix.lstrip(".")
            final_output = output_path / f"youtube_LattifAI_{transcriber.name}.{output_format}"
        else:
            # For files, use input filename with suffix (e.g. audio.LattifAI.srt)
            final_output = Path(str(input)).with_suffix(".LattifAI.srt")

    print(colorful.cyan(f" Output: {final_output}"))

    # Write output
    transcriber.write(transcript, final_output, encoding="utf-8", cache_audio_events=False)

    print(colorful.green(f"🎉 Transcription completed: {final_output}"))

    return transcript
166
+
167
+
168
@run.cli.entrypoint(name="align", namespace="transcribe")
def transcribe_align(
    input_media: Optional[str] = None,
    output_caption: Optional[str] = None,
    media: Annotated[Optional[MediaConfig], run.Config[MediaConfig]] = None,
    caption: Annotated[Optional[CaptionConfig], run.Config[CaptionConfig]] = None,
    client: Annotated[Optional[ClientConfig], run.Config[ClientConfig]] = None,
    alignment: Annotated[Optional[AlignmentConfig], run.Config[AlignmentConfig]] = None,
    transcription: Annotated[Optional[TranscriptionConfig], run.Config[TranscriptionConfig]] = None,
    diarization: Annotated[Optional[DiarizationConfig], run.Config[DiarizationConfig]] = None,
):
    """
    Alias of ``lai alignment align`` exposed under the ``transcribe`` namespace.

    All arguments are forwarded unchanged to
    :func:`lattifai.cli.alignment.align`; see that command for full parameter
    documentation.
    """
    return alignment_align(
        input_media=input_media,
        output_caption=output_caption,
        media=media,
        caption=caption,
        client=client,
        alignment=alignment,
        transcription=transcription,
        diarization=diarization,
    )
189
+
190
+
191
def main():
    """Entry point for lai-transcribe command."""
    run.cli.main(transcribe)


if __name__ == "__main__":
    # Allow running this module directly: python -m lattifai.cli.transcribe
    main()
@@ -0,0 +1,128 @@
1
+ """YouTube workflow CLI entry point with nemo_run."""
2
+
3
+ from typing import Optional
4
+
5
+ import nemo_run as run
6
+ from typing_extensions import Annotated
7
+
8
+ from lattifai.client import LattifAI
9
+ from lattifai.config import (
10
+ AlignmentConfig,
11
+ CaptionConfig,
12
+ ClientConfig,
13
+ DiarizationConfig,
14
+ MediaConfig,
15
+ TranscriptionConfig,
16
+ )
17
+
18
+
19
@run.cli.entrypoint(name="youtube", namespace="alignment")
def youtube(
    yt_url: Optional[str] = None,
    media: Annotated[Optional[MediaConfig], run.Config[MediaConfig]] = None,
    client: Annotated[Optional[ClientConfig], run.Config[ClientConfig]] = None,
    alignment: Annotated[Optional[AlignmentConfig], run.Config[AlignmentConfig]] = None,
    caption: Annotated[Optional[CaptionConfig], run.Config[CaptionConfig]] = None,
    transcription: Annotated[Optional[TranscriptionConfig], run.Config[TranscriptionConfig]] = None,
    diarization: Annotated[Optional[DiarizationConfig], run.Config[DiarizationConfig]] = None,
):
    """
    Download media from YouTube (when needed) and align captions.

    Convenience workflow around :class:`LattifAI.youtube`: given a YouTube URL
    it downloads the media in the configured format, optionally transcribes the
    audio with Gemini or fetches the video's own captions, and then performs
    forced alignment against those captions.

    The URL may be supplied either positionally (``yt_url``) or via
    ``media.input_path`` — exactly one of the two is required.

    Shortcut: invoking ``lai-youtube`` is equivalent to running ``lai alignment youtube``.

    Args:
        yt_url: YouTube video URL (can be provided as positional argument)
        media: Media configuration (input_path, output_dir, output_format,
            force_overwrite, channel_selector).
        client: API client configuration (api_key, timeout, max_retries).
        alignment: Alignment configuration (model_name, device, batch_size).
        caption: Caption configuration (output_format, output_path,
            normalize_text, split_sentence, word_level, encoding).
        transcription: Transcription service configuration; setting a Gemini
            API key enables Gemini transcription (gemini_api_key, model_name,
            language, device).
        diarization: Speaker diarization configuration (enabled, num_speakers,
            min_speakers, max_speakers, device).

    Examples:
        # Download from YouTube and align (positional argument)
        lai alignment youtube "https://www.youtube.com/watch?v=VIDEO_ID"

        # With custom output directory and format
        lai alignment youtube "https://www.youtube.com/watch?v=VIDEO_ID" \\
            media.output_dir=/tmp/youtube \\
            media.output_format=mp3

        # Full configuration with smart splitting and word-level alignment
        lai alignment youtube "https://www.youtube.com/watch?v=VIDEO_ID" \\
            caption.output_path=aligned.srt \\
            caption.split_sentence=true \\
            caption.word_level=true \\
            alignment.device=cuda

        # Use Gemini transcription (requires API key)
        lai alignment youtube "https://www.youtube.com/watch?v=VIDEO_ID" \\
            transcription.gemini_api_key=YOUR_KEY \\
            transcription.model_name=gemini-2.0-flash

        # Using keyword argument (traditional syntax)
        lai alignment youtube \\
            yt_url="https://www.youtube.com/watch?v=VIDEO_ID" \\
            alignment.device=mps
    """
    media_config = media or MediaConfig()
    caption_config = caption or CaptionConfig()

    # Exactly one URL source is allowed: positional arg XOR media.input_path.
    if yt_url and media_config.input_path:
        raise ValueError(
            "Cannot specify both positional yt_url and media.input_path. "
            "Use either positional argument or config, not both."
        )
    if not yt_url and not media_config.input_path:
        raise ValueError("YouTube URL is required. Provide either positional yt_url or media.input_path parameter.")

    # Fold the positional URL into the media config so there is one source of truth.
    if yt_url:
        media_config.set_input_path(yt_url)

    # Only normalize the media format when one was explicitly configured.
    if media_config.output_format:
        resolved_format = media_config.normalize_format()
    else:
        resolved_format = None

    lattifai_client = LattifAI(
        client_config=client,
        alignment_config=alignment,
        caption_config=caption_config,
        transcription_config=transcription,
        diarization_config=diarization,
    )

    # Delegate the actual download/transcribe/align workflow to the client.
    return lattifai_client.youtube(
        url=media_config.input_path,
        output_dir=media_config.output_dir,
        output_caption_path=caption_config.output_path,
        media_format=resolved_format,
        force_overwrite=media_config.force_overwrite,
        split_sentence=caption_config.split_sentence,
        channel_selector=media_config.channel_selector,
    )
121
+
122
+
123
def main():
    """Entry point for the lai-youtube console script."""
    run.cli.main(youtube)


if __name__ == "__main__":
    # Allow running this module directly: python -m lattifai.cli.youtube
    main()