lattifai 0.4.6__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/__init__.py +42 -27
- lattifai/alignment/__init__.py +6 -0
- lattifai/alignment/lattice1_aligner.py +119 -0
- lattifai/{workers/lattice1_alpha.py → alignment/lattice1_worker.py} +33 -132
- lattifai/{tokenizer → alignment}/phonemizer.py +1 -1
- lattifai/alignment/segmenter.py +166 -0
- lattifai/{tokenizer → alignment}/tokenizer.py +186 -112
- lattifai/audio2.py +211 -0
- lattifai/caption/__init__.py +20 -0
- lattifai/caption/caption.py +1275 -0
- lattifai/{io → caption}/supervision.py +1 -0
- lattifai/{io → caption}/text_parser.py +53 -10
- lattifai/cli/__init__.py +17 -0
- lattifai/cli/alignment.py +153 -0
- lattifai/cli/caption.py +204 -0
- lattifai/cli/server.py +19 -0
- lattifai/cli/transcribe.py +197 -0
- lattifai/cli/youtube.py +128 -0
- lattifai/client.py +455 -246
- lattifai/config/__init__.py +20 -0
- lattifai/config/alignment.py +73 -0
- lattifai/config/caption.py +178 -0
- lattifai/config/client.py +46 -0
- lattifai/config/diarization.py +67 -0
- lattifai/config/media.py +335 -0
- lattifai/config/transcription.py +84 -0
- lattifai/diarization/__init__.py +5 -0
- lattifai/diarization/lattifai.py +89 -0
- lattifai/errors.py +41 -34
- lattifai/logging.py +116 -0
- lattifai/mixin.py +552 -0
- lattifai/server/app.py +420 -0
- lattifai/transcription/__init__.py +76 -0
- lattifai/transcription/base.py +108 -0
- lattifai/transcription/gemini.py +219 -0
- lattifai/transcription/lattifai.py +103 -0
- lattifai/types.py +30 -0
- lattifai/utils.py +3 -31
- lattifai/workflow/__init__.py +22 -0
- lattifai/workflow/agents.py +6 -0
- lattifai/{workflows → workflow}/file_manager.py +81 -57
- lattifai/workflow/youtube.py +564 -0
- lattifai-1.0.0.dist-info/METADATA +736 -0
- lattifai-1.0.0.dist-info/RECORD +52 -0
- {lattifai-0.4.6.dist-info → lattifai-1.0.0.dist-info}/WHEEL +1 -1
- lattifai-1.0.0.dist-info/entry_points.txt +13 -0
- lattifai/base_client.py +0 -126
- lattifai/bin/__init__.py +0 -3
- lattifai/bin/agent.py +0 -324
- lattifai/bin/align.py +0 -295
- lattifai/bin/cli_base.py +0 -25
- lattifai/bin/subtitle.py +0 -210
- lattifai/io/__init__.py +0 -43
- lattifai/io/reader.py +0 -86
- lattifai/io/utils.py +0 -15
- lattifai/io/writer.py +0 -102
- lattifai/tokenizer/__init__.py +0 -3
- lattifai/workers/__init__.py +0 -3
- lattifai/workflows/__init__.py +0 -34
- lattifai/workflows/agents.py +0 -12
- lattifai/workflows/gemini.py +0 -167
- lattifai/workflows/prompts/README.md +0 -22
- lattifai/workflows/prompts/gemini/README.md +0 -24
- lattifai/workflows/prompts/gemini/transcription_gem.txt +0 -81
- lattifai/workflows/youtube.py +0 -931
- lattifai-0.4.6.dist-info/METADATA +0 -806
- lattifai-0.4.6.dist-info/RECORD +0 -39
- lattifai-0.4.6.dist-info/entry_points.txt +0 -3
- /lattifai/{io → caption}/gemini_reader.py +0 -0
- /lattifai/{io → caption}/gemini_writer.py +0 -0
- /lattifai/{workflows → transcription}/prompts/__init__.py +0 -0
- /lattifai/{workflows → workflow}/base.py +0 -0
- {lattifai-0.4.6.dist-info → lattifai-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {lattifai-0.4.6.dist-info → lattifai-1.0.0.dist-info}/top_level.txt +0 -0
lattifai/bin/align.py
DELETED
|
@@ -1,295 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
import os
|
|
3
|
-
|
|
4
|
-
import click
|
|
5
|
-
import colorful
|
|
6
|
-
from lhotse.utils import Pathlike
|
|
7
|
-
|
|
8
|
-
from lattifai.bin.cli_base import cli
|
|
9
|
-
from lattifai.client import AsyncLattifAI, LattifAI
|
|
10
|
-
from lattifai.io import INPUT_SUBTITLE_FORMATS, OUTPUT_SUBTITLE_FORMATS
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@cli.command()
@click.option(
    "-F",
    "--input_format",
    "--input-format",
    type=click.Choice(INPUT_SUBTITLE_FORMATS, case_sensitive=False),
    default="auto",
    help="Input subtitle format.",
)
@click.option(
    "-S",
    "--split-sentence",
    "--split_sentence",
    is_flag=True,
    default=False,
    help="Re-segment subtitles by semantics.",
)
@click.option(
    "-W",
    "--word-level",
    "--word_level",
    is_flag=True,
    default=False,
    help="Include word-level alignment timestamps in output (for JSON, TextGrid, and subtitle formats).",
)
@click.option(
    "-D",
    "--device",
    type=click.Choice(["cpu", "cuda", "mps"], case_sensitive=False),
    default="cpu",
    help="Device to use for inference.",
)
@click.option(
    "-M",
    "--model-name-or-path",
    "--model_name_or_path",
    type=str,
    default="Lattifai/Lattice-1-Alpha",
    help="Model name or path for alignment.",
)
@click.option(
    "-K",
    "-L",
    "--api-key",
    "--api_key",
    type=str,
    default=None,
    help="API key for LattifAI.",
)
@click.argument(
    "input_media_path",
    type=click.Path(exists=True, dir_okay=False),
)
@click.argument(
    "input_subtitle_path",
    type=click.Path(exists=True, dir_okay=False),
)
@click.argument(
    "output_subtitle_path",
    type=click.Path(allow_dash=True),
)
def align(
    input_media_path: Pathlike,
    input_subtitle_path: Pathlike,
    output_subtitle_path: Pathlike,
    input_format: str = "auto",
    split_sentence: bool = False,
    word_level: bool = False,
    device: str = "cpu",
    model_name_or_path: str = "Lattifai/Lattice-1-Alpha",
    api_key: str = None,
):
    """
    Command used to align media(audio/video) with subtitles
    """
    # NOTE: the docstring above is the command's --help text, so it is kept
    # verbatim; implementation notes live in comments instead.
    #
    # Flow: build a LattifAI client from the model/device/key options, run
    # `client.alignment(...)` on the media + subtitle pair, and report the
    # outcome on the console. Any failure is printed and then converted to a
    # ClickException so the process exits non-zero.
    try:
        client = LattifAI(model_name_or_path=model_name_or_path, device=device, api_key=api_key)
        client.alignment(
            input_media_path,
            input_subtitle_path,
            # click.Choice is case-insensitive here; normalise before handing over.
            format=input_format.lower(),
            split_sentence=split_sentence,
            # The CLI's "word level" switch is passed as `return_details`.
            return_details=word_level,
            output_subtitle_path=output_subtitle_path,
        )
        click.echo(colorful.green(f"✅ Alignment completed successfully: {output_subtitle_path}"))
    except Exception as e:
        # Imported lazily, only on the failure path.
        from lattifai.errors import LattifAIError

        # Display error message
        if isinstance(e, LattifAIError):
            click.echo(colorful.red("❌ Alignment failed:"))
            click.echo(e.get_message())
            # Show support info
            click.echo(e.get_support_info())
        else:
            click.echo(colorful.red(f"❌ Alignment failed: {str(e)}"))

        # Chain the original exception so the root cause stays attached to the
        # ClickException (visible in tracebacks / debuggers).
        raise click.ClickException("Alignment failed") from e
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
@cli.command()
@click.option(
    # No "-M" short flag here: "-M" is also declared on --model-name-or-path
    # below (and means "model" in the `align` command). Declaring the same
    # short flag on two options makes it ambiguous, so the media format is
    # selected through its long names only.
    "--media-format",
    "--media_format",
    type=click.Choice(
        [
            # Audio formats
            "mp3",
            "wav",
            "m4a",
            "aac",
            "flac",
            "ogg",
            "opus",
            "aiff",
            # Video formats
            "mp4",
            "webm",
            "mkv",
            "avi",
            "mov",
        ],
        case_sensitive=False,
    ),
    default="mp3",
    help="Media format for YouTube download (audio or video).",
)
@click.option(
    "-S",
    "--split-sentence",
    "--split_sentence",
    is_flag=True,
    default=False,
    help="Re-segment subtitles by semantics.",
)
@click.option(
    "-W",
    "--word-level",
    "--word_level",
    is_flag=True,
    default=False,
    help="Include word-level alignment timestamps in output (for JSON, TextGrid, and subtitle formats).",
)
@click.option(
    "-O",
    "--output-dir",
    "--output_dir",
    type=click.Path(file_okay=False, dir_okay=True, writable=True),
    default=".",
    help="Output directory (default: current directory).",
)
@click.option(
    "-D",
    "--device",
    type=click.Choice(["cpu", "cuda", "mps"], case_sensitive=False),
    default="cpu",
    help="Device to use for inference.",
)
@click.option(
    "-M",
    "--model-name-or-path",
    "--model_name_or_path",
    type=str,
    default="Lattifai/Lattice-1-Alpha",
    help="Model name or path for alignment.",
)
@click.option(
    "-K",
    "-L",
    "--api-key",
    "--api_key",
    type=str,
    default=None,
    help="API key for LattifAI.",
)
@click.option(
    "-G",
    "--gemini-api-key",
    "--gemini_api_key",
    type=str,
    default=None,
    help="Gemini API key for transcription fallback when subtitles are unavailable.",
)
@click.option(
    "-F",
    "--output-format",
    "--output_format",
    type=click.Choice(OUTPUT_SUBTITLE_FORMATS, case_sensitive=False),
    default="vtt",
    help="Subtitle output format.",
)
@click.argument(
    "yt_url",
    type=str,
)
def youtube(
    yt_url: str,
    media_format: str = "mp3",
    split_sentence: bool = False,
    word_level: bool = False,
    output_dir: str = ".",
    device: str = "cpu",
    model_name_or_path: str = "Lattifai/Lattice-1-Alpha",
    api_key: str = None,
    gemini_api_key: str = None,
    output_format: str = "vtt",
):
    """
    Download media and subtitles from YouTube for further alignment.
    """
    # NOTE: the docstring above doubles as the command's --help text and is
    # therefore kept verbatim.
    #
    # Workflow imports are deferred to call time.
    from lattifai.workflows.gemini import GeminiTranscriber
    from lattifai.workflows.youtube import YouTubeDownloader, YouTubeSubtitleAgent

    # Get Gemini API key: an explicit option wins over the environment.
    gemini_key = gemini_api_key or os.getenv("GEMINI_API_KEY")

    async def _process():
        # Initialize components with their configuration (only config, not runtime params)
        downloader = YouTubeDownloader()
        transcriber = GeminiTranscriber(api_key=gemini_key)
        aligner = AsyncLattifAI(api_key=api_key, model_name_or_path=model_name_or_path, device=device)

        # Create agent with initialized components
        agent = YouTubeSubtitleAgent(
            downloader=downloader,
            transcriber=transcriber,
            aligner=aligner,
            max_retries=0,
        )

        return await agent.process_youtube_url(
            url=yt_url,
            output_dir=output_dir,
            media_format=media_format,
            force_overwrite=False,
            output_format=output_format,
            split_sentence=split_sentence,
            word_level=word_level,
        )

    try:
        result = asyncio.run(_process())

        # Display results
        click.echo(colorful.green("✅ Processing completed!"))
        click.echo()

        # Show metadata
        metadata = result.get("metadata", {})
        if metadata:
            click.echo(f'🎬 Title: {metadata.get("title", "Unknown")}')
            click.echo(f'⏱️ Duration: {metadata.get("duration", 0)} seconds')
            click.echo()

        # Show exported files
        exported_files = result.get("exported_files", {})
        if exported_files:
            click.echo(colorful.green("📄 Generated subtitle files:"))
            for format_name, file_path in exported_files.items():
                click.echo(f" {format_name}: {file_path}")
            click.echo()

        # Show subtitle count
        subtitle_count = result.get("subtitle_count", 0)
        click.echo(f"📝 Generated {subtitle_count} subtitle segments")

    except Exception as e:
        from lattifai.errors import LattifAIError

        # Extract error message without support info (to avoid duplication)
        if isinstance(e, LattifAIError):
            # Use the get_message() method which includes proper formatting
            click.echo(colorful.red("❌ Failed to process YouTube URL:"))
            click.echo(e.get_message())
            # Show support info once at the end
            click.echo(e.get_support_info())
        else:
            click.echo(colorful.red(f"❌ Failed to process YouTube URL: {str(e)}"))

        # Keep the root cause chained to the exception click reports.
        raise click.ClickException("Processing failed") from e
|
lattifai/bin/cli_base.py
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
|
|
3
|
-
import click
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
@click.group()
def cli():
    """
    The shell entry point to Lattifai, a tool for audio data manipulation.
    """
    # NOTE: the docstring above is the group's --help text; keep it verbatim.
    #
    # Everything below is process-wide setup performed when the group callback
    # runs: .env loading, logging configuration, and two environment switches.
    import os

    from dotenv import find_dotenv, load_dotenv

    # Look for a .env file starting from the current working directory and
    # load whatever is found.
    dotenv_file = find_dotenv(usecwd=True)
    load_dotenv(dotenv_file)

    log_format = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
    logging.basicConfig(format=log_format, level=logging.INFO)

    # Set after the .env load, so these two values always end up in effect.
    os.environ.update(
        {
            "KMP_DUPLICATE_LIB_OK": "TRUE",
            "TOKENIZERS_PARALLELISM": "FALSE",
        }
    )
|
lattifai/bin/subtitle.py
DELETED
|
@@ -1,210 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
import click
|
|
5
|
-
from lhotse.utils import Pathlike
|
|
6
|
-
|
|
7
|
-
from lattifai.bin.cli_base import cli
|
|
8
|
-
from lattifai.io import SUBTITLE_FORMATS
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
@cli.group()
def subtitle():
    """Commands for subtitle format conversion and management."""
    # Pure command group: it only hosts the sub-commands registered on it
    # (the docstring above is its --help text and is the whole body).
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
@subtitle.command()
@click.argument("input_subtitle_path", type=click.Path(exists=True, dir_okay=False))
@click.argument("output_subtitle_path", type=click.Path(allow_dash=True))
def convert(
    input_subtitle_path: Pathlike,
    output_subtitle_path: Pathlike,
):
    """
    Convert subtitle file to another format.
    """
    # NOTE: the docstring above is the command's --help text; keep it verbatim.
    #
    # The target format is decided by the output file name alone: anything
    # ending in ".textgrid" (case-insensitive) goes through SubtitleIO, every
    # other target is delegated to pysubs2.
    wants_textgrid = str(output_subtitle_path).lower().endswith(".textgrid")

    if not wants_textgrid:
        import pysubs2

        loaded = pysubs2.load(input_subtitle_path)
        loaded.save(output_subtitle_path)
        return

    from lattifai.io import SubtitleIO

    supervisions = SubtitleIO.read(input_subtitle_path)
    SubtitleIO.write(supervisions, output_subtitle_path)
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
@subtitle.command()
@click.argument("url", type=str, required=True)
@click.option(
    "--output-dir",
    "--output_dir",
    "-o",
    type=click.Path(file_okay=False, dir_okay=True),
    default=".",
    help="Output directory for downloaded subtitle files (default: current directory).",
)
@click.option(
    "--output-format",
    "--output_format",
    "-f",
    type=click.Choice(SUBTITLE_FORMATS + ["best"], case_sensitive=False),
    default="best",
    help="Preferred subtitle format to download (default: best available).",
)
@click.option("--force-overwrite", "-F", is_flag=True, help="Overwrite existing files without prompting.")
@click.option(
    "--lang",
    "-l",
    "-L",
    "--subtitle-lang",
    "--subtitle_lang",
    type=str,
    help='Specific subtitle language/track to download (e.g., "en").',
)
def download(
    url: str,
    output_dir: str,
    output_format: str,
    force_overwrite: bool,
    lang: str,
):
    """
    Download subtitles from YouTube URL using yt-dlp.

    URL should be a valid YouTube URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID).
    """
    # NOTE: the docstring above is the command's --help text; keep it verbatim.
    #
    # Deferred import: avoids circular imports and keeps CLI startup fast.
    from lattifai.workflows.youtube import YouTubeDownloader

    # Reject anything that does not look like a YouTube link before doing work.
    if not _is_valid_youtube_url(url):
        click.echo(f"Error: Invalid YouTube URL format: {url}", err=True)
        click.echo("Please provide a valid YouTube URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID)", err=True)
        raise click.Abort()

    # Work with an absolute directory and make sure it exists.
    target_dir = Path(output_dir).resolve()
    target_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): `output_format` is only echoed below; nothing in this
    # function forwards it to the downloader — confirm whether that is intended.
    click.echo(f"Downloading subtitles from: {url}")
    click.echo(f" Output directory: {target_dir}")
    click.echo(f" Preferred format: {output_format}")
    click.echo(f" Subtitle language: {lang}" if lang else " Subtitle language: All available")

    fetcher = YouTubeDownloader()

    async def _fetch():
        """Run the download, report the outcome, and return the downloader's result (or None)."""
        try:
            outcome = await fetcher.download_subtitles(
                url=url,
                output_dir=str(target_dir),
                force_overwrite=force_overwrite,
                subtitle_lang=lang,
            )

            if not outcome:
                click.echo("⚠️ No subtitles available for this video")
                return None

            click.echo("✅ Subtitles downloaded successfully!")
            return outcome

        except Exception as e:
            click.echo(f"❌ Error downloading subtitles: {str(e)}", err=True)
            raise click.Abort()

    # Drive the coroutine to completion from this synchronous command.
    result = asyncio.run(_fetch())

    if not result:
        return

    # The downloader can return the marker string "gemini" instead of a path.
    if result == "gemini":
        click.echo("✨ Gemini transcription selected (use the agent command to transcribe)")
    else:
        click.echo(f"📄 Subtitle file saved to: {result}")
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
@subtitle.command()
@click.argument("url", type=str, required=True)
def list_subs(url: str):
    """
    List available subtitle tracks for a YouTube video.

    URL should be a valid YouTube URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID)
    """
    # NOTE: the docstring above is the command's --help text; keep it verbatim.
    #
    # Deferred import: avoids circular imports and keeps CLI startup fast.
    from lattifai.workflows.youtube import YouTubeDownloader

    # Reject anything that does not look like a YouTube link before doing work.
    if not _is_valid_youtube_url(url):
        click.echo(f"Error: Invalid YouTube URL format: {url}", err=True)
        click.echo("Please provide a valid YouTube URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID)", err=True)
        raise click.Abort()

    click.echo(f"Listing available subtitles for: {url}")

    fetcher = YouTubeDownloader()

    async def _show_tracks():
        """Query the downloader for subtitle tracks and print them."""
        try:
            tracks = await fetcher.list_available_subtitles(url)

            if not tracks:
                click.echo("⚠️ No subtitles available for this video")
                return

            click.echo("📋 Available subtitle tracks:")
            for track in tracks:
                click.echo(f' 🎬 Language: {track["language"]} - {track["name"]}')
                click.echo(f' 📄 Formats: {", ".join(track["formats"])}')
                click.echo()

            # Usage hint for the sibling `download` command.
            click.echo("💡 To download a specific track, use:")
            click.echo(f' lattifai subtitle download "{url}" --lang <language_code>')
            click.echo(' Example: lattifai subtitle download "{}" --lang en-JkeT_87f4cc'.format(url))

        except Exception as e:
            click.echo(f"❌ Error listing subtitles: {str(e)}", err=True)
            raise click.Abort()

    # Drive the coroutine to completion from this synchronous command.
    asyncio.run(_show_tracks())
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
def _is_valid_youtube_url(url: str) -> bool:
|
|
190
|
-
"""
|
|
191
|
-
Validate if the URL is a valid YouTube URL format.
|
|
192
|
-
|
|
193
|
-
Supports various YouTube URL formats:
|
|
194
|
-
- https://www.youtube.com/watch?v=VIDEO_ID
|
|
195
|
-
- https://youtu.be/VIDEO_ID
|
|
196
|
-
- https://www.youtube.com/shorts/VIDEO_ID
|
|
197
|
-
- https://m.youtube.com/watch?v=VIDEO_ID
|
|
198
|
-
"""
|
|
199
|
-
import re
|
|
200
|
-
|
|
201
|
-
patterns = [
|
|
202
|
-
r"(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/shorts/)([a-zA-Z0-9_-]{11})",
|
|
203
|
-
r"youtube\.com/embed/([a-zA-Z0-9_-]{11})",
|
|
204
|
-
r"youtube\.com/v/([a-zA-Z0-9_-]{11})",
|
|
205
|
-
]
|
|
206
|
-
|
|
207
|
-
for pattern in patterns:
|
|
208
|
-
if re.search(pattern, url):
|
|
209
|
-
return True
|
|
210
|
-
return False
|
lattifai/io/__init__.py
DELETED
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
from typing import List, Optional
|
|
2
|
-
|
|
3
|
-
from lhotse.utils import Pathlike
|
|
4
|
-
|
|
5
|
-
from .gemini_reader import GeminiReader, GeminiSegment
|
|
6
|
-
from .gemini_writer import GeminiWriter
|
|
7
|
-
from .reader import SubtitleFormat, SubtitleReader
|
|
8
|
-
from .supervision import Supervision
|
|
9
|
-
from .text_parser import normalize_html_text
|
|
10
|
-
from .utils import (
|
|
11
|
-
ALL_SUBTITLE_FORMATS,
|
|
12
|
-
INPUT_SUBTITLE_FORMATS,
|
|
13
|
-
OUTPUT_SUBTITLE_FORMATS,
|
|
14
|
-
SUBTITLE_FORMATS,
|
|
15
|
-
)
|
|
16
|
-
from .writer import SubtitleWriter
|
|
17
|
-
|
|
18
|
-
__all__ = [
|
|
19
|
-
"SubtitleReader",
|
|
20
|
-
"SubtitleWriter",
|
|
21
|
-
"SubtitleIO",
|
|
22
|
-
"Supervision",
|
|
23
|
-
"GeminiReader",
|
|
24
|
-
"GeminiWriter",
|
|
25
|
-
"GeminiSegment",
|
|
26
|
-
"SUBTITLE_FORMATS",
|
|
27
|
-
"INPUT_SUBTITLE_FORMATS",
|
|
28
|
-
"OUTPUT_SUBTITLE_FORMATS",
|
|
29
|
-
"ALL_SUBTITLE_FORMATS",
|
|
30
|
-
]
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class SubtitleIO:
    """Single entry point that forwards reading to SubtitleReader and writing to SubtitleWriter."""

    def __init__(self):
        """Nothing to set up; both operations are classmethods."""

    @classmethod
    def read(cls, subtitle: Pathlike, format: Optional[SubtitleFormat] = None) -> List[Supervision]:
        """Parse ``subtitle`` via ``SubtitleReader.read`` and return its supervisions.

        Args:
            subtitle: Subtitle source handed to the reader unchanged.
            format: Optional format hint, forwarded as-is.
        """
        parsed = SubtitleReader.read(subtitle, format=format)
        return parsed

    @classmethod
    def write(cls, alignments: List[Supervision], output_path: Pathlike) -> Pathlike:
        """Write ``alignments`` via ``SubtitleWriter.write`` and return whatever it returns.

        Args:
            alignments: Supervisions to serialise.
            output_path: Destination handed to the writer unchanged.
        """
        written = SubtitleWriter.write(alignments, output_path)
        return written
|
lattifai/io/reader.py
DELETED
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
from abc import ABCMeta
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
from typing import List, Literal, Optional, Union
|
|
4
|
-
|
|
5
|
-
from lhotse.utils import Pathlike
|
|
6
|
-
|
|
7
|
-
from .supervision import Supervision
|
|
8
|
-
from .text_parser import parse_speaker_text
|
|
9
|
-
|
|
10
|
-
SubtitleFormat = Literal["txt", "srt", "vtt", "ass", "auto"]


def _is_existing_path(subtitle: Pathlike) -> bool:
    """Return True when ``subtitle`` names an existing filesystem entry.

    ``subtitle`` may be raw subtitle text rather than a path. Probing such text
    with ``Path.exists()`` can raise instead of returning False (for example
    ``OSError`` when the "file name" is too long), so those errors are treated
    as "not a path" and the caller falls back to handling it as text.
    """
    try:
        return Path(str(subtitle)).exists()
    except (OSError, ValueError):
        return False


class SubtitleReader(ABCMeta):
    """Parser for converting different subtitle formats to List[Supervision]."""

    # NOTE(review): deriving from ABCMeta makes this class a metaclass rather
    # than an abstract base class; `metaclass=ABCMeta` was probably intended.
    # Left unchanged here because callers may depend on the current type.

    @classmethod
    def read(cls, subtitle: Pathlike, format: Optional[SubtitleFormat] = None) -> List[Supervision]:
        """Parse a subtitle source and convert it to a list of Supervision.

        Args:
            subtitle: Input to parse. Can be either:
                - a path to an existing subtitle file, or
                - the subtitle text itself.
            format: Input format hint (e.g. txt, srt, vtt, ass, auto, gemini).
                When omitted and ``subtitle`` is an existing file, the
                lower-cased file extension is used.

        Returns:
            The parsed supervisions.
        """
        if not format and _is_existing_path(subtitle):
            format = Path(str(subtitle)).suffix.lstrip(".").lower()
        elif format:
            format = format.lower()

        if format == "gemini" or str(subtitle).endswith("Gemini.md"):
            from .gemini_reader import GeminiReader

            supervisions = GeminiReader.extract_for_alignment(subtitle)
        elif format == "txt" or (format == "auto" and str(subtitle)[-4:].lower() == ".txt"):
            # Plain text: one supervision per non-empty line, no timing.
            if not _is_existing_path(subtitle):  # str
                lines = [line.strip() for line in str(subtitle).split("\n")]
            else:  # file
                path_str = str(subtitle)
                with open(path_str, encoding="utf-8") as f:
                    lines = [line.strip() for line in f]
            supervisions = [Supervision(text=line) for line in lines if line]
        else:
            try:
                supervisions = cls._parse_subtitle(subtitle, format=format)
            except Exception as e:
                # Best-effort fallback: anything pysubs2 cannot parse is retried
                # with the Gemini transcript reader.
                print(f"Failed to parse subtitle with Format: {format}, Exception: {e}, trying 'gemini' parser.")
                from .gemini_reader import GeminiReader

                supervisions = GeminiReader.extract_for_alignment(subtitle)

        return supervisions

    @classmethod
    def _parse_subtitle(cls, subtitle: Pathlike, format: Optional[SubtitleFormat]) -> List[Supervision]:
        """Parse ``subtitle`` with pysubs2 and map each event to a Supervision.

        Tries, in order: loading ``subtitle`` as a file with the given format,
        parsing it as an in-memory string, and finally loading it as a file
        with pysubs2's own format detection. ``"auto"`` is passed to pysubs2
        as "no explicit format".
        """
        import pysubs2

        explicit_format = format if format != "auto" else None

        try:
            subs: pysubs2.SSAFile = pysubs2.load(subtitle, encoding="utf-8", format_=explicit_format)  # file
        except IOError:
            try:
                subs = pysubs2.SSAFile.from_string(subtitle, format_=explicit_format)  # str
            except Exception:
                subs = pysubs2.load(subtitle, encoding="utf-8")  # auto detect format

        supervisions = []
        for event in subs.events:
            # NOT apply text_parser.py:normalize_html_text here, to keep original text in subtitles
            speaker, text = parse_speaker_text(event.text)
            supervisions.append(
                Supervision(
                    text=text,
                    speaker=speaker,
                    # Event times are divided by 1000 before being stored.
                    start=event.start / 1000.0 if event.start is not None else None,
                    duration=(event.end - event.start) / 1000.0 if event.end is not None else None,
                )
            )
        return supervisions
|
lattifai/io/utils.py
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Utility constants and helper functions for subtitle I/O operations
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
# Supported subtitle formats for reading/writing
SUBTITLE_FORMATS = ["srt", "vtt", "ass", "ssa", "sub", "sbv", "txt", "md"]

# Input subtitle formats (includes special formats like 'auto' and 'gemini')
INPUT_SUBTITLE_FORMATS = ["srt", "vtt", "ass", "ssa", "sub", "sbv", "txt", "auto", "gemini"]

# Output subtitle formats (includes special formats like 'TextGrid' and 'json')
OUTPUT_SUBTITLE_FORMATS = ["srt", "vtt", "ass", "ssa", "sub", "sbv", "txt", "TextGrid", "json"]

# All subtitle formats combined (for file detection).
# De-duplicated with dict.fromkeys rather than set(): same members, but the
# order is stable (declaration order) instead of changing between runs.
ALL_SUBTITLE_FORMATS = list(dict.fromkeys(SUBTITLE_FORMATS + ["TextGrid", "json", "gemini"]))
|