lattifai 0.4.6__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. lattifai/__init__.py +42 -27
  2. lattifai/alignment/__init__.py +6 -0
  3. lattifai/alignment/lattice1_aligner.py +119 -0
  4. lattifai/{workers/lattice1_alpha.py → alignment/lattice1_worker.py} +33 -132
  5. lattifai/{tokenizer → alignment}/phonemizer.py +1 -1
  6. lattifai/alignment/segmenter.py +166 -0
  7. lattifai/{tokenizer → alignment}/tokenizer.py +186 -112
  8. lattifai/audio2.py +211 -0
  9. lattifai/caption/__init__.py +20 -0
  10. lattifai/caption/caption.py +1275 -0
  11. lattifai/{io → caption}/supervision.py +1 -0
  12. lattifai/{io → caption}/text_parser.py +53 -10
  13. lattifai/cli/__init__.py +17 -0
  14. lattifai/cli/alignment.py +153 -0
  15. lattifai/cli/caption.py +204 -0
  16. lattifai/cli/server.py +19 -0
  17. lattifai/cli/transcribe.py +197 -0
  18. lattifai/cli/youtube.py +128 -0
  19. lattifai/client.py +455 -246
  20. lattifai/config/__init__.py +20 -0
  21. lattifai/config/alignment.py +73 -0
  22. lattifai/config/caption.py +178 -0
  23. lattifai/config/client.py +46 -0
  24. lattifai/config/diarization.py +67 -0
  25. lattifai/config/media.py +335 -0
  26. lattifai/config/transcription.py +84 -0
  27. lattifai/diarization/__init__.py +5 -0
  28. lattifai/diarization/lattifai.py +89 -0
  29. lattifai/errors.py +41 -34
  30. lattifai/logging.py +116 -0
  31. lattifai/mixin.py +552 -0
  32. lattifai/server/app.py +420 -0
  33. lattifai/transcription/__init__.py +76 -0
  34. lattifai/transcription/base.py +108 -0
  35. lattifai/transcription/gemini.py +219 -0
  36. lattifai/transcription/lattifai.py +103 -0
  37. lattifai/types.py +30 -0
  38. lattifai/utils.py +3 -31
  39. lattifai/workflow/__init__.py +22 -0
  40. lattifai/workflow/agents.py +6 -0
  41. lattifai/{workflows → workflow}/file_manager.py +81 -57
  42. lattifai/workflow/youtube.py +564 -0
  43. lattifai-1.0.0.dist-info/METADATA +736 -0
  44. lattifai-1.0.0.dist-info/RECORD +52 -0
  45. {lattifai-0.4.6.dist-info → lattifai-1.0.0.dist-info}/WHEEL +1 -1
  46. lattifai-1.0.0.dist-info/entry_points.txt +13 -0
  47. lattifai/base_client.py +0 -126
  48. lattifai/bin/__init__.py +0 -3
  49. lattifai/bin/agent.py +0 -324
  50. lattifai/bin/align.py +0 -295
  51. lattifai/bin/cli_base.py +0 -25
  52. lattifai/bin/subtitle.py +0 -210
  53. lattifai/io/__init__.py +0 -43
  54. lattifai/io/reader.py +0 -86
  55. lattifai/io/utils.py +0 -15
  56. lattifai/io/writer.py +0 -102
  57. lattifai/tokenizer/__init__.py +0 -3
  58. lattifai/workers/__init__.py +0 -3
  59. lattifai/workflows/__init__.py +0 -34
  60. lattifai/workflows/agents.py +0 -12
  61. lattifai/workflows/gemini.py +0 -167
  62. lattifai/workflows/prompts/README.md +0 -22
  63. lattifai/workflows/prompts/gemini/README.md +0 -24
  64. lattifai/workflows/prompts/gemini/transcription_gem.txt +0 -81
  65. lattifai/workflows/youtube.py +0 -931
  66. lattifai-0.4.6.dist-info/METADATA +0 -806
  67. lattifai-0.4.6.dist-info/RECORD +0 -39
  68. lattifai-0.4.6.dist-info/entry_points.txt +0 -3
  69. /lattifai/{io → caption}/gemini_reader.py +0 -0
  70. /lattifai/{io → caption}/gemini_writer.py +0 -0
  71. /lattifai/{workflows → transcription}/prompts/__init__.py +0 -0
  72. /lattifai/{workflows → workflow}/base.py +0 -0
  73. {lattifai-0.4.6.dist-info → lattifai-1.0.0.dist-info}/licenses/LICENSE +0 -0
  74. {lattifai-0.4.6.dist-info → lattifai-1.0.0.dist-info}/top_level.txt +0 -0
lattifai/bin/align.py DELETED
@@ -1,295 +0,0 @@
1
- import asyncio
2
- import os
3
-
4
- import click
5
- import colorful
6
- from lhotse.utils import Pathlike
7
-
8
- from lattifai.bin.cli_base import cli
9
- from lattifai.client import AsyncLattifAI, LattifAI
10
- from lattifai.io import INPUT_SUBTITLE_FORMATS, OUTPUT_SUBTITLE_FORMATS
11
-
12
-
13
# `align` sub-command: align one media file against one subtitle file and write
# the result to `output_subtitle_path`.
# NOTE: the function docstring is what click shows as the command's help text.
@cli.command()
@click.option(
    "-F", "--input_format", "--input-format",
    type=click.Choice(INPUT_SUBTITLE_FORMATS, case_sensitive=False),
    default="auto",
    help="Input subtitle format.",
)
@click.option(
    "-S", "--split-sentence", "--split_sentence",
    is_flag=True,
    default=False,
    help="Re-segment subtitles by semantics.",
)
@click.option(
    "-W", "--word-level", "--word_level",
    is_flag=True,
    default=False,
    help="Include word-level alignment timestamps in output (for JSON, TextGrid, and subtitle formats).",
)
@click.option(
    "-D", "--device",
    type=click.Choice(["cpu", "cuda", "mps"], case_sensitive=False),
    default="cpu",
    help="Device to use for inference.",
)
@click.option(
    "-M", "--model-name-or-path", "--model_name_or_path",
    type=str,
    default="Lattifai/Lattice-1-Alpha",
    help="Model name or path for alignment.",
)
@click.option(
    "-K", "-L", "--api-key", "--api_key",
    type=str,
    default=None,
    help="API key for LattifAI.",
)
@click.argument("input_media_path", type=click.Path(exists=True, dir_okay=False))
@click.argument("input_subtitle_path", type=click.Path(exists=True, dir_okay=False))
@click.argument("output_subtitle_path", type=click.Path(allow_dash=True))
def align(
    input_media_path: Pathlike,
    input_subtitle_path: Pathlike,
    output_subtitle_path: Pathlike,
    input_format: str = "auto",
    split_sentence: bool = False,
    word_level: bool = False,
    device: str = "cpu",
    model_name_or_path: str = "Lattifai/Lattice-1-Alpha",
    api_key: str = None,
):
    """
    Command used to align media(audio/video) with subtitles
    """
    try:
        aligner = LattifAI(model_name_or_path=model_name_or_path, device=device, api_key=api_key)
        aligner.alignment(
            input_media_path,
            input_subtitle_path,
            format=input_format.lower(),
            split_sentence=split_sentence,
            # The CLI's "word level" switch maps onto the client's `return_details`.
            return_details=word_level,
            output_subtitle_path=output_subtitle_path,
        )
        click.echo(colorful.green(f"✅ Alignment completed successfully: {output_subtitle_path}"))
    except Exception as exc:
        # Imported lazily, only when something actually went wrong.
        from lattifai.errors import LattifAIError

        if not isinstance(exc, LattifAIError):
            click.echo(colorful.red(f"❌ Alignment failed: {str(exc)}"))
        else:
            # Library errors expose a formatted message and separate support info.
            click.echo(colorful.red("❌ Alignment failed:"))
            click.echo(exc.get_message())
            click.echo(exc.get_support_info())

        # Whatever the cause, report failure to click so the process exits non-zero.
        raise click.ClickException("Alignment failed")
112
-
113
-
114
# `youtube` sub-command: download media (and subtitles) for a YouTube URL and run
# the subtitle agent on it.
# NOTE: the function docstring is what click shows as the command's help text.
#
# NOTE: `--media-format` deliberately has no short alias. It used to declare `-M`,
# which `--model-name-or-path` below declares as well; a short flag can only be
# bound to one option, so the two could not both work. `-M` stays on the model
# option, which is also what `-M` means for the `align` command.
@cli.command()
@click.option(
    "--media-format",
    "--media_format",
    type=click.Choice(
        [
            # Audio formats
            "mp3",
            "wav",
            "m4a",
            "aac",
            "flac",
            "ogg",
            "opus",
            "aiff",
            # Video formats
            "mp4",
            "webm",
            "mkv",
            "avi",
            "mov",
        ],
        case_sensitive=False,
    ),
    default="mp3",
    help="Media format for YouTube download (audio or video).",
)
@click.option(
    "-S",
    "--split-sentence",
    "--split_sentence",
    is_flag=True,
    default=False,
    help="Re-segment subtitles by semantics.",
)
@click.option(
    "-W",
    "--word-level",
    "--word_level",
    is_flag=True,
    default=False,
    help="Include word-level alignment timestamps in output (for JSON, TextGrid, and subtitle formats).",
)
@click.option(
    "-O",
    "--output-dir",
    "--output_dir",
    type=click.Path(file_okay=False, dir_okay=True, writable=True),
    default=".",
    help="Output directory (default: current directory).",
)
@click.option(
    "-D",
    "--device",
    type=click.Choice(["cpu", "cuda", "mps"], case_sensitive=False),
    default="cpu",
    help="Device to use for inference.",
)
@click.option(
    "-M",
    "--model-name-or-path",
    "--model_name_or_path",
    type=str,
    default="Lattifai/Lattice-1-Alpha",
    help="Model name or path for alignment.",
)
@click.option(
    "-K",
    "-L",
    "--api-key",
    "--api_key",
    type=str,
    default=None,
    help="API key for LattifAI.",
)
@click.option(
    "-G",
    "--gemini-api-key",
    "--gemini_api_key",
    type=str,
    default=None,
    help="Gemini API key for transcription fallback when subtitles are unavailable.",
)
@click.option(
    "-F",
    "--output-format",
    "--output_format",
    type=click.Choice(OUTPUT_SUBTITLE_FORMATS, case_sensitive=False),
    default="vtt",
    help="Subtitle output format.",
)
@click.argument(
    "yt_url",
    type=str,
)
def youtube(
    yt_url: str,
    media_format: str = "mp3",
    split_sentence: bool = False,
    word_level: bool = False,
    output_dir: str = ".",
    device: str = "cpu",
    model_name_or_path: str = "Lattifai/Lattice-1-Alpha",
    api_key: str = None,
    gemini_api_key: str = None,
    output_format: str = "vtt",
):
    """
    Download media and subtitles from YouTube for further alignment.
    """
    # Imported lazily so the workflow modules are only loaded for this command.
    from lattifai.workflows.gemini import GeminiTranscriber
    from lattifai.workflows.youtube import YouTubeDownloader, YouTubeSubtitleAgent

    # An explicit --gemini-api-key wins; otherwise fall back to the environment.
    gemini_key = gemini_api_key or os.getenv("GEMINI_API_KEY")

    async def _process():
        # Initialize components with their configuration (only config, not runtime params)
        downloader = YouTubeDownloader()
        transcriber = GeminiTranscriber(api_key=gemini_key)
        aligner = AsyncLattifAI(api_key=api_key, model_name_or_path=model_name_or_path, device=device)

        # Create agent with initialized components
        agent = YouTubeSubtitleAgent(
            downloader=downloader,
            transcriber=transcriber,
            aligner=aligner,
            max_retries=0,
        )

        # Per-run parameters are passed here rather than to the constructors above.
        return await agent.process_youtube_url(
            url=yt_url,
            output_dir=output_dir,
            media_format=media_format,
            force_overwrite=False,
            output_format=output_format,
            split_sentence=split_sentence,
            word_level=word_level,
        )

    try:
        result = asyncio.run(_process())

        # Display results
        click.echo(colorful.green("✅ Processing completed!"))
        click.echo()

        # Show metadata
        metadata = result.get("metadata", {})
        if metadata:
            click.echo(f'🎬 Title: {metadata.get("title", "Unknown")}')
            click.echo(f'⏱️ Duration: {metadata.get("duration", 0)} seconds')
            click.echo()

        # Show exported files
        exported_files = result.get("exported_files", {})
        if exported_files:
            click.echo(colorful.green("📄 Generated subtitle files:"))
            for format_name, file_path in exported_files.items():
                click.echo(f" {format_name}: {file_path}")
            click.echo()

        # Show subtitle count
        subtitle_count = result.get("subtitle_count", 0)
        click.echo(f"📝 Generated {subtitle_count} subtitle segments")

    except Exception as e:
        from lattifai.errors import LattifAIError

        if isinstance(e, LattifAIError):
            # Print the formatted message first, then the support info once at the end.
            click.echo(colorful.red("❌ Failed to process YouTube URL:"))
            click.echo(e.get_message())
            click.echo(e.get_support_info())
        else:
            click.echo(colorful.red(f"❌ Failed to process YouTube URL: {str(e)}"))

        # Report failure to click so the process exits non-zero.
        raise click.ClickException("Processing failed")
lattifai/bin/cli_base.py DELETED
@@ -1,25 +0,0 @@
1
- import logging
2
-
3
- import click
4
-
5
-
6
# Root click group; its body runs before any sub-command and prepares the
# process-wide environment (dotenv, logging, two environment variables).
@click.group()
def cli():
    """
    The shell entry point to Lattifai, a tool for audio data manipulation.
    """
    import os

    from dotenv import find_dotenv, load_dotenv

    # Try to find a .env file from the current directory or parent directories,
    # then load it.
    dotenv_path = find_dotenv(usecwd=True)
    load_dotenv(dotenv_path)

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s",
    )

    # NOTE(review): presumably these silence duplicate-OpenMP and tokenizer
    # parallelism issues in downstream libraries; confirm before changing.
    os.environ.update(
        {
            "KMP_DUPLICATE_LIB_OK": "TRUE",
            "TOKENIZERS_PARALLELISM": "FALSE",
        }
    )
lattifai/bin/subtitle.py DELETED
@@ -1,210 +0,0 @@
1
- import asyncio
2
- from pathlib import Path
3
-
4
- import click
5
- from lhotse.utils import Pathlike
6
-
7
- from lattifai.bin.cli_base import cli
8
- from lattifai.io import SUBTITLE_FORMATS
9
-
10
-
11
# Container group for the `subtitle ...` sub-commands; it has no behaviour of its
# own, so the docstring (click's help text) is the whole body.
@cli.group()
def subtitle():
    """Commands for subtitle format conversion and management."""
15
-
16
-
17
# `subtitle convert`: re-save a subtitle file under another name/format.
# NOTE: the function docstring is what click shows as the command's help text.
@subtitle.command()
@click.argument("input_subtitle_path", type=click.Path(exists=True, dir_okay=False))
@click.argument("output_subtitle_path", type=click.Path(allow_dash=True))
def convert(
    input_subtitle_path: Pathlike,
    output_subtitle_path: Pathlike,
):
    """
    Convert subtitle file to another format.
    """
    # The target format is decided by the output name alone (case-insensitive).
    wants_textgrid = str(output_subtitle_path).lower().endswith(".textgrid")

    if not wants_textgrid:
        # Everything except TextGrid is delegated to pysubs2.
        import pysubs2

        loaded = pysubs2.load(input_subtitle_path)
        loaded.save(output_subtitle_path)
        return

    # TextGrid output goes through the project's own reader/writer pair.
    from lattifai.io import SubtitleIO

    supervisions = SubtitleIO.read(input_subtitle_path)
    SubtitleIO.write(supervisions, output_subtitle_path)
44
-
45
-
46
# `subtitle download`: fetch subtitle tracks for a YouTube URL into a directory.
# NOTE: the function docstring is what click shows as the command's help text.
@subtitle.command()
@click.argument("url", type=str, required=True)
@click.option(
    "--output-dir", "--output_dir", "-o",
    type=click.Path(file_okay=False, dir_okay=True),
    default=".",
    help="Output directory for downloaded subtitle files (default: current directory).",
)
@click.option(
    "--output-format", "--output_format", "-f",
    type=click.Choice(SUBTITLE_FORMATS + ["best"], case_sensitive=False),
    default="best",
    help="Preferred subtitle format to download (default: best available).",
)
@click.option("--force-overwrite", "-F", is_flag=True, help="Overwrite existing files without prompting.")
@click.option(
    "--lang", "-l", "-L", "--subtitle-lang", "--subtitle_lang",
    type=str,
    help='Specific subtitle language/track to download (e.g., "en").',
)
def download(
    url: str,
    output_dir: str,
    output_format: str,
    force_overwrite: bool,
    lang: str,
):
    """
    Download subtitles from YouTube URL using yt-dlp.

    URL should be a valid YouTube URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID).
    """
    # Import here to avoid circular imports and keep startup fast
    from lattifai.workflows.youtube import YouTubeDownloader

    # Reject anything that does not look like a YouTube link before doing any work.
    if not _is_valid_youtube_url(url):
        click.echo(f"Error: Invalid YouTube URL format: {url}", err=True)
        click.echo("Please provide a valid YouTube URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID)", err=True)
        raise click.Abort()

    # Work with an absolute directory and make sure it exists.
    output_path = Path(output_dir).resolve()
    output_path.mkdir(parents=True, exist_ok=True)

    click.echo(f"Downloading subtitles from: {url}")
    click.echo(f" Output directory: {output_path}")
    # NOTE(review): `output_format` is only echoed here; it is not forwarded to
    # the downloader call below.
    click.echo(f" Preferred format: {output_format}")
    click.echo(f" Subtitle language: {lang}" if lang else " Subtitle language: All available")

    downloader = YouTubeDownloader()

    async def _fetch():
        try:
            saved = await downloader.download_subtitles(
                url=url,
                output_dir=str(output_path),
                force_overwrite=force_overwrite,
                subtitle_lang=lang,
            )

            if not saved:
                click.echo("⚠️ No subtitles available for this video")
                return None

            click.echo("✅ Subtitles downloaded successfully!")
            return saved

        except Exception as exc:
            click.echo(f"❌ Error downloading subtitles: {str(exc)}", err=True)
            raise click.Abort()

    # Drive the coroutine to completion from this synchronous command.
    outcome = asyncio.run(_fetch())

    if not outcome:
        return

    # The downloader may answer with the sentinel "gemini" instead of a file path.
    if outcome == "gemini":
        click.echo("✨ Gemini transcription selected (use the agent command to transcribe)")
    else:
        click.echo(f"📄 Subtitle file saved to: {outcome}")
140
-
141
-
142
# `subtitle list-subs`: print the subtitle tracks available for a YouTube URL.
# NOTE: the function docstring is what click shows as the command's help text.
@subtitle.command()
@click.argument("url", type=str, required=True)
def list_subs(url: str):
    """
    List available subtitle tracks for a YouTube video.

    URL should be a valid YouTube URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID)
    """
    # Import here to avoid circular imports and keep startup fast
    from lattifai.workflows.youtube import YouTubeDownloader

    # Reject anything that does not look like a YouTube link before doing any work.
    if not _is_valid_youtube_url(url):
        click.echo(f"Error: Invalid YouTube URL format: {url}", err=True)
        click.echo("Please provide a valid YouTube URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID)", err=True)
        raise click.Abort()

    click.echo(f"Listing available subtitles for: {url}")

    downloader = YouTubeDownloader()

    async def _show_tracks():
        try:
            tracks = await downloader.list_available_subtitles(url)

            if not tracks:
                click.echo("⚠️ No subtitles available for this video")
                return

            click.echo("📋 Available subtitle tracks:")
            for track in tracks:
                click.echo(f' 🎬 Language: {track["language"]} - {track["name"]}')
                click.echo(f' 📄 Formats: {", ".join(track["formats"])}')
                click.echo()

            # Finish with a copy-pasteable hint for the download command.
            click.echo("💡 To download a specific track, use:")
            click.echo(f' lattifai subtitle download "{url}" --lang <language_code>')
            click.echo(f' Example: lattifai subtitle download "{url}" --lang en-JkeT_87f4cc')

        except Exception as exc:
            click.echo(f"❌ Error listing subtitles: {str(exc)}", err=True)
            raise click.Abort()

    # Drive the coroutine to completion from this synchronous command.
    asyncio.run(_show_tracks())
187
-
188
-
189
- def _is_valid_youtube_url(url: str) -> bool:
190
- """
191
- Validate if the URL is a valid YouTube URL format.
192
-
193
- Supports various YouTube URL formats:
194
- - https://www.youtube.com/watch?v=VIDEO_ID
195
- - https://youtu.be/VIDEO_ID
196
- - https://www.youtube.com/shorts/VIDEO_ID
197
- - https://m.youtube.com/watch?v=VIDEO_ID
198
- """
199
- import re
200
-
201
- patterns = [
202
- r"(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/shorts/)([a-zA-Z0-9_-]{11})",
203
- r"youtube\.com/embed/([a-zA-Z0-9_-]{11})",
204
- r"youtube\.com/v/([a-zA-Z0-9_-]{11})",
205
- ]
206
-
207
- for pattern in patterns:
208
- if re.search(pattern, url):
209
- return True
210
- return False
lattifai/io/__init__.py DELETED
@@ -1,43 +0,0 @@
1
- from typing import List, Optional
2
-
3
- from lhotse.utils import Pathlike
4
-
5
- from .gemini_reader import GeminiReader, GeminiSegment
6
- from .gemini_writer import GeminiWriter
7
- from .reader import SubtitleFormat, SubtitleReader
8
- from .supervision import Supervision
9
- from .text_parser import normalize_html_text
10
- from .utils import (
11
- ALL_SUBTITLE_FORMATS,
12
- INPUT_SUBTITLE_FORMATS,
13
- OUTPUT_SUBTITLE_FORMATS,
14
- SUBTITLE_FORMATS,
15
- )
16
- from .writer import SubtitleWriter
17
-
18
# Names re-exported by the package.
__all__ = [
    "SubtitleReader",
    "SubtitleWriter",
    "SubtitleIO",
    "Supervision",
    "GeminiReader",
    "GeminiWriter",
    "GeminiSegment",
    "SUBTITLE_FORMATS",
    "INPUT_SUBTITLE_FORMATS",
    "OUTPUT_SUBTITLE_FORMATS",
    "ALL_SUBTITLE_FORMATS",
]


class SubtitleIO:
    """Single entry point that forwards to SubtitleReader and SubtitleWriter."""

    def __init__(self):
        """Nothing to initialise; the useful API is the two classmethods."""

    @classmethod
    def read(cls, subtitle: Pathlike, format: Optional[SubtitleFormat] = None) -> List[Supervision]:
        """Read *subtitle* via ``SubtitleReader.read``, passing *format* through."""
        supervisions = SubtitleReader.read(subtitle, format=format)
        return supervisions

    @classmethod
    def write(cls, alignments: List[Supervision], output_path: Pathlike) -> Pathlike:
        """Write *alignments* via ``SubtitleWriter.write`` and return its result."""
        written = SubtitleWriter.write(alignments, output_path)
        return written
lattifai/io/reader.py DELETED
@@ -1,86 +0,0 @@
1
- from abc import ABCMeta
2
- from pathlib import Path
3
- from typing import List, Literal, Optional, Union
4
-
5
- from lhotse.utils import Pathlike
6
-
7
- from .supervision import Supervision
8
- from .text_parser import parse_speaker_text
9
-
10
# Format names accepted by SubtitleReader.read. "gemini" is handled explicitly by
# the reader; the remaining names match the CLI's input-format choices.
SubtitleFormat = Literal["txt", "srt", "vtt", "ass", "ssa", "sub", "sbv", "gemini", "auto"]


class SubtitleReader(metaclass=ABCMeta):
    """Parser for converting different subtitle formats to List[Supervision]."""

    @staticmethod
    def _is_existing_path(subtitle: Pathlike) -> bool:
        """Return True if *subtitle* names an existing filesystem entry.

        *subtitle* may be raw subtitle text rather than a path. Probing such text
        on disk can raise (for example when it is longer than a file name may be,
        or contains a NUL character), which must mean "not a file", not a crash.
        """
        try:
            return Path(str(subtitle)).exists()
        except (OSError, ValueError):
            return False

    @classmethod
    def read(cls, subtitle: Pathlike, format: Optional[SubtitleFormat] = None) -> List[Supervision]:
        """Parse a subtitle source into a list of Supervision objects.

        Args:
            subtitle: Either the path of a subtitle file or the subtitle content
                itself as a string.
            format: Input format name (case-insensitive). When omitted and
                *subtitle* is an existing file, the file extension is used.

        Returns:
            The parsed supervisions. For "txt" input, one per non-empty line; for
            pysubs2-parsed input, one per subtitle event.
        """
        if not format and cls._is_existing_path(subtitle):
            format = Path(str(subtitle)).suffix.lstrip(".").lower()
        elif format:
            format = format.lower()

        if format == "gemini" or str(subtitle).endswith("Gemini.md"):
            from .gemini_reader import GeminiReader

            supervisions = GeminiReader.extract_for_alignment(subtitle)
        elif format == "txt" or (format == "auto" and str(subtitle)[-4:].lower() == ".txt"):
            if not cls._is_existing_path(subtitle):
                # Not a file on disk: treat the argument as the text itself.
                lines = [line.strip() for line in str(subtitle).split("\n")]
            else:
                with open(str(subtitle), encoding="utf-8") as f:
                    lines = [line.strip() for line in f]
            # Blank lines carry no text and are dropped.
            supervisions = [Supervision(text=line) for line in lines if line]
        else:
            try:
                supervisions = cls._parse_subtitle(subtitle, format=format)
            except Exception as e:
                # Last resort: anything pysubs2 cannot handle is tried as Gemini output.
                print(f"Failed to parse subtitle with Format: {format}, Exception: {e}, trying 'gemini' parser.")
                from .gemini_reader import GeminiReader

                supervisions = GeminiReader.extract_for_alignment(subtitle)

        return supervisions

    @classmethod
    def _parse_subtitle(cls, subtitle: Pathlike, format: Optional[SubtitleFormat]) -> List[Supervision]:
        """Parse *subtitle* with pysubs2 and convert its events to supervisions.

        Tries, in order: loading *subtitle* as a file, parsing it as in-memory
        text, and finally loading it as a file with format auto-detection.
        "auto" is translated to ``None`` for pysubs2.
        """
        import pysubs2

        format_hint = format if format != "auto" else None

        try:
            subs = pysubs2.load(subtitle, encoding="utf-8", format_=format_hint)  # file
        except IOError:
            try:
                subs = pysubs2.SSAFile.from_string(subtitle, format_=format_hint)  # str
            except Exception:
                subs = pysubs2.load(subtitle, encoding="utf-8")  # auto detect format

        supervisions = []
        for event in subs.events:
            # NOT apply text_parser.py:normalize_html_text here, to keep original text in subtitles
            speaker, text = parse_speaker_text(event.text)
            supervisions.append(
                Supervision(
                    text=text,
                    speaker=speaker,
                    # Event times are divided by 1000 before being stored.
                    start=event.start / 1000.0 if event.start is not None else None,
                    duration=(event.end - event.start) / 1000.0 if event.end is not None else None,
                )
            )
        return supervisions
lattifai/io/utils.py DELETED
@@ -1,15 +0,0 @@
1
- """
2
- Utility constants and helper functions for subtitle I/O operations
3
- """
4
-
5
# Supported subtitle formats for reading/writing
SUBTITLE_FORMATS = ["srt", "vtt", "ass", "ssa", "sub", "sbv", "txt", "md"]

# Input subtitle formats (includes special formats like 'auto' and 'gemini')
INPUT_SUBTITLE_FORMATS = ["srt", "vtt", "ass", "ssa", "sub", "sbv", "txt", "auto", "gemini"]

# Output subtitle formats (includes special formats like 'TextGrid' and 'json')
OUTPUT_SUBTITLE_FORMATS = ["srt", "vtt", "ass", "ssa", "sub", "sbv", "txt", "TextGrid", "json"]

# All subtitle formats combined (for file detection).
# De-duplicated with dict.fromkeys rather than set() so the order is the same on
# every run; a set of strings iterates in an order that varies between
# interpreter runs when string hashing is randomised.
ALL_SUBTITLE_FORMATS = list(dict.fromkeys(SUBTITLE_FORMATS + ["TextGrid", "json", "gemini"]))