lattifai 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/_init.py +20 -0
- lattifai/alignment/__init__.py +2 -3
- lattifai/alignment/lattice1_aligner.py +117 -4
- lattifai/alignment/lattice1_worker.py +47 -4
- lattifai/alignment/segmenter.py +3 -2
- lattifai/alignment/text_align.py +2 -1
- lattifai/alignment/tokenizer.py +56 -29
- lattifai/audio2.py +162 -183
- lattifai/cli/alignment.py +5 -0
- lattifai/cli/caption.py +6 -6
- lattifai/cli/transcribe.py +1 -5
- lattifai/cli/youtube.py +3 -0
- lattifai/client.py +41 -12
- lattifai/config/__init__.py +21 -3
- lattifai/config/alignment.py +7 -0
- lattifai/config/caption.py +13 -243
- lattifai/config/client.py +16 -0
- lattifai/config/event.py +102 -0
- lattifai/config/transcription.py +25 -1
- lattifai/data/__init__.py +8 -0
- lattifai/data/caption.py +228 -0
- lattifai/errors.py +78 -53
- lattifai/event/__init__.py +65 -0
- lattifai/event/lattifai.py +166 -0
- lattifai/mixin.py +22 -17
- lattifai/transcription/base.py +2 -1
- lattifai/transcription/gemini.py +147 -16
- lattifai/transcription/lattifai.py +8 -11
- lattifai/types.py +1 -1
- lattifai/youtube/client.py +143 -48
- {lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/METADATA +117 -54
- lattifai-1.3.0.dist-info/RECORD +57 -0
- lattifai/__init__.py +0 -88
- lattifai/alignment/sentence_splitter.py +0 -350
- lattifai/caption/__init__.py +0 -96
- lattifai/caption/caption.py +0 -661
- lattifai/caption/formats/__init__.py +0 -199
- lattifai/caption/formats/base.py +0 -211
- lattifai/caption/formats/gemini.py +0 -722
- lattifai/caption/formats/json.py +0 -194
- lattifai/caption/formats/lrc.py +0 -309
- lattifai/caption/formats/nle/__init__.py +0 -9
- lattifai/caption/formats/nle/audition.py +0 -561
- lattifai/caption/formats/nle/avid.py +0 -423
- lattifai/caption/formats/nle/fcpxml.py +0 -549
- lattifai/caption/formats/nle/premiere.py +0 -589
- lattifai/caption/formats/pysubs2.py +0 -642
- lattifai/caption/formats/sbv.py +0 -147
- lattifai/caption/formats/tabular.py +0 -338
- lattifai/caption/formats/textgrid.py +0 -193
- lattifai/caption/formats/ttml.py +0 -652
- lattifai/caption/formats/vtt.py +0 -469
- lattifai/caption/parsers/__init__.py +0 -9
- lattifai/caption/parsers/text_parser.py +0 -147
- lattifai/caption/standardize.py +0 -636
- lattifai/caption/supervision.py +0 -34
- lattifai/caption/utils.py +0 -474
- lattifai-1.2.2.dist-info/RECORD +0 -76
- {lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/WHEEL +0 -0
- {lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/entry_points.txt +0 -0
- {lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/licenses/LICENSE +0 -0
- {lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/top_level.txt +0 -0
|
@@ -1,199 +0,0 @@
|
|
|
1
|
-
"""Caption format handlers registry.
|
|
2
|
-
|
|
3
|
-
This module provides a central registry for all caption format readers and writers.
|
|
4
|
-
Formats are registered using decorators and can be looked up by format ID.
|
|
5
|
-
|
|
6
|
-
Example:
|
|
7
|
-
>>> from lattifai.caption.formats import get_reader, get_writer
|
|
8
|
-
>>> reader = get_reader("srt")
|
|
9
|
-
>>> supervisions = reader.read("input.srt")
|
|
10
|
-
>>> writer = get_writer("vtt")
|
|
11
|
-
>>> writer.write(supervisions, "output.vtt")
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
from typing import Dict, List, Optional, Type
|
|
15
|
-
|
|
16
|
-
from .base import FormatHandler, FormatReader, FormatWriter
|
|
17
|
-
|
|
18
|
-
# Global registries
# Both map a lowercased format ID to the class registered for it; they are
# filled by the register_reader / register_writer / register_format decorators.
_READERS: Dict[str, Type[FormatReader]] = {}
_WRITERS: Dict[str, Type[FormatWriter]] = {}
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def register_reader(format_id: str):
    """Build a class decorator that registers a format reader.

    The decorated class has its ``format_id`` attribute set to the value given
    here and is stored in the reader registry under the lowercased ID.

    Args:
        format_id: Unique identifier for the format (e.g., "srt", "vtt")

    Returns:
        A decorator that registers the class and hands the same class back.

    Example:
        @register_reader("srt")
        class SRTReader(FormatReader):
            ...
    """

    def _register(reader_cls: Type[FormatReader]) -> Type[FormatReader]:
        # The attribute keeps the caller's spelling; only the registry key is
        # lowercased, which is what makes lookups case-insensitive.
        reader_cls.format_id = format_id
        registry_key = format_id.lower()
        _READERS[registry_key] = reader_cls
        return reader_cls

    return _register
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def register_writer(format_id: str):
    """Build a class decorator that registers a format writer.

    The decorated class has its ``format_id`` attribute set to the value given
    here and is stored in the writer registry under the lowercased ID.

    Args:
        format_id: Unique identifier for the format

    Returns:
        A decorator that registers the class and hands the same class back.

    Example:
        @register_writer("srt")
        class SRTWriter(FormatWriter):
            ...
    """

    def _register(writer_cls: Type[FormatWriter]) -> Type[FormatWriter]:
        # The attribute keeps the caller's spelling; only the registry key is
        # lowercased, which is what makes lookups case-insensitive.
        writer_cls.format_id = format_id
        registry_key = format_id.lower()
        _WRITERS[registry_key] = writer_cls
        return writer_cls

    return _register
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
def register_format(format_id: str):
    """Build a class decorator that registers one class as reader and writer.

    Intended for classes that implement both FormatReader and FormatWriter.
    The class is stored in both registries under the lowercased ID and its
    ``format_id`` attribute is set to the value given here.

    Args:
        format_id: Unique identifier for the format

    Returns:
        A decorator that registers the class and hands the same class back.

    Example:
        @register_format("srt")
        class SRTFormat(FormatHandler):
            ...
    """

    def _register(handler_cls: Type[FormatHandler]) -> Type[FormatHandler]:
        handler_cls.format_id = format_id
        # Readers first, then writers: the same class serves both roles.
        for registry in (_READERS, _WRITERS):
            registry[format_id.lower()] = handler_cls
        return handler_cls

    return _register
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
def get_reader(format_id: str) -> Optional[Type[FormatReader]]:
    """Look up the reader class registered for a format ID.

    Args:
        format_id: Format identifier; it is lowercased before the lookup, so
            matching is case-insensitive.

    Returns:
        The registered reader class, or None when nothing is registered
        under that ID.
    """
    lookup_key = format_id.lower()
    return _READERS.get(lookup_key)
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
def get_writer(format_id: str) -> Optional[Type[FormatWriter]]:
    """Look up the writer class registered for a format ID.

    Args:
        format_id: Format identifier; it is lowercased before the lookup, so
            matching is case-insensitive.

    Returns:
        The registered writer class, or None when nothing is registered
        under that ID.
    """
    lookup_key = format_id.lower()
    return _WRITERS.get(lookup_key)
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
def list_readers() -> List[str]:
    """Return the IDs of all registered readers in sorted order."""
    # Iterating a dict yields its keys, so no explicit .keys() call is needed.
    return sorted(_READERS)
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
def list_writers() -> List[str]:
    """Return the IDs of all registered writers in sorted order."""
    # Iterating a dict yields its keys, so no explicit .keys() call is needed.
    return sorted(_WRITERS)
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
def detect_format(path: str) -> Optional[str]:
    """Detect a caption format by asking the registered readers.

    Resolution order:

    1. The priority readers ("vtt", "gemini", "premiere_xml") are asked
       first, with the input exactly as given.
    2. If the input looks like content rather than a path (it contains a
       newline or is longer than 500 characters), detection stops with None.
    3. Every other registered reader is asked with the lowercased input.
    4. As a last resort the file suffix itself is used if it is a registered
       reader ID.

    Args:
        path: File path to check (converted with ``str()`` before use)

    Returns:
        Format ID or None if no match found
    """
    source = str(path)

    # These readers can recognise their format from content, and their
    # extensions (.vtt, .txt, .xml) are often shared with other formats.
    priority_formats = ["vtt", "gemini", "premiere_xml"]
    for candidate in priority_formats:
        reader_cls = _READERS.get(candidate)
        if reader_cls and reader_cls.can_read(source):
            return candidate

    # Raw content cannot be matched by extension, so give up here.
    if "\n" in source or len(source) > 500:
        return None

    # Ask the remaining readers, in registration order.
    lowered = source.lower()
    for candidate, reader_cls in _READERS.items():
        if candidate not in priority_formats and reader_cls.can_read(lowered):
            return candidate

    # Fallback: treat the bare extension as a format ID.
    from pathlib import Path

    try:
        suffix = Path(lowered).suffix.lstrip(".")
    except (OSError, ValueError):
        # Likely content, not a path
        return None
    return suffix if suffix in _READERS else None
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
# Import all format modules to trigger registration
|
|
168
|
-
# Standard formats
|
|
169
|
-
from . import gemini # YouTube/Gemini markdown
|
|
170
|
-
from . import lrc # Enhanced LRC with word-level timestamps
|
|
171
|
-
from . import pysubs2 # SRT, ASS, SSA, SUB, SAMI
|
|
172
|
-
from . import sbv # SubViewer
|
|
173
|
-
from . import tabular # CSV, TSV, AUD, TXT, JSON
|
|
174
|
-
from . import textgrid # Praat TextGrid
|
|
175
|
-
from . import ttml # TTML, IMSC1, EBU-TT-D
|
|
176
|
-
from . import vtt # WebVTT with YouTube VTT word-level timestamp support
|
|
177
|
-
|
|
178
|
-
# Professional NLE formats
|
|
179
|
-
from .nle import audition # Adobe Audition / Pro Tools markers
|
|
180
|
-
from .nle import avid # Avid DS
|
|
181
|
-
from .nle import fcpxml # Final Cut Pro XML
|
|
182
|
-
from .nle import premiere # Adobe Premiere Pro XML
|
|
183
|
-
|
|
184
|
-
__all__ = [
|
|
185
|
-
# Base classes
|
|
186
|
-
"FormatReader",
|
|
187
|
-
"FormatWriter",
|
|
188
|
-
"FormatHandler",
|
|
189
|
-
# Registration
|
|
190
|
-
"register_reader",
|
|
191
|
-
"register_writer",
|
|
192
|
-
"register_format",
|
|
193
|
-
# Lookup
|
|
194
|
-
"get_reader",
|
|
195
|
-
"get_writer",
|
|
196
|
-
"list_readers",
|
|
197
|
-
"list_writers",
|
|
198
|
-
"detect_format",
|
|
199
|
-
]
|
lattifai/caption/formats/base.py
DELETED
|
@@ -1,211 +0,0 @@
|
|
|
1
|
-
"""Base classes for caption format readers and writers.
|
|
2
|
-
|
|
3
|
-
This module provides abstract base classes that all format handlers must implement,
|
|
4
|
-
ensuring a consistent interface across different caption formats.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
from abc import ABC, abstractmethod
|
|
8
|
-
from pathlib import Path
|
|
9
|
-
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
|
|
10
|
-
|
|
11
|
-
from lhotse.utils import Pathlike
|
|
12
|
-
|
|
13
|
-
if TYPE_CHECKING:
|
|
14
|
-
from ..supervision import Supervision
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class FormatReader(ABC):
    """Interface every caption format reader implements.

    Subclasses provide `read`, which parses caption content into a list of
    Supervision objects. The remaining classmethods have default
    implementations that subclasses may override.

    Class Attributes:
        format_id: Unique identifier for the format (e.g., "srt", "vtt")
        extensions: List of file extensions this reader handles (e.g., [".srt"])
        description: Human-readable description of the format
    """

    format_id: str = ""
    extensions: List[str] = []
    description: str = ""

    @classmethod
    @abstractmethod
    def read(
        cls,
        source: Union[Pathlike, str],
        normalize_text: bool = True,
        **kwargs,
    ) -> List["Supervision"]:
        """Parse caption content into Supervision objects.

        Args:
            source: File path or string content
            normalize_text: Whether to normalize text (strip HTML, etc.)
            **kwargs: Format-specific options

        Returns:
            List of Supervision objects with timing and text
        """

    @classmethod
    def extract_metadata(cls, source: Union[Pathlike, str]) -> Dict[str, str]:
        """Extract metadata from a caption file or content.

        The base implementation ignores ``source`` and reports no metadata.

        Args:
            source: File path or string content

        Returns:
            Dictionary of metadata key-value pairs
        """
        return {}

    @classmethod
    def can_read(cls, path: Union[Pathlike, str]) -> bool:
        """Report whether the path ends with one of this reader's extensions.

        The comparison is case-insensitive on both sides.

        Args:
            path: File path to check

        Returns:
            True if this reader supports the file format
        """
        lowered = str(path).lower()
        for ext in cls.extensions:
            if lowered.endswith(ext.lower()):
                return True
        return False

    @classmethod
    def is_content(cls, source: Union[Pathlike, str]) -> bool:
        """Guess whether the source is caption content rather than a file path.

        Args:
            source: Source to check

        Returns:
            True if source appears to be content, not a path
        """
        # Only strings can be content; among those, a newline or a length
        # above 500 characters is taken as the sign of content.
        return isinstance(source, str) and ("\n" in source or len(source) > 500)
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
class FormatWriter(ABC):
    """Interface every caption format writer implements.

    Subclasses provide both `write` (to a file) and `to_bytes` (in memory).

    Class Attributes:
        format_id: Unique identifier for the format (e.g., "srt", "vtt")
        extensions: List of file extensions for this format
        description: Human-readable description of the format
    """

    format_id: str = ""
    extensions: List[str] = []
    description: str = ""

    @classmethod
    @abstractmethod
    def write(
        cls,
        supervisions: List["Supervision"],
        output_path: Pathlike,
        include_speaker: bool = True,
        **kwargs,
    ) -> Path:
        """Write supervisions to a file.

        Args:
            supervisions: List of Supervision objects to write
            output_path: Path to output file
            include_speaker: Whether to include speaker labels in text
            **kwargs: Format-specific options

        Returns:
            Path to the written file
        """

    @classmethod
    @abstractmethod
    def to_bytes(
        cls,
        supervisions: List["Supervision"],
        include_speaker: bool = True,
        **kwargs,
    ) -> bytes:
        """Serialize supervisions to bytes in this format.

        Args:
            supervisions: List of Supervision objects
            include_speaker: Whether to include speaker labels
            **kwargs: Format-specific options

        Returns:
            Caption content as bytes
        """

    @classmethod
    def _should_include_speaker(cls, sup: Any, include_speaker: bool) -> bool:
        """Decide whether a segment's speaker belongs in the output text.

        The speaker is included only when the global ``include_speaker`` flag
        is on, the segment has a truthy ``speaker``, and the segment's
        ``custom`` metadata does not carry a falsy 'original_speaker' value.
        """
        if not include_speaker:
            return False
        if not getattr(sup, "speaker", None):
            return False
        custom = getattr(sup, "custom", None)
        if not custom:
            # No custom metadata means nothing vetoes the speaker label.
            return True
        return bool(custom.get("original_speaker", True))
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
class FormatHandler(FormatReader, FormatWriter):
    """Reader and writer in a single class.

    Adds nothing of its own: it only combines the FormatReader and
    FormatWriter interfaces for formats that support both directions.
    """
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
# Type aliases for registration
# Both describe the reader/writer classes themselves, not instances of them.
ReaderType = type[FormatReader]
WriterType = type[FormatWriter]
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
def expand_to_word_supervisions(supervisions: List["Supervision"]) -> List["Supervision"]:
    """Expand supervisions with word alignment to one supervision per word.

    Used for word-per-segment output when word_level=True but karaoke=False.

    A supervision is expanded only when its alignment holds a non-empty
    "word" entry. One whose word list is missing or empty is passed through
    unchanged, so no segment is ever dropped from the output.

    Args:
        supervisions: List of Supervision objects with optional alignment data

    Returns:
        List of Supervision objects: one per aligned word where word alignment
        exists, otherwise the original supervision. Each word supervision
        copies the parent's speaker and recording_id; all words of one parent
        share the id "<parent id>_word" (or "" when the parent has no id).
    """
    from ..supervision import Supervision

    result = []
    for sup in supervisions:
        alignment = sup.alignment
        words = alignment["word"] if alignment and "word" in alignment else None
        if not words:
            # No usable word timing (absent or empty): keep the segment as is
            # instead of silently losing it.
            result.append(sup)
            continue

        # These values are the same for every word of this supervision.
        word_id = f"{sup.id}_word" if sup.id else ""
        recording_id = getattr(sup, "recording_id", "")
        result.extend(
            Supervision(
                text=word.symbol,
                start=word.start,
                duration=word.duration,
                speaker=sup.speaker,
                id=word_id,
                recording_id=recording_id,
            )
            for word in words
        )
    return result
|