lattifai 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. lattifai/__init__.py +0 -24
  2. lattifai/alignment/__init__.py +10 -1
  3. lattifai/alignment/lattice1_aligner.py +66 -58
  4. lattifai/alignment/lattice1_worker.py +1 -6
  5. lattifai/alignment/punctuation.py +38 -0
  6. lattifai/alignment/segmenter.py +1 -1
  7. lattifai/alignment/sentence_splitter.py +350 -0
  8. lattifai/alignment/text_align.py +440 -0
  9. lattifai/alignment/tokenizer.py +91 -220
  10. lattifai/caption/__init__.py +82 -6
  11. lattifai/caption/caption.py +335 -1143
  12. lattifai/caption/formats/__init__.py +199 -0
  13. lattifai/caption/formats/base.py +211 -0
  14. lattifai/caption/formats/gemini.py +722 -0
  15. lattifai/caption/formats/json.py +194 -0
  16. lattifai/caption/formats/lrc.py +309 -0
  17. lattifai/caption/formats/nle/__init__.py +9 -0
  18. lattifai/caption/formats/nle/audition.py +561 -0
  19. lattifai/caption/formats/nle/avid.py +423 -0
  20. lattifai/caption/formats/nle/fcpxml.py +549 -0
  21. lattifai/caption/formats/nle/premiere.py +589 -0
  22. lattifai/caption/formats/pysubs2.py +642 -0
  23. lattifai/caption/formats/sbv.py +147 -0
  24. lattifai/caption/formats/tabular.py +338 -0
  25. lattifai/caption/formats/textgrid.py +193 -0
  26. lattifai/caption/formats/ttml.py +652 -0
  27. lattifai/caption/formats/vtt.py +469 -0
  28. lattifai/caption/parsers/__init__.py +9 -0
  29. lattifai/caption/{text_parser.py → parsers/text_parser.py} +4 -2
  30. lattifai/caption/standardize.py +636 -0
  31. lattifai/caption/utils.py +474 -0
  32. lattifai/cli/__init__.py +2 -1
  33. lattifai/cli/caption.py +108 -1
  34. lattifai/cli/transcribe.py +4 -9
  35. lattifai/cli/youtube.py +4 -1
  36. lattifai/client.py +48 -84
  37. lattifai/config/__init__.py +11 -1
  38. lattifai/config/alignment.py +9 -2
  39. lattifai/config/caption.py +267 -23
  40. lattifai/config/media.py +20 -0
  41. lattifai/diarization/__init__.py +41 -1
  42. lattifai/mixin.py +36 -18
  43. lattifai/transcription/base.py +6 -1
  44. lattifai/transcription/lattifai.py +19 -54
  45. lattifai/utils.py +81 -13
  46. lattifai/workflow/__init__.py +28 -4
  47. lattifai/workflow/file_manager.py +2 -5
  48. lattifai/youtube/__init__.py +43 -0
  49. lattifai/youtube/client.py +1170 -0
  50. lattifai/youtube/types.py +23 -0
  51. lattifai-1.2.2.dist-info/METADATA +615 -0
  52. lattifai-1.2.2.dist-info/RECORD +76 -0
  53. {lattifai-1.2.0.dist-info → lattifai-1.2.2.dist-info}/entry_points.txt +1 -2
  54. lattifai/caption/gemini_reader.py +0 -371
  55. lattifai/caption/gemini_writer.py +0 -173
  56. lattifai/cli/app_installer.py +0 -142
  57. lattifai/cli/server.py +0 -44
  58. lattifai/server/app.py +0 -427
  59. lattifai/workflow/youtube.py +0 -577
  60. lattifai-1.2.0.dist-info/METADATA +0 -1133
  61. lattifai-1.2.0.dist-info/RECORD +0 -57
  62. {lattifai-1.2.0.dist-info → lattifai-1.2.2.dist-info}/WHEEL +0 -0
  63. {lattifai-1.2.0.dist-info → lattifai-1.2.2.dist-info}/licenses/LICENSE +0 -0
  64. {lattifai-1.2.0.dist-info → lattifai-1.2.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,199 @@
1
+ """Caption format handlers registry.
2
+
3
+ This module provides a central registry for all caption format readers and writers.
4
+ Formats are registered using decorators and can be looked up by format ID.
5
+
6
+ Example:
7
+ >>> from lattifai.caption.formats import get_reader, get_writer
8
+ >>> reader = get_reader("srt")
9
+ >>> supervisions = reader.read("input.srt")
10
+ >>> writer = get_writer("vtt")
11
+ >>> writer.write(supervisions, "output.vtt")
12
+ """
13
+
14
+ from typing import Dict, List, Optional, Type
15
+
16
+ from .base import FormatHandler, FormatReader, FormatWriter
17
+
18
+ # Global registries
19
+ _READERS: Dict[str, Type[FormatReader]] = {}
20
+ _WRITERS: Dict[str, Type[FormatWriter]] = {}
21
+
22
+
23
def register_reader(format_id: str):
    """Create a class decorator that registers a format reader.

    The returned decorator stamps ``format_id`` onto the decorated class and
    stores the class in the reader registry under the lower-cased identifier.

    Args:
        format_id: Unique identifier for the format (e.g., "srt", "vtt")

    Returns:
        A decorator that takes a reader class and returns the same class.

    Example:
        @register_reader("srt")
        class SRTReader(FormatReader):
            ...
    """

    def decorator(reader_cls: Type[FormatReader]) -> Type[FormatReader]:
        # The class keeps the identifier exactly as given; only the registry
        # key is lower-cased.
        reader_cls.format_id = format_id
        registry_key = format_id.lower()
        _READERS[registry_key] = reader_cls
        return reader_cls

    return decorator
41
+
42
+
43
def register_writer(format_id: str):
    """Create a class decorator that registers a format writer.

    The returned decorator stamps ``format_id`` onto the decorated class and
    stores the class in the writer registry under the lower-cased identifier.

    Args:
        format_id: Unique identifier for the format

    Returns:
        A decorator that takes a writer class and returns the same class.

    Example:
        @register_writer("srt")
        class SRTWriter(FormatWriter):
            ...
    """

    def decorator(writer_cls: Type[FormatWriter]) -> Type[FormatWriter]:
        # The class keeps the identifier exactly as given; only the registry
        # key is lower-cased.
        writer_cls.format_id = format_id
        registry_key = format_id.lower()
        _WRITERS[registry_key] = writer_cls
        return writer_cls

    return decorator
61
+
62
+
63
def register_format(format_id: str):
    """Create a class decorator that registers one class as reader and writer.

    Use this for classes that implement both FormatReader and FormatWriter.
    The decorated class is stored in both registries under the lower-cased
    identifier, and its ``format_id`` attribute is set to ``format_id``.

    Args:
        format_id: Unique identifier for the format

    Returns:
        A decorator that takes a handler class and returns the same class.

    Example:
        @register_format("srt")
        class SRTFormat(FormatHandler):
            ...
    """

    def decorator(handler_cls: Type[FormatHandler]) -> Type[FormatHandler]:
        handler_cls.format_id = format_id
        # Reader registration happens first, then writer registration.
        _READERS[format_id.lower()] = handler_cls
        _WRITERS[format_id.lower()] = handler_cls
        return handler_cls

    return decorator
84
+
85
+
86
def get_reader(format_id: str) -> Optional[Type[FormatReader]]:
    """Look up the reader class registered for a format ID.

    Args:
        format_id: Format identifier (case-insensitive)

    Returns:
        Reader class or None if not found
    """
    # Registry keys are stored lower-cased, so normalise before the lookup.
    registry_key = format_id.lower()
    return _READERS.get(registry_key)
96
+
97
+
98
def get_writer(format_id: str) -> Optional[Type[FormatWriter]]:
    """Look up the writer class registered for a format ID.

    Args:
        format_id: Format identifier (case-insensitive)

    Returns:
        Writer class or None if not found
    """
    # Registry keys are stored lower-cased, so normalise before the lookup.
    registry_key = format_id.lower()
    return _WRITERS.get(registry_key)
108
+
109
+
110
def list_readers() -> List[str]:
    """Return the registered reader format IDs in sorted order."""
    # Iterating a dict yields its keys, so no explicit .keys() call is needed.
    return sorted(_READERS)
113
+
114
+
115
def list_writers() -> List[str]:
    """Return the registered writer format IDs in sorted order."""
    # Iterating a dict yields its keys, so no explicit .keys() call is needed.
    return sorted(_WRITERS)
118
+
119
+
120
def detect_format(path: str) -> Optional[str]:
    """Detect a caption format by asking the registered readers.

    Detection runs in this order:

    1. The priority formats ("vtt", "gemini", "premiere_xml") are asked via
       ``can_read`` with the input string as given.
    2. If the input looks like content rather than a path (contains a
       newline or is longer than 500 characters), detection stops.
    3. Every other registered reader is asked via ``can_read`` with the
       lower-cased input, in registry order.
    4. Finally the lower-cased file suffix is looked up in the registry.

    Args:
        path: File path to check (converted with ``str()`` before use)

    Returns:
        Format ID or None if no match found
    """
    from pathlib import Path

    candidate = str(path)

    # Formats checked first; per the original note these can detect by
    # content and often share extensions such as .vtt, .txt, or .xml.
    priority_formats = ["vtt", "gemini", "premiere_xml"]
    for fmt in priority_formats:
        handler = _READERS.get(fmt)
        if handler and handler.can_read(candidate):
            return fmt

    # Raw content (multi-line or very long) has no extension worth checking.
    looks_like_content = "\n" in candidate or len(candidate) > 500
    if looks_like_content:
        return None

    # Ask the remaining readers, using the lower-cased path.
    lowered = candidate.lower()
    for fmt, handler in _READERS.items():
        if fmt not in priority_formats and handler.can_read(lowered):
            return fmt

    # Fallback: treat the bare extension as a format ID.
    try:
        suffix = Path(lowered).suffix.lstrip(".")
    except (OSError, ValueError):
        # Likely content, not a path
        return None
    return suffix if suffix in _READERS else None
165
+
166
+
167
+ # Import all format modules to trigger registration
168
+ # Standard formats
169
+ from . import gemini # YouTube/Gemini markdown
170
+ from . import lrc # Enhanced LRC with word-level timestamps
171
+ from . import pysubs2 # SRT, ASS, SSA, SUB, SAMI
172
+ from . import sbv # SubViewer
173
+ from . import tabular # CSV, TSV, AUD, TXT, JSON
174
+ from . import textgrid # Praat TextGrid
175
+ from . import ttml # TTML, IMSC1, EBU-TT-D
176
+ from . import vtt # WebVTT with YouTube VTT word-level timestamp support
177
+
178
+ # Professional NLE formats
179
+ from .nle import audition # Adobe Audition / Pro Tools markers
180
+ from .nle import avid # Avid DS
181
+ from .nle import fcpxml # Final Cut Pro XML
182
+ from .nle import premiere # Adobe Premiere Pro XML
183
+
184
+ __all__ = [
185
+ # Base classes
186
+ "FormatReader",
187
+ "FormatWriter",
188
+ "FormatHandler",
189
+ # Registration
190
+ "register_reader",
191
+ "register_writer",
192
+ "register_format",
193
+ # Lookup
194
+ "get_reader",
195
+ "get_writer",
196
+ "list_readers",
197
+ "list_writers",
198
+ "detect_format",
199
+ ]
@@ -0,0 +1,211 @@
1
+ """Base classes for caption format readers and writers.
2
+
3
+ This module provides abstract base classes that all format handlers must implement,
4
+ ensuring a consistent interface across different caption formats.
5
+ """
6
+
7
+ from abc import ABC, abstractmethod
8
+ from pathlib import Path
9
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
10
+
11
+ from lhotse.utils import Pathlike
12
+
13
+ if TYPE_CHECKING:
14
+ from ..supervision import Supervision
15
+
16
+
17
class FormatReader(ABC):
    """Abstract interface every caption format reader implements.

    Subclasses must provide `read`, which parses caption content and returns
    a list of Supervision objects. The remaining classmethods have default
    implementations.

    Class Attributes:
        format_id: Unique identifier for the format (e.g., "srt", "vtt")
        extensions: List of file extensions this reader handles (e.g., [".srt"])
        description: Human-readable description of the format
    """

    format_id: str = ""
    extensions: List[str] = []
    description: str = ""

    @classmethod
    @abstractmethod
    def read(
        cls,
        source: Union[Pathlike, str],
        normalize_text: bool = True,
        **kwargs,
    ) -> List["Supervision"]:
        """Parse caption content into Supervision objects.

        Args:
            source: File path or string content
            normalize_text: Whether to normalize text (strip HTML, etc.)
            **kwargs: Format-specific options

        Returns:
            List of Supervision objects with timing and text
        """

    @classmethod
    def extract_metadata(cls, source: Union[Pathlike, str]) -> Dict[str, str]:
        """Extract metadata from a caption file or content.

        The default implementation reports no metadata.

        Args:
            source: File path or string content

        Returns:
            Dictionary of metadata key-value pairs
        """
        return dict()

    @classmethod
    def can_read(cls, path: Union[Pathlike, str]) -> bool:
        """Report whether the path ends with one of this reader's extensions.

        The comparison is case-insensitive on both sides.

        Args:
            path: File path to check

        Returns:
            True if this reader supports the file format
        """
        lowered = str(path).lower()
        for extension in cls.extensions:
            if lowered.endswith(extension.lower()):
                return True
        return False

    @classmethod
    def is_content(cls, source: Union[Pathlike, str]) -> bool:
        """Report whether source looks like string content, not a file path.

        Args:
            source: Source to check

        Returns:
            True if source appears to be content, not a path
        """
        if isinstance(source, str):
            # Heuristic: anything multi-line or very long is treated as content.
            return len(source) > 500 or "\n" in source
        return False
92
+
93
+
94
class FormatWriter(ABC):
    """Abstract interface every caption format writer implements.

    Subclasses must provide both `write` and `to_bytes`.

    Class Attributes:
        format_id: Unique identifier for the format (e.g., "srt", "vtt")
        extensions: List of file extensions for this format
        description: Human-readable description of the format
    """

    format_id: str = ""
    extensions: List[str] = []
    description: str = ""

    @classmethod
    @abstractmethod
    def write(
        cls,
        supervisions: List["Supervision"],
        output_path: Pathlike,
        include_speaker: bool = True,
        **kwargs,
    ) -> Path:
        """Write supervisions to a file.

        Args:
            supervisions: List of Supervision objects to write
            output_path: Path to output file
            include_speaker: Whether to include speaker labels in text
            **kwargs: Format-specific options

        Returns:
            Path to the written file
        """

    @classmethod
    @abstractmethod
    def to_bytes(
        cls,
        supervisions: List["Supervision"],
        include_speaker: bool = True,
        **kwargs,
    ) -> bytes:
        """Serialize supervisions to bytes in this format.

        Args:
            supervisions: List of Supervision objects
            include_speaker: Whether to include speaker labels
            **kwargs: Format-specific options

        Returns:
            Caption content as bytes
        """

    @classmethod
    def _should_include_speaker(cls, sup: Any, include_speaker: bool) -> bool:
        """Decide whether a segment's speaker belongs in the output text.

        The speaker is included only when the global ``include_speaker`` flag
        is set, the segment has a truthy ``speaker`` attribute, and the
        segment's ``custom`` metadata does not carry a falsy
        'original_speaker' entry.
        """
        has_speaker = include_speaker and getattr(sup, "speaker", None)
        if not has_speaker:
            return False

        custom = getattr(sup, "custom", None)
        if not custom:
            # No per-segment metadata: nothing can veto the speaker label.
            return True
        return bool(custom.get("original_speaker", True))
164
+
165
+
166
class FormatHandler(FormatReader, FormatWriter):
    """Combined reader and writer for formats that support both.

    Most caption formats support both reading and writing. This class adds
    no members of its own; it only inherits the FormatReader and
    FormatWriter interfaces so a single class can implement both.
    """
174
+
175
+
176
+ # Type aliases for registration
177
+ ReaderType = type[FormatReader]
178
+ WriterType = type[FormatWriter]
179
+
180
+
181
def expand_to_word_supervisions(supervisions: List["Supervision"]) -> List["Supervision"]:
    """Expand supervisions with word alignment to one supervision per word.

    Used for word-per-segment output when word_level=True but karaoke=False.

    A supervision is expanded only when its alignment contains a non-empty
    "word" list. Supervisions with no alignment, no "word" entry, or an empty
    "word" list are passed through unchanged, so no segment is ever lost.

    Each generated word supervision takes its text, start and duration from
    the word item (``symbol``, ``start``, ``duration``) and inherits the
    parent's speaker and recording_id. Its id is ``"<parent id>_word"`` when
    the parent has an id, otherwise an empty string.

    Args:
        supervisions: List of Supervision objects with optional alignment data

    Returns:
        New list of Supervision objects, one per word where word alignment
        exists, with the remaining supervisions kept as they were.
    """
    result = []
    for sup in supervisions:
        alignment = sup.alignment
        words = alignment["word"] if alignment and "word" in alignment else None

        if not words:
            # Nothing to expand. This includes an empty "word" list, which
            # previously made the whole segment vanish from the output.
            result.append(sup)
            continue

        # Function-scope import as in the original (presumably to avoid an
        # import cycle), now deferred until a segment is actually expanded.
        from ..supervision import Supervision

        word_id = f"{sup.id}_word" if sup.id else ""
        recording_id = getattr(sup, "recording_id", "")
        result.extend(
            Supervision(
                text=word.symbol,
                start=word.start,
                duration=word.duration,
                speaker=sup.speaker,
                id=word_id,
                recording_id=recording_id,
            )
            for word in words
        )
    return result