lattifai 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. lattifai/_init.py +20 -0
  2. lattifai/alignment/__init__.py +2 -3
  3. lattifai/alignment/lattice1_aligner.py +117 -4
  4. lattifai/alignment/lattice1_worker.py +47 -4
  5. lattifai/alignment/segmenter.py +3 -2
  6. lattifai/alignment/text_align.py +2 -1
  7. lattifai/alignment/tokenizer.py +56 -29
  8. lattifai/audio2.py +162 -183
  9. lattifai/cli/alignment.py +5 -0
  10. lattifai/cli/caption.py +6 -6
  11. lattifai/cli/transcribe.py +1 -5
  12. lattifai/cli/youtube.py +3 -0
  13. lattifai/client.py +41 -12
  14. lattifai/config/__init__.py +21 -3
  15. lattifai/config/alignment.py +7 -0
  16. lattifai/config/caption.py +13 -243
  17. lattifai/config/client.py +16 -0
  18. lattifai/config/event.py +102 -0
  19. lattifai/config/transcription.py +25 -1
  20. lattifai/data/__init__.py +8 -0
  21. lattifai/data/caption.py +228 -0
  22. lattifai/errors.py +78 -53
  23. lattifai/event/__init__.py +65 -0
  24. lattifai/event/lattifai.py +166 -0
  25. lattifai/mixin.py +22 -17
  26. lattifai/transcription/base.py +2 -1
  27. lattifai/transcription/gemini.py +147 -16
  28. lattifai/transcription/lattifai.py +8 -11
  29. lattifai/types.py +1 -1
  30. lattifai/youtube/client.py +143 -48
  31. {lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/METADATA +117 -54
  32. lattifai-1.3.0.dist-info/RECORD +57 -0
  33. lattifai/__init__.py +0 -88
  34. lattifai/alignment/sentence_splitter.py +0 -350
  35. lattifai/caption/__init__.py +0 -96
  36. lattifai/caption/caption.py +0 -661
  37. lattifai/caption/formats/__init__.py +0 -199
  38. lattifai/caption/formats/base.py +0 -211
  39. lattifai/caption/formats/gemini.py +0 -722
  40. lattifai/caption/formats/json.py +0 -194
  41. lattifai/caption/formats/lrc.py +0 -309
  42. lattifai/caption/formats/nle/__init__.py +0 -9
  43. lattifai/caption/formats/nle/audition.py +0 -561
  44. lattifai/caption/formats/nle/avid.py +0 -423
  45. lattifai/caption/formats/nle/fcpxml.py +0 -549
  46. lattifai/caption/formats/nle/premiere.py +0 -589
  47. lattifai/caption/formats/pysubs2.py +0 -642
  48. lattifai/caption/formats/sbv.py +0 -147
  49. lattifai/caption/formats/tabular.py +0 -338
  50. lattifai/caption/formats/textgrid.py +0 -193
  51. lattifai/caption/formats/ttml.py +0 -652
  52. lattifai/caption/formats/vtt.py +0 -469
  53. lattifai/caption/parsers/__init__.py +0 -9
  54. lattifai/caption/parsers/text_parser.py +0 -147
  55. lattifai/caption/standardize.py +0 -636
  56. lattifai/caption/supervision.py +0 -34
  57. lattifai/caption/utils.py +0 -474
  58. lattifai-1.2.2.dist-info/RECORD +0 -76
  59. {lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/WHEEL +0 -0
  60. {lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/entry_points.txt +0 -0
  61. {lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/licenses/LICENSE +0 -0
  62. {lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/top_level.txt +0 -0
@@ -1,423 +0,0 @@
1
- """Avid DS Subtitle format writer for Avid Media Composer integration.
2
-
3
- This module provides functionality to export captions in Avid DS format,
4
- which is the native format for Avid Media Composer's SubCap plugin.
5
-
6
- Format specification:
7
- - Header: "@ This file written with the Avid Caption plugin, version 1"
8
- - Body: Tab-separated timecode (HH:MM:SS:FF) and text
9
- - Timecodes are frame-based, not millisecond-based
10
- """
11
-
12
- from dataclasses import dataclass
13
- from enum import Enum
14
- from pathlib import Path
15
- from typing import List, Optional, Union
16
-
17
- from lhotse.utils import Pathlike
18
-
19
- from ...supervision import Supervision
20
- from .. import register_writer
21
- from ..base import FormatReader, FormatWriter
22
-
23
-
24
class FrameRate(Enum):
    """Frame rates commonly used in professional video editing.

    NTSC drop-frame and non-drop-frame share the numeric value 29.97, so
    ``FPS_29_97_DF`` is an Enum *alias* of ``FPS_29_97_NDF`` (same member
    object). Drop-frame behaviour is therefore handled separately from this
    enum rather than being distinguishable through it.
    """

    # Film rates
    FPS_23_976 = 23.976  # Film (24p pulldown)
    FPS_24 = 24.0  # Film

    # PAL rate
    FPS_25 = 25.0  # PAL / 25p

    # NTSC rates (identical value: the second name aliases the first)
    FPS_29_97_NDF = 29.97  # NTSC Non-Drop Frame
    FPS_29_97_DF = 29.97  # NTSC Drop Frame (handled separately)

    # Progressive / high-frame-rate variants
    FPS_30 = 30.0  # 30p
    FPS_50 = 50.0  # PAL 50p
    FPS_59_94 = 59.94  # NTSC 60p
    FPS_60 = 60.0  # 60p
36
-
37
-
38
@dataclass
class AvidDSConfig:
    """Options controlling an Avid DS export.

    Attributes:
        fps: Frame rate used when converting seconds to timecode.
        drop_frame: Emit drop-frame timecode (meaningful for 29.97fps).
        max_line_length: Upper bound on characters per text line
            (Avid SubCap typically limits to 32-40).
        include_speaker: Prefix each cue's text with its speaker label.
    """

    # Timecode settings
    fps: float = 25.0
    drop_frame: bool = False

    # Text layout settings
    max_line_length: int = 40
    include_speaker: bool = True
53
-
54
-
55
class AvidDSWriter:
    """Writer for Avid DS subtitle format.

    This writer generates files compatible with Avid Media Composer's SubCap plugin.
    It handles frame-based timecode conversion and enforces broadcast-safe line lengths.

    Example:
        >>> from lattifai.caption import Caption
        >>> from lattifai.caption.formats.nle.avid import AvidDSWriter, AvidDSConfig
        >>> caption = Caption.read("input.srt")
        >>> config = AvidDSConfig(fps=25.0)
        >>> AvidDSWriter.write(caption.supervisions, "output_avid.txt", config)
    """

    # Avid DS file header (required for SubCap plugin)
    HEADER = "@ This file written with the Avid Caption plugin, version 1"

    @classmethod
    def seconds_to_timecode(
        cls,
        seconds: float,
        fps: float = 25.0,
        drop_frame: bool = False,
    ) -> str:
        """Convert seconds to SMPTE timecode (HH:MM:SS:FF).

        Args:
            seconds: Time in seconds; negative values are clamped to 0.
            fps: Frame rate (e.g., 23.976, 24, 25, 29.97, 30).
            drop_frame: Use drop-frame timecode (only honoured for 29.97fps).

        Returns:
            Timecode string in HH:MM:SS:FF format (or HH:MM:SS;FF for drop-frame).

        Raises:
            ValueError: If ``fps`` is not positive.

        Note:
            Drop-frame timecode skips frame numbers 0 and 1 at the start of each
            minute except every 10th minute to keep timecode in sync with real
            time for 29.97fps video. Non-drop timecode for fractional rates is
            counted on the nominal integer timebase (24 for 23.976, 30 for
            29.97, 60 for 59.94), so frame labels are contiguous.
        """
        if fps <= 0:
            raise ValueError(f"fps must be positive, got {fps!r}")
        if seconds < 0:
            seconds = 0

        if drop_frame and abs(fps - 29.97) < 0.01:
            # Real frames elapsed at 29.97fps.
            total_frames = int(round(seconds * 29.97))

            # 17982 real frames per 10 minutes; 18 labels are dropped in each
            # such block (2 per minute for 9 of the 10 minutes).
            ten_minute_blocks, remainder = divmod(total_frames, 17982)
            # The per-block compensation applies unconditionally, including
            # when the frame sits exactly on a 10-minute boundary.
            total_frames += 18 * ten_minute_blocks
            if remainder >= 2:
                # 1798 real frames per dropped minute inside the block.
                total_frames += 2 * ((remainder - 2) // 1798)

            timebase = 30
            separator = ";"  # Drop-frame uses semicolon separator
        else:
            total_frames = int(round(seconds * fps))
            # Split on the nominal integer rate; a float modulo by e.g. 29.97
            # yields wrong and float-noisy frame numbers.
            timebase = max(1, int(round(fps)))
            separator = ":"

        total_seconds, frames = divmod(total_frames, timebase)
        total_minutes, secs = divmod(total_seconds, 60)
        hours, mins = divmod(total_minutes, 60)

        return f"{hours:02d}:{mins:02d}:{secs:02d}{separator}{frames:02d}"

    @classmethod
    def wrap_text(cls, text: str, max_length: int = 40) -> List[str]:
        """Wrap text to fit within maximum line length.

        Text that already fits is returned unchanged as a single line.
        Otherwise the text is split on whitespace and words are greedily packed
        into lines; a single word longer than ``max_length`` is kept whole on
        its own line.

        Args:
            text: Text to wrap
            max_length: Maximum characters per line

        Returns:
            List of wrapped lines
        """
        if len(text) <= max_length:
            return [text]

        lines = []
        current_line = []
        current_length = 0

        for word in text.split():
            word_length = len(word)
            # One extra character for the joining space when the line is non-empty.
            needed = word_length + (1 if current_line else 0)
            if current_length + needed <= max_length:
                current_line.append(word)
                current_length += needed
            else:
                if current_line:
                    lines.append(" ".join(current_line))
                current_line = [word]
                current_length = word_length

        if current_line:
            lines.append(" ".join(current_line))

        return lines

    @classmethod
    def _render_lines(cls, supervisions, config) -> List[str]:
        """Build the header plus one START/END/TEXT row per supervision."""
        lines = [cls.HEADER, ""]  # Header + blank line

        for sup in supervisions:
            start_tc = cls.seconds_to_timecode(sup.start, config.fps, config.drop_frame)
            end_tc = cls.seconds_to_timecode(sup.end, config.fps, config.drop_frame)

            text = sup.text.strip() if sup.text else ""

            # A speaker label is emitted only when enabled, present, and not
            # flagged as non-original in the supervision's custom metadata.
            include_this_speaker = config.include_speaker and sup.speaker
            custom = getattr(sup, "custom", None)
            if include_this_speaker and custom and not custom.get("original_speaker", True):
                include_this_speaker = False

            if include_this_speaker:
                text = f"{sup.speaker}: {text}"

            text = "\n".join(cls.wrap_text(text, config.max_line_length))

            # Avid DS format: START_TC TAB END_TC TAB TEXT
            lines.append(f"{start_tc}\t{end_tc}\t{text}")

        return lines

    @classmethod
    def write(
        cls,
        supervisions: List[Supervision],
        output_path: Pathlike,
        config: Optional[AvidDSConfig] = None,
    ) -> Path:
        """Write supervisions to Avid DS format file.

        Args:
            supervisions: List of supervision segments
            output_path: Output file path
            config: Avid DS export configuration (defaults to ``AvidDSConfig()``)

        Returns:
            Path to written file
        """
        if config is None:
            config = AvidDSConfig()

        output_path = Path(output_path)
        content = "\n".join(cls._render_lines(supervisions, config))

        with open(output_path, "w", encoding="utf-8") as f:
            f.write(content)

        return output_path

    @classmethod
    def to_bytes(
        cls,
        supervisions: List[Supervision],
        config: Optional[AvidDSConfig] = None,
    ) -> bytes:
        """Convert supervisions to Avid DS format bytes.

        Args:
            supervisions: List of supervision segments
            config: Avid DS export configuration (defaults to ``AvidDSConfig()``)

        Returns:
            Avid DS content as UTF-8 encoded bytes
        """
        if config is None:
            config = AvidDSConfig()

        return "\n".join(cls._render_lines(supervisions, config)).encode("utf-8")
262
-
263
-
264
@register_writer("avid_ds")
class AvidDSFormat(FormatWriter):
    """Registry-facing writer that exposes the Avid DS caption format."""

    format_id = "avid_ds"
    extensions = [".txt"]
    description = "Avid DS Caption Format"

    # Generic writer options that Avid DS has no use for; they are discarded
    # before the remaining keyword arguments are handed to AvidDSConfig.
    _UNSUPPORTED_OPTIONS = ("word_level", "karaoke", "karaoke_config", "metadata")

    @classmethod
    def _build_config(cls, include_speaker: bool, options: dict) -> "AvidDSConfig":
        """Create an AvidDSConfig from writer kwargs, minus unsupported options."""
        accepted = {key: value for key, value in options.items() if key not in cls._UNSUPPORTED_OPTIONS}
        return AvidDSConfig(include_speaker=include_speaker, **accepted)

    @classmethod
    def write(
        cls,
        supervisions: List[Supervision],
        output_path: Pathlike,
        include_speaker: bool = True,
        **kwargs,
    ):
        """Export supervisions as an Avid DS file.

        Args:
            supervisions: Supervision segments to export
            output_path: Destination file path
            include_speaker: Whether speaker labels are added to the text
            **kwargs: Extra AvidDSConfig options (fps, drop_frame, etc.);
                word_level, karaoke, karaoke_config and metadata are ignored

        Returns:
            Path to written file
        """
        export_config = cls._build_config(include_speaker, kwargs)
        return AvidDSWriter.write(supervisions, output_path, export_config)

    @classmethod
    def to_bytes(
        cls,
        supervisions: List[Supervision],
        include_speaker: bool = True,
        **kwargs,
    ) -> bytes:
        """Serialize supervisions to Avid DS content in memory.

        Args:
            supervisions: Supervision segments to export
            include_speaker: Whether speaker labels are added to the text
            **kwargs: Extra AvidDSConfig options; word_level, karaoke,
                karaoke_config and metadata are ignored

        Returns:
            Avid DS content as bytes
        """
        export_config = cls._build_config(include_speaker, kwargs)
        return AvidDSWriter.to_bytes(supervisions, export_config)
323
-
324
-
325
class AvidDSReader:
    """Reader for Avid DS subtitle format.

    Each cue is expected on one line as ``START_TC<TAB>END_TC<TAB>TEXT``.
    Lines that are blank, start with ``@`` or ``#``, or do not match that
    shape are skipped.
    """

    @classmethod
    def _parse_timecode(cls, tc: str, fps: float) -> Optional[float]:
        """Parse HH:MM:SS:FF (``;`` accepted as separator); None if malformed."""
        fields = tc.replace(";", ":").split(":")
        if len(fields) != 4:
            return None
        try:
            h, m, s, f = (int(field) for field in fields)
        except ValueError:
            # Non-numeric field: treat as "not a timecode" instead of crashing.
            return None
        return h * 3600 + m * 60 + s + (f / fps)

    @classmethod
    def _timecode_to_seconds(cls, tc: str, fps: float = 25.0) -> float:
        """Convert SMPTE timecode (HH:MM:SS:FF) to seconds.

        Args:
            tc: Timecode string; ``;`` and ``:`` are both accepted as separators.
            fps: Frame rate used to convert the frame field to seconds.

        Returns:
            Time in seconds, or 0.0 when the timecode is malformed (wrong
            number of fields or non-numeric fields).
        """
        parsed = cls._parse_timecode(tc, fps)
        return 0.0 if parsed is None else parsed

    @classmethod
    def read(cls, source: str, normalize_text: bool = True) -> List[Supervision]:
        """Read Avid DS content and return supervisions.

        Args:
            source: Full text content of an Avid DS file.
            normalize_text: Strip surrounding whitespace from each cue's text.

        Returns:
            Supervisions sorted by start time. Rows with an unparseable
            timecode, or whose end is not after their start, are skipped.
        """
        # Imported locally so this method does not depend on where the
        # module-level import sits relative to this class.
        import uuid

        supervisions = []

        for raw_line in source.splitlines():
            line = raw_line.strip()
            if not line or line.startswith(("@", "#")):
                continue

            parts = line.split("\t")
            if len(parts) < 3:
                continue

            start_tc, end_tc = parts[0], parts[1]
            # Re-join in case the text itself contained tabs.
            text = "\t".join(parts[2:])

            # Heuristic: a timecode column must contain ':'.
            if ":" not in start_tc:
                continue

            # The file carries no frame-rate hint; assume 25 fps unless a ';'
            # separator marks drop-frame (29.97) timecode.
            fps = 29.97 if (";" in start_tc or ";" in end_tc) else 25.0

            start_sec = cls._parse_timecode(start_tc, fps)
            end_sec = cls._parse_timecode(end_tc, fps)
            if start_sec is None or end_sec is None:
                # Skip malformed rows rather than aborting the whole read or
                # inventing a cue that starts at 0.
                continue

            if end_sec > start_sec:
                supervisions.append(
                    Supervision(
                        id=str(uuid.uuid4()),
                        recording_id="avid_import",
                        start=start_sec,
                        duration=end_sec - start_sec,
                        text=text.strip() if normalize_text else text,
                    )
                )

        return sorted(supervisions, key=lambda s: s.start)
392
-
393
-
394
- import uuid
395
-
396
- from .. import register_reader
397
-
398
-
399
@register_reader("avid_ds")
class AvidDSReaderHandler(FormatReader):
    """Registry-facing reader for Avid DS caption files."""

    format_id = "avid_ds"
    extensions = [".txt"]

    @classmethod
    def can_read(cls, path: Union[Pathlike, str]) -> bool:
        """Report whether ``path`` looks like a candidate Avid DS file.

        Only the ``.txt`` extension is checked; the file content is not
        inspected, so detection relies on upstream or explicit format
        selection. Inline content (per ``is_content``) and non-str/Path
        inputs are never claimed.
        """
        if not isinstance(path, (str, Path)):
            return False
        if cls.is_content(path):
            return False
        return str(path).lower().endswith(".txt")

    @classmethod
    def read(cls, source: Union[Pathlike, str], normalize_text: bool = True, **kwargs) -> List[Supervision]:
        """Load supervisions from a file path or from inline Avid DS content.

        A str/Path that ``is_content`` does not classify as content is opened
        as a UTF-8 text file; anything else is converted with ``str()`` and
        parsed directly. Extra keyword arguments are accepted and ignored.
        """
        is_file_path = isinstance(source, (str, Path)) and not cls.is_content(source)
        if is_file_path:
            with open(source, "r", encoding="utf-8") as handle:
                content = handle.read()
        else:
            content = str(source)

        return AvidDSReader.read(content, normalize_text=normalize_text)