lattifai 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. lattifai/alignment/__init__.py +10 -1
  2. lattifai/alignment/lattice1_aligner.py +66 -58
  3. lattifai/alignment/punctuation.py +38 -0
  4. lattifai/alignment/sentence_splitter.py +152 -21
  5. lattifai/alignment/text_align.py +440 -0
  6. lattifai/alignment/tokenizer.py +82 -40
  7. lattifai/caption/__init__.py +82 -6
  8. lattifai/caption/caption.py +335 -1141
  9. lattifai/caption/formats/__init__.py +199 -0
  10. lattifai/caption/formats/base.py +211 -0
  11. lattifai/caption/{gemini_reader.py → formats/gemini.py} +320 -60
  12. lattifai/caption/formats/json.py +194 -0
  13. lattifai/caption/formats/lrc.py +309 -0
  14. lattifai/caption/formats/nle/__init__.py +9 -0
  15. lattifai/caption/formats/nle/audition.py +561 -0
  16. lattifai/caption/formats/nle/avid.py +423 -0
  17. lattifai/caption/formats/nle/fcpxml.py +549 -0
  18. lattifai/caption/formats/nle/premiere.py +589 -0
  19. lattifai/caption/formats/pysubs2.py +642 -0
  20. lattifai/caption/formats/sbv.py +147 -0
  21. lattifai/caption/formats/tabular.py +338 -0
  22. lattifai/caption/formats/textgrid.py +193 -0
  23. lattifai/caption/formats/ttml.py +652 -0
  24. lattifai/caption/formats/vtt.py +469 -0
  25. lattifai/caption/parsers/__init__.py +9 -0
  26. lattifai/caption/{text_parser.py → parsers/text_parser.py} +4 -2
  27. lattifai/caption/standardize.py +636 -0
  28. lattifai/caption/utils.py +474 -0
  29. lattifai/cli/__init__.py +2 -1
  30. lattifai/cli/caption.py +108 -1
  31. lattifai/cli/transcribe.py +1 -1
  32. lattifai/cli/youtube.py +4 -1
  33. lattifai/client.py +33 -113
  34. lattifai/config/__init__.py +11 -1
  35. lattifai/config/alignment.py +7 -0
  36. lattifai/config/caption.py +267 -23
  37. lattifai/config/media.py +20 -0
  38. lattifai/diarization/__init__.py +41 -1
  39. lattifai/mixin.py +27 -15
  40. lattifai/transcription/base.py +6 -1
  41. lattifai/transcription/lattifai.py +19 -54
  42. lattifai/utils.py +7 -13
  43. lattifai/workflow/__init__.py +28 -4
  44. lattifai/workflow/file_manager.py +2 -5
  45. lattifai/youtube/__init__.py +43 -0
  46. lattifai/youtube/client.py +1170 -0
  47. lattifai/youtube/types.py +23 -0
  48. lattifai-1.2.2.dist-info/METADATA +615 -0
  49. lattifai-1.2.2.dist-info/RECORD +76 -0
  50. {lattifai-1.2.1.dist-info → lattifai-1.2.2.dist-info}/entry_points.txt +1 -2
  51. lattifai/caption/gemini_writer.py +0 -173
  52. lattifai/cli/app_installer.py +0 -142
  53. lattifai/cli/server.py +0 -44
  54. lattifai/server/app.py +0 -427
  55. lattifai/workflow/youtube.py +0 -577
  56. lattifai-1.2.1.dist-info/METADATA +0 -1134
  57. lattifai-1.2.1.dist-info/RECORD +0 -58
  58. {lattifai-1.2.1.dist-info → lattifai-1.2.2.dist-info}/WHEEL +0 -0
  59. {lattifai-1.2.1.dist-info → lattifai-1.2.2.dist-info}/licenses/LICENSE +0 -0
  60. {lattifai-1.2.1.dist-info → lattifai-1.2.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,636 @@
1
+ """
2
+ Caption Standardization Module
3
+
4
+ Implements broadcast-grade caption standardization following Netflix/BBC guidelines:
5
+ - Timeline cleanup (min/max duration, gap checking)
6
+ - Smart text line breaking
7
+ - Quality validation
8
+
9
+ Reference Standards:
10
+ - Netflix Timed Text Style Guide
11
+ - BBC Subtitle Guidelines
12
+ - EBU-TT-D Standard
13
+ """
14
+
15
+ import re
16
+ from dataclasses import dataclass, field
17
+ from typing import List, Optional, Union
18
+
19
+ from lhotse.supervision import SupervisionSegment
20
+
21
+ from ..config.caption import StandardizationConfig
22
+ from .supervision import Supervision
23
+
24
+ __all__ = [
25
+ "CaptionStandardizer",
26
+ "CaptionValidator",
27
+ "StandardizationConfig",
28
+ "ValidationResult",
29
+ "standardize_captions",
30
+ "apply_margins_to_captions",
31
+ ]
32
+
33
+
34
@dataclass
class ValidationResult:
    """
    Outcome of a caption validation run.

    Attributes:
        valid: Whether all validations passed.
        warnings: List of warning messages.
        avg_cps: Average reading speed (chars/sec).
        max_cpl: Maximum characters per line.
        segments_too_short: Number of segments too short.
        segments_too_long: Number of segments too long.
        gaps_too_small: Number of gaps too small.
    """

    # Overall verdict and the human-readable messages behind it
    valid: bool = True
    warnings: List[str] = field(default_factory=list)

    # Statistics
    avg_cps: float = 0.0
    max_cpl: int = 0

    # Violation counters
    segments_too_short: int = 0
    segments_too_long: int = 0
    gaps_too_small: int = 0
59
+
60
+
61
+ class CaptionStandardizer:
62
+ """
63
+ Caption standardization processor.
64
+
65
+ Processing flow:
66
+ 1. Timeline cleanup - Adjust duration and gaps
67
+ 2. Text formatting - Smart line breaking
68
+ 3. Validation - Generate quality metrics
69
+
70
+ Example:
71
+ >>> standardizer = CaptionStandardizer(min_duration=0.8, max_chars_per_line=42)
72
+ >>> processed = standardizer.process(supervisions)
73
+ """
74
+
75
+ # Chinese/Japanese punctuation (for line break priority)
76
+ # Reference: alignment/punctuation.py
77
+ CJK_PUNCTUATION = r"[,。、?!:;·…—~" "''()【】〔〕〖〗《》〈〉「」『』〘〙〚〛]"
78
+
79
+ # English/Western punctuation
80
+ EN_PUNCTUATION = r"[,.!?;:\-–—«»‹›]"
81
+
82
+ # All splittable punctuation (for line break search)
83
+ ALL_PUNCTUATION = r"[,。、?!:;·…—~,.!?;:\-–—\s]"
84
+
85
+ def __init__(
86
+ self,
87
+ min_duration: float = 0.8,
88
+ max_duration: float = 7.0,
89
+ min_gap: float = 0.08,
90
+ max_lines: int = 2,
91
+ max_chars_per_line: int = 42,
92
+ ):
93
+ """
94
+ Initialize standardizer.
95
+
96
+ Args:
97
+ min_duration: Minimum duration (seconds)
98
+ max_duration: Maximum duration (seconds)
99
+ min_gap: Minimum gap (seconds)
100
+ max_lines: Maximum number of lines
101
+ max_chars_per_line: Maximum characters per line
102
+ """
103
+ self.config = StandardizationConfig(
104
+ min_duration=min_duration,
105
+ max_duration=max_duration,
106
+ min_gap=min_gap,
107
+ max_lines=max_lines,
108
+ max_chars_per_line=max_chars_per_line,
109
+ )
110
+
111
+ def process(self, segments: List[Union[Supervision, SupervisionSegment]]) -> List[Supervision]:
112
+ """
113
+ Main processing entry point.
114
+
115
+ Args:
116
+ segments: List of original caption segments
117
+
118
+ Returns:
119
+ List of processed caption segments
120
+ """
121
+ if not segments:
122
+ return []
123
+
124
+ # 1. Sort by start time
125
+ sorted_segments = sorted(segments, key=lambda s: s.start)
126
+
127
+ # 2. Timeline cleanup
128
+ processed = self._sanitize_timeline(sorted_segments)
129
+
130
+ # 3. Text formatting
131
+ processed = self._format_texts(processed)
132
+
133
+ return processed
134
+
135
+ def _sanitize_timeline(self, segments: List[Union[Supervision, SupervisionSegment]]) -> List[Supervision]:
136
+ """
137
+ Timeline cleanup.
138
+
139
+ Processing logic:
140
+ A. Gap check - Ensure sufficient gap between subtitles
141
+ B. Min duration check - Extend too-short subtitles
142
+ C. Max duration check - Truncate too-long subtitles
143
+
144
+ Priority: Gap > Min duration (insufficient gap causes display issues)
145
+ """
146
+ result: List[Supervision] = []
147
+
148
+ for i, seg in enumerate(segments):
149
+ # Create new instance
150
+ new_seg = self._copy_segment(seg)
151
+
152
+ # A. Check gap with previous subtitle
153
+ if result:
154
+ prev_seg = result[-1]
155
+ prev_end = prev_seg.start + prev_seg.duration
156
+ gap = new_seg.start - prev_end
157
+
158
+ if gap < self.config.min_gap:
159
+ # Gap too small or overlap
160
+ # Target: prev_end_new + min_gap = new_seg.start
161
+ # => prev_duration_new = new_seg.start - min_gap - prev_seg.start
162
+ target_prev_duration = new_seg.start - self.config.min_gap - prev_seg.start
163
+
164
+ if target_prev_duration >= self.config.min_duration:
165
+ # Safe to shorten previous subtitle (still meets min duration)
166
+ result[-1] = self._copy_segment(prev_seg, duration=target_prev_duration)
167
+ else:
168
+ # Shortening previous would go below min duration, delay current start
169
+ new_start = prev_end + self.config.min_gap
170
+ duration_diff = new_start - seg.start
171
+ new_duration = max(
172
+ 0.1, # Ensure at least some duration
173
+ new_seg.duration - duration_diff,
174
+ )
175
+ new_seg = self._copy_segment(new_seg, start=new_start, duration=new_duration)
176
+
177
+ # B. Min duration check
178
+ if new_seg.duration < self.config.min_duration:
179
+ # Check if extending would overlap with next subtitle
180
+ next_start = segments[i + 1].start if i + 1 < len(segments) else float("inf")
181
+ max_extend = next_start - new_seg.start - self.config.min_gap
182
+ new_duration = min(self.config.min_duration, max(max_extend, new_seg.duration))
183
+ new_seg = self._copy_segment(new_seg, duration=new_duration)
184
+
185
+ # C. Max duration check
186
+ if new_seg.duration > self.config.max_duration:
187
+ new_seg = self._copy_segment(new_seg, duration=self.config.max_duration)
188
+
189
+ result.append(new_seg)
190
+
191
+ return result
192
+
193
+ def _format_texts(self, segments: List[Supervision]) -> List[Supervision]:
194
+ """Apply text formatting to all subtitles."""
195
+ return [self._copy_segment(seg, text=self._smart_split_text(seg.text or "")) for seg in segments]
196
+
197
+ def _smart_split_text(self, text: str) -> str:
198
+ """
199
+ Smart text line breaking.
200
+
201
+ Priority:
202
+ 1. CJK punctuation (,。!? etc.)
203
+ 2. English punctuation (,.!? etc.)
204
+ 3. Whitespace
205
+ 4. Hard truncation
206
+
207
+ Args:
208
+ text: Original text
209
+
210
+ Returns:
211
+ Text with line breaks
212
+ """
213
+ # Clean text
214
+ text = self._normalize_text(text)
215
+
216
+ # Check if line break is needed
217
+ if len(text) <= self.config.max_chars_per_line:
218
+ return text
219
+
220
+ lines: List[str] = []
221
+ remaining = text
222
+
223
+ for _ in range(self.config.max_lines):
224
+ if len(remaining) <= self.config.max_chars_per_line:
225
+ lines.append(remaining)
226
+ remaining = ""
227
+ break
228
+
229
+ # Find best split point
230
+ split_pos = self._find_split_point(remaining, self.config.max_chars_per_line)
231
+
232
+ lines.append(remaining[:split_pos].rstrip())
233
+ remaining = remaining[split_pos:].lstrip()
234
+
235
+ # If remaining text exists and max lines reached, append to last line
236
+ if remaining and lines:
237
+ # Choose to append (may exceed char limit) rather than truncate
238
+ lines[-1] = lines[-1] + " " + remaining if lines[-1] else remaining
239
+
240
+ return "\n".join(lines)
241
+
242
+ def _find_split_point(self, text: str, max_len: int) -> int:
243
+ """
244
+ Find best split point.
245
+
246
+ Strategy: Find punctuation or whitespace near max_len
247
+ Search range: 40% - 110% of max_len
248
+
249
+ Args:
250
+ text: Text to split
251
+ max_len: Maximum length
252
+
253
+ Returns:
254
+ Split position index
255
+ """
256
+ search_start = int(max_len * 0.4)
257
+ search_end = min(len(text), int(max_len * 1.1))
258
+
259
+ best_pos = max_len
260
+ best_priority = 999 # Lower is better
261
+
262
+ # Search backwards, prefer split points closer to max_len
263
+ for i in range(min(search_end, len(text)) - 1, search_start - 1, -1):
264
+ char = text[i]
265
+ priority = self._get_split_priority(char)
266
+
267
+ if priority < best_priority:
268
+ best_priority = priority
269
+ best_pos = i + 1 # Split after punctuation/whitespace
270
+
271
+ # Exit early if highest priority (CJK punctuation) found
272
+ if priority == 1:
273
+ break
274
+
275
+ return best_pos
276
+
277
+ def _get_split_priority(self, char: str) -> int:
278
+ """
279
+ Get character split priority.
280
+
281
+ Returns:
282
+ 1 = CJK punctuation (highest priority)
283
+ 2 = English punctuation
284
+ 3 = Whitespace
285
+ 999 = Other characters (not suitable for splitting)
286
+ """
287
+ if re.match(self.CJK_PUNCTUATION, char):
288
+ return 1
289
+ elif re.match(self.EN_PUNCTUATION, char):
290
+ return 2
291
+ elif char.isspace():
292
+ return 3
293
+ return 999
294
+
295
+ def _normalize_text(self, text: str) -> str:
296
+ """
297
+ Normalize text.
298
+
299
+ - Remove excess whitespace
300
+ - Remove existing newlines (will be reformatted)
301
+ - Unify spaces
302
+ """
303
+ # Remove existing newlines
304
+ text = text.replace("\n", " ")
305
+ # Merge excess whitespace
306
+ text = re.sub(r"\s+", " ", text.strip())
307
+ return text
308
+
309
+ def _copy_segment(
310
+ self,
311
+ seg: Union[Supervision, SupervisionSegment],
312
+ **overrides,
313
+ ) -> Supervision:
314
+ """
315
+ Create a copy of Supervision.
316
+
317
+ Args:
318
+ seg: Original segment
319
+ **overrides: Fields to override
320
+
321
+ Returns:
322
+ New Supervision instance
323
+ """
324
+ return Supervision(
325
+ id=overrides.get("id", seg.id),
326
+ recording_id=overrides.get("recording_id", seg.recording_id),
327
+ start=overrides.get("start", seg.start),
328
+ duration=overrides.get("duration", seg.duration),
329
+ channel=overrides.get("channel", getattr(seg, "channel", None)),
330
+ text=overrides.get("text", seg.text),
331
+ language=overrides.get("language", getattr(seg, "language", None)),
332
+ speaker=overrides.get("speaker", getattr(seg, "speaker", None)),
333
+ gender=overrides.get("gender", getattr(seg, "gender", None)),
334
+ custom=overrides.get("custom", getattr(seg, "custom", None)),
335
+ alignment=overrides.get("alignment", getattr(seg, "alignment", None)),
336
+ )
337
+
338
+ def apply_margins(
339
+ self,
340
+ segments: List[Union[Supervision, SupervisionSegment]],
341
+ start_margin: Optional[float] = None,
342
+ end_margin: Optional[float] = None,
343
+ ) -> List[Supervision]:
344
+ """
345
+ Recalculate segment boundaries based on word-level alignment.
346
+
347
+ Uses precise word-level timestamps from supervision.alignment['word']
348
+ to recalculate segment start/end times.
349
+
350
+ Args:
351
+ segments: List of subtitles with alignment data
352
+ start_margin: Start margin (overrides config default)
353
+ end_margin: End margin (overrides config default)
354
+
355
+ Returns:
356
+ List of subtitles with new margins applied
357
+
358
+ Note:
359
+ - Segments without alignment data keep original timestamps
360
+ - Automatically handles boundary collisions
361
+
362
+ Example:
363
+ >>> standardizer = CaptionStandardizer()
364
+ >>> adjusted = standardizer.apply_margins(
365
+ ... supervisions, start_margin=0.05, end_margin=0.15
366
+ ... )
367
+ """
368
+ if not segments:
369
+ return []
370
+
371
+ # Resolve margins: parameter > config > 0.0 (no adjustment)
372
+ sm = start_margin if start_margin is not None else (self.config.start_margin or 0.0)
373
+ em = end_margin if end_margin is not None else (self.config.end_margin or 0.0)
374
+
375
+ # Sort by start time
376
+ sorted_segs = sorted(segments, key=lambda s: s.start)
377
+ result: List[Supervision] = []
378
+
379
+ for seg in sorted_segs:
380
+ # Get word alignment
381
+ words = self._get_word_alignment(seg)
382
+
383
+ if not words:
384
+ # No alignment data, keep original
385
+ result.append(self._copy_segment(seg))
386
+ continue
387
+
388
+ # Calculate precise boundaries
389
+ first_word_start = words[0].start
390
+ last_word_end = words[-1].start + words[-1].duration
391
+
392
+ # Apply margin (0.0 means no adjustment, just use word boundaries)
393
+ new_start = max(0, first_word_start - sm)
394
+ new_end = last_word_end + em
395
+
396
+ # Collision detection (with previous segment)
397
+ if result:
398
+ prev_end = result[-1].start + result[-1].duration
399
+ if new_start < prev_end + self.config.min_gap:
400
+ new_start = self._resolve_collision(prev_end, new_start, first_word_start, sm)
401
+
402
+ new_duration = new_end - new_start
403
+ result.append(self._copy_segment(seg, start=new_start, duration=new_duration))
404
+
405
+ return result
406
+
407
+ def _get_word_alignment(self, seg: Union[Supervision, SupervisionSegment]) -> List:
408
+ """
409
+ Safely get word alignment data.
410
+
411
+ Args:
412
+ seg: Subtitle segment
413
+
414
+ Returns:
415
+ Word alignment list, or empty list if not present
416
+ """
417
+ alignment = getattr(seg, "alignment", None)
418
+ if alignment and "word" in alignment:
419
+ return alignment["word"]
420
+ return []
421
+
422
+ def _resolve_collision(
423
+ self,
424
+ prev_end: float,
425
+ new_start: float,
426
+ first_word_start: float,
427
+ start_margin: float,
428
+ ) -> float:
429
+ """
430
+ Resolve collision with previous segment.
431
+
432
+ Args:
433
+ prev_end: End time of previous segment
434
+ new_start: Currently calculated start time
435
+ first_word_start: Start time of first word in current segment
436
+ start_margin: Requested start_margin
437
+
438
+ Returns:
439
+ Adjusted start time
440
+ """
441
+ if self.config.margin_collision_mode == "gap":
442
+ # Force maintain min_gap
443
+ return prev_end + self.config.min_gap
444
+ else:
445
+ # Trim mode: preserve margin as much as possible, but not beyond speech start
446
+ available_margin = first_word_start - (prev_end + self.config.min_gap)
447
+ actual_margin = max(0, min(start_margin, available_margin))
448
+ return first_word_start - actual_margin
449
+
450
+
451
class CaptionValidator:
    """
    Caption quality validator.

    Checks subtitles against the configured duration, gap, line-length and
    reading-speed limits and produces a quality metrics report.

    Example:
        >>> validator = CaptionValidator()
        >>> result = validator.validate(supervisions)
        >>> if not result.valid:
        ...     print(result.warnings)
    """

    def __init__(
        self,
        config: Optional[StandardizationConfig] = None,
        min_duration: float = 0.8,
        max_duration: float = 7.0,
        min_gap: float = 0.08,
        max_chars_per_line: int = 42,
    ):
        """
        Initialize validator.

        Args:
            config: Standardization config (if provided, ignores other params)
            min_duration: Minimum duration
            max_duration: Maximum duration
            min_gap: Minimum gap
            max_chars_per_line: Maximum characters per line
        """
        # A supplied config wins; the scalar limits only seed a fresh config.
        self.config = (
            config
            if config
            else StandardizationConfig(
                min_duration=min_duration,
                max_duration=max_duration,
                min_gap=min_gap,
                max_chars_per_line=max_chars_per_line,
            )
        )

    def validate(self, segments: List[Union[Supervision, SupervisionSegment]]) -> ValidationResult:
        """
        Validate subtitles and return quality metrics.

        Segments are examined in the order given. Only duration and gap
        violations make the result invalid; line-length and reading-speed
        problems are reported as warnings only. Overlapping segments
        (negative gap) are not counted as gap violations.

        Args:
            segments: List of subtitle segments

        Returns:
            ValidationResult containing validation results and metrics
        """
        result = ValidationResult()
        if not segments:
            return result

        cps_sum = 0.0
        prev_end = 0.0

        for i, seg in enumerate(segments):
            caption = seg.text or ""
            duration = seg.duration

            # Reading speed: characters (newlines excluded) per second
            if duration > 0:
                cps = len(caption.replace("\n", "")) / duration
            else:
                cps = 0
            cps_sum += cps

            # Longest rendered line of this segment
            max_line_len = max(map(len, caption.split("\n")), default=0)
            if max_line_len > result.max_cpl:
                result.max_cpl = max_line_len

            # Duration limits
            if duration < self.config.min_duration:
                result.segments_too_short += 1
                result.warnings.append(
                    f"Segment {i} (id={seg.id}): duration {duration:.2f}s < min {self.config.min_duration}s"
                )
            if duration > self.config.max_duration:
                result.segments_too_long += 1
                result.warnings.append(
                    f"Segment {i} (id={seg.id}): duration {duration:.2f}s > max {self.config.max_duration}s"
                )

            # Gap to the previous segment (first segment has no predecessor)
            if i > 0:
                gap = seg.start - prev_end
                if 0 <= gap < self.config.min_gap:
                    result.gaps_too_small += 1
                    result.warnings.append(f"Segment {i} (id={seg.id}): gap {gap:.3f}s < min {self.config.min_gap}s")

            # Line length (warning only)
            if max_line_len > self.config.max_chars_per_line:
                result.warnings.append(
                    f"Segment {i} (id={seg.id}): line length {max_line_len} > max {self.config.max_chars_per_line}"
                )

            # Reading speed more than 50% above the optimum (warning only)
            if cps > self.config.optimal_cps * 1.5:
                result.warnings.append(
                    f"Segment {i} (id={seg.id}): CPS {cps:.1f} exceeds recommended {self.config.optimal_cps}"
                )

            prev_end = seg.start + seg.duration

        result.avg_cps = cps_sum / len(segments)

        # Valid only when no duration or gap violation was counted
        result.valid = not (result.segments_too_short or result.segments_too_long or result.gaps_too_small)

        return result
565
+
566
+
567
def standardize_captions(
    segments: List[Union[Supervision, SupervisionSegment]],
    min_duration: float = 0.8,
    max_duration: float = 7.0,
    min_gap: float = 0.08,
    max_lines: int = 2,
    max_chars_per_line: int = 42,
) -> List[Supervision]:
    """
    Convenience function: Standardize caption list.

    Builds a one-off CaptionStandardizer from the given limits and runs its
    ``process`` method on ``segments``.

    Args:
        segments: List of original caption segments
        min_duration: Minimum duration (seconds)
        max_duration: Maximum duration (seconds)
        min_gap: Minimum gap (seconds)
        max_lines: Maximum number of lines
        max_chars_per_line: Maximum characters per line

    Returns:
        List of processed caption segments

    Example:
        >>> from lattifai.caption import standardize_captions
        >>> processed = standardize_captions(supervisions, max_chars_per_line=22)
    """
    limits = {
        "min_duration": min_duration,
        "max_duration": max_duration,
        "min_gap": min_gap,
        "max_lines": max_lines,
        "max_chars_per_line": max_chars_per_line,
    }
    return CaptionStandardizer(**limits).process(segments)
601
+
602
+
603
def apply_margins_to_captions(
    segments: List[Union[Supervision, SupervisionSegment]],
    start_margin: float = 0.08,
    end_margin: float = 0.20,
    min_gap: float = 0.08,
    collision_mode: str = "trim",
) -> List[Supervision]:
    """
    Convenience function: Recalculate caption boundaries based on word-level alignment.

    Uses precise word-level timestamps from supervision.alignment['word']
    to recalculate segment start/end times.

    Args:
        segments: List of caption segments with alignment data
        start_margin: Start margin (seconds) - extends before first word
        end_margin: End margin (seconds) - extends after last word
        min_gap: Minimum gap (seconds) - for collision handling
        collision_mode: Collision mode 'trim' or 'gap'

    Returns:
        List of caption segments with new margins applied

    Example:
        >>> from lattifai.caption import apply_margins_to_captions
        >>> adjusted = apply_margins_to_captions(
        ...     supervisions, start_margin=0.05, end_margin=0.15
        ... )
    """
    standardizer = CaptionStandardizer(min_gap=min_gap)

    # Record the margin settings on the config as well as passing the margins
    # explicitly below.
    margin_settings = (
        ("start_margin", start_margin),
        ("end_margin", end_margin),
        ("margin_collision_mode", collision_mode),
    )
    for attr_name, attr_value in margin_settings:
        setattr(standardizer.config, attr_name, attr_value)

    return standardizer.apply_margins(segments, start_margin=start_margin, end_margin=end_margin)