lattifai 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/alignment/__init__.py +10 -1
- lattifai/alignment/lattice1_aligner.py +66 -58
- lattifai/alignment/punctuation.py +38 -0
- lattifai/alignment/sentence_splitter.py +152 -21
- lattifai/alignment/text_align.py +440 -0
- lattifai/alignment/tokenizer.py +82 -40
- lattifai/caption/__init__.py +82 -6
- lattifai/caption/caption.py +335 -1141
- lattifai/caption/formats/__init__.py +199 -0
- lattifai/caption/formats/base.py +211 -0
- lattifai/caption/{gemini_reader.py → formats/gemini.py} +320 -60
- lattifai/caption/formats/json.py +194 -0
- lattifai/caption/formats/lrc.py +309 -0
- lattifai/caption/formats/nle/__init__.py +9 -0
- lattifai/caption/formats/nle/audition.py +561 -0
- lattifai/caption/formats/nle/avid.py +423 -0
- lattifai/caption/formats/nle/fcpxml.py +549 -0
- lattifai/caption/formats/nle/premiere.py +589 -0
- lattifai/caption/formats/pysubs2.py +642 -0
- lattifai/caption/formats/sbv.py +147 -0
- lattifai/caption/formats/tabular.py +338 -0
- lattifai/caption/formats/textgrid.py +193 -0
- lattifai/caption/formats/ttml.py +652 -0
- lattifai/caption/formats/vtt.py +469 -0
- lattifai/caption/parsers/__init__.py +9 -0
- lattifai/caption/{text_parser.py → parsers/text_parser.py} +4 -2
- lattifai/caption/standardize.py +636 -0
- lattifai/caption/utils.py +474 -0
- lattifai/cli/__init__.py +2 -1
- lattifai/cli/caption.py +108 -1
- lattifai/cli/transcribe.py +1 -1
- lattifai/cli/youtube.py +4 -1
- lattifai/client.py +33 -113
- lattifai/config/__init__.py +11 -1
- lattifai/config/alignment.py +7 -0
- lattifai/config/caption.py +267 -23
- lattifai/config/media.py +20 -0
- lattifai/diarization/__init__.py +41 -1
- lattifai/mixin.py +27 -15
- lattifai/transcription/base.py +6 -1
- lattifai/transcription/lattifai.py +19 -54
- lattifai/utils.py +7 -13
- lattifai/workflow/__init__.py +28 -4
- lattifai/workflow/file_manager.py +2 -5
- lattifai/youtube/__init__.py +43 -0
- lattifai/youtube/client.py +1170 -0
- lattifai/youtube/types.py +23 -0
- lattifai-1.2.2.dist-info/METADATA +615 -0
- lattifai-1.2.2.dist-info/RECORD +76 -0
- {lattifai-1.2.1.dist-info → lattifai-1.2.2.dist-info}/entry_points.txt +1 -2
- lattifai/caption/gemini_writer.py +0 -173
- lattifai/cli/app_installer.py +0 -142
- lattifai/cli/server.py +0 -44
- lattifai/server/app.py +0 -427
- lattifai/workflow/youtube.py +0 -577
- lattifai-1.2.1.dist-info/METADATA +0 -1134
- lattifai-1.2.1.dist-info/RECORD +0 -58
- {lattifai-1.2.1.dist-info → lattifai-1.2.2.dist-info}/WHEEL +0 -0
- {lattifai-1.2.1.dist-info → lattifai-1.2.2.dist-info}/licenses/LICENSE +0 -0
- {lattifai-1.2.1.dist-info → lattifai-1.2.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,636 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Caption Standardization Module
|
|
3
|
+
|
|
4
|
+
Implements broadcast-grade caption standardization following Netflix/BBC guidelines:
|
|
5
|
+
- Timeline cleanup (min/max duration, gap checking)
|
|
6
|
+
- Smart text line breaking
|
|
7
|
+
- Quality validation
|
|
8
|
+
|
|
9
|
+
Reference Standards:
|
|
10
|
+
- Netflix Timed Text Style Guide
|
|
11
|
+
- BBC Subtitle Guidelines
|
|
12
|
+
- EBU-TT-D Standard
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from typing import List, Optional, Union
|
|
18
|
+
|
|
19
|
+
from lhotse.supervision import SupervisionSegment
|
|
20
|
+
|
|
21
|
+
from ..config.caption import StandardizationConfig
|
|
22
|
+
from .supervision import Supervision
|
|
23
|
+
|
|
24
|
+
# Names exported by ``from <this module> import *``.
__all__ = [
    "CaptionStandardizer",
    "CaptionValidator",
    "StandardizationConfig",
    "ValidationResult",
    "standardize_captions",
    "apply_margins_to_captions",
]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class ValidationResult:
    """Outcome of a caption validation run.

    Attributes:
        valid: Whether all validations passed.
        warnings: List of warning messages.
        avg_cps: Average reading speed (chars/sec).
        max_cpl: Maximum characters per line.
        segments_too_short: Number of segments too short.
        segments_too_long: Number of segments too long.
        gaps_too_small: Number of gaps too small.
    """

    valid: bool = True
    warnings: List[str] = field(default_factory=list)

    # Statistics
    avg_cps: float = 0.0
    max_cpl: int = 0
    segments_too_short: int = 0
    segments_too_long: int = 0
    gaps_too_small: int = 0
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class CaptionStandardizer:
    """
    Caption standardization processor.

    Processing flow (see ``process``):
    1. Sort segments by start time
    2. Timeline cleanup - Adjust duration and gaps
    3. Text formatting - Smart line breaking

    Quality metrics are not produced here; they are generated separately
    by ``CaptionValidator``.

    Example:
        >>> standardizer = CaptionStandardizer(min_duration=0.8, max_chars_per_line=42)
        >>> processed = standardizer.process(supervisions)
    """

    # Chinese/Japanese punctuation (for line break priority)
    # Reference: alignment/punctuation.py
    #
    # Written with \u escapes so the full-width / curly forms cannot be
    # flattened to their ASCII look-alikes. ASCII ",", "?", "!", ":", ";",
    # "'", "(" and ")" are deliberately NOT part of this class: they must
    # fall through to EN_PUNCTUATION (or to "not splittable"), otherwise
    # every Western comma or apostrophe is treated as a top-priority CJK
    # break point.
    # NOTE(review): reconstructed from the rendered source; confirm the exact
    # character set against alignment/punctuation.py.
    CJK_PUNCTUATION = (
        "[\uff0c\u3002\u3001\uff1f\uff01\uff1a\uff1b\u00b7\u2026\u2014\uff5e"  # ， 。 、 ？ ！ ： ； · … — ～
        "\u201c\u201d\u2018\u2019\uff08\uff09"  # curly double/single quotes, full-width parentheses
        "\u3010\u3011\u3014\u3015\u3016\u3017\u300a\u300b\u3008\u3009"  # 【】〔〕〖〗《》〈〉
        "\u300c\u300d\u300e\u300f\u3018\u3019\u301a\u301b]"  # 「」『』〘〙〚〛
    )

    # English/Western punctuation
    EN_PUNCTUATION = r"[,.!?;:\-–—«»‹›]"

    # All splittable punctuation (for line break search):
    # full-width CJK marks, then ASCII/Western marks and any whitespace.
    ALL_PUNCTUATION = (
        "[\uff0c\u3002\u3001\uff1f\uff01\uff1a\uff1b\u00b7\u2026\u2014\uff5e"
        r",.!?;:\-–—\s]"
    )
|
|
84
|
+
|
|
85
|
+
def __init__(
    self,
    min_duration: float = 0.8,
    max_duration: float = 7.0,
    min_gap: float = 0.08,
    max_lines: int = 2,
    max_chars_per_line: int = 42,
    config: Optional[StandardizationConfig] = None,
):
    """
    Initialize standardizer.

    Args:
        min_duration: Minimum duration (seconds)
        max_duration: Maximum duration (seconds)
        min_gap: Minimum gap (seconds)
        max_lines: Maximum number of lines
        max_chars_per_line: Maximum characters per line
        config: Ready-made standardization config. When provided it is used
            as-is and the individual parameters above are ignored (same
            convention as ``CaptionValidator``). This is the only way to
            set the margin options read by ``apply_margins`` without
            mutating ``self.config`` after construction.
    """
    if config is not None:
        self.config = config
    else:
        self.config = StandardizationConfig(
            min_duration=min_duration,
            max_duration=max_duration,
            min_gap=min_gap,
            max_lines=max_lines,
            max_chars_per_line=max_chars_per_line,
        )
|
|
110
|
+
|
|
111
|
+
def process(self, segments: List[Union[Supervision, SupervisionSegment]]) -> List[Supervision]:
    """
    Run the standardization pipeline on a list of caption segments.

    Args:
        segments: Original caption segments (any order)

    Returns:
        New list of segments, ordered by start time, after timeline cleanup
        and text formatting. An empty input yields an empty list.
    """
    if not segments:
        return []

    # Sort by start time first; the timeline cleanup compares each segment
    # with the one before and after it.
    ordered = sorted(segments, key=lambda item: item.start)

    # Timeline cleanup, then text formatting.
    return self._format_texts(self._sanitize_timeline(ordered))
|
|
134
|
+
|
|
135
|
+
def _sanitize_timeline(self, segments: List[Union[Supervision, SupervisionSegment]]) -> List[Supervision]:
    """
    Clean up segment timing.

    For every segment, in input order:
        A. Gap check - keep at least ``min_gap`` to the previous segment,
           either by shortening the previous one or by delaying this one
        B. Min duration check - extend too-short segments
        C. Max duration check - truncate too-long segments

    Priority: Gap > Min duration (insufficient gap causes display issues)

    Args:
        segments: Caption segments; neighbours are compared in list order,
            so the caller is expected to pass them sorted by start time.

    Returns:
        New list of adjusted copies; the input segments are not modified.
    """
    cfg = self.config
    cleaned: List[Supervision] = []
    last_index = len(segments) - 1

    for idx, original in enumerate(segments):
        current = self._copy_segment(original)

        # A. Gap to the previously emitted segment
        if cleaned:
            previous = cleaned[-1]
            previous_end = previous.start + previous.duration

            if current.start - previous_end < cfg.min_gap:
                # Gap too small or overlap. Duration the previous segment
                # would need so that it ends min_gap before this one starts.
                shortened = current.start - cfg.min_gap - previous.start

                if shortened >= cfg.min_duration:
                    # Previous segment can absorb the adjustment.
                    cleaned[-1] = self._copy_segment(previous, duration=shortened)
                else:
                    # Shortening would violate min duration: push this
                    # segment's start back instead, keeping its end where
                    # possible (never below 0.1s of duration).
                    delayed_start = previous_end + cfg.min_gap
                    shift = delayed_start - original.start
                    current = self._copy_segment(
                        current,
                        start=delayed_start,
                        duration=max(0.1, current.duration - shift),
                    )

        # B. Min duration: extend, but only into the room left before the
        # next segment (minus min_gap).
        if current.duration < cfg.min_duration:
            following_start = segments[idx + 1].start if idx < last_index else float("inf")
            room = following_start - current.start - cfg.min_gap
            current = self._copy_segment(
                current,
                duration=min(cfg.min_duration, max(room, current.duration)),
            )

        # C. Max duration
        if current.duration > cfg.max_duration:
            current = self._copy_segment(current, duration=cfg.max_duration)

        cleaned.append(current)

    return cleaned
|
|
192
|
+
|
|
193
|
+
def _format_texts(self, segments: List[Supervision]) -> List[Supervision]:
    """Return copies of all segments with line-broken text (missing text is treated as empty)."""
    formatted: List[Supervision] = []
    for segment in segments:
        broken_text = self._smart_split_text(segment.text or "")
        formatted.append(self._copy_segment(segment, text=broken_text))
    return formatted
|
|
196
|
+
|
|
197
|
+
def _smart_split_text(self, text: str) -> str:
|
|
198
|
+
"""
|
|
199
|
+
Smart text line breaking.
|
|
200
|
+
|
|
201
|
+
Priority:
|
|
202
|
+
1. CJK punctuation (,。!? etc.)
|
|
203
|
+
2. English punctuation (,.!? etc.)
|
|
204
|
+
3. Whitespace
|
|
205
|
+
4. Hard truncation
|
|
206
|
+
|
|
207
|
+
Args:
|
|
208
|
+
text: Original text
|
|
209
|
+
|
|
210
|
+
Returns:
|
|
211
|
+
Text with line breaks
|
|
212
|
+
"""
|
|
213
|
+
# Clean text
|
|
214
|
+
text = self._normalize_text(text)
|
|
215
|
+
|
|
216
|
+
# Check if line break is needed
|
|
217
|
+
if len(text) <= self.config.max_chars_per_line:
|
|
218
|
+
return text
|
|
219
|
+
|
|
220
|
+
lines: List[str] = []
|
|
221
|
+
remaining = text
|
|
222
|
+
|
|
223
|
+
for _ in range(self.config.max_lines):
|
|
224
|
+
if len(remaining) <= self.config.max_chars_per_line:
|
|
225
|
+
lines.append(remaining)
|
|
226
|
+
remaining = ""
|
|
227
|
+
break
|
|
228
|
+
|
|
229
|
+
# Find best split point
|
|
230
|
+
split_pos = self._find_split_point(remaining, self.config.max_chars_per_line)
|
|
231
|
+
|
|
232
|
+
lines.append(remaining[:split_pos].rstrip())
|
|
233
|
+
remaining = remaining[split_pos:].lstrip()
|
|
234
|
+
|
|
235
|
+
# If remaining text exists and max lines reached, append to last line
|
|
236
|
+
if remaining and lines:
|
|
237
|
+
# Choose to append (may exceed char limit) rather than truncate
|
|
238
|
+
lines[-1] = lines[-1] + " " + remaining if lines[-1] else remaining
|
|
239
|
+
|
|
240
|
+
return "\n".join(lines)
|
|
241
|
+
|
|
242
|
+
def _find_split_point(self, text: str, max_len: int) -> int:
|
|
243
|
+
"""
|
|
244
|
+
Find best split point.
|
|
245
|
+
|
|
246
|
+
Strategy: Find punctuation or whitespace near max_len
|
|
247
|
+
Search range: 40% - 110% of max_len
|
|
248
|
+
|
|
249
|
+
Args:
|
|
250
|
+
text: Text to split
|
|
251
|
+
max_len: Maximum length
|
|
252
|
+
|
|
253
|
+
Returns:
|
|
254
|
+
Split position index
|
|
255
|
+
"""
|
|
256
|
+
search_start = int(max_len * 0.4)
|
|
257
|
+
search_end = min(len(text), int(max_len * 1.1))
|
|
258
|
+
|
|
259
|
+
best_pos = max_len
|
|
260
|
+
best_priority = 999 # Lower is better
|
|
261
|
+
|
|
262
|
+
# Search backwards, prefer split points closer to max_len
|
|
263
|
+
for i in range(min(search_end, len(text)) - 1, search_start - 1, -1):
|
|
264
|
+
char = text[i]
|
|
265
|
+
priority = self._get_split_priority(char)
|
|
266
|
+
|
|
267
|
+
if priority < best_priority:
|
|
268
|
+
best_priority = priority
|
|
269
|
+
best_pos = i + 1 # Split after punctuation/whitespace
|
|
270
|
+
|
|
271
|
+
# Exit early if highest priority (CJK punctuation) found
|
|
272
|
+
if priority == 1:
|
|
273
|
+
break
|
|
274
|
+
|
|
275
|
+
return best_pos
|
|
276
|
+
|
|
277
|
+
def _get_split_priority(self, char: str) -> int:
|
|
278
|
+
"""
|
|
279
|
+
Get character split priority.
|
|
280
|
+
|
|
281
|
+
Returns:
|
|
282
|
+
1 = CJK punctuation (highest priority)
|
|
283
|
+
2 = English punctuation
|
|
284
|
+
3 = Whitespace
|
|
285
|
+
999 = Other characters (not suitable for splitting)
|
|
286
|
+
"""
|
|
287
|
+
if re.match(self.CJK_PUNCTUATION, char):
|
|
288
|
+
return 1
|
|
289
|
+
elif re.match(self.EN_PUNCTUATION, char):
|
|
290
|
+
return 2
|
|
291
|
+
elif char.isspace():
|
|
292
|
+
return 3
|
|
293
|
+
return 999
|
|
294
|
+
|
|
295
|
+
def _normalize_text(self, text: str) -> str:
|
|
296
|
+
"""
|
|
297
|
+
Normalize text.
|
|
298
|
+
|
|
299
|
+
- Remove excess whitespace
|
|
300
|
+
- Remove existing newlines (will be reformatted)
|
|
301
|
+
- Unify spaces
|
|
302
|
+
"""
|
|
303
|
+
# Remove existing newlines
|
|
304
|
+
text = text.replace("\n", " ")
|
|
305
|
+
# Merge excess whitespace
|
|
306
|
+
text = re.sub(r"\s+", " ", text.strip())
|
|
307
|
+
return text
|
|
308
|
+
|
|
309
|
+
def _copy_segment(
    self,
    seg: Union[Supervision, SupervisionSegment],
    **overrides,
) -> Supervision:
    """
    Build a new ``Supervision`` from an existing segment.

    Args:
        seg: Original segment
        **overrides: Replacement values for any copied field (``id``,
            ``recording_id``, ``start``, ``duration``, ``channel``,
            ``text``, ``language``, ``speaker``, ``gender``, ``custom``,
            ``alignment``). Other keys are ignored.

    Returns:
        New Supervision instance. Field values are passed through as-is,
        so mutable values such as ``custom`` or ``alignment`` are shared
        with the original rather than deep-copied.
    """
    # Read directly: a missing attribute raises.
    required = ("id", "recording_id", "start", "duration", "text")
    # Fall back to None when the source segment lacks the attribute.
    optional = ("channel", "language", "speaker", "gender", "custom", "alignment")

    values = {name: getattr(seg, name) for name in required}
    for name in optional:
        values[name] = getattr(seg, name, None)

    # Apply overrides for known fields only.
    for name, value in overrides.items():
        if name in values:
            values[name] = value

    return Supervision(**values)
|
|
337
|
+
|
|
338
|
+
def apply_margins(
    self,
    segments: List[Union[Supervision, SupervisionSegment]],
    start_margin: Optional[float] = None,
    end_margin: Optional[float] = None,
) -> List[Supervision]:
    """
    Recalculate segment boundaries based on word-level alignment.

    Each segment's new start is the first aligned word's start minus the
    start margin (not below 0); its new end is the last aligned word's end
    plus the end margin. The first and last entries of the word list are
    taken as-is, so the list is presumably in time order.

    Args:
        segments: List of subtitles with alignment data
        start_margin: Start margin (overrides config default)
        end_margin: End margin (overrides config default)

    Returns:
        List of subtitles, ordered by original start time, with new
        margins applied

    Note:
        - Segments without alignment data keep original timestamps
        - A start that would come closer than ``min_gap`` to the previous
          output segment is adjusted by ``_resolve_collision``

    Example:
        >>> standardizer = CaptionStandardizer()
        >>> adjusted = standardizer.apply_margins(
        ...     supervisions, start_margin=0.05, end_margin=0.15
        ... )
    """
    if not segments:
        return []

    # Resolve margins: parameter > config > 0.0 (no adjustment)
    if start_margin is not None:
        lead = start_margin
    else:
        lead = self.config.start_margin or 0.0
    if end_margin is not None:
        tail = end_margin
    else:
        tail = self.config.end_margin or 0.0

    adjusted: List[Supervision] = []

    for seg in sorted(segments, key=lambda item: item.start):
        words = self._get_word_alignment(seg)

        if words:
            # Precise speech boundaries from the aligned words
            speech_start = words[0].start
            speech_end = words[-1].start + words[-1].duration

            # 0.0 margin means "use the word boundaries exactly"
            begin = max(0, speech_start - lead)
            finish = speech_end + tail

            # Collision detection (with previous output segment)
            if adjusted:
                previous_end = adjusted[-1].start + adjusted[-1].duration
                if begin < previous_end + self.config.min_gap:
                    begin = self._resolve_collision(previous_end, begin, speech_start, lead)

            adjusted.append(self._copy_segment(seg, start=begin, duration=finish - begin))
        else:
            # No alignment data, keep original timing
            adjusted.append(self._copy_segment(seg))

    return adjusted
|
|
406
|
+
|
|
407
|
+
def _get_word_alignment(self, seg: Union[Supervision, SupervisionSegment]) -> List:
    """
    Fetch the word-level alignment of a segment without raising.

    Args:
        seg: Subtitle segment

    Returns:
        The value stored under ``alignment["word"]``, or an empty list when
        the segment has no ``alignment`` attribute, the alignment is empty,
        or it has no ``"word"`` entry.
    """
    alignment = getattr(seg, "alignment", None)
    if not alignment or "word" not in alignment:
        return []
    return alignment["word"]
|
|
421
|
+
|
|
422
|
+
def _resolve_collision(
|
|
423
|
+
self,
|
|
424
|
+
prev_end: float,
|
|
425
|
+
new_start: float,
|
|
426
|
+
first_word_start: float,
|
|
427
|
+
start_margin: float,
|
|
428
|
+
) -> float:
|
|
429
|
+
"""
|
|
430
|
+
Resolve collision with previous segment.
|
|
431
|
+
|
|
432
|
+
Args:
|
|
433
|
+
prev_end: End time of previous segment
|
|
434
|
+
new_start: Currently calculated start time
|
|
435
|
+
first_word_start: Start time of first word in current segment
|
|
436
|
+
start_margin: Requested start_margin
|
|
437
|
+
|
|
438
|
+
Returns:
|
|
439
|
+
Adjusted start time
|
|
440
|
+
"""
|
|
441
|
+
if self.config.margin_collision_mode == "gap":
|
|
442
|
+
# Force maintain min_gap
|
|
443
|
+
return prev_end + self.config.min_gap
|
|
444
|
+
else:
|
|
445
|
+
# Trim mode: preserve margin as much as possible, but not beyond speech start
|
|
446
|
+
available_margin = first_word_start - (prev_end + self.config.min_gap)
|
|
447
|
+
actual_margin = max(0, min(start_margin, available_margin))
|
|
448
|
+
return first_word_start - actual_margin
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
class CaptionValidator:
    """
    Caption quality validator.

    Validates subtitles against broadcast standards and generates quality metrics report.

    Example:
        >>> validator = CaptionValidator()
        >>> result = validator.validate(supervisions)
        >>> if not result.valid:
        ...     print(result.warnings)
    """

    def __init__(
        self,
        config: Optional[StandardizationConfig] = None,
        min_duration: float = 0.8,
        max_duration: float = 7.0,
        min_gap: float = 0.08,
        max_chars_per_line: int = 42,
    ):
        """
        Initialize validator.

        Args:
            config: Standardization config (if provided, ignores other params)
            min_duration: Minimum duration
            max_duration: Maximum duration
            min_gap: Minimum gap
            max_chars_per_line: Maximum characters per line
        """
        # Explicit None check: a supplied config must be honoured even if
        # the object happens to evaluate as falsy.
        if config is not None:
            self.config = config
        else:
            self.config = StandardizationConfig(
                min_duration=min_duration,
                max_duration=max_duration,
                min_gap=min_gap,
                max_chars_per_line=max_chars_per_line,
            )

    def validate(self, segments: List[Union[Supervision, SupervisionSegment]]) -> ValidationResult:
        """
        Validate subtitles and return quality metrics.

        Segments are compared with their predecessor in list order and are
        not re-sorted, so they should be passed in chronological order.

        ``valid`` is False when any segment is too short or too long, or
        when any gap is below ``min_gap``. An overlap with the previous
        segment (negative gap) counts as a gap violation. Line-length and
        reading-speed problems only produce warnings.

        Args:
            segments: List of subtitle segments

        Returns:
            ValidationResult containing validation results and metrics
        """
        result = ValidationResult()

        if not segments:
            return result

        cfg = self.config
        total_cps = 0.0
        prev_end = 0.0

        for i, seg in enumerate(segments):
            text = seg.text or ""
            duration = seg.duration

            # CPS calculation (excluding newlines); a non-positive duration
            # contributes 0 instead of dividing by zero.
            text_length = len(text.replace("\n", ""))
            cps = text_length / duration if duration > 0 else 0
            total_cps += cps

            # CPL calculation
            max_line_len = max((len(line) for line in text.split("\n")), default=0)
            result.max_cpl = max(result.max_cpl, max_line_len)

            # Duration check
            if duration < cfg.min_duration:
                result.segments_too_short += 1
                result.warnings.append(
                    f"Segment {i} (id={seg.id}): duration {duration:.2f}s < min {cfg.min_duration}s"
                )

            if duration > cfg.max_duration:
                result.segments_too_long += 1
                result.warnings.append(
                    f"Segment {i} (id={seg.id}): duration {duration:.2f}s > max {cfg.max_duration}s"
                )

            # Gap check (needs a predecessor)
            if i > 0:
                gap = seg.start - prev_end
                if gap < 0:
                    # Overlap is the worst case of an insufficient gap and
                    # must not pass silently.
                    result.gaps_too_small += 1
                    result.warnings.append(
                        f"Segment {i} (id={seg.id}): overlaps previous segment by {-gap:.3f}s"
                    )
                elif gap < cfg.min_gap:
                    result.gaps_too_small += 1
                    result.warnings.append(f"Segment {i} (id={seg.id}): gap {gap:.3f}s < min {cfg.min_gap}s")

            # CPL check
            if max_line_len > cfg.max_chars_per_line:
                result.warnings.append(
                    f"Segment {i} (id={seg.id}): line length {max_line_len} > max {cfg.max_chars_per_line}"
                )

            # CPS check (reading speed too fast)
            if cps > cfg.optimal_cps * 1.5:  # Exceeds optimal by 50%
                result.warnings.append(
                    f"Segment {i} (id={seg.id}): CPS {cps:.1f} exceeds recommended {cfg.optimal_cps}"
                )

            prev_end = seg.start + seg.duration

        # Calculate average CPS
        result.avg_cps = total_cps / len(segments)

        # Determine if validation passed
        result.valid = result.segments_too_short == 0 and result.segments_too_long == 0 and result.gaps_too_small == 0

        return result
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
def standardize_captions(
    segments: List[Union[Supervision, SupervisionSegment]],
    min_duration: float = 0.8,
    max_duration: float = 7.0,
    min_gap: float = 0.08,
    max_lines: int = 2,
    max_chars_per_line: int = 42,
) -> List[Supervision]:
    """
    Convenience function: build a ``CaptionStandardizer`` and run ``process``.

    Args:
        segments: List of original caption segments
        min_duration: Minimum duration (seconds)
        max_duration: Maximum duration (seconds)
        min_gap: Minimum gap (seconds)
        max_lines: Maximum number of lines
        max_chars_per_line: Maximum characters per line

    Returns:
        List of processed caption segments

    Example:
        >>> from lattifai.caption import standardize_captions
        >>> processed = standardize_captions(supervisions, max_chars_per_line=22)
    """
    options = {
        "min_duration": min_duration,
        "max_duration": max_duration,
        "min_gap": min_gap,
        "max_lines": max_lines,
        "max_chars_per_line": max_chars_per_line,
    }
    return CaptionStandardizer(**options).process(segments)
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
def apply_margins_to_captions(
    segments: List[Union[Supervision, SupervisionSegment]],
    start_margin: float = 0.08,
    end_margin: float = 0.20,
    min_gap: float = 0.08,
    collision_mode: str = "trim",
) -> List[Supervision]:
    """
    Convenience function: Recalculate caption boundaries based on word-level alignment.

    Builds a ``CaptionStandardizer`` with the given ``min_gap``, stores the
    margin settings on its config, and calls ``apply_margins``.

    Args:
        segments: List of caption segments with alignment data
        start_margin: Start margin (seconds) - extends before first word
        end_margin: End margin (seconds) - extends after last word
        min_gap: Minimum gap (seconds) - for collision handling
        collision_mode: Collision mode 'trim' or 'gap'

    Returns:
        List of caption segments with new margins applied

    Example:
        >>> from lattifai.caption import apply_margins_to_captions
        >>> adjusted = apply_margins_to_captions(
        ...     supervisions, start_margin=0.05, end_margin=0.15
        ... )
    """
    standardizer = CaptionStandardizer(min_gap=min_gap)

    # The constructor does not take margin options, so set them on the config.
    settings = standardizer.config
    settings.start_margin = start_margin
    settings.end_margin = end_margin
    settings.margin_collision_mode = collision_mode

    return standardizer.apply_margins(
        segments,
        start_margin=start_margin,
        end_margin=end_margin,
    )
|