lattifai-captions 0.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai_captions-0.1.4/PKG-INFO +292 -0
- lattifai_captions-0.1.4/README.md +250 -0
- lattifai_captions-0.1.4/pyproject.toml +72 -0
- lattifai_captions-0.1.4/setup.cfg +4 -0
- lattifai_captions-0.1.4/src/lattifai/caption/__init__.py +99 -0
- lattifai_captions-0.1.4/src/lattifai/caption/caption.py +570 -0
- lattifai_captions-0.1.4/src/lattifai/caption/config.py +422 -0
- lattifai_captions-0.1.4/src/lattifai/caption/formats/__init__.py +203 -0
- lattifai_captions-0.1.4/src/lattifai/caption/formats/base.py +211 -0
- lattifai_captions-0.1.4/src/lattifai/caption/formats/gemini.py +799 -0
- lattifai_captions-0.1.4/src/lattifai/caption/formats/json.py +194 -0
- lattifai_captions-0.1.4/src/lattifai/caption/formats/lrc.py +306 -0
- lattifai_captions-0.1.4/src/lattifai/caption/formats/nle/__init__.py +9 -0
- lattifai_captions-0.1.4/src/lattifai/caption/formats/nle/audition.py +555 -0
- lattifai_captions-0.1.4/src/lattifai/caption/formats/nle/avid.py +417 -0
- lattifai_captions-0.1.4/src/lattifai/caption/formats/nle/fcpxml.py +543 -0
- lattifai_captions-0.1.4/src/lattifai/caption/formats/nle/premiere.py +584 -0
- lattifai_captions-0.1.4/src/lattifai/caption/formats/pysubs2.py +671 -0
- lattifai_captions-0.1.4/src/lattifai/caption/formats/sbv.py +147 -0
- lattifai_captions-0.1.4/src/lattifai/caption/formats/srv3.py +391 -0
- lattifai_captions-0.1.4/src/lattifai/caption/formats/tabular.py +332 -0
- lattifai_captions-0.1.4/src/lattifai/caption/formats/textgrid.py +189 -0
- lattifai_captions-0.1.4/src/lattifai/caption/formats/ttml.py +649 -0
- lattifai_captions-0.1.4/src/lattifai/caption/formats/vtt.py +468 -0
- lattifai_captions-0.1.4/src/lattifai/caption/parsers/__init__.py +9 -0
- lattifai_captions-0.1.4/src/lattifai/caption/parsers/text_parser.py +147 -0
- lattifai_captions-0.1.4/src/lattifai/caption/punctuation.py +39 -0
- lattifai_captions-0.1.4/src/lattifai/caption/sentence_splitter.py +412 -0
- lattifai_captions-0.1.4/src/lattifai/caption/standardize.py +634 -0
- lattifai_captions-0.1.4/src/lattifai/caption/supervision.py +392 -0
- lattifai_captions-0.1.4/src/lattifai/caption/utils.py +630 -0
- lattifai_captions-0.1.4/src/lattifai_captions.egg-info/PKG-INFO +292 -0
- lattifai_captions-0.1.4/src/lattifai_captions.egg-info/SOURCES.txt +34 -0
- lattifai_captions-0.1.4/src/lattifai_captions.egg-info/dependency_links.txt +1 -0
- lattifai_captions-0.1.4/src/lattifai_captions.egg-info/requires.txt +17 -0
- lattifai_captions-0.1.4/src/lattifai_captions.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lattifai-captions
|
|
3
|
+
Version: 0.1.4
|
|
4
|
+
Summary: Caption/subtitle processing library with multi-format support (SRT, VTT, ASS, TTML, TextGrid, NLE formats)
|
|
5
|
+
Author-email: LattifAI Technologies <tech@lattifai.com>
|
|
6
|
+
Maintainer-email: Lattice <tech@lattifai.com>
|
|
7
|
+
License: Apache-2.0
|
|
8
|
+
Project-URL: Homepage, https://github.com/lattifai/captions
|
|
9
|
+
Project-URL: Documentation, https://github.com/lattifai/captions/blob/main/README.md
|
|
10
|
+
Project-URL: Bug Tracker, https://github.com/lattifai/captions/issues
|
|
11
|
+
Keywords: captions,subtitles,srt,vtt,ass,ttml,textgrid,forced alignment
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
21
|
+
Classifier: Operating System :: OS Independent
|
|
22
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
23
|
+
Classifier: Topic :: Multimedia :: Video
|
|
24
|
+
Classifier: Topic :: Text Processing
|
|
25
|
+
Requires-Python: <3.15,>=3.10
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
Requires-Dist: pysubs2>=1.6.0
|
|
28
|
+
Requires-Dist: praatio>=6.0.0
|
|
29
|
+
Requires-Dist: tgt>=1.4.0
|
|
30
|
+
Provides-Extra: splitting
|
|
31
|
+
Requires-Dist: wtpsplit>=2.1.7; extra == "splitting"
|
|
32
|
+
Requires-Dist: onnxruntime; extra == "splitting"
|
|
33
|
+
Requires-Dist: huggingface_hub>=0.20.0; extra == "splitting"
|
|
34
|
+
Requires-Dist: modelscope>=1.33.0; extra == "splitting"
|
|
35
|
+
Provides-Extra: dev
|
|
36
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
37
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
38
|
+
Requires-Dist: wtpsplit>=2.1.7; extra == "dev"
|
|
39
|
+
Requires-Dist: onnxruntime; extra == "dev"
|
|
40
|
+
Requires-Dist: huggingface_hub>=0.20.0; extra == "dev"
|
|
41
|
+
Requires-Dist: modelscope>=1.33.0; extra == "dev"
|
|
42
|
+
|
|
43
|
+
# lattifai-captions
|
|
44
|
+
|
|
45
|
+
Caption/subtitle processing library with comprehensive format support.
|
|
46
|
+
|
|
47
|
+
## Features
|
|
48
|
+
|
|
49
|
+
- **Multi-format support**: SRT, VTT, ASS, SSA, TTML, TextGrid, LRC, SRV3, and more
|
|
50
|
+
- **YouTube formats**: SRV3 (YTT v3), YouTube VTT with word-level timestamps
|
|
51
|
+
- **Professional NLE formats**: Avid DS, Final Cut Pro XML, Premiere Pro XML, Adobe Audition
|
|
52
|
+
- **Word-level timing**: Karaoke-style word-by-word timestamps
|
|
53
|
+
- **Standardization**: Netflix/BBC broadcast guidelines compliance
|
|
54
|
+
- **Sentence splitting**: AI-powered intelligent sentence segmentation
|
|
55
|
+
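- **No heavy ML framework dependencies in the base install**: Lightweight and fast (the optional `splitting` extra adds `wtpsplit`, `onnxruntime`, `huggingface_hub`, and `modelscope`)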
|
|
56
|
+
|
|
57
|
+
## Installation
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
# Basic installation
|
|
61
|
+
pip install lattifai-captions
|
|
62
|
+
|
|
63
|
+
# With sentence splitting support
|
|
64
|
+
pip install "lattifai-captions[splitting]"
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Quick Start
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from lattifai.caption import Caption
|
|
71
|
+
|
|
72
|
+
# Read a caption file
|
|
73
|
+
caption = Caption.read("input.srt")
|
|
74
|
+
|
|
75
|
+
# Write to different format
|
|
76
|
+
caption.write("output.vtt")
|
|
77
|
+
|
|
78
|
+
# Convert to string
|
|
79
|
+
vtt_content = caption.to_string("vtt")
|
|
80
|
+
|
|
81
|
+
# Access segments
|
|
82
|
+
for segment in caption.supervisions:
|
|
83
|
+
print(f"{segment.start:.2f} - {segment.end:.2f}: {segment.text}")
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Supported Formats
|
|
87
|
+
|
|
88
|
+
### Input/Output (Read & Write)
|
|
89
|
+
|
|
90
|
+
| Format | Extensions | Description |
|
|
91
|
+
|--------|------------|-------------|
|
|
92
|
+
| **SRT** | `.srt` | SubRip subtitle format |
|
|
93
|
+
| **VTT** | `.vtt` | WebVTT, includes YouTube VTT with word-level timestamps |
|
|
94
|
+
| **ASS/SSA** | `.ass`, `.ssa` | Advanced SubStation Alpha |
|
|
95
|
+
| **SRV3** | `.srv3`, `.ytt` | YouTube Timed Text v3 with word-level timing |
|
|
96
|
+
| **SBV** | `.sbv` | YouTube SubViewer format |
|
|
97
|
+
| **SUB** | `.sub` | MicroDVD subtitle format |
|
|
98
|
+
| **SAMI** | `.sami`, `.smi` | SAMI subtitle format |
|
|
99
|
+
| **JSON** | `.json` | Structured data with word-level support |
|
|
100
|
+
| **CSV/TSV** | `.csv`, `.tsv` | Tabular formats |
|
|
101
|
+
| **TextGrid** | `.textgrid` | Praat TextGrid format |
|
|
102
|
+
| **LRC** | `.lrc` | Lyrics format with word-level timestamps |
|
|
103
|
+
| **Gemini** | `.md` | Gemini AI transcript markdown |
|
|
104
|
+
|
|
105
|
+
### Output Only
|
|
106
|
+
|
|
107
|
+
| Format | Extensions | Description |
|
|
108
|
+
|--------|------------|-------------|
|
|
109
|
+
| **TTML** | `.ttml` | Timed Text Markup Language (W3C standard) |
|
|
110
|
+
| **IMSC1** | `.ttml` | Netflix/streaming TTML profile |
|
|
111
|
+
| **EBU-TT-D** | `.ttml` | European broadcast TTML profile |
|
|
112
|
+
| **Avid DS** | `.txt` | Avid Media Composer SubCap |
|
|
113
|
+
| **FCPXML** | `.fcpxml` | Final Cut Pro XML |
|
|
114
|
+
| **Premiere XML** | `.xml` | Adobe Premiere Pro XML |
|
|
115
|
+
| **Audition CSV** | `.csv` | Adobe Audition markers |
|
|
116
|
+
| **EdiMarker CSV** | `.csv` | Pro Tools markers |
|
|
117
|
+
|
|
118
|
+
## Word-Level Timing
|
|
119
|
+
|
|
120
|
+
Many formats support word-level timing for karaoke-style output:
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
from lattifai.caption import Caption
|
|
124
|
+
|
|
125
|
+
caption = Caption.read("input.srv3") # SRV3 has built-in word timing
|
|
126
|
+
|
|
127
|
+
# Access word-level alignment
|
|
128
|
+
for segment in caption.supervisions:
|
|
129
|
+
if segment.alignment and "word" in segment.alignment:
|
|
130
|
+
for word in segment.alignment["word"]:
|
|
131
|
+
print(f" {word.symbol}: {word.start:.3f}s - {word.end:.3f}s")
|
|
132
|
+
|
|
133
|
+
# Export with word-level timing
|
|
134
|
+
caption.write("output.json", word_level=True) # JSON preserves words array
|
|
135
|
+
caption.write("output.ass", word_level=True, karaoke_config=KaraokeConfig(enabled=True))  # KaraokeConfig must be imported as well
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## YouTube SRV3 Format
|
|
139
|
+
|
|
140
|
+
SRV3 is YouTube's proprietary timed text format with millisecond-precision word timing:
|
|
141
|
+
|
|
142
|
+
```python
|
|
143
|
+
from lattifai.caption import Caption
|
|
144
|
+
|
|
145
|
+
# Read SRV3 (automatically extracts word-level timing)
|
|
146
|
+
caption = Caption.read("video.srv3")
|
|
147
|
+
|
|
148
|
+
# Convert to other formats
|
|
149
|
+
caption.write("output.srt") # Standard SRT
|
|
150
|
+
caption.write("output.vtt", word_level=True) # VTT with word timing
|
|
151
|
+
caption.write("output.srv3", word_level=True) # Back to SRV3
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
SRV3 structure example:
|
|
155
|
+
```xml
|
|
156
|
+
<timedtext format="3">
|
|
157
|
+
<body>
|
|
158
|
+
<p t="240" d="6559" w="1">
|
|
159
|
+
<s ac="0">Does</s>
|
|
160
|
+
<s t="320" ac="0"> fast</s>
|
|
161
|
+
<s t="560" ac="0"> charging</s>
|
|
162
|
+
</p>
|
|
163
|
+
</body>
|
|
164
|
+
</timedtext>
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## Sentence Splitting
|
|
168
|
+
|
|
169
|
+
Split captions into natural sentences (requires `[splitting]` extra):
|
|
170
|
+
|
|
171
|
+
```python
|
|
172
|
+
from lattifai.caption import Caption, SentenceSplitter
|
|
173
|
+
|
|
174
|
+
# Using Caption method
|
|
175
|
+
caption = Caption.read("input.srt")
|
|
176
|
+
split_caption = caption.split_sentences()
|
|
177
|
+
|
|
178
|
+
# Or use SentenceSplitter directly
|
|
179
|
+
splitter = SentenceSplitter()
|
|
180
|
+
split_supervisions = splitter.split_sentences(caption.supervisions)
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
## Format Conversion
|
|
184
|
+
|
|
185
|
+
```python
|
|
186
|
+
from lattifai.caption import Caption
|
|
187
|
+
|
|
188
|
+
# Read any format
|
|
189
|
+
caption = Caption.read("input.srt")
|
|
190
|
+
|
|
191
|
+
# Write to any supported format
|
|
192
|
+
caption.write("output.vtt")
|
|
193
|
+
caption.write("output.ass")
|
|
194
|
+
caption.write("output.json")
|
|
195
|
+
caption.write("output.srv3", word_level=True)
|
|
196
|
+
caption.write("output.ttml")
|
|
197
|
+
|
|
198
|
+
# Or get as string
|
|
199
|
+
srt_content = caption.to_string("srt")
|
|
200
|
+
json_content = caption.to_string("json", word_level=True)
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## Standardization
|
|
204
|
+
|
|
205
|
+
Apply broadcast standards to captions:
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
from lattifai.caption import Caption, CaptionStandardizer
|
|
209
|
+
|
|
210
|
+
standardizer = CaptionStandardizer(
|
|
211
|
+
min_duration=0.7, # Minimum segment duration
|
|
212
|
+
max_duration=7.0, # Maximum segment duration
|
|
213
|
+
min_gap=0.08, # Minimum gap between segments
|
|
214
|
+
max_lines=2, # Maximum lines per segment
|
|
215
|
+
max_chars_per_line=42, # Maximum characters per line
|
|
216
|
+
)
|
|
217
|
+
|
|
218
|
+
caption = Caption.read("input.srt")
|
|
219
|
+
standardized = standardizer.process(caption.supervisions)
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
## Validation
|
|
223
|
+
|
|
224
|
+
Check captions against quality standards:
|
|
225
|
+
|
|
226
|
+
```python
|
|
227
|
+
from lattifai.caption import Caption, CaptionValidator
|
|
228
|
+
|
|
229
|
+
validator = CaptionValidator(
|
|
230
|
+
min_duration=0.7,
|
|
231
|
+
max_duration=7.0,
|
|
232
|
+
min_gap=0.08,
|
|
233
|
+
max_chars_per_line=42,
|
|
234
|
+
)
|
|
235
|
+
|
|
236
|
+
caption = Caption.read("input.srt")
|
|
237
|
+
result = validator.validate(caption.supervisions)
|
|
238
|
+
|
|
239
|
+
print(f"Valid: {result.valid}")
|
|
240
|
+
print(f"Average CPS: {result.avg_cps:.1f}")
|
|
241
|
+
print(f"Max CPL: {result.max_cpl}")
|
|
242
|
+
print(f"Warnings: {result.warnings}")
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
## API Reference
|
|
246
|
+
|
|
247
|
+
### Caption Class
|
|
248
|
+
|
|
249
|
+
```python
|
|
250
|
+
from lattifai.caption import Caption
|
|
251
|
+
|
|
252
|
+
# Class methods
|
|
253
|
+
Caption.read(path, format=None, normalize_text=True)
|
|
254
|
+
Caption.from_string(content, format)
|
|
255
|
+
Caption.from_supervisions(supervisions, language=None, metadata=None)
|
|
256
|
+
|
|
257
|
+
# Instance methods
|
|
258
|
+
caption.write(path, include_speaker=True, word_level=False, karaoke_config=None)
|
|
259
|
+
caption.to_string(format, include_speaker=True, word_level=False, karaoke_config=None)
|
|
260
|
+
caption.split_sentences()
|
|
261
|
+
caption.shift_time(seconds)
|
|
262
|
+
|
|
263
|
+
# Properties
|
|
264
|
+
caption.supervisions # List[Supervision]
|
|
265
|
+
caption.duration # Total duration in seconds
|
|
266
|
+
caption.language # Language code
|
|
267
|
+
caption.source_format # Original format
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
### Supervision Class
|
|
271
|
+
|
|
272
|
+
```python
|
|
273
|
+
from lattifai.caption import Supervision
|
|
274
|
+
|
|
275
|
+
sup = Supervision(
|
|
276
|
+
start=0.0, # Start time in seconds
|
|
277
|
+
duration=2.5, # Duration in seconds
|
|
278
|
+
text="Hello world", # Caption text
|
|
279
|
+
speaker="Alice", # Optional speaker label
|
|
280
|
+
alignment=None, # Optional word-level alignment
|
|
281
|
+
)
|
|
282
|
+
|
|
283
|
+
# Properties
|
|
284
|
+
sup.end # start + duration
|
|
285
|
+
sup.text # Caption text
|
|
286
|
+
sup.speaker # Speaker label
|
|
287
|
+
sup.alignment # Dict with "word" key containing AlignmentItem list
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
## License
|
|
291
|
+
|
|
292
|
+
Apache-2.0
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
# lattifai-captions
|
|
2
|
+
|
|
3
|
+
Caption/subtitle processing library with comprehensive format support.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Multi-format support**: SRT, VTT, ASS, SSA, TTML, TextGrid, LRC, SRV3, and more
|
|
8
|
+
- **YouTube formats**: SRV3 (YTT v3), YouTube VTT with word-level timestamps
|
|
9
|
+
- **Professional NLE formats**: Avid DS, Final Cut Pro XML, Premiere Pro XML, Adobe Audition
|
|
10
|
+
- **Word-level timing**: Karaoke-style word-by-word timestamps
|
|
11
|
+
- **Standardization**: Netflix/BBC broadcast guidelines compliance
|
|
12
|
+
- **Sentence splitting**: AI-powered intelligent sentence segmentation
|
|
13
|
+
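- **No heavy ML framework dependencies in the base install**: Lightweight and fast (the optional `splitting` extra adds `wtpsplit`, `onnxruntime`, `huggingface_hub`, and `modelscope`)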
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
# Basic installation
|
|
19
|
+
pip install lattifai-captions
|
|
20
|
+
|
|
21
|
+
# With sentence splitting support
|
|
22
|
+
pip install "lattifai-captions[splitting]"
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
from lattifai.caption import Caption
|
|
29
|
+
|
|
30
|
+
# Read a caption file
|
|
31
|
+
caption = Caption.read("input.srt")
|
|
32
|
+
|
|
33
|
+
# Write to different format
|
|
34
|
+
caption.write("output.vtt")
|
|
35
|
+
|
|
36
|
+
# Convert to string
|
|
37
|
+
vtt_content = caption.to_string("vtt")
|
|
38
|
+
|
|
39
|
+
# Access segments
|
|
40
|
+
for segment in caption.supervisions:
|
|
41
|
+
print(f"{segment.start:.2f} - {segment.end:.2f}: {segment.text}")
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Supported Formats
|
|
45
|
+
|
|
46
|
+
### Input/Output (Read & Write)
|
|
47
|
+
|
|
48
|
+
| Format | Extensions | Description |
|
|
49
|
+
|--------|------------|-------------|
|
|
50
|
+
| **SRT** | `.srt` | SubRip subtitle format |
|
|
51
|
+
| **VTT** | `.vtt` | WebVTT, includes YouTube VTT with word-level timestamps |
|
|
52
|
+
| **ASS/SSA** | `.ass`, `.ssa` | Advanced SubStation Alpha |
|
|
53
|
+
| **SRV3** | `.srv3`, `.ytt` | YouTube Timed Text v3 with word-level timing |
|
|
54
|
+
| **SBV** | `.sbv` | YouTube SubViewer format |
|
|
55
|
+
| **SUB** | `.sub` | MicroDVD subtitle format |
|
|
56
|
+
| **SAMI** | `.sami`, `.smi` | SAMI subtitle format |
|
|
57
|
+
| **JSON** | `.json` | Structured data with word-level support |
|
|
58
|
+
| **CSV/TSV** | `.csv`, `.tsv` | Tabular formats |
|
|
59
|
+
| **TextGrid** | `.textgrid` | Praat TextGrid format |
|
|
60
|
+
| **LRC** | `.lrc` | Lyrics format with word-level timestamps |
|
|
61
|
+
| **Gemini** | `.md` | Gemini AI transcript markdown |
|
|
62
|
+
|
|
63
|
+
### Output Only
|
|
64
|
+
|
|
65
|
+
| Format | Extensions | Description |
|
|
66
|
+
|--------|------------|-------------|
|
|
67
|
+
| **TTML** | `.ttml` | Timed Text Markup Language (W3C standard) |
|
|
68
|
+
| **IMSC1** | `.ttml` | Netflix/streaming TTML profile |
|
|
69
|
+
| **EBU-TT-D** | `.ttml` | European broadcast TTML profile |
|
|
70
|
+
| **Avid DS** | `.txt` | Avid Media Composer SubCap |
|
|
71
|
+
| **FCPXML** | `.fcpxml` | Final Cut Pro XML |
|
|
72
|
+
| **Premiere XML** | `.xml` | Adobe Premiere Pro XML |
|
|
73
|
+
| **Audition CSV** | `.csv` | Adobe Audition markers |
|
|
74
|
+
| **EdiMarker CSV** | `.csv` | Pro Tools markers |
|
|
75
|
+
|
|
76
|
+
## Word-Level Timing
|
|
77
|
+
|
|
78
|
+
Many formats support word-level timing for karaoke-style output:
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
from lattifai.caption import Caption
|
|
82
|
+
|
|
83
|
+
caption = Caption.read("input.srv3") # SRV3 has built-in word timing
|
|
84
|
+
|
|
85
|
+
# Access word-level alignment
|
|
86
|
+
for segment in caption.supervisions:
|
|
87
|
+
if segment.alignment and "word" in segment.alignment:
|
|
88
|
+
for word in segment.alignment["word"]:
|
|
89
|
+
print(f" {word.symbol}: {word.start:.3f}s - {word.end:.3f}s")
|
|
90
|
+
|
|
91
|
+
# Export with word-level timing
|
|
92
|
+
caption.write("output.json", word_level=True) # JSON preserves words array
|
|
93
|
+
caption.write("output.ass", word_level=True, karaoke_config=KaraokeConfig(enabled=True))  # KaraokeConfig must be imported as well
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## YouTube SRV3 Format
|
|
97
|
+
|
|
98
|
+
SRV3 is YouTube's proprietary timed text format with millisecond-precision word timing:
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from lattifai.caption import Caption
|
|
102
|
+
|
|
103
|
+
# Read SRV3 (automatically extracts word-level timing)
|
|
104
|
+
caption = Caption.read("video.srv3")
|
|
105
|
+
|
|
106
|
+
# Convert to other formats
|
|
107
|
+
caption.write("output.srt") # Standard SRT
|
|
108
|
+
caption.write("output.vtt", word_level=True) # VTT with word timing
|
|
109
|
+
caption.write("output.srv3", word_level=True) # Back to SRV3
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
SRV3 structure example:
|
|
113
|
+
```xml
|
|
114
|
+
<timedtext format="3">
|
|
115
|
+
<body>
|
|
116
|
+
<p t="240" d="6559" w="1">
|
|
117
|
+
<s ac="0">Does</s>
|
|
118
|
+
<s t="320" ac="0"> fast</s>
|
|
119
|
+
<s t="560" ac="0"> charging</s>
|
|
120
|
+
</p>
|
|
121
|
+
</body>
|
|
122
|
+
</timedtext>
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Sentence Splitting
|
|
126
|
+
|
|
127
|
+
Split captions into natural sentences (requires `[splitting]` extra):
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
from lattifai.caption import Caption, SentenceSplitter
|
|
131
|
+
|
|
132
|
+
# Using Caption method
|
|
133
|
+
caption = Caption.read("input.srt")
|
|
134
|
+
split_caption = caption.split_sentences()
|
|
135
|
+
|
|
136
|
+
# Or use SentenceSplitter directly
|
|
137
|
+
splitter = SentenceSplitter()
|
|
138
|
+
split_supervisions = splitter.split_sentences(caption.supervisions)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Format Conversion
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from lattifai.caption import Caption
|
|
145
|
+
|
|
146
|
+
# Read any format
|
|
147
|
+
caption = Caption.read("input.srt")
|
|
148
|
+
|
|
149
|
+
# Write to any supported format
|
|
150
|
+
caption.write("output.vtt")
|
|
151
|
+
caption.write("output.ass")
|
|
152
|
+
caption.write("output.json")
|
|
153
|
+
caption.write("output.srv3", word_level=True)
|
|
154
|
+
caption.write("output.ttml")
|
|
155
|
+
|
|
156
|
+
# Or get as string
|
|
157
|
+
srt_content = caption.to_string("srt")
|
|
158
|
+
json_content = caption.to_string("json", word_level=True)
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## Standardization
|
|
162
|
+
|
|
163
|
+
Apply broadcast standards to captions:
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
from lattifai.caption import Caption, CaptionStandardizer
|
|
167
|
+
|
|
168
|
+
standardizer = CaptionStandardizer(
|
|
169
|
+
min_duration=0.7, # Minimum segment duration
|
|
170
|
+
max_duration=7.0, # Maximum segment duration
|
|
171
|
+
min_gap=0.08, # Minimum gap between segments
|
|
172
|
+
max_lines=2, # Maximum lines per segment
|
|
173
|
+
max_chars_per_line=42, # Maximum characters per line
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
caption = Caption.read("input.srt")
|
|
177
|
+
standardized = standardizer.process(caption.supervisions)
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
## Validation
|
|
181
|
+
|
|
182
|
+
Check captions against quality standards:
|
|
183
|
+
|
|
184
|
+
```python
|
|
185
|
+
from lattifai.caption import Caption, CaptionValidator
|
|
186
|
+
|
|
187
|
+
validator = CaptionValidator(
|
|
188
|
+
min_duration=0.7,
|
|
189
|
+
max_duration=7.0,
|
|
190
|
+
min_gap=0.08,
|
|
191
|
+
max_chars_per_line=42,
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
caption = Caption.read("input.srt")
|
|
195
|
+
result = validator.validate(caption.supervisions)
|
|
196
|
+
|
|
197
|
+
print(f"Valid: {result.valid}")
|
|
198
|
+
print(f"Average CPS: {result.avg_cps:.1f}")
|
|
199
|
+
print(f"Max CPL: {result.max_cpl}")
|
|
200
|
+
print(f"Warnings: {result.warnings}")
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## API Reference
|
|
204
|
+
|
|
205
|
+
### Caption Class
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
from lattifai.caption import Caption
|
|
209
|
+
|
|
210
|
+
# Class methods
|
|
211
|
+
Caption.read(path, format=None, normalize_text=True)
|
|
212
|
+
Caption.from_string(content, format)
|
|
213
|
+
Caption.from_supervisions(supervisions, language=None, metadata=None)
|
|
214
|
+
|
|
215
|
+
# Instance methods
|
|
216
|
+
caption.write(path, include_speaker=True, word_level=False, karaoke_config=None)
|
|
217
|
+
caption.to_string(format, include_speaker=True, word_level=False, karaoke_config=None)
|
|
218
|
+
caption.split_sentences()
|
|
219
|
+
caption.shift_time(seconds)
|
|
220
|
+
|
|
221
|
+
# Properties
|
|
222
|
+
caption.supervisions # List[Supervision]
|
|
223
|
+
caption.duration # Total duration in seconds
|
|
224
|
+
caption.language # Language code
|
|
225
|
+
caption.source_format # Original format
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
### Supervision Class
|
|
229
|
+
|
|
230
|
+
```python
|
|
231
|
+
from lattifai.caption import Supervision
|
|
232
|
+
|
|
233
|
+
sup = Supervision(
|
|
234
|
+
start=0.0, # Start time in seconds
|
|
235
|
+
duration=2.5, # Duration in seconds
|
|
236
|
+
text="Hello world", # Caption text
|
|
237
|
+
speaker="Alice", # Optional speaker label
|
|
238
|
+
alignment=None, # Optional word-level alignment
|
|
239
|
+
)
|
|
240
|
+
|
|
241
|
+
# Properties
|
|
242
|
+
sup.end # start + duration
|
|
243
|
+
sup.text # Caption text
|
|
244
|
+
sup.speaker # Speaker label
|
|
245
|
+
sup.alignment # Dict with "word" key containing AlignmentItem list
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
## License
|
|
249
|
+
|
|
250
|
+
Apache-2.0
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ['setuptools>=60,<80', 'wheel']
|
|
3
|
+
build-backend = 'setuptools.build_meta'
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = 'lattifai-captions'
|
|
7
|
+
version = '0.1.4'
|
|
8
|
+
description = "Caption/subtitle processing library with multi-format support (SRT, VTT, ASS, TTML, TextGrid, NLE formats)"
|
|
9
|
+
keywords = ['captions', 'subtitles', 'srt', 'vtt', 'ass', 'ttml', 'textgrid', 'forced alignment']
|
|
10
|
+
readme = 'README.md'
|
|
11
|
+
license = {text = 'Apache-2.0'}
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = 'LattifAI Technologies', email = 'tech@lattifai.com' },
|
|
14
|
+
]
|
|
15
|
+
maintainers = [
|
|
16
|
+
{ name = 'Lattice', email = 'tech@lattifai.com' },
|
|
17
|
+
]
|
|
18
|
+
requires-python = '>=3.10,<3.15'
|
|
19
|
+
|
|
20
|
+
classifiers = [
|
|
21
|
+
'Development Status :: 4 - Beta',
|
|
22
|
+
'Intended Audience :: Developers',
|
|
23
|
+
'Intended Audience :: Science/Research',
|
|
24
|
+
'License :: OSI Approved :: Apache Software License',
|
|
25
|
+
'Programming Language :: Python :: 3.10',
|
|
26
|
+
'Programming Language :: Python :: 3.11',
|
|
27
|
+
'Programming Language :: Python :: 3.12',
|
|
28
|
+
'Programming Language :: Python :: 3.13',
|
|
29
|
+
'Programming Language :: Python :: 3.14',
|
|
30
|
+
'Operating System :: OS Independent',
|
|
31
|
+
'Topic :: Multimedia :: Sound/Audio',
|
|
32
|
+
'Topic :: Multimedia :: Video',
|
|
33
|
+
'Topic :: Text Processing',
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
dependencies = [
|
|
37
|
+
"pysubs2>=1.6.0",
|
|
38
|
+
"praatio>=6.0.0",
|
|
39
|
+
"tgt>=1.4.0",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
[project.optional-dependencies]
|
|
43
|
+
splitting = [
|
|
44
|
+
"wtpsplit>=2.1.7",
|
|
45
|
+
"onnxruntime",
|
|
46
|
+
"huggingface_hub>=0.20.0",
|
|
47
|
+
"modelscope>=1.33.0",
|
|
48
|
+
]
|
|
49
|
+
dev = [
|
|
50
|
+
"pytest>=8.0.0",
|
|
51
|
+
"pytest-cov>=4.1.0",
|
|
52
|
+
"wtpsplit>=2.1.7",
|
|
53
|
+
"onnxruntime",
|
|
54
|
+
"huggingface_hub>=0.20.0",
|
|
55
|
+
"modelscope>=1.33.0",
|
|
56
|
+
]
|
|
57
|
+
|
|
58
|
+
[project.urls]
|
|
59
|
+
Homepage = 'https://github.com/lattifai/captions'
|
|
60
|
+
Documentation = 'https://github.com/lattifai/captions/blob/main/README.md'
|
|
61
|
+
'Bug Tracker' = 'https://github.com/lattifai/captions/issues'
|
|
62
|
+
|
|
63
|
+
[tool.setuptools.packages.find]
|
|
64
|
+
where = ["src"]
|
|
65
|
+
namespaces = true
|
|
66
|
+
|
|
67
|
+
[tool.setuptools.package-dir]
|
|
68
|
+
"" = "src"
|
|
69
|
+
|
|
70
|
+
[tool.pytest.ini_options]
|
|
71
|
+
addopts = "--durations=10"
|
|
72
|
+
testpaths = ["tests"]
|