lattifai 1.2.2__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/_init.py +20 -0
- lattifai/alignment/__init__.py +2 -3
- lattifai/alignment/lattice1_aligner.py +117 -4
- lattifai/alignment/lattice1_worker.py +47 -4
- lattifai/alignment/segmenter.py +3 -2
- lattifai/alignment/text_align.py +2 -1
- lattifai/alignment/tokenizer.py +56 -29
- lattifai/audio2.py +162 -183
- lattifai/cli/alignment.py +5 -0
- lattifai/cli/caption.py +6 -6
- lattifai/cli/transcribe.py +1 -5
- lattifai/cli/youtube.py +3 -0
- lattifai/client.py +41 -12
- lattifai/config/__init__.py +21 -3
- lattifai/config/alignment.py +7 -0
- lattifai/config/caption.py +13 -243
- lattifai/config/client.py +16 -0
- lattifai/config/event.py +102 -0
- lattifai/config/transcription.py +25 -1
- lattifai/data/__init__.py +8 -0
- lattifai/data/caption.py +228 -0
- lattifai/errors.py +78 -53
- lattifai/event/__init__.py +65 -0
- lattifai/event/lattifai.py +166 -0
- lattifai/mixin.py +22 -17
- lattifai/transcription/base.py +2 -1
- lattifai/transcription/gemini.py +147 -16
- lattifai/transcription/lattifai.py +8 -11
- lattifai/types.py +1 -1
- lattifai/youtube/client.py +143 -48
- {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/METADATA +129 -58
- lattifai-1.3.1.dist-info/RECORD +57 -0
- lattifai/__init__.py +0 -88
- lattifai/alignment/sentence_splitter.py +0 -350
- lattifai/caption/__init__.py +0 -96
- lattifai/caption/caption.py +0 -661
- lattifai/caption/formats/__init__.py +0 -199
- lattifai/caption/formats/base.py +0 -211
- lattifai/caption/formats/gemini.py +0 -722
- lattifai/caption/formats/json.py +0 -194
- lattifai/caption/formats/lrc.py +0 -309
- lattifai/caption/formats/nle/__init__.py +0 -9
- lattifai/caption/formats/nle/audition.py +0 -561
- lattifai/caption/formats/nle/avid.py +0 -423
- lattifai/caption/formats/nle/fcpxml.py +0 -549
- lattifai/caption/formats/nle/premiere.py +0 -589
- lattifai/caption/formats/pysubs2.py +0 -642
- lattifai/caption/formats/sbv.py +0 -147
- lattifai/caption/formats/tabular.py +0 -338
- lattifai/caption/formats/textgrid.py +0 -193
- lattifai/caption/formats/ttml.py +0 -652
- lattifai/caption/formats/vtt.py +0 -469
- lattifai/caption/parsers/__init__.py +0 -9
- lattifai/caption/parsers/text_parser.py +0 -147
- lattifai/caption/standardize.py +0 -636
- lattifai/caption/supervision.py +0 -34
- lattifai/caption/utils.py +0 -474
- lattifai-1.2.2.dist-info/RECORD +0 -76
- {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/WHEEL +0 -0
- {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/entry_points.txt +0 -0
- {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/licenses/LICENSE +0 -0
- {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/top_level.txt +0 -0
|
@@ -1,561 +0,0 @@
|
|
|
1
|
-
"""Adobe Audition marker CSV format writer for audio post-production.
|
|
2
|
-
|
|
3
|
-
This module provides functionality to export captions as Adobe Audition markers,
|
|
4
|
-
enabling audio editors to navigate and search transcripts in their audio editing workflow.
|
|
5
|
-
|
|
6
|
-
Format specification (Audition CSV):
|
|
7
|
-
- Header: Name,Start,Duration,Time Format,Type,Description
|
|
8
|
-
- Time Format: "decimal" (seconds with decimal)
|
|
9
|
-
- Type: "Cue" for markers
|
|
10
|
-
"""
|
|
11
|
-
|
|
12
|
-
import csv
|
|
13
|
-
from dataclasses import dataclass
|
|
14
|
-
from io import StringIO
|
|
15
|
-
from pathlib import Path
|
|
16
|
-
from typing import List, Optional, Union
|
|
17
|
-
|
|
18
|
-
from lhotse.utils import Pathlike
|
|
19
|
-
|
|
20
|
-
from ...supervision import Supervision
|
|
21
|
-
from .. import register_writer
|
|
22
|
-
from ..base import FormatReader, FormatWriter
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
@dataclass
class AuditionCSVConfig:
    """Options for exporting captions as Adobe Audition CSV markers.

    Attributes:
        time_format: How marker times are expressed, either "decimal"
            or "samples".
        marker_type: Kind of marker to emit ("Cue", "Subclip", "Track").
        include_speaker_in_name: Whether the speaker name is made part of
            the marker name.
        use_description: Whether the caption text is placed in the
            description field.
        sample_rate: Sample rate; only relevant when time_format="samples".
    """

    # Field order is part of the positional constructor signature.
    time_format: str = "decimal"
    marker_type: str = "Cue"
    include_speaker_in_name: bool = True
    use_description: bool = True
    sample_rate: int = 48000
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
class AuditionCSVWriter:
    """Writer for Adobe Audition marker CSV format.

    Generates CSV files compatible with Adobe Audition's marker import feature,
    allowing transcripts to be imported as navigable markers in audio projects.

    Example:
        >>> from lattifai.caption import Caption
        >>> from lattifai.caption.formats.nle.audition import AuditionCSVWriter, AuditionCSVConfig
        >>> caption = Caption.read("input.srt")
        >>> config = AuditionCSVConfig(include_speaker_in_name=True)
        >>> AuditionCSVWriter.write(caption.supervisions, "markers.csv", config)
    """

    # CSV header required by Adobe Audition
    HEADER = ["Name", "Start", "Duration", "Time Format", "Type", "Description"]

    @classmethod
    def _format_time(
        cls,
        seconds: float,
        time_format: str = "decimal",
        sample_rate: int = 48000,
    ) -> str:
        """Format a time value for one CSV cell.

        Args:
            seconds: Time in seconds.
            time_format: "samples" yields a rounded sample count; any other
                value yields seconds with three decimal places.
            sample_rate: Samples per second, used only for "samples".

        Returns:
            The formatted time string.
        """
        if time_format == "samples":
            return str(int(round(seconds * sample_rate)))
        # Decimal format with millisecond precision
        return f"{seconds:.3f}"

    @classmethod
    def _format_marker_name(
        cls,
        supervision: "Supervision",
        index: int,
        include_speaker: bool,
    ) -> str:
        """Build the marker name for one supervision.

        Args:
            supervision: Segment whose ``speaker`` attribute may be used.
            index: Marker index (1-based), zero-padded to three digits.
            include_speaker: Whether to prefix the name with the speaker.

        Returns:
            "<speaker> - Marker NNN" when a speaker is requested and present,
            otherwise "Marker NNN".
        """
        base = f"Marker {index:03d}"
        if include_speaker and supervision.speaker:
            return f"{supervision.speaker} - {base}"
        return base

    @classmethod
    def _generate_csv_content(
        cls,
        supervisions: List["Supervision"],
        config: "AuditionCSVConfig",
    ) -> str:
        """Render the header plus one marker row per supervision.

        Args:
            supervisions: Segments providing ``start``, ``duration``,
                ``text`` and ``speaker``.
            config: Export configuration.

        Returns:
            CSV content as a string (rows end with the csv module's default
            line terminator).
        """
        buffer = StringIO()
        writer = csv.writer(buffer, quoting=csv.QUOTE_MINIMAL)
        writer.writerow(cls.HEADER)

        for index, sup in enumerate(supervisions, 1):
            # The description column stays empty unless the config asks for text.
            description = ""
            if config.use_description and sup.text:
                description = sup.text.strip()

            writer.writerow(
                [
                    cls._format_marker_name(sup, index, config.include_speaker_in_name),
                    cls._format_time(sup.start, config.time_format, config.sample_rate),
                    cls._format_time(sup.duration, config.time_format, config.sample_rate),
                    config.time_format,
                    config.marker_type,
                    description,
                ]
            )

        return buffer.getvalue()

    @classmethod
    def write(
        cls,
        supervisions: List["Supervision"],
        output_path: "Pathlike",
        config: Optional["AuditionCSVConfig"] = None,
    ) -> Path:
        """Write supervisions to an Audition CSV marker file.

        Args:
            supervisions: List of supervision segments.
            output_path: Output file path.
            config: Export configuration; defaults to ``AuditionCSVConfig()``.

        Returns:
            Path to the written file.
        """
        if config is None:
            config = AuditionCSVConfig()

        output_path = Path(output_path)
        content = cls._generate_csv_content(supervisions, config)

        # newline="" keeps the line endings produced by the csv module intact.
        with open(output_path, "w", encoding="utf-8", newline="") as f:
            f.write(content)

        return output_path

    @classmethod
    def to_bytes(
        cls,
        supervisions: List["Supervision"],
        config: Optional["AuditionCSVConfig"] = None,
    ) -> bytes:
        """Convert supervisions to UTF-8 encoded Audition CSV bytes.

        Args:
            supervisions: List of supervision segments.
            config: Export configuration; defaults to ``AuditionCSVConfig()``.

        Returns:
            CSV content as bytes.
        """
        if config is None:
            config = AuditionCSVConfig()

        return cls._generate_csv_content(supervisions, config).encode("utf-8")
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
@dataclass
class EdiMarkerConfig:
    """Options for the EdiMarker (Pro Tools) compatible CSV export.

    Attributes:
        include_speaker: Whether the speaker name is included in the marker.
        marker_prefix: Text placed in front of each marker name.
    """

    # Field order is part of the positional constructor signature.
    include_speaker: bool = True
    marker_prefix: str = "M"
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
class EdiMarkerWriter:
    """Writer for EdiMarker-compatible CSV format (Pro Tools bridge).

    EdiMarker is a third-party tool that converts CSV files to Pro Tools marker format.
    This writer generates CSV files compatible with EdiMarker's expected input format.

    Example:
        >>> from lattifai.caption import Caption
        >>> from lattifai.caption.formats.nle.audition import EdiMarkerWriter
        >>> caption = Caption.read("input.srt")
        >>> EdiMarkerWriter.write(caption.supervisions, "markers_edimarker.csv")
    """

    # EdiMarker expected CSV header
    HEADER = ["Name", "Start", "End", "Text"]

    @classmethod
    def _seconds_to_timecode(cls, seconds: float, fps: float = 24.0) -> str:
        """Convert seconds to a non-drop-frame timecode ``HH:MM:SS:FF``.

        Args:
            seconds: Time in seconds; negative values are clamped to zero.
            fps: Frame rate. Fractional rates (e.g. 29.97) are counted
                against their rounded nominal rate (30).

        Returns:
            Timecode string.

        Raises:
            ValueError: If ``fps`` is not positive.
        """
        if fps <= 0:
            raise ValueError(f"fps must be positive, got {fps!r}")

        # Split frames with integer arithmetic: float modulo against a
        # fractional fps can misreport the frame field by rounding error.
        nominal_fps = max(1, int(round(fps)))
        total_frames = max(0, int(round(seconds * fps)))

        total_seconds, frames = divmod(total_frames, nominal_fps)
        total_minutes, secs = divmod(total_seconds, 60)
        hours, mins = divmod(total_minutes, 60)
        return f"{hours:02d}:{mins:02d}:{secs:02d}:{frames:02d}"

    @classmethod
    def _generate_csv_content(
        cls,
        supervisions: List["Supervision"],
        config: "EdiMarkerConfig",
        fps: float = 24.0,
    ) -> str:
        """Render the header plus one marker row per supervision.

        Args:
            supervisions: Segments providing ``start``, ``end``, ``text``
                and ``speaker``.
            config: Export configuration.
            fps: Frame rate for timecode conversion.

        Returns:
            CSV content as a string.
        """
        buffer = StringIO()
        writer = csv.writer(buffer, quoting=csv.QUOTE_MINIMAL)
        writer.writerow(cls.HEADER)

        for index, sup in enumerate(supervisions, 1):
            name = f"{config.marker_prefix}{index:03d}"
            if config.include_speaker and sup.speaker:
                name = f"{name}_{sup.speaker}"

            writer.writerow(
                [
                    name,
                    cls._seconds_to_timecode(sup.start, fps),
                    cls._seconds_to_timecode(sup.end, fps),
                    sup.text.strip() if sup.text else "",
                ]
            )

        return buffer.getvalue()

    @classmethod
    def write(
        cls,
        supervisions: List["Supervision"],
        output_path: "Pathlike",
        config: Optional["EdiMarkerConfig"] = None,
        fps: float = 24.0,
    ) -> Path:
        """Write supervisions to an EdiMarker-compatible CSV file.

        Args:
            supervisions: List of supervision segments.
            output_path: Output file path.
            config: Export configuration; defaults to ``EdiMarkerConfig()``.
            fps: Frame rate for timecode conversion.

        Returns:
            Path to the written file.
        """
        if config is None:
            config = EdiMarkerConfig()

        output_path = Path(output_path)
        content = cls._generate_csv_content(supervisions, config, fps)

        # newline="" keeps the line endings produced by the csv module intact.
        with open(output_path, "w", encoding="utf-8", newline="") as f:
            f.write(content)

        return output_path

    @classmethod
    def to_bytes(
        cls,
        supervisions: List["Supervision"],
        config: Optional["EdiMarkerConfig"] = None,
        fps: float = 24.0,
    ) -> bytes:
        """Convert supervisions to UTF-8 encoded EdiMarker CSV bytes.

        Args:
            supervisions: List of supervision segments.
            config: Export configuration; defaults to ``EdiMarkerConfig()``.
            fps: Frame rate for timecode conversion.

        Returns:
            CSV content as bytes.
        """
        if config is None:
            config = EdiMarkerConfig()

        return cls._generate_csv_content(supervisions, config, fps).encode("utf-8")
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
@register_writer("audition_csv")
class AuditionCSVFormat(FormatWriter):
    """Format handler that registers the Audition CSV marker writer as ``audition_csv``."""

    format_id = "audition_csv"
    extensions = [".csv"]
    description = "Adobe Audition CSV Marker Format"

    @classmethod
    def write(
        cls,
        supervisions: List[Supervision],
        output_path: Pathlike,
        include_speaker: bool = True,
        **kwargs,
    ):
        """Write supervisions to an Audition CSV file.

        Args:
            supervisions: List of supervision segments.
            output_path: Path to output file.
            include_speaker: Whether to include speaker labels in marker names.
            **kwargs: Extra ``AuditionCSVConfig`` fields.

        Returns:
            Path to written file.
        """
        # These options are not supported by Audition CSV, so discard them
        # before the remaining kwargs are handed to the config constructor.
        for unsupported in ("word_level", "karaoke", "karaoke_config", "metadata"):
            kwargs.pop(unsupported, None)

        options = AuditionCSVConfig(include_speaker_in_name=include_speaker, **kwargs)
        return AuditionCSVWriter.write(supervisions, output_path, options)

    @classmethod
    def to_bytes(
        cls,
        supervisions: List[Supervision],
        include_speaker: bool = True,
        **kwargs,
    ) -> bytes:
        """Convert supervisions to Audition CSV bytes.

        Args:
            supervisions: List of supervision segments.
            include_speaker: Whether to include speaker labels in marker names.
            **kwargs: Extra ``AuditionCSVConfig`` fields.

        Returns:
            Audition CSV content as bytes.
        """
        # These options are not supported by Audition CSV, so discard them
        # before the remaining kwargs are handed to the config constructor.
        for unsupported in ("word_level", "karaoke", "karaoke_config", "metadata"):
            kwargs.pop(unsupported, None)

        options = AuditionCSVConfig(include_speaker_in_name=include_speaker, **kwargs)
        return AuditionCSVWriter.to_bytes(supervisions, options)
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
class AuditionCSVReader:
    """Reader for Adobe Audition CSV markers."""

    @classmethod
    def read(cls, source: str, normalize_text: bool = True, **kwargs) -> List["Supervision"]:
        """Parse Audition CSV content into supervisions sorted by start time.

        Expects the header ``Name,Start,Duration,Time Format,Type,Description``.
        Rows are skipped when Start/Duration are missing or not numeric, when
        the duration is not positive, or when no caption text can be found.

        Args:
            source: CSV content (not a path).
            normalize_text: Strip surrounding whitespace from the text.
            **kwargs: ``sample_rate`` (default 48000) converts rows whose
                "Time Format" is "samples" into seconds.

        Returns:
            List of supervisions ordered by ``start``.
        """
        sample_rate = kwargs.get("sample_rate", 48000)
        supervisions = []

        # Use csv module to handle quoting correctly
        for row in csv.DictReader(StringIO(source)):
            if "Start" not in row or "Duration" not in row:
                continue

            try:
                start_sec = float(row["Start"])
                duration_sec = float(row["Duration"])
            except (TypeError, ValueError):
                # DictReader fills cells missing from a short row with None,
                # which float() rejects with TypeError rather than ValueError.
                continue

            if row.get("Time Format", "decimal") == "samples":
                start_sec = start_sec / sample_rate
                duration_sec = duration_sec / sample_rate

            # The writer puts the caption text in Description, so prefer it.
            text = row.get("Description", "")
            name = row.get("Name", "")
            # Fall back to Name unless it looks like a generic "Marker 01".
            if not text and name and not name.startswith("Marker "):
                text = name

            if duration_sec > 0 and text:
                supervisions.append(
                    Supervision(
                        # uuid is imported at module level further down the file.
                        id=str(uuid.uuid4()),
                        recording_id="audition_import",
                        start=start_sec,
                        duration=duration_sec,
                        text=text.strip() if normalize_text else text,
                    )
                )

        return sorted(supervisions, key=lambda s: s.start)
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
import uuid
|
|
469
|
-
|
|
470
|
-
from .. import register_reader
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
@register_reader("audition_csv")
class AuditionCSVReaderHandler(FormatReader):
    """Format handler that registers the Audition CSV reader as ``audition_csv``."""

    format_id = "audition_csv"
    extensions = [".csv"]

    @classmethod
    def can_read(cls, path: Union[Pathlike, str]) -> bool:
        """Return True when ``path`` is a path-like value ending in ``.csv``.

        Only the extension is inspected; .csv is too generic to identify an
        Audition export, so the decision is left to upstream detection.
        """
        is_path = isinstance(path, (str, Path)) and not cls.is_content(path)
        if not is_path:
            return False
        return str(path).lower().endswith(".csv")

    @classmethod
    def read(cls, source: Union[Pathlike, str], normalize_text: bool = True, **kwargs) -> List[Supervision]:
        """Load CSV text from a file path or raw content and parse it.

        Args:
            source: A file path, or the CSV content itself.
            normalize_text: Forwarded to ``AuditionCSVReader.read``.
            **kwargs: Forwarded to ``AuditionCSVReader.read``.

        Returns:
            Supervisions parsed by ``AuditionCSVReader.read``.
        """
        from_file = isinstance(source, (str, Path)) and not cls.is_content(source)
        if from_file:
            with open(source, "r", encoding="utf-8") as handle:
                csv_text = handle.read()
        else:
            csv_text = str(source)

        return AuditionCSVReader.read(csv_text, normalize_text=normalize_text, **kwargs)
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
@register_writer("edimarker_csv")
class EdiMarkerCSVFormat(FormatWriter):
    """Format handler that registers the EdiMarker (Pro Tools) CSV writer as ``edimarker_csv``."""

    format_id = "edimarker_csv"
    extensions = [".csv"]
    description = "EdiMarker (Pro Tools) CSV Marker Format"

    @classmethod
    def write(
        cls,
        supervisions: List[Supervision],
        output_path: Pathlike,
        include_speaker: bool = True,
        fps: float = 24.0,
        **kwargs,
    ):
        """Write supervisions to an EdiMarker CSV file.

        Args:
            supervisions: List of supervision segments.
            output_path: Path to output file.
            include_speaker: Whether to include speaker labels.
            fps: Frame rate for timecode conversion.
            **kwargs: Extra ``EdiMarkerConfig`` fields.

        Returns:
            Path to written file.
        """
        # These options are not supported by EdiMarker, so discard them
        # before the remaining kwargs are handed to the config constructor.
        for unsupported in ("word_level", "karaoke", "karaoke_config", "metadata"):
            kwargs.pop(unsupported, None)

        options = EdiMarkerConfig(include_speaker=include_speaker, **kwargs)
        return EdiMarkerWriter.write(supervisions, output_path, options, fps=fps)

    @classmethod
    def to_bytes(
        cls,
        supervisions: List[Supervision],
        include_speaker: bool = True,
        fps: float = 24.0,
        **kwargs,
    ) -> bytes:
        """Convert supervisions to EdiMarker CSV bytes.

        Args:
            supervisions: List of supervision segments.
            include_speaker: Whether to include speaker labels.
            fps: Frame rate for timecode conversion.
            **kwargs: Extra ``EdiMarkerConfig`` fields.

        Returns:
            EdiMarker CSV content as bytes.
        """
        # These options are not supported by EdiMarker, so discard them
        # before the remaining kwargs are handed to the config constructor.
        for unsupported in ("word_level", "karaoke", "karaoke_config", "metadata"):
            kwargs.pop(unsupported, None)

        options = EdiMarkerConfig(include_speaker=include_speaker, **kwargs)
        return EdiMarkerWriter.to_bytes(supervisions, options, fps=fps)
|