lattifai 1.2.2__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. lattifai/_init.py +20 -0
  2. lattifai/alignment/__init__.py +2 -3
  3. lattifai/alignment/lattice1_aligner.py +117 -4
  4. lattifai/alignment/lattice1_worker.py +47 -4
  5. lattifai/alignment/segmenter.py +3 -2
  6. lattifai/alignment/text_align.py +2 -1
  7. lattifai/alignment/tokenizer.py +56 -29
  8. lattifai/audio2.py +162 -183
  9. lattifai/cli/alignment.py +5 -0
  10. lattifai/cli/caption.py +6 -6
  11. lattifai/cli/transcribe.py +1 -5
  12. lattifai/cli/youtube.py +3 -0
  13. lattifai/client.py +41 -12
  14. lattifai/config/__init__.py +21 -3
  15. lattifai/config/alignment.py +7 -0
  16. lattifai/config/caption.py +13 -243
  17. lattifai/config/client.py +16 -0
  18. lattifai/config/event.py +102 -0
  19. lattifai/config/transcription.py +25 -1
  20. lattifai/data/__init__.py +8 -0
  21. lattifai/data/caption.py +228 -0
  22. lattifai/errors.py +78 -53
  23. lattifai/event/__init__.py +65 -0
  24. lattifai/event/lattifai.py +166 -0
  25. lattifai/mixin.py +22 -17
  26. lattifai/transcription/base.py +2 -1
  27. lattifai/transcription/gemini.py +147 -16
  28. lattifai/transcription/lattifai.py +8 -11
  29. lattifai/types.py +1 -1
  30. lattifai/youtube/client.py +143 -48
  31. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/METADATA +129 -58
  32. lattifai-1.3.1.dist-info/RECORD +57 -0
  33. lattifai/__init__.py +0 -88
  34. lattifai/alignment/sentence_splitter.py +0 -350
  35. lattifai/caption/__init__.py +0 -96
  36. lattifai/caption/caption.py +0 -661
  37. lattifai/caption/formats/__init__.py +0 -199
  38. lattifai/caption/formats/base.py +0 -211
  39. lattifai/caption/formats/gemini.py +0 -722
  40. lattifai/caption/formats/json.py +0 -194
  41. lattifai/caption/formats/lrc.py +0 -309
  42. lattifai/caption/formats/nle/__init__.py +0 -9
  43. lattifai/caption/formats/nle/audition.py +0 -561
  44. lattifai/caption/formats/nle/avid.py +0 -423
  45. lattifai/caption/formats/nle/fcpxml.py +0 -549
  46. lattifai/caption/formats/nle/premiere.py +0 -589
  47. lattifai/caption/formats/pysubs2.py +0 -642
  48. lattifai/caption/formats/sbv.py +0 -147
  49. lattifai/caption/formats/tabular.py +0 -338
  50. lattifai/caption/formats/textgrid.py +0 -193
  51. lattifai/caption/formats/ttml.py +0 -652
  52. lattifai/caption/formats/vtt.py +0 -469
  53. lattifai/caption/parsers/__init__.py +0 -9
  54. lattifai/caption/parsers/text_parser.py +0 -147
  55. lattifai/caption/standardize.py +0 -636
  56. lattifai/caption/supervision.py +0 -34
  57. lattifai/caption/utils.py +0 -474
  58. lattifai-1.2.2.dist-info/RECORD +0 -76
  59. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/WHEEL +0 -0
  60. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/entry_points.txt +0 -0
  61. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/licenses/LICENSE +0 -0
  62. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/top_level.txt +0 -0
@@ -1,561 +0,0 @@
1
- """Adobe Audition marker CSV format writer for audio post-production.
2
-
3
- This module provides functionality to export captions as Adobe Audition markers,
4
- enabling audio editors to navigate and search transcripts in their audio editing workflow.
5
-
6
- Format specification (Audition CSV):
7
- - Header: Name,Start,Duration,Time Format,Type,Description
8
- - Time Format: "decimal" (seconds with decimal)
9
- - Type: "Cue" for markers
10
- """
11
-
12
- import csv
13
- from dataclasses import dataclass
14
- from io import StringIO
15
- from pathlib import Path
16
- from typing import List, Optional, Union
17
-
18
- from lhotse.utils import Pathlike
19
-
20
- from ...supervision import Supervision
21
- from .. import register_writer
22
- from ..base import FormatReader, FormatWriter
23
-
24
-
25
@dataclass
class AuditionCSVConfig:
    """Options that shape an Adobe Audition marker CSV export.

    Attributes:
        time_format: Unit written to the Start/Duration columns, either
            "decimal" or "samples".
        marker_type: Value written to the Type column ("Cue", "Subclip", "Track").
        include_speaker_in_name: Whether the speaker label is included in the marker name.
        use_description: Whether the caption text is written to the Description column.
        sample_rate: Sample rate; only relevant when ``time_format`` is "samples".
    """

    time_format: str = "decimal"
    marker_type: str = "Cue"
    include_speaker_in_name: bool = True
    use_description: bool = True
    sample_rate: int = 48000
42
-
43
-
44
class AuditionCSVWriter:
    """Serialize supervisions as an Adobe Audition marker CSV.

    Every supervision becomes one marker row, so a transcript can be imported
    into an Audition project and navigated marker by marker.

    Example:
        >>> from lattifai.caption import Caption
        >>> from lattifai.caption.formats.nle.audition import AuditionCSVWriter, AuditionCSVConfig
        >>> caption = Caption.read("input.srt")
        >>> config = AuditionCSVConfig(include_speaker_in_name=True)
        >>> AuditionCSVWriter.write(caption.supervisions, "markers.csv", config)
    """

    # Column names, in the order each marker row is written.
    HEADER = ["Name", "Start", "Duration", "Time Format", "Type", "Description"]

    @classmethod
    def _format_time(
        cls,
        seconds: float,
        time_format: str = "decimal",
        sample_rate: int = 48000,
    ) -> str:
        """Render a time value for a Start/Duration cell.

        Args:
            seconds: Time in seconds.
            time_format: "samples" yields a rounded sample count; any other
                value yields seconds with three decimals.
            sample_rate: Samples per second used for the "samples" conversion.

        Returns:
            The formatted time as a string.
        """
        if time_format != "samples":
            # Millisecond precision for the decimal representation.
            return f"{seconds:.3f}"
        return str(int(round(seconds * sample_rate)))

    @classmethod
    def _format_marker_name(
        cls,
        supervision: "Supervision",
        index: int,
        include_speaker: bool,
    ) -> str:
        """Build the Name cell for one marker.

        Args:
            supervision: Segment the marker is derived from.
            index: 1-based marker number, zero-padded to three digits.
            include_speaker: Prefix the name with the speaker when one is set.

        Returns:
            "<speaker> - Marker NNN" or "Marker NNN".
        """
        label = f"Marker {index:03d}"
        if include_speaker and supervision.speaker:
            return f"{supervision.speaker} - {label}"
        return label

    @classmethod
    def _generate_csv_content(
        cls,
        supervisions: List["Supervision"],
        config: AuditionCSVConfig,
    ) -> str:
        """Produce the full CSV document (header plus one row per supervision).

        Args:
            supervisions: Segments to export, in output order.
            config: Export configuration.

        Returns:
            The CSV text.
        """
        buffer = StringIO()
        rows = csv.writer(buffer, quoting=csv.QUOTE_MINIMAL)
        rows.writerow(cls.HEADER)

        for position, segment in enumerate(supervisions, 1):
            marker_name = cls._format_marker_name(segment, position, config.include_speaker_in_name)
            start_cell = cls._format_time(segment.start, config.time_format, config.sample_rate)
            duration_cell = cls._format_time(segment.duration, config.time_format, config.sample_rate)
            # The Description cell stays empty unless text export is enabled and text exists.
            description = segment.text.strip() if config.use_description and segment.text else ""
            rows.writerow(
                [marker_name, start_cell, duration_cell, config.time_format, config.marker_type, description]
            )

        return buffer.getvalue()

    @classmethod
    def write(
        cls,
        supervisions: List["Supervision"],
        output_path: Pathlike,
        config: Optional[AuditionCSVConfig] = None,
    ) -> Path:
        """Write the marker CSV to disk.

        Args:
            supervisions: Segments to export.
            output_path: Destination file path.
            config: Export configuration; defaults to ``AuditionCSVConfig()``.

        Returns:
            The destination as a ``Path``.
        """
        settings = AuditionCSVConfig() if config is None else config
        destination = Path(output_path)
        payload = cls._generate_csv_content(supervisions, settings)

        # newline="" keeps the csv module's own line terminators untouched.
        with destination.open("w", encoding="utf-8", newline="") as handle:
            handle.write(payload)

        return destination

    @classmethod
    def to_bytes(
        cls,
        supervisions: List["Supervision"],
        config: Optional[AuditionCSVConfig] = None,
    ) -> bytes:
        """Return the marker CSV as UTF-8 encoded bytes.

        Args:
            supervisions: Segments to export.
            config: Export configuration; defaults to ``AuditionCSVConfig()``.

        Returns:
            The encoded CSV document.
        """
        settings = AuditionCSVConfig() if config is None else config
        return cls._generate_csv_content(supervisions, settings).encode("utf-8")
199
-
200
-
201
@dataclass
class EdiMarkerConfig:
    """Options for the EdiMarker (Pro Tools) compatible CSV export.

    Attributes:
        include_speaker: Whether the speaker label is included in the marker name.
        marker_prefix: Text placed in front of the marker number.
    """

    include_speaker: bool = True
    marker_prefix: str = "M"
212
-
213
-
214
class EdiMarkerWriter:
    """Serialize supervisions as an EdiMarker-compatible CSV (Pro Tools bridge).

    EdiMarker is a third-party tool that converts CSV files into Pro Tools
    markers; this writer emits the Name/Start/End/Text layout it consumes.

    Example:
        >>> from lattifai.caption import Caption
        >>> from lattifai.caption.formats.nle.audition import EdiMarkerWriter
        >>> caption = Caption.read("input.srt")
        >>> EdiMarkerWriter.write(caption.supervisions, "markers_edimarker.csv")
    """

    # Column names, in the order each marker row is written.
    HEADER = ["Name", "Start", "End", "Text"]

    @classmethod
    def _seconds_to_timecode(cls, seconds: float, fps: float = 24.0) -> str:
        """Convert a time in seconds to an ``HH:MM:SS:FF`` timecode.

        Args:
            seconds: Time in seconds.
            fps: Frame rate used to derive the frame field.

        Returns:
            The timecode string.
        """
        frame_count = int(round(seconds * fps))
        whole_seconds = int(frame_count // fps)
        frames = int(frame_count % fps)
        total_minutes, secs = divmod(whole_seconds, 60)
        hours, mins = divmod(total_minutes, 60)
        return f"{hours:02d}:{mins:02d}:{secs:02d}:{frames:02d}"

    @classmethod
    def _generate_csv_content(
        cls,
        supervisions: List["Supervision"],
        config: EdiMarkerConfig,
        fps: float = 24.0,
    ) -> str:
        """Produce the full CSV document (header plus one row per supervision).

        Args:
            supervisions: Segments to export, in output order.
            config: Export configuration.
            fps: Frame rate for timecode conversion.

        Returns:
            The CSV text.
        """
        buffer = StringIO()
        rows = csv.writer(buffer, quoting=csv.QUOTE_MINIMAL)
        rows.writerow(cls.HEADER)

        for position, segment in enumerate(supervisions, 1):
            marker_name = f"{config.marker_prefix}{position:03d}"
            if config.include_speaker and segment.speaker:
                marker_name = f"{marker_name}_{segment.speaker}"

            start_tc = cls._seconds_to_timecode(segment.start, fps)
            end_tc = cls._seconds_to_timecode(segment.end, fps)
            caption_text = segment.text.strip() if segment.text else ""
            rows.writerow([marker_name, start_tc, end_tc, caption_text])

        return buffer.getvalue()

    @classmethod
    def write(
        cls,
        supervisions: List["Supervision"],
        output_path: Pathlike,
        config: Optional[EdiMarkerConfig] = None,
        fps: float = 24.0,
    ) -> Path:
        """Write the EdiMarker CSV to disk.

        Args:
            supervisions: Segments to export.
            output_path: Destination file path.
            config: Export configuration; defaults to ``EdiMarkerConfig()``.
            fps: Frame rate for timecode conversion.

        Returns:
            The destination as a ``Path``.
        """
        settings = EdiMarkerConfig() if config is None else config
        destination = Path(output_path)
        payload = cls._generate_csv_content(supervisions, settings, fps)

        # newline="" keeps the csv module's own line terminators untouched.
        with destination.open("w", encoding="utf-8", newline="") as handle:
            handle.write(payload)

        return destination

    @classmethod
    def to_bytes(
        cls,
        supervisions: List["Supervision"],
        config: Optional[EdiMarkerConfig] = None,
        fps: float = 24.0,
    ) -> bytes:
        """Return the EdiMarker CSV as UTF-8 encoded bytes.

        Args:
            supervisions: Segments to export.
            config: Export configuration; defaults to ``EdiMarkerConfig()``.
            fps: Frame rate for timecode conversion.

        Returns:
            The encoded CSV document.
        """
        settings = EdiMarkerConfig() if config is None else config
        return cls._generate_csv_content(supervisions, settings, fps).encode("utf-8")
340
-
341
-
342
@register_writer("audition_csv")
class AuditionCSVFormat(FormatWriter):
    """Registered writer that emits Adobe Audition CSV markers."""

    format_id = "audition_csv"
    extensions = [".csv"]
    description = "Adobe Audition CSV Marker Format"

    @classmethod
    def _config_from_options(cls, include_speaker: bool, options: dict) -> AuditionCSVConfig:
        """Build the export config from caller-supplied keyword options."""
        # word_level, karaoke, karaoke_config and metadata are not supported by
        # Audition CSV, so they are dropped before reaching the config constructor.
        for unsupported in ("word_level", "karaoke", "karaoke_config", "metadata"):
            options.pop(unsupported, None)
        return AuditionCSVConfig(include_speaker_in_name=include_speaker, **options)

    @classmethod
    def write(
        cls,
        supervisions: List[Supervision],
        output_path: Pathlike,
        include_speaker: bool = True,
        **kwargs,
    ):
        """Write supervisions to an Audition CSV file.

        Args:
            supervisions: List of supervision segments.
            output_path: Path to the output file.
            include_speaker: Whether to include speaker labels in marker names.
            **kwargs: Additional ``AuditionCSVConfig`` options.

        Returns:
            Path to the written file.
        """
        export_config = cls._config_from_options(include_speaker, kwargs)
        return AuditionCSVWriter.write(supervisions, output_path, export_config)

    @classmethod
    def to_bytes(
        cls,
        supervisions: List[Supervision],
        include_speaker: bool = True,
        **kwargs,
    ) -> bytes:
        """Convert supervisions to Audition CSV bytes.

        Args:
            supervisions: List of supervision segments.
            include_speaker: Whether to include speaker labels in marker names.
            **kwargs: Additional ``AuditionCSVConfig`` options.

        Returns:
            Audition CSV content as bytes.
        """
        export_config = cls._config_from_options(include_speaker, kwargs)
        return AuditionCSVWriter.to_bytes(supervisions, export_config)
401
-
402
-
403
class AuditionCSVReader:
    """Reader for Adobe Audition CSV markers.

    Expects the column layout produced by the writer in this module:
    ``Name,Start,Duration,Time Format,Type,Description``.
    """

    @classmethod
    def read(cls, source: str, normalize_text: bool = True, **kwargs) -> List[Supervision]:
        """Parse Audition CSV text into supervisions sorted by start time.

        Rows are skipped (never raised on) when they lack the Start/Duration
        columns, when either value is missing or not numeric, when the duration
        is not positive, or when no caption text can be determined.

        Args:
            source: CSV document as a string.
            normalize_text: Strip surrounding whitespace from the caption text.
            **kwargs: ``sample_rate`` (default 48000) is used to convert rows
                whose Time Format is "samples" into seconds.

        Returns:
            Supervisions ordered by ``start``.
        """
        sample_rate = kwargs.get("sample_rate", 48000)
        supervisions = []

        # The csv module takes care of quoting and embedded delimiters.
        for row in csv.DictReader(StringIO(source)):
            if "Start" not in row or "Duration" not in row:
                continue

            try:
                start_sec = float(row["Start"])
                duration_sec = float(row["Duration"])
            except (TypeError, ValueError):
                # ValueError: the cell is not numeric. TypeError: the row is
                # shorter than the header, so DictReader filled the cell with None.
                continue

            if row.get("Time Format", "decimal") == "samples":
                start_sec /= sample_rate
                duration_sec /= sample_rate

            # The writer stores caption text in Description and a generated
            # "Speaker - Marker N" label in Name, so Description is preferred.
            text = row.get("Description") or ""
            name = row.get("Name", "")
            if not text and name and not name.startswith("Marker "):
                # Fall back to Name unless it is a generic "Marker N" label.
                text = name

            if duration_sec > 0 and text:
                supervisions.append(
                    Supervision(
                        id=str(uuid.uuid4()),
                        recording_id="audition_import",
                        start=start_sec,
                        duration=duration_sec,
                        text=text.strip() if normalize_text else text,
                    )
                )

        return sorted(supervisions, key=lambda s: s.start)
466
-
467
-
468
- import uuid
469
-
470
- from .. import register_reader
471
-
472
-
473
@register_reader("audition_csv")
class AuditionCSVReaderHandler(FormatReader):
    """Registered reader that loads Audition CSV markers from a path or inline text."""

    format_id = "audition_csv"
    extensions = [".csv"]

    @classmethod
    def can_read(cls, path: Union[Pathlike, str]) -> bool:
        """Return True when ``path`` is a file path ending in ``.csv``.

        Only the extension is checked; the file is not opened. Because ``.csv``
        is generic, picking this reader over other CSV readers is left to the
        caller.
        """
        # NOTE(review): is_content is not defined in this class; presumably it is
        # inherited from FormatReader and tells inline content apart from a path.
        if isinstance(path, (str, Path)) and not cls.is_content(path):
            return str(path).lower().endswith(".csv")
        return False

    @classmethod
    def read(cls, source: Union[Pathlike, str], normalize_text: bool = True, **kwargs) -> List[Supervision]:
        """Load supervisions from a CSV file path or from inline CSV text.

        Args:
            source: File path, or the CSV document itself.
            normalize_text: Strip surrounding whitespace from caption text.
            **kwargs: Forwarded to ``AuditionCSVReader.read`` (e.g. ``sample_rate``).

        Returns:
            Supervisions parsed by ``AuditionCSVReader.read``.
        """
        if isinstance(source, (str, Path)) and not cls.is_content(source):
            # utf-8-sig decodes plain UTF-8 and also drops a leading BOM, which
            # would otherwise become part of the first header name ("\ufeffName").
            with open(source, "r", encoding="utf-8-sig") as f:
                content = f.read()
        else:
            content = str(source)
            if content.startswith("\ufeff"):
                # Same BOM handling for inline content.
                content = content[1:]

        return AuditionCSVReader.read(content, normalize_text=normalize_text, **kwargs)
497
-
498
-
499
@register_writer("edimarker_csv")
class EdiMarkerCSVFormat(FormatWriter):
    """Registered writer that emits EdiMarker (Pro Tools) CSV markers."""

    format_id = "edimarker_csv"
    extensions = [".csv"]
    description = "EdiMarker (Pro Tools) CSV Marker Format"

    @classmethod
    def _config_from_options(cls, include_speaker: bool, options: dict) -> EdiMarkerConfig:
        """Build the export config from caller-supplied keyword options."""
        # word_level, karaoke, karaoke_config and metadata are not supported by
        # EdiMarker, so they are dropped before reaching the config constructor.
        for unsupported in ("word_level", "karaoke", "karaoke_config", "metadata"):
            options.pop(unsupported, None)
        return EdiMarkerConfig(include_speaker=include_speaker, **options)

    @classmethod
    def write(
        cls,
        supervisions: List[Supervision],
        output_path: Pathlike,
        include_speaker: bool = True,
        fps: float = 24.0,
        **kwargs,
    ):
        """Write supervisions to an EdiMarker CSV file.

        Args:
            supervisions: List of supervision segments.
            output_path: Path to the output file.
            include_speaker: Whether to include speaker labels in marker names.
            fps: Frame rate for timecode conversion.
            **kwargs: Additional ``EdiMarkerConfig`` options.

        Returns:
            Path to the written file.
        """
        export_config = cls._config_from_options(include_speaker, kwargs)
        return EdiMarkerWriter.write(supervisions, output_path, export_config, fps=fps)

    @classmethod
    def to_bytes(
        cls,
        supervisions: List[Supervision],
        include_speaker: bool = True,
        fps: float = 24.0,
        **kwargs,
    ) -> bytes:
        """Convert supervisions to EdiMarker CSV bytes.

        Args:
            supervisions: List of supervision segments.
            include_speaker: Whether to include speaker labels in marker names.
            fps: Frame rate for timecode conversion.
            **kwargs: Additional ``EdiMarkerConfig`` options.

        Returns:
            EdiMarker CSV content as bytes.
        """
        export_config = cls._config_from_options(include_speaker, kwargs)
        return EdiMarkerWriter.to_bytes(supervisions, export_config, fps=fps)