lattifai 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. lattifai/alignment/__init__.py +10 -1
  2. lattifai/alignment/lattice1_aligner.py +66 -58
  3. lattifai/alignment/punctuation.py +38 -0
  4. lattifai/alignment/sentence_splitter.py +152 -21
  5. lattifai/alignment/text_align.py +440 -0
  6. lattifai/alignment/tokenizer.py +82 -40
  7. lattifai/caption/__init__.py +82 -6
  8. lattifai/caption/caption.py +335 -1141
  9. lattifai/caption/formats/__init__.py +199 -0
  10. lattifai/caption/formats/base.py +211 -0
  11. lattifai/caption/{gemini_reader.py → formats/gemini.py} +320 -60
  12. lattifai/caption/formats/json.py +194 -0
  13. lattifai/caption/formats/lrc.py +309 -0
  14. lattifai/caption/formats/nle/__init__.py +9 -0
  15. lattifai/caption/formats/nle/audition.py +561 -0
  16. lattifai/caption/formats/nle/avid.py +423 -0
  17. lattifai/caption/formats/nle/fcpxml.py +549 -0
  18. lattifai/caption/formats/nle/premiere.py +589 -0
  19. lattifai/caption/formats/pysubs2.py +642 -0
  20. lattifai/caption/formats/sbv.py +147 -0
  21. lattifai/caption/formats/tabular.py +338 -0
  22. lattifai/caption/formats/textgrid.py +193 -0
  23. lattifai/caption/formats/ttml.py +652 -0
  24. lattifai/caption/formats/vtt.py +469 -0
  25. lattifai/caption/parsers/__init__.py +9 -0
  26. lattifai/caption/{text_parser.py → parsers/text_parser.py} +4 -2
  27. lattifai/caption/standardize.py +636 -0
  28. lattifai/caption/utils.py +474 -0
  29. lattifai/cli/__init__.py +2 -1
  30. lattifai/cli/caption.py +108 -1
  31. lattifai/cli/transcribe.py +1 -1
  32. lattifai/cli/youtube.py +4 -1
  33. lattifai/client.py +33 -113
  34. lattifai/config/__init__.py +11 -1
  35. lattifai/config/alignment.py +7 -0
  36. lattifai/config/caption.py +267 -23
  37. lattifai/config/media.py +20 -0
  38. lattifai/diarization/__init__.py +41 -1
  39. lattifai/mixin.py +27 -15
  40. lattifai/transcription/base.py +6 -1
  41. lattifai/transcription/lattifai.py +19 -54
  42. lattifai/utils.py +7 -13
  43. lattifai/workflow/__init__.py +28 -4
  44. lattifai/workflow/file_manager.py +2 -5
  45. lattifai/youtube/__init__.py +43 -0
  46. lattifai/youtube/client.py +1170 -0
  47. lattifai/youtube/types.py +23 -0
  48. lattifai-1.2.2.dist-info/METADATA +615 -0
  49. lattifai-1.2.2.dist-info/RECORD +76 -0
  50. {lattifai-1.2.1.dist-info → lattifai-1.2.2.dist-info}/entry_points.txt +1 -2
  51. lattifai/caption/gemini_writer.py +0 -173
  52. lattifai/cli/app_installer.py +0 -142
  53. lattifai/cli/server.py +0 -44
  54. lattifai/server/app.py +0 -427
  55. lattifai/workflow/youtube.py +0 -577
  56. lattifai-1.2.1.dist-info/METADATA +0 -1134
  57. lattifai-1.2.1.dist-info/RECORD +0 -58
  58. {lattifai-1.2.1.dist-info → lattifai-1.2.2.dist-info}/WHEEL +0 -0
  59. {lattifai-1.2.1.dist-info → lattifai-1.2.2.dist-info}/licenses/LICENSE +0 -0
  60. {lattifai-1.2.1.dist-info → lattifai-1.2.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,549 @@
1
+ """FCPXML (Final Cut Pro XML) format writer for Final Cut Pro and DaVinci Resolve.
2
+
3
+ This module provides functionality to export captions in FCPXML v1.10 format,
4
+ which is compatible with Final Cut Pro and DaVinci Resolve.
5
+
6
+ Key features:
7
+ - Speaker diarization mapped to FCP Roles
8
+ - Text style definitions
9
+ - Bundle format support (.fcpxmld)
10
+ """
11
+
12
+ import os
13
+ import uuid
14
+ import xml.etree.ElementTree as ET
15
+ from dataclasses import dataclass, field
16
+ from pathlib import Path
17
+ from typing import Dict, List, Optional, Union
18
+ from xml.dom import minidom
19
+
20
+ from lhotse.utils import Pathlike
21
+
22
+ from ...supervision import Supervision
23
+ from .. import register_writer
24
+ from ..base import FormatReader, FormatWriter
25
+
26
+
27
@dataclass
class FCPXMLStyle:
    """Appearance of caption text in an FCPXML export.

    Attributes:
        font: Font family name.
        font_size: Font size in points.
        font_color: Font color in RGBA format (e.g., "1 1 1 1" for white).
        background_color: Background color in RGBA format, or None to leave
            the background unset.
        alignment: Text alignment ("left", "center", "right").
    """

    font: str = field(default="Helvetica")
    font_size: int = field(default=40)
    font_color: str = field(default="1 1 1 1")  # opaque white
    background_color: Optional[str] = field(default=None)
    alignment: str = field(default="center")
44
+
45
+
46
@dataclass
class FCPXMLConfig:
    """Options controlling how captions are exported to FCPXML.

    Attributes:
        fps: Frame rate; used to derive the frame duration of the format
            resource.
        map_speakers_to_roles: When true, each speaker gets its own FCP role
            and its own text style definition.
        default_style: Text style used when no speaker-specific style applies.
        speaker_styles: Per-speaker style overrides, keyed by speaker name.
        project_name: Name given to the FCPXML project.
        event_name: Name given to the FCPXML event.
        use_bundle: Export as a .fcpxmld bundle (a directory containing
            Info.fcpxml) instead of a single .fcpxml file.
    """

    fps: float = field(default=25.0)
    map_speakers_to_roles: bool = field(default=True)
    default_style: FCPXMLStyle = field(default_factory=FCPXMLStyle)
    speaker_styles: Dict[str, FCPXMLStyle] = field(default_factory=dict)
    project_name: str = field(default="LattifAI Captions")
    event_name: str = field(default="LattifAI Import")
    use_bundle: bool = field(default=True)
67
+
68
+
69
class FCPXMLWriter:
    """Writer for FCPXML (Final Cut Pro XML) format.

    Generates FCPXML v1.10 documents for Final Cut Pro and DaVinci Resolve.
    Speakers can be mapped to FCP roles, each with its own text style.

    Example:
        >>> from lattifai.caption import Caption
        >>> from lattifai.caption.formats.nle.fcpxml import FCPXMLWriter, FCPXMLConfig
        >>> caption = Caption.read("input.srt")
        >>> config = FCPXMLConfig(map_speakers_to_roles=True)
        >>> FCPXMLWriter.write(caption.supervisions, "output.fcpxmld", config)
    """

    FCPXML_VERSION = "1.10"

    @classmethod
    def _seconds_to_fcpxml_time(cls, seconds: float, fps: float = 25.0) -> str:
        """Convert seconds to an FCPXML rational time string.

        The value is always expressed in thousandths of a second, giving
        millisecond precision.

        Args:
            seconds: Time in seconds.
            fps: Accepted for interface compatibility; it does not influence
                the result.

        Returns:
            Time string of the form "<milliseconds>/1000s" (e.g. "1500/1000s").
        """
        numerator = int(round(seconds * 1000))
        return f"{numerator}/1000s"

    @classmethod
    def _generate_uuid(cls) -> str:
        """Generate an upper-case random UUID string for FCPXML elements."""
        return str(uuid.uuid4()).upper()

    @classmethod
    def _create_text_style_def(
        cls,
        parent: ET.Element,
        style_id: str,
        style: "FCPXMLStyle",
    ) -> ET.Element:
        """Append a ``text-style-def`` element describing ``style``.

        Args:
            parent: Element that receives the new definition.
            style_id: Identifier stored in the definition's ``id`` attribute.
            style: Style whose fields are copied onto the nested
                ``text-style`` element.

        Returns:
            The created ``text-style-def`` element.
        """
        style_def = ET.SubElement(parent, "text-style-def", id=style_id)
        text_style = ET.SubElement(
            style_def,
            "text-style",
            font=style.font,
            fontSize=str(style.font_size),
            fontColor=style.font_color,
            alignment=style.alignment,
        )
        # The background attribute is only emitted when a color is configured.
        if style.background_color:
            text_style.set("backgroundColor", style.background_color)
        return style_def

    @classmethod
    def _get_role_name(cls, speaker: Optional[str]) -> str:
        """Convert a speaker name to the role string used on captions.

        Args:
            speaker: Speaker name, or None/empty for captions without one.

        Returns:
            A generic caption role when there is no speaker, otherwise a
            dialogue role whose name has spaces and dots replaced by "_".
        """
        if not speaker:
            return "iTT?captionFormat=ITT.en"
        clean_name = speaker.replace(" ", "_").replace(".", "_")
        return f"iTT?role=Dialogue.{clean_name}"

    @classmethod
    def _build_fcpxml(
        cls,
        supervisions: List["Supervision"],
        config: "FCPXMLConfig",
    ) -> ET.Element:
        """Build the FCPXML element tree for ``supervisions``.

        Args:
            supervisions: Caption segments; each must expose ``start``,
                ``duration``, ``end``, ``text`` and ``speaker``.
            config: Export configuration.

        Returns:
            The root ``fcpxml`` element.
        """
        root = ET.Element("fcpxml", version=cls.FCPXML_VERSION)
        resources = ET.SubElement(root, "resources")

        # Format resource. The frame duration is the rational 1/fps, written
        # as 100/(fps*100). Round instead of truncating so fractional rates
        # such as 23.976 land on the nearest hundredth (2398, not 2397).
        format_id = "r1"
        frame_duration = f"100/{int(round(config.fps * 100))}s"
        ET.SubElement(
            resources,
            "format",
            id=format_id,
            frameDuration=frame_duration,
            width="1920",
            height="1080",
        )

        default_style_id = "ts1"
        cls._create_text_style_def(resources, default_style_id, config.default_style)

        # One style definition per speaker. Speakers are de-duplicated in
        # order of first appearance so that style ids are stable between runs
        # (iterating a set of strings is not).
        speaker_style_ids: Dict[str, str] = {}
        if config.map_speakers_to_roles:
            speakers = dict.fromkeys(sup.speaker for sup in supervisions if sup.speaker)
            for style_counter, speaker in enumerate(speakers, 2):
                style = config.speaker_styles.get(speaker, config.default_style)
                style_id = f"ts{style_counter}"
                cls._create_text_style_def(resources, style_id, style)
                speaker_style_ids[speaker] = style_id

        # library > event > project > sequence > spine
        library = ET.SubElement(root, "library")
        event = ET.SubElement(library, "event", name=config.event_name)
        project = ET.SubElement(event, "project", name=config.project_name)
        # Reference the format resource declared above; the FCPXML DTD lists
        # `format` as a required attribute of `sequence`.
        sequence = ET.SubElement(project, "sequence", format=format_id)

        # The base gap spans up to the last caption end; an empty input falls
        # back to one hour.
        if supervisions:
            total_duration = max(sup.end for sup in supervisions)
        else:
            total_duration = 3600
        total_duration_str = cls._seconds_to_fcpxml_time(total_duration, config.fps)

        spine = ET.SubElement(sequence, "spine")

        # A single gap element acts as the container the captions attach to.
        gap = ET.SubElement(
            spine,
            "gap",
            name="Base",
            offset="0/1s",
            duration=total_duration_str,
            start="0/1s",
        )

        for i, sup in enumerate(supervisions, 1):
            start_time = cls._seconds_to_fcpxml_time(sup.start, config.fps)
            duration = cls._seconds_to_fcpxml_time(sup.duration, config.fps)

            # Speaker-specific role only when role mapping is enabled.
            if config.map_speakers_to_roles and sup.speaker:
                role = cls._get_role_name(sup.speaker)
            else:
                role = cls._get_role_name(None)

            caption = ET.SubElement(
                gap,
                "caption",
                role=role,
                name=f"Caption {i}",
                offset=start_time,
                duration=duration,
                start=start_time,
            )

            text_elem = ET.SubElement(caption, "text")
            text_elem.text = sup.text or ""

            # Falls back to the default style when the speaker has no entry
            # (no speaker, or role mapping disabled).
            style_id = speaker_style_ids.get(sup.speaker, default_style_id)
            ET.SubElement(caption, "text-style-ref", ref=style_id)

        return root

    @classmethod
    def _prettify_xml(cls, element: ET.Element) -> str:
        """Serialize ``element`` as indented XML with a UTF-8 declaration.

        Args:
            element: XML element to format.

        Returns:
            The formatted document; blank lines produced by the pretty
            printer are removed.
        """
        rough_string = ET.tostring(element, encoding="unicode")
        reparsed = minidom.parseString(rough_string)
        # NOTE(review): the indent literal is reproduced exactly as found;
        # confirm the intended indentation width.
        pretty = reparsed.toprettyxml(indent=" ")
        lines = [line for line in pretty.split("\n") if line.strip()]
        # lines[0] is minidom's own declaration; replace it with one that
        # states the encoding explicitly.
        return '<?xml version="1.0" encoding="UTF-8"?>\n' + "\n".join(lines[1:])

    @classmethod
    def write(
        cls,
        supervisions: List["Supervision"],
        output_path: "Pathlike",
        config: Optional["FCPXMLConfig"] = None,
    ) -> Path:
        """Write supervisions to FCPXML format.

        Args:
            supervisions: List of supervision segments.
            output_path: Output path; its suffix is replaced by ``.fcpxmld``
                (bundle) or ``.fcpxml`` (single file).
            config: FCPXML export configuration; defaults to ``FCPXMLConfig()``.

        Returns:
            Path to the written file, or to the bundle directory.
        """
        if config is None:
            config = FCPXMLConfig()

        output_path = Path(output_path)
        xml_content = cls._prettify_xml(cls._build_fcpxml(supervisions, config))

        if config.use_bundle or output_path.suffix.lower() == ".fcpxmld":
            # A bundle is a directory holding the document as Info.fcpxml.
            bundle_path = output_path.with_suffix(".fcpxmld")
            bundle_path.mkdir(parents=True, exist_ok=True)
            (bundle_path / "Info.fcpxml").write_text(xml_content, encoding="utf-8")
            return bundle_path

        output_path = output_path.with_suffix(".fcpxml")
        # Create missing parent directories, as the bundle branch does.
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(xml_content, encoding="utf-8")
        return output_path

    @classmethod
    def to_bytes(
        cls,
        supervisions: List["Supervision"],
        config: Optional["FCPXMLConfig"] = None,
    ) -> bytes:
        """Convert supervisions to FCPXML format bytes.

        Note: This returns a single FCPXML document, not a bundle.

        Args:
            supervisions: List of supervision segments.
            config: FCPXML export configuration; defaults to ``FCPXMLConfig()``.

        Returns:
            UTF-8 encoded FCPXML content.
        """
        if config is None:
            config = FCPXMLConfig()

        root = cls._build_fcpxml(supervisions, config)
        return cls._prettify_xml(root).encode("utf-8")

    @classmethod
    def write_with_word_level(
        cls,
        supervisions: List["Supervision"],
        output_path: "Pathlike",
        config: Optional["FCPXMLConfig"] = None,
    ) -> Path:
        """Write supervisions with word-level timing to FCPXML.

        Each aligned word becomes its own caption element. A supervision
        without word alignment, or with an empty word list, is written
        unchanged so that no segment is lost.

        Args:
            supervisions: Supervision segments, optionally carrying an
                ``alignment`` mapping with a "word" entry.
            output_path: Output file path.
            config: FCPXML export configuration.

        Returns:
            Path to written file/bundle.
        """
        expanded = []
        for sup in supervisions:
            alignment = getattr(sup, "alignment", None)
            words = alignment["word"] if alignment and "word" in alignment else None
            if not words:
                expanded.append(sup)
                continue
            expanded.extend(
                Supervision(
                    text=word_item.symbol,
                    start=word_item.start,
                    duration=word_item.duration,
                    speaker=sup.speaker,
                )
                for word_item in words
            )

        return cls.write(expanded, output_path, config)
391
+
392
+
393
@register_writer("fcpxml")
class FCPXMLFormat(FormatWriter):
    """Format handler for Final Cut Pro XML (FCPXML)."""

    format_id = "fcpxml"
    extensions = [".fcpxml", ".fcpxmld"]
    description = "Final Cut Pro XML Format"

    @classmethod
    def _config_from_options(cls, options: dict) -> FCPXMLConfig:
        """Build an FCPXMLConfig from writer keyword options.

        Options that FCPXML does not support are removed from ``options``
        first; everything that remains is passed to ``FCPXMLConfig``.
        """
        for unsupported in ("word_level", "karaoke", "karaoke_config", "metadata"):
            options.pop(unsupported, None)
        return FCPXMLConfig(**options)

    @classmethod
    def write(
        cls,
        supervisions: List[Supervision],
        output_path: Pathlike,
        include_speaker: bool = True,
        **kwargs,
    ):
        """Write supervisions to FCPXML format.

        Args:
            supervisions: List of supervision segments
            output_path: Path to output file
            include_speaker: Accepted as part of the writer signature; this
                method does not consult it.
            **kwargs: FCPXMLConfig fields; ``word_level``, ``karaoke``,
                ``karaoke_config`` and ``metadata`` are discarded.

        Returns:
            Path to written file
        """
        export_config = cls._config_from_options(kwargs)
        return FCPXMLWriter.write(supervisions, output_path, export_config)

    @classmethod
    def to_bytes(
        cls,
        supervisions: List[Supervision],
        include_speaker: bool = True,
        **kwargs,
    ) -> bytes:
        """Convert supervisions to FCPXML bytes.

        Args:
            supervisions: List of supervision segments
            include_speaker: Accepted as part of the writer signature; this
                method does not consult it.
            **kwargs: FCPXMLConfig fields; ``word_level``, ``karaoke``,
                ``karaoke_config`` and ``metadata`` are discarded.

        Returns:
            FCPXML content as bytes
        """
        export_config = cls._config_from_options(kwargs)
        return FCPXMLWriter.to_bytes(supervisions, export_config)
452
+
453
+
454
class FCPXMLReader:
    """Reader for FCPXML format."""

    @classmethod
    def _parse_rational_time(cls, time_str: str) -> float:
        """Parse a rational time string (e.g., "100/25s") to seconds.

        Args:
            time_str: Either "<num>/<den>s" or "<value>s".

        Returns:
            The time in seconds, or 0.0 when the string is empty, lacks the
            trailing "s", is not numeric, or has a zero denominator. A
            malformed attribute therefore never aborts the whole read.
        """
        if not time_str:
            return 0.0

        value = time_str.strip()
        if not value.endswith("s"):
            return 0.0

        value = value[:-1]  # drop the trailing unit
        try:
            if "/" in value:
                num, den = value.split("/")
                return float(num) / float(den)
            return float(value)
        except (ValueError, ZeroDivisionError):
            return 0.0

    @classmethod
    def read(cls, source: str, normalize_text: bool = True) -> List["Supervision"]:
        """Read FCPXML content and return supervisions.

        Args:
            source: FCPXML document as a string.
            normalize_text: Strip surrounding whitespace from caption text.

        Returns:
            Supervisions sorted by start time. Captions without text or with
            a non-positive duration are skipped; unparsable XML yields an
            empty list.
        """
        try:
            root = ET.fromstring(source)
        except ET.ParseError:
            return []

        supervisions = []

        # Captions can be nested in spines, gaps, clips, etc., so search the
        # whole tree.
        for caption in root.iter("caption"):
            # Simplification: "offset" is taken as the caption's start time on
            # the timeline; offsets of enclosing containers are not applied.
            start_sec = cls._parse_rational_time(caption.get("offset", "0s"))
            duration_sec = cls._parse_rational_time(caption.get("duration", "0s"))
            if duration_sec <= 0:
                continue

            # findtext gives None for a missing <text> and "" for an empty one.
            text_content = caption.findtext("text")
            if not text_content:
                continue

            caption_id = caption.get("name")
            if caption_id is None:
                caption_id = str(uuid.uuid4())

            supervisions.append(
                Supervision(
                    id=caption_id,
                    recording_id="fcpxml_import",
                    start=start_sec,
                    duration=duration_sec,
                    text=text_content.strip() if normalize_text else text_content,
                )
            )

        return sorted(supervisions, key=lambda s: s.start)
522
+
523
+
524
+ from .. import register_reader
525
+
526
+
527
@register_reader("fcpxml")
class FCPXMLReaderHandler(FormatReader):
    """Reader handler for FCPXML."""

    format_id = "fcpxml"
    extensions = [".fcpxml", ".fcpxmld"]

    @classmethod
    def read(cls, source: Union[Pathlike, str], normalize_text: bool = True, **kwargs) -> List[Supervision]:
        """Read supervisions from an FCPXML file, bundle, or content string.

        Args:
            source: Path to a .fcpxml file, path to a .fcpxmld bundle
                directory, or the FCPXML document itself.
            normalize_text: Strip surrounding whitespace from caption text.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Supervisions parsed by ``FCPXMLReader.read``.

        Raises:
            FileNotFoundError: If ``source`` is a .fcpxmld bundle directory
                that does not contain Info.fcpxml.
        """
        if isinstance(source, (str, Path)) and not cls.is_content(source):
            path = Path(source)
            # A bundle is a directory; the document lives in Info.fcpxml.
            # Compare the suffix case-insensitively, as the writer does.
            if path.is_dir() and path.suffix.lower() == ".fcpxmld":
                info_path = path / "Info.fcpxml"
                if not info_path.exists():
                    raise FileNotFoundError(f"FCPXML bundle has no Info.fcpxml: {path}")
                path = info_path

            content = path.read_text(encoding="utf-8")
        else:
            content = str(source)

        return FCPXMLReader.read(content, normalize_text=normalize_text)