lattifai-captions 0.1.4__tar.gz → 0.1.5__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (36)
  1. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/PKG-INFO +1 -1
  2. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/pyproject.toml +1 -1
  3. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/config.py +0 -175
  4. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/formats/textgrid.py +60 -0
  5. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai_captions.egg-info/PKG-INFO +1 -1
  6. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/README.md +0 -0
  7. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/setup.cfg +0 -0
  8. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/__init__.py +0 -0
  9. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/caption.py +0 -0
  10. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/formats/__init__.py +0 -0
  11. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/formats/base.py +0 -0
  12. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/formats/gemini.py +0 -0
  13. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/formats/json.py +0 -0
  14. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/formats/lrc.py +0 -0
  15. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/formats/nle/__init__.py +0 -0
  16. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/formats/nle/audition.py +0 -0
  17. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/formats/nle/avid.py +0 -0
  18. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/formats/nle/fcpxml.py +0 -0
  19. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/formats/nle/premiere.py +0 -0
  20. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/formats/pysubs2.py +0 -0
  21. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/formats/sbv.py +0 -0
  22. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/formats/srv3.py +0 -0
  23. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/formats/tabular.py +0 -0
  24. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/formats/ttml.py +0 -0
  25. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/formats/vtt.py +0 -0
  26. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/parsers/__init__.py +0 -0
  27. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/parsers/text_parser.py +0 -0
  28. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/punctuation.py +0 -0
  29. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/sentence_splitter.py +0 -0
  30. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/standardize.py +0 -0
  31. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/supervision.py +0 -0
  32. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai/caption/utils.py +0 -0
  33. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai_captions.egg-info/SOURCES.txt +0 -0
  34. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai_captions.egg-info/dependency_links.txt +0 -0
  35. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai_captions.egg-info/requires.txt +0 -0
  36. {lattifai_captions-0.1.4 → lattifai_captions-0.1.5}/src/lattifai_captions.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lattifai-captions
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: Caption/subtitle processing library with multi-format support (SRT, VTT, ASS, TTML, TextGrid, NLE formats)
5
5
  Author-email: LattifAI Technologies <tech@lattifai.com>
6
6
  Maintainer-email: Lattice <tech@lattifai.com>
@@ -4,7 +4,7 @@ build-backend = 'setuptools.build_meta'
4
4
 
5
5
  [project]
6
6
  name = 'lattifai-captions'
7
- version = '0.1.4'
7
+ version = '0.1.5'
8
8
  description = "Caption/subtitle processing library with multi-format support (SRT, VTT, ASS, TTML, TextGrid, NLE formats)"
9
9
  keywords = ['captions', 'subtitles', 'srt', 'vtt', 'ass', 'ttml', 'textgrid', 'forced alignment']
10
10
  readme = 'README.md'
@@ -245,178 +245,3 @@ CAPTION_FORMATS: list[str] = ["srt", "vtt", "ass", "ssa", "sub", "sbv", "txt", "
245
245
  ALL_CAPTION_FORMATS: list[str] = list(set(INPUT_CAPTION_FORMATS + OUTPUT_CAPTION_FORMATS) - {"auto"})
246
246
 
247
247
 
248
- @dataclass
249
- class CaptionConfig:
250
- """
251
- Caption I/O configuration.
252
-
253
- Controls caption file reading, writing, and formatting options.
254
- """
255
-
256
- input_format: InputCaptionFormat = "auto"
257
- """Input caption format. Supports: 'auto' (detect),
258
- standard formats (srt, vtt, ass, ssa, sub, sbv, txt, sami, smi),
259
- tabular (csv, tsv, aud, json),
260
- specialized (textgrid, gemini),
261
- NLE (avid_ds, fcpxml, premiere_xml, audition_csv).
262
- Note: VTT format auto-detects YouTube VTT with word-level timestamps.
263
- """
264
-
265
- input_path: Optional[str] = None
266
- """Path to input caption file."""
267
-
268
- output_format: OutputCaptionFormat = "srt"
269
- """Output caption format. Supports: standard formats, tabular, specialized, TTML profiles (ttml, imsc1, ebu_tt_d),
270
- NLE (avid_ds, fcpxml, premiere_xml, audition_csv, edimarker_csv)."""
271
-
272
- output_path: Optional[str] = None
273
- """Path to output caption file."""
274
-
275
- include_speaker_in_text: bool = True
276
- """Preserve speaker labels in caption text content."""
277
-
278
- normalize_text: bool = True
279
- """Clean HTML entities and normalize whitespace in caption text."""
280
-
281
- split_sentence: bool = False
282
- """Re-segment captions intelligently based on punctuation and semantics."""
283
-
284
- word_level: bool = False
285
- """Include word-level timestamps in alignment results (useful for karaoke, dubbing)."""
286
-
287
- karaoke: Optional[KaraokeConfig] = None
288
- """Karaoke configuration when word_level=True (e.g., ASS \\kf tags, enhanced LRC).
289
- When None with word_level=True, outputs word-per-segment instead of karaoke styling.
290
- When provided, karaoke.enabled controls whether karaoke styling is applied."""
291
-
292
- encoding: str = "utf-8"
293
- """Character encoding for reading/writing caption files (default: utf-8)."""
294
-
295
- source_lang: Optional[str] = None
296
- """Source language code for the caption content (e.g., 'en', 'zh', 'de')."""
297
-
298
- standardization: Optional[StandardizationConfig] = None
299
- """Standardization configuration for broadcast-grade captions.
300
- When provided, captions will be standardized according to Netflix/BBC guidelines."""
301
-
302
- def __post_init__(self):
303
- """Validate configuration after initialization."""
304
- self._normalize_paths()
305
- self._validate_formats()
306
-
307
- @property
308
- def need_alignment(self, trust_timestamps: bool) -> bool:
309
- """Determine if alignment is needed based on configuration."""
310
- if trust_timestamps and not self.split_sentence:
311
- if not self.word_level:
312
- return False
313
- if self.normalize_text:
314
- print(
315
- "⚠️ Warning: Text normalization with 'trust_input_timestamps=True' and 'split_sentence=False'"
316
- "💡 Recommended command:\n"
317
- " lai caption normalize input.srt normalized.srt\n"
318
- )
319
-
320
- return False
321
-
322
- return True
323
-
324
- def _normalize_paths(self) -> None:
325
- """Normalize and expand input/output paths.
326
-
327
- Uses Path.resolve() to get absolute paths and prevent path traversal issues.
328
- """
329
- # Expand and normalize input path if provided, but don't require it to exist yet
330
- # (it might be set later after downloading captions)
331
- if self.input_path is not None:
332
- self.input_path = str(Path(self.input_path).expanduser().resolve())
333
-
334
- if self.output_path is not None:
335
- self.output_path = str(Path(self.output_path).expanduser().resolve())
336
- output_dir = Path(self.output_path).parent
337
- output_dir.mkdir(parents=True, exist_ok=True)
338
-
339
- def _validate_formats(self) -> None:
340
- """Validate input and output format fields."""
341
- if self.input_format not in INPUT_CAPTION_FORMATS:
342
- raise ValueError(f"input_format must be one of {INPUT_CAPTION_FORMATS}, got '{self.input_format}'")
343
-
344
- if self.output_format not in OUTPUT_CAPTION_FORMATS:
345
- raise ValueError(f"output_format must be one of {OUTPUT_CAPTION_FORMATS}, got '{self.output_format}'")
346
-
347
- def set_input_path(self, path: Pathlike) -> Path:
348
- """
349
- Set input caption path and validate it.
350
-
351
- Args:
352
- path: Path to input caption file (str or Path)
353
-
354
- Returns:
355
- Resolved path as Path object
356
-
357
- Raises:
358
- FileNotFoundError: If the file does not exist
359
- ValueError: If the path is not a file
360
- """
361
- resolved = Path(path).expanduser().resolve()
362
- if not resolved.exists():
363
- raise FileNotFoundError(f"Input caption file does not exist: '{resolved}'")
364
- if not resolved.is_file():
365
- raise ValueError(f"Input caption path is not a file: '{resolved}'")
366
- self.input_path = str(resolved)
367
- self.check_input_sanity()
368
- return resolved
369
-
370
- def set_output_path(self, path: Pathlike) -> Path:
371
- """
372
- Set output caption path and create parent directories if needed.
373
-
374
- Args:
375
- path: Path to output caption file (str or Path)
376
-
377
- Returns:
378
- Resolved path as Path object
379
- """
380
- resolved = Path(path).expanduser().resolve()
381
- resolved.parent.mkdir(parents=True, exist_ok=True)
382
- self.output_path = str(resolved)
383
- return resolved
384
-
385
- def check_input_sanity(self) -> None:
386
- """
387
- Validate that input_path is properly configured and accessible.
388
-
389
- Raises:
390
- ValueError: If input_path is not set or is invalid
391
- FileNotFoundError: If input_path does not exist
392
- """
393
- if not self.input_path:
394
- raise ValueError("input_path is required but not set in CaptionConfig")
395
-
396
- input_file = Path(self.input_path).expanduser().resolve()
397
- if not input_file.exists():
398
- raise FileNotFoundError(
399
- f"Input caption file does not exist: '{input_file}'. " "Please check the path and try again."
400
- )
401
- if not input_file.is_file():
402
- raise ValueError(
403
- f"Input caption path is not a file: '{input_file}'. " "Expected a valid caption file path."
404
- )
405
-
406
- def check_sanity(self) -> None:
407
- """Perform sanity checks on the configuration.
408
-
409
- Raises:
410
- ValueError: If input path is not provided or does not exist.
411
- """
412
- if not self.is_input_path_existed():
413
- raise ValueError("Input caption path must be provided and exist.")
414
-
415
- def is_input_path_existed(self) -> bool:
416
- """Check if input caption path is provided and exists."""
417
- if self.input_path is None:
418
- return False
419
-
420
- input_file = Path(self.input_path).expanduser().resolve()
421
- self.input_path = str(input_file)
422
- return input_file.exists() and input_file.is_file()
@@ -12,6 +12,47 @@ from . import register_format
12
12
  from .base import FormatHandler
13
13
 
14
14
 
15
+ def _is_event(sup: Supervision) -> bool:
16
+ """Detect if a supervision is an event type.
17
+
18
+ Event detection via:
19
+ 1. custom["segment_type"] == "event"
20
+ 2. Text format [xxx] (e.g., [Applause], [Music])
21
+ """
22
+ if sup.custom and sup.custom.get("segment_type") == "event":
23
+ return True
24
+ text = (sup.text or "").strip()
25
+ return text.startswith("[") and text.endswith("]") and len(text) > 2
26
+
27
+
28
+ def _assign_event_tiers(events: List[Supervision]) -> Dict[str, List]:
29
+ """Assign events to non-overlapping tiers using greedy algorithm.
30
+
31
+ Returns dict mapping tier names to lists of (start, end, text) tuples.
32
+ Tier names: "Event", "Event2", "Event3", ...
33
+ """
34
+ tiers: Dict[str, List] = {}
35
+
36
+ for event in sorted(events, key=lambda x: x.start):
37
+ assigned = False
38
+ tier_num = 1
39
+
40
+ while not assigned:
41
+ tier_name = "Event" if tier_num == 1 else f"Event{tier_num}"
42
+
43
+ if tier_name not in tiers:
44
+ tiers[tier_name] = []
45
+
46
+ # Check overlap with last interval in this tier
47
+ if not tiers[tier_name] or tiers[tier_name][-1][1] <= event.start:
48
+ tiers[tier_name].append((event.start, event.end, event.text or ""))
49
+ assigned = True
50
+ else:
51
+ tier_num += 1
52
+
53
+ return tiers
54
+
55
+
15
56
  @register_format("textgrid")
16
57
  class TextGridFormat(FormatHandler):
17
58
  """Praat TextGrid format for phonetic analysis."""
@@ -80,6 +121,10 @@ class TextGridFormat(FormatHandler):
80
121
  output_path: Output file path
81
122
  include_speaker: Whether to include speaker in text
82
123
  metadata: Optional metadata (for API consistency)
124
+
125
+ Note:
126
+ Events (text like [Applause] or segment_type="event") are placed
127
+ in separate tiers (Event, Event2, ...) to handle overlaps.
83
128
  """
84
129
  from tgt import Interval, IntervalTier, TextGrid, write_to_file
85
130
 
@@ -87,10 +132,16 @@ class TextGridFormat(FormatHandler):
87
132
  tg = TextGrid()
88
133
 
89
134
  utterances = []
135
+ events = []
90
136
  words = []
91
137
  scores = {"utterances": [], "words": []}
92
138
 
93
139
  for sup in sorted(supervisions, key=lambda x: x.start):
140
+ # Separate events from utterances
141
+ if _is_event(sup):
142
+ events.append(sup)
143
+ continue
144
+
94
145
  text = sup.text or ""
95
146
  if include_speaker and sup.speaker:
96
147
  # Check if speaker should be included
@@ -114,8 +165,17 @@ class TextGridFormat(FormatHandler):
114
165
  if hasattr(sup, "custom") and sup.custom and "score" in sup.custom:
115
166
  scores["utterances"].append(Interval(sup.start, sup.end, f"{sup.custom['score']:.2f}"))
116
167
 
168
+ # Add utterances tier
117
169
  tg.add_tier(IntervalTier(name="utterances", objects=utterances))
118
170
 
171
+ # Add event tiers (Event, Event2, ...) for overlapping events
172
+ if events:
173
+ event_tiers = _assign_event_tiers(events)
174
+ # Sort tier names: Event, Event2, Event3, ...
175
+ for tier_name in sorted(event_tiers.keys(), key=lambda x: (len(x), x)):
176
+ intervals = [Interval(s, e, t) for s, e, t in event_tiers[tier_name]]
177
+ tg.add_tier(IntervalTier(name=tier_name, objects=intervals))
178
+
119
179
  if words:
120
180
  tg.add_tier(IntervalTier(name="words", objects=words))
121
181
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lattifai-captions
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: Caption/subtitle processing library with multi-format support (SRT, VTT, ASS, TTML, TextGrid, NLE formats)
5
5
  Author-email: LattifAI Technologies <tech@lattifai.com>
6
6
  Maintainer-email: Lattice <tech@lattifai.com>