lattifai 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,219 @@
+import re
+from typing import List, Optional
+
+from lattifai.caption import Supervision
+from lattifai.utils import _resolve_model_path
+
+END_PUNCTUATION = '.!?"]。!?”】'
+
+
+class SentenceSplitter:
+    """Lazy-initialized sentence splitter using wtpsplit."""
+
+    def __init__(self, device: str = "cpu", model_hub: Optional[str] = None, lazy_init: bool = True):
+        """Initialize sentence splitter with lazy loading.
+
+        Args:
+            device: Device to run the model on (cpu, cuda, mps)
+            model_hub: Model hub to use (None for huggingface, "modelscope" for modelscope)
+            lazy_init: Whether to defer loading the model until it is first needed
+        """
+        self.device = device
+        self.model_hub = model_hub
+        self._splitter = None
+        if not lazy_init:
+            self._init_splitter()
+
+    def _init_splitter(self):
+        """Initialize the sentence splitter model on first use."""
+        if self._splitter is not None:
+            return
+
+        import onnxruntime as ort
+        from wtpsplit import SaT
+
+        providers = []
+        device = self.device
+        if device.startswith("cuda") and ort.get_all_providers().count("CUDAExecutionProvider") > 0:
+            providers.append("CUDAExecutionProvider")
+        elif device.startswith("mps") and ort.get_all_providers().count("MPSExecutionProvider") > 0:
+            providers.append("MPSExecutionProvider")
+
+        if self.model_hub == "modelscope":
+            downloaded_path = _resolve_model_path("LattifAI/OmniTokenizer", model_hub="modelscope")
+            sat = SaT(
+                f"{downloaded_path}/sat-3l-sm",
+                tokenizer_name_or_path=f"{downloaded_path}/xlm-roberta-base",
+                ort_providers=providers + ["CPUExecutionProvider"],
+            )
+        else:
+            sat_path = _resolve_model_path("segment-any-text/sat-3l-sm", model_hub="huggingface")
+            sat = SaT(
+                sat_path,
+                tokenizer_name_or_path="facebookAI/xlm-roberta-base",
+                hub_prefix="segment-any-text",
+                ort_providers=providers + ["CPUExecutionProvider"],
+            )
+        self._splitter = sat
+
+    @staticmethod
+    def _resplit_special_sentence_types(sentence: str) -> List[str]:
+        """
+        Re-split special sentence types.
+
+        Examples:
+            '[APPLAUSE] &gt;&gt; MIRA MURATI:' -> ['[APPLAUSE]', '&gt;&gt; MIRA MURATI:']
+            '[MUSIC] &gt;&gt; SPEAKER:' -> ['[MUSIC]', '&gt;&gt; SPEAKER:']
+
+        Special handling patterns:
+        1. Separate special marks at the beginning (e.g., [APPLAUSE], [MUSIC], etc.) from subsequent speaker marks
+        2. Use speaker marks (&gt;&gt; or other separators) as split points
+
+        Args:
+            sentence: Input sentence string
+
+        Returns:
+            List of re-split sentences. If no special marks are found, returns the original sentence in a list
+        """
+        # Detect special mark patterns: [SOMETHING] &gt;&gt; SPEAKER:
+        # or other forms like [SOMETHING] SPEAKER:
+
+        # Pattern 1: [mark] HTML-encoded separator speaker:
+        pattern1 = r"^(\[[^\]]+\])\s+(&gt;&gt;|>>)\s+(.+)$"
+        match1 = re.match(pattern1, sentence.strip())
+        if match1:
+            special_mark = match1.group(1)
+            separator = match1.group(2)
+            speaker_part = match1.group(3)
+            return [special_mark, f"{separator} {speaker_part}"]
+
+        # Pattern 2: [mark] speaker:
+        pattern2 = r"^(\[[^\]]+\])\s+([^:]+:)(.*)$"
+        match2 = re.match(pattern2, sentence.strip())
+        if match2:
+            special_mark = match2.group(1)
+            speaker_label = match2.group(2)
+            remaining = match2.group(3).strip()
+            if remaining:
+                return [special_mark, f"{speaker_label} {remaining}"]
+            else:
+                return [special_mark, speaker_label]
+
+        # If no special pattern matches, return the original sentence
+        return [sentence]
+
+    def split_sentences(self, supervisions: List[Supervision], strip_whitespace=True) -> List[Supervision]:
+        """Split supervisions into sentences using the sentence splitter.
+
+        Careful about speaker changes.
+
+        Args:
+            supervisions: List of Supervision objects to split
+            strip_whitespace: Whether to strip whitespace from split sentences
+
+        Returns:
+            List of Supervision objects with split sentences
+        """
+        self._init_splitter()
+
+        texts, speakers = [], []
+        text_len, sidx = 0, 0
+
+        def flush_segment(end_idx: int, speaker: Optional[str] = None):
+            """Flush accumulated text from sidx to end_idx with given speaker."""
+            nonlocal text_len, sidx
+            if sidx <= end_idx:
+                if len(speakers) < len(texts) + 1:
+                    speakers.append(speaker)
+                text = " ".join(sup.text for sup in supervisions[sidx : end_idx + 1])
+                texts.append(text)
+                sidx = end_idx + 1
+                text_len = 0
+
+        for s, supervision in enumerate(supervisions):
+            text_len += len(supervision.text)
+            is_last = s == len(supervisions) - 1
+
+            if supervision.speaker:
+                # Flush previous segment without speaker (if any)
+                if sidx < s:
+                    flush_segment(s - 1, None)
+                    text_len = len(supervision.text)
+
+                # Check if we should flush this speaker's segment now
+                next_has_speaker = not is_last and supervisions[s + 1].speaker
+                if is_last or next_has_speaker:
+                    flush_segment(s, supervision.speaker)
+                else:
+                    speakers.append(supervision.speaker)
+
+            elif text_len >= 2000 or is_last:
+                flush_segment(s, None)
+
+        assert len(speakers) == len(texts), f"len(speakers)={len(speakers)} != len(texts)={len(texts)}"
+        sentences = self._splitter.split(texts, threshold=0.15, strip_whitespace=strip_whitespace, batch_size=8)
+
+        supervisions, remainder = [], ""
+        for k, (_speaker, _sentences) in enumerate(zip(speakers, sentences)):
+            # Prepend remainder from previous iteration to the first sentence
+            if _sentences and remainder:
+                _sentences[0] = remainder + _sentences[0]
+                remainder = ""
+
+            if not _sentences:
+                continue
+
+            # Process and re-split special sentence types
+            processed_sentences = []
+            for s, _sentence in enumerate(_sentences):
+                if remainder:
+                    _sentence = remainder + _sentence
+                    remainder = ""
+                # Detect and split special sentence types: e.g., '[APPLAUSE] &gt;&gt; MIRA MURATI:' -> ['[APPLAUSE]', '&gt;&gt; MIRA MURATI:']  # noqa: E501
+                resplit_parts = self._resplit_special_sentence_types(_sentence)
+                if any(resplit_parts[-1].endswith(sp) for sp in [":", ":"]):
+                    if s < len(_sentences) - 1:
+                        _sentences[s + 1] = resplit_parts[-1] + " " + _sentences[s + 1]
+                    else:  # last part
+                        remainder = resplit_parts[-1] + " "
+                    processed_sentences.extend(resplit_parts[:-1])
+                else:
+                    processed_sentences.extend(resplit_parts)
+            _sentences = processed_sentences
+
+            if not _sentences:
+                if remainder:
+                    _sentences, remainder = [remainder.strip()], ""
+                else:
+                    continue
+
+            if any(_sentences[-1].endswith(ep) for ep in END_PUNCTUATION):
+                supervisions.extend(
+                    Supervision(text=text, speaker=(_speaker if s == 0 else None)) for s, text in enumerate(_sentences)
+                )
+                _speaker = None  # reset speaker after use
+            else:
+                supervisions.extend(
+                    Supervision(text=text, speaker=(_speaker if s == 0 else None))
+                    for s, text in enumerate(_sentences[:-1])
+                )
+                remainder = _sentences[-1] + " " + remainder
+                if k < len(speakers) - 1 and speakers[k + 1] is not None:  # next speaker is set
+                    supervisions.append(
+                        Supervision(text=remainder.strip(), speaker=_speaker if len(_sentences) == 1 else None)
+                    )
+                    remainder = ""
+                elif len(_sentences) == 1:
+                    if k == len(speakers) - 1:
+                        pass  # keep _speaker for the last supervision
+                    else:
+                        assert speakers[k + 1] is None
+                        speakers[k + 1] = _speaker
+                else:
+                    assert len(_sentences) > 1
+                    _speaker = None  # reset speaker if sentence not ended
+
+        if remainder.strip():
+            supervisions.append(Supervision(text=remainder.strip(), speaker=_speaker))
+
+        return supervisions
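The example below exercises the _resplit_special_sentence_types helper added above. It is a minimal sketch, not code from the package; the import path lattifai.alignment.sentence_splitter is inferred from the relative import "from .sentence_splitter import SentenceSplitter" later in this diff and should be treated as an assumption.

from lattifai.alignment.sentence_splitter import SentenceSplitter  # assumed import path

# Leading event marks are split away from (HTML-encoded) speaker turns.
parts = SentenceSplitter._resplit_special_sentence_types("[APPLAUSE] &gt;&gt; MIRA MURATI: Hello everyone.")
print(parts)  # ['[APPLAUSE]', '&gt;&gt; MIRA MURATI: Hello everyone.']

# A plain "[mark] SPEAKER:" prefix is split the same way.
print(SentenceSplitter._resplit_special_sentence_types("[MUSIC] SPEAKER: welcome back"))
# ['[MUSIC]', 'SPEAKER: welcome back']

At the pipeline level, LatticeTokenizer.split_sentences now delegates to SentenceSplitter.split_sentences, which loads the wtpsplit SaT model on first use and keeps speaker labels on the first sentence of each speaker's segment.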
@@ -4,9 +4,8 @@ import re
 from collections import defaultdict
 from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar
 
-import torch
+import numpy as np
 
-from lattifai.alignment.phonemizer import G2Phonemizer
 from lattifai.caption import Supervision
 from lattifai.caption import normalize_text as normalize_html_text
 from lattifai.errors import (
@@ -16,8 +15,10 @@ from lattifai.errors import (
     QuotaExceededError,
 )
 
+from .phonemizer import G2Phonemizer
+from .sentence_splitter import SentenceSplitter
+
 PUNCTUATION = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~'
-END_PUNCTUATION = '.!?"]。!?”】'
 PUNCTUATION_SPACE = PUNCTUATION + " "
 STAR_TOKEN = "※"
 
@@ -121,78 +122,17 @@ class LatticeTokenizer:
     def __init__(self, client_wrapper: Any):
         self.client_wrapper = client_wrapper
         self.model_name = ""
+        self.model_hub: Optional[str] = None
         self.words: List[str] = []
         self.g2p_model: Any = None  # Placeholder for G2P model
         self.dictionaries = defaultdict(lambda: [])
         self.oov_word = "<unk>"
-        self.sentence_splitter = None
+        self.sentence_splitter: Optional[SentenceSplitter] = None
         self.device = "cpu"
 
     def init_sentence_splitter(self):
-        if self.sentence_splitter is not None:
-            return
-
-        import onnxruntime as ort
-        from wtpsplit import SaT
-
-        providers = []
-        device = self.device
-        if device.startswith("cuda") and ort.get_all_providers().count("CUDAExecutionProvider") > 0:
-            providers.append("CUDAExecutionProvider")
-        elif device.startswith("mps") and ort.get_all_providers().count("MPSExecutionProvider") > 0:
-            providers.append("MPSExecutionProvider")
-
-        sat = SaT(
-            "sat-3l-sm",
-            ort_providers=providers + ["CPUExecutionProvider"],
-        )
-        self.sentence_splitter = sat
-
-    @staticmethod
-    def _resplit_special_sentence_types(sentence: str) -> List[str]:
-        """
-        Re-split special sentence types.
-
-        Examples:
-            '[APPLAUSE] &gt;&gt; MIRA MURATI:' -> ['[APPLAUSE]', '&gt;&gt; MIRA MURATI:']
-            '[MUSIC] &gt;&gt; SPEAKER:' -> ['[MUSIC]', '&gt;&gt; SPEAKER:']
-
-        Special handling patterns:
-        1. Separate special marks at the beginning (e.g., [APPLAUSE], [MUSIC], etc.) from subsequent speaker marks
-        2. Use speaker marks (&gt;&gt; or other separators) as split points
-
-        Args:
-            sentence: Input sentence string
-
-        Returns:
-            List of re-split sentences. If no special marks are found, returns the original sentence in a list
-        """
-        # Detect special mark patterns: [SOMETHING] &gt;&gt; SPEAKER:
-        # or other forms like [SOMETHING] SPEAKER:
-
-        # Pattern 1: [mark] HTML-encoded separator speaker:
-        pattern1 = r"^(\[[^\]]+\])\s+(&gt;&gt;|>>)\s+(.+)$"
-        match1 = re.match(pattern1, sentence.strip())
-        if match1:
-            special_mark = match1.group(1)
-            separator = match1.group(2)
-            speaker_part = match1.group(3)
-            return [special_mark, f"{separator} {speaker_part}"]
-
-        # Pattern 2: [mark] speaker:
-        pattern2 = r"^(\[[^\]]+\])\s+([^:]+:)(.*)$"
-        match2 = re.match(pattern2, sentence.strip())
-        if match2:
-            special_mark = match2.group(1)
-            speaker_label = match2.group(2)
-            remaining = match2.group(3).strip()
-            if remaining:
-                return [special_mark, f"{speaker_label} {remaining}"]
-            else:
-                return [special_mark, speaker_label]
-
-        # If no special pattern matches, return the original sentence
-        return [sentence]
+        if self.sentence_splitter is None:
+            self.sentence_splitter = SentenceSplitter(device=self.device, model_hub=self.model_hub)
 
     @classmethod
     def from_pretrained(
@@ -200,6 +140,7 @@ class LatticeTokenizer:
         client_wrapper: Any,
         model_path: str,
         model_name: str,
+        model_hub: Optional[str] = None,
         device: str = "cpu",
         compressed: bool = True,
     ) -> TokenizerT:
@@ -227,6 +168,7 @@ class LatticeTokenizer:
 
         tokenizer = cls(client_wrapper=client_wrapper)
         tokenizer.model_name = model_name
+        tokenizer.model_hub = model_hub
        tokenizer.words = data["words"]
         tokenizer.dictionaries = defaultdict(list, data["dictionaries"])
         tokenizer.oov_word = data["oov_word"]
@@ -295,116 +237,16 @@ class LatticeTokenizer:
 
         return {}
 
-    def split_sentences(self, supervisions: List[Supervision], strip_whitespace=True) -> List[str]:
+    def split_sentences(self, supervisions: List[Supervision], strip_whitespace=True) -> List[Supervision]:
         """Split supervisions into sentences using the sentence splitter.
 
-        Carefull about speaker changes.
+        Careful about speaker changes.
         """
-        texts, speakers = [], []
-        text_len, sidx = 0, 0
-
-        def flush_segment(end_idx: int, speaker: Optional[str] = None):
-            """Flush accumulated text from sidx to end_idx with given speaker."""
-            nonlocal text_len, sidx
-            if sidx <= end_idx:
-                if len(speakers) < len(texts) + 1:
-                    speakers.append(speaker)
-                text = " ".join(sup.text for sup in supervisions[sidx : end_idx + 1])
-                texts.append(text)
-                sidx = end_idx + 1
-                text_len = 0
-
-        for s, supervision in enumerate(supervisions):
-            text_len += len(supervision.text)
-            is_last = s == len(supervisions) - 1
-
-            if supervision.speaker:
-                # Flush previous segment without speaker (if any)
-                if sidx < s:
-                    flush_segment(s - 1, None)
-                    text_len = len(supervision.text)
-
-                # Check if we should flush this speaker's segment now
-                next_has_speaker = not is_last and supervisions[s + 1].speaker
-                if is_last or next_has_speaker:
-                    flush_segment(s, supervision.speaker)
-                else:
-                    speakers.append(supervision.speaker)
-
-            elif text_len >= 2000 or is_last:
-                flush_segment(s, None)
-
-        assert len(speakers) == len(texts), f"len(speakers)={len(speakers)} != len(texts)={len(texts)}"
-        sentences = self.sentence_splitter.split(texts, threshold=0.15, strip_whitespace=strip_whitespace, batch_size=8)
-
-        supervisions, remainder = [], ""
-        for k, (_speaker, _sentences) in enumerate(zip(speakers, sentences)):
-            # Prepend remainder from previous iteration to the first sentence
-            if _sentences and remainder:
-                _sentences[0] = remainder + _sentences[0]
-                remainder = ""
-
-            if not _sentences:
-                continue
-
-            # Process and re-split special sentence types
-            processed_sentences = []
-            for s, _sentence in enumerate(_sentences):
-                if remainder:
-                    _sentence = remainder + _sentence
-                    remainder = ""
-                # Detect and split special sentence types: e.g., '[APPLAUSE] &gt;&gt; MIRA MURATI:' -> ['[APPLAUSE]', '&gt;&gt; MIRA MURATI:']  # noqa: E501
-                resplit_parts = self._resplit_special_sentence_types(_sentence)
-                if any(resplit_parts[-1].endswith(sp) for sp in [":", ":"]):
-                    if s < len(_sentences) - 1:
-                        _sentences[s + 1] = resplit_parts[-1] + " " + _sentences[s + 1]
-                    else:  # last part
-                        remainder = resplit_parts[-1] + " "
-                    processed_sentences.extend(resplit_parts[:-1])
-                else:
-                    processed_sentences.extend(resplit_parts)
-            _sentences = processed_sentences
-
-            if not _sentences:
-                if remainder:
-                    _sentences, remainder = [remainder.strip()], ""
-                else:
-                    continue
-
-            if any(_sentences[-1].endswith(ep) for ep in END_PUNCTUATION):
-                supervisions.extend(
-                    Supervision(text=text, speaker=(_speaker if s == 0 else None)) for s, text in enumerate(_sentences)
-                )
-                _speaker = None  # reset speaker after use
-            else:
-                supervisions.extend(
-                    Supervision(text=text, speaker=(_speaker if s == 0 else None))
-                    for s, text in enumerate(_sentences[:-1])
-                )
-                remainder = _sentences[-1] + " " + remainder
-                if k < len(speakers) - 1 and speakers[k + 1] is not None:  # next speaker is set
-                    supervisions.append(
-                        Supervision(text=remainder.strip(), speaker=_speaker if len(_sentences) == 1 else None)
-                    )
-                    remainder = ""
-                elif len(_sentences) == 1:
-                    if k == len(speakers) - 1:
-                        pass  # keep _speaker for the last supervision
-                    else:
-                        assert speakers[k + 1] is None
-                        speakers[k + 1] = _speaker
-                else:
-                    assert len(_sentences) > 1
-                    _speaker = None  # reset speaker if sentence not ended
-
-        if remainder.strip():
-            supervisions.append(Supervision(text=remainder.strip(), speaker=_speaker))
-
-        return supervisions
+        self.init_sentence_splitter()
+        return self.sentence_splitter.split_sentences(supervisions, strip_whitespace=strip_whitespace)
 
     def tokenize(self, supervisions: List[Supervision], split_sentence: bool = False) -> Tuple[str, Dict[str, Any]]:
         if split_sentence:
-            self.init_sentence_splitter()
             supervisions = self.split_sentences(supervisions)
 
         pronunciation_dictionaries = self.prenormalize([s.text for s in supervisions])
@@ -431,7 +273,7 @@ class LatticeTokenizer:
     def detokenize(
         self,
         lattice_id: str,
-        lattice_results: Tuple[torch.Tensor, Any, Any, float, float],
+        lattice_results: Tuple[np.ndarray, Any, Any, float, float],
         supervisions: List[Supervision],
         return_details: bool = False,
         start_margin: float = 0.08,
@@ -481,7 +323,7 @@ class LatticeTokenizer:
 
 def _add_confidence_scores(
     supervisions: List[Supervision],
-    emission: torch.Tensor,
+    emission: np.ndarray,
     labels: List[int],
     frame_shift: float,
     offset: float = 0.0,
@@ -499,17 +341,17 @@ def _add_confidence_scores(
         labels: Token labels corresponding to aligned tokens
         frame_shift: Frame shift in seconds for converting frames to time
     """
-    tokens = torch.tensor(labels, dtype=torch.int64, device=emission.device)
+    tokens = np.array(labels, dtype=np.int64)
 
     for supervision in supervisions:
         start_frame = int((supervision.start - offset) / frame_shift)
        end_frame = int((supervision.end - offset) / frame_shift)
 
         # Compute segment-level confidence
-        probabilities = emission[0, start_frame:end_frame].softmax(dim=-1)
+        probabilities = np.exp(emission[0, start_frame:end_frame])
         aligned = probabilities[range(0, end_frame - start_frame), tokens[start_frame:end_frame]]
-        diffprobs = (probabilities.max(dim=-1).values - aligned).cpu()
-        supervision.score = round(1.0 - diffprobs.mean().item(), ndigits=4)
+        diffprobs = np.max(probabilities, axis=-1) - aligned
+        supervision.score = round(1.0 - diffprobs.mean(), ndigits=4)
 
         # Compute word-level confidence if alignment exists
         if hasattr(supervision, "alignment") and supervision.alignment:
@@ -517,7 +359,7 @@ def _add_confidence_scores(
             for w, item in enumerate(words):
                 start = int((item.start - offset) / frame_shift) - start_frame
                 end = int((item.end - offset) / frame_shift) - start_frame
-                words[w] = item._replace(score=round(1.0 - diffprobs[start:end].mean().item(), ndigits=4))
+                words[w] = item._replace(score=round(1.0 - diffprobs[start:end].mean(), ndigits=4))
 
 
 def _update_alignments_speaker(supervisions: List[Supervision], alignments: List[Supervision]) -> List[Supervision]:
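A note on the torch-to-numpy change above: the old code applied a softmax to the emission frames, while the new code applies np.exp directly, which only produces the same probabilities if the emission array already holds log-probabilities (a log-softmax over the token vocabulary). The snippet below is a standalone illustration of that assumption, not code from the package; the array shapes are made up.

import numpy as np

logits = np.random.randn(1, 10, 32).astype(np.float32)  # (batch, frames, tokens), made-up shape
log_probs = logits - np.log(np.exp(logits).sum(axis=-1, keepdims=True))  # log-softmax
softmax_probs = np.exp(logits) / np.exp(logits).sum(axis=-1, keepdims=True)

# exp(log_softmax(x)) == softmax(x), so np.exp on log-probability emissions
# reproduces what emission.softmax(dim=-1) computed in the torch version.
assert np.allclose(np.exp(log_probs), softmax_probs, atol=1e-5)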
@@ -539,6 +381,7 @@ def _load_tokenizer(
     model_name: str,
     device: str,
     *,
+    model_hub: Optional[str] = None,
     tokenizer_cls: Type[LatticeTokenizer] = LatticeTokenizer,
 ) -> LatticeTokenizer:
     """Instantiate tokenizer with consistent error handling."""
@@ -546,5 +389,6 @@ def _load_tokenizer(
         client_wrapper=client_wrapper,
         model_path=model_path,
         model_name=model_name,
+        model_hub=model_hub,
         device=device,
     )
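A hypothetical call site for the new model_hub parameter threaded through from_pretrained and _load_tokenizer above; the client wrapper, model path, and model name are placeholders, not values from the package.

tokenizer = LatticeTokenizer.from_pretrained(
    client_wrapper=client_wrapper,  # placeholder client object
    model_path="/path/to/model",    # placeholder path
    model_name="example-model",     # placeholder name
    model_hub="modelscope",         # None selects Hugging Face; "modelscope" selects ModelScope
    device="cpu",
)
# The hub choice is stored on the tokenizer and forwarded to SentenceSplitter
# when init_sentence_splitter() runs.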
lattifai/audio2.py CHANGED
@@ -36,7 +36,7 @@ class AudioData(namedtuple("AudioData", ["sampling_rate", "ndarray", "path", "st
     @property
     def streaming_mode(self) -> bool:
         """Indicates whether streaming mode is enabled based on streaming_chunk_secs."""
-        if self.streaming_chunk_secs is not None:
+        if self.streaming_chunk_secs:
             return self.duration > self.streaming_chunk_secs * 1.1
         return False
 
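The streaming_mode change above swaps an "is not None" check for a truthiness check, so a streaming_chunk_secs of 0 (or 0.0) now disables streaming instead of enabling it for any sufficiently long audio. A standalone illustration, not the library's API:

def old_streaming_mode(duration, streaming_chunk_secs):
    if streaming_chunk_secs is not None:
        return duration > streaming_chunk_secs * 1.1
    return False

def new_streaming_mode(duration, streaming_chunk_secs):
    if streaming_chunk_secs:
        return duration > streaming_chunk_secs * 1.1
    return False

assert old_streaming_mode(60.0, 0) is True   # 0 was treated as a valid chunk size
assert new_streaming_mode(60.0, 0) is False  # 0 now behaves like None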
@@ -467,7 +467,6 @@ class Caption:
                     sup_dict = sup.to_dict()
                     json_data.append(sup_dict)
                json.dump(json_data, f, ensure_ascii=False, indent=4)
-
         elif str(output_path).lower().endswith(".textgrid"):
             from tgt import Interval, IntervalTier, TextGrid, write_to_file
 
@@ -506,7 +505,6 @@ class Caption:
                 tg.add_tier(IntervalTier(name="word_scores", objects=scores["words"]))
 
             write_to_file(tg, output_path, format="long")
-
         elif str(output_path)[-4:].lower() == ".tsv":
             cls._write_tsv(alignments, output_path, include_speaker_in_text)
         elif str(output_path)[-4:].lower() == ".csv":