lattifai-1.2.1-py3-none-any.whl → lattifai-1.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
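The compared wheels are ordinary zip archives, so a comparison of this kind can also be reproduced locally. Below is a rough sketch using only the standard library (it assumes the usual PyPI wheel filenames and is not necessarily how the diff on this page was produced):

    # Unpack both wheels (zip archives) and diff their file listings / text members.
    import difflib
    import zipfile

    old = zipfile.ZipFile("lattifai-1.2.1-py3-none-any.whl")
    new = zipfile.ZipFile("lattifai-1.3.0-py3-none-any.whl")

    # Files added or removed between the two wheels.
    old_names, new_names = set(old.namelist()), set(new.namelist())
    print(sorted(new_names - old_names))  # added in 1.3.0
    print(sorted(old_names - new_names))  # removed since 1.2.1

    # Unified diff of a text member present in both wheels.
    name = "lattifai/cli/caption.py"
    a = old.read(name).decode("utf-8").splitlines(keepends=True)
    b = new.read(name).decode("utf-8").splitlines(keepends=True)
    print("".join(difflib.unified_diff(a, b, f"1.2.1/{name}", f"1.3.0/{name}")))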
Files changed (59)
  1. lattifai/_init.py +20 -0
  2. lattifai/alignment/__init__.py +9 -1
  3. lattifai/alignment/lattice1_aligner.py +175 -54
  4. lattifai/alignment/lattice1_worker.py +47 -4
  5. lattifai/alignment/punctuation.py +38 -0
  6. lattifai/alignment/segmenter.py +3 -2
  7. lattifai/alignment/text_align.py +441 -0
  8. lattifai/alignment/tokenizer.py +134 -65
  9. lattifai/audio2.py +162 -183
  10. lattifai/cli/__init__.py +2 -1
  11. lattifai/cli/alignment.py +5 -0
  12. lattifai/cli/caption.py +111 -4
  13. lattifai/cli/transcribe.py +2 -6
  14. lattifai/cli/youtube.py +7 -1
  15. lattifai/client.py +72 -123
  16. lattifai/config/__init__.py +28 -0
  17. lattifai/config/alignment.py +14 -0
  18. lattifai/config/caption.py +45 -31
  19. lattifai/config/client.py +16 -0
  20. lattifai/config/event.py +102 -0
  21. lattifai/config/media.py +20 -0
  22. lattifai/config/transcription.py +25 -1
  23. lattifai/data/__init__.py +8 -0
  24. lattifai/data/caption.py +228 -0
  25. lattifai/diarization/__init__.py +41 -1
  26. lattifai/errors.py +78 -53
  27. lattifai/event/__init__.py +65 -0
  28. lattifai/event/lattifai.py +166 -0
  29. lattifai/mixin.py +49 -32
  30. lattifai/transcription/base.py +8 -2
  31. lattifai/transcription/gemini.py +147 -16
  32. lattifai/transcription/lattifai.py +25 -63
  33. lattifai/types.py +1 -1
  34. lattifai/utils.py +7 -13
  35. lattifai/workflow/__init__.py +28 -4
  36. lattifai/workflow/file_manager.py +2 -5
  37. lattifai/youtube/__init__.py +43 -0
  38. lattifai/youtube/client.py +1265 -0
  39. lattifai/youtube/types.py +23 -0
  40. lattifai-1.3.0.dist-info/METADATA +678 -0
  41. lattifai-1.3.0.dist-info/RECORD +57 -0
  42. {lattifai-1.2.1.dist-info → lattifai-1.3.0.dist-info}/entry_points.txt +1 -2
  43. lattifai/__init__.py +0 -88
  44. lattifai/alignment/sentence_splitter.py +0 -219
  45. lattifai/caption/__init__.py +0 -20
  46. lattifai/caption/caption.py +0 -1467
  47. lattifai/caption/gemini_reader.py +0 -462
  48. lattifai/caption/gemini_writer.py +0 -173
  49. lattifai/caption/supervision.py +0 -34
  50. lattifai/caption/text_parser.py +0 -145
  51. lattifai/cli/app_installer.py +0 -142
  52. lattifai/cli/server.py +0 -44
  53. lattifai/server/app.py +0 -427
  54. lattifai/workflow/youtube.py +0 -577
  55. lattifai-1.2.1.dist-info/METADATA +0 -1134
  56. lattifai-1.2.1.dist-info/RECORD +0 -58
  57. {lattifai-1.2.1.dist-info → lattifai-1.3.0.dist-info}/WHEEL +0 -0
  58. {lattifai-1.2.1.dist-info → lattifai-1.3.0.dist-info}/licenses/LICENSE +0 -0
  59. {lattifai-1.2.1.dist-info → lattifai-1.3.0.dist-info}/top_level.txt +0 -0
lattifai-1.3.0.dist-info/RECORD ADDED
@@ -0,0 +1,57 @@
+ lattifai/_init.py,sha256=ZpiYRpP7kPh6XpQK6nX1zKKvtQE3Xr0T2Ed5S4wDiwU,609
+ lattifai/audio2.py,sha256=JpMvwB0e09hLoffji8zOSlgrIhqUUq_zq0ubT_QMspc,15317
+ lattifai/client.py,sha256=d6WpodzseoSv6aA8jkj4hwiMOz3irSkD15i1hr2JUsQ,18182
+ lattifai/errors.py,sha256=aPTMhTaeQjY0KMifnm-Kpo9MQPsoYsEanZB2f0AA6qQ,11911
+ lattifai/logging.py,sha256=MbUEeOUFlF92pA9v532DiPPWKl03S7UHCJ6Z652cf0w,2860
+ lattifai/mixin.py,sha256=_d-kzu2w5Y4mZTFMEumh30baWzKsQ9n_IS432woi5vQ,26295
+ lattifai/types.py,sha256=JK7KVaZhX89BiKPm4okY0DWLHY1S8aj-YiZXoVH1akw,667
+ lattifai/utils.py,sha256=5LeunAN0OQ1jWoKMIThpXSEOxFYD2dCRTdsglosodUU,7963
+ lattifai/alignment/__init__.py,sha256=aOyC1P5DqESNLpDh6Gu6LyUZAVMba-IKI7Ugz7v9G4w,344
+ lattifai/alignment/lattice1_aligner.py,sha256=MvBpPnhfF8NYtH2ANhQszKNRQROUiSYrBz3aN1bdT3U,11120
+ lattifai/alignment/lattice1_worker.py,sha256=Z7hxaS-nucNsUmrphbD8tgBBYPkJOgQb-85nFON94_I,13041
+ lattifai/alignment/phonemizer.py,sha256=fbhN2DOl39lW4nQWKzyUUTMUabg7v61lB1kj8SKK-Sw,1761
+ lattifai/alignment/punctuation.py,sha256=qLcvuXhBzoEa6bznWZiAB5TAxR6eLr_ZV-PnnCY90UA,1218
+ lattifai/alignment/segmenter.py,sha256=JTbBYEXn8hkFwy0tITORy7nKoUPiNYLfi3w1DJNeHZ0,6303
+ lattifai/alignment/text_align.py,sha256=sF-6Tsf863BhJcii3joeNa6Auv-7l3SiOhh9j8oPGME,14935
+ lattifai/alignment/tokenizer.py,sha256=OIpMGHg1rJ7n97zncDMPpXy32uGOSt1yXiNO4sO6eP0,18839
+ lattifai/cli/__init__.py,sha256=PdqoCTqRSFSWrqL3FjBTa5VzJy_e6Rq0OzyT7YkyHpc,541
+ lattifai/cli/alignment.py,sha256=rqg6wU2vf6RJ058yWVoXft_UJfOCrEpmE-ye5fhTphg,6129
+ lattifai/cli/caption.py,sha256=jkMme73sJ16dkVpRh7O6qjbr14SUeBif00vCTBn7ed0,10339
+ lattifai/cli/diarization.py,sha256=GTd2vnTm6cJN6Q3mFP-ShY9bZBl1_zKzWFu-4HHcMzk,4075
+ lattifai/cli/transcribe.py,sha256=vZIV0TCbZG_IL2F_Mg49cCGSCBinOOFAtROajVTpNWE,7853
+ lattifai/cli/youtube.py,sha256=FJwDl48-cuacP1sdPvX19vdszXdT7EoOZgGYzJpoLeM,6360
+ lattifai/config/__init__.py,sha256=nJUVk03JRj4rujoEmkCkQ8akZF7kqIj7ci3XphU9uVA,1249
+ lattifai/config/alignment.py,sha256=3JUtgHBueIK_lH9PgeBPjuHGL4VvDEYVs9fvylir6bc,5392
+ lattifai/config/caption.py,sha256=OMLsW8QKDWM6A3G5V3Gf-9bgB3D1PC5gO8LiiNNeOwM,7195
+ lattifai/config/client.py,sha256=qqHKFPV4iEjVHCDOuGx7kj-tYFtgZZAszOQRFsNFbO8,2359
+ lattifai/config/diarization.py,sha256=cIkwCfsYqfMns3i6tKWcwBBBkdnhhmB_Eo0TuOPCw9o,2484
+ lattifai/config/event.py,sha256=P-_2yOzSATZSXz-ctlWeJQGOKCbNLFnWLBvUZ8JclyA,3845
+ lattifai/config/media.py,sha256=nxvgC7zeLsthCARPPUbnK2eMJY8R1d-1XgiAsy8kroA,15568
+ lattifai/config/transcription.py,sha256=V0WtZ_p-WsBienRbGyd-zLdX6F_XRsDWGlba_qzwet0,4115
+ lattifai/data/__init__.py,sha256=hdUhvlUjPgb3_Hd_cJ30f2oXHBMZRGzaSafd64b3vYA,168
+ lattifai/data/caption.py,sha256=MVuZiQ47Lr3A1afFqGkqFzpWjPakmsQusQ86t210Y2Y,7800
+ lattifai/diarization/__init__.py,sha256=-ZZ_a5hIQgnlHIOehCTtmVmWOWC2H6eOhSs4AcVtRtk,1782
+ lattifai/diarization/lattifai.py,sha256=tCnFL6ywITqeKR8YoCsYvyJxNoIwoC6GsnI9zkXNB-Q,3128
+ lattifai/event/__init__.py,sha256=PPAWzrkRK8YgWhG6CtIUkb7nH8svd9_zGOhxjz0_dcM,2448
+ lattifai/event/lattifai.py,sha256=QJqUxJsIWryVVoud_qE8af6zoJ89ZyPgHDvQp4OzXg0,5826
+ lattifai/transcription/__init__.py,sha256=vMHciyCEPKhhfM3KjMCeDqnyxU1oghF8g5o5SvpnT_4,2669
+ lattifai/transcription/base.py,sha256=ywRjIGg6emTx1v8PCSPyHcdugR6PvdTl10H64Iu1iqs,4617
+ lattifai/transcription/gemini.py,sha256=p6uZlhPQuzzUsj226Jk-INOt7NF5g4TIN6yEn1ZwrBI,18030
+ lattifai/transcription/lattifai.py,sha256=DA7QSN-a_yIZq79Nc_f6lf8_VWW4qqhyXfoZ1Um-31M,3451
+ lattifai/transcription/prompts/README.md,sha256=X49KWSQVdjWxxWUp4R2w3ZqKrAOi6_kDNHh1hMaQ4PE,694
+ lattifai/transcription/prompts/__init__.py,sha256=G9b42COaCYv3sPPNkHsGDLOMBuVGKt4mXGYal_BYtYQ,1351
+ lattifai/transcription/prompts/gemini/README.md,sha256=rt7f7yDGtaobKBo95LG3u56mqa3ABOXQd0UVgJYtYuo,781
+ lattifai/transcription/prompts/gemini/transcription_gem.txt,sha256=cljzZ--BDgnnKzqVCakr-fTp2Xk38UOsUquvruNX-LU,4600
+ lattifai/workflow/__init__.py,sha256=INpQgc9gZ2Fp-aTHcpR3TEHGtEtPzjOB8T7-jLzVM0E,1547
+ lattifai/workflow/agents.py,sha256=yEOnxnhcTvr1iOhCorNvp8B76P6nQsLRXJCu_rCYFfM,38
+ lattifai/workflow/base.py,sha256=8QoVIBZwJfr5mppJbtUFafHv5QR9lL-XrULjTWD0oBg,6257
+ lattifai/workflow/file_manager.py,sha256=yc29Vb7JNUMJ9rwM_YjkAHfDInl8HMVAl9A7z7XiIOU,32974
+ lattifai/youtube/__init__.py,sha256=_uO3KCx-t6I-JaYFpcYLYpvkbmEOOni3xBqGEbExg68,1587
+ lattifai/youtube/client.py,sha256=VU8FC1N7YYpbc4LeJNAsahNAI1R7e3_7Yjmb1rz7tyI,52878
+ lattifai/youtube/types.py,sha256=80RgBmvM4tRbxqyNv9GU6hr9vPp_yhKrK0RJ_vG2h4E,472
+ lattifai-1.3.0.dist-info/licenses/LICENSE,sha256=xGMLmdFJy6Jkz3Hd0znyQLmcxC93FSZB5isKnEDMoQQ,1066
+ lattifai-1.3.0.dist-info/METADATA,sha256=WMgLRzKiJv_Zn1aoxPjWofNZRE4tSjTWxa16zWNYVTk,23008
+ lattifai-1.3.0.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
+ lattifai-1.3.0.dist-info/entry_points.txt,sha256=MfoqXNjXrhD7VMApHgaHmAECTcGVUMUiR0uqnTg7Ads,502
+ lattifai-1.3.0.dist-info/top_level.txt,sha256=tHSoXF26r-IGfbIP_JoYATqbmf14h5NrnNJGH4j5reI,9
+ lattifai-1.3.0.dist-info/RECORD,,
{lattifai-1.2.1.dist-info → lattifai-1.3.0.dist-info}/entry_points.txt RENAMED
@@ -1,11 +1,10 @@
  [console_scripts]
  lai-align = lattifai.cli.alignment:main
- lai-app-install = lattifai.cli.app_installer:main
  lai-diarize = lattifai.cli.diarization:main
- lai-server = lattifai.cli.server:main
  lai-transcribe = lattifai.cli.transcribe:main
  lai-youtube = lattifai.cli.youtube:main
  laicap-convert = lattifai.cli.caption:main_convert
+ laicap-diff = lattifai.cli.caption:main_diff
  laicap-normalize = lattifai.cli.caption:main_normalize
  laicap-shift = lattifai.cli.caption:main_shift

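Each entry above maps a console command to a module:function target, so installing 1.3.0 registers the new laicap-diff command against lattifai.cli.caption:main_diff and no longer installs lai-app-install or lai-server. A minimal sketch (standard library only; assumes Python 3.10+ and an installed lattifai wheel) for listing the commands a given installation actually registers:

    from importlib.metadata import entry_points

    # Print every console script whose target lives in the lattifai package.
    # Against 1.3.0 this should include laicap-diff and no longer include
    # lai-app-install or lai-server.
    for ep in entry_points(group="console_scripts"):
        if ep.value.startswith("lattifai."):
            print(f"{ep.name} -> {ep.value}")  # e.g. laicap-diff -> lattifai.cli.caption:main_diff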
lattifai/__init__.py DELETED
@@ -1,88 +0,0 @@
- import os
- import warnings
- from importlib.metadata import version
-
- # Suppress SWIG deprecation warnings before any imports
- warnings.filterwarnings("ignore", category=DeprecationWarning, message=".*SwigPy.*")
-
- # Suppress PyTorch transformer nested tensor warning
- warnings.filterwarnings("ignore", category=UserWarning, message=".*enable_nested_tensor.*")
-
- # Disable tokenizers parallelism warning
- os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
- # Re-export I/O classes
- from .caption import Caption
-
- # Re-export client classes
- from .client import LattifAI
-
- # Re-export config classes
- from .config import (
- AUDIO_FORMATS,
- MEDIA_FORMATS,
- VIDEO_FORMATS,
- AlignmentConfig,
- CaptionConfig,
- ClientConfig,
- DiarizationConfig,
- MediaConfig,
- )
- from .errors import (
- AlignmentError,
- APIError,
- AudioFormatError,
- AudioLoadError,
- AudioProcessingError,
- CaptionParseError,
- CaptionProcessingError,
- ConfigurationError,
- DependencyError,
- LatticeDecodingError,
- LatticeEncodingError,
- LattifAIError,
- ModelLoadError,
- )
- from .logging import get_logger, set_log_level, setup_logger
-
- try:
- __version__ = version("lattifai")
- except Exception:
- __version__ = "0.1.0" # fallback version
-
-
- __all__ = [
- # Client classes
- "LattifAI",
- # Config classes
- "AlignmentConfig",
- "ClientConfig",
- "CaptionConfig",
- "DiarizationConfig",
- "MediaConfig",
- "AUDIO_FORMATS",
- "VIDEO_FORMATS",
- "MEDIA_FORMATS",
- # Error classes
- "LattifAIError",
- "AudioProcessingError",
- "AudioLoadError",
- "AudioFormatError",
- "CaptionProcessingError",
- "CaptionParseError",
- "AlignmentError",
- "LatticeEncodingError",
- "LatticeDecodingError",
- "ModelLoadError",
- "DependencyError",
- "APIError",
- "ConfigurationError",
- # Logging
- "setup_logger",
- "get_logger",
- "set_log_level",
- # I/O
- "Caption",
- # Version
- "__version__",
- ]
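For reference, the deleted 1.2.1 __init__.py re-exported the client, config, error, logging, and caption APIs at the package root. The sketch below shows imports that worked against 1.2.1 (names taken from the __all__ list above); the file list and RECORD show 1.3.0 shipping lattifai/_init.py instead, but whether it re-exports the same names is not visible in this diff.

    # Illustrative only: package-root imports provided by the deleted 1.2.1
    # lattifai/__init__.py (names copied from its __all__ above).
    from lattifai import AlignmentConfig, Caption, LattifAI, LattifAIError, __version__

    print(__version__)  # resolved via importlib.metadata, with a "0.1.0" fallback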
lattifai/alignment/sentence_splitter.py DELETED
@@ -1,219 +0,0 @@
- import re
- from typing import List, Optional
-
- from lattifai.caption import Supervision
- from lattifai.utils import _resolve_model_path
-
- END_PUNCTUATION = '.!?"]。!?"】'
-
-
- class SentenceSplitter:
- """Lazy-initialized sentence splitter using wtpsplit."""
-
- def __init__(self, device: str = "cpu", model_hub: Optional[str] = None, lazy_init: bool = True):
- """Initialize sentence splitter with lazy loading.
-
- Args:
- device: Device to run the model on (cpu, cuda, mps)
- model_hub: Model hub to use (None for huggingface, "modelscope" for modelscope)
- """
- self.device = device
- self.model_hub = model_hub
- if lazy_init:
- self._splitter = None
- else:
- self._init_splitter()
-
- def _init_splitter(self):
- """Initialize the sentence splitter model on first use."""
- if self._splitter is not None:
- return
-
- import onnxruntime as ort
- from wtpsplit import SaT
-
- providers = []
- device = self.device
- if device.startswith("cuda") and ort.get_all_providers().count("CUDAExecutionProvider") > 0:
- providers.append("CUDAExecutionProvider")
- elif device.startswith("mps") and ort.get_all_providers().count("MPSExecutionProvider") > 0:
- providers.append("MPSExecutionProvider")
-
- if self.model_hub == "modelscope":
- downloaded_path = _resolve_model_path("LattifAI/OmniTokenizer", model_hub="modelscope")
- sat = SaT(
- f"{downloaded_path}/sat-3l-sm",
- tokenizer_name_or_path=f"{downloaded_path}/xlm-roberta-base",
- ort_providers=providers + ["CPUExecutionProvider"],
- )
- else:
- sat_path = _resolve_model_path("segment-any-text/sat-3l-sm", model_hub="huggingface")
- sat = SaT(
- sat_path,
- tokenizer_name_or_path="facebookAI/xlm-roberta-base",
- hub_prefix="segment-any-text",
- ort_providers=providers + ["CPUExecutionProvider"],
- )
- self._splitter = sat
-
- @staticmethod
- def _resplit_special_sentence_types(sentence: str) -> List[str]:
- """
- Re-split special sentence types.
-
- Examples:
- '[APPLAUSE] >> MIRA MURATI:' -> ['[APPLAUSE]', '>> MIRA MURATI:']
- '[MUSIC] >> SPEAKER:' -> ['[MUSIC]', '>> SPEAKER:']
-
- Special handling patterns:
- 1. Separate special marks at the beginning (e.g., [APPLAUSE], [MUSIC], etc.) from subsequent speaker marks
- 2. Use speaker marks (>> or other separators) as split points
-
- Args:
- sentence: Input sentence string
-
- Returns:
- List of re-split sentences. If no special marks are found, returns the original sentence in a list
- """
- # Detect special mark patterns: [SOMETHING] >> SPEAKER:
- # or other forms like [SOMETHING] SPEAKER:
-
- # Pattern 1: [mark] HTML-encoded separator speaker:
- pattern1 = r"^(\[[^\]]+\])\s+(>>|>>)\s+(.+)$"
- match1 = re.match(pattern1, sentence.strip())
- if match1:
- special_mark = match1.group(1)
- separator = match1.group(2)
- speaker_part = match1.group(3)
- return [special_mark, f"{separator} {speaker_part}"]
-
- # Pattern 2: [mark] speaker:
- pattern2 = r"^(\[[^\]]+\])\s+([^:]+:)(.*)$"
- match2 = re.match(pattern2, sentence.strip())
- if match2:
- special_mark = match2.group(1)
- speaker_label = match2.group(2)
- remaining = match2.group(3).strip()
- if remaining:
- return [special_mark, f"{speaker_label} {remaining}"]
- else:
- return [special_mark, speaker_label]
-
- # If no special pattern matches, return the original sentence
- return [sentence]
-
- def split_sentences(self, supervisions: List[Supervision], strip_whitespace=True) -> List[Supervision]:
- """Split supervisions into sentences using the sentence splitter.
-
- Careful about speaker changes.
-
- Args:
- supervisions: List of Supervision objects to split
- strip_whitespace: Whether to strip whitespace from split sentences
-
- Returns:
- List of Supervision objects with split sentences
- """
- self._init_splitter()
-
- texts, speakers = [], []
- text_len, sidx = 0, 0
-
- def flush_segment(end_idx: int, speaker: Optional[str] = None):
- """Flush accumulated text from sidx to end_idx with given speaker."""
- nonlocal text_len, sidx
- if sidx <= end_idx:
- if len(speakers) < len(texts) + 1:
- speakers.append(speaker)
- text = " ".join(sup.text for sup in supervisions[sidx : end_idx + 1])
- texts.append(text)
- sidx = end_idx + 1
- text_len = 0
-
- for s, supervision in enumerate(supervisions):
- text_len += len(supervision.text)
- is_last = s == len(supervisions) - 1
-
- if supervision.speaker:
- # Flush previous segment without speaker (if any)
- if sidx < s:
- flush_segment(s - 1, None)
- text_len = len(supervision.text)
-
- # Check if we should flush this speaker's segment now
- next_has_speaker = not is_last and supervisions[s + 1].speaker
- if is_last or next_has_speaker:
- flush_segment(s, supervision.speaker)
- else:
- speakers.append(supervision.speaker)
-
- elif text_len >= 2000 or is_last:
- flush_segment(s, None)
-
- assert len(speakers) == len(texts), f"len(speakers)={len(speakers)} != len(texts)={len(texts)}"
- sentences = self._splitter.split(texts, threshold=0.15, strip_whitespace=strip_whitespace, batch_size=8)
-
- supervisions, remainder = [], ""
- for k, (_speaker, _sentences) in enumerate(zip(speakers, sentences)):
- # Prepend remainder from previous iteration to the first sentence
- if _sentences and remainder:
- _sentences[0] = remainder + _sentences[0]
- remainder = ""
-
- if not _sentences:
- continue
-
- # Process and re-split special sentence types
- processed_sentences = []
- for s, _sentence in enumerate(_sentences):
- if remainder:
- _sentence = remainder + _sentence
- remainder = ""
- # Detect and split special sentence types: e.g., '[APPLAUSE] &gt;&gt; MIRA MURATI:' -> ['[APPLAUSE]', '&gt;&gt; MIRA MURATI:'] # noqa: E501
- resplit_parts = self._resplit_special_sentence_types(_sentence)
- if any(resplit_parts[-1].endswith(sp) for sp in [":", ":"]):
- if s < len(_sentences) - 1:
- _sentences[s + 1] = resplit_parts[-1] + " " + _sentences[s + 1]
- else: # last part
- remainder = resplit_parts[-1] + " "
- processed_sentences.extend(resplit_parts[:-1])
- else:
- processed_sentences.extend(resplit_parts)
- _sentences = processed_sentences
-
- if not _sentences:
- if remainder:
- _sentences, remainder = [remainder.strip()], ""
- else:
- continue
-
- if any(_sentences[-1].endswith(ep) for ep in END_PUNCTUATION):
- supervisions.extend(
- Supervision(text=text, speaker=(_speaker if s == 0 else None)) for s, text in enumerate(_sentences)
- )
- _speaker = None # reset speaker after use
- else:
- supervisions.extend(
- Supervision(text=text, speaker=(_speaker if s == 0 else None))
- for s, text in enumerate(_sentences[:-1])
- )
- remainder = _sentences[-1] + " " + remainder
- if k < len(speakers) - 1 and speakers[k + 1] is not None: # next speaker is set
- supervisions.append(
- Supervision(text=remainder.strip(), speaker=_speaker if len(_sentences) == 1 else None)
- )
- remainder = ""
- elif len(_sentences) == 1:
- if k == len(speakers) - 1:
- pass # keep _speaker for the last supervision
- else:
- assert speakers[k + 1] is None
- speakers[k + 1] = _speaker
- else:
- assert len(_sentences) > 1
- _speaker = None # reset speaker if sentence not ended
-
- if remainder.strip():
- supervisions.append(Supervision(text=remainder.strip(), speaker=_speaker))
-
- return supervisions
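The class removed above lazily loads wtpsplit's SaT segmentation model, batches supervision text by speaker, splits it into sentences, then reattaches speaker labels and carries unfinished fragments forward. A sketch of how the deleted 1.2.1 class was driven, based only on the signatures shown above (the Supervision inputs are illustrative; 1.3.0 drops this module, and its replacement is not shown in this diff):

    from lattifai.alignment.sentence_splitter import SentenceSplitter  # module removed in 1.3.0
    from lattifai.caption import Supervision  # import path as used by the deleted module

    splitter = SentenceSplitter(device="cpu", lazy_init=True)  # SaT model loads on first use
    resegmented = splitter.split_sentences(
        [
            Supervision(text="[APPLAUSE] >> MIRA MURATI: Welcome everyone. Thanks for", speaker="MIRA MURATI"),
            Supervision(text="joining us today.", speaker=None),
        ]
    )
    for sup in resegmented:
        print(sup.speaker, sup.text)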
lattifai/caption/__init__.py DELETED
@@ -1,20 +0,0 @@
- from typing import List, Optional
-
- from lhotse.utils import Pathlike
-
- from ..config.caption import InputCaptionFormat
- from .caption import Caption
- from .gemini_reader import GeminiReader, GeminiSegment
- from .gemini_writer import GeminiWriter
- from .supervision import Supervision
- from .text_parser import normalize_text
-
- __all__ = [
- "Caption",
- "Supervision",
- "GeminiReader",
- "GeminiWriter",
- "GeminiSegment",
- "normalize_text",
- "InputCaptionFormat",
- ]
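The deleted lattifai/caption package was the public import path for the caption I/O layer in 1.2.1, as sketched below. The file list shows a new lattifai/data/caption.py in 1.3.0, but the replacement import path is not part of this diff, so this should not be read as a simple rename.

    # Import surface removed with lattifai/caption/__init__.py (valid for 1.2.1 only).
    from lattifai.caption import (
        Caption,
        GeminiReader,
        GeminiSegment,
        GeminiWriter,
        Supervision,
        normalize_text,
    )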