lattifai 0.4.5__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. lattifai/__init__.py +61 -47
  2. lattifai/alignment/__init__.py +6 -0
  3. lattifai/alignment/lattice1_aligner.py +119 -0
  4. lattifai/alignment/lattice1_worker.py +185 -0
  5. lattifai/{tokenizer → alignment}/phonemizer.py +4 -4
  6. lattifai/alignment/segmenter.py +166 -0
  7. lattifai/{tokenizer → alignment}/tokenizer.py +244 -169
  8. lattifai/audio2.py +211 -0
  9. lattifai/caption/__init__.py +20 -0
  10. lattifai/caption/caption.py +1275 -0
  11. lattifai/{io → caption}/gemini_reader.py +30 -30
  12. lattifai/{io → caption}/gemini_writer.py +17 -17
  13. lattifai/{io → caption}/supervision.py +4 -3
  14. lattifai/caption/text_parser.py +145 -0
  15. lattifai/cli/__init__.py +17 -0
  16. lattifai/cli/alignment.py +153 -0
  17. lattifai/cli/caption.py +204 -0
  18. lattifai/cli/server.py +19 -0
  19. lattifai/cli/transcribe.py +197 -0
  20. lattifai/cli/youtube.py +128 -0
  21. lattifai/client.py +460 -251
  22. lattifai/config/__init__.py +20 -0
  23. lattifai/config/alignment.py +73 -0
  24. lattifai/config/caption.py +178 -0
  25. lattifai/config/client.py +46 -0
  26. lattifai/config/diarization.py +67 -0
  27. lattifai/config/media.py +335 -0
  28. lattifai/config/transcription.py +84 -0
  29. lattifai/diarization/__init__.py +5 -0
  30. lattifai/diarization/lattifai.py +89 -0
  31. lattifai/errors.py +98 -91
  32. lattifai/logging.py +116 -0
  33. lattifai/mixin.py +552 -0
  34. lattifai/server/app.py +420 -0
  35. lattifai/transcription/__init__.py +76 -0
  36. lattifai/transcription/base.py +108 -0
  37. lattifai/transcription/gemini.py +219 -0
  38. lattifai/transcription/lattifai.py +103 -0
  39. lattifai/{workflows → transcription}/prompts/__init__.py +4 -4
  40. lattifai/types.py +30 -0
  41. lattifai/utils.py +16 -44
  42. lattifai/workflow/__init__.py +22 -0
  43. lattifai/workflow/agents.py +6 -0
  44. lattifai/{workflows → workflow}/base.py +22 -22
  45. lattifai/{workflows → workflow}/file_manager.py +239 -215
  46. lattifai/workflow/youtube.py +564 -0
  47. lattifai-1.0.0.dist-info/METADATA +736 -0
  48. lattifai-1.0.0.dist-info/RECORD +52 -0
  49. {lattifai-0.4.5.dist-info → lattifai-1.0.0.dist-info}/WHEEL +1 -1
  50. lattifai-1.0.0.dist-info/entry_points.txt +13 -0
  51. {lattifai-0.4.5.dist-info → lattifai-1.0.0.dist-info}/licenses/LICENSE +1 -1
  52. lattifai/base_client.py +0 -126
  53. lattifai/bin/__init__.py +0 -3
  54. lattifai/bin/agent.py +0 -325
  55. lattifai/bin/align.py +0 -296
  56. lattifai/bin/cli_base.py +0 -25
  57. lattifai/bin/subtitle.py +0 -210
  58. lattifai/io/__init__.py +0 -42
  59. lattifai/io/reader.py +0 -85
  60. lattifai/io/text_parser.py +0 -75
  61. lattifai/io/utils.py +0 -15
  62. lattifai/io/writer.py +0 -90
  63. lattifai/tokenizer/__init__.py +0 -3
  64. lattifai/workers/__init__.py +0 -3
  65. lattifai/workers/lattice1_alpha.py +0 -284
  66. lattifai/workflows/__init__.py +0 -34
  67. lattifai/workflows/agents.py +0 -10
  68. lattifai/workflows/gemini.py +0 -167
  69. lattifai/workflows/prompts/README.md +0 -22
  70. lattifai/workflows/prompts/gemini/README.md +0 -24
  71. lattifai/workflows/prompts/gemini/transcription_gem.txt +0 -81
  72. lattifai/workflows/youtube.py +0 -931
  73. lattifai-0.4.5.dist-info/METADATA +0 -808
  74. lattifai-0.4.5.dist-info/RECORD +0 -39
  75. lattifai-0.4.5.dist-info/entry_points.txt +0 -3
  76. {lattifai-0.4.5.dist-info → lattifai-1.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,335 @@
1
+ """Media I/O configuration for LattifAI."""
2
+
3
+ from dataclasses import dataclass, field, replace
4
+ from pathlib import Path
5
+ from typing import Optional
6
+ from urllib.parse import urlparse
7
+
8
+ from lhotse.utils import Pathlike
9
+
10
+ # Supported media formats for both audio and video content
11
+ AUDIO_FORMATS = (
12
+ "aac",
13
+ "aiff",
14
+ "alac",
15
+ "flac",
16
+ "m4a",
17
+ "mp3",
18
+ "ogg",
19
+ "opus",
20
+ "wav",
21
+ "wma",
22
+ )
23
+
24
+ VIDEO_FORMATS = (
25
+ "3gp",
26
+ "avi",
27
+ "flv",
28
+ "m4v",
29
+ "mkv",
30
+ "mov",
31
+ "mp4",
32
+ "mpeg",
33
+ "mpg",
34
+ "webm",
35
+ "wmv",
36
+ )
37
+
38
+ MEDIA_FORMATS = tuple(sorted(set(AUDIO_FORMATS + VIDEO_FORMATS)))
39
+
40
+
41
+ @dataclass
42
+ class MediaConfig:
43
+ """Unified configuration for audio/video input and output handling."""
44
+
45
+ # Input configuration (local filesystem path or URL)
46
+ input_path: Optional[str] = None
47
+ """Local file path or URL to audio/video content."""
48
+
49
+ media_format: str = "auto"
50
+ """Media format (mp3, wav, mp4, etc.) or 'auto' for automatic detection."""
51
+
52
+ sample_rate: Optional[int] = None
53
+ """Audio sample rate in Hz (e.g., 16000, 44100)."""
54
+
55
+ channels: Optional[int] = None
56
+ """Number of audio channels (1=mono, 2=stereo)."""
57
+
58
+ channel_selector: Optional[str | int] = "average"
59
+ """Audio channel selection strategy: 'average', 'left', 'right', or channel index."""
60
+
61
+ # Output / download configuration
62
+ output_dir: Path = field(default_factory=lambda: Path.cwd())
63
+ """Directory for output files (default: current working directory)."""
64
+
65
+ output_path: Optional[str] = None
66
+ """Full path for output file (overrides output_dir + filename)."""
67
+
68
+ output_format: Optional[str] = None
69
+ """Output media format (mp3, wav, mp4, etc.)."""
70
+
71
+ prefer_audio: bool = True
72
+ """Prefer audio format when 'auto' is specified."""
73
+
74
+ default_audio_format: str = "mp3"
75
+ """Default audio format when no format is specified."""
76
+
77
+ default_video_format: str = "mp4"
78
+ """Default video format when no format is specified."""
79
+
80
+ force_overwrite: bool = False
81
+ """Overwrite existing output files without prompting."""
82
+
83
+ def __post_init__(self) -> None:
84
+ """Validate configuration and normalize paths/formats."""
85
+ self._setup_output_directory()
86
+ self._validate_default_formats()
87
+ self._normalize_media_format()
88
+ self._process_input_path()
89
+ self._process_output_path()
90
+
91
+ def _setup_output_directory(self) -> None:
92
+ """Ensure output directory exists and is valid."""
93
+ resolved_output_dir = self._ensure_dir(self.output_dir)
94
+ self.output_dir = resolved_output_dir
95
+
96
+ def _validate_default_formats(self) -> None:
97
+ """Validate default audio and video formats."""
98
+ self.default_audio_format = self._normalize_format(self.default_audio_format)
99
+ self.default_video_format = self._normalize_format(self.default_video_format)
100
+
101
+ def _normalize_media_format(self) -> None:
102
+ """Normalize media format, allowing 'auto' during initialization."""
103
+ self.media_format = self._normalize_format(self.media_format, allow_auto=True)
104
+
105
+ def _process_input_path(self) -> None:
106
+ """Process and validate input path if provided."""
107
+ if self.input_path is None:
108
+ return
109
+
110
+ if self._is_url(self.input_path):
111
+ normalized_url = self._normalize_url(self.input_path)
112
+ self.input_path = normalized_url
113
+ if self.media_format == "auto":
114
+ inferred_format = self._infer_format_from_source(normalized_url)
115
+ if inferred_format:
116
+ self.media_format = self._normalize_format(inferred_format)
117
+ else:
118
+ # For local paths, normalize to string without validation here
119
+ # Validation will be done in check_input_sanity()
120
+ self.input_path = str(Path(self.input_path).expanduser())
121
+ if self.media_format == "auto":
122
+ inferred_format = Path(self.input_path).suffix.lstrip(".").lower()
123
+ if inferred_format:
124
+ self.media_format = self._normalize_format(inferred_format)
125
+
126
+ # Validate input after setting
127
+ self.check_input_sanity()
128
+
129
+ def _process_output_path(self) -> None:
130
+ """Process output path and format."""
131
+ if self.output_path is not None:
132
+ self.set_output_path(self.output_path)
133
+ elif self.output_format is not None:
134
+ self.output_format = self._normalize_format(self.output_format)
135
+ else:
136
+ self.output_format = None
137
+
138
+ # ------------------------------------------------------------------
139
+ # Public helpers
140
+ # ------------------------------------------------------------------
141
+ def clone(self, **updates: object) -> "MediaConfig":
142
+ """Return a shallow copy of the config with optional overrides."""
143
+ return replace(self, **updates)
144
+
145
+ def normalize_format(self, media_format: Optional[str] = None, *, prefer_audio: Optional[bool] = None) -> str:
146
+ """Resolve a media format (handling the special "auto" value)."""
147
+ prefer_audio = self.prefer_audio if prefer_audio is None else prefer_audio
148
+ candidate = (media_format or self.media_format or "auto").lower()
149
+ if candidate == "auto":
150
+ candidate = self.default_audio_format if prefer_audio else self.default_video_format
151
+ return self._normalize_format(candidate)
152
+
153
+ def is_audio_format(self, media_format: Optional[str] = None) -> bool:
154
+ """Check whether the provided (or effective) format is an audio format."""
155
+ return self.normalize_format(media_format) in AUDIO_FORMATS
156
+
157
+ def is_video_format(self, media_format: Optional[str] = None) -> bool:
158
+ """Check whether the provided (or effective) format is a video format."""
159
+ return self.normalize_format(media_format) in VIDEO_FORMATS
160
+
161
+ def set_media_format(self, media_format: Optional[str], *, prefer_audio: Optional[bool] = None) -> str:
162
+ """Update media_format and return the normalized value."""
163
+ normalized = self.normalize_format(media_format, prefer_audio=prefer_audio)
164
+ self.media_format = normalized
165
+ return normalized
166
+
167
+ def set_input_path(self, path: Pathlike) -> Path | str:
168
+ """Update the input path (local path or URL) and infer format if possible."""
169
+ path = str(path)
170
+ if self._is_url(path):
171
+ normalized_url = self._normalize_url(path)
172
+ self.input_path = normalized_url
173
+ inferred_format = self._infer_format_from_source(normalized_url)
174
+ if inferred_format:
175
+ self.media_format = self._normalize_format(inferred_format)
176
+ self.check_input_sanity()
177
+ return normalized_url
178
+
179
+ resolved = self._ensure_file(path)
180
+ self.input_path = str(resolved)
181
+ inferred_format = resolved.suffix.lstrip(".").lower()
182
+ if inferred_format:
183
+ self.media_format = self._normalize_format(inferred_format)
184
+ self.check_input_sanity()
185
+ return resolved
186
+
187
+ def set_output_dir(self, output_dir: Pathlike) -> Path:
188
+ """Update the output directory (creating it if needed)."""
189
+ resolved = self._ensure_dir(output_dir)
190
+ self.output_dir = resolved
191
+ return resolved
192
+
193
+ def set_output_path(self, output_path: Pathlike) -> Path:
194
+ """Update the output path and synchronize output format and directory."""
195
+ resolved = self._ensure_file(output_path, must_exist=False, create_parent=True)
196
+ if not resolved.suffix:
197
+ raise ValueError("output_path must include a filename with an extension.")
198
+ fmt = resolved.suffix.lstrip(".").lower()
199
+ self.output_path = str(resolved)
200
+ self.output_dir = resolved.parent
201
+ self.output_format = self._normalize_format(fmt)
202
+ return resolved
203
+
204
+ def prepare_output_path(self, stem: Optional[str] = None, format: Optional[str] = None) -> Path:
205
+ """Return an output path, creating one if not set yet."""
206
+ if self.output_path:
207
+ return Path(self.output_path)
208
+
209
+ effective_format = self.normalize_format(format or self.output_format or self.media_format)
210
+ base_name = stem or (self._derive_input_stem() or "output")
211
+ candidate = self.output_dir / f"{base_name}.{effective_format}"
212
+ self.output_path = str(candidate)
213
+ self.output_format = effective_format
214
+ return candidate
215
+
216
+ def is_input_remote(self) -> bool:
217
+ """Return True if the configured input is a URL."""
218
+ return bool(self.input_path and self._is_url(self.input_path))
219
+
220
+ def check_input_sanity(self) -> None:
221
+ """
222
+ Validate that input_path is properly configured and accessible.
223
+
224
+ Raises:
225
+ ValueError: If input_path is not set or is invalid.
226
+ FileNotFoundError: If input_path is a local file that does not exist.
227
+ """
228
+ if not self.input_path:
229
+ raise ValueError("input_path is required but not set in MediaConfig")
230
+
231
+ if self._is_url(self.input_path):
232
+ # For URLs, validate that it's properly formatted
233
+ try:
234
+ parsed = urlparse(self.input_path)
235
+ if not parsed.scheme or not parsed.netloc:
236
+ raise ValueError(
237
+ f"Invalid URL format for input_path: '{self.input_path}'. "
238
+ "URL must include scheme (http/https) and domain."
239
+ )
240
+ except (ValueError, AttributeError) as e:
241
+ # ValueError: Invalid URL format
242
+ # AttributeError: urlparse issues with malformed input
243
+ raise ValueError(f"Failed to parse input_path as URL: {e}") from e
244
+ else:
245
+ # For local files, validate that the file exists and is accessible
246
+ input_file = Path(self.input_path).expanduser()
247
+ if not input_file.exists():
248
+ raise FileNotFoundError(
249
+ f"Input media file does not exist: '{input_file}'. " "Please check the path and try again."
250
+ )
251
+ if not input_file.is_file():
252
+ raise ValueError(
253
+ f"Input media path is not a file: '{input_file}'. " "Expected a valid media file path."
254
+ )
255
+
256
+ # ------------------------------------------------------------------
257
+ # Internal utilities
258
+ # ------------------------------------------------------------------
259
+ def _ensure_dir(self, directory: Pathlike) -> Path:
260
+ path = Path(directory).expanduser()
261
+ path.mkdir(parents=True, exist_ok=True)
262
+ if not path.is_dir():
263
+ raise NotADirectoryError(f"Output directory '{path}' is not a directory.")
264
+ return path
265
+
266
+ def _ensure_file(self, path: Pathlike, *, must_exist: bool = True, create_parent: bool = False) -> Path:
267
+ file_path = Path(path).expanduser()
268
+ if must_exist:
269
+ if not file_path.exists():
270
+ raise FileNotFoundError(f"Input media path '{file_path}' does not exist.")
271
+ if not file_path.is_file():
272
+ raise ValueError(f"Input media path '{file_path}' is not a file.")
273
+ else:
274
+ if create_parent:
275
+ file_path.parent.mkdir(parents=True, exist_ok=True)
276
+ return file_path
277
+
278
+ def _normalize_format(self, media_format: Optional[str], *, allow_auto: bool = False) -> str:
279
+ if media_format is None:
280
+ raise ValueError("media_format cannot be None")
281
+ normalized = media_format.strip().lower()
282
+ if not normalized:
283
+ raise ValueError("media_format cannot be empty")
284
+ if normalized == "auto":
285
+ if allow_auto:
286
+ return normalized
287
+ normalized = self.default_audio_format if self.prefer_audio else self.default_video_format
288
+ if normalized not in MEDIA_FORMATS:
289
+ raise ValueError(
290
+ "Unsupported media format '{fmt}'. Supported formats: {supported}".format(
291
+ fmt=media_format,
292
+ supported=", ".join(MEDIA_FORMATS),
293
+ )
294
+ )
295
+ return normalized
296
+
297
+ def _clean_url_escapes(self, url: str) -> str:
298
+ """Remove shell escape backslashes from URL special characters."""
299
+ return url.strip().replace(r"\?", "?").replace(r"\=", "=").replace(r"\&", "&")
300
+
301
+ def _is_url(self, value: Pathlike) -> bool:
302
+ if not isinstance(value, str):
303
+ return False
304
+ cleaned = self._clean_url_escapes(value)
305
+ parsed = urlparse(cleaned)
306
+ return bool(parsed.scheme and parsed.netloc)
307
+
308
+ def _normalize_url(self, url: str) -> str:
309
+ cleaned = self._clean_url_escapes(url)
310
+ parsed = urlparse(cleaned)
311
+ if not parsed.scheme or not parsed.netloc:
312
+ raise ValueError("input_path must be an absolute URL when provided as a remote source.")
313
+ return cleaned
314
+
315
+ def _infer_format_from_source(self, source: str) -> Optional[str]:
316
+ path_segment = Path(urlparse(source).path) if self._is_url(source) else Path(source)
317
+ suffix = path_segment.suffix.lstrip(".").lower()
318
+ return suffix or None
319
+
320
+ def _derive_input_stem(self) -> Optional[str]:
321
+ if not self.input_path:
322
+ return None
323
+ if self.is_input_remote():
324
+ path_segment = Path(urlparse(self.input_path).path)
325
+ stem = path_segment.stem
326
+ return stem or None
327
+ return Path(self.input_path).stem or None
328
+
329
+
330
+ __all__ = [
331
+ "MediaConfig",
332
+ "AUDIO_FORMATS",
333
+ "VIDEO_FORMATS",
334
+ "MEDIA_FORMATS",
335
+ ]
@@ -0,0 +1,84 @@
1
+ """Transcription service configuration for LattifAI."""
2
+
3
+ import os
4
+ from dataclasses import dataclass, field
5
+ from typing import TYPE_CHECKING, Literal, Optional
6
+
7
+ from ..utils import _select_device
8
+
9
+ if TYPE_CHECKING:
10
+ from ..base_client import SyncAPIClient
11
+
12
SUPPORTED_TRANSCRIPTION_MODELS = Literal[
    "gemini-2.5-pro",
    "gemini-3-pro-preview",
    "nvidia/parakeet-tdt-0.6b-v3",
    "nvidia/canary-1b-v2",
    "iic/SenseVoiceSmall",
]


@dataclass
class TranscriptionConfig:
    """
    Transcription service configuration.

    Settings for audio/video transcription using various providers.
    """

    model_name: SUPPORTED_TRANSCRIPTION_MODELS = "nvidia/parakeet-tdt-0.6b-v3"
    """Model name for transcription."""

    gemini_api_key: Optional[str] = None
    """Gemini API key. If None, reads from GEMINI_API_KEY environment variable."""

    device: Literal["cpu", "cuda", "mps", "auto"] = "auto"
    """Computation device for transcription models."""

    max_retries: int = 0
    """Maximum number of retry attempts for failed transcription requests."""

    force_overwrite: bool = False
    """Force overwrite existing transcription files."""

    verbose: bool = False
    """Enable debug logging for transcription operations."""

    language: Optional[str] = None
    """Target language code for transcription (e.g., 'en', 'zh', 'ja')."""

    lattice_model_path: Optional[str] = None
    """Path to local LattifAI model. Will be auto-set in LattifAI client."""

    # NOTE(review): "SyncAPIClient" is imported under TYPE_CHECKING from
    # ``..base_client``, which this release's file listing shows as removed;
    # confirm where the type lives now.
    client_wrapper: Optional["SyncAPIClient"] = field(default=None, repr=False)
    """Reference to the SyncAPIClient instance. Auto-set during client initialization."""

    def __post_init__(self):
        """Validate and auto-populate configuration after initialization.

        All argument validation runs first, so an invalid configuration fails
        before the ``.env`` file is loaded into the process environment.

        Raises:
            ValueError: If ``model_name`` is unsupported, ``max_retries`` is
                negative, or ``device`` is not a recognised device string.
        """
        from typing import get_args

        supported_models = get_args(SUPPORTED_TRANSCRIPTION_MODELS)
        if self.model_name not in supported_models:
            raise ValueError(
                f"Unsupported model_name: '{self.model_name}'. "
                f"Supported models are: {supported_models}"
            )

        # Validate max_retries
        if self.max_retries < 0:
            raise ValueError("max_retries must be non-negative")

        # Validate device. Indexed CUDA devices ("cuda:0", ...) are accepted in
        # addition to the named ones; non-string values are rejected with a
        # ValueError instead of failing on ``.startswith``.
        named_devices = ("cpu", "cuda", "mps", "auto")
        device_ok = isinstance(self.device, str) and (
            self.device in named_devices or self.device.startswith("cuda:")
        )
        if not device_ok:
            raise ValueError(f"device must be one of {named_devices} or 'cuda:N', got '{self.device}'")

        # Load environment variables from .env file
        from dotenv import find_dotenv, load_dotenv

        # Try to find and load .env file from current directory or parent directories
        load_dotenv(find_dotenv(usecwd=True))

        # Auto-load Gemini API key from environment if not provided
        if self.gemini_api_key is None:
            self.gemini_api_key = os.environ.get("GEMINI_API_KEY")

        if self.device == "auto":
            # _select_device comes from ..utils; presumably it resolves "auto"
            # to a concrete device string — verify against its definition.
            self.device = _select_device(self.device)
@@ -0,0 +1,5 @@
1
+ """Speaker diarization module for LattifAI."""
2
+
3
+ from .lattifai import LattifAIDiarizer
4
+
5
+ __all__ = ["LattifAIDiarizer"]
@@ -0,0 +1,89 @@
1
+ """LattifAI speaker diarization implementation."""
2
+
3
+ import logging
4
+ from collections import defaultdict
5
+ from typing import List, Optional, Tuple
6
+
7
+ import torch
8
+ from tgt import Interval, IntervalTier, TextGrid
9
+
10
+ from lattifai.audio2 import AudioData
11
+ from lattifai.caption import Supervision
12
+ from lattifai.config.diarization import DiarizationConfig
13
+ from lattifai.logging import get_logger
14
+
15
formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
# NOTE(review): this configures the root logger as an import-time side effect;
# kept as-is because callers may rely on it.
logging.basicConfig(format=formatter, level=logging.INFO)


NOT_KNOWN = "NotKnown"


class LattifAIDiarizer:
    """
    Thin wrapper around the diarizer provided by ``lattifai_core.diarization``.

    The core diarizer is constructed on first use (see ``diarizer``), and both
    public methods forward their arguments to it unchanged.
    """

    def __init__(self, config: Optional[DiarizationConfig] = None):
        """
        Store the configuration and prepare a logger; no model is loaded yet.

        Args:
            config: Diarization configuration. A default ``DiarizationConfig()``
                is created when omitted.
        """
        self.config = DiarizationConfig() if config is None else config
        self.logger = get_logger("diarization")

        # Populated lazily by the ``diarizer`` property.
        self._diarizer = None

    @property
    def name(self) -> str:
        """Human-readable name of the diarizer."""
        return "LattifAI_Diarizer"

    @property
    def diarizer(self):
        """Return the core diarizer, constructing it on first access."""
        if self._diarizer is not None:
            return self._diarizer

        # Imported here rather than at module level, so lattifai_core is only
        # needed once a diarizer is actually requested.
        from lattifai_core.diarization import LattifAIDiarizer as CoreLattifAIDiarizer

        self._diarizer = CoreLattifAIDiarizer(config=self.config)
        return self._diarizer

    def diarize(
        self,
        input_media: AudioData,
        num_speakers: Optional[int] = None,
        min_speakers: Optional[int] = None,
        max_speakers: Optional[int] = None,
    ) -> TextGrid:
        """Perform speaker diarization on the input audio.

        The speaker-count arguments are forwarded as keywords to the core
        diarizer's ``diarize`` method.
        """
        speaker_bounds = {
            "num_speakers": num_speakers,
            "min_speakers": min_speakers,
            "max_speakers": max_speakers,
        }
        return self.diarizer.diarize(input_media, **speaker_bounds)

    def diarize_with_alignments(
        self,
        input_media: AudioData,
        alignments: List[Supervision],
        diarization: Optional[TextGrid] = None,
        num_speakers: Optional[int] = None,
        min_speakers: Optional[int] = None,
        max_speakers: Optional[int] = None,
    ) -> Tuple[TextGrid, List[Supervision]]:
        """Diarize the given media input and return alignments with refined speaker labels.

        Every argument is forwarded to the core diarizer's
        ``diarize_with_alignments`` method.
        """
        forwarded = {
            "alignments": alignments,
            "diarization": diarization,
            "num_speakers": num_speakers,
            "min_speakers": min_speakers,
            "max_speakers": max_speakers,
        }
        return self.diarizer.diarize_with_alignments(input_media, **forwarded)