lattifai-0.4.5-py3-none-any.whl → lattifai-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. lattifai/__init__.py +61 -47
  2. lattifai/alignment/__init__.py +6 -0
  3. lattifai/alignment/lattice1_aligner.py +119 -0
  4. lattifai/alignment/lattice1_worker.py +185 -0
  5. lattifai/{tokenizer → alignment}/phonemizer.py +4 -4
  6. lattifai/alignment/segmenter.py +166 -0
  7. lattifai/{tokenizer → alignment}/tokenizer.py +244 -169
  8. lattifai/audio2.py +211 -0
  9. lattifai/caption/__init__.py +20 -0
  10. lattifai/caption/caption.py +1275 -0
  11. lattifai/{io → caption}/gemini_reader.py +30 -30
  12. lattifai/{io → caption}/gemini_writer.py +17 -17
  13. lattifai/{io → caption}/supervision.py +4 -3
  14. lattifai/caption/text_parser.py +145 -0
  15. lattifai/cli/__init__.py +17 -0
  16. lattifai/cli/alignment.py +153 -0
  17. lattifai/cli/caption.py +204 -0
  18. lattifai/cli/server.py +19 -0
  19. lattifai/cli/transcribe.py +197 -0
  20. lattifai/cli/youtube.py +128 -0
  21. lattifai/client.py +460 -251
  22. lattifai/config/__init__.py +20 -0
  23. lattifai/config/alignment.py +73 -0
  24. lattifai/config/caption.py +178 -0
  25. lattifai/config/client.py +46 -0
  26. lattifai/config/diarization.py +67 -0
  27. lattifai/config/media.py +335 -0
  28. lattifai/config/transcription.py +84 -0
  29. lattifai/diarization/__init__.py +5 -0
  30. lattifai/diarization/lattifai.py +89 -0
  31. lattifai/errors.py +98 -91
  32. lattifai/logging.py +116 -0
  33. lattifai/mixin.py +552 -0
  34. lattifai/server/app.py +420 -0
  35. lattifai/transcription/__init__.py +76 -0
  36. lattifai/transcription/base.py +108 -0
  37. lattifai/transcription/gemini.py +219 -0
  38. lattifai/transcription/lattifai.py +103 -0
  39. lattifai/{workflows → transcription}/prompts/__init__.py +4 -4
  40. lattifai/types.py +30 -0
  41. lattifai/utils.py +16 -44
  42. lattifai/workflow/__init__.py +22 -0
  43. lattifai/workflow/agents.py +6 -0
  44. lattifai/{workflows → workflow}/base.py +22 -22
  45. lattifai/{workflows → workflow}/file_manager.py +239 -215
  46. lattifai/workflow/youtube.py +564 -0
  47. lattifai-1.0.0.dist-info/METADATA +736 -0
  48. lattifai-1.0.0.dist-info/RECORD +52 -0
  49. {lattifai-0.4.5.dist-info → lattifai-1.0.0.dist-info}/WHEEL +1 -1
  50. lattifai-1.0.0.dist-info/entry_points.txt +13 -0
  51. {lattifai-0.4.5.dist-info → lattifai-1.0.0.dist-info}/licenses/LICENSE +1 -1
  52. lattifai/base_client.py +0 -126
  53. lattifai/bin/__init__.py +0 -3
  54. lattifai/bin/agent.py +0 -325
  55. lattifai/bin/align.py +0 -296
  56. lattifai/bin/cli_base.py +0 -25
  57. lattifai/bin/subtitle.py +0 -210
  58. lattifai/io/__init__.py +0 -42
  59. lattifai/io/reader.py +0 -85
  60. lattifai/io/text_parser.py +0 -75
  61. lattifai/io/utils.py +0 -15
  62. lattifai/io/writer.py +0 -90
  63. lattifai/tokenizer/__init__.py +0 -3
  64. lattifai/workers/__init__.py +0 -3
  65. lattifai/workers/lattice1_alpha.py +0 -284
  66. lattifai/workflows/__init__.py +0 -34
  67. lattifai/workflows/agents.py +0 -10
  68. lattifai/workflows/gemini.py +0 -167
  69. lattifai/workflows/prompts/README.md +0 -22
  70. lattifai/workflows/prompts/gemini/README.md +0 -24
  71. lattifai/workflows/prompts/gemini/transcription_gem.txt +0 -81
  72. lattifai/workflows/youtube.py +0 -931
  73. lattifai-0.4.5.dist-info/METADATA +0 -808
  74. lattifai-0.4.5.dist-info/RECORD +0 -39
  75. lattifai-0.4.5.dist-info/entry_points.txt +0 -3
  76. {lattifai-0.4.5.dist-info → lattifai-1.0.0.dist-info}/top_level.txt +0 -0
lattifai/server/app.py ADDED
@@ -0,0 +1,420 @@
+ import asyncio
+ import os
+ import subprocess
+ import sys
+ import tempfile
+ from pathlib import Path
+ from typing import Optional
+
+ # Load environment variables from .env file
+ from dotenv import find_dotenv, load_dotenv
+ from fastapi import BackgroundTasks, FastAPI, File, Form, Request, UploadFile
+ from fastapi.middleware.cors import CORSMiddleware
+ from fastapi.responses import JSONResponse
+
+ # Try to find and load .env file from current directory or parent directories
+ load_dotenv(find_dotenv(usecwd=True))
+
+
+ app = FastAPI(title="LattifAI Web Interface")
+
+ print(f"LOADING APP FROM: {__file__}")
+
+ # Lazy-initialized client - will be created on first use
+ _client = None
+
+
+ def get_client():
+     """Get or create the LattifAI client (lazy initialization)."""
+     global _client
+     if _client is None:
+         from lattifai.client import LattifAI
+
+         _client = LattifAI()
+     return _client
+
+
+ @app.on_event("startup")
+ async def startup_event():
+     print("Listing all registered routes:")
+     for route in app.routes:
+         print(f"Route: {route.path} - {route.name}")
+
+
+ @app.middleware("http")
+ async def log_requests(request: Request, call_next):
+     print(f"INCOMING REQUEST: {request.method} {request.url}")
+     response = await call_next(request)
+     print(f"OUTGOING RESPONSE: {response.status_code}")
+     return response
+
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],  # Allow all origins for dev
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+
+ def mask_api_key(key: str) -> str:
+     """Mask API key for display, showing only first 6 and last 4 characters."""
+     if len(key) <= 10:
+         return "*" * len(key)
+     return key[:6] + "*" * (len(key) - 10) + key[-4:]
+
+
+ @app.get("/api/keys")
+ async def get_api_keys():
+     """Get status of API keys from environment variables."""
+     lattifai_key = os.environ.get("LATTIFAI_API_KEY", "")
+     gemini_key = os.environ.get("GEMINI_API_KEY", "")
+
+     return {
+         "lattifai": {
+             "exists": bool(lattifai_key),
+             "masked_value": mask_api_key(lattifai_key) if lattifai_key else None,
+             "create_url": "https://lattifai.com/dashboard/api-keys",
+         },
+         "gemini": {
+             "exists": bool(gemini_key),
+             "masked_value": mask_api_key(gemini_key) if gemini_key else None,
+             "create_url": "https://aistudio.google.com/apikey",
+         },
+     }
+
+
+ @app.post("/api/keys")
+ async def save_api_keys(request: Request):
+     """Save API keys to environment variables and optionally to .env file."""
+     try:
+         data = await request.json()
+         lattifai_key = data.get("lattifai_key", "").strip()
+         gemini_key = data.get("gemini_key", "").strip()
+         save_to_file = data.get("save_to_file", False)  # Optional: save to .env file
+
+         # Always update environment variables in current process
+         if lattifai_key:
+             os.environ["LATTIFAI_API_KEY"] = lattifai_key
+         if gemini_key:
+             os.environ["GEMINI_API_KEY"] = gemini_key
+
+         # Reset client to force re-initialization with new keys
+         global _client
+         _client = None
+
+         result = {
+             "status": "success",
+             "message": "API keys updated in environment variables",
+         }
+
+         # Optionally save to .env file for persistence
+         if save_to_file:
+             # Find the .env file path
+             env_path = find_dotenv(usecwd=True)
+             if not env_path:
+                 # Create .env in current working directory
+                 env_path = Path.cwd() / ".env"
+
+             # Read existing .env content
+             env_lines = []
+             if Path(env_path).exists():
+                 with open(env_path, "r") as f:
+                     env_lines = f.readlines()
+
+             # Update or add API keys
+             updated_lines = []
+             lattifai_updated = False
+             gemini_updated = False
+
+             for line in env_lines:
+                 if line.strip().startswith("LATTIFAI_API_KEY=") or line.strip().startswith("#LATTIFAI_API_KEY="):
+                     if lattifai_key:
+                         updated_lines.append(f"LATTIFAI_API_KEY={lattifai_key}\n")
+                         lattifai_updated = True
+                     else:
+                         updated_lines.append(line)  # Keep existing or commented out
+                 elif line.strip().startswith("GEMINI_API_KEY=") or line.strip().startswith("#GEMINI_API_KEY="):
+                     if gemini_key:
+                         updated_lines.append(f"GEMINI_API_KEY={gemini_key}\n")
+                         gemini_updated = True
+                     else:
+                         updated_lines.append(line)  # Keep existing or commented out
+                 else:
+                     updated_lines.append(line)
+
+             # Add new keys if they weren't in the file
+             if lattifai_key and not lattifai_updated:
+                 updated_lines.append(f"LATTIFAI_API_KEY={lattifai_key}\n")
+             if gemini_key and not gemini_updated:
+                 updated_lines.append(f"GEMINI_API_KEY={gemini_key}\n")
+
+             # Write back to .env file
+             with open(env_path, "w") as f:
+                 f.writelines(updated_lines)
+
+             result["message"] = "API keys saved to environment variables and .env file"
+             result["env_path"] = str(env_path)
+
+         return result
+
+     except Exception as e:
+         import traceback
+
+         traceback.print_exc()
+         return JSONResponse(status_code=500, content={"error": str(e), "traceback": traceback.format_exc()})
+
+
+ @app.post("/api/utils/select-directory")
+ async def select_directory():
+     """
+     Open a native directory selection dialog on the server (local machine).
+     Returns the selected path.
+     """
+     try:
+         path = ""
+         if sys.platform == "darwin":
+             # Use AppleScript for macOS - it's cleaner than Tkinter on Mac
+             script = """
+             try
+                 set theFolder to choose folder with prompt "Select Output Directory"
+                 POSIX path of theFolder
+             on error
+                 return ""
+             end try
+             """
+             result = subprocess.run(["osascript", "-e", script], capture_output=True, text=True)
+             if result.returncode == 0:
+                 path = result.stdout.strip()
+
+         # Fallback to Tkinter if path is still empty (e.g. not mac or mac script failed)
+         # Note: Tkinter might not be installed or might fail in some environments
+         if not path and sys.platform != "darwin":
+             try:
+                 import tkinter
+                 from tkinter import filedialog
+
+                 root = tkinter.Tk()
+                 root.withdraw()  # Hide main window
+                 root.wm_attributes("-topmost", 1)  # Bring to front
+                 path = filedialog.askdirectory(title="Select Output Directory")
+                 root.destroy()
+             except ImportError:
+                 pass
+             except Exception as e:
+                 print(f"Tkinter dialog failed: {e}")
+
+         return {"path": path}
+     except Exception as e:
+         # Don't fail the request, just return empty path or error logged
+         print(f"Directory selection failed: {e}")
+         return {"path": "", "error": str(e)}
+
+
+ @app.post("/align")
+ async def align_files(
+     background_tasks: BackgroundTasks,
+     media_file: Optional[UploadFile] = File(None),
+     caption_file: Optional[UploadFile] = File(None),
+     local_media_path: Optional[str] = Form(None),
+     local_caption_path: Optional[str] = Form(None),
+     local_output_dir: Optional[str] = Form(None),
+     youtube_url: Optional[str] = Form(None),
+     youtube_output_dir: Optional[str] = Form(None),
+     split_sentence: bool = Form(True),
+     normalize_text: bool = Form(False),
+     output_format: str = Form("srt"),
+     transcription_model: str = Form("nvidia/parakeet-tdt-0.6b-v3"),
+     alignment_model: str = Form("Lattifai/Lattice-1"),
+ ):
+     # Check if LATTIFAI_API_KEY is set
+     if not os.environ.get("LATTIFAI_API_KEY"):
+         return JSONResponse(
+             status_code=400,
+             content={
+                 "error": "LATTIFAI_API_KEY is not set. Please set the environment variable or add it to your .env file.",
+                 "help_url": "https://lattifai.com/dashboard/api-keys",
+             },
+         )
+
+     if not media_file and not youtube_url and not local_media_path:
+         return JSONResponse(
+             status_code=400, content={"error": "Either media file, local media path, or YouTube URL must be provided."}
+         )
+
+     # Get lazily initialized client
+     client = get_client()
+     if not client:
+         # This should rarely happen due to lazy init, but just in case
+         return JSONResponse(
+             status_code=500,
+             content={
+                 "error": "LattifAI client not initialized. Please check API key configuration.",
+             },
+         )
+
+     media_path = None
+     caption_path = None
+     temp_files_to_delete = []
+
+     try:
+         if media_file:
+             # Save uploaded media file to a temporary location
+             with tempfile.NamedTemporaryFile(delete=False, suffix=Path(media_file.filename).suffix) as tmp_media:
+                 content = await media_file.read()
+                 tmp_media.write(content)
+                 media_path = tmp_media.name
+                 temp_files_to_delete.append(media_path)
+
+             if caption_file:
+                 # Save uploaded caption file to a temporary location
+                 with tempfile.NamedTemporaryFile(
+                     delete=False, suffix=Path(caption_file.filename).suffix
+                 ) as tmp_caption:
+                     content = await caption_file.read()
+                     tmp_caption.write(content)
+                     caption_path = tmp_caption.name
+                     temp_files_to_delete.append(caption_path)
+
+         elif local_media_path:
+             media_path = local_media_path
+             if not Path(media_path).exists():
+                 return JSONResponse(status_code=400, content={"error": f"Local media file not found: {media_path}"})
+
+             if local_caption_path:
+                 caption_path = local_caption_path
+                 if not Path(caption_path).exists():
+                     return JSONResponse(
+                         status_code=400, content={"error": f"Local caption file not found: {caption_path}"}
+                     )
+
+         # Process in thread pool to not block event loop
+         loop = asyncio.get_event_loop()
+         result_caption = await loop.run_in_executor(
+             None,
+             process_alignment,
+             media_path,
+             youtube_url,
+             youtube_output_dir,
+             caption_path,
+             local_output_dir,
+             split_sentence,
+             normalize_text,
+             transcription_model,
+             alignment_model,
+             output_format,
+         )
+
+         # Convert result to dict with specified output format
+         caption_content = result_caption.to_string(format=output_format)
+
+         return {
+             "status": "success",
+             "segments": [
+                 {
+                     "start": seg.start,
+                     "end": seg.end,
+                     "text": seg.text,
+                     "speaker": seg.speaker if hasattr(seg, "speaker") else None,
+                 }
+                 for seg in result_caption.alignments
+             ],
+             "caption_content": caption_content,
+             "output_format": output_format,
+         }
+
+     except Exception as e:
+         import traceback
+
+         traceback.print_exc()
+         return JSONResponse(status_code=500, content={"error": str(e), "traceback": traceback.format_exc()})
+
+
+ def process_alignment(
+     media_path,
+     youtube_url,
+     youtube_output_dir,
+     caption_path,
+     local_output_dir,
+     split_sentence,
+     normalize_text,
+     transcription_model,
+     alignment_model,
+     output_format,
+ ):
+     """
+     Wrapper to call LattifAI client.
+     Note: Transcription will be automatically triggered when no caption is provided.
+     """
+     # Get lazily initialized client
+     client = get_client()
+     if not client:
+         raise RuntimeError("LattifAI client not initialized")
+
+     # Update caption config
+     client.caption_config.normalize_text = normalize_text
+
+     # Check if alignment model changed - if so, reinitialize aligner
+     if client.aligner.config.model_name != alignment_model:
+         print(
+             f"Alignment model changed from {client.aligner.config.model_name} to {alignment_model}, reinitializing aligner..."
+         )  # noqa: E501
+         from lattifai.alignment import Lattice1Aligner
+
+         client.aligner.config.model_name = alignment_model
+         client.aligner = Lattice1Aligner(config=client.aligner.config)
+
+     # Check if transcription model changed - if so, reinitialize transcriber
+     if transcription_model != client.transcription_config.model_name:
+         print(
+             f"Transcription model changed from {client.transcription_config.model_name} to {transcription_model}, reinitializing transcriber..."
+         )  # noqa: E501
+         from lattifai.config import TranscriptionConfig
+
+         client.transcription_config = TranscriptionConfig(model_name=transcription_model)
+         client._transcriber = None
+
+     if youtube_url:
+         # If youtube, we use client.youtube
+         # Note: client.youtube handles download + alignment
+         # Will try to download YT captions first, if not available, will transcribe
+
+         # Determine output directory
+         # Default: ~/Downloads/YYYY-MM-DD
+         if not youtube_output_dir or not youtube_output_dir.strip():
+             from datetime import datetime
+
+             today = datetime.now().strftime("%Y-%m-%d")
+             youtube_output_dir = f"~/Downloads/{today}"
+
+         temp_path = Path(youtube_output_dir).expanduser()
+         temp_path.mkdir(parents=True, exist_ok=True)
+
+         result = client.youtube(
+             url=youtube_url,
+             output_dir=temp_path,
+             use_transcription=False,  # Try to download captions first
+             force_overwrite=True,  # No user prompt in server mode
+             split_sentence=split_sentence,
+         )
+         return result
+     else:
+         # Local file alignment
+         output_caption_path = None
+         if local_output_dir:
+             output_dir = Path(local_output_dir).expanduser()
+             output_dir.mkdir(parents=True, exist_ok=True)
+             stem = Path(media_path).stem
+             # Prevent overwriting input if names clash, use _LattifAI suffix
+             output_filename = f"{stem}_LattifAI.{output_format}"
+             output_caption_path = output_dir / output_filename
+             print(f"Saving alignment result to: {output_caption_path}")
+
+         # If no caption_path provided, client.alignment will automatically call _transcribe
+         return client.alignment(
+             input_media=str(media_path),
+             input_caption=str(caption_path) if caption_path else None,
+             output_caption_path=str(output_caption_path) if output_caption_path else None,
+             split_sentence=split_sentence,
+         )
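
For reference, a minimal sketch of exercising the /align endpoint defined above. It assumes the app is served locally on port 8000 (host, port, and launch command are not part of this file; any standard FastAPI runner such as uvicorn would work), that LATTIFAI_API_KEY is set in the server environment, and that /path/to/audio.wav is a placeholder local media path. Fields are sent as form data to match the Form(...) parameters.

import requests  # any HTTP client works; requests is used here for brevity

# Assumed launch command, e.g.: uvicorn lattifai.server.app:app --port 8000
resp = requests.post(
    "http://127.0.0.1:8000/align",
    data={
        "local_media_path": "/path/to/audio.wav",  # placeholder path
        "output_format": "srt",
        "split_sentence": "true",
    },
    timeout=3600,  # alignment of long media can take a while
)
resp.raise_for_status()
payload = resp.json()
# On success the endpoint returns "segments", "caption_content", and "output_format"
print(payload["output_format"], len(payload["segments"]))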
lattifai/transcription/__init__.py ADDED
@@ -0,0 +1,76 @@
+ """Transcription module for LattifAI."""
+
+ from typing import TYPE_CHECKING, Optional
+
+ from lattifai.config import TranscriptionConfig
+
+ from .gemini import GeminiTranscriber
+ from .lattifai import LattifAITranscriber
+
+ if TYPE_CHECKING:
+     from .base import BaseTranscriber
+
+ __all__ = [
+     "LattifAITranscriber",
+     "GeminiTranscriber",
+     "create_transcriber",
+ ]
+
+
+ def create_transcriber(
+     transcription_config: TranscriptionConfig,
+ ) -> "BaseTranscriber":
+     """
+     Create a transcriber instance based on model_name in configuration.
+
+     This factory method automatically selects the appropriate transcriber
+     implementation based on the model_name specified in TranscriptionConfig.
+
+     Args:
+         transcription_config: Transcription configuration. If None, uses default
+             (which defaults to Gemini 2.5 Pro).
+
+     Returns:
+         BaseTranscriber: An instance of GeminiTranscriber or LattifAITranscriber
+
+     Raises:
+         ValueError: If model_name is not supported or ambiguous.
+
+     Example:
+         >>> from lattifai.config import TranscriptionConfig
+         >>> from lattifai.transcription import create_transcriber
+         >>>
+         >>> # Create Gemini transcriber (default)
+         >>> transcriber = create_transcriber()
+         >>>
+         >>> # Create specific transcriber
+         >>> config = TranscriptionConfig(model_name="gemini-2.5-pro")
+         >>> transcriber = create_transcriber(config)
+         >>>
+         >>> # Use local model
+         >>> config = TranscriptionConfig(model_name="nvidia/parakeet-tdt-0.6b")
+         >>> transcriber = create_transcriber(config)
+     """
+     model_name = transcription_config.model_name
+
+     # Gemini models (API-based)
+     if "gemini" in model_name:
+         assert (
+             transcription_config.gemini_api_key is not None
+         ), "Gemini API key must be provided in TranscriptionConfig for Gemini models."
+         return GeminiTranscriber(transcription_config=transcription_config)
+
+     # LattifAI local models (HuggingFace/NVIDIA models)
+     # Pattern: nvidia/*, iic/*, or any HF model path
+     elif "/" in model_name:
+         return LattifAITranscriber(transcription_config=transcription_config)
+
+     else:
+         # No clear indicator, raise error
+         raise ValueError(
+             f"Cannot determine transcriber for model_name='{transcription_config.model_name}'. "
+             f"Supported patterns: \n"
+             f"  - Gemini API models: 'gemini-2.5-pro', 'gemini-3-pro-preview'\n"
+             f"  - Local HF models: 'nvidia/parakeet-*', 'iic/SenseVoiceSmall', etc.\n"
+             f"Please specify a valid model_name."
+         )
lattifai/transcription/base.py ADDED
@@ -0,0 +1,108 @@
+ """Base transcriber abstractions for LattifAI."""
+
+ from abc import ABC, abstractmethod
+ from pathlib import Path
+ from typing import Optional, Union
+
+ from lattifai.audio2 import AudioData
+ from lattifai.caption import Caption
+ from lattifai.config import TranscriptionConfig
+ from lattifai.logging import get_logger
+
+
+ class BaseTranscriber(ABC):
+     """
+     Base class that standardizes how transcribers handle inputs/outputs.
+
+     Subclasses only need to implement the media-specific transcription
+     routines, while the base class handles URL vs file routing and saving
+     the resulting transcript to disk.
+     """
+
+     # Subclasses should override these properties
+     file_suffix: str = ".txt"
+     supports_url: bool = True
+     """Whether this transcriber supports direct URL transcription."""
+
+     def __init__(self, config: Optional[TranscriptionConfig] = None):
+         """
+         Initialize base transcriber.
+
+         Args:
+             config: Transcription configuration.
+         """
+         # Initialize config with default if not provided
+         if config is None:
+             config = TranscriptionConfig()
+
+         self.config = config
+         self.logger = get_logger("transcription")
+
+     @property
+     def name(self) -> str:
+         """Human-readable name of the transcriber."""
+
+     @property
+     def file_name(self) -> str:
+         """File name identifier for the transcriber."""
+         return f"{self.name.replace('/', '_')}{self.file_suffix}"
+
+     async def __call__(self, url_or_data: Union[str, AudioData], language: Optional[str] = None) -> str:
+         """Main entry point for transcription."""
+         return await self.transcribe(url_or_data, language=language)
+
+     async def transcribe(self, url_or_data: Union[str, AudioData], language: Optional[str] = None) -> str:
+         """
+         Route transcription based on input type.
+
+         For URL inputs, only works if the transcriber supports direct URL transcription.
+         Otherwise, the caller should download the media first and pass AudioData.
+
+         Args:
+             url_or_data: URL string or AudioData object to transcribe.
+             language: Optional language code for transcription (e.g., 'en', 'zh').
+         """
+         if isinstance(url_or_data, AudioData):
+             return await self.transcribe_file(url_or_data, language=language)
+         elif self._is_url(url_or_data):
+             if self.supports_url:
+                 return await self.transcribe_url(url_or_data, language=language)
+             else:
+                 raise ValueError(
+                     f"{self.__class__.__name__} does not support direct URL transcription. "
+                     f"Please download the media first and pass AudioData instead."
+                 )
+         return await self.transcribe_file(url_or_data, language=language)  # file path
+
+     @abstractmethod
+     async def transcribe_url(self, url: str, language: Optional[str] = None) -> str:
+         """
+         Transcribe audio from a remote URL (e.g., YouTube).
+
+         Args:
+             url: URL of the audio/video to transcribe.
+             language: Optional language code for transcription.
+         """
+
+     @abstractmethod
+     async def transcribe_file(
+         self, media_file: Union[str, Path, AudioData], language: Optional[str] = None
+     ) -> Union[str, Caption]:
+         """
+         Transcribe audio from a local media file.
+
+         Args:
+             media_file: Path to media file or AudioData object.
+             language: Optional language code for transcription.
+         """
+
+     @abstractmethod
+     def write(self, transcript: Union[str, Caption], output_file: Path, encoding: str = "utf-8") -> Path:
+         """
+         Persist transcript text to disk and return the file path.
+         """
+
+     @staticmethod
+     def _is_url(value: str) -> bool:
+         """Best-effort detection of web URLs."""
+         return value.startswith(("http://", "https://"))
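
To illustrate how the abstract hooks above fit together, here is a minimal, hypothetical subclass that is not part of the package: the class name and its trivial return values are illustrative only, while the shipped GeminiTranscriber and LattifAITranscriber implement the same hooks against real models.

from pathlib import Path
from typing import Optional, Union

from lattifai.audio2 import AudioData
from lattifai.caption import Caption
from lattifai.transcription.base import BaseTranscriber


class EchoTranscriber(BaseTranscriber):
    """Toy transcriber used only to show the required overrides."""

    supports_url = False  # transcribe() will reject URLs instead of calling transcribe_url

    @property
    def name(self) -> str:
        return "echo"

    async def transcribe_url(self, url: str, language: Optional[str] = None) -> str:
        raise NotImplementedError("EchoTranscriber only handles local media")

    async def transcribe_file(
        self, media_file: Union[str, Path, AudioData], language: Optional[str] = None
    ) -> Union[str, Caption]:
        # A real implementation would run ASR here; this stub just names its input.
        return f"transcript of {media_file}"

    def write(self, transcript: Union[str, Caption], output_file: Path, encoding: str = "utf-8") -> Path:
        # Persist the transcript text and hand back the path, as the base class expects.
        output_file.write_text(str(transcript), encoding=encoding)
        return output_file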