asub 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
asub/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """asub — generate and translate subtitles from audio/video files."""
2
+
3
+ __version__ = "1.0.0"
asub/__main__.py ADDED
@@ -0,0 +1,7 @@
1
+ """Allow running asub as ``python -m asub``."""
2
+
3
+ import sys
4
+
5
+ from asub.cli import main
6
+
7
+ sys.exit(main())
asub/cli.py ADDED
@@ -0,0 +1,206 @@
1
+ """Command-line interface for asub."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import logging
7
+ from pathlib import Path
8
+
9
+ from asub import __version__
10
+ from asub.progress import Spinner
11
+ from asub.subtitle import SubtitleFormat, infer_output_path, write_subtitle_file
12
+ from asub.transcriber import AVAILABLE_MODELS, DEFAULT_MODEL, Segment, load_model, transcribe
13
+ from asub.translator import translate_segments
14
+
15
+
16
def _build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the ``asub`` command.

    The positional ``input`` is declared optional (``nargs="?"``) at the
    argparse level; :func:`main` checks for it explicitly after handling
    ``--list-languages``, which needs no input file.
    """
    format_names = [member.value for member in SubtitleFormat]

    cli = argparse.ArgumentParser(
        prog="asub",
        description="Generate and translate subtitles from audio/video files.",
    )

    # --- Input / output ---
    cli.add_argument(
        "input", nargs="?", type=Path, default=None, help="Path to an audio or video file."
    )
    cli.add_argument(
        "-o",
        "--output",
        type=Path,
        default=None,
        help="Output subtitle file path. Defaults to <input>.srt.",
    )
    cli.add_argument(
        "-f",
        "--format",
        choices=format_names,
        default=None,
        help="Subtitle format (default: inferred from output extension, or srt).",
    )

    # --- Transcription options ---
    asr_group = cli.add_argument_group("transcription")
    asr_group.add_argument(
        "-m",
        "--model",
        choices=AVAILABLE_MODELS,
        default=DEFAULT_MODEL,
        help=f"Whisper model size (default: {DEFAULT_MODEL}).",
    )
    asr_group.add_argument(
        "--device",
        choices=["auto", "cpu", "cuda"],
        default="auto",
        help='Device to run inference on (default: "auto").',
    )
    asr_group.add_argument(
        "--compute-type",
        default=None,
        help="Quantisation type (e.g. int8, float16). Auto-selected if omitted.",
    )
    asr_group.add_argument(
        "-l",
        "--language",
        default=None,
        help="Source language code (e.g. en, it, de). Auto-detected if omitted.",
    )
    asr_group.add_argument(
        "--no-vad", action="store_true", help="Disable Voice Activity Detection filter."
    )

    # --- Translation options ---
    xlate_group = cli.add_argument_group("translation")
    xlate_group.add_argument(
        "-t",
        "--translate",
        metavar="LANG",
        default=None,
        help="Translate subtitles to this language code (e.g. it, de, fr, es).",
    )

    # --- General ---
    cli.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="Increase verbosity (-v for INFO, -vv for DEBUG).",
    )
    cli.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
    cli.add_argument(
        "--list-languages",
        action="store_true",
        help="Print supported translation languages and exit.",
    )
    return cli
104
+
105
+
106
+ def _configure_logging(verbosity: int) -> None:
107
+ level = logging.WARNING
108
+ if verbosity == 1:
109
+ level = logging.INFO
110
+ elif verbosity >= 2:
111
+ level = logging.DEBUG
112
+
113
+ logging.basicConfig(
114
+ level=level,
115
+ format="%(asctime)s %(levelname)-8s %(name)s %(message)s",
116
+ datefmt="%H:%M:%S",
117
+ )
118
+
119
+
120
def main(argv: list[str] | None = None) -> int:
    """Entry point for the CLI.

    Parameters
    ----------
    argv:
        Argument list to parse. ``None`` lets argparse read ``sys.argv``.

    Returns
    -------
    ``0`` on success.

    Raises
    ------
    SystemExit
        Raised by ``parser.error`` (argparse exits with status ``2``) when the
        input file is missing or not found, or when no subtitle format can be
        determined for the requested output path.

    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    # --- List languages and exit ---
    if args.list_languages:
        from asub.translator import supported_languages

        langs = supported_languages()
        for name, code in sorted(langs.items()):
            print(f" {code:6s} {name}")
        return 0

    # --- Validate input ---
    input_path: Path | None = args.input
    if input_path is None:
        parser.error("the following arguments are required: input")
    if not input_path.is_file():
        parser.error(f"Input file not found: {input_path}")

    _configure_logging(args.verbose)
    logger = logging.getLogger("asub")

    # --- Determine output path & format ---
    fmt: SubtitleFormat | None = None
    if args.format is not None:
        fmt = SubtitleFormat(args.format)

    output_path: Path
    if args.output is not None:
        output_path = args.output
        if fmt is None:
            # Resolve the format from the extension now, so an unusable
            # extension is reported before the (slow) transcription starts
            # instead of failing when the file is finally written.
            ext = output_path.suffix.lower().lstrip(".")
            try:
                fmt = SubtitleFormat(ext)
            except ValueError:
                parser.error(
                    f"Cannot infer subtitle format from extension '.{ext}'. "
                    f"Use -f/--format with one of: "
                    f"{', '.join(f.value for f in SubtitleFormat)}."
                )
    else:
        target_fmt = fmt if fmt is not None else SubtitleFormat.SRT
        suffix = f"_{args.translate}" if args.translate else ""
        output_path = infer_output_path(input_path, target_fmt, suffix=suffix)

    # --- Transcribe ---
    logger.info("Model: %s | Device: %s", args.model, args.device)
    with Spinner(f"Loading model '{args.model}'"):
        model = load_model(args.model, device=args.device, compute_type=args.compute_type)
    print(f"Model '{args.model}' loaded.", flush=True)

    with Spinner(f"Transcribing '{input_path.name}'…") as spinner:

        def _on_segment(index: int, seg: Segment, duration: float) -> None:
            # Progress is the end time of the latest segment relative to the
            # reported audio duration, capped at 100%.
            pct = min(seg.end / duration * 100, 100.0) if duration > 0 else 0
            spinner.update(f"Transcribing '{input_path.name}' — {index} segments ({pct:.0f}%)")

        result = transcribe(
            model,
            input_path,
            language=args.language,
            vad_filter=not args.no_vad,
            on_segment=_on_segment,
        )

    segments = result.segments
    print(
        f"Transcribed {len(segments)} segments "
        f"(detected language: {result.language}, "
        f"confidence: {result.language_probability:.0%})",
        flush=True,
    )

    # --- Translate (optional) ---
    if args.translate:
        with Spinner(f"Translating to '{args.translate}'"):
            segments = translate_segments(
                segments,
                source=result.language,
                target=args.translate,
            )
        print(f"Translated to '{args.translate}'.", flush=True)

    # --- Write output ---
    with Spinner("Writing subtitle file"):
        written = write_subtitle_file(segments, output_path, fmt=fmt)
    print(f"Saved → {written}", flush=True)

    return 0
asub/progress.py ADDED
@@ -0,0 +1,65 @@
1
+ """Terminal progress utilities (spinner + inline status)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ import threading
7
+
8
+
9
class Spinner:
    """A context-manager that shows an animated spinner with a message.

    Usage::

        with Spinner("Loading model"):
            do_slow_work()

    The spinner runs in a background thread, writes to ``sys.stderr`` and
    clears its line on exit. When ``sys.stderr`` is not an interactive
    terminal (redirected to a file or pipe, or missing altogether) nothing is
    written, so logs and captured output are not filled with ``\\r`` frames.
    """

    _FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏")

    def __init__(self, message: str = "") -> None:
        self._message = message
        self._stop_event = threading.Event()
        self._thread: threading.Thread | None = None
        self._last_line_len = 0

    # -- public helpers for updating the message mid-spin --

    def update(self, message: str) -> None:
        """Change the displayed message while the spinner is running."""
        self._message = message

    # -- context manager --

    def __enter__(self) -> Spinner:
        self._stop_event.clear()
        self._thread = None
        self._last_line_len = 0
        if self._is_interactive():
            self._thread = threading.Thread(target=self._spin, daemon=True)
            self._thread.start()
        return self

    def __exit__(self, *_: object) -> None:
        self._stop_event.set()
        if self._thread is not None:
            self._thread.join()
            self._thread = None
            # Only wipe the line if the animation actually drew something.
            self._clear_line()

    # -- internals --

    @staticmethod
    def _is_interactive() -> bool:
        """Return True when ``sys.stderr`` exists and reports being a TTY."""
        isatty = getattr(sys.stderr, "isatty", None)
        if not callable(isatty):
            return False
        try:
            return bool(isatty())
        except ValueError:
            # isatty() raises ValueError on a closed stream.
            return False

    def _spin(self) -> None:
        idx = 0
        while not self._stop_event.is_set():
            frame = self._FRAMES[idx % len(self._FRAMES)]
            line = f"\r {frame} {self._message}"
            # Pad with spaces to overwrite any previous longer line
            padded = line.ljust(self._last_line_len)
            sys.stderr.write(padded)
            sys.stderr.flush()
            self._last_line_len = len(line)
            idx += 1
            # wait() doubles as the frame delay and returns early on stop.
            self._stop_event.wait(0.08)

    def _clear_line(self) -> None:
        sys.stderr.write("\r" + " " * self._last_line_len + "\r")
        sys.stderr.flush()
asub/subtitle.py ADDED
@@ -0,0 +1,135 @@
1
+ """Generate subtitle files (SRT, VTT) from transcription segments."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from enum import Enum
7
+ from pathlib import Path
8
+ from typing import TYPE_CHECKING
9
+
10
+ if TYPE_CHECKING:
11
+ from collections.abc import Sequence
12
+
13
+ from asub.transcriber import Segment
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class SubtitleFormat(Enum):
    """Supported subtitle output formats.

    Each member's value is the lowercase file extension (without the dot);
    the values are used both as CLI ``--format`` choices and to infer the
    format from an output file's suffix.
    """

    SRT = "srt"  # SubRip
    VTT = "vtt"  # WebVTT
23
+
24
+
25
+ def _format_timestamp_srt(seconds: float) -> str:
26
+ """Format seconds as ``HH:MM:SS,mmm`` (SRT standard)."""
27
+ hours, remainder = divmod(seconds, 3600)
28
+ minutes, secs = divmod(remainder, 60)
29
+ millis = round((secs - int(secs)) * 1000)
30
+ return f"{int(hours):02d}:{int(minutes):02d}:{int(secs):02d},{millis:03d}"
31
+
32
+
33
+ def _format_timestamp_vtt(seconds: float) -> str:
34
+ """Format seconds as ``HH:MM:SS.mmm`` (WebVTT standard)."""
35
+ hours, remainder = divmod(seconds, 3600)
36
+ minutes, secs = divmod(remainder, 60)
37
+ millis = round((secs - int(secs)) * 1000)
38
+ return f"{int(hours):02d}:{int(minutes):02d}:{int(secs):02d}.{millis:03d}"
39
+
40
+
41
def generate_srt(segments: Sequence[Segment]) -> str:
    """Render *segments* as SRT text.

    Every cue consists of a 1-based counter, a ``start --> end`` timing line
    and the segment text; cues are separated by one blank line. An empty
    sequence yields an empty string.
    """
    cues = [
        f"{number}\n"
        f"{_format_timestamp_srt(item.start)} --> {_format_timestamp_srt(item.end)}\n"
        f"{item.text}\n"
        for number, item in enumerate(segments, start=1)
    ]
    # Each cue already ends with a newline, so joining on "\n" produces the
    # blank separator line between consecutive cues.
    return "\n".join(cues)
52
+
53
+
54
def generate_vtt(segments: Sequence[Segment]) -> str:
    """Render *segments* as WebVTT text.

    The output starts with the ``WEBVTT`` header line followed by a blank
    line, then one cue per segment: a 1-based identifier, a
    ``start --> end`` timing line and the segment text. Cues are separated
    by one blank line.
    """
    cues = [
        f"{number}\n"
        f"{_format_timestamp_vtt(item.start)} --> {_format_timestamp_vtt(item.end)}\n"
        f"{item.text}\n"
        for number, item in enumerate(segments, start=1)
    ]
    # The header and every cue end with a newline, so joining on "\n" yields
    # the blank line after the header and between consecutive cues.
    return "\n".join(["WEBVTT\n", *cues])
65
+
66
+
67
def generate(segments: Sequence[Segment], fmt: SubtitleFormat) -> str:
    """Return the subtitle text for *segments* in the format *fmt*.

    Raises ``KeyError`` if *fmt* has no registered generator.
    """
    dispatch = {
        SubtitleFormat.SRT: generate_srt,
        SubtitleFormat.VTT: generate_vtt,
    }
    render = dispatch[fmt]
    return render(segments)
74
+
75
+
76
def write_subtitle_file(
    segments: Sequence[Segment],
    output_path: str | Path,
    fmt: SubtitleFormat | None = None,
) -> Path:
    """Render *segments* and save them as a UTF-8 subtitle file.

    Parameters
    ----------
    segments:
        Timed text segments.
    output_path:
        Destination file path. Missing parent directories are created.
    fmt:
        Subtitle format. If ``None``, it is taken from the lowercase
        extension of *output_path*.

    Returns
    -------
    *output_path* as a :class:`Path`.

    Raises
    ------
    ValueError
        If *fmt* is ``None`` and the extension of *output_path* does not name
        a supported format.

    """
    destination = Path(output_path)

    if fmt is None:
        extension = destination.suffix.lower().lstrip(".")
        try:
            fmt = SubtitleFormat(extension)
        except ValueError:
            known = ", ".join(f.value for f in SubtitleFormat)
            raise ValueError(
                f"Cannot infer subtitle format from extension '.{extension}'. "
                f"Use one of: {known}."
            ) from None

    rendered = generate(segments, fmt)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(rendered, encoding="utf-8")

    logger.info("Subtitle file written → %s", destination)
    return destination
116
+
117
+
118
def infer_output_path(
    input_path: str | Path,
    fmt: SubtitleFormat,
    *,
    suffix: str = "",
) -> Path:
    """Build the default subtitle path that sits next to the input file.

    The input's final extension is replaced by the format's extension, with
    *suffix* inserted between the stem and the new extension.

    Example
    -------
    >>> infer_output_path("video.mp4", SubtitleFormat.SRT)
    PosixPath('video.srt')
    >>> infer_output_path("video.mp4", SubtitleFormat.SRT, suffix="_en")
    PosixPath('video_en.srt')

    """
    source = Path(input_path)
    new_name = f"{source.stem}{suffix}.{fmt.value}"
    return source.with_name(new_name)
asub/transcriber.py ADDED
@@ -0,0 +1,174 @@
1
+ """Transcription engine powered by faster-whisper."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from collections.abc import Callable
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+
10
+ from faster_whisper import WhisperModel
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
# Models ordered by size — users pick the trade-off between speed and accuracy.
# The CLI offers exactly these names as the ``--model`` choices.
AVAILABLE_MODELS: tuple[str, ...] = (
    "tiny",
    "base",
    "small",
    "medium",
    "large-v3",
    "turbo",
    "distil-large-v3",
)

# Model used when the caller does not choose one explicitly.
DEFAULT_MODEL = "medium"
26
+
27
+
28
@dataclass(frozen=True, slots=True)
class Segment:
    """A single transcribed segment with timing information.

    Instances are immutable; translation produces new ``Segment`` objects
    rather than modifying existing ones.
    """

    # Start time of the segment, in seconds.
    start: float
    # End time of the segment, in seconds.
    end: float
    # Segment text (stripped of surrounding whitespace by ``transcribe``).
    text: str
35
+
36
+
37
@dataclass(frozen=True, slots=True)
class TranscriptionResult:
    """Complete result of a transcription run."""

    # Language code reported by the model for the audio.
    language: str
    # Model's confidence in ``language``; presumably a 0–1 fraction, since
    # callers format it as a percentage — TODO confirm.
    language_probability: float
    # Audio duration as reported by the model (presumably seconds — confirm).
    duration: float
    # Transcribed segments in the order the model produced them.
    segments: list[Segment]
45
+
46
+
47
+ def _cuda_available() -> bool:
48
+ """Return True if a CUDA-capable GPU is available."""
49
+ try:
50
+ import ctranslate2
51
+
52
+ return "cuda" in ctranslate2.get_supported_compute_types("cuda")
53
+ except Exception:
54
+ return False
55
+
56
+
57
+ def _resolve_device(device: str) -> str:
58
+ """Resolve the device, auto-detecting CUDA availability when needed."""
59
+ if device == "auto":
60
+ return "cuda" if _cuda_available() else "cpu"
61
+ if device in ("cpu", "cuda"):
62
+ return device
63
+ msg = f"Unsupported device: {device!r}. Use 'auto', 'cpu', or 'cuda'."
64
+ raise ValueError(msg)
65
+
66
+
67
+ def _resolve_compute_type(device: str, compute_type: str | None) -> str:
68
+ """Pick a sensible compute type when the caller doesn't specify one."""
69
+ if compute_type is not None:
70
+ return compute_type
71
+ if device == "cuda":
72
+ return "float16"
73
+ return "int8"
74
+
75
+
76
def load_model(
    model_size: str = DEFAULT_MODEL,
    *,
    device: str = "auto",
    compute_type: str | None = None,
) -> WhisperModel:
    """Construct a :class:`WhisperModel` ready for transcription.

    Parameters
    ----------
    model_size:
        One of :data:`AVAILABLE_MODELS`.
    device:
        ``"auto"`` (default), ``"cpu"``, or ``"cuda"``.
    compute_type:
        Quantisation type. ``None`` selects ``"float16"`` on CUDA and
        ``"int8"`` otherwise.

    Raises
    ------
    ValueError
        If *device* is not one of the supported names.

    """
    resolved_device = _resolve_device(device)
    resolved_type = _resolve_compute_type(resolved_device, compute_type)

    logger.info(
        "Loading Whisper model '%s' on %s (%s)…", model_size, resolved_device, resolved_type
    )
    return WhisperModel(model_size, device=resolved_device, compute_type=resolved_type)
99
+
100
+
101
def transcribe(
    model: WhisperModel,
    audio_path: str | Path,
    *,
    language: str | None = None,
    vad_filter: bool = True,
    word_timestamps: bool = False,
    on_segment: Callable[[int, Segment, float], None] | None = None,
) -> TranscriptionResult:
    """Run *model* over a media file and collect the timed segments.

    Parameters
    ----------
    model:
        A loaded :class:`WhisperModel`.
    audio_path:
        Path to an audio or video file (any format supported by FFmpeg / PyAV).
        It is resolved to an absolute path before being handed to the model.
    language:
        ISO-639-1 code (e.g. ``"en"``). ``None`` for auto-detection.
    vad_filter:
        Use Silero VAD to skip silence — reduces hallucination.
    word_timestamps:
        Request word-level timestamps (slower, but more precise).
    on_segment:
        Optional callback invoked after each segment is transcribed.
        Receives ``(segment_count_so_far, segment, audio_duration)``.

    Returns
    -------
    A :class:`TranscriptionResult` holding the detected language, its
    probability, the audio duration and every segment with stripped text.

    """
    resolved = str(Path(audio_path).resolve())
    logger.info("Transcribing '%s'…", resolved)

    raw_segments, info = model.transcribe(
        resolved,
        language=language,
        vad_filter=vad_filter,
        word_timestamps=word_timestamps,
        beam_size=5,
    )

    logger.info(
        "Detected language: %s (probability %.2f%%)",
        info.language,
        info.language_probability * 100,
    )

    # Segments are consumed one at a time so the callback can report progress
    # while the transcription is still running.
    collected: list[Segment] = []
    for count, raw in enumerate(raw_segments, start=1):
        item = Segment(start=raw.start, end=raw.end, text=raw.text.strip())
        collected.append(item)
        logger.debug("[%.2fs → %.2fs] %s", raw.start, raw.end, item.text)
        if on_segment is not None:
            on_segment(count, item, info.duration)

    logger.info("Transcription complete — %d segments.", len(collected))
    return TranscriptionResult(
        language=info.language,
        language_probability=info.language_probability,
        duration=info.duration,
        segments=collected,
    )
161
+
162
+
163
def transcribe_file(
    audio_path: str | Path,
    *,
    model_size: str = DEFAULT_MODEL,
    device: str = "auto",
    compute_type: str | None = None,
    language: str | None = None,
    vad_filter: bool = True,
) -> TranscriptionResult:
    """One-shot helper: load a model, transcribe *audio_path*, return the result.

    A fresh model is loaded on every call; use :func:`load_model` together
    with :func:`transcribe` to reuse one model across several files.
    """
    whisper = load_model(model_size, device=device, compute_type=compute_type)
    outcome = transcribe(whisper, audio_path, language=language, vad_filter=vad_filter)
    return outcome
asub/translator.py ADDED
@@ -0,0 +1,120 @@
1
+ """Translation layer using deep-translator (Google Translate by default)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from typing import TYPE_CHECKING
7
+
8
+ from deep_translator import GoogleTranslator
9
+
10
+ if TYPE_CHECKING:
11
+ from collections.abc import Sequence
12
+
13
+ from asub.transcriber import Segment, TranscriptionResult
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ # Maximum characters Google Translate accepts per request.
18
+ _GOOGLE_CHAR_LIMIT = 5000
19
+
20
+
21
def supported_languages() -> dict[str, str]:
    """Return the translator's supported languages as a ``{name: code}`` dict."""
    translator = GoogleTranslator()
    return translator.get_supported_languages(as_dict=True)
24
+
25
+
26
def translate_text(text: str, *, source: str = "auto", target: str = "en") -> str:
    """Translate one string from *source* to *target*.

    Empty or whitespace-only input is returned unchanged without contacting
    the translator.
    """
    if text.strip():
        translator = GoogleTranslator(source=source, target=target)
        return translator.translate(text)
    return text
31
+
32
+
33
def translate_segments(
    segments: Sequence[Segment],
    *,
    source: str = "auto",
    target: str = "en",
) -> list[Segment]:
    """Return copies of *segments* whose text is translated to *target*.

    Consecutive segments are grouped into newline-joined batches that stay
    within ``_GOOGLE_CHAR_LIMIT`` characters, each batch is translated in one
    request, and the response is split on newlines to map the lines back to
    their segments. If the number of returned lines does not match the batch,
    the segments of that batch are translated one by one instead.

    Parameters
    ----------
    segments:
        The transcribed segments to translate.
    source:
        Source language code, or ``"auto"`` for auto-detection.
    target:
        Target language code (e.g. ``"it"``, ``"de"``, ``"fr"``).

    Returns
    -------
    A new list of :class:`~asub.transcriber.Segment` with translated text and
    the original timestamps. An empty input yields an empty list.

    """
    from asub.transcriber import Segment as SegmentCls

    if not segments:
        return []

    logger.info("Translating %d segments → %s…", len(segments), target)
    translator = GoogleTranslator(source=source, target=target)

    # Group segment positions into batches that fit under the character limit.
    separator = "\n"
    groups: list[list[int]] = []
    pending: list[int] = []
    pending_chars = 0

    for position, segment in enumerate(segments):
        cost = len(segment.text) + len(separator)
        # Close the running batch before it would overflow; a single oversized
        # segment still gets a batch of its own.
        if pending and pending_chars + cost > _GOOGLE_CHAR_LIMIT:
            groups.append(pending)
            pending = []
            pending_chars = 0
        pending.append(position)
        pending_chars += cost

    if pending:
        groups.append(pending)

    # Translate batch by batch, writing results back by original position.
    outputs: list[str] = [""] * len(segments)

    for group in groups:
        joined = separator.join(segments[position].text for position in group)
        pieces = translator.translate(joined).split("\n")

        if len(pieces) == len(group):
            for position, piece in zip(group, pieces, strict=True):
                outputs[position] = piece.strip()
        else:
            # The translator merged or split lines, so the mapping is lost.
            logger.debug("Batch split mismatch — falling back to per-segment translation.")
            for position in group:
                outputs[position] = translator.translate(segments[position].text)

    translated = [
        SegmentCls(start=segment.start, end=segment.end, text=text)
        for segment, text in zip(segments, outputs, strict=True)
    ]
    logger.info("Translation complete.")
    return translated
107
+
108
+
109
def translate_result(
    result: TranscriptionResult,
    *,
    target: str,
    source: str | None = None,
) -> list[Segment]:
    """Translate all segments of a :class:`TranscriptionResult` to *target*.

    When *source* is ``None`` the language detected during transcription
    (``result.language``) is used as the source language.
    """
    if source is None:
        source = result.language
    return translate_segments(result.segments, source=source, target=target)
@@ -0,0 +1,256 @@
1
+ Metadata-Version: 2.4
2
+ Name: asub
3
+ Version: 1.0.0
4
+ Summary: Generate and translate subtitles from audio/video files using Whisper.
5
+ Author: asub contributors
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/simoneraffaelli/subtitle-generator
8
+ Project-URL: Repository, https://github.com/simoneraffaelli/subtitle-generator
9
+ Project-URL: Issues, https://github.com/simoneraffaelli/subtitle-generator/issues
10
+ Keywords: subtitles,whisper,transcription,translation,srt,vtt
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: End Users/Desktop
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
19
+ Classifier: Topic :: Multimedia :: Video
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: faster-whisper<2.0.0,>=1.0.0
24
+ Requires-Dist: deep-translator<2.0.0,>=1.11.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=7.0; extra == "dev"
27
+ Requires-Dist: ruff>=0.4.0; extra == "dev"
28
+ Requires-Dist: pyinstaller>=6.0; extra == "dev"
29
+ Dynamic: license-file
30
+
31
+ # asub
32
+
33
+ Generate and translate subtitles from any audio or video file — powered by
34
+ [faster-whisper](https://github.com/SYSTRAN/faster-whisper) and
35
+ [deep-translator](https://github.com/nidhaloff/deep-translator).
36
+
37
+ ## Features
38
+
39
+ - **Fast transcription** — up to 4× faster than OpenAI Whisper with the same
40
+ accuracy, using CTranslate2.
41
+ - **Automatic language detection** — or specify the source language manually.
42
+ - **Translation** — translate subtitles to 100+ languages via Google Translate
43
+ (free, no API key).
44
+ - **Multiple output formats** — SRT and WebVTT.
45
+ - **VAD filtering** — Silero VAD removes silence and reduces hallucination.
46
+ - **Model choice** — from `tiny` (fast, less accurate) to `large-v3`
47
+ (slow, most accurate).
48
+ - **CPU & GPU** — works on both, with int8 quantisation for low-memory setups.
49
+ - **Packageable as .exe** — single-file Windows executable via PyInstaller.
50
+
51
+ ## Installation
52
+
53
+ ### From source (recommended for development)
54
+
55
+ ```bash
56
+ git clone https://github.com/simoneraffaelli/subtitle-generator.git
57
+ cd subtitle-generator
58
+ pip install -e ".[dev]"
59
+ ```
60
+
61
+ ### From PyPI (once published)
62
+
63
+ ```bash
64
+ pip install asub
65
+ ```
66
+
67
+ ## Quick start
68
+
69
+ ```bash
70
+ # Transcribe a video and generate subtitles (auto-detect language)
71
+ asub video.mp4
72
+
73
+ # Use a specific model and output format
74
+ asub video.mp4 -m large-v3 -f vtt
75
+
76
+ # Transcribe and translate to Italian
77
+ asub video.mp4 -t it
78
+
79
+ # Specify source language, translate to German, verbose output
80
+ asub podcast.mp3 -l en -t de -v
81
+
82
+ # Use CPU with int8 quantisation
83
+ asub interview.wav --device cpu --compute-type int8
84
+ ```
85
+
86
+ ## CLI reference
87
+
88
+ ```
89
+ usage: asub [-h] [-o OUTPUT] [-f {srt,vtt}] [-m MODEL] [--device {auto,cpu,cuda}]
90
+ [--compute-type TYPE] [-l LANG] [--no-vad] [-t LANG] [-v] [--version]
91
+ [--list-languages]
92
+ input
93
+
94
+ positional arguments:
95
+ input Path to an audio or video file.
96
+
97
+ options:
98
+ -o, --output Output subtitle file path (default: <input>.srt)
99
+ -f, --format Subtitle format: srt, vtt
100
+ -v, --verbose Increase verbosity (-v INFO, -vv DEBUG)
101
+ --version Show version and exit
102
+ --list-languages Print supported translation languages and exit
103
+
104
+ transcription:
105
+ -m, --model Whisper model size (default: medium)
106
+ --device auto | cpu | cuda (default: auto)
107
+ --compute-type Quantisation type (auto-selected if omitted)
108
+ -l, --language Source language code (auto-detected if omitted)
109
+ --no-vad Disable Voice Activity Detection
110
+
111
+ translation:
112
+ -t, --translate LANG Translate subtitles to this language code
113
+ ```
114
+
115
+ ## Python API
116
+
117
+ ```python
118
+ from asub.transcriber import load_model, transcribe
119
+ from asub.translator import translate_segments
120
+ from asub.subtitle import write_subtitle_file, SubtitleFormat
121
+
122
+ # 1. Transcribe
123
+ model = load_model("medium", device="auto")
124
+ result = transcribe(model, "video.mp4")
125
+
126
+ # 2. Translate (optional)
127
+ translated = translate_segments(result.segments, source=result.language, target="it")
128
+
129
+ # 3. Write subtitle file
130
+ write_subtitle_file(translated, "video_it.srt")
131
+ ```
132
+
133
+ ## Building a Windows .exe
134
+
135
+ ```bash
136
+ pip install ".[dev]"
137
+ pyinstaller asub.spec
138
+ ```
139
+
140
+ The executable will be in `dist/asub.exe`.
141
+
142
+ > **Note:** The .exe does not bundle Whisper model weights. Models are downloaded
143
+ > on first run and cached in the default Hugging Face cache directory.
144
+
145
+ ## Hugging Face token (optional)
146
+
147
+ On first run, Whisper model weights are downloaded from the Hugging Face Hub.
148
+ Without authentication you may see this warning:
149
+
150
+ > You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN
151
+ > to enable higher rate limits and faster downloads
152
+
153
+ This is **not an error** — the download still works, just at lower rate limits.
154
+ To silence the warning and get faster downloads:
155
+
156
+ 1. Create a free account at <https://huggingface.co>.
157
+ 2. Go to **Settings → Access Tokens** and generate a token.
158
+ 3. Set the token before running asub:
159
+
160
+ ```bash
161
+ # Linux / macOS
162
+ export HF_TOKEN="hf_your_token_here"
163
+
164
+ # Windows PowerShell
165
+ $env:HF_TOKEN = "hf_your_token_here"
166
+ ```
167
+
168
+ To make this permanent, add the variable to your shell profile or set it via
169
+ **System → Environment Variables** on Windows.
170
+
171
+ ## Available models
172
+
173
+ | Model | Parameters | Relative speed | VRAM |
174
+ | ---------------- | ---------- | -------------- | ------ |
175
+ | `tiny` | 39 M | ~10× | ~1 GB |
176
+ | `base` | 74 M | ~7× | ~1 GB |
177
+ | `small` | 244 M | ~4× | ~2 GB |
178
+ | `medium` | 769 M | ~2× | ~5 GB |
179
+ | `large-v3` | 1550 M | 1× | ~10 GB |
180
+ | `turbo` | 809 M | ~8× | ~6 GB |
181
+ | `distil-large-v3`| 756 M | ~6× | ~6 GB |
182
+
183
+ ### Choosing the right model
184
+
185
+ Not every model is the best choice for every situation. Here's a breakdown to
186
+ help you pick:
187
+
188
+ - **`tiny`** — Fastest model by far. Good for quick previews or testing your
189
+ pipeline. Accuracy is noticeably lower, especially on non-English audio or
190
+ noisy recordings. Use it when speed matters more than quality.
191
+ - **`base`** — A small step up from `tiny`. Slightly more accurate, still very
192
+ fast. Suitable for clear speech in common languages.
193
+ - **`small`** — A solid mid-range option. Handles most languages well and runs
194
+ comfortably on CPU. Good balance for everyday use when you don't have a GPU.
195
+ - **`medium`** — The default. Significantly more accurate than `small`,
196
+ especially for accented speech, niche languages, and overlapping speakers.
197
+ Slower on CPU, but a great choice with a GPU.
198
+ - **`large-v3`** — The most accurate model. Best for professional-quality
199
+ subtitles, rare languages, or heavily accented audio. Requires a CUDA GPU
200
+ with at least 10 GB VRAM for practical use.
201
+ - **`turbo`** — Near `large-v3` accuracy at roughly 8× the speed. This is the
202
+ best "quality per second" option if you have a GPU with ≥6 GB VRAM.
203
+ - **`distil-large-v3`** — A distilled version of `large-v3`. Similar accuracy
204
+ on English, slightly worse on other languages. Fast and memory-efficient.
205
+ Best for English-heavy workloads on a GPU.
206
+
207
+ ### Recommended commands
208
+
209
+ **Fastest result** — use `tiny` when you just need a rough draft quickly:
210
+
211
+ ```bash
212
+ asub video.mp4 -m tiny
213
+ ```
214
+
215
+ **Best result** — use `large-v3` (GPU required) for maximum accuracy:
216
+
217
+ ```bash
218
+ asub video.mp4 -m large-v3
219
+ ```
220
+
221
+ **Best compromise** — use `turbo` on GPU for near-best accuracy at high speed,
222
+ or `small` on CPU for a good quality-to-speed ratio:
223
+
224
+ ```bash
225
+ # With a CUDA GPU (recommended)
226
+ asub video.mp4 -m turbo
227
+
228
+ # CPU only
229
+ asub video.mp4 -m small
230
+ ```
231
+
232
+ > **Tip:** The device and compute type are auto-detected. If you have a CUDA
233
+ > GPU, asub will use it with `float16` automatically. On CPU it falls back
234
+ > to `int8` quantisation.
235
+
236
+ ## Upgrading dependencies
237
+
238
+ ```bash
239
+ pip install --upgrade faster-whisper deep-translator
240
+ ```
241
+
242
+ ## Contributing
243
+
244
+ 1. Fork the repo and create a feature branch.
245
+ 2. Install dev dependencies: `pip install -e ".[dev]"`
246
+ 3. Run tests: `python -m pytest`
247
+ 4. Lint: `ruff check src/ tests/`
248
+ 5. Open a pull request.
249
+
250
+ ## License
251
+
252
+ [MIT](LICENSE)
253
+
254
+ ## Acknowledgements
255
+
256
+ Built with the great help of [Claude Opus 4.6](https://www.anthropic.com/) by Anthropic.
@@ -0,0 +1,13 @@
1
+ asub/__init__.py,sha256=FGNPRlcf5_cG4iOMGF5OOLxhCLicXSoUj8uSqxmSSQs,95
2
+ asub/__main__.py,sha256=9pVBG03fOAlQKoQ-q_aaovG-GnjWKPZsFNiUQS88MoY,105
3
+ asub/cli.py,sha256=pERnwVQ6OF38jNXNDuGdUsZF8kVa8trpzKSRPAjZy40,6147
4
+ asub/progress.py,sha256=y2TfJOtG2nn_7p8To7WMeNsbMu9NZ2HropjnsNFLR28,1924
5
+ asub/subtitle.py,sha256=nunCzo_VKBvQQMjXjnnuVEt_3Mg4mtr4T1l6szYO8TA,3913
6
+ asub/transcriber.py,sha256=_jJtd_YHeTrLN6N1q9Mu_NoVkQgAZXQYauMajvI5qTc,4967
7
+ asub/translator.py,sha256=l7-mZDE4Us2suhWrzsyTPZ9xM1D-Ji6jFKOJ4PcKoOk,3802
8
+ asub-1.0.0.dist-info/licenses/LICENSE,sha256=DtvtvigYjtTAuUJP5WajXCQPmoUwvM9IfdSTQyEfmbQ,1074
9
+ asub-1.0.0.dist-info/METADATA,sha256=MUFHxwVzZUIZCxMQuwu5dmg0TpZxM72XRU0tIP63u00,8588
10
+ asub-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
11
+ asub-1.0.0.dist-info/entry_points.txt,sha256=wZY8nSzorQrkjMzBuii7VpHPS1wyW9r1Q7aOPzIAjx8,39
12
+ asub-1.0.0.dist-info/top_level.txt,sha256=8O8t9Gz7O1wW15qT1VKGGyMAMPEzp_HX7VNEB7c6OuM,5
13
+ asub-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ asub = asub.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 asub contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ asub