PyPI - asub - Versions diffs - 1.0.0__py3-none-any.whl - Mend

asub 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

asub/__init__.py +3 -0
asub/__main__.py +7 -0
asub/cli.py +206 -0
asub/progress.py +65 -0
asub/subtitle.py +135 -0
asub/transcriber.py +174 -0
asub/translator.py +120 -0
asub-1.0.0.dist-info/METADATA +256 -0
asub-1.0.0.dist-info/RECORD +13 -0
asub-1.0.0.dist-info/WHEEL +5 -0
asub-1.0.0.dist-info/entry_points.txt +2 -0
asub-1.0.0.dist-info/licenses/LICENSE +21 -0
asub-1.0.0.dist-info/top_level.txt +1 -0

asub/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""asub — generate and translate subtitles from audio/video files."""
+__version__ = "1.0.0"

asub/__main__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""Allow running asub as ``python -m asub``."""
+import sys
+from asub.cli import main
+# Only run the CLI when executed as a script (``python -m asub``); a plain
+# import of this module must not terminate the interpreter.
+if __name__ == "__main__":
+    sys.exit(main())

asub/cli.py ADDED Viewed

@@ -0,0 +1,206 @@
+"""Command-line interface for asub."""
+from __future__ import annotations
+import argparse
+import logging
+from pathlib import Path
+from asub import __version__
+from asub.progress import Spinner
+from asub.subtitle import SubtitleFormat, infer_output_path, write_subtitle_file
+from asub.transcriber import AVAILABLE_MODELS, DEFAULT_MODEL, Segment, load_model, transcribe
+from asub.translator import translate_segments
+def _build_parser() -> argparse.ArgumentParser:
+    """Construct the argument parser for the ``asub`` command.
+    ``input`` is declared with ``nargs="?"``, so the parser itself accepts a
+    command line that omits it.
+    """
+    cli = argparse.ArgumentParser(
+        prog="asub",
+        description="Generate and translate subtitles from audio/video files.",
+    )
+    cli.add_argument(
+        "input", nargs="?", type=Path, default=None,
+        help="Path to an audio or video file.",
+    )
+    cli.add_argument(
+        "-o", "--output", type=Path, default=None,
+        help="Output subtitle file path. Defaults to <input>.srt.",
+    )
+    format_choices = [member.value for member in SubtitleFormat]
+    cli.add_argument(
+        "-f", "--format", choices=format_choices, default=None,
+        help="Subtitle format (default: inferred from output extension, or srt).",
+    )
+    # Options that control the transcription step.
+    whisper_opts = cli.add_argument_group("transcription")
+    whisper_opts.add_argument(
+        "-m", "--model", choices=AVAILABLE_MODELS, default=DEFAULT_MODEL,
+        help=f"Whisper model size (default: {DEFAULT_MODEL}).",
+    )
+    whisper_opts.add_argument(
+        "--device", choices=["auto", "cpu", "cuda"], default="auto",
+        help='Device to run inference on (default: "auto").',
+    )
+    whisper_opts.add_argument(
+        "--compute-type", default=None,
+        help="Quantisation type (e.g. int8, float16). Auto-selected if omitted.",
+    )
+    whisper_opts.add_argument(
+        "-l", "--language", default=None,
+        help="Source language code (e.g. en, it, de). Auto-detected if omitted.",
+    )
+    whisper_opts.add_argument(
+        "--no-vad", action="store_true",
+        help="Disable Voice Activity Detection filter.",
+    )
+    # Options that control the optional translation step.
+    translate_opts = cli.add_argument_group("translation")
+    translate_opts.add_argument(
+        "-t", "--translate", metavar="LANG", default=None,
+        help="Translate subtitles to this language code (e.g. it, de, fr, es).",
+    )
+    # General flags.
+    cli.add_argument(
+        "-v", "--verbose", action="count", default=0,
+        help="Increase verbosity (-v for INFO, -vv for DEBUG).",
+    )
+    cli.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
+    cli.add_argument(
+        "--list-languages", action="store_true",
+        help="Print supported translation languages and exit.",
+    )
+    return cli
+def _configure_logging(verbosity: int) -> None:
+    """Set up root logging from the ``-v`` repeat count.
+    A count of ``1`` selects INFO, ``2`` or more selects DEBUG, and anything
+    else keeps the WARNING level.
+    """
+    if verbosity >= 2:
+        chosen = logging.DEBUG
+    elif verbosity == 1:
+        chosen = logging.INFO
+    else:
+        chosen = logging.WARNING
+    log_format = "%(asctime)s  %(levelname)-8s  %(name)s  %(message)s"
+    logging.basicConfig(level=chosen, format=log_format, datefmt="%H:%M:%S")
+def main(argv: list[str] | None = None) -> int:
+    """Entry point for the CLI.
+    Parameters
+    ----------
+    argv:
+        Argument list to parse; ``None`` lets :mod:`argparse` read ``sys.argv``.
+    Returns
+    -------
+    ``0`` on success.  Usage errors (missing or non-existent input, or an
+    output extension from which no subtitle format can be inferred) are
+    reported through ``parser.error``, which prints the message and exits the
+    process with status 2 instead of returning.
+    """
+    parser = _build_parser()
+    args = parser.parse_args(argv)
+    # --- List languages and exit ---
+    if args.list_languages:
+        from asub.translator import supported_languages
+        langs = supported_languages()
+        for name, code in sorted(langs.items()):
+            print(f"  {code:6s}  {name}")
+        return 0
+    # --- Validate input ---
+    input_path: Path | None = args.input
+    if input_path is None:
+        parser.error("the following arguments are required: input")
+    if not input_path.is_file():
+        parser.error(f"Input file not found: {input_path}")
+    _configure_logging(args.verbose)
+    logger = logging.getLogger("asub")
+    # --- Determine output path & format ---
+    fmt: SubtitleFormat | None = None
+    if args.format is not None:
+        fmt = SubtitleFormat(args.format)
+    output_path: Path
+    if args.output is not None:
+        output_path = args.output
+        if fmt is None:
+            # Infer the format from the extension *now*: failing only when the
+            # file is written would throw away a completed transcription.
+            ext = output_path.suffix.lower().lstrip(".")
+            try:
+                fmt = SubtitleFormat(ext)
+            except ValueError:
+                parser.error(
+                    f"Cannot infer subtitle format from extension '.{ext}'. "
+                    f"Use -f/--format with one of: "
+                    f"{', '.join(f.value for f in SubtitleFormat)}."
+                )
+    else:
+        target_fmt = fmt if fmt is not None else SubtitleFormat.SRT
+        suffix = f"_{args.translate}" if args.translate else ""
+        output_path = infer_output_path(input_path, target_fmt, suffix=suffix)
+    # --- Transcribe ---
+    logger.info("Model: %s | Device: %s", args.model, args.device)
+    with Spinner(f"Loading model '{args.model}'"):
+        model = load_model(args.model, device=args.device, compute_type=args.compute_type)
+    print(f"Model '{args.model}' loaded.", flush=True)
+    with Spinner(f"Transcribing '{input_path.name}'…") as spinner:
+        def _on_segment(index: int, seg: Segment, duration: float) -> None:
+            # Progress is the end time of the latest segment relative to the
+            # total duration, capped at 100%.
+            pct = min(seg.end / duration * 100, 100.0) if duration > 0 else 0
+            spinner.update(f"Transcribing '{input_path.name}' — {index} segments ({pct:.0f}%)")
+        result = transcribe(
+            model,
+            input_path,
+            language=args.language,
+            vad_filter=not args.no_vad,
+            on_segment=_on_segment,
+        )
+    segments = result.segments
+    print(
+        f"Transcribed {len(segments)} segments "
+        f"(detected language: {result.language}, "
+        f"confidence: {result.language_probability:.0%})",
+        flush=True,
+    )
+    # --- Translate (optional) ---
+    if args.translate:
+        with Spinner(f"Translating to '{args.translate}'"):
+            segments = translate_segments(
+                segments,
+                source=result.language,
+                target=args.translate,
+            )
+        print(f"Translated to '{args.translate}'.", flush=True)
+    # --- Write output ---
+    with Spinner("Writing subtitle file"):
+        written = write_subtitle_file(segments, output_path, fmt=fmt)
+    print(f"Saved → {written}", flush=True)
+    return 0

asub/progress.py ADDED Viewed

@@ -0,0 +1,65 @@
+"""Terminal progress utilities (spinner + inline status)."""
+from __future__ import annotations
+import sys
+import threading
+class Spinner:
+    """Animated status line on stderr, usable as a context manager.
+    Usage::
+        with Spinner("Loading model"):
+            do_slow_work()
+    Entering starts a daemon thread that redraws the line; leaving stops the
+    thread, waits for it and blanks the line.
+    """
+    _FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏")
+    def __init__(self, message: str = "") -> None:
+        self._message = message
+        self._stop_event = threading.Event()
+        self._thread: threading.Thread | None = None
+        # Length of the most recently drawn line, used for padding/clearing.
+        self._last_line_len = 0
+    def update(self, message: str) -> None:
+        """Replace the message shown next to the spinner frame."""
+        self._message = message
+    def __enter__(self) -> Spinner:
+        self._stop_event.clear()
+        worker = threading.Thread(target=self._spin, daemon=True)
+        self._thread = worker
+        worker.start()
+        return self
+    def __exit__(self, *_: object) -> None:
+        self._stop_event.set()
+        worker = self._thread
+        if worker is not None:
+            worker.join()
+        self._clear_line()
+    def _spin(self) -> None:
+        """Redraw the spinner line until the stop event is set."""
+        tick = 0
+        frame_count = len(self._FRAMES)
+        while not self._stop_event.is_set():
+            glyph = self._FRAMES[tick % frame_count]
+            text = f"\r  {glyph} {self._message}"
+            # Right-pad so a shorter message fully overwrites the previous one.
+            sys.stderr.write(text.ljust(self._last_line_len))
+            sys.stderr.flush()
+            self._last_line_len = len(text)
+            tick += 1
+            self._stop_event.wait(0.08)
+    def _clear_line(self) -> None:
+        """Overwrite the last drawn line with spaces and return the cursor."""
+        blank = " " * self._last_line_len
+        sys.stderr.write("\r" + blank + "\r")
+        sys.stderr.flush()

asub/subtitle.py ADDED Viewed

@@ -0,0 +1,135 @@
+"""Generate subtitle files (SRT, VTT) from transcription segments."""
+from __future__ import annotations
+import logging
+from enum import Enum
+from pathlib import Path
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+    from asub.transcriber import Segment
+logger = logging.getLogger(__name__)
+class SubtitleFormat(Enum):
+    """Supported subtitle output formats."""
+    # Each value is also the file extension used for that format (without the
+    # dot) when output paths are built or a format is inferred from a path.
+    SRT = "srt"
+    VTT = "vtt"
+def _split_timestamp(seconds: float) -> tuple[int, int, int, int]:
+    """Split *seconds* into ``(hours, minutes, seconds, milliseconds)``.
+    The value is rounded to whole milliseconds *before* being split, so a
+    fraction such as ``.9996`` carries into the seconds field instead of
+    yielding an out-of-range ``1000`` ms component.  Negative inputs are
+    clamped to zero.
+    """
+    total_ms = max(0, round(seconds * 1000))
+    total_secs, millis = divmod(total_ms, 1000)
+    total_mins, secs = divmod(total_secs, 60)
+    hours, minutes = divmod(total_mins, 60)
+    return hours, minutes, secs, millis
+def _format_timestamp_srt(seconds: float) -> str:
+    """Format seconds as ``HH:MM:SS,mmm`` (SRT standard)."""
+    hours, minutes, secs, millis = _split_timestamp(seconds)
+    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
+def _format_timestamp_vtt(seconds: float) -> str:
+    """Format seconds as ``HH:MM:SS.mmm`` (WebVTT standard)."""
+    hours, minutes, secs, millis = _split_timestamp(seconds)
+    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"
+def generate_srt(segments: Sequence[Segment]) -> str:
+    """Render *segments* as SRT text.
+    Each cue is a 1-based counter line, a ``start --> end`` timing line, the
+    segment text and an empty line; all lines are joined with ``\\n``.
+    """
+    cue_lines: list[str] = []
+    for number, seg in enumerate(segments, start=1):
+        timing = f"{_format_timestamp_srt(seg.start)} --> {_format_timestamp_srt(seg.end)}"
+        # The trailing empty string becomes the blank line between cues.
+        cue_lines.extend((f"{number}", timing, seg.text, ""))
+    return "\n".join(cue_lines)
+def generate_vtt(segments: Sequence[Segment]) -> str:
+    """Render *segments* as WebVTT text.
+    The output starts with the ``WEBVTT`` header and an empty line, followed
+    by one numbered cue per segment, each terminated by an empty line.
+    """
+    cue_lines: list[str] = ["WEBVTT", ""]
+    for number, seg in enumerate(segments, start=1):
+        timing = f"{_format_timestamp_vtt(seg.start)} --> {_format_timestamp_vtt(seg.end)}"
+        cue_lines.extend((f"{number}", timing, seg.text, ""))
+    return "\n".join(cue_lines)
+def generate(segments: Sequence[Segment], fmt: SubtitleFormat) -> str:
+    """Render *segments* using the generator registered for *fmt*."""
+    # Dispatch table: one renderer per supported format.
+    renderers = {
+        SubtitleFormat.SRT: generate_srt,
+        SubtitleFormat.VTT: generate_vtt,
+    }
+    render = renderers[fmt]
+    return render(segments)
+def write_subtitle_file(
+    segments: Sequence[Segment],
+    output_path: str | Path,
+    fmt: SubtitleFormat | None = None,
+) -> Path:
+    """Render *segments* and write them to *output_path* as UTF-8 text.
+    Parameters
+    ----------
+    segments:
+        Timed text segments.
+    output_path:
+        Destination file path; missing parent directories are created.
+    fmt:
+        Subtitle format.  If ``None``, it is inferred from the lower-cased
+        extension of *output_path*.
+    Returns
+    -------
+    *output_path* as a :class:`Path`.
+    Raises
+    ------
+    ValueError
+        If *fmt* is ``None`` and the extension is not a known format.
+    """
+    destination = Path(output_path)
+    if fmt is None:
+        ext = destination.suffix.lower().lstrip(".")
+        try:
+            fmt = SubtitleFormat(ext)
+        except ValueError:
+            allowed = ", ".join(f.value for f in SubtitleFormat)
+            msg = (
+                f"Cannot infer subtitle format from extension '.{ext}'. "
+                f"Use one of: {allowed}."
+            )
+            raise ValueError(msg) from None
+    rendered = generate(segments, fmt)
+    destination.parent.mkdir(parents=True, exist_ok=True)
+    destination.write_text(rendered, encoding="utf-8")
+    logger.info("Subtitle file written → %s", destination)
+    return destination
+def infer_output_path(
+    input_path: str | Path,
+    fmt: SubtitleFormat,
+    *,
+    suffix: str = "",
+) -> Path:
+    """Build the subtitle path that sits next to *input_path*.
+    The file name becomes ``<stem><suffix>.<fmt.value>``; the directory part
+    of *input_path* is kept.
+    Example
+    -------
+    >>> infer_output_path("video.mp4", SubtitleFormat.SRT)
+    PosixPath('video.srt')
+    >>> infer_output_path("video.mp4", SubtitleFormat.SRT, suffix="_en")
+    PosixPath('video_en.srt')
+    """
+    source = Path(input_path)
+    new_name = f"{source.stem}{suffix}.{fmt.value}"
+    return source.with_name(new_name)

asub/transcriber.py ADDED Viewed

@@ -0,0 +1,174 @@
+"""Transcription engine powered by faster-whisper."""
+from __future__ import annotations
+import logging
+from collections.abc import Callable
+from dataclasses import dataclass
+from pathlib import Path
+from faster_whisper import WhisperModel
+logger = logging.getLogger(__name__)
+# Models ordered by size — users pick the trade-off between speed and accuracy.
+AVAILABLE_MODELS: tuple[str, ...] = (
+    "tiny",
+    "base",
+    "small",
+    "medium",
+    "large-v3",
+    "turbo",
+    "distil-large-v3",
+)
+DEFAULT_MODEL = "medium"
+@dataclass(frozen=True, slots=True)
+class Segment:
+    """A single transcribed segment with timing information."""
+    # Start time of the segment (the subtitle writers treat it as seconds).
+    start: float
+    # End time of the segment, on the same scale as ``start``.
+    end: float
+    # Text of the segment.
+    text: str
+@dataclass(frozen=True, slots=True)
+class TranscriptionResult:
+    """Complete result of a transcription run."""
+    # Language reported by the model for the audio.
+    language: str
+    # Probability the model assigns to ``language`` (logged as value * 100 %).
+    language_probability: float
+    # Audio duration as reported by the model — presumably seconds; confirm
+    # against the faster-whisper documentation.
+    duration: float
+    # Transcribed segments, in the order the model produced them.
+    segments: list[Segment]
+def _cuda_available() -> bool:
+    """Return True if CTranslate2 reports at least one usable CUDA device.
+    ``get_supported_compute_types`` returns compute-type names (``float16``,
+    ``int8``, ...), never the string ``"cuda"``, so it cannot be used as an
+    availability probe; the device count is queried instead.
+    """
+    try:
+        import ctranslate2
+        return ctranslate2.get_cuda_device_count() > 0
+    except Exception:
+        # Best-effort probe: a missing or broken CTranslate2 / CUDA runtime
+        # simply means "no GPU", so the caller falls back to the CPU.
+        return False
+def _resolve_device(device: str) -> str:
+    """Map a requested device name to a concrete one.
+    ``"auto"`` becomes ``"cuda"`` when :func:`_cuda_available` says so and
+    ``"cpu"`` otherwise; ``"cpu"`` and ``"cuda"`` are returned unchanged.
+    Raises
+    ------
+    ValueError
+        For any other device name.
+    """
+    if device == "auto":
+        if _cuda_available():
+            return "cuda"
+        return "cpu"
+    if device not in ("cpu", "cuda"):
+        msg = f"Unsupported device: {device!r}. Use 'auto', 'cpu', or 'cuda'."
+        raise ValueError(msg)
+    return device
+def _resolve_compute_type(device: str, compute_type: str | None) -> str:
+    """Return *compute_type*, or a per-device default when it is ``None``.
+    The default is ``"float16"`` for ``"cuda"`` and ``"int8"`` for any other
+    device.
+    """
+    if compute_type is None:
+        return "float16" if device == "cuda" else "int8"
+    return compute_type
+def load_model(
+    model_size: str = DEFAULT_MODEL,
+    *,
+    device: str = "auto",
+    compute_type: str | None = None,
+) -> WhisperModel:
+    """Instantiate a :class:`WhisperModel` for transcription.
+    Parameters
+    ----------
+    model_size:
+        One of :data:`AVAILABLE_MODELS`.
+    device:
+        ``"auto"`` (default), ``"cpu"``, or ``"cuda"``.
+    compute_type:
+        Quantisation type.  ``None`` picks a default based on the resolved
+        device.
+    """
+    resolved_device = _resolve_device(device)
+    resolved_type = _resolve_compute_type(resolved_device, compute_type)
+    logger.info(
+        "Loading Whisper model '%s' on %s (%s)…",
+        model_size,
+        resolved_device,
+        resolved_type,
+    )
+    return WhisperModel(model_size, device=resolved_device, compute_type=resolved_type)
+def transcribe(
+    model: WhisperModel,
+    audio_path: str | Path,
+    *,
+    language: str | None = None,
+    vad_filter: bool = True,
+    word_timestamps: bool = False,
+    on_segment: Callable[[int, Segment, float], None] | None = None,
+) -> TranscriptionResult:
+    """Run *model* on an audio or video file and collect the timed segments.
+    Parameters
+    ----------
+    model:
+        A loaded :class:`WhisperModel`.
+    audio_path:
+        Path to the media file; it is resolved to an absolute path first.
+    language:
+        Language code (e.g. ``"en"``), or ``None``; forwarded to the model.
+    vad_filter:
+        Forwarded to the model's ``vad_filter`` option.
+    word_timestamps:
+        Forwarded to the model's ``word_timestamps`` option.
+    on_segment:
+        Optional callback invoked after each segment is collected.
+        Receives ``(segment_count_so_far, segment, audio_duration)``.
+    """
+    resolved = str(Path(audio_path).resolve())
+    logger.info("Transcribing '%s'…", resolved)
+    raw_segments, info = model.transcribe(
+        resolved,
+        language=language,
+        vad_filter=vad_filter,
+        word_timestamps=word_timestamps,
+        beam_size=5,
+    )
+    logger.info(
+        "Detected language: %s (probability %.2f%%)",
+        info.language,
+        info.language_probability * 100,
+    )
+    collected: list[Segment] = []
+    for raw in raw_segments:
+        item = Segment(start=raw.start, end=raw.end, text=raw.text.strip())
+        collected.append(item)
+        logger.debug("[%.2fs → %.2fs] %s", raw.start, raw.end, item.text)
+        if on_segment is None:
+            continue
+        # The first argument is the 1-based count of segments seen so far.
+        on_segment(len(collected), item, info.duration)
+    logger.info("Transcription complete — %d segments.", len(collected))
+    return TranscriptionResult(
+        language=info.language,
+        language_probability=info.language_probability,
+        duration=info.duration,
+        segments=collected,
+    )
+def transcribe_file(
+    audio_path: str | Path,
+    *,
+    model_size: str = DEFAULT_MODEL,
+    device: str = "auto",
+    compute_type: str | None = None,
+    language: str | None = None,
+    vad_filter: bool = True,
+) -> TranscriptionResult:
+    """One-shot helper: load a model, then transcribe *audio_path* with it."""
+    loaded = load_model(model_size, device=device, compute_type=compute_type)
+    outcome = transcribe(loaded, audio_path, language=language, vad_filter=vad_filter)
+    return outcome

asub/translator.py ADDED Viewed

@@ -0,0 +1,120 @@
+"""Translation layer using deep-translator (Google Translate by default)."""
+from __future__ import annotations
+import logging
+from typing import TYPE_CHECKING
+from deep_translator import GoogleTranslator
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+    from asub.transcriber import Segment, TranscriptionResult
+logger = logging.getLogger(__name__)
+# Maximum characters Google Translate accepts per request.
+_GOOGLE_CHAR_LIMIT = 5000
+def supported_languages() -> dict[str, str]:
+    """Return the ``{name: code}`` mapping reported by ``GoogleTranslator``."""
+    client = GoogleTranslator()
+    return client.get_supported_languages(as_dict=True)
+def translate_text(text: str, *, source: str = "auto", target: str = "en") -> str:
+    """Translate one string; empty or whitespace-only input is returned as is."""
+    if text.strip():
+        return GoogleTranslator(source=source, target=target).translate(text)
+    return text
+def translate_segments(
+    segments: Sequence[Segment],
+    *,
+    source: str = "auto",
+    target: str = "en",
+) -> list[Segment]:
+    """Translate the text of every segment, keeping the timestamps.
+    Segment texts are joined with newlines into batches that stay within
+    ``_GOOGLE_CHAR_LIMIT`` characters, translated per batch, and split back
+    on newlines.  When the number of lines returned differs from the number
+    of segments sent, that batch is translated one segment at a time.
+    Parameters
+    ----------
+    segments:
+        The transcribed segments to translate.
+    source:
+        Source language code, or ``"auto"`` for auto-detection.
+    target:
+        Target language code (e.g. ``"it"``, ``"de"``, ``"fr"``).
+    Returns
+    -------
+    A new list of :class:`~asub.transcriber.Segment` with translated text.
+    """
+    from asub.transcriber import Segment as SegmentCls
+    if not segments:
+        return []
+    logger.info("Translating %d segments → %s…", len(segments), target)
+    translator = GoogleTranslator(source=source, target=target)
+    # Group segment indices so each joined batch fits the character limit.
+    separator = "\n"
+    sep_len = len(separator)
+    groups: list[list[int]] = []
+    pending: list[int] = []
+    pending_chars = 0
+    for position, seg in enumerate(segments):
+        cost = len(seg.text) + sep_len
+        # Close the current group before it would overflow; a single
+        # over-long segment still gets a group of its own.
+        if pending and pending_chars + cost > _GOOGLE_CHAR_LIMIT:
+            groups.append(pending)
+            pending = []
+            pending_chars = 0
+        pending.append(position)
+        pending_chars += cost
+    if pending:
+        groups.append(pending)
+    # Translate group by group and store each result at its original index.
+    outputs: list[str] = [""] * len(segments)
+    for group in groups:
+        joined = separator.join(segments[i].text for i in group)
+        pieces = translator.translate(joined).split("\n")
+        if len(pieces) == len(group):
+            for i, piece in zip(group, pieces, strict=True):
+                outputs[i] = piece.strip()
+            continue
+        # The translator merged or split lines: redo this group one by one.
+        logger.debug("Batch split mismatch — falling back to per-segment translation.")
+        for i in group:
+            outputs[i] = translator.translate(segments[i].text)
+    translated = [
+        SegmentCls(start=seg.start, end=seg.end, text=outputs[i])
+        for i, seg in enumerate(segments)
+    ]
+    logger.info("Translation complete.")
+    return translated
+def translate_result(
+    result: TranscriptionResult,
+    *,
+    target: str,
+    source: str | None = None,
+) -> list[Segment]:
+    """Translate the segments of a :class:`TranscriptionResult`.
+    When *source* is ``None`` the language stored on *result* is used as the
+    source language.
+    """
+    src = result.language if source is None else source
+    return translate_segments(result.segments, source=src, target=target)

asub-1.0.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,256 @@
+Metadata-Version: 2.4
+Name: asub
+Version: 1.0.0
+Summary: Generate and translate subtitles from audio/video files using Whisper.
+Author: asub contributors
+License-Expression: MIT
+Project-URL: Homepage, https://github.com/simoneraffaelli/subtitle-generator
+Project-URL: Repository, https://github.com/simoneraffaelli/subtitle-generator
+Project-URL: Issues, https://github.com/simoneraffaelli/subtitle-generator/issues
+Keywords: subtitles,whisper,transcription,translation,srt,vtt
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: End Users/Desktop
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
+Classifier: Topic :: Multimedia :: Video
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: faster-whisper<2.0.0,>=1.0.0
+Requires-Dist: deep-translator<2.0.0,>=1.11.0
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0; extra == "dev"
+Requires-Dist: ruff>=0.4.0; extra == "dev"
+Requires-Dist: pyinstaller>=6.0; extra == "dev"
+Dynamic: license-file
+# asub
+Generate and translate subtitles from any audio or video file — powered by
+[faster-whisper](https://github.com/SYSTRAN/faster-whisper) and
+[deep-translator](https://github.com/nidhaloff/deep-translator).
+## Features
+- **Fast transcription** — up to 4× faster than OpenAI Whisper with the same
+  accuracy, using CTranslate2.
+- **Automatic language detection** — or specify the source language manually.
+- **Translation** — translate subtitles to 100+ languages via Google Translate
+  (free, no API key).
+- **Multiple output formats** — SRT and WebVTT.
+- **VAD filtering** — Silero VAD removes silence and reduces hallucination.
+- **Model choice** — from `tiny` (fast, less accurate) to `large-v3`
+  (slow, most accurate).
+- **CPU & GPU** — works on both, with int8 quantisation for low-memory setups.
+- **Packageable as .exe** — single-file Windows executable via PyInstaller.
+## Installation
+### From source (recommended for development)
+```bash
+git clone https://github.com/simoneraffaelli/subtitle-generator.git
+cd subtitle-generator
+pip install -e ".[dev]"
+```
+### From PyPI (once published)
+```bash
+pip install asub
+```
+## Quick start
+```bash
+# Transcribe a video and generate subtitles (auto-detect language)
+asub video.mp4
+# Use a specific model and output format
+asub video.mp4 -m large-v3 -f vtt
+# Transcribe and translate to Italian
+asub video.mp4 -t it
+# Specify source language, translate to German, verbose output
+asub podcast.mp3 -l en -t de -v
+# Use CPU with int8 quantisation
+asub interview.wav --device cpu --compute-type int8
+```
+## CLI reference
+```
+usage: asub [-h] [-o OUTPUT] [-f {srt,vtt}] [-m MODEL] [--device {auto,cpu,cuda}]
+                 [--compute-type TYPE] [-l LANG] [--no-vad] [-t LANG] [-v] [--version]
+                 [--list-languages]
+                 input
+positional arguments:
+  input                 Path to an audio or video file.
+options:
+  -o, --output          Output subtitle file path (default: <input>[_<lang>].<format>)
+  -f, --format          Subtitle format: srt, vtt
+  -v, --verbose         Increase verbosity (-v INFO, -vv DEBUG)
+  --version             Show version and exit
+  --list-languages      Print supported translation languages and exit
+transcription:
+  -m, --model           Whisper model size (default: medium)
+  --device              auto | cpu | cuda (default: auto)
+  --compute-type        Quantisation type (auto-selected if omitted)
+  -l, --language        Source language code (auto-detected if omitted)
+  --no-vad              Disable Voice Activity Detection
+translation:
+  -t, --translate LANG  Translate subtitles to this language code
+```
+## Python API
+```python
+from asub.transcriber import load_model, transcribe
+from asub.translator import translate_segments
+from asub.subtitle import write_subtitle_file, SubtitleFormat
+# 1. Transcribe
+model = load_model("medium", device="auto")
+result = transcribe(model, "video.mp4")
+# 2. Translate (optional)
+translated = translate_segments(result.segments, source=result.language, target="it")
+# 3. Write subtitle file
+write_subtitle_file(translated, "video_it.srt")
+```
+## Building a Windows .exe
+```bash
+pip install ".[dev]"
+pyinstaller asub.spec
+```
+The executable will be in `dist/asub.exe`.
+> **Note:** The .exe does not bundle Whisper model weights. Models are downloaded
+> on first run and cached in the default Hugging Face cache directory.
+## Hugging Face token (optional)
+On first run, Whisper model weights are downloaded from the Hugging Face Hub.
+Without authentication you may see this warning:
+> You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN
+> to enable higher rate limits and faster downloads
+This is **not an error** — the download still works, just at lower rate limits.
+To silence the warning and get faster downloads:
+1. Create a free account at <https://huggingface.co>.
+2. Go to **Settings → Access Tokens** and generate a token.
+3. Set the token before running asub:
+```bash
+# Linux / macOS
+export HF_TOKEN="hf_your_token_here"
+# Windows PowerShell
+$env:HF_TOKEN = "hf_your_token_here"
+```
+To make this permanent, add the variable to your shell profile or set it via
+**System → Environment Variables** on Windows.
+## Available models
+| Model            | Parameters | Relative speed | VRAM   |
+| ---------------- | ---------- | -------------- | ------ |
+| `tiny`           | 39 M       | ~10×           | ~1 GB  |
+| `base`           | 74 M       | ~7×            | ~1 GB  |
+| `small`          | 244 M      | ~4×            | ~2 GB  |
+| `medium`         | 769 M      | ~2×            | ~5 GB  |
+| `large-v3`       | 1550 M     | 1×             | ~10 GB |
+| `turbo`          | 809 M      | ~8×            | ~6 GB  |
+| `distil-large-v3`| 756 M      | ~6×            | ~6 GB  |
+### Choosing the right model
+Not every model is the best choice for every situation. Here's a breakdown to
+help you pick:
+- **`tiny`** — Fastest model by far. Good for quick previews or testing your
+  pipeline. Accuracy is noticeably lower, especially on non-English audio or
+  noisy recordings. Use it when speed matters more than quality.
+- **`base`** — A small step up from `tiny`. Slightly more accurate, still very
+  fast. Suitable for clear speech in common languages.
+- **`small`** — A solid mid-range option. Handles most languages well and runs
+  comfortably on CPU. Good balance for everyday use when you don't have a GPU.
+- **`medium`** — The default. Significantly more accurate than `small`,
+  especially for accented speech, niche languages, and overlapping speakers.
+  Slower on CPU, but a great choice with a GPU.
+- **`large-v3`** — The most accurate model. Best for professional-quality
+  subtitles, rare languages, or heavily accented audio. Requires a CUDA GPU
+  with at least 10 GB VRAM for practical use.
+- **`turbo`** — Near `large-v3` accuracy at roughly 8× the speed. This is the
+  best "quality per second" option if you have a GPU with ≥6 GB VRAM.
+- **`distil-large-v3`** — A distilled version of `large-v3`. Similar accuracy
+  on English, slightly worse on other languages. Fast and memory-efficient.
+  Best for English-heavy workloads on a GPU.
+### Recommended commands
+**Fastest result** — use `tiny` when you just need a rough draft quickly:
+```bash
+asub video.mp4 -m tiny
+```
+**Best result** — use `large-v3` (GPU required) for maximum accuracy:
+```bash
+asub video.mp4 -m large-v3
+```
+**Best compromise** — use `turbo` on GPU for near-best accuracy at high speed,
+or `small` on CPU for a good quality-to-speed ratio:
+```bash
+# With a CUDA GPU (recommended)
+asub video.mp4 -m turbo
+# CPU only
+asub video.mp4 -m small
+```
+> **Tip:** The device and compute type are auto-detected. If you have a CUDA
+> GPU, asub will use it with `float16` automatically. On CPU it falls back
+> to `int8` quantisation.
+## Upgrading dependencies
+```bash
+pip install --upgrade faster-whisper deep-translator
+```
+## Contributing
+1. Fork the repo and create a feature branch.
+2. Install dev dependencies: `pip install -e ".[dev]"`
+3. Run tests: `python -m pytest`
+4. Lint: `ruff check src/ tests/`
+5. Open a pull request.
+## License
+[MIT](LICENSE)
+## Acknowledgements
+Built with the great help of [Claude Opus 4.6](https://www.anthropic.com/) by Anthropic.

asub-1.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,13 @@
+asub/__init__.py,sha256=FGNPRlcf5_cG4iOMGF5OOLxhCLicXSoUj8uSqxmSSQs,95
+asub/__main__.py,sha256=9pVBG03fOAlQKoQ-q_aaovG-GnjWKPZsFNiUQS88MoY,105
+asub/cli.py,sha256=pERnwVQ6OF38jNXNDuGdUsZF8kVa8trpzKSRPAjZy40,6147
+asub/progress.py,sha256=y2TfJOtG2nn_7p8To7WMeNsbMu9NZ2HropjnsNFLR28,1924
+asub/subtitle.py,sha256=nunCzo_VKBvQQMjXjnnuVEt_3Mg4mtr4T1l6szYO8TA,3913
+asub/transcriber.py,sha256=_jJtd_YHeTrLN6N1q9Mu_NoVkQgAZXQYauMajvI5qTc,4967
+asub/translator.py,sha256=l7-mZDE4Us2suhWrzsyTPZ9xM1D-Ji6jFKOJ4PcKoOk,3802
+asub-1.0.0.dist-info/licenses/LICENSE,sha256=DtvtvigYjtTAuUJP5WajXCQPmoUwvM9IfdSTQyEfmbQ,1074
+asub-1.0.0.dist-info/METADATA,sha256=MUFHxwVzZUIZCxMQuwu5dmg0TpZxM72XRU0tIP63u00,8588
+asub-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+asub-1.0.0.dist-info/entry_points.txt,sha256=wZY8nSzorQrkjMzBuii7VpHPS1wyW9r1Q7aOPzIAjx8,39
+asub-1.0.0.dist-info/top_level.txt,sha256=8O8t9Gz7O1wW15qT1VKGGyMAMPEzp_HX7VNEB7c6OuM,5
+asub-1.0.0.dist-info/RECORD,,

asub-1.0.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (82.0.1)
+Root-Is-Purelib: true
+Tag: py3-none-any

asub-1.0.0.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ asub = asub.cli:main

asub-1.0.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 asub contributors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

asub-1.0.0.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ asub