PyPI - mazinger - Versions diffs - 1.7.0__tar.gz - Mend

mazinger 1.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

mazinger-1.7.0/LICENSE +21 -0
mazinger-1.7.0/PKG-INFO +189 -0
mazinger-1.7.0/README.md +145 -0
mazinger-1.7.0/mazinger/__init__.py +14 -0
mazinger-1.7.0/mazinger/__main__.py +25 -0
mazinger-1.7.0/mazinger/assemble.py +358 -0
mazinger-1.7.0/mazinger/cli/__init__.py +72 -0
mazinger-1.7.0/mazinger/cli/_describe.py +63 -0
mazinger-1.7.0/mazinger/cli/_download.py +19 -0
mazinger-1.7.0/mazinger/cli/_dub.py +91 -0
mazinger-1.7.0/mazinger/cli/_groups.py +327 -0
mazinger-1.7.0/mazinger/cli/_resegment.py +40 -0
mazinger-1.7.0/mazinger/cli/_slice.py +32 -0
mazinger-1.7.0/mazinger/cli/_speak.py +112 -0
mazinger-1.7.0/mazinger/cli/_subtitle.py +79 -0
mazinger-1.7.0/mazinger/cli/_thumbnails.py +57 -0
mazinger-1.7.0/mazinger/cli/_transcribe.py +61 -0
mazinger-1.7.0/mazinger/cli/_translate.py +90 -0
mazinger-1.7.0/mazinger/describe.py +83 -0
mazinger-1.7.0/mazinger/download.py +452 -0
mazinger-1.7.0/mazinger/paths.py +81 -0
mazinger-1.7.0/mazinger/pipeline.py +366 -0
mazinger-1.7.0/mazinger/profiles.py +112 -0
mazinger-1.7.0/mazinger/resegment.py +554 -0
mazinger-1.7.0/mazinger/srt.py +94 -0
mazinger-1.7.0/mazinger/subtitle.py +561 -0
mazinger-1.7.0/mazinger/thumbnails.py +190 -0
mazinger-1.7.0/mazinger/transcribe.py +579 -0
mazinger-1.7.0/mazinger/translate.py +403 -0
mazinger-1.7.0/mazinger/tts.py +352 -0
mazinger-1.7.0/mazinger/utils.py +182 -0
mazinger-1.7.0/mazinger.egg-info/PKG-INFO +189 -0
mazinger-1.7.0/mazinger.egg-info/SOURCES.txt +37 -0
mazinger-1.7.0/mazinger.egg-info/dependency_links.txt +1 -0
mazinger-1.7.0/mazinger.egg-info/entry_points.txt +2 -0
mazinger-1.7.0/mazinger.egg-info/requires.txt +43 -0
mazinger-1.7.0/mazinger.egg-info/top_level.txt +4 -0
mazinger-1.7.0/pyproject.toml +99 -0
mazinger-1.7.0/setup.cfg +4 -0

mazinger-1.7.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025 Mazinger Dubber Contributors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

mazinger-1.7.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,189 @@
+Metadata-Version: 2.4
+Name: mazinger
+Version: 1.7.0
+Summary: End-to-end video dubbing pipeline: transcribe, translate, and voice-clone.
+License: MIT
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: yt-dlp>=2024.0
+Requires-Dist: openai>=1.0
+Requires-Dist: json-repair>=0.28
+Requires-Dist: Pillow>=10.0
+Requires-Dist: soundfile>=0.12
+Requires-Dist: numpy>=1.24
+Requires-Dist: tqdm>=4.60
+Provides-Extra: transcribe-faster
+Requires-Dist: faster-whisper>=1.0; extra == "transcribe-faster"
+Provides-Extra: transcribe-whisperx
+Requires-Dist: whisperx>=3.0; extra == "transcribe-whisperx"
+Requires-Dist: torch>=2.0; extra == "transcribe-whisperx"
+Provides-Extra: transcribe
+Requires-Dist: mazinger[transcribe-whisperx]; extra == "transcribe"
+Provides-Extra: tts
+Requires-Dist: qwen-tts; extra == "tts"
+Requires-Dist: torch>=2.0; extra == "tts"
+Requires-Dist: soundfile>=0.12; extra == "tts"
+Provides-Extra: tts-chatterbox
+Requires-Dist: chatterbox-tts; extra == "tts-chatterbox"
+Requires-Dist: resemble-perth>=1.0.1; extra == "tts-chatterbox"
+Requires-Dist: torch>=2.0; extra == "tts-chatterbox"
+Requires-Dist: torchaudio>=2.0; extra == "tts-chatterbox"
+Requires-Dist: soundfile>=0.12; extra == "tts-chatterbox"
+Requires-Dist: numpy>=1.26; extra == "tts-chatterbox"
+Requires-Dist: pandas>=2.2; extra == "tts-chatterbox"
+Provides-Extra: flash-attn
+Requires-Dist: flash-attn>=2.0; extra == "flash-attn"
+Provides-Extra: audio-enhance
+Requires-Dist: demucs>=4.0.1; extra == "audio-enhance"
+Provides-Extra: all-qwen
+Requires-Dist: mazinger[audio-enhance,transcribe-whisperx,tts]; extra == "all-qwen"
+Provides-Extra: all-chatterbox
+Requires-Dist: mazinger[audio-enhance,transcribe-faster,tts-chatterbox]; extra == "all-chatterbox"
+Dynamic: license-file
+<p align="center">
+  <img src="docs/assets/main-logo-refined.png" alt="Mazinger Dubber" width="320" height="320" />
+</p>
+<h1 align="center">Mazinger Dubber</h1>
+<p align="center">
+  End-to-end video dubbing pipeline. Download a video, transcribe it, translate the subtitles, clone a voice, and produce a fully dubbed audio or video file — in one command.
+</p>
+## What It Does
+Mazinger chains nine stages into a single pipeline:
+1. **Download** — fetch a video from a URL or ingest a local file, extract the audio track
+2. **Transcribe** — convert speech to SRT subtitles (OpenAI Whisper API, faster-whisper, or WhisperX)
+3. **Thumbnails** — use an LLM to pick key frames from the video for visual context
+4. **Describe** — analyze the transcript and thumbnails to produce a structured summary (title, key points, keywords)
+5. **Translate** — translate the SRT into another language with duration-aware word budgets
+6. **Re-segment** — merge fragments and split oversized subtitles for readability
+7. **Speak** — synthesize voice-cloned speech for every subtitle entry (Qwen3-TTS or Chatterbox)
+8. **Assemble** — place each audio segment on the original timeline with optional tempo adjustment, loudness matching, and background audio mixing
+9. **Subtitle** — burn styled subtitles into the video and/or mux the new audio track
+Every stage can run independently or as part of the full pipeline. Interrupted runs resume automatically — completed stages and individual TTS segments are cached and skipped.
+## Prerequisites
+- Python 3.10 or later
+- ffmpeg installed and on `PATH` (`apt install ffmpeg` / `brew install ffmpeg`)
+- An OpenAI API key for LLM-powered stages (transcription, translation, thumbnails, description)
+- A CUDA GPU for local transcription and TTS (not needed for cloud-only workflows)
+## Installation
+The base install covers download, transcription (cloud), thumbnails, description, translation, re-segmentation, and subtitle embedding. No GPU needed.
+```bash
+pip install .
+```
+Add local transcription or TTS as optional extras:
+```bash
+# Local transcription
+pip install ".[transcribe-faster]"      # faster-whisper (Chatterbox-compatible)
+pip install ".[transcribe-whisperx]"    # WhisperX (best word-level alignment)
+# Voice synthesis
+pip install ".[tts]"                    # Qwen3-TTS (voice sample + transcript)
+pip install ".[tts-chatterbox]"         # Chatterbox (voice sample only, emotion control)
+# Full bundles
+pip install ".[all-qwen]"              # WhisperX + Qwen3-TTS + audio-enhance (demucs)
+pip install ".[all-chatterbox]"        # faster-whisper + Chatterbox + audio-enhance (demucs)
+```
+> Qwen and Chatterbox require different `transformers` versions and cannot share an environment.
+> WhisperX also conflicts with Chatterbox — pair it with Qwen, or use faster-whisper with Chatterbox.
+See the [Installation Guide](docs/installation.md) for venv recipes, Colab setup, and uv overrides.
+## Quick Start
+### Dub a video in one command
+```bash
+mazinger dub "https://youtube.com/watch?v=VIDEO_ID" \
+    --voice-sample speaker.m4a \
+    --voice-script speaker_transcript.txt \
+    --target-language Spanish \
+    --base-dir ./output
+```
+### Use a voice profile instead of local files
+Voice profiles are hosted on HuggingFace and downloaded automatically:
+```bash
+mazinger dub "https://youtube.com/watch?v=VIDEO_ID" \
+    --clone-profile abubakr \
+    --target-language Arabic
+```
+### Produce a video with burned subtitles
+```bash
+mazinger dub "https://youtube.com/watch?v=VIDEO_ID" \
+    --clone-profile abubakr \
+    --output-type video \
+    --embed-subtitles \
+    --subtitle-google-font "Noto Sans Arabic" \
+    --subtitle-font-size 24
+```
+### Run a single stage
+Every stage has its own sub-command:
+```bash
+mazinger download   "https://youtube.com/watch?v=VIDEO_ID" --base-dir ./output
+mazinger slice      "https://youtube.com/watch?v=VIDEO_ID" --start 00:01:00 --end 00:04:00
+mazinger transcribe ./output/projects/my-video/source/audio.mp3 -o subs.srt
+mazinger translate  --srt subs.srt --target-language French -o translated.srt
+mazinger subtitle   video.mp4 --srt translated.srt -o output.mp4
+```
+### Python API
+```python
+from mazinger import MazingerDubber
+dubber = MazingerDubber(openai_api_key="sk-...", base_dir="./output")
+proj = dubber.dub(
+    source="https://youtube.com/watch?v=VIDEO_ID",
+    voice_sample="speaker.m4a",
+    voice_script="speaker_transcript.txt",
+    target_language="Spanish",
+    output_type="video",
+    embed_subtitles=True,
+)
+print(proj.final_video)   # ./output/projects/<slug>/tts/dubbed.mp4
+```
+## Documentation
+Full documentation lives in the [`docs/`](docs/) directory:
+| Chapter | Contents |
+|---------|----------|
+| [Installation](docs/installation.md) | All install methods, extras, compatibility matrix, Colab and venv recipes |
+| [Quick Start](docs/quick-start.md) | Common workflows with copy-paste examples |
+| [Pipeline Overview](docs/pipeline.md) | How the nine stages connect, data flow, and resume behavior |
+| [CLI Reference](docs/cli-reference.md) | Every command, flag, and default value |
+| [Python API](docs/python-api.md) | Classes, functions, and parameters for programmatic use |
+| [Voice Profiles](docs/voice-profiles.md) | Using, creating, and uploading voice profiles |
+| [Subtitle Styling](docs/subtitle-styling.md) | Fonts, colors, positioning, RTL support, Google Fonts |
+| [Configuration](docs/configuration.md) | Environment variables, caching, tempo control, LLM usage tracking |
+| [Project Structure](docs/project-structure.md) | Output directory layout and file naming conventions |
+## License
+MIT

mazinger-1.7.0/README.md ADDED Viewed

@@ -0,0 +1,145 @@
+<p align="center">
+  <img src="docs/assets/main-logo-refined.png" alt="Mazinger Dubber" width="320" height="320" />
+</p>
+<h1 align="center">Mazinger Dubber</h1>
+<p align="center">
+  End-to-end video dubbing pipeline. Download a video, transcribe it, translate the subtitles, clone a voice, and produce a fully dubbed audio or video file — in one command.
+</p>
+## What It Does
+Mazinger chains nine stages into a single pipeline:
+1. **Download** — fetch a video from a URL or ingest a local file, extract the audio track
+2. **Transcribe** — convert speech to SRT subtitles (OpenAI Whisper API, faster-whisper, or WhisperX)
+3. **Thumbnails** — use an LLM to pick key frames from the video for visual context
+4. **Describe** — analyze the transcript and thumbnails to produce a structured summary (title, key points, keywords)
+5. **Translate** — translate the SRT into another language with duration-aware word budgets
+6. **Re-segment** — merge fragments and split oversized subtitles for readability
+7. **Speak** — synthesize voice-cloned speech for every subtitle entry (Qwen3-TTS or Chatterbox)
+8. **Assemble** — place each audio segment on the original timeline with optional tempo adjustment, loudness matching, and background audio mixing
+9. **Subtitle** — burn styled subtitles into the video and/or mux the new audio track
+Every stage can run independently or as part of the full pipeline. Interrupted runs resume automatically — completed stages and individual TTS segments are cached and skipped.
+## Prerequisites
+- Python 3.10 or later
+- ffmpeg installed and on `PATH` (`apt install ffmpeg` / `brew install ffmpeg`)
+- An OpenAI API key for LLM-powered stages (transcription, translation, thumbnails, description)
+- A CUDA GPU for local transcription and TTS (not needed for cloud-only workflows)
+## Installation
+The base install covers download, transcription (cloud), thumbnails, description, translation, re-segmentation, and subtitle embedding. No GPU needed.
+```bash
+pip install .
+```
+Add local transcription or TTS as optional extras:
+```bash
+# Local transcription
+pip install ".[transcribe-faster]"      # faster-whisper (Chatterbox-compatible)
+pip install ".[transcribe-whisperx]"    # WhisperX (best word-level alignment)
+# Voice synthesis
+pip install ".[tts]"                    # Qwen3-TTS (voice sample + transcript)
+pip install ".[tts-chatterbox]"         # Chatterbox (voice sample only, emotion control)
+# Full bundles
+pip install ".[all-qwen]"              # WhisperX + Qwen3-TTS + audio-enhance (demucs)
+pip install ".[all-chatterbox]"        # faster-whisper + Chatterbox + audio-enhance (demucs)
+```
+> Qwen and Chatterbox require different `transformers` versions and cannot share an environment.
+> WhisperX also conflicts with Chatterbox — pair it with Qwen, or use faster-whisper with Chatterbox.
+See the [Installation Guide](docs/installation.md) for venv recipes, Colab setup, and uv overrides.
+## Quick Start
+### Dub a video in one command
+```bash
+mazinger dub "https://youtube.com/watch?v=VIDEO_ID" \
+    --voice-sample speaker.m4a \
+    --voice-script speaker_transcript.txt \
+    --target-language Spanish \
+    --base-dir ./output
+```
+### Use a voice profile instead of local files
+Voice profiles are hosted on HuggingFace and downloaded automatically:
+```bash
+mazinger dub "https://youtube.com/watch?v=VIDEO_ID" \
+    --clone-profile abubakr \
+    --target-language Arabic
+```
+### Produce a video with burned subtitles
+```bash
+mazinger dub "https://youtube.com/watch?v=VIDEO_ID" \
+    --clone-profile abubakr \
+    --output-type video \
+    --embed-subtitles \
+    --subtitle-google-font "Noto Sans Arabic" \
+    --subtitle-font-size 24
+```
+### Run a single stage
+Every stage has its own sub-command:
+```bash
+mazinger download   "https://youtube.com/watch?v=VIDEO_ID" --base-dir ./output
+mazinger slice      "https://youtube.com/watch?v=VIDEO_ID" --start 00:01:00 --end 00:04:00
+mazinger transcribe ./output/projects/my-video/source/audio.mp3 -o subs.srt
+mazinger translate  --srt subs.srt --target-language French -o translated.srt
+mazinger subtitle   video.mp4 --srt translated.srt -o output.mp4
+```
+### Python API
+```python
+from mazinger import MazingerDubber
+dubber = MazingerDubber(openai_api_key="sk-...", base_dir="./output")
+proj = dubber.dub(
+    source="https://youtube.com/watch?v=VIDEO_ID",
+    voice_sample="speaker.m4a",
+    voice_script="speaker_transcript.txt",
+    target_language="Spanish",
+    output_type="video",
+    embed_subtitles=True,
+)
+print(proj.final_video)   # ./output/projects/<slug>/tts/dubbed.mp4
+```
+## Documentation
+Full documentation lives in the [`docs/`](docs/) directory:
+| Chapter | Contents |
+|---------|----------|
+| [Installation](docs/installation.md) | All install methods, extras, compatibility matrix, Colab and venv recipes |
+| [Quick Start](docs/quick-start.md) | Common workflows with copy-paste examples |
+| [Pipeline Overview](docs/pipeline.md) | How the nine stages connect, data flow, and resume behavior |
+| [CLI Reference](docs/cli-reference.md) | Every command, flag, and default value |
+| [Python API](docs/python-api.md) | Classes, functions, and parameters for programmatic use |
+| [Voice Profiles](docs/voice-profiles.md) | Using, creating, and uploading voice profiles |
+| [Subtitle Styling](docs/subtitle-styling.md) | Fonts, colors, positioning, RTL support, Google Fonts |
+| [Configuration](docs/configuration.md) | Environment variables, caching, tempo control, LLM usage tracking |
+| [Project Structure](docs/project-structure.md) | Output directory layout and file naming conventions |
+## License
+MIT

mazinger-1.7.0/mazinger/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""
+Mazinger Dubber -- End-to-end video dubbing pipeline.
+Transcribe, translate, and voice-clone audio from any video URL.
+Each stage can be used independently or chained through the unified
+``MazingerDubber`` pipeline class.
+"""
+from mazinger.pipeline import MazingerDubber
+from mazinger.paths import ProjectPaths
+from mazinger.utils import LLMUsageTracker
+__all__ = ["MazingerDubber", "ProjectPaths", "LLMUsageTracker"]
+__version__ = "1.1.0"

mazinger-1.7.0/mazinger/__main__.py ADDED Viewed

@@ -0,0 +1,25 @@
+"""Allow ``python -m mazinger`` invocation."""
+import os
+import warnings
+# Drop any Jupyter/Colab-specific MPLBACKEND setting: that backend may not be
+# installed in this virtual environment, which breaks imports in downstream libs.
+os.environ.pop("MPLBACKEND", None)
+# ── Silence noisy third-party warnings ──────────────────────────────────────
+# torchcodec FFmpeg compatibility warning from pyannote
+warnings.filterwarnings("ignore", message=".*torchcodec is not installed correctly.*")
+# All UserWarnings raised from pyannote modules (e.g. the TF32 reproducibility warning)
+warnings.filterwarnings("ignore", category=UserWarning, module="pyannote")
+# Lightning checkpoint auto-upgrade info
+warnings.filterwarnings("ignore", message=".*Lightning automatically upgraded.*")
+# Suppress Lightning upgrade log at the logging level too
+import logging
+logging.getLogger("lightning.pytorch.utilities.migration").setLevel(logging.WARNING)
+from mazinger.cli import main
+if __name__ == "__main__":
+    main()