epub2speech 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epub2speech-0.0.1/LICENSE +21 -0
- epub2speech-0.0.1/PKG-INFO +161 -0
- epub2speech-0.0.1/README.md +124 -0
- epub2speech-0.0.1/epub2speech/__init__.py +9 -0
- epub2speech-0.0.1/epub2speech/chapter_tts.py +179 -0
- epub2speech-0.0.1/epub2speech/cli.py +144 -0
- epub2speech-0.0.1/epub2speech/convertor.py +139 -0
- epub2speech-0.0.1/epub2speech/epub_picker.py +202 -0
- epub2speech-0.0.1/epub2speech/extractor.py +74 -0
- epub2speech-0.0.1/epub2speech/m4b_generator.py +178 -0
- epub2speech-0.0.1/epub2speech/tts/__init__.py +7 -0
- epub2speech-0.0.1/epub2speech/tts/azure_provider.py +138 -0
- epub2speech-0.0.1/epub2speech/tts/protocol.py +19 -0
- epub2speech-0.0.1/pyproject.toml +70 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 OOMOL Lab
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: epub2speech
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Convert EPUB e-books into high-quality audiobooks using Azure Text-to-Speech technology
|
|
5
|
+
License: MIT
|
|
6
|
+
Keywords: epub,audiobook,text-to-speech,tts,azure,m4b,ebook
|
|
7
|
+
Author: TaoZeyu
|
|
8
|
+
Author-email: i@taozeyu.com
|
|
9
|
+
Maintainer: TaoZeyu
|
|
10
|
+
Maintainer-email: i@taozeyu.com
|
|
11
|
+
Requires-Python: >=3.11,<3.14
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Conversion
|
|
21
|
+
Classifier: Topic :: Text Processing :: Markup
|
|
22
|
+
Classifier: Topic :: Utilities
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: azure-cognitiveservices-speech (>=1.46.0,<2.0.0)
|
|
25
|
+
Requires-Dist: ebooklib (>=0.19,<0.20)
|
|
26
|
+
Requires-Dist: numpy (>=2.3.3,<3.0.0)
|
|
27
|
+
Requires-Dist: pytest (>=7.0.0) ; extra == "dev"
|
|
28
|
+
Requires-Dist: pytest-cov (>=4.0.0) ; extra == "dev"
|
|
29
|
+
Requires-Dist: resource-segmentation (==0.0.5)
|
|
30
|
+
Requires-Dist: soundfile (>=0.13.1,<0.14.0)
|
|
31
|
+
Requires-Dist: spacy (>=3.8.7,<4.0.0)
|
|
32
|
+
Project-URL: Bug Tracker, https://github.com/oomol-lab/epub2speech/issues
|
|
33
|
+
Project-URL: Documentation, https://github.com/oomol-lab/epub2speech/blob/main/README.md
|
|
34
|
+
Project-URL: Homepage, https://github.com/oomol-lab/epub2speech
|
|
35
|
+
Project-URL: Repository, https://github.com/oomol-lab/epub2speech
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
<div align=center>
|
|
39
|
+
<h1>EPUB to Speech</h1>
|
|
40
|
+
<p>English | <a href="./README_zh-CN.md">中文</a></p>
|
|
41
|
+
</div>
|
|
42
|
+
|
|
43
|
+
Convert EPUB e-books into high-quality audiobooks using Azure Text-to-Speech technology.
|
|
44
|
+
|
|
45
|
+
## Features
|
|
46
|
+
|
|
47
|
+
- **π EPUB Support**: Compatible with EPUB 2 and EPUB 3 formats
|
|
48
|
+
- **ποΈ High-Quality TTS**: Uses Azure Cognitive Services Speech for natural voice synthesis
|
|
49
|
+
- **π Multi-Language Support**: Supports various languages and voices via Azure TTS
|
|
50
|
+
- **π± M4B Output**: Generates standard M4B audiobook format with chapter navigation
|
|
51
|
+
- **π§ CLI Interface**: Easy-to-use command-line tool with progress tracking
|
|
52
|
+
|
|
53
|
+
## Basic Usage
|
|
54
|
+
|
|
55
|
+
Convert an EPUB file to audiobook:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
epub2speech input.epub output.m4b --voice zh-CN-XiaoxiaoNeural --azure-key YOUR_KEY --azure-region YOUR_REGION
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Installation
|
|
62
|
+
|
|
63
|
+
### Prerequisites
|
|
64
|
+
|
|
65
|
+
- Python 3.11 or higher
|
|
66
|
+
- FFmpeg (for audio processing)
|
|
67
|
+
- Azure Speech Service credentials
|
|
68
|
+
|
|
69
|
+
### Install Dependencies
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
# Install Python dependencies
|
|
73
|
+
pip install poetry
|
|
74
|
+
poetry install
|
|
75
|
+
|
|
76
|
+
# Install FFmpeg
|
|
77
|
+
# macOS: brew install ffmpeg
|
|
78
|
+
# Ubuntu/Debian: sudo apt install ffmpeg
|
|
79
|
+
# Windows: Download from https://ffmpeg.org/download.html
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Azure Speech Service Setup
|
|
83
|
+
|
|
84
|
+
1. Create an Azure account at https://azure.microsoft.com
|
|
85
|
+
2. Create a Speech Service resource in Azure Portal
|
|
86
|
+
3. Get your subscription key and region from the Azure dashboard
|
|
87
|
+
|
|
88
|
+
## Quick Start
|
|
89
|
+
|
|
90
|
+
### Environment Variables
|
|
91
|
+
|
|
92
|
+
Set your Azure credentials as environment variables:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
export AZURE_SPEECH_KEY="your-subscription-key"
|
|
96
|
+
export AZURE_SPEECH_REGION="your-region"
|
|
97
|
+
|
|
98
|
+
epub2speech input.epub output.m4b --voice zh-CN-XiaoxiaoNeural
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### Advanced Options
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
# Limit to first 5 chapters
|
|
105
|
+
epub2speech input.epub output.m4b --voice en-US-AriaNeural --max-chapters 5
|
|
106
|
+
|
|
107
|
+
# Use custom workspace directory
|
|
108
|
+
epub2speech input.epub output.m4b --voice zh-CN-YunxiNeural --workspace /tmp/my-workspace
|
|
109
|
+
|
|
110
|
+
# Quiet mode (no progress output)
|
|
111
|
+
epub2speech input.epub output.m4b --voice ja-JP-NanamiNeural --quiet
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Available Voices
|
|
115
|
+
|
|
116
|
+
For a complete list, see [Azure Neural Voices](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#neural-voices).
|
|
117
|
+
|
|
118
|
+
## How It Works
|
|
119
|
+
|
|
120
|
+
1. **EPUB Parsing**: Extracts text content and metadata from EPUB files
|
|
121
|
+
2. **Chapter Detection**: Identifies chapters using EPUB navigation data
|
|
122
|
+
3. **Text Processing**: Cleans and segments text for optimal speech synthesis
|
|
123
|
+
4. **Audio Generation**: Converts text to speech using Azure TTS
|
|
124
|
+
5. **M4B Creation**: Combines audio files with chapter metadata into M4B format
|
|
125
|
+
|
|
126
|
+
## Development
|
|
127
|
+
|
|
128
|
+
### Running Tests
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
python test.py
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Run specific test modules:
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
python test.py --test test_epub_picker
|
|
138
|
+
python test.py --test test_tts
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Contributing
|
|
142
|
+
|
|
143
|
+
Contributions are welcome! Please feel free to submit issues or pull requests.
|
|
144
|
+
|
|
145
|
+
## License
|
|
146
|
+
|
|
147
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
|
148
|
+
|
|
149
|
+
## Acknowledgments
|
|
150
|
+
|
|
151
|
+
- [Azure Cognitive Services](https://azure.microsoft.com/services/cognitive-services/) for text-to-speech technology
|
|
152
|
+
- [ebooklib](https://github.com/aerkalov/ebooklib) for EPUB parsing
|
|
153
|
+
- [FFmpeg](https://ffmpeg.org/) for audio processing
|
|
154
|
+
- [spaCy](https://spacy.io/) for natural language processing
|
|
155
|
+
|
|
156
|
+
## Support
|
|
157
|
+
|
|
158
|
+
For issues and questions:
|
|
159
|
+
1. Check existing GitHub issues
|
|
160
|
+
2. Create a new issue with detailed information
|
|
161
|
+
3. Include EPUB file samples if relevant (ensure no copyright restrictions)
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
<div align=center>
|
|
2
|
+
<h1>EPUB to Speech</h1>
|
|
3
|
+
<p>English | <a href="./README_zh-CN.md">中文</a></p>
|
|
4
|
+
</div>
|
|
5
|
+
|
|
6
|
+
Convert EPUB e-books into high-quality audiobooks using Azure Text-to-Speech technology.
|
|
7
|
+
|
|
8
|
+
## Features
|
|
9
|
+
|
|
10
|
+
- **π EPUB Support**: Compatible with EPUB 2 and EPUB 3 formats
|
|
11
|
+
- **ποΈ High-Quality TTS**: Uses Azure Cognitive Services Speech for natural voice synthesis
|
|
12
|
+
- **π Multi-Language Support**: Supports various languages and voices via Azure TTS
|
|
13
|
+
- **π± M4B Output**: Generates standard M4B audiobook format with chapter navigation
|
|
14
|
+
- **π§ CLI Interface**: Easy-to-use command-line tool with progress tracking
|
|
15
|
+
|
|
16
|
+
## Basic Usage
|
|
17
|
+
|
|
18
|
+
Convert an EPUB file to audiobook:
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
epub2speech input.epub output.m4b --voice zh-CN-XiaoxiaoNeural --azure-key YOUR_KEY --azure-region YOUR_REGION
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Installation
|
|
25
|
+
|
|
26
|
+
### Prerequisites
|
|
27
|
+
|
|
28
|
+
- Python 3.11 or higher
|
|
29
|
+
- FFmpeg (for audio processing)
|
|
30
|
+
- Azure Speech Service credentials
|
|
31
|
+
|
|
32
|
+
### Install Dependencies
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
# Install Python dependencies
|
|
36
|
+
pip install poetry
|
|
37
|
+
poetry install
|
|
38
|
+
|
|
39
|
+
# Install FFmpeg
|
|
40
|
+
# macOS: brew install ffmpeg
|
|
41
|
+
# Ubuntu/Debian: sudo apt install ffmpeg
|
|
42
|
+
# Windows: Download from https://ffmpeg.org/download.html
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Azure Speech Service Setup
|
|
46
|
+
|
|
47
|
+
1. Create an Azure account at https://azure.microsoft.com
|
|
48
|
+
2. Create a Speech Service resource in Azure Portal
|
|
49
|
+
3. Get your subscription key and region from the Azure dashboard
|
|
50
|
+
|
|
51
|
+
## Quick Start
|
|
52
|
+
|
|
53
|
+
### Environment Variables
|
|
54
|
+
|
|
55
|
+
Set your Azure credentials as environment variables:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
export AZURE_SPEECH_KEY="your-subscription-key"
|
|
59
|
+
export AZURE_SPEECH_REGION="your-region"
|
|
60
|
+
|
|
61
|
+
epub2speech input.epub output.m4b --voice zh-CN-XiaoxiaoNeural
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### Advanced Options
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# Limit to first 5 chapters
|
|
68
|
+
epub2speech input.epub output.m4b --voice en-US-AriaNeural --max-chapters 5
|
|
69
|
+
|
|
70
|
+
# Use custom workspace directory
|
|
71
|
+
epub2speech input.epub output.m4b --voice zh-CN-YunxiNeural --workspace /tmp/my-workspace
|
|
72
|
+
|
|
73
|
+
# Quiet mode (no progress output)
|
|
74
|
+
epub2speech input.epub output.m4b --voice ja-JP-NanamiNeural --quiet
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Available Voices
|
|
78
|
+
|
|
79
|
+
For a complete list, see [Azure Neural Voices](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#neural-voices).
|
|
80
|
+
|
|
81
|
+
## How It Works
|
|
82
|
+
|
|
83
|
+
1. **EPUB Parsing**: Extracts text content and metadata from EPUB files
|
|
84
|
+
2. **Chapter Detection**: Identifies chapters using EPUB navigation data
|
|
85
|
+
3. **Text Processing**: Cleans and segments text for optimal speech synthesis
|
|
86
|
+
4. **Audio Generation**: Converts text to speech using Azure TTS
|
|
87
|
+
5. **M4B Creation**: Combines audio files with chapter metadata into M4B format
|
|
88
|
+
|
|
89
|
+
## Development
|
|
90
|
+
|
|
91
|
+
### Running Tests
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
python test.py
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Run specific test modules:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
python test.py --test test_epub_picker
|
|
101
|
+
python test.py --test test_tts
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## Contributing
|
|
105
|
+
|
|
106
|
+
Contributions are welcome! Please feel free to submit issues or pull requests.
|
|
107
|
+
|
|
108
|
+
## License
|
|
109
|
+
|
|
110
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
|
111
|
+
|
|
112
|
+
## Acknowledgments
|
|
113
|
+
|
|
114
|
+
- [Azure Cognitive Services](https://azure.microsoft.com/services/cognitive-services/) for text-to-speech technology
|
|
115
|
+
- [ebooklib](https://github.com/aerkalov/ebooklib) for EPUB parsing
|
|
116
|
+
- [FFmpeg](https://ffmpeg.org/) for audio processing
|
|
117
|
+
- [spaCy](https://spacy.io/) for natural language processing
|
|
118
|
+
|
|
119
|
+
## Support
|
|
120
|
+
|
|
121
|
+
For issues and questions:
|
|
122
|
+
1. Check existing GitHub issues
|
|
123
|
+
2. Create a new issue with detailed information
|
|
124
|
+
3. Include EPUB file samples if relevant (ensure no copyright restrictions)
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
import uuid
|
|
2
|
+
import numpy as np
|
|
3
|
+
import soundfile as sf
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import List, Optional, Callable, Generator
|
|
6
|
+
from spacy.lang.xx import MultiLanguage
|
|
7
|
+
from spacy.language import Language
|
|
8
|
+
from spacy.tokens import Span
|
|
9
|
+
|
|
10
|
+
from .tts import TextToSpeechProtocol
|
|
11
|
+
from resource_segmentation import split, Resource, Segment
|
|
12
|
+
|
|
13
|
+
SEGMENT_LEVEL = 1
|
|
14
|
+
SENTENCE_LEVEL = 2
|
|
15
|
+
|
|
16
|
+
class ChapterTTS:
    """Convert one chapter's text into a single audio file.

    The text is split into TTS-friendly segments (sentence- and
    punctuation-aware, via spaCy + resource_segmentation), each segment is
    synthesized through the injected TTS protocol, and the per-segment audio
    is concatenated into one output file.
    """

    def __init__(
        self,
        tts_protocol: TextToSpeechProtocol,
        sample_rate: int = 24000,
        max_segment_length: int = 500,
        language_model: Optional[str] = None
    ):
        """
        Args:
            tts_protocol: Backend that renders one text segment to a WAV file.
            sample_rate: Sample rate (Hz) used when writing the final audio.
            max_segment_length: Soft cap on characters per synthesized segment.
            language_model: Optional spaCy model name; falls back to the
                generic multi-language sentencizer when absent or not installed.
        """
        self.tts_protocol = tts_protocol
        self.sample_rate = sample_rate
        self.max_segment_length = max_segment_length
        self._nlp = self._load_language_model(language_model)

    def _load_language_model(self, language_model: Optional[str]) -> Language:
        """Load the requested spaCy model, falling back to a bare
        multi-language pipeline with only a sentencizer."""
        if language_model:
            try:
                import spacy
                return spacy.load(language_model)
            except OSError:
                # Requested model is not installed; fall through to the
                # generic pipeline instead of failing.
                pass

        nlp: Language = MultiLanguage()
        nlp.add_pipe("sentencizer")
        return nlp

    def process_chapter(
        self,
        text: str,
        output_path: Path,
        workspace_path: Path,
        voice: str,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> None:
        """Synthesize ``text`` and write the combined audio to ``output_path``.

        Args:
            text: Chapter text to synthesize; empty/whitespace text is a no-op.
            output_path: Destination audio file.
            workspace_path: Directory for intermediate per-segment WAV files
                (cleaned up afterwards, even on error).
            voice: TTS voice name passed through to the backend.
            progress_callback: Optional ``(current, total)`` hook called once
                per segment before synthesis.
        """
        # TODO: refactor the temp-file logic; the "temp" concept should not be
        # needed here at all.
        segments = list(self.split_text_into_segments(text))
        if not segments:
            return

        audio_segments = []
        temp_files_created = []
        try:
            for i, segment in enumerate(segments):
                if progress_callback:
                    progress_callback(i + 1, len(segments))

                # Short random prefix avoids collisions between runs sharing
                # one workspace directory.
                session_id = str(uuid.uuid4())[:8]
                temp_audio_path = workspace_path / f"{session_id}_segment_{i:04d}.wav"
                temp_files_created.append(temp_audio_path)

                self.tts_protocol.convert_text_to_audio(
                    text=segment,
                    output_path=temp_audio_path,
                    voice=voice
                )
                if not temp_audio_path.exists():
                    # Best-effort: skip segments the backend failed to render.
                    continue

                audio_data: np.ndarray
                sr: int
                audio_data, sr = sf.read(temp_audio_path)
                if sr != self.sample_rate:
                    # NOTE(review): mismatched rates are currently written out
                    # unchanged at self.sample_rate, which would alter
                    # pitch/speed — presumably the backend always returns
                    # self.sample_rate; confirm, or resample here.
                    pass
                audio_segments.append(audio_data)

            # Fix: the original called np.concatenate unconditionally, which
            # raises ValueError when every segment failed to produce a file.
            if audio_segments:
                final_audio = np.concatenate(audio_segments)
                sf.write(output_path, final_audio, self.sample_rate)

        finally:
            # Always remove intermediate files, even if synthesis failed.
            for temp_file in temp_files_created:
                if temp_file.exists():
                    temp_file.unlink()

    def split_text_into_segments(self, text: str) -> Generator[str, None, None]:
        """Yield TTS-sized text segments from ``text``.

        Sentences are detected with spaCy, each sentence is decomposed into
        punctuation-delimited resources, and resource_segmentation regroups
        them so every yielded segment respects the length budget.
        """
        text = text.strip()
        if not text:
            return

        all_resources = []
        doc = self._nlp(text)
        next_start_incision = SENTENCE_LEVEL  # was magic literal 2

        for sent in doc.sents:
            segment_text = sent.text.strip()
            if not segment_text:
                continue

            resources = list(self._build_segment_internal_structure(sent))
            if not resources:
                continue

            # Sentence boundaries are stronger split points than the
            # punctuation-level incisions used inside a sentence.
            resources[0].start_incision = next_start_incision
            resources[-1].end_incision = SENTENCE_LEVEL
            next_start_incision = SENTENCE_LEVEL

            all_resources.extend(resources)

        if not all_resources:
            # No sentences detected: treat the whole text as one resource so
            # something is still synthesized.
            text_resource = Resource(
                count=len(text),
                start_incision=SENTENCE_LEVEL,
                end_incision=SENTENCE_LEVEL,
                payload=text
            )
            all_resources.append(text_resource)

        yield from self._split_by_resource_segmentation(all_resources)

    def _build_segment_internal_structure(self, sent: Span) -> Generator[Resource, None, None]:
        """Yield one Resource per punctuation-delimited piece of ``sent``.

        Both the text fragments and the punctuation tokens themselves become
        resources carrying the weaker SEGMENT_LEVEL incisions.
        """

        def make_resource(payload: str) -> Resource:
            # All intra-sentence pieces share the punctuation-level incision.
            return Resource(
                count=len(payload),
                start_incision=SEGMENT_LEVEL,
                end_incision=SEGMENT_LEVEL,
                payload=payload
            )

        current_fragment: list[str] = []
        for token in sent:
            if token.is_punct:
                if current_fragment:
                    yield make_resource("".join(current_fragment))
                    current_fragment = []
                yield make_resource(token.text)
            else:
                # text_with_ws preserves original spacing between tokens.
                current_fragment.append(token.text_with_ws)

        # Flush any trailing fragment after the last punctuation mark.
        if current_fragment:
            yield make_resource("".join(current_fragment))

    def _split_by_resource_segmentation(self, resources: List[Resource]) -> Generator[str, None, None]:
        """Group ``resources`` via resource_segmentation.split and yield the
        combined, stripped text of each non-empty group."""
        # NOTE(review): budget is max_segment_length * 3 — presumably a rough
        # bytes-per-character allowance for UTF-8 CJK text; confirm the
        # intended unit of max_segment_count.
        max_byte_length = self.max_segment_length * 3
        groups = list(split(
            iter(resources),
            max_segment_count=max_byte_length,
            border_incision=1,
            gap_rate=0.0,
            tail_rate=0.0
        ))

        for group in groups:
            segment_chars = []
            for item in group.body:
                if isinstance(item, Segment):
                    for resource in item.resources:
                        segment_chars.append(resource.payload)
                elif isinstance(item, Resource):
                    segment_chars.append(item.payload)

            combined_text = "".join(segment_chars).strip()
            if combined_text:
                yield combined_text
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
import argparse
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from .convertor import convert_epub_to_m4b, ConversionProgress
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def progress_callback(progress: ConversionProgress) -> None:
    """Print a one-line progress report for the chapter being converted."""
    message = (
        f"Progress: {progress.progress:.1f}% "
        f"- Chapter {progress.current_chapter}/{progress.total_chapters}"
        f": {progress.chapter_title}"
    )
    print(message)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def main():
    """CLI entry point: parse arguments, validate inputs and Azure
    credentials, then convert an EPUB file to an M4B audiobook.

    Exits with status 1 on invalid input, missing credentials, user
    interruption, or conversion failure.
    """
    parser = argparse.ArgumentParser(
        description="Convert EPUB files to audiobooks (M4B format)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s input.epub output.m4b --voice zh-CN-XiaoxiaoNeural
  %(prog)s input.epub output.m4b --voice zh-CN-XiaoxiaoNeural --max-chapters 5
  %(prog)s input.epub output.m4b --voice zh-CN-XiaoxiaoNeural --workspace /tmp/workspace
"""
    )

    parser.add_argument(
        "epub_path",
        type=str,
        help="Input EPUB file path"
    )

    parser.add_argument(
        "output_path",
        type=str,
        help="Output M4B file path"
    )

    parser.add_argument(
        "--voice",
        type=str,
        default="zh-CN-XiaoxiaoNeural",
        help="TTS voice name (default: zh-CN-XiaoxiaoNeural)"
    )

    parser.add_argument(
        "--max-chapters",
        type=int,
        help="Maximum number of chapters to convert (optional)"
    )

    parser.add_argument(
        "--workspace",
        type=str,
        help="Workspace directory path (default: system temp directory)"
    )

    # Credentials default to the environment so the flags are optional.
    parser.add_argument(
        "--azure-key",
        type=str,
        default=os.environ.get("AZURE_SPEECH_KEY"),
        help="Azure Speech Service Key (can also be set via AZURE_SPEECH_KEY environment variable)"
    )

    parser.add_argument(
        "--azure-region",
        type=str,
        default=os.environ.get("AZURE_SPEECH_REGION"),
        help="Azure Speech Service region (can also be set via AZURE_SPEECH_REGION environment variable)"
    )

    parser.add_argument(
        "--quiet",
        action="store_true",
        help="Quiet mode, do not show progress information"
    )

    args = parser.parse_args()

    # --- Input validation -------------------------------------------------
    epub_path = Path(args.epub_path)
    if not epub_path.exists():
        print(f"Error: EPUB file does not exist: {epub_path}", file=sys.stderr)
        sys.exit(1)

    # Fix: replaced the `not ... == ...` anti-idiom with `!=`.
    if epub_path.suffix.lower() != '.epub':
        print(f"Error: Input file must be in EPUB format: {epub_path}", file=sys.stderr)
        sys.exit(1)

    if not args.azure_key or not args.azure_region:
        print("Error: Azure Speech Service credentials must be provided", file=sys.stderr)
        print("Please provide via --azure-key and --azure-region parameters, or set AZURE_SPEECH_KEY and AZURE_SPEECH_REGION environment variables", file=sys.stderr)
        sys.exit(1)

    # --- Workspace setup --------------------------------------------------
    if args.workspace:
        workspace = Path(args.workspace)
        workspace.mkdir(parents=True, exist_ok=True)
    else:
        import tempfile
        workspace = Path(tempfile.mkdtemp(prefix="epub2speech_"))

    output_path = Path(args.output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # --- Conversion -------------------------------------------------------
    try:
        from .tts.azure_provider import AzureTextToSpeech
        tts_provider = AzureTextToSpeech(
            subscription_key=args.azure_key,
            region=args.azure_region,
            default_voice=args.voice
        )

        # NOTE(review): --quiet only suppresses the per-chapter progress
        # callback; these summary prints still appear — confirm intended.
        print(f"Starting conversion: {epub_path.name}")
        print(f"Output file: {output_path}")
        print(f"Workspace: {workspace}")
        print(f"Using voice: {args.voice}")
        if args.max_chapters:
            print(f"Maximum chapters: {args.max_chapters}")
        print()

        result_path = convert_epub_to_m4b(
            epub_path=epub_path,
            workspace=workspace,
            output_path=output_path,
            tts_protocol=tts_provider,
            voice=args.voice,
            max_chapters=args.max_chapters,
            progress_callback=None if args.quiet else progress_callback
        )
        if result_path:
            print(f"\nConversion complete! Output file: {result_path}")
            print(f"File size: {result_path.stat().st_size / (1024*1024):.1f} MB")
        else:
            print("\nConversion failed: no output file generated", file=sys.stderr)
            sys.exit(1)

    except KeyboardInterrupt:
        print("\nConversion interrupted by user", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level CLI boundary: report and exit non-zero.
        print(f"\nConversion failed: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
|