PyPI - audiopod - Versions diffs - 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl - Mend

audiopod 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

audiopod/__init__.py CHANGED Viewed

@@ -47,7 +47,7 @@ from .models import (
     TranslationResult
 )
-__version__ = "1.1.1"
+__version__ = "1.2.0"
 __author__ = "AudioPod AI"
 __email__ = "support@audiopod.ai"
 __license__ = "MIT"

audiopod/services/voice.py CHANGED Viewed

@@ -2,6 +2,7 @@
 Voice Service - Voice cloning and TTS operations
 """
+import time
 from typing import List, Optional, Dict, Any, Union
 from pathlib import Path
@@ -13,54 +14,103 @@ from ..exceptions import ValidationError
 class VoiceService(BaseService):
     """Service for voice cloning and text-to-speech operations"""
-    def clone_voice(
+    def generate_voice(
         self,
-        voice_file: str,
         text: str,
+        voice_file: Optional[str] = None,
+        voice_id: Optional[Union[int, str]] = None,
         language: Optional[str] = None,
         speed: float = 1.0,
+        audio_format: str = "mp3",
+        generation_params: Optional[Dict[str, Any]] = None,
         wait_for_completion: bool = False,
         timeout: int = 300
     ) -> Union[Job, Dict[str, Any]]:
         """
-        Clone a voice from an audio file
+        Generate speech using either a voice file (for cloning) or existing voice profile
+        This unified method handles both voice cloning and text-to-speech generation:
+        - For voice cloning: Provide voice_file parameter
+        - For TTS with existing voice: Provide voice_id parameter
         Args:
-            voice_file: Path to audio file containing voice to clone
-            text: Text to generate with the cloned voice
+            text: Text to generate speech for
+            voice_file: Path to audio file for voice cloning (mutually exclusive with voice_id)
+            voice_id: ID/UUID of existing voice profile (mutually exclusive with voice_file)
             language: Target language code (e.g., 'en', 'es')
-            speed: Speech speed (0.5 to 2.0)
+            speed: Speech speed (0.25 to 4.0, provider dependent)
+            audio_format: Output audio format ('mp3', 'wav', 'ogg')
+            generation_params: Provider-specific parameters (speed, temperature, etc.)
             wait_for_completion: Whether to wait for job completion
             timeout: Maximum time to wait if wait_for_completion=True
         Returns:
             Job object if wait_for_completion=False, otherwise job result
+        Raises:
+            ValidationError: If both or neither voice_file and voice_id are provided
         """
         # Validate inputs
+        if not voice_file and not voice_id:
+            raise ValidationError("Either voice_file (for cloning) or voice_id (for TTS) must be provided")
+        if voice_file and voice_id:
+            raise ValidationError("Provide either voice_file or voice_id, not both")
         text = self._validate_text_input(text)
         if language:
             language = self._validate_language_code(language)
-        if not 0.5 <= speed <= 2.0:
-            raise ValidationError("Speed must be between 0.5 and 2.0")
+        if not 0.25 <= speed <= 4.0:
+            raise ValidationError("Speed must be between 0.25 and 4.0")
+        if audio_format not in ["mp3", "wav", "ogg"]:
+            raise ValidationError("Audio format must be 'mp3', 'wav', or 'ogg'")
-        # Prepare file upload
+        # For voice cloning, we need to create a temporary voice first, then generate
+        if voice_file:
+            return self._generate_with_voice_file(
+                voice_file, text, language, speed, audio_format,
+                generation_params, wait_for_completion, timeout
+            )
+        else:
+            # Use existing voice profile with unified endpoint
+            return self._generate_with_voice_id(
+                voice_id, text, language, speed, audio_format,
+                generation_params, wait_for_completion, timeout
+            )
+    def _generate_with_voice_file(
+        self,
+        voice_file: str,
+        text: str,
+        language: Optional[str],
+        speed: float,
+        audio_format: str,
+        generation_params: Optional[Dict[str, Any]],
+        wait_for_completion: bool,
+        timeout: int
+    ) -> Union[Job, Dict[str, Any]]:
+        """Generate speech with voice cloning using unified endpoint"""
+        # For voice cloning, we use the "clone" identifier with the unified endpoint
         files = self._prepare_file_upload(voice_file, "file")
-        # Prepare form data
+        # Prepare form data for unified endpoint
         data = {
             "input_text": text,
-            "speed": speed
+            "speed": speed,
+            "audio_format": audio_format
         }
         if language:
-            data["target_language"] = language
+            data["language"] = language
+        if generation_params:
+            # Add generation parameters
+            data.update(generation_params)
-        # Make request
         if self.async_mode:
-            return self._async_clone_voice(files, data, wait_for_completion, timeout)
+            return self._async_generate_voice("clone", data, files, wait_for_completion, timeout)
         else:
+            # Use unified voice generation endpoint with "clone" identifier
             response = self.client.request(
                 "POST",
-                "/api/v1/voice/voice-clone",
+                "/api/v1/voice/voices/clone/generate",
                 data=data,
                 files=files
             )
@@ -73,17 +123,60 @@ class VoiceService(BaseService):
             return job
-    async def _async_clone_voice(
+    def _generate_with_voice_id(
         self,
-        files: Dict[str, Any],
+        voice_id: Union[int, str],
+        text: str,
+        language: Optional[str],
+        speed: float,
+        audio_format: str,
+        generation_params: Optional[Dict[str, Any]],
+        wait_for_completion: bool,
+        timeout: int
+    ) -> Union[Job, Dict[str, Any]]:
+        """Generate speech with existing voice profile using unified endpoint"""
+        # Prepare form data for unified endpoint
+        data = {
+            "input_text": text,
+            "speed": speed,
+            "audio_format": audio_format
+        }
+        if language:
+            data["language"] = language
+        if generation_params:
+            # Add generation parameters
+            data.update(generation_params)
+        if self.async_mode:
+            return self._async_generate_voice(voice_id, data, None, wait_for_completion, timeout)
+        else:
+            # Use unified voice generation endpoint
+            response = self.client.request(
+                "POST",
+                f"/api/v1/voice/voices/{voice_id}/generate",
+                data=data
+            )
+            job = Job.from_dict(response)
+            if wait_for_completion:
+                job = self._wait_for_completion(job.id, timeout)
+                return job.result if job.result else job
+            return job
+    async def _async_generate_voice(
+        self,
+        voice_identifier: Union[int, str],
         data: Dict[str, Any],
+        files: Optional[Dict[str, Any]],
         wait_for_completion: bool,
         timeout: int
     ) -> Union[Job, Dict[str, Any]]:
-        """Async version of clone_voice"""
+        """Async version of unified voice generation"""
         response = await self.client.request(
-            "POST",
-            "/api/v1/voice/voice-clone",
+            "POST",
+            f"/api/v1/voice/voices/{voice_identifier}/generate",
             data=data,
             files=files
         )
@@ -93,7 +186,7 @@ class VoiceService(BaseService):
         if wait_for_completion:
             job = await self._async_wait_for_completion(job.id, timeout)
             return job.result if job.result else job
         return job
     def create_voice_profile(
@@ -202,78 +295,79 @@ class VoiceService(BaseService):
         language: Optional[str] = None,
         speed: float = 1.0,
         audio_format: str = "mp3",
+        generation_params: Optional[Dict[str, Any]] = None,
         wait_for_completion: bool = False,
         timeout: int = 300
     ) -> Union[Job, Dict[str, Any]]:
         """
-        Generate speech using an existing voice profile
+        Generate speech using an existing voice profile (unified endpoint)
+        This method now uses the unified voice generation endpoint for consistency.
         Args:
             voice_id: ID or UUID of the voice profile
             text: Text to generate speech for
             language: Target language code
-            speed: Speech speed (0.5 to 2.0)
-            audio_format: Output audio format (mp3, wav)
+            speed: Speech speed (0.25 to 4.0, provider dependent)
+            audio_format: Output audio format ('mp3', 'wav', 'ogg')
+            generation_params: Additional generation parameters
             wait_for_completion: Whether to wait for completion
             timeout: Maximum time to wait
         Returns:
             Job object or generation result
         """
-        # Validate inputs
-        text = self._validate_text_input(text)
-        if language:
-            language = self._validate_language_code(language)
-        if not 0.5 <= speed <= 2.0:
-            raise ValidationError("Speed must be between 0.5 and 2.0")
-        if audio_format not in ["mp3", "wav"]:
-            raise ValidationError("Audio format must be 'mp3' or 'wav'")
-        # Prepare form data
-        data = {
-            "input_text": text,
-            "speed": speed,
-            "audio_format": audio_format
-        }
-        if language:
-            data["language"] = language
-        # Make request
-        endpoint = f"/api/v1/voice/voices/{voice_id}/generate"
-        if self.async_mode:
-            return self._async_generate_speech(endpoint, data, wait_for_completion, timeout)
-        else:
-            response = self.client.request("POST", endpoint, data=data)
-            if "job_id" in response:
-                job = Job.from_dict(response)
-                if wait_for_completion:
-                    job = self._wait_for_completion(job.id, timeout)
-                    return job.result if job.result else job
-                return job
-            else:
-                # Direct response with audio URL
-                return response
-    async def _async_generate_speech(
+        # Use unified voice generation method
+        return self.generate_voice(
+            text=text,
+            voice_id=voice_id,
+            language=language,
+            speed=speed,
+            audio_format=audio_format,
+            generation_params=generation_params,
+            wait_for_completion=wait_for_completion,
+            timeout=timeout
+        )
+    def clone_voice(
         self,
-        endpoint: str,
-        data: Dict[str, Any],
-        wait_for_completion: bool,
-        timeout: int
+        voice_file: str,
+        text: str,
+        language: Optional[str] = None,
+        speed: float = 1.0,
+        generation_params: Optional[Dict[str, Any]] = None,
+        wait_for_completion: bool = False,
+        timeout: int = 300
     ) -> Union[Job, Dict[str, Any]]:
-        """Async version of generate_speech"""
-        response = await self.client.request("POST", endpoint, data=data)
+        """
+        Clone a voice from an audio file (backward compatibility)
-        if "job_id" in response:
-            job = Job.from_dict(response)
-            if wait_for_completion:
-                job = await self._async_wait_for_completion(job.id, timeout)
-                return job.result if job.result else job
-            return job
-        else:
-            return response
+        This method is now a wrapper around the unified generate_voice method.
+        For new code, consider using generate_voice() directly with voice_file parameter.
+        Args:
+            voice_file: Path to audio file containing voice to clone
+            text: Text to generate with the cloned voice
+            language: Target language code (e.g., 'en', 'es')
+            speed: Speech speed (0.25 to 4.0, provider dependent)
+            generation_params: Additional generation parameters
+            wait_for_completion: Whether to wait for job completion
+            timeout: Maximum time to wait if wait_for_completion=True
+        Returns:
+            Job object if wait_for_completion=False, otherwise job result
+        """
+        # Use unified voice generation method
+        return self.generate_voice(
+            text=text,
+            voice_file=voice_file,
+            language=language,
+            speed=speed,
+            audio_format="mp3",  # Default format for backward compatibility
+            generation_params=generation_params,
+            wait_for_completion=wait_for_completion,
+            timeout=timeout
+        )
     def list_voice_profiles(
         self,

{audiopod-1.1.1.dist-info → audiopod-1.2.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: audiopod
-Version: 1.1.1
+Version: 1.2.0
 Summary: Professional Audio Processing API Client for Python
 Home-page: https://github.com/audiopod-ai/audiopod-python
 Author: AudioPod AI
@@ -95,7 +95,7 @@ client = audiopod.Client(api_key="ap_your_api_key_here")
 ### Basic Usage
-#### Voice Cloning
+#### Voice Generation (Unified TTS & Cloning)
 ```python
 import audiopod
@@ -103,15 +103,39 @@ import audiopod
 # Initialize client
 client = audiopod.Client()
-# Clone a voice and generate speech
-job = client.voice.clone_voice(
+# Generate voice using file cloning (unified approach)
+job = client.voice.generate_voice(
+    text="Hello! This is voice generation using a cloned voice.",
+    voice_file="path/to/voice_sample.wav",  # For voice cloning
+    language="en",
+    audio_format="mp3",
+    generation_params={
+        "speed": 1.0
+    },
+    wait_for_completion=True
+)
+print(f"Generated audio URL: {job.output_url}")
+# Generate speech with existing voice profile (unified approach)
+speech = client.voice.generate_voice(
+    text="Hello from my voice profile!",
+    voice_id="voice-profile-id",  # For existing voice profiles
+    language="en",
+    audio_format="mp3",
+    generation_params={
+        "speed": 1.0
+    },
+    wait_for_completion=True
+)
+# Backward compatibility methods (deprecated - use generate_voice instead)
+legacy_clone = client.voice.clone_voice(
     voice_file="path/to/voice_sample.wav",
     text="Hello! This is a cloned voice speaking.",
     language="en",
     wait_for_completion=True
 )
-print(f"Generated audio URL: {job['output_url']}")
 ```
 #### Music Generation
@@ -197,10 +221,22 @@ voice_profile = client.voice.create_voice_profile(
     wait_for_completion=True
 )
-# Use the voice profile for speech generation
-speech = client.voice.generate_speech(
+# Use the voice profile for speech generation (unified approach - recommended)
+speech = client.voice.generate_voice(
+    text="This uses my custom voice profile with the unified method!",
     voice_id=voice_profile.id,
-    text="This uses my custom voice profile!",
+    language="en",
+    audio_format="mp3",
+    generation_params={
+        "speed": 1.0
+    },
+    wait_for_completion=True
+)
+# Legacy method (still works - uses generate_voice internally)
+legacy_speech = client.voice.generate_speech(
+    voice_id=voice_profile.id,
+    text="This uses the legacy method.",
     wait_for_completion=True
 )
 ```
@@ -343,7 +379,7 @@ client = audiopod.Client(
 ### Services
-- `client.voice`: Voice cloning and TTS operations
+- `client.voice`: **Voice generation operations** (unified TTS & cloning using `generate_voice()`)
 - `client.music`: Music generation and editing
 - `client.transcription`: Speech-to-text transcription
 - `client.translation`: Audio/video translation
@@ -352,6 +388,20 @@ client = audiopod.Client(
 - `client.karaoke`: Karaoke video generation
 - `client.credits`: Credit management and usage tracking
+#### Voice Service Methods
+**Recommended (Unified Approach):**
+- `client.voice.generate_voice()` - Generate speech with voice file (cloning) or voice ID (TTS)
+**Legacy Methods (Backward Compatibility):**
+- `client.voice.clone_voice()` - Clone voice from audio file (deprecated, uses `generate_voice` internally)
+- `client.voice.generate_speech()` - Generate speech with voice profile (deprecated, uses `generate_voice` internally)
+**Voice Management:**
+- `client.voice.create_voice_profile()` - Create reusable voice profiles
+- `client.voice.list_voice_profiles()` - List available voice profiles
+- `client.voice.delete_voice_profile()` - Delete voice profiles
 ### Models
 - `Job`: Base job information and status

{audiopod-1.1.1.dist-info → audiopod-1.2.0.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-audiopod/__init__.py,sha256=UsB5ET6nUy1Upx8wCiB17mMErdF3RvEHfAr51-pPPGQ,1790
+audiopod/__init__.py,sha256=jQtsXMdmCZArJ1pOWG9huymdE_-WlyW8Rk8q0kcQn-c,1790
 audiopod/cli.py,sha256=ZYzAQ3UpoYuOEWivMwMneJUf2z8DGGYTx1Nb6yRfdVY,9339
 audiopod/client.py,sha256=67oPSInSNssJpTR00ZuYSdk9lbx5KiRnDQw8UYKNVsA,11742
 audiopod/config.py,sha256=fuGtbuES4tXdHwqQqoZa5izCH6nVfFRP06D8eK1Cg10,1683
@@ -15,10 +15,10 @@ audiopod/services/speaker.py,sha256=OPSOwArfrGXVzRgciS13n1QsCJSK1PB-Mz6VgwxuHAA,
 audiopod/services/stem_extraction.py,sha256=3ibMFKFR25xKHpVs3WGMNriZ88sB5PriFNa_s2Bvon4,6026
 audiopod/services/transcription.py,sha256=HyH6WpGWZsggYxIvt2dhB6_5UHaigk3XwXsVgarWzcE,7565
 audiopod/services/translation.py,sha256=oUU82c61CeAt13lzlWx8S-9xEgYlskwX8bLMbQw2Ni8,7396
-audiopod/services/voice.py,sha256=_IHv3zU3k184kfijxr1QRBenrIpmhhPOBS96DddZ8yw,13456
-audiopod-1.1.1.dist-info/licenses/LICENSE,sha256=hqEjnOaGNbnLSBxbtbC7WQVREU2vQI8FmwecCiZlMfA,1068
-audiopod-1.1.1.dist-info/METADATA,sha256=UG_csntfzscrZTjy-2v-lCTlS7-VSEM6WEwUw26hSYc,11217
-audiopod-1.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-audiopod-1.1.1.dist-info/entry_points.txt,sha256=uLcNDzXuOXnJAz9j91TDGayVjjZ7-ZiHBGDydqNUErU,47
-audiopod-1.1.1.dist-info/top_level.txt,sha256=M6yyOFFNpLdH4i1AMRqJZLRIgfpg1NvrQVmnPd8A6N8,9
-audiopod-1.1.1.dist-info/RECORD,,
+audiopod/services/voice.py,sha256=t0-4yjVrzWXJorfQCGbBSNRGE2wZfjoRQ76elJu1BvU,17748
+audiopod-1.2.0.dist-info/licenses/LICENSE,sha256=hqEjnOaGNbnLSBxbtbC7WQVREU2vQI8FmwecCiZlMfA,1068
+audiopod-1.2.0.dist-info/METADATA,sha256=mLpXkYWYxvd3k3LrDtH6Cr68C7Xk7JmpoBL4vwYPiOU,13048
+audiopod-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+audiopod-1.2.0.dist-info/entry_points.txt,sha256=uLcNDzXuOXnJAz9j91TDGayVjjZ7-ZiHBGDydqNUErU,47
+audiopod-1.2.0.dist-info/top_level.txt,sha256=M6yyOFFNpLdH4i1AMRqJZLRIgfpg1NvrQVmnPd8A6N8,9
+audiopod-1.2.0.dist-info/RECORD,,

{audiopod-1.1.1.dist-info → audiopod-1.2.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{audiopod-1.1.1.dist-info → audiopod-1.2.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{audiopod-1.1.1.dist-info → audiopod-1.2.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{audiopod-1.1.1.dist-info → audiopod-1.2.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

audiopod 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl

audiopod 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl