indoxrouter 0.1.27__tar.gz → 0.1.29__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {indoxrouter-0.1.27/indoxrouter.egg-info → indoxrouter-0.1.29}/PKG-INFO +1 -1
- indoxrouter-0.1.29/examples/speech_to_text_example.py +141 -0
- {indoxrouter-0.1.27 → indoxrouter-0.1.29}/indoxrouter/client.py +264 -19
- {indoxrouter-0.1.27 → indoxrouter-0.1.29}/indoxrouter/constants.py +5 -2
- {indoxrouter-0.1.27 → indoxrouter-0.1.29/indoxrouter.egg-info}/PKG-INFO +1 -1
- {indoxrouter-0.1.27 → indoxrouter-0.1.29}/indoxrouter.egg-info/SOURCES.txt +1 -0
- {indoxrouter-0.1.27 → indoxrouter-0.1.29}/pyproject.toml +1 -1
- {indoxrouter-0.1.27 → indoxrouter-0.1.29}/LICENSE +0 -0
- {indoxrouter-0.1.27 → indoxrouter-0.1.29}/MANIFEST.in +0 -0
- {indoxrouter-0.1.27 → indoxrouter-0.1.29}/README.md +0 -0
- {indoxrouter-0.1.27 → indoxrouter-0.1.29}/indoxrouter/__init__.py +0 -0
- {indoxrouter-0.1.27 → indoxrouter-0.1.29}/indoxrouter/exceptions.py +0 -0
- {indoxrouter-0.1.27 → indoxrouter-0.1.29}/indoxrouter.egg-info/dependency_links.txt +0 -0
- {indoxrouter-0.1.27 → indoxrouter-0.1.29}/indoxrouter.egg-info/requires.txt +0 -0
- {indoxrouter-0.1.27 → indoxrouter-0.1.29}/indoxrouter.egg-info/top_level.txt +0 -0
- {indoxrouter-0.1.27 → indoxrouter-0.1.29}/setup.cfg +0 -0
- {indoxrouter-0.1.27 → indoxrouter-0.1.29}/tests/test_image.py +0 -0
examples/speech_to_text_example.py (new file)

@@ -0,0 +1,141 @@
+"""
+Example script demonstrating speech-to-text functionality in IndoxRouter.
+
+This script shows how to use the new speech-to-text capabilities added to the IndoxRouter client.
+"""
+
+from indoxrouter import Client
+
+
+def main():
+    # Initialize the client with your API key
+    client = Client(api_key="your_api_key_here")
+
+    try:
+        print("=== IndoxRouter Speech-to-Text Examples ===\n")
+
+        # Example 1: Basic transcription with file path
+        print("1. Transcribing audio file:")
+        try:
+            response = client.speech_to_text(
+                "path/to/your/audio.mp3", model="openai/whisper-1"
+            )
+            if response["success"]:
+                print(f"   Transcription: {response['text']}")
+            else:
+                print(f"   Error: {response['message']}")
+        except Exception as e:
+            print(f"   Example 1 Error: {e}")
+
+        print()
+
+        # Example 2: Transcription with specific language and format
+        print("2. Transcription with language specification:")
+        try:
+            response = client.speech_to_text(
+                "path/to/your/audio.wav",
+                model="openai/whisper-1",
+                language="en",
+                response_format="verbose_json",
+                temperature=0.2,
+            )
+            if response["success"]:
+                print(f"   Transcription: {response['text']}")
+                if "language" in response:
+                    print(f"   Detected Language: {response['language']}")
+            else:
+                print(f"   Error: {response['message']}")
+        except Exception as e:
+            print(f"   Example 2 Error: {e}")
+
+        print()
+
+        # Example 3: Transcription with timestamps
+        print("3. Transcription with detailed timestamps:")
+        try:
+            response = client.speech_to_text(
+                "path/to/your/audio.mp3",
+                model="openai/whisper-1",
+                response_format="verbose_json",
+                timestamp_granularities=["word", "segment"],
+            )
+            if response["success"]:
+                print(f"   Transcription: {response['text']}")
+                if "segments" in response:
+                    print(f"   Number of segments: {len(response['segments'])}")
+            else:
+                print(f"   Error: {response['message']}")
+        except Exception as e:
+            print(f"   Example 3 Error: {e}")
+
+        print()
+
+        # Example 4: Audio translation to English
+        print("4. Translating foreign audio to English:")
+        try:
+            response = client.translate_audio(
+                "path/to/your/foreign_audio.mp3",
+                model="openai/whisper-1",
+                response_format="text",
+            )
+            if response["success"]:
+                print(f"   Translation: {response['text']}")
+            else:
+                print(f"   Error: {response['message']}")
+        except Exception as e:
+            print(f"   Example 4 Error: {e}")
+
+        print()
+
+        # Example 5: Using audio data bytes instead of file path
+        print("5. Transcription using audio bytes:")
+        try:
+            # Read audio file as bytes
+            with open("path/to/your/audio.mp3", "rb") as f:
+                audio_data = f.read()
+
+            response = client.speech_to_text(
+                audio_data,
+                model="openai/whisper-1",
+                filename="my_audio.mp3",  # Optional filename hint
+            )
+            if response["success"]:
+                print(f"   Transcription: {response['text']}")
+            else:
+                print(f"   Error: {response['message']}")
+        except FileNotFoundError:
+            print(
+                "   Example 5 Note: Audio file not found - this is expected for the example"
+            )
+        except Exception as e:
+            print(f"   Example 5 Error: {e}")
+
+        print()
+
+        # Example 6: Using BYOK (Bring Your Own Key)
+        print("6. Using BYOK (Bring Your Own Key):")
+        try:
+            response = client.speech_to_text(
+                "path/to/your/audio.mp3",
+                model="openai/whisper-1",
+                byok_api_key="sk-your-openai-key-here",
+            )
+            if response["success"]:
+                print(f"   Transcription: {response['text']}")
+                print(
+                    "   Note: This used your own OpenAI API key (no IndoxRouter credits used)"
+                )
+            else:
+                print(f"   Error: {response['message']}")
+        except Exception as e:
+            print(f"   Example 6 Error: {e}")
+
+        print("\n=== Examples completed ===")
+
+    finally:
+        # Clean up the client
+        client.close()
+
+
+if __name__ == "__main__":
+    main()
indoxrouter/client.py

@@ -38,6 +38,9 @@ Usage example:
     # Generate text-to-speech audio
     audio = client.text_to_speech("Hello, welcome to IndoxRouter!", model="openai/tts-1", voice="alloy")
 
+    # Transcribe audio to text using speech-to-text
+    transcription = client.speech_to_text("path/to/audio.mp3", model="openai/whisper-1")
+
     # Using BYOK (Bring Your Own Key)
     response = client.chat([
         {"role": "user", "content": "Hello!"}
@@ -97,11 +100,14 @@ from .constants import (
     DEFAULT_EMBEDDING_MODEL,
     DEFAULT_IMAGE_MODEL,
     DEFAULT_TTS_MODEL,
+    DEFAULT_STT_MODEL,
     CHAT_ENDPOINT,
     COMPLETION_ENDPOINT,
     EMBEDDING_ENDPOINT,
     IMAGE_ENDPOINT,
     TTS_ENDPOINT,
+    STT_ENDPOINT,
+    STT_TRANSLATION_ENDPOINT,
     MODEL_ENDPOINT,
     USAGE_ENDPOINT,
     USE_COOKIES,
@@ -257,6 +263,7 @@ class Client:
         endpoint: str,
         data: Optional[Dict[str, Any]] = None,
         stream: bool = False,
+        files: Optional[Dict[str, Any]] = None,
     ) -> Any:
         """
         Make a request to the API.
@@ -266,6 +273,7 @@ class Client:
             endpoint: API endpoint
             data: Request data
             stream: Whether to stream the response
+            files: Files to upload (for multipart/form-data requests)
 
         Returns:
             Response data
@@ -279,7 +287,14 @@ class Client:
             endpoint = endpoint[1:]
 
         url = f"{self.base_url}/{endpoint}"
-        headers = {"Content-Type": "application/json"}
+
+        # Set headers based on whether we're uploading files
+        if files:
+            # For multipart/form-data, don't set Content-Type header;
+            # requests will set it automatically with boundary
+            headers = {}
+        else:
+            headers = {"Content-Type": "application/json"}
 
         # Add Authorization header if we have an access token
         if hasattr(self, "access_token") and self.access_token:
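The header change above is load-bearing for the new multipart uploads: when a `files=` argument is passed, `requests` generates the `multipart/form-data` Content-Type itself, including the boundary token, and a manually set `application/json` Content-Type would override it and corrupt the upload. A minimal standalone sketch of that behavior (the URL and payload bytes are placeholders, not part of the package):

import requests

# Preparing (not sending) a request shows the header requests generates
# when a files= argument is present. URL and bytes are dummies.
req = requests.Request(
    "POST",
    "https://example.com/upload",
    data={"model": "openai/whisper-1"},                        # plain form fields
    files={"file": ("audio.mp3", b"dummy-bytes", "audio/*")},  # file part
).prepare()

print(req.headers["Content-Type"])
# multipart/form-data; boundary=... (boundary chosen by requests)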
@@ -289,8 +304,8 @@ class Client:
         # if data:
         #     logger.debug(f"Request data: {json.dumps(data, indent=2)}")
 
-        # Diagnose potential issues with the request
-        if method == "POST" and data:
+        # Diagnose potential issues with the request (only for non-file uploads)
+        if method == "POST" and data and not files:
             diagnosis = self.diagnose_request(endpoint, data)
             if not diagnosis["is_valid"]:
                 issues_str = "\n".join([f"- {issue}" for issue in diagnosis["issues"]])
@@ -298,14 +313,25 @@ class Client:
             # We'll still send the request, but log the issues
 
         try:
-            response = self.session.request(
-                method,
-                url,
-                headers=headers,
-                json=data,
-                timeout=self.timeout,
-                stream=stream,
-            )
+            # Prepare request parameters
+            request_params = {
+                "method": method,
+                "url": url,
+                "headers": headers,
+                "timeout": self.timeout,
+                "stream": stream,
+            }
+
+            # Add data based on request type
+            if files:
+                # For file uploads, use form data
+                request_params["data"] = data
+                request_params["files"] = files
+            else:
+                # For regular requests, use JSON
+                request_params["json"] = data
+
+            response = self.session.request(**request_params)
 
             if stream:
                 return response
@@ -318,16 +344,10 @@ class Client:
             # Update Authorization header with new token if available
             if hasattr(self, "access_token") and self.access_token:
                 headers["Authorization"] = f"Bearer {self.access_token}"
+                request_params["headers"] = headers
 
             # Retry the request after reauthentication
-            response = self.session.request(
-                method,
-                url,
-                headers=headers,
-                json=data,
-                timeout=self.timeout,
-                stream=stream,
-            )
+            response = self.session.request(**request_params)
 
             if stream:
                 return response
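Collecting the arguments into a single `request_params` dict pays off here: the post-reauthentication retry replays exactly the same request, JSON or multipart, instead of the hand-copied JSON-only call it replaces, with only the Authorization header refreshed. A condensed sketch of the pattern (the `refresh_token` callback is hypothetical, for illustration):

import requests

def send_with_retry(session: requests.Session, refresh_token, **request_params):
    # Build the kwargs once, reuse them verbatim on retry so JSON and
    # multipart requests take the same code path.
    response = session.request(**request_params)
    if response.status_code == 401:
        # Refresh credentials, patch only the headers, replay the rest as-is.
        request_params["headers"] = {"Authorization": f"Bearer {refresh_token()}"}
        response = session.request(**request_params)
    return response

Note that replaying `files=` on retry only works because the client buffers the audio into bytes up front; a partially consumed file object could not be re-sent this way.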
@@ -908,6 +928,231 @@ class Client:
 
         return self._request("POST", TTS_ENDPOINT, data)
 
+    def speech_to_text(
+        self,
+        file: Union[str, bytes],
+        model: str = DEFAULT_STT_MODEL,
+        language: Optional[str] = None,
+        prompt: Optional[str] = None,
+        response_format: Optional[str] = "json",
+        temperature: Optional[float] = 0.0,
+        timestamp_granularities: Optional[List[str]] = None,
+        byok_api_key: Optional[str] = None,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """
+        Transcribe audio to text using speech-to-text models.
+
+        Args:
+            file: Audio file path (str) or audio file data (bytes)
+            model: Model to use in the format "provider/model" (e.g., "openai/whisper-1")
+            language: Language code for the audio (e.g., "en", "es", "fr")
+            prompt: Optional text to guide the model's style
+            response_format: Format of the response ("json", "text", "srt", "verbose_json", "vtt")
+            temperature: Temperature for transcription (0.0 to 1.0)
+            timestamp_granularities: List of timestamp granularities (["word", "segment"])
+            byok_api_key: Your own API key for the provider (BYOK - Bring Your Own Key)
+            **kwargs: Additional parameters to pass to the API
+
+        Returns:
+            Response data with transcription text
+
+        Examples:
+            Basic usage with file path:
+                response = client.speech_to_text("path/to/audio.mp3")
+
+            Basic usage with file bytes:
+                with open("audio.mp3", "rb") as f:
+                    audio_data = f.read()
+                response = client.speech_to_text(audio_data)
+
+            With specific model and language:
+                response = client.speech_to_text(
+                    "path/to/audio.wav",
+                    model="openai/whisper-1",
+                    language="en",
+                    response_format="json"
+                )
+
+            With timestamps for detailed analysis:
+                response = client.speech_to_text(
+                    "path/to/audio.mp3",
+                    model="openai/whisper-1",
+                    response_format="verbose_json",
+                    timestamp_granularities=["word", "segment"]
+                )
+
+            Using BYOK (Bring Your Own Key):
+                response = client.speech_to_text(
+                    "path/to/audio.mp3",
+                    model="openai/whisper-1",
+                    byok_api_key="sk-your-openai-key-here"
+                )
+        """
+        # Format the model string
+        formatted_model = self._format_model_string(model)
+
+        # Handle file input - can be a file path (str) or file data (bytes)
+        if isinstance(file, str):
+            # It's a file path, read the file
+            try:
+                with open(file, "rb") as f:
+                    file_data = f.read()
+                filename = os.path.basename(file)
+            except FileNotFoundError:
+                raise InvalidParametersError(f"File not found: {file}")
+            except Exception as e:
+                raise InvalidParametersError(f"Error reading file {file}: {str(e)}")
+        elif isinstance(file, bytes):
+            # It's file data
+            file_data = file
+            filename = kwargs.get("filename", "audio_file")
+        else:
+            raise InvalidParametersError(
+                "File must be either a file path (str) or file data (bytes)"
+            )
+
+        # Prepare form data for multipart upload
+        files = {"file": (filename, file_data, "audio/*")}
+
+        # Create the form data with required parameters
+        data = {
+            "model": formatted_model,
+        }
+
+        # Add optional parameters only if they are provided
+        if language is not None:
+            data["language"] = language
+        if prompt is not None:
+            data["prompt"] = prompt
+        if response_format is not None:
+            data["response_format"] = response_format
+        if temperature is not None:
+            data["temperature"] = temperature
+        if timestamp_granularities is not None:
+            # Convert to JSON string as expected by the API
+            data["timestamp_granularities"] = json.dumps(timestamp_granularities)
+
+        # Add BYOK API key if provided
+        if byok_api_key:
+            data["byok_api_key"] = byok_api_key
+
+        # Filter out problematic parameters from kwargs
+        filtered_kwargs = {}
+        for key, value in kwargs.items():
+            if key not in [
+                "filename",
+                "return_generator",
+            ]:  # List of parameters to exclude
+                filtered_kwargs[key] = value
+
+        # Add any additional parameters from kwargs
+        if filtered_kwargs:
+            data.update(filtered_kwargs)
+
+        return self._request("POST", STT_ENDPOINT, data, files=files)
+
+    def translate_audio(
+        self,
+        file: Union[str, bytes],
+        model: str = DEFAULT_STT_MODEL,
+        prompt: Optional[str] = None,
+        response_format: Optional[str] = "json",
+        temperature: Optional[float] = 0.0,
+        byok_api_key: Optional[str] = None,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """
+        Translate audio to English text using speech-to-text models.
+
+        Args:
+            file: Audio file path (str) or audio file data (bytes)
+            model: Model to use in the format "provider/model" (e.g., "openai/whisper-1")
+            prompt: Optional text to guide the model's style
+            response_format: Format of the response ("json", "text", "srt", "verbose_json", "vtt")
+            temperature: Temperature for translation (0.0 to 1.0)
+            byok_api_key: Your own API key for the provider (BYOK - Bring Your Own Key)
+            **kwargs: Additional parameters to pass to the API
+
+        Returns:
+            Response data with translated text in English
+
+        Examples:
+            Basic usage with file path:
+                response = client.translate_audio("path/to/spanish_audio.mp3")
+
+            With specific response format:
+                response = client.translate_audio(
+                    "path/to/french_audio.wav",
+                    model="openai/whisper-1",
+                    response_format="text"
+                )
+
+            Using BYOK (Bring Your Own Key):
+                response = client.translate_audio(
+                    "path/to/audio.mp3",
+                    model="openai/whisper-1",
+                    byok_api_key="sk-your-openai-key-here"
+                )
+        """
+        # Format the model string
+        formatted_model = self._format_model_string(model)
+
+        # Handle file input - can be a file path (str) or file data (bytes)
+        if isinstance(file, str):
+            # It's a file path, read the file
+            try:
+                with open(file, "rb") as f:
+                    file_data = f.read()
+                filename = os.path.basename(file)
+            except FileNotFoundError:
+                raise InvalidParametersError(f"File not found: {file}")
+            except Exception as e:
+                raise InvalidParametersError(f"Error reading file {file}: {str(e)}")
+        elif isinstance(file, bytes):
+            # It's file data
+            file_data = file
+            filename = kwargs.get("filename", "audio_file")
+        else:
+            raise InvalidParametersError(
+                "File must be either a file path (str) or file data (bytes)"
+            )
+
+        # Prepare form data for multipart upload
+        files = {"file": (filename, file_data, "audio/*")}
+
+        # Create the form data with required parameters
+        data = {
+            "model": formatted_model,
+        }
+
+        # Add optional parameters only if they are provided
+        if prompt is not None:
+            data["prompt"] = prompt
+        if response_format is not None:
+            data["response_format"] = response_format
+        if temperature is not None:
+            data["temperature"] = temperature
+
+        # Add BYOK API key if provided
+        if byok_api_key:
+            data["byok_api_key"] = byok_api_key
+
+        # Filter out problematic parameters from kwargs
+        filtered_kwargs = {}
+        for key, value in kwargs.items():
+            if key not in [
+                "filename",
+                "return_generator",
+            ]:  # List of parameters to exclude
+                filtered_kwargs[key] = value
+
+        # Add any additional parameters from kwargs
+        if filtered_kwargs:
+            data.update(filtered_kwargs)
+
+        return self._request("POST", STT_TRANSLATION_ENDPOINT, data, files=files)
+
     def _get_supported_parameters_for_model(
         self, provider: str, model_name: str
     ) -> List[str]:
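For reference, the request that speech_to_text() ultimately assembles is an ordinary multipart POST; stripped of the client plumbing it looks roughly like the sketch below. The URL prefix is an assumption (the client builds it from base_url plus STT_ENDPOINT, and the exact version segment depends on how base_url is composed), and the token and audio bytes are placeholders:

import requests

BASE = "https://api.indoxrouter.com"  # assumed prefix; see constants.py below

audio = b"..."  # replace with real audio bytes, e.g. open("audio.mp3", "rb").read()

resp = requests.post(
    f"{BASE}/audio/stt/transcriptions",
    headers={"Authorization": "Bearer <access-token>"},
    data={
        "model": "openai/whisper-1",
        "response_format": "json",
        "temperature": 0.0,
        # timestamp_granularities travels as a JSON-encoded string:
        "timestamp_granularities": '["word", "segment"]',
    },
    files={"file": ("audio.mp3", audio, "audio/*")},
)
print(resp.json())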
indoxrouter/constants.py

@@ -5,7 +5,7 @@ Constants for the IndoxRouter client.
 # API settings
 DEFAULT_API_VERSION = "v1"
 DEFAULT_BASE_URL = "https://api.indoxrouter.com"  # Production server URL with HTTPS
-# DEFAULT_BASE_URL = "http://localhost:
+# DEFAULT_BASE_URL = "http://localhost:9050"  # Local development server
 DEFAULT_TIMEOUT = 60
 USE_COOKIES = True  # Always use cookie-based authentication
@@ -14,6 +14,7 @@ DEFAULT_MODEL = "openai/gpt-4o-mini"
|
|
14
14
|
DEFAULT_EMBEDDING_MODEL = "openai/text-embedding-3-small"
|
15
15
|
DEFAULT_IMAGE_MODEL = "openai/dall-e-3"
|
16
16
|
DEFAULT_TTS_MODEL = "openai/tts-1"
|
17
|
+
DEFAULT_STT_MODEL = "openai/whisper-1"
|
17
18
|
GOOGLE_IMAGE_MODEL = "google/imagen-3.0-generate-002"
|
18
19
|
XAI_IMAGE_MODEL = "xai/grok-2-image"
|
19
20
|
XAI_IMAGE_LATEST_MODEL = "xai/grok-2-image-latest"
|
@@ -24,7 +25,9 @@ CHAT_ENDPOINT = "chat/completions"
|
|
24
25
|
COMPLETION_ENDPOINT = "completions"
|
25
26
|
EMBEDDING_ENDPOINT = "embeddings"
|
26
27
|
IMAGE_ENDPOINT = "images/generations"
|
27
|
-
TTS_ENDPOINT = "tts/generations"
|
28
|
+
TTS_ENDPOINT = "audio/tts/generations"
|
29
|
+
STT_ENDPOINT = "audio/stt/transcriptions"
|
30
|
+
STT_TRANSLATION_ENDPOINT = "audio/stt/translations"
|
28
31
|
MODEL_ENDPOINT = "models"
|
29
32
|
USAGE_ENDPOINT = "user/usage"
|
30
33
|
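Note that the TTS path change above (tts/generations → audio/tts/generations) is breaking for anyone who hardcoded the old REST path; client users are unaffected because calls go through the TTS_ENDPOINT constant. The three audio endpoints now share a common audio/ prefix, summarized below for direct callers (relative paths only; prepend your API base URL):

# Relative REST paths as of 0.1.29, taken from constants.py above.
AUDIO_ENDPOINTS = {
    "tts": "audio/tts/generations",
    "stt": "audio/stt/transcriptions",
    "stt_translation": "audio/stt/translations",
}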