isa-model 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. isa_model/__init__.py +30 -1
  2. isa_model/client.py +770 -0
  3. isa_model/core/config/__init__.py +16 -0
  4. isa_model/core/config/config_manager.py +514 -0
  5. isa_model/core/config.py +426 -0
  6. isa_model/core/models/model_billing_tracker.py +476 -0
  7. isa_model/core/models/model_manager.py +399 -0
  8. isa_model/core/models/model_repo.py +343 -0
  9. isa_model/core/pricing_manager.py +426 -0
  10. isa_model/core/services/__init__.py +19 -0
  11. isa_model/core/services/intelligent_model_selector.py +547 -0
  12. isa_model/core/types.py +291 -0
  13. isa_model/deployment/__init__.py +2 -0
  14. isa_model/deployment/cloud/__init__.py +9 -0
  15. isa_model/deployment/cloud/modal/__init__.py +10 -0
  16. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +766 -0
  17. isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
  18. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +406 -0
  19. isa_model/deployment/cloud/modal/register_models.py +321 -0
  20. isa_model/deployment/runtime/deployed_service.py +338 -0
  21. isa_model/deployment/services/__init__.py +9 -0
  22. isa_model/deployment/services/auto_deploy_vision_service.py +537 -0
  23. isa_model/deployment/services/model_service.py +332 -0
  24. isa_model/deployment/services/service_monitor.py +356 -0
  25. isa_model/deployment/services/service_registry.py +527 -0
  26. isa_model/eval/__init__.py +80 -44
  27. isa_model/eval/config/__init__.py +10 -0
  28. isa_model/eval/config/evaluation_config.py +108 -0
  29. isa_model/eval/evaluators/__init__.py +18 -0
  30. isa_model/eval/evaluators/base_evaluator.py +503 -0
  31. isa_model/eval/evaluators/llm_evaluator.py +472 -0
  32. isa_model/eval/factory.py +417 -709
  33. isa_model/eval/infrastructure/__init__.py +24 -0
  34. isa_model/eval/infrastructure/experiment_tracker.py +466 -0
  35. isa_model/eval/metrics.py +191 -21
  36. isa_model/inference/ai_factory.py +187 -387
  37. isa_model/inference/providers/modal_provider.py +109 -0
  38. isa_model/inference/providers/yyds_provider.py +108 -0
  39. isa_model/inference/services/__init__.py +2 -1
  40. isa_model/inference/services/audio/base_stt_service.py +65 -1
  41. isa_model/inference/services/audio/base_tts_service.py +75 -1
  42. isa_model/inference/services/audio/openai_stt_service.py +189 -151
  43. isa_model/inference/services/audio/openai_tts_service.py +12 -10
  44. isa_model/inference/services/audio/replicate_tts_service.py +61 -56
  45. isa_model/inference/services/base_service.py +55 -55
  46. isa_model/inference/services/embedding/base_embed_service.py +65 -1
  47. isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
  48. isa_model/inference/services/embedding/openai_embed_service.py +8 -10
  49. isa_model/inference/services/helpers/stacked_config.py +148 -0
  50. isa_model/inference/services/img/__init__.py +18 -0
  51. isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -35
  52. isa_model/inference/services/img/flux_professional_service.py +603 -0
  53. isa_model/inference/services/img/helpers/base_stacked_service.py +274 -0
  54. isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +210 -69
  55. isa_model/inference/services/llm/__init__.py +3 -3
  56. isa_model/inference/services/llm/base_llm_service.py +519 -35
  57. isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +40 -0
  58. isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
  59. isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
  60. isa_model/inference/services/llm/ollama_llm_service.py +150 -15
  61. isa_model/inference/services/llm/openai_llm_service.py +134 -31
  62. isa_model/inference/services/llm/yyds_llm_service.py +255 -0
  63. isa_model/inference/services/vision/__init__.py +38 -4
  64. isa_model/inference/services/vision/base_vision_service.py +241 -96
  65. isa_model/inference/services/vision/disabled/isA_vision_service.py +500 -0
  66. isa_model/inference/services/vision/doc_analysis_service.py +640 -0
  67. isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
  68. isa_model/inference/services/vision/helpers/image_utils.py +272 -3
  69. isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
  70. isa_model/inference/services/vision/openai_vision_service.py +109 -170
  71. isa_model/inference/services/vision/replicate_vision_service.py +508 -0
  72. isa_model/inference/services/vision/ui_analysis_service.py +823 -0
  73. isa_model/scripts/register_models.py +370 -0
  74. isa_model/scripts/register_models_with_embeddings.py +510 -0
  75. isa_model/serving/__init__.py +19 -0
  76. isa_model/serving/api/__init__.py +10 -0
  77. isa_model/serving/api/fastapi_server.py +89 -0
  78. isa_model/serving/api/middleware/__init__.py +9 -0
  79. isa_model/serving/api/middleware/request_logger.py +88 -0
  80. isa_model/serving/api/routes/__init__.py +5 -0
  81. isa_model/serving/api/routes/health.py +82 -0
  82. isa_model/serving/api/routes/llm.py +19 -0
  83. isa_model/serving/api/routes/ui_analysis.py +223 -0
  84. isa_model/serving/api/routes/unified.py +202 -0
  85. isa_model/serving/api/routes/vision.py +19 -0
  86. isa_model/serving/api/schemas/__init__.py +17 -0
  87. isa_model/serving/api/schemas/common.py +33 -0
  88. isa_model/serving/api/schemas/ui_analysis.py +78 -0
  89. {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/METADATA +4 -1
  90. isa_model-0.3.6.dist-info/RECORD +147 -0
  91. isa_model/core/model_manager.py +0 -208
  92. isa_model/core/model_registry.py +0 -342
  93. isa_model/inference/billing_tracker.py +0 -406
  94. isa_model/inference/services/llm/triton_llm_service.py +0 -481
  95. isa_model/inference/services/vision/ollama_vision_service.py +0 -194
  96. isa_model-0.3.4.dist-info/RECORD +0 -91
  97. /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
  98. /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
  99. {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/WHEEL +0 -0
  100. {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/top_level.txt +0 -0
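
The three diffs reproduced below (openai_stt_service.py, openai_tts_service.py, and replicate_tts_service.py) all follow the same refactoring pattern: services now accept a provider name string and resolve credentials through the centralized config manager rather than receiving a pre-built BaseProvider object, and billing moves from the removed billing_tracker module to an awaited _track_usage call with plain string service types. As a minimal sketch of what this means at a call site (direct instantiation shown for illustration; it assumes the config manager can supply the provider's API key):

# 0.3.4: the caller constructed and passed a provider object
# service = OpenAISTTService(provider=openai_provider, model_name="whisper-1")

# 0.3.6: the caller passes only a provider name; the service resolves its
# own config (API key, base URL) via the centralized config manager
service = OpenAISTTService(provider_name="openai", model_name="whisper-1")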
isa_model/inference/services/audio/openai_stt_service.py

@@ -5,8 +5,6 @@ from openai import AsyncOpenAI
  from tenacity import retry, stop_after_attempt, wait_exponential

  from isa_model.inference.services.audio.base_stt_service import BaseSTTService
- from isa_model.inference.providers.base_provider import BaseProvider
- from isa_model.inference.billing_tracker import ServiceType

  logger = logging.getLogger(__name__)

@@ -14,22 +12,22 @@ class OpenAISTTService(BaseSTTService):
      """
      OpenAI Speech-to-Text service using whisper-1 model.
      Supports transcription and translation to English.
+     Uses the new unified architecture with centralized config management.
      """

-     def __init__(self, provider: 'BaseProvider', model_name: str = "whisper-1"):
-         super().__init__(provider, model_name)
+     def __init__(self, provider_name: str, model_name: str = "whisper-1", **kwargs):
+         super().__init__(provider_name, model_name, **kwargs)

-         # Get full configuration from provider (including sensitive data)
-         provider_config = provider.get_full_config()
+         # Get provider configuration from centralized config manager
+         provider_config = self.get_provider_config()

          # Initialize AsyncOpenAI client with provider configuration
          try:
-             if not provider_config.get("api_key"):
-                 raise ValueError("OpenAI API key not found in provider configuration")
+             api_key = self.get_api_key()

              self.client = AsyncOpenAI(
-                 api_key=provider_config["api_key"],
-                 base_url=provider_config.get("base_url", "https://api.openai.com/v1"),
+                 api_key=api_key,
+                 base_url=provider_config.get("api_base_url", "https://api.openai.com/v1"),
                  organization=provider_config.get("organization")
              )

@@ -48,205 +46,245 @@ class OpenAISTTService(BaseSTTService):
          wait=wait_exponential(multiplier=1, min=4, max=10),
          reraise=True
      )
-     async def _download_audio(self, audio_url: str) -> bytes:
-         """Download audio from URL"""
-         async with aiohttp.ClientSession() as session:
-             async with session.get(audio_url) as response:
-                 if response.status == 200:
-                     return await response.read()
-                 else:
-                     raise ValueError(f"Failed to download audio from {audio_url}: {response.status}")
-
-     async def transcribe(
-         self,
-         audio_file: Union[str, BinaryIO],
-         language: Optional[str] = None,
-         prompt: Optional[str] = None
-     ) -> Dict[str, Any]:
-         """Transcribe audio file to text using whisper-1"""
-         try:
-             # Prepare the audio file
-             if isinstance(audio_file, str):
-                 if audio_file.startswith(('http://', 'https://')):
-                     # Download audio from URL
-                     audio_data = await self._download_audio(audio_file)
-                     filename = audio_file.split('/')[-1] or 'audio.wav'
-                 else:
-                     # Local file path
-                     with open(audio_file, 'rb') as f:
-                         audio_data = f.read()
-                     filename = audio_file
-             else:
-                 audio_data = audio_file.read()
-                 filename = getattr(audio_file, 'name', 'audio.wav')
-
-             # Check file size
-             if len(audio_data) > self.max_file_size:
-                 raise ValueError(f"Audio file size ({len(audio_data)} bytes) exceeds maximum ({self.max_file_size} bytes)")
+     async def transcribe(self, audio_file: Union[str, BinaryIO], language: Optional[str] = None, prompt: Optional[str] = None) -> Dict[str, Any]:
+         """
+         Transcribe audio file to text using OpenAI's Whisper model.
+
+         Args:
+             audio_file: Path to audio file or file-like object
+             language: Optional language code for better accuracy
+             **kwargs: Additional parameters for the transcription API

-             # Prepare transcription parameters
-             kwargs = {
+         Returns:
+             Dict containing transcription result and metadata
+         """
+         try:
+             # Prepare request parameters
+             transcription_params = {
                  "model": self.model_name,
-                 "file": (filename, audio_data),
                  "response_format": "verbose_json"
              }

              if language:
-                 kwargs["language"] = language
-             if prompt:
-                 kwargs["prompt"] = prompt
+                 transcription_params["language"] = language

-             # Transcribe audio
-             response = await self.client.audio.transcriptions.create(**kwargs)
+             # Add optional parameters
+             if prompt:
+                 transcription_params["prompt"] = prompt

-             # Track usage for billing
-             usage = getattr(response, 'usage', {})
-             input_tokens = usage.get('input_tokens', 0) if usage else 0
-             output_tokens = usage.get('output_tokens', 0) if usage else 0
+             # Handle file input
+             if isinstance(audio_file, str):
+                 with open(audio_file, "rb") as f:
+                     transcription = await self.client.audio.transcriptions.create(
+                         file=f,
+                         **transcription_params
+                     )
+             else:
+                 transcription = await self.client.audio.transcriptions.create(
+                     file=audio_file,
+                     **transcription_params
+                 )

-             # For audio, also track duration in minutes
-             duration_minutes = getattr(response, 'duration', 0) / 60.0 if getattr(response, 'duration', 0) else 0
+             # Extract usage information for billing
+             result = {
+                 "text": transcription.text,
+                 "language": getattr(transcription, 'language', language),
+                 "duration": getattr(transcription, 'duration', None),
+                 "segments": getattr(transcription, 'segments', []),
+                 "usage": {
+                     "input_units": getattr(transcription, 'duration', 1),  # Duration in seconds
+                     "output_tokens": len(transcription.text.split()) if transcription.text else 0
+                 }
+             }

-             self._track_usage(
-                 service_type=ServiceType.AUDIO_STT,
+             # Track usage for billing
+             await self._track_usage(
+                 service_type="audio_stt",
                  operation="transcribe",
-                 input_tokens=input_tokens,
-                 output_tokens=output_tokens,
-                 input_units=duration_minutes,  # Duration in minutes
+                 input_units=result["usage"]["input_units"],
+                 output_tokens=result["usage"]["output_tokens"],
                  metadata={
-                     "language": language,
-                     "model": self.model_name,
-                     "file_size": len(audio_data)
+                     "language": result.get("language"),
+                     "model_name": self.model_name,
+                     "provider": self.provider_name
                  }
              )

-             # Format response
-             result = {
-                 "text": response.text,
-                 "language": getattr(response, 'language', language or 'unknown'),
-                 "duration": getattr(response, 'duration', None),
-                 "segments": getattr(response, 'segments', []),
-                 "confidence": None,  # whisper-1 doesn't provide confidence scores
-                 "usage": usage  # Include usage information
-             }
-
              return result

          except Exception as e:
-             logger.error(f"Error transcribing audio: {e}")
+             logger.error(f"Transcription failed: {e}")
              raise
-
+
      @retry(
          stop=stop_after_attempt(3),
          wait=wait_exponential(multiplier=1, min=4, max=10),
          reraise=True
      )
-     async def translate(
-         self,
-         audio_file: Union[str, BinaryIO]
-     ) -> Dict[str, Any]:
-         """Translate audio file to English text"""
+     async def translate(self, audio_file: Union[str, BinaryIO]) -> Dict[str, Any]:
+         """
+         Translate audio file to English text using OpenAI's Whisper model.
+
+         Args:
+             audio_file: Path to audio file or file-like object
+             **kwargs: Additional parameters for the translation API
+
+         Returns:
+             Dict containing translation result and metadata
+         """
          try:
-             # Prepare the audio file
-             if isinstance(audio_file, str):
-                 with open(audio_file, 'rb') as f:
-                     audio_data = f.read()
-                 filename = audio_file
-             else:
-                 audio_data = audio_file.read()
-                 filename = getattr(audio_file, 'name', 'audio.wav')
+             # Prepare request parameters
+             translation_params = {
+                 "model": self.model_name,
+                 "response_format": "verbose_json"
+             }

-             # Check file size
-             if len(audio_data) > self.max_file_size:
-                 raise ValueError(f"Audio file size ({len(audio_data)} bytes) exceeds maximum ({self.max_file_size} bytes)")
+             # No additional parameters for translation

-             # Translate audio to English
-             response = await self.client.audio.translations.create(
-                 model=self.model_name,
-                 file=(filename, audio_data),
-                 response_format="verbose_json"
-             )
+             # Handle file input
+             if isinstance(audio_file, str):
+                 with open(audio_file, "rb") as f:
+                     translation = await self.client.audio.translations.create(
+                         file=f,
+                         **translation_params
+                     )
+             else:
+                 translation = await self.client.audio.translations.create(
+                     file=audio_file,
+                     **translation_params
+                 )

-             # Format response
+             # Extract usage information for billing
              result = {
-                 "text": response.text,
-                 "detected_language": getattr(response, 'language', 'unknown'),
-                 "duration": getattr(response, 'duration', None),
-                 "segments": getattr(response, 'segments', []),
-                 "confidence": None  # Whisper doesn't provide confidence scores
+                 "text": translation.text,
+                 "language": "en",  # Translation is always to English
+                 "duration": getattr(translation, 'duration', None),
+                 "segments": getattr(translation, 'segments', []),
+                 "usage": {
+                     "input_units": getattr(translation, 'duration', 1),  # Duration in seconds
+                     "output_tokens": len(translation.text.split()) if translation.text else 0
+                 }
              }

+             # Track usage for billing
+             await self._track_usage(
+                 service_type="audio_stt",
+                 operation="translate",
+                 input_units=result["usage"]["input_units"],
+                 output_tokens=result["usage"]["output_tokens"],
+                 metadata={
+                     "target_language": "en",
+                     "model_name": self.model_name,
+                     "provider": self.provider_name
+                 }
+             )
+
              return result

          except Exception as e:
-             logger.error(f"Error translating audio: {e}")
+             logger.error(f"Translation failed: {e}")
              raise
-
-     async def transcribe_batch(
-         self,
-         audio_files: List[Union[str, BinaryIO]],
-         language: Optional[str] = None,
-         prompt: Optional[str] = None
-     ) -> List[Dict[str, Any]]:
-         """Transcribe multiple audio files"""
-         results = []
+
+     async def transcribe_batch(self, audio_files: List[Union[str, BinaryIO]], language: Optional[str] = None, prompt: Optional[str] = None) -> List[Dict[str, Any]]:
+         """
+         Transcribe multiple audio files in batch.

+         Args:
+             audio_files: List of audio file paths or file-like objects
+             language: Optional language code for better accuracy
+             **kwargs: Additional parameters for the transcription API
+
+         Returns:
+             List of transcription results
+         """
+         results = []
          for audio_file in audio_files:
              try:
                  result = await self.transcribe(audio_file, language, prompt)
                  results.append(result)
              except Exception as e:
-                 logger.error(f"Error transcribing audio file: {e}")
+                 logger.error(f"Failed to transcribe {audio_file}: {e}")
                  results.append({
-                     "text": "",
-                     "language": "unknown",
-                     "duration": None,
-                     "segments": [],
-                     "confidence": None,
-                     "error": str(e)
+                     "error": str(e),
+                     "file": str(audio_file),
+                     "text": None
                  })

          return results
-
+
      async def detect_language(self, audio_file: Union[str, BinaryIO]) -> Dict[str, Any]:
-         """Detect language of audio file"""
+         """
+         Detect the language of an audio file.
+
+         Args:
+             audio_file: Path to audio file or file-like object
+             **kwargs: Additional parameters
+
+         Returns:
+             Dict containing detected language and confidence
+         """
          try:
-             # Transcribe with language detection
-             result = await self.transcribe(audio_file, language=None)
+             # Use transcription with language detection - need to access client directly
+             transcription = await self.client.audio.transcriptions.create(
+                 file=audio_file if not isinstance(audio_file, str) else open(audio_file, "rb"),
+                 model=self.model_name,
+                 response_format="verbose_json"
+             )
+
+             result = {
+                 "text": transcription.text,
+                 "language": getattr(transcription, 'language', "unknown")
+             }

              return {
-                 "language": result["language"],
-                 "confidence": 1.0,  # Whisper is generally confident
-                 "alternatives": []  # Whisper doesn't provide alternatives
+                 "language": result.get("language", "unknown"),
+                 "confidence": 1.0,  # OpenAI doesn't provide confidence scores
+                 "text_sample": result.get("text", "")[:100] if result.get("text") else ""
              }

          except Exception as e:
-             logger.error(f"Error detecting language: {e}")
-             raise
-
+             logger.error(f"Language detection failed: {e}")
+             return {
+                 "language": "unknown",
+                 "confidence": 0.0,
+                 "error": str(e)
+             }
+
      def get_supported_formats(self) -> List[str]:
-         """Get list of supported audio formats"""
-         return self.supported_formats.copy()
+         """
+         Get list of supported audio formats.
+
+         Returns:
+             List of supported file extensions
+         """
+         return self.supported_formats

      def get_supported_languages(self) -> List[str]:
-         """Get list of supported language codes"""
-         # Whisper supports 99+ languages
+         """
+         Get list of supported language codes for OpenAI Whisper.
+
+         Returns:
+             List of supported language codes
+         """
          return [
-             'af', 'am', 'ar', 'as', 'az', 'ba', 'be', 'bg', 'bn', 'bo', 'br', 'bs', 'ca',
-             'cs', 'cy', 'da', 'de', 'el', 'en', 'es', 'et', 'eu', 'fa', 'fi', 'fo', 'fr',
-             'gl', 'gu', 'ha', 'haw', 'he', 'hi', 'hr', 'ht', 'hu', 'hy', 'id', 'is', 'it',
-             'ja', 'jw', 'ka', 'kk', 'km', 'kn', 'ko', 'la', 'lb', 'ln', 'lo', 'lt', 'lv',
-             'mg', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms', 'mt', 'my', 'ne', 'nl', 'nn', 'no',
-             'oc', 'pa', 'pl', 'ps', 'pt', 'ro', 'ru', 'sa', 'sd', 'si', 'sk', 'sl', 'sn',
-             'so', 'sq', 'sr', 'su', 'sv', 'sw', 'ta', 'te', 'tg', 'th', 'tk', 'tl', 'tr',
-             'tt', 'uk', 'ur', 'uz', 'vi', 'yi', 'yo', 'zh'
+             'af', 'ar', 'hy', 'az', 'be', 'bs', 'bg', 'ca', 'zh', 'hr', 'cs', 'da',
+             'nl', 'en', 'et', 'fi', 'fr', 'gl', 'de', 'el', 'he', 'hi', 'hu', 'is',
+             'id', 'it', 'ja', 'kn', 'kk', 'ko', 'lv', 'lt', 'mk', 'ms', 'mr', 'mi',
+             'ne', 'no', 'fa', 'pl', 'pt', 'ro', 'ru', 'sr', 'sk', 'sl', 'es', 'sw',
+             'sv', 'tl', 'ta', 'th', 'tr', 'uk', 'ur', 'vi', 'cy'
          ]
-
+
      def get_max_file_size(self) -> int:
-         """Get maximum file size in bytes"""
+         """
+         Get maximum file size limit in bytes.
+
+         Returns:
+             Maximum file size in bytes
+         """
          return self.max_file_size
-
+
      async def close(self):
          """Cleanup resources"""
-         await self.client.close()
-         logger.info("OpenAISTTService client has been closed.")
+         if hasattr(self.client, 'close'):
+             await self.client.close()
+         logger.info("OpenAI STT service closed")
isa_model/inference/services/audio/openai_tts_service.py

@@ -4,20 +4,18 @@ import os
  from openai import AsyncOpenAI
  from tenacity import retry, stop_after_attempt, wait_exponential
  from isa_model.inference.services.audio.base_tts_service import BaseTTSService
- from isa_model.inference.providers.base_provider import BaseProvider
- from isa_model.inference.billing_tracker import ServiceType
  import logging

  logger = logging.getLogger(__name__)

  class OpenAITTSService(BaseTTSService):
-     """Audio model service wrapper for YYDS"""
+     """OpenAI TTS service with unified architecture"""

-     def __init__(self, provider: 'BaseProvider', model_name: str):
-         super().__init__(provider, model_name)
+     def __init__(self, provider_name: str, model_name: str = "tts-1", **kwargs):
+         super().__init__(provider_name, model_name, **kwargs)

-         # Get full configuration from provider (including sensitive data)
-         provider_config = provider.get_full_config()
+         # Get configuration from centralized config manager
+         provider_config = self.get_provider_config()

          # Initialize AsyncOpenAI client with provider configuration
          try:
@@ -113,8 +111,8 @@ class OpenAITTSService(BaseTTSService):
              estimated_duration_seconds = (words / 150.0) * 60.0 / speed

              # Track usage for billing (OpenAI TTS is token-based: $15 per 1M characters)
-             self._track_usage(
-                 service_type=ServiceType.AUDIO_TTS,
+             await self._track_usage(
+                 service_type="audio_tts",
                  operation="synthesize_speech",
                  input_tokens=len(text),  # Characters as input tokens
                  output_tokens=0,
@@ -130,8 +128,12 @@ class OpenAITTSService(BaseTTSService):
                  }
              )

+             # For HTTP API compatibility, encode audio data as base64
+             import base64
+             audio_base64 = base64.b64encode(audio_data).decode('utf-8')
+
              return {
-                 "audio_data": audio_data,
+                 "audio_data_base64": audio_base64,  # Base64 encoded for JSON compatibility
                  "format": format,
                  "duration": estimated_duration_seconds,
                  "sample_rate": 24000  # Default for OpenAI TTS
isa_model/inference/services/audio/replicate_tts_service.py

@@ -4,42 +4,45 @@ import replicate
  from tenacity import retry, stop_after_attempt, wait_exponential

  from isa_model.inference.services.audio.base_tts_service import BaseTTSService
- from isa_model.inference.providers.base_provider import BaseProvider
- from isa_model.inference.billing_tracker import ServiceType

  logger = logging.getLogger(__name__)

  class ReplicateTTSService(BaseTTSService):
      """
-     Replicate Text-to-Speech service using Kokoro model.
+     Replicate Text-to-Speech service using Kokoro model with unified architecture.
      High-quality voice synthesis with multiple voice options.
      """

-     def __init__(self, provider: 'BaseProvider', model_name: str = "jaaari/kokoro-82m:f559560eb822dc509045f3921a1921234918b91739db4bf3daab2169b71c7a13"):
-         super().__init__(provider, model_name)
+     def __init__(self, provider_name: str, model_name: str = "jaaari/kokoro-82m:f559560eb822dc509045f3921a1921234918b91739db4bf3daab2169b71c7a13", **kwargs):
+         super().__init__(provider_name, model_name, **kwargs)

-         # Get full configuration from provider (including sensitive data)
-         provider_config = provider.get_full_config()
+         # Get configuration from centralized config manager
+         provider_config = self.get_provider_config()

          # Set up Replicate API token from provider configuration
-         self.api_token = provider_config.get('api_token') or provider_config.get('replicate_api_token')
-         if not self.api_token:
-             raise ValueError("Replicate API token not found in provider configuration")
-
-         # Set environment variable for replicate library
-         import os
-         os.environ['REPLICATE_API_TOKEN'] = self.api_token
-
-         # Available voices for Kokoro model
-         self.available_voices = [
-             "af_bella", "af_nicole", "af_sarah", "af_sky", "am_adam", "am_michael"
-         ]
-
-         # Default settings
-         self.default_voice = "af_nicole"
-         self.default_speed = 1.0
-
-         logger.info(f"Initialized ReplicateTTSService with model '{self.model_name}'")
+         try:
+             self.api_token = provider_config.get('api_key') or provider_config.get('replicate_api_token')
+             if not self.api_token:
+                 raise ValueError("Replicate API token not found in provider configuration")
+
+             # Set environment variable for replicate library
+             import os
+             os.environ['REPLICATE_API_TOKEN'] = self.api_token
+
+             # Available voices for Kokoro model
+             self.available_voices = [
+                 "af_bella", "af_nicole", "af_sarah", "af_sky", "am_adam", "am_michael"
+             ]
+
+             # Default settings
+             self.default_voice = "af_nicole"
+             self.default_speed = 1.0
+
+             logger.info(f"Initialized ReplicateTTSService with model '{self.model_name}'")
+
+         except Exception as e:
+             logger.error(f"Failed to initialize Replicate client: {e}")
+             raise ValueError(f"Failed to initialize Replicate client: {e}") from e

      @retry(
          stop=stop_after_attempt(3),
@@ -51,8 +54,8 @@ class ReplicateTTSService(BaseTTSService):
          text: str,
          voice: Optional[str] = None,
          speed: float = 1.0,
-         pitch: Optional[float] = None,
-         volume: Optional[float] = None
+         pitch: float = 1.0,
+         format: str = "wav"
      ) -> Dict[str, Any]:
          """Synthesize speech from text using Kokoro model"""
          try:
@@ -99,8 +102,8 @@ class ReplicateTTSService(BaseTTSService):
              estimated_duration_seconds = (words / 150.0) * 60.0 / speed

              # Track usage for billing
-             self._track_usage(
-                 service_type=ServiceType.AUDIO_TTS,
+             await self._track_usage(
+                 service_type="audio_tts",
                  operation="synthesize_speech",
                  input_tokens=0,
                  output_tokens=0,
@@ -115,15 +118,24 @@
                  }
              )

+             # Download audio data for return format consistency
+             import aiohttp
+             async with aiohttp.ClientSession() as session:
+                 async with session.get(audio_url) as response:
+                     response.raise_for_status()
+                     audio_data = await response.read()
+
              result = {
-                 "audio_url": audio_url,
-                 "text": text,
-                 "voice": selected_voice,
-                 "speed": speed,
-                 "duration_seconds": estimated_duration_seconds,
+                 "audio_data": audio_data,
+                 "format": "wav",  # Kokoro typically outputs WAV
+                 "duration": estimated_duration_seconds,
+                 "sample_rate": 22050,
+                 "audio_url": audio_url,  # Keep URL for reference
                  "metadata": {
                      "model": self.model_name,
                      "provider": "replicate",
+                     "voice": selected_voice,
+                     "speed": speed,
                      "voice_options": self.available_voices
                  }
              }
@@ -137,36 +149,29 @@ class ReplicateTTSService(BaseTTSService):

      async def synthesize_speech_to_file(
          self,
-         text: str,
+         text: str,
          output_path: str,
          voice: Optional[str] = None,
          speed: float = 1.0,
-         pitch: Optional[float] = None,
-         volume: Optional[float] = None
+         pitch: float = 1.0,
+         format: str = "wav"
      ) -> Dict[str, Any]:
          """Synthesize speech and save to file"""
          try:
-             # Get audio URL
-             result = await self.synthesize_speech(text, voice, speed, pitch, volume)
-             audio_url = result["audio_url"]
+             # Get synthesis result
+             result = await self.synthesize_speech(text, voice, speed, pitch, format)
+             audio_data = result["audio_data"]

-             # Download and save audio
-             import aiohttp
-             import aiofiles
+             # Save audio data to file
+             with open(output_path, 'wb') as f:
+                 f.write(audio_data)

-             async with aiohttp.ClientSession() as session:
-                 async with session.get(audio_url) as response:
-                     response.raise_for_status()
-                     audio_data = await response.read()
-
-             async with aiofiles.open(output_path, 'wb') as f:
-                 await f.write(audio_data)
-
-             result["output_path"] = output_path
-             result["file_size"] = len(audio_data)
-
-             logger.info(f"Audio saved to: {output_path}")
-             return result
+             return {
+                 "file_path": output_path,
+                 "duration": result["duration"],
+                 "sample_rate": result["sample_rate"],
+                 "file_size": len(audio_data)
+             }

          except Exception as e:
              logger.error(f"Error saving audio to file: {e}")