isa-model 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. isa_model/__init__.py +30 -1
  2. isa_model/client.py +937 -0
  3. isa_model/core/config/__init__.py +16 -0
  4. isa_model/core/config/config_manager.py +514 -0
  5. isa_model/core/config.py +426 -0
  6. isa_model/core/models/model_billing_tracker.py +476 -0
  7. isa_model/core/models/model_manager.py +399 -0
  8. isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
  9. isa_model/core/pricing_manager.py +426 -0
  10. isa_model/core/services/__init__.py +19 -0
  11. isa_model/core/services/intelligent_model_selector.py +547 -0
  12. isa_model/core/types.py +291 -0
  13. isa_model/deployment/__init__.py +2 -0
  14. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
  15. isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
  16. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
  17. isa_model/deployment/cloud/modal/register_models.py +321 -0
  18. isa_model/deployment/runtime/deployed_service.py +338 -0
  19. isa_model/deployment/services/__init__.py +9 -0
  20. isa_model/deployment/services/auto_deploy_vision_service.py +538 -0
  21. isa_model/deployment/services/model_service.py +332 -0
  22. isa_model/deployment/services/service_monitor.py +356 -0
  23. isa_model/deployment/services/service_registry.py +527 -0
  24. isa_model/deployment/services/simple_auto_deploy_vision_service.py +275 -0
  25. isa_model/eval/__init__.py +80 -44
  26. isa_model/eval/config/__init__.py +10 -0
  27. isa_model/eval/config/evaluation_config.py +108 -0
  28. isa_model/eval/evaluators/__init__.py +18 -0
  29. isa_model/eval/evaluators/base_evaluator.py +503 -0
  30. isa_model/eval/evaluators/llm_evaluator.py +472 -0
  31. isa_model/eval/factory.py +417 -709
  32. isa_model/eval/infrastructure/__init__.py +24 -0
  33. isa_model/eval/infrastructure/experiment_tracker.py +466 -0
  34. isa_model/eval/metrics.py +191 -21
  35. isa_model/inference/ai_factory.py +257 -601
  36. isa_model/inference/services/audio/base_stt_service.py +65 -1
  37. isa_model/inference/services/audio/base_tts_service.py +75 -1
  38. isa_model/inference/services/audio/openai_stt_service.py +189 -151
  39. isa_model/inference/services/audio/openai_tts_service.py +12 -10
  40. isa_model/inference/services/audio/replicate_tts_service.py +61 -56
  41. isa_model/inference/services/base_service.py +55 -17
  42. isa_model/inference/services/embedding/base_embed_service.py +65 -1
  43. isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
  44. isa_model/inference/services/embedding/openai_embed_service.py +8 -10
  45. isa_model/inference/services/helpers/stacked_config.py +148 -0
  46. isa_model/inference/services/img/__init__.py +18 -0
  47. isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
  48. isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
  49. isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
  50. isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
  51. isa_model/inference/services/llm/__init__.py +3 -3
  52. isa_model/inference/services/llm/base_llm_service.py +492 -40
  53. isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
  54. isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
  55. isa_model/inference/services/llm/ollama_llm_service.py +51 -17
  56. isa_model/inference/services/llm/openai_llm_service.py +70 -19
  57. isa_model/inference/services/llm/yyds_llm_service.py +24 -23
  58. isa_model/inference/services/vision/__init__.py +38 -4
  59. isa_model/inference/services/vision/base_vision_service.py +218 -117
  60. isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
  61. isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
  62. isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
  63. isa_model/inference/services/vision/helpers/image_utils.py +272 -3
  64. isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
  65. isa_model/inference/services/vision/openai_vision_service.py +104 -307
  66. isa_model/inference/services/vision/replicate_vision_service.py +140 -325
  67. isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
  68. isa_model/scripts/register_models.py +370 -0
  69. isa_model/scripts/register_models_with_embeddings.py +510 -0
  70. isa_model/serving/api/fastapi_server.py +6 -1
  71. isa_model/serving/api/routes/unified.py +274 -0
  72. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/METADATA +4 -1
  73. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/RECORD +78 -53
  74. isa_model/config/__init__.py +0 -9
  75. isa_model/config/config_manager.py +0 -213
  76. isa_model/core/model_manager.py +0 -213
  77. isa_model/core/model_registry.py +0 -375
  78. isa_model/core/vision_models_init.py +0 -116
  79. isa_model/inference/billing_tracker.py +0 -406
  80. isa_model/inference/services/llm/triton_llm_service.py +0 -481
  81. isa_model/inference/services/stacked/__init__.py +0 -26
  82. isa_model/inference/services/stacked/config.py +0 -426
  83. isa_model/inference/services/vision/ollama_vision_service.py +0 -194
  84. /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
  85. /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
  86. /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
  87. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/WHEEL +0 -0
  88. {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,71 @@ from typing import Dict, Any, List, Union, Optional, BinaryIO
  from isa_model.inference.services.base_service import BaseService
 
  class BaseSTTService(BaseService):
-     """Base class for Speech-to-Text services"""
+     """Base class for Speech-to-Text services with unified task dispatch"""
+
+     async def invoke(
+         self,
+         audio_input: Union[str, BinaryIO, List[Union[str, BinaryIO]]],
+         task: Optional[str] = None,
+         **kwargs
+     ) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
+         """
+         Unified task dispatch method - the base class provides the generic implementation.
+
+         Args:
+             audio_input: Audio input, which can be:
+                 - str: path to an audio file
+                 - BinaryIO: an audio file object
+                 - List: multiple audio files (batch processing)
+             task: Task type; several STT tasks are supported
+             **kwargs: Additional task-specific parameters
+
+         Returns:
+             Dict or List[Dict] containing task results
+         """
+         task = task or "transcribe"
+
+         # ==================== Speech-to-text tasks ====================
+         if task == "transcribe":
+             if isinstance(audio_input, list):
+                 return await self.transcribe_batch(
+                     audio_input,
+                     kwargs.get("language"),
+                     kwargs.get("prompt")
+                 )
+             else:
+                 return await self.transcribe(
+                     audio_input,
+                     kwargs.get("language"),
+                     kwargs.get("prompt")
+                 )
+         elif task == "translate":
+             if isinstance(audio_input, list):
+                 raise ValueError("translate task requires single audio input")
+             return await self.translate(audio_input)
+         elif task == "batch_transcribe":
+             if not isinstance(audio_input, list):
+                 audio_input = [audio_input]
+             return await self.transcribe_batch(
+                 audio_input,
+                 kwargs.get("language"),
+                 kwargs.get("prompt")
+             )
+         elif task == "detect_language":
+             if isinstance(audio_input, list):
+                 raise ValueError("detect_language task requires single audio input")
+             return await self.detect_language(audio_input)
+         else:
+             raise NotImplementedError(f"{self.__class__.__name__} does not support task: {task}")
+
+     def get_supported_tasks(self) -> List[str]:
+         """
+         Get the list of supported tasks.
+
+         Returns:
+             List of supported task names
+         """
+         return ["transcribe", "translate", "batch_transcribe", "detect_language"]
 
      @abstractmethod
      async def transcribe(
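Note: the new `invoke` method gives every STT service a single entry point that routes to the abstract methods below. A minimal usage sketch against the OpenAI subclass changed later in this diff (assumes provider credentials are already available to the centralized config manager; file names are illustrative):

    import asyncio
    from isa_model.inference.services.audio.openai_stt_service import OpenAISTTService

    async def main():
        stt = OpenAISTTService("openai")  # provider_name; model defaults to "whisper-1"
        # Default task is "transcribe"; a list input dispatches to transcribe_batch
        result = await stt.invoke("meeting.wav")
        print(result["text"])
        # Explicit task routing, e.g. language detection
        lang = await stt.invoke("meeting.wav", task="detect_language")
        print(lang["language"])

    asyncio.run(main())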
@@ -3,7 +3,81 @@ from typing import Dict, Any, List, Union, Optional, BinaryIO
  from isa_model.inference.services.base_service import BaseService
 
  class BaseTTSService(BaseService):
-     """Base class for Text-to-Speech services"""
+     """Base class for Text-to-Speech services with unified task dispatch"""
+
+     async def invoke(
+         self,
+         text: Union[str, List[str]],
+         task: Optional[str] = None,
+         **kwargs
+     ) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
+         """
+         Unified task dispatch method - the base class provides the generic implementation.
+
+         Args:
+             text: Input text, which can be:
+                 - str: a single text
+                 - List[str]: multiple texts (batch processing)
+             task: Task type; several TTS tasks are supported
+             **kwargs: Additional task-specific parameters
+
+         Returns:
+             Dict or List[Dict] containing task results
+         """
+         task = task or "synthesize"
+
+         # ==================== Speech synthesis tasks ====================
+         if task == "synthesize":
+             if isinstance(text, list):
+                 return await self.synthesize_speech_batch(
+                     text,
+                     kwargs.get("voice"),
+                     kwargs.get("speed", 1.0),
+                     kwargs.get("pitch", 1.0),
+                     kwargs.get("format", "mp3")
+                 )
+             else:
+                 return await self.synthesize_speech(
+                     text,
+                     kwargs.get("voice"),
+                     kwargs.get("speed", 1.0),
+                     kwargs.get("pitch", 1.0),
+                     kwargs.get("format", "mp3")
+                 )
+         elif task == "synthesize_to_file":
+             if not kwargs.get("output_path"):
+                 raise ValueError("output_path is required for synthesize_to_file task")
+             if isinstance(text, list):
+                 raise ValueError("synthesize_to_file task requires single text input")
+             return await self.synthesize_speech_to_file(
+                 text,
+                 kwargs["output_path"],
+                 kwargs.get("voice"),
+                 kwargs.get("speed", 1.0),
+                 kwargs.get("pitch", 1.0),
+                 kwargs.get("format", "mp3")
+             )
+         elif task == "batch_synthesize":
+             if not isinstance(text, list):
+                 text = [text]
+             return await self.synthesize_speech_batch(
+                 text,
+                 kwargs.get("voice"),
+                 kwargs.get("speed", 1.0),
+                 kwargs.get("pitch", 1.0),
+                 kwargs.get("format", "mp3")
+             )
+         else:
+             raise NotImplementedError(f"{self.__class__.__name__} does not support task: {task}")
+
+     def get_supported_tasks(self) -> List[str]:
+         """
+         Get the list of supported tasks.
+
+         Returns:
+             List of supported task names
+         """
+         return ["synthesize", "synthesize_to_file", "batch_synthesize"]
 
      @abstractmethod
      async def synthesize_speech(
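Note: the TTS base class mirrors the same dispatch pattern, forwarding `voice`, `speed`, `pitch`, and `format` kwargs to the abstract synthesis methods. A sketch under the same assumptions as the STT example above, run inside an async function (the voice name is illustrative):

    # Hypothetical wiring; OpenAITTSService is changed later in this diff
    tts = OpenAITTSService("openai")
    result = await tts.invoke("Hello world", task="synthesize", voice="alloy", speed=1.1)
    # Passing a list, or task="batch_synthesize", routes to synthesize_speech_batch
    results = await tts.invoke(["First line", "Second line"], task="batch_synthesize")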
@@ -5,8 +5,6 @@ from openai import AsyncOpenAI
  from tenacity import retry, stop_after_attempt, wait_exponential
 
  from isa_model.inference.services.audio.base_stt_service import BaseSTTService
- from isa_model.inference.providers.base_provider import BaseProvider
- from isa_model.inference.billing_tracker import ServiceType
 
  logger = logging.getLogger(__name__)
 
@@ -14,22 +12,22 @@ class OpenAISTTService(BaseSTTService):
      """
      OpenAI Speech-to-Text service using whisper-1 model.
      Supports transcription and translation to English.
+     Uses the new unified architecture with centralized config management.
      """
 
-     def __init__(self, provider: 'BaseProvider', model_name: str = "whisper-1"):
-         super().__init__(provider, model_name)
+     def __init__(self, provider_name: str, model_name: str = "whisper-1", **kwargs):
+         super().__init__(provider_name, model_name, **kwargs)
 
-         # Get full configuration from provider (including sensitive data)
-         provider_config = provider.get_full_config()
+         # Get provider configuration from centralized config manager
+         provider_config = self.get_provider_config()
 
          # Initialize AsyncOpenAI client with provider configuration
          try:
-             if not provider_config.get("api_key"):
-                 raise ValueError("OpenAI API key not found in provider configuration")
+             api_key = self.get_api_key()
 
              self.client = AsyncOpenAI(
-                 api_key=provider_config["api_key"],
-                 base_url=provider_config.get("base_url", "https://api.openai.com/v1"),
+                 api_key=api_key,
+                 base_url=provider_config.get("api_base_url", "https://api.openai.com/v1"),
                  organization=provider_config.get("organization")
              )
 
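Note: this constructor change is the core of the 0.3.7 refactor: services no longer receive a provider object and instead resolve configuration themselves via `get_provider_config()` and `get_api_key()` on the base class. The diff does not show those helpers; a plausible resolution order is sketched below (the environment-variable fallback and the helper name are assumptions, not confirmed by the package):

    import os

    def get_api_key_sketch(provider_name: str, provider_config: dict) -> str:
        # Assumed order: explicit config value first, then environment variable.
        key = provider_config.get("api_key") or os.environ.get(f"{provider_name.upper()}_API_KEY")
        if not key:
            raise ValueError(f"No API key configured for provider '{provider_name}'")
        return key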
@@ -48,205 +46,245 @@ class OpenAISTTService(BaseSTTService):
          wait=wait_exponential(multiplier=1, min=4, max=10),
          reraise=True
      )
-     async def _download_audio(self, audio_url: str) -> bytes:
-         """Download audio from URL"""
-         async with aiohttp.ClientSession() as session:
-             async with session.get(audio_url) as response:
-                 if response.status == 200:
-                     return await response.read()
-                 else:
-                     raise ValueError(f"Failed to download audio from {audio_url}: {response.status}")
-
-     async def transcribe(
-         self,
-         audio_file: Union[str, BinaryIO],
-         language: Optional[str] = None,
-         prompt: Optional[str] = None
-     ) -> Dict[str, Any]:
-         """Transcribe audio file to text using whisper-1"""
-         try:
-             # Prepare the audio file
-             if isinstance(audio_file, str):
-                 if audio_file.startswith(('http://', 'https://')):
-                     # Download audio from URL
-                     audio_data = await self._download_audio(audio_file)
-                     filename = audio_file.split('/')[-1] or 'audio.wav'
-                 else:
-                     # Local file path
-                     with open(audio_file, 'rb') as f:
-                         audio_data = f.read()
-                     filename = audio_file
-             else:
-                 audio_data = audio_file.read()
-                 filename = getattr(audio_file, 'name', 'audio.wav')
-
-             # Check file size
-             if len(audio_data) > self.max_file_size:
-                 raise ValueError(f"Audio file size ({len(audio_data)} bytes) exceeds maximum ({self.max_file_size} bytes)")
+     async def transcribe(self, audio_file: Union[str, BinaryIO], language: Optional[str] = None, prompt: Optional[str] = None) -> Dict[str, Any]:
+         """
+         Transcribe audio file to text using OpenAI's Whisper model.
+
+         Args:
+             audio_file: Path to audio file or file-like object
+             language: Optional language code for better accuracy
+             **kwargs: Additional parameters for the transcription API
 
-             # Prepare transcription parameters
-             kwargs = {
+         Returns:
+             Dict containing transcription result and metadata
+         """
+         try:
+             # Prepare request parameters
+             transcription_params = {
                  "model": self.model_name,
-                 "file": (filename, audio_data),
                  "response_format": "verbose_json"
              }
 
              if language:
-                 kwargs["language"] = language
-             if prompt:
-                 kwargs["prompt"] = prompt
+                 transcription_params["language"] = language
 
-             # Transcribe audio
-             response = await self.client.audio.transcriptions.create(**kwargs)
+             # Add optional parameters
+             if prompt:
+                 transcription_params["prompt"] = prompt
 
-             # Track usage for billing
-             usage = getattr(response, 'usage', {})
-             input_tokens = usage.get('input_tokens', 0) if usage else 0
-             output_tokens = usage.get('output_tokens', 0) if usage else 0
+             # Handle file input
+             if isinstance(audio_file, str):
+                 with open(audio_file, "rb") as f:
+                     transcription = await self.client.audio.transcriptions.create(
+                         file=f,
+                         **transcription_params
+                     )
+             else:
+                 transcription = await self.client.audio.transcriptions.create(
+                     file=audio_file,
+                     **transcription_params
+                 )
 
-             # For audio, also track duration in minutes
-             duration_minutes = getattr(response, 'duration', 0) / 60.0 if getattr(response, 'duration', 0) else 0
+             # Extract usage information for billing
+             result = {
+                 "text": transcription.text,
+                 "language": getattr(transcription, 'language', language),
+                 "duration": getattr(transcription, 'duration', None),
+                 "segments": getattr(transcription, 'segments', []),
+                 "usage": {
+                     "input_units": getattr(transcription, 'duration', 1),  # Duration in seconds
+                     "output_tokens": len(transcription.text.split()) if transcription.text else 0
+                 }
+             }
 
-             self._track_usage(
-                 service_type=ServiceType.AUDIO_STT,
+             # Track usage for billing
+             await self._track_usage(
+                 service_type="audio_stt",
                  operation="transcribe",
-                 input_tokens=input_tokens,
-                 output_tokens=output_tokens,
-                 input_units=duration_minutes,  # Duration in minutes
+                 input_units=result["usage"]["input_units"],
+                 output_tokens=result["usage"]["output_tokens"],
                  metadata={
-                     "language": language,
-                     "model": self.model_name,
-                     "file_size": len(audio_data)
+                     "language": result.get("language"),
+                     "model_name": self.model_name,
+                     "provider": self.provider_name
                  }
              )
 
-             # Format response
-             result = {
-                 "text": response.text,
-                 "language": getattr(response, 'language', language or 'unknown'),
-                 "duration": getattr(response, 'duration', None),
-                 "segments": getattr(response, 'segments', []),
-                 "confidence": None,  # whisper-1 doesn't provide confidence scores
-                 "usage": usage  # Include usage information
-             }
-
              return result
 
          except Exception as e:
-             logger.error(f"Error transcribing audio: {e}")
+             logger.error(f"Transcription failed: {e}")
              raise
-
+
      @retry(
          stop=stop_after_attempt(3),
          wait=wait_exponential(multiplier=1, min=4, max=10),
          reraise=True
      )
-     async def translate(
-         self,
-         audio_file: Union[str, BinaryIO]
-     ) -> Dict[str, Any]:
-         """Translate audio file to English text"""
+     async def translate(self, audio_file: Union[str, BinaryIO]) -> Dict[str, Any]:
+         """
+         Translate audio file to English text using OpenAI's Whisper model.
+
+         Args:
+             audio_file: Path to audio file or file-like object
+             **kwargs: Additional parameters for the translation API
+
+         Returns:
+             Dict containing translation result and metadata
+         """
          try:
-             # Prepare the audio file
-             if isinstance(audio_file, str):
-                 with open(audio_file, 'rb') as f:
-                     audio_data = f.read()
-                 filename = audio_file
-             else:
-                 audio_data = audio_file.read()
-                 filename = getattr(audio_file, 'name', 'audio.wav')
+             # Prepare request parameters
+             translation_params = {
+                 "model": self.model_name,
+                 "response_format": "verbose_json"
+             }
 
-             # Check file size
-             if len(audio_data) > self.max_file_size:
-                 raise ValueError(f"Audio file size ({len(audio_data)} bytes) exceeds maximum ({self.max_file_size} bytes)")
+             # No additional parameters for translation
 
-             # Translate audio to English
-             response = await self.client.audio.translations.create(
-                 model=self.model_name,
-                 file=(filename, audio_data),
-                 response_format="verbose_json"
-             )
+             # Handle file input
+             if isinstance(audio_file, str):
+                 with open(audio_file, "rb") as f:
+                     translation = await self.client.audio.translations.create(
+                         file=f,
+                         **translation_params
+                     )
+             else:
+                 translation = await self.client.audio.translations.create(
+                     file=audio_file,
+                     **translation_params
+                 )
 
-             # Format response
+             # Extract usage information for billing
              result = {
-                 "text": response.text,
-                 "detected_language": getattr(response, 'language', 'unknown'),
-                 "duration": getattr(response, 'duration', None),
-                 "segments": getattr(response, 'segments', []),
-                 "confidence": None  # Whisper doesn't provide confidence scores
+                 "text": translation.text,
+                 "language": "en",  # Translation is always to English
+                 "duration": getattr(translation, 'duration', None),
+                 "segments": getattr(translation, 'segments', []),
+                 "usage": {
+                     "input_units": getattr(translation, 'duration', 1),  # Duration in seconds
+                     "output_tokens": len(translation.text.split()) if translation.text else 0
+                 }
              }
 
+             # Track usage for billing
+             await self._track_usage(
+                 service_type="audio_stt",
+                 operation="translate",
+                 input_units=result["usage"]["input_units"],
+                 output_tokens=result["usage"]["output_tokens"],
+                 metadata={
+                     "target_language": "en",
+                     "model_name": self.model_name,
+                     "provider": self.provider_name
+                 }
+             )
+
              return result
 
          except Exception as e:
-             logger.error(f"Error translating audio: {e}")
+             logger.error(f"Translation failed: {e}")
              raise
-
-     async def transcribe_batch(
-         self,
-         audio_files: List[Union[str, BinaryIO]],
-         language: Optional[str] = None,
-         prompt: Optional[str] = None
-     ) -> List[Dict[str, Any]]:
-         """Transcribe multiple audio files"""
-         results = []
+
+     async def transcribe_batch(self, audio_files: List[Union[str, BinaryIO]], language: Optional[str] = None, prompt: Optional[str] = None) -> List[Dict[str, Any]]:
+         """
+         Transcribe multiple audio files in batch.
 
+         Args:
+             audio_files: List of audio file paths or file-like objects
+             language: Optional language code for better accuracy
+             **kwargs: Additional parameters for the transcription API
+
+         Returns:
+             List of transcription results
+         """
+         results = []
          for audio_file in audio_files:
             try:
                  result = await self.transcribe(audio_file, language, prompt)
                  results.append(result)
              except Exception as e:
-                 logger.error(f"Error transcribing audio file: {e}")
+                 logger.error(f"Failed to transcribe {audio_file}: {e}")
                  results.append({
-                     "text": "",
-                     "language": "unknown",
-                     "duration": None,
-                     "segments": [],
-                     "confidence": None,
-                     "error": str(e)
+                     "error": str(e),
+                     "file": str(audio_file),
+                     "text": None
                  })
 
          return results
-
+
      async def detect_language(self, audio_file: Union[str, BinaryIO]) -> Dict[str, Any]:
-         """Detect language of audio file"""
+         """
+         Detect the language of an audio file.
+
+         Args:
+             audio_file: Path to audio file or file-like object
+             **kwargs: Additional parameters
+
+         Returns:
+             Dict containing detected language and confidence
+         """
          try:
-             # Transcribe with language detection
-             result = await self.transcribe(audio_file, language=None)
+             # Use transcription with language detection - need to access client directly
+             transcription = await self.client.audio.transcriptions.create(
+                 file=audio_file if not isinstance(audio_file, str) else open(audio_file, "rb"),
+                 model=self.model_name,
+                 response_format="verbose_json"
+             )
+
+             result = {
+                 "text": transcription.text,
+                 "language": getattr(transcription, 'language', "unknown")
+             }
 
              return {
-                 "language": result["language"],
-                 "confidence": 1.0,  # Whisper is generally confident
-                 "alternatives": []  # Whisper doesn't provide alternatives
+                 "language": result.get("language", "unknown"),
+                 "confidence": 1.0,  # OpenAI doesn't provide confidence scores
+                 "text_sample": result.get("text", "")[:100] if result.get("text") else ""
              }
 
          except Exception as e:
-             logger.error(f"Error detecting language: {e}")
-             raise
-
+             logger.error(f"Language detection failed: {e}")
+             return {
+                 "language": "unknown",
+                 "confidence": 0.0,
+                 "error": str(e)
+             }
+
      def get_supported_formats(self) -> List[str]:
-         """Get list of supported audio formats"""
-         return self.supported_formats.copy()
+         """
+         Get list of supported audio formats.
+
+         Returns:
+             List of supported file extensions
+         """
+         return self.supported_formats
 
      def get_supported_languages(self) -> List[str]:
-         """Get list of supported language codes"""
-         # Whisper supports 99+ languages
+         """
+         Get list of supported language codes for OpenAI Whisper.
+
+         Returns:
+             List of supported language codes
+         """
          return [
-             'af', 'am', 'ar', 'as', 'az', 'ba', 'be', 'bg', 'bn', 'bo', 'br', 'bs', 'ca',
-             'cs', 'cy', 'da', 'de', 'el', 'en', 'es', 'et', 'eu', 'fa', 'fi', 'fo', 'fr',
-             'gl', 'gu', 'ha', 'haw', 'he', 'hi', 'hr', 'ht', 'hu', 'hy', 'id', 'is', 'it',
-             'ja', 'jw', 'ka', 'kk', 'km', 'kn', 'ko', 'la', 'lb', 'ln', 'lo', 'lt', 'lv',
-             'mg', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms', 'mt', 'my', 'ne', 'nl', 'nn', 'no',
-             'oc', 'pa', 'pl', 'ps', 'pt', 'ro', 'ru', 'sa', 'sd', 'si', 'sk', 'sl', 'sn',
-             'so', 'sq', 'sr', 'su', 'sv', 'sw', 'ta', 'te', 'tg', 'th', 'tk', 'tl', 'tr',
-             'tt', 'uk', 'ur', 'uz', 'vi', 'yi', 'yo', 'zh'
+             'af', 'ar', 'hy', 'az', 'be', 'bs', 'bg', 'ca', 'zh', 'hr', 'cs', 'da',
+             'nl', 'en', 'et', 'fi', 'fr', 'gl', 'de', 'el', 'he', 'hi', 'hu', 'is',
+             'id', 'it', 'ja', 'kn', 'kk', 'ko', 'lv', 'lt', 'mk', 'ms', 'mr', 'mi',
+             'ne', 'no', 'fa', 'pl', 'pt', 'ro', 'ru', 'sr', 'sk', 'sl', 'es', 'sw',
+             'sv', 'tl', 'ta', 'th', 'tr', 'uk', 'ur', 'vi', 'cy'
          ]
-
+
      def get_max_file_size(self) -> int:
-         """Get maximum file size in bytes"""
+         """
+         Get maximum file size limit in bytes.
+
+         Returns:
+             Maximum file size in bytes
+         """
          return self.max_file_size
-
+
      async def close(self):
          """Cleanup resources"""
-         await self.client.close()
-         logger.info("OpenAISTTService client has been closed.")
+         if hasattr(self.client, 'close'):
+             await self.client.close()
+         logger.info("OpenAI STT service closed")
@@ -4,20 +4,18 @@ import os
  from openai import AsyncOpenAI
  from tenacity import retry, stop_after_attempt, wait_exponential
  from isa_model.inference.services.audio.base_tts_service import BaseTTSService
- from isa_model.inference.providers.base_provider import BaseProvider
- from isa_model.inference.billing_tracker import ServiceType
  import logging
 
  logger = logging.getLogger(__name__)
 
  class OpenAITTSService(BaseTTSService):
-     """Audio model service wrapper for YYDS"""
+     """OpenAI TTS service with unified architecture"""
 
-     def __init__(self, provider: 'BaseProvider', model_name: str):
-         super().__init__(provider, model_name)
+     def __init__(self, provider_name: str, model_name: str = "tts-1", **kwargs):
+         super().__init__(provider_name, model_name, **kwargs)
 
-         # Get full configuration from provider (including sensitive data)
-         provider_config = provider.get_full_config()
+         # Get configuration from centralized config manager
+         provider_config = self.get_provider_config()
 
          # Initialize AsyncOpenAI client with provider configuration
          try:
@@ -113,8 +111,8 @@ class OpenAITTSService(BaseTTSService):
              estimated_duration_seconds = (words / 150.0) * 60.0 / speed
 
              # Track usage for billing (OpenAI TTS is token-based: $15 per 1M characters)
-             self._track_usage(
-                 service_type=ServiceType.AUDIO_TTS,
+             await self._track_usage(
+                 service_type="audio_tts",
                  operation="synthesize_speech",
                  input_tokens=len(text),  # Characters as input tokens
                  output_tokens=0,
@@ -130,8 +128,12 @@ class OpenAITTSService(BaseTTSService):
                  }
              )
 
+             # For HTTP API compatibility, encode audio data as base64
+             import base64
+             audio_base64 = base64.b64encode(audio_data).decode('utf-8')
+
              return {
-                 "audio_data": audio_data,
+                 "audio_data_base64": audio_base64,  # Base64 encoded for JSON compatibility
                  "format": format,
                  "duration": estimated_duration_seconds,
                  "sample_rate": 24000  # Default for OpenAI TTS