isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
isa_model/inference/services/audio/base_stt_service.py

@@ -1,13 +1,172 @@
  from abc import ABC, abstractmethod
  from typing import Dict, Any, List, Union, Optional, BinaryIO
+ import aiohttp
+ import asyncio
+ import tempfile
+ import os
+ import logging
+ from io import BytesIO
  from isa_model.inference.services.base_service import BaseService

+ logger = logging.getLogger(__name__)
+
  class BaseSTTService(BaseService):
-     """Base class for Speech-to-Text services with unified task dispatch"""
+     """Base class for Speech-to-Text services with unified task dispatch and URL support"""
+
+     async def _prepare_audio_input(self, audio_input: Union[str, BinaryIO, bytes]) -> Union[str, BinaryIO]:
+         """
+         Prepare audio input by handling URLs, file paths, bytes data, and file objects
+
+         Args:
+             audio_input: Audio input (URL, file path, bytes data, or file object)
+
+         Returns:
+             Prepared audio input (local file path or file object)
+         """
+         if isinstance(audio_input, bytes):
+             # Handle bytes data from API uploads
+             logger.info(f"Converting bytes data to temporary file ({len(audio_input)} bytes)")
+             return await self._save_bytes_to_temp_file(audio_input)
+         elif isinstance(audio_input, str):
+             # Check if it's a URL
+             if audio_input.startswith(('http://', 'https://')):
+                 logger.info(f"Downloading audio from URL: {audio_input}")
+                 return await self._download_audio_url(audio_input)
+             else:
+                 # Regular file path or base64 string
+                 return audio_input
+         else:
+             # Already a file object
+             return audio_input
+
+     async def _prepare_audio_input_with_context(self, audio_input: Union[str, BinaryIO, bytes], context: Dict[str, Any]) -> Union[str, BinaryIO]:
+         """
+         Prepare audio input with additional context from kwargs
+
+         Args:
+             audio_input: Audio input (URL, file path, bytes data, or file object)
+             context: Additional context including filename, content_type
+
+         Returns:
+             Prepared audio input (local file path or file object)
+         """
+         if isinstance(audio_input, bytes):
+             # Handle bytes data from API uploads
+             filename = context.get('filename')
+             content_type = context.get('content_type')
+             logger.info(f"Converting bytes data to temporary file ({len(audio_input)} bytes), filename={filename}, content_type={content_type}")
+             return await self._save_bytes_to_temp_file(audio_input, filename, content_type)
+         else:
+             return await self._prepare_audio_input(audio_input)
+
+     async def _download_audio_url(self, url: str) -> str:
+         """
+         Download audio file from URL to temporary file
+
+         Args:
+             url: HTTP/HTTPS URL to audio file
+
+         Returns:
+             Path to downloaded temporary file
+
+         Raises:
+             Exception: If download fails
+         """
+         try:
+             async with aiohttp.ClientSession() as session:
+                 async with session.get(url) as response:
+                     if response.status != 200:
+                         raise Exception(f"Failed to download audio: HTTP {response.status}")
+
+                     # Get content type to determine file extension
+                     content_type = response.headers.get('Content-Type', '')
+                     file_ext = self._get_file_extension_from_content_type(content_type)
+
+                     # Create temporary file
+                     temp_file = tempfile.NamedTemporaryFile(
+                         delete=False,
+                         suffix=file_ext,
+                         prefix='audio_download_'
+                     )
+
+                     # Download and save
+                     async for chunk in response.content.iter_chunked(8192):
+                         temp_file.write(chunk)
+
+                     temp_file.close()
+                     logger.info(f"Downloaded audio to temporary file: {temp_file.name}")
+                     return temp_file.name
+
+         except Exception as e:
+             logger.error(f"Failed to download audio from URL {url}: {e}")
+             raise Exception(f"Audio URL download failed: {e}") from e
+
+     def _get_file_extension_from_content_type(self, content_type: str) -> str:
+         """Get appropriate file extension from Content-Type header"""
+         content_type_map = {
+             'audio/mpeg': '.mp3',
+             'audio/mp3': '.mp3',
+             'audio/wav': '.wav',
+             'audio/wave': '.wav',
+             'audio/x-wav': '.wav',
+             'audio/flac': '.flac',
+             'audio/ogg': '.ogg',
+             'audio/m4a': '.m4a',
+             'audio/mp4': '.mp4',
+             'audio/webm': '.webm'
+         }
+         return content_type_map.get(content_type.lower(), '.audio')
+
+     async def _save_bytes_to_temp_file(self, audio_bytes: bytes, filename: Optional[str] = None, content_type: Optional[str] = None) -> str:
+         """
+         Save audio bytes data to temporary file
+
+         Args:
+             audio_bytes: Audio data as bytes
+             filename: Optional filename to determine extension
+             content_type: Optional content type to determine extension
+
+         Returns:
+             Path to temporary file containing audio data
+         """
+         try:
+             # Determine file extension from filename or content type
+             suffix = '.mp3'  # Default
+             if filename and '.' in filename:
+                 suffix = '.' + filename.split('.')[-1]
+             elif content_type:
+                 suffix = self._get_file_extension_from_content_type(content_type)
+
+             # Create temporary file with proper audio extension
+             temp_file = tempfile.NamedTemporaryFile(
+                 delete=False,
+                 suffix=suffix,
+                 prefix='audio_bytes_'
+             )
+
+             # Write bytes data
+             temp_file.write(audio_bytes)
+             temp_file.close()
+
+             logger.info(f"Saved {len(audio_bytes)} bytes to temporary file: {temp_file.name}")
+             return temp_file.name
+
+         except Exception as e:
+             logger.error(f"Failed to save audio bytes to temporary file: {e}")
+             raise Exception(f"Audio bytes save failed: {e}") from e
+
+     def _cleanup_temp_file(self, file_path: str):
+         """Clean up temporary downloaded file"""
+         try:
+             if file_path and file_path.startswith(tempfile.gettempdir()):
+                 os.unlink(file_path)
+                 logger.debug(f"Cleaned up temporary file: {file_path}")
+         except Exception as e:
+             logger.warning(f"Failed to cleanup temporary file {file_path}: {e}")

      async def invoke(
          self,
-         audio_input: Union[str, BinaryIO, List[Union[str, BinaryIO]]],
+         audio_input: Union[str, BinaryIO, bytes, List[Union[str, BinaryIO, bytes]]],
          task: Optional[str] = None,
          **kwargs
      ) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
@@ -30,33 +189,47 @@ class BaseSTTService(BaseService):
          # ==================== Speech-to-Text Tasks ====================
          if task == "transcribe":
              if isinstance(audio_input, list):
+                 # Prepare all audio inputs (handle URLs)
+                 prepared_inputs = []
+                 for audio in audio_input:
+                     prepared_input = await self._prepare_audio_input_with_context(audio, kwargs)
+                     prepared_inputs.append(prepared_input)
                  return await self.transcribe_batch(
-                     audio_input,
+                     prepared_inputs,
                      kwargs.get("language"),
                      kwargs.get("prompt")
                  )
              else:
+                 # Prepare single audio input (handle URLs)
+                 prepared_input = await self._prepare_audio_input_with_context(audio_input, kwargs)
                  return await self.transcribe(
-                     audio_input,
+                     prepared_input,
                      kwargs.get("language"),
                      kwargs.get("prompt")
                  )
          elif task == "translate":
              if isinstance(audio_input, list):
                  raise ValueError("translate task requires single audio input")
-             return await self.translate(audio_input)
+             prepared_input = await self._prepare_audio_input_with_context(audio_input, kwargs)
+             return await self.translate(prepared_input)
          elif task == "batch_transcribe":
              if not isinstance(audio_input, list):
                  audio_input = [audio_input]
+             # Prepare all audio inputs (handle URLs)
+             prepared_inputs = []
+             for audio in audio_input:
+                 prepared_input = await self._prepare_audio_input_with_context(audio, kwargs)
+                 prepared_inputs.append(prepared_input)
              return await self.transcribe_batch(
-                 audio_input,
+                 prepared_inputs,
                  kwargs.get("language"),
                  kwargs.get("prompt")
              )
          elif task == "detect_language":
              if isinstance(audio_input, list):
                  raise ValueError("detect_language task requires single audio input")
-             return await self.detect_language(audio_input)
+             prepared_input = await self._prepare_audio_input_with_context(audio_input, kwargs)
+             return await self.detect_language(prepared_input)
          else:
              raise NotImplementedError(f"{self.__class__.__name__} does not support task: {task}")

@@ -72,7 +245,7 @@ class BaseSTTService(BaseService):
      @abstractmethod
      async def transcribe(
          self,
-         audio_file: Union[str, BinaryIO],
+         audio_file: Union[str, BinaryIO, bytes],
          language: Optional[str] = None,
          prompt: Optional[str] = None
      ) -> Dict[str, Any]:
@@ -96,7 +269,7 @@ class BaseSTTService(BaseService):
      @abstractmethod
      async def translate(
          self,
-         audio_file: Union[str, BinaryIO]
+         audio_file: Union[str, BinaryIO, bytes]
      ) -> Dict[str, Any]:
          """
          Translate audio file to English text
@@ -115,7 +288,7 @@ class BaseSTTService(BaseService):
      @abstractmethod
      async def transcribe_batch(
          self,
-         audio_files: List[Union[str, BinaryIO]],
+         audio_files: List[Union[str, BinaryIO, bytes]],
          language: Optional[str] = None,
          prompt: Optional[str] = None
      ) -> List[Dict[str, Any]]:
@@ -133,7 +306,7 @@ class BaseSTTService(BaseService):
          pass

      @abstractmethod
-     async def detect_language(self, audio_file: Union[str, BinaryIO]) -> Dict[str, Any]:
+     async def detect_language(self, audio_file: Union[str, BinaryIO, bytes]) -> Dict[str, Any]:
          """
          Detect language of audio file

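Taken together, these hunks let any STT subclass accept a URL or raw upload bytes through invoke(), with the base class normalizing the input to a temporary file first. A minimal usage sketch under assumptions not shown in this diff (the constructor arguments and the audio URL are placeholders, not part of the package):

import asyncio
from isa_model.inference.services.audio.openai_stt_service import OpenAISTTService

async def main():
    # Hypothetical construction; real initialization likely needs provider config / API keys.
    stt = OpenAISTTService(provider_name="openai", model_name="whisper-1")

    # URL input: _prepare_audio_input downloads it to a temp file before transcribe() runs.
    by_url = await stt.invoke("https://example.com/sample.mp3", task="transcribe", language="en")

    # Bytes input: filename/content_type hints in kwargs guide the temp-file suffix.
    with open("clip.wav", "rb") as f:
        by_bytes = await stt.invoke(f.read(), task="transcribe", filename="clip.wav")

    print(by_url, by_bytes)

asyncio.run(main())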
isa_model/inference/services/audio/openai_stt_service.py

@@ -47,7 +47,7 @@ class OpenAISTTService(BaseSTTService):
          wait=wait_exponential(multiplier=1, min=4, max=10),
          reraise=True
      )
-     async def transcribe(self, audio_file: Union[str, BinaryIO], language: Optional[str] = None, prompt: Optional[str] = None) -> Dict[str, Any]:
+     async def transcribe(self, audio_file: Union[str, BinaryIO, bytes], language: Optional[str] = None, prompt: Optional[str] = None, **kwargs) -> Dict[str, Any]:
          """
          Transcribe audio file to text using OpenAI's Whisper model.

@@ -73,8 +73,24 @@
          if prompt:
              transcription_params["prompt"] = prompt

-         # Handle file input - support base64 strings, file paths, and file objects
-         if isinstance(audio_file, str):
+         # Handle file input - support bytes, base64 strings, file paths, and file objects
+         if isinstance(audio_file, bytes):
+             # Handle bytes data directly
+             logger.info(f"Processing bytes audio data ({len(audio_file)} bytes)")
+             from io import BytesIO
+             audio_buffer = BytesIO(audio_file)
+
+             # Use filename from kwargs if provided, otherwise default to .mp3
+             filename = kwargs.get('filename', 'audio.mp3')
+             if filename and not filename.endswith(('.mp3', '.wav', '.m4a', '.flac', '.ogg', '.webm', '.mp4')):
+                 filename += '.mp3'  # Add extension if missing
+             audio_buffer.name = filename
+             logger.info(f"Using filename: {filename}")
+             transcription = await self.client.audio.transcriptions.create(
+                 file=audio_buffer,
+                 **transcription_params
+             )
+         elif isinstance(audio_file, str):
              # Check if it's a base64 string or file path
              if len(audio_file) > 100 and not os.path.exists(audio_file):
                  # Likely a base64 string
@@ -147,7 +163,7 @@
          wait=wait_exponential(multiplier=1, min=4, max=10),
          reraise=True
      )
-     async def translate(self, audio_file: Union[str, BinaryIO]) -> Dict[str, Any]:
+     async def translate(self, audio_file: Union[str, BinaryIO, bytes]) -> Dict[str, Any]:
          """
          Translate audio file to English text using OpenAI's Whisper model.

@@ -211,7 +227,7 @@
              logger.error(f"Translation failed: {e}")
              raise

-     async def transcribe_batch(self, audio_files: List[Union[str, BinaryIO]], language: Optional[str] = None, prompt: Optional[str] = None) -> List[Dict[str, Any]]:
+     async def transcribe_batch(self, audio_files: List[Union[str, BinaryIO, bytes]], language: Optional[str] = None, prompt: Optional[str] = None) -> List[Dict[str, Any]]:
          """
          Transcribe multiple audio files in batch.

@@ -238,7 +254,7 @@

          return results

-     async def detect_language(self, audio_file: Union[str, BinaryIO]) -> Dict[str, Any]:
+     async def detect_language(self, audio_file: Union[str, BinaryIO, bytes]) -> Dict[str, Any]:
          """
          Detect the language of an audio file.

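The bytes branch above relies on a small detail of the OpenAI SDK upload path: a BytesIO object is given a .name with a recognized audio extension so the multipart upload carries a filename the API can use to detect the format. The same trick in isolation (variable names are local to this sketch, not part of the package):

from io import BytesIO

with open("clip.wav", "rb") as f:
    audio_bytes = f.read()

buffer = BytesIO(audio_bytes)
buffer.name = "clip.wav"  # filename hint so the transcription endpoint can infer the audio format
# transcription = await self.client.audio.transcriptions.create(file=buffer, **transcription_params)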
isa_model/inference/services/embedding/ollama_embed_service.py

@@ -4,6 +4,7 @@ import asyncio
  from typing import List, Dict, Any, Optional

  from isa_model.inference.services.embedding.base_embed_service import BaseEmbedService
+ from isa_model.core.config.config_manager import ConfigManager

  logger = logging.getLogger(__name__)

@@ -21,9 +22,20 @@ class OllamaEmbedService(BaseEmbedService):

          # Initialize HTTP client with provider configuration
          try:
-             host = provider_config.get("host", "localhost")
-             port = provider_config.get("port", 11434)
-             base_url = f"http://{host}:{port}"
+             config_manager = ConfigManager()
+             # Use Consul discovery with fallback
+             default_base_url = config_manager.get_ollama_url()
+
+             if "base_url" in provider_config:
+                 base_url = provider_config["base_url"]
+             else:
+                 host = provider_config.get("host", "localhost")
+                 port = provider_config.get("port", 11434)
+                 base_url = provider_config.get("base_url", f"http://{host}:{port}")
+
+             # Use config manager default (Consul discovery) if still not set
+             if base_url == f"http://localhost:11434":
+                 base_url = default_base_url

              self.client = httpx.AsyncClient(base_url=base_url, timeout=30.0)

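The rewritten initializer resolves the Ollama endpoint in a fixed order: an explicit base_url in the provider config wins, then host/port, and only a still-default http://localhost:11434 is swapped for the ConfigManager's Consul-discovered URL. A standalone sketch of that resolution order (the function name is ours, for illustration only):

def resolve_ollama_base_url(provider_config: dict, discovered_default: str) -> str:
    # Explicit base_url always wins.
    if "base_url" in provider_config:
        return provider_config["base_url"]
    # Otherwise build one from host/port.
    host = provider_config.get("host", "localhost")
    port = provider_config.get("port", 11434)
    base_url = f"http://{host}:{port}"
    # If nothing overrode the localhost default, defer to service discovery.
    if base_url == "http://localhost:11434":
        return discovered_default
    return base_url

# An empty provider config falls back to the discovered endpoint.
print(resolve_ollama_base_url({}, "http://ollama.service.consul:11434"))
print(resolve_ollama_base_url({"host": "10.0.0.5"}, "http://ollama.service.consul:11434"))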
isa_model/inference/services/embedding/resilient_embed_service.py (new file)

@@ -0,0 +1,285 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+
+ """
+ Resilient Embedding Service - Provides fallback mechanisms for embedding operations
+ Automatically handles OpenAI API failures with local embedding alternatives
+ """
+
+ import logging
+ import random
+ import numpy as np
+ from typing import List, Dict, Any, Optional, Union
+ from openai import APIConnectionError, APITimeoutError, RateLimitError, AuthenticationError
+
+ from isa_model.inference.services.embedding.openai_embed_service import OpenAIEmbedService
+ from isa_model.inference.services.embedding.base_embed_service import BaseEmbedService
+
+ logger = logging.getLogger(__name__)
+
+ class ResilientEmbedService(BaseEmbedService):
+     """
+     Resilient embedding service with automatic fallback mechanisms
+
+     When OpenAI service fails, automatically falls back to:
+     1. Simple TF-IDF based embeddings
+     2. Random embeddings (for testing/demo purposes)
+     """
+
+     def __init__(self, provider_name: str = "openai", model_name: str = "text-embedding-3-small", **kwargs):
+         super().__init__(provider_name, model_name, **kwargs)
+
+         # Try to initialize OpenAI service
+         self.primary_service = None
+         self.fallback_mode = False
+
+         try:
+             self.primary_service = OpenAIEmbedService(provider_name, model_name, **kwargs)
+             logger.info("✅ Primary OpenAI embedding service initialized")
+         except Exception as e:
+             logger.warning(f"Failed to initialize OpenAI service, starting in fallback mode: {e}")
+             self.fallback_mode = True
+
+         # Initialize TF-IDF vectorizer for fallback
+         self._init_fallback_vectorizer()
+
+     def _init_fallback_vectorizer(self):
+         """Initialize TF-IDF vectorizer for fallback embeddings"""
+         try:
+             from sklearn.feature_extraction.text import TfidfVectorizer
+
+             # Use a simple TF-IDF vectorizer with limited features
+             self.tfidf_vectorizer = TfidfVectorizer(
+                 max_features=1536,  # Match OpenAI dimensions
+                 stop_words='english',
+                 ngram_range=(1, 2),
+                 lowercase=True,
+                 strip_accents='unicode'
+             )
+
+             # Pre-fit with some common words to ensure consistency
+             common_words = [
+                 "hello world", "machine learning", "artificial intelligence",
+                 "data science", "natural language processing", "computer vision",
+                 "deep learning", "neural networks", "text analysis",
+                 "information retrieval", "semantic search", "embeddings"
+             ]
+             self.tfidf_vectorizer.fit(common_words)
+             self.tfidf_available = True
+             logger.info("✅ TF-IDF fallback vectorizer initialized")
+
+         except ImportError:
+             logger.warning("scikit-learn not available, using random embeddings as fallback")
+             self.tfidf_available = False
+
+     def _generate_fallback_embedding(self, text: str, dimension: int = 1536) -> List[float]:
+         """Generate fallback embedding for a single text"""
+
+         if self.tfidf_available and hasattr(self, 'tfidf_vectorizer'):
+             try:
+                 # Use TF-IDF for more meaningful embeddings
+                 tfidf_vector = self.tfidf_vectorizer.transform([text]).toarray()[0]
+
+                 # Pad or truncate to desired dimension
+                 if len(tfidf_vector) < dimension:
+                     padding = [0.0] * (dimension - len(tfidf_vector))
+                     tfidf_vector = np.concatenate([tfidf_vector, padding])
+                 elif len(tfidf_vector) > dimension:
+                     tfidf_vector = tfidf_vector[:dimension]
+
+                 # Normalize to unit vector
+                 norm = np.linalg.norm(tfidf_vector)
+                 if norm > 0:
+                     tfidf_vector = tfidf_vector / norm
+
+                 return tfidf_vector.tolist()
+
+             except Exception as e:
+                 logger.warning(f"TF-IDF fallback failed: {e}, using random embedding")
+
+         # Random embedding as last resort (normalized)
+         random.seed(hash(text) % (2**32))  # Deterministic based on text
+         embedding = [random.gauss(0, 1) for _ in range(dimension)]
+
+         # Normalize to unit vector
+         norm = np.sqrt(sum(x*x for x in embedding))
+         if norm > 0:
+             embedding = [x/norm for x in embedding]
+
+         return embedding
+
+     async def create_text_embedding(self, text: str) -> List[float]:
+         """Create embedding for single text with fallback"""
+
+         # Try primary service first if available
+         if not self.fallback_mode and self.primary_service:
+             try:
+                 result = await self.primary_service.create_text_embedding(text)
+                 logger.debug("✅ Used primary OpenAI service")
+                 return result
+
+             except (APIConnectionError, APITimeoutError) as e:
+                 logger.warning(f"OpenAI connection issue, switching to fallback: {e}")
+                 self.fallback_mode = True
+             except RateLimitError as e:
+                 logger.warning(f"OpenAI rate limit hit, using fallback: {e}")
+             except AuthenticationError as e:
+                 logger.error(f"OpenAI authentication failed, switching to fallback: {e}")
+                 self.fallback_mode = True
+             except Exception as e:
+                 logger.warning(f"OpenAI service error, using fallback: {e}")
+
+         # Use fallback embedding
+         logger.info(f"Using fallback embedding for text: {text[:50]}...")
+         return self._generate_fallback_embedding(text)
+
+     async def create_text_embeddings(self, texts: List[str]) -> List[List[float]]:
+         """Create embeddings for multiple texts with fallback"""
+         if not texts:
+             return []
+
+         # Try primary service first if available
+         if not self.fallback_mode and self.primary_service:
+             try:
+                 result = await self.primary_service.create_text_embeddings(texts)
+                 logger.debug(f"✅ Used primary OpenAI service for {len(texts)} texts")
+                 return result
+
+             except (APIConnectionError, APITimeoutError) as e:
+                 logger.warning(f"OpenAI connection issue, switching to fallback: {e}")
+                 self.fallback_mode = True
+             except RateLimitError as e:
+                 logger.warning(f"OpenAI rate limit hit, using fallback: {e}")
+             except AuthenticationError as e:
+                 logger.error(f"OpenAI authentication failed, switching to fallback: {e}")
+                 self.fallback_mode = True
+             except Exception as e:
+                 logger.warning(f"OpenAI service error, using fallback: {e}")
+
+         # Use fallback embeddings
+         logger.info(f"Using fallback embeddings for {len(texts)} texts")
+         return [self._generate_fallback_embedding(text) for text in texts]
+
+     async def create_chunks(self, text: str, metadata: Optional[Dict] = None,
+                             chunk_size: int = 400, overlap: int = 50, **kwargs) -> List[Dict]:
+         """Create text chunks with embeddings (with fallback)"""
+         words = text.split()
+         if not words:
+             return []
+
+         chunks = []
+         chunk_texts = []
+
+         for i in range(0, len(words), chunk_size - overlap):
+             chunk_words = words[i:i + chunk_size]
+             chunk_text = " ".join(chunk_words)
+             chunk_texts.append(chunk_text)
+
+             chunks.append({
+                 "text": chunk_text,
+                 "start_index": i,
+                 "end_index": min(i + chunk_size, len(words)),
+                 "metadata": metadata or {}
+             })
+
+         # Get embeddings for all chunks
+         embeddings = await self.create_text_embeddings(chunk_texts)
+
+         # Add embeddings to chunks
+         for chunk, embedding in zip(chunks, embeddings):
+             chunk["embedding"] = embedding
+             chunk["fallback_used"] = self.fallback_mode
+
+         return chunks
+
+     async def compute_similarity(self, embedding1: List[float], embedding2: List[float]) -> float:
+         """Compute cosine similarity between two embeddings"""
+         import math
+
+         try:
+             dot_product = sum(a * b for a, b in zip(embedding1, embedding2))
+             norm1 = math.sqrt(sum(a * a for a in embedding1))
+             norm2 = math.sqrt(sum(b * b for b in embedding2))
+
+             if norm1 * norm2 == 0:
+                 return 0.0
+
+             return dot_product / (norm1 * norm2)
+         except Exception as e:
+             logger.error(f"Error computing similarity: {e}")
+             return 0.0
+
+     async def find_similar_texts(
+         self,
+         query_embedding: List[float],
+         candidate_embeddings: List[List[float]],
+         top_k: int = 5
+     ) -> List[Dict[str, Any]]:
+         """Find most similar texts based on embeddings"""
+         try:
+             similarities = []
+
+             for i, candidate in enumerate(candidate_embeddings):
+                 similarity = await self.compute_similarity(query_embedding, candidate)
+                 similarities.append({
+                     "index": i,
+                     "similarity": similarity
+                 })
+
+             # Sort by similarity in descending order and return top_k
+             similarities.sort(key=lambda x: x["similarity"], reverse=True)
+             return similarities[:top_k]
+
+         except Exception as e:
+             logger.error(f"Error finding similar texts: {e}")
+             return []
+
+     def get_embedding_dimension(self) -> int:
+         """Get the dimension of embeddings produced by this service"""
+         return 1536  # Standard dimension for consistency
+
+     def get_max_input_length(self) -> int:
+         """Get maximum input text length supported"""
+         return 8192
+
+     def is_fallback_mode(self) -> bool:
+         """Check if service is running in fallback mode"""
+         return self.fallback_mode
+
+     def get_service_status(self) -> Dict[str, Any]:
+         """Get current service status and capabilities"""
+         return {
+             "primary_service_available": not self.fallback_mode and self.primary_service is not None,
+             "fallback_mode": self.fallback_mode,
+             "tfidf_available": self.tfidf_available,
+             "provider": self.provider_name,
+             "model": self.model_name,
+             "embedding_dimension": self.get_embedding_dimension(),
+             "max_input_length": self.get_max_input_length()
+         }
+
+     async def health_check(self) -> Dict[str, Any]:
+         """Health check with detailed status"""
+         status = self.get_service_status()
+
+         # Test embedding generation
+         try:
+             test_embedding = await self.create_text_embedding("test")
+             status["embedding_test"] = {
+                 "success": True,
+                 "dimension": len(test_embedding),
+                 "fallback_used": self.fallback_mode
+             }
+         except Exception as e:
+             status["embedding_test"] = {
+                 "success": False,
+                 "error": str(e)
+             }
+
+         return status
+
+     async def close(self):
+         """Cleanup resources"""
+         if self.primary_service:
+             await self.primary_service.close()
+         logger.info("ResilientEmbedService has been closed.")
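ResilientEmbedService keeps the OpenAI embedding path as the primary route and degrades to TF-IDF (or deterministic random vectors) when the API is unavailable, always returning 1536-dimensional vectors. A usage sketch, assuming the constructor defaults above and OpenAI credentials supplied via the environment:

import asyncio
from isa_model.inference.services.embedding.resilient_embed_service import ResilientEmbedService

async def main():
    service = ResilientEmbedService(provider_name="openai", model_name="text-embedding-3-small")

    vectors = await service.create_text_embeddings(["hello world", "semantic search"])
    print(len(vectors), len(vectors[0]))   # 2 vectors, 1536 dimensions each
    print(service.is_fallback_mode())      # True if the OpenAI call failed and the TF-IDF/random path was used

    score = await service.compute_similarity(vectors[0], vectors[1])
    print(f"cosine similarity: {score:.3f}")

    await service.close()

asyncio.run(main())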
isa_model/inference/services/llm/__init__.py

@@ -6,9 +6,17 @@ LLM Services - Business logic services for Language Models
  from .ollama_llm_service import OllamaLLMService
  from .openai_llm_service import OpenAILLMService
  from .yyds_llm_service import YydsLLMService
+ from .huggingface_llm_service import ISALLMService, HuggingFaceLLMService, HuggingFaceInferenceService
+ # LocalLLMService requires torch (local mode only) - import explicitly when needed
+ # from .local_llm_service import LocalLLMService, create_local_llm_service

  __all__ = [
      "OllamaLLMService",
-     "OpenAILLMService",
-     "YydsLLMService"
+     "OpenAILLMService",
+     "YydsLLMService",
+     "ISALLMService",
+     "HuggingFaceLLMService",
+     "HuggingFaceInferenceService",
+     # "LocalLLMService",  # Requires isa_model[local]
+     # "create_local_llm_service"
  ]