isa-model 0.1.0 (isa_model-0.1.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. isa_model/__init__.py +5 -0
  2. isa_model/core/model_manager.py +143 -0
  3. isa_model/core/model_registry.py +115 -0
  4. isa_model/core/model_router.py +226 -0
  5. isa_model/core/model_storage.py +133 -0
  6. isa_model/core/model_version.py +0 -0
  7. isa_model/core/resource_manager.py +202 -0
  8. isa_model/core/storage/hf_storage.py +0 -0
  9. isa_model/core/storage/local_storage.py +0 -0
  10. isa_model/core/storage/minio_storage.py +0 -0
  11. isa_model/deployment/mlflow_gateway/__init__.py +8 -0
  12. isa_model/deployment/mlflow_gateway/start_gateway.py +65 -0
  13. isa_model/deployment/unified_multimodal_client.py +341 -0
  14. isa_model/inference/__init__.py +11 -0
  15. isa_model/inference/adapter/triton_adapter.py +453 -0
  16. isa_model/inference/adapter/unified_api.py +248 -0
  17. isa_model/inference/ai_factory.py +354 -0
  18. isa_model/inference/backends/Pytorch/bge_embed_backend.py +188 -0
  19. isa_model/inference/backends/Pytorch/gemma_backend.py +167 -0
  20. isa_model/inference/backends/Pytorch/llama_backend.py +166 -0
  21. isa_model/inference/backends/Pytorch/whisper_backend.py +194 -0
  22. isa_model/inference/backends/__init__.py +53 -0
  23. isa_model/inference/backends/base_backend_client.py +26 -0
  24. isa_model/inference/backends/container_services.py +104 -0
  25. isa_model/inference/backends/local_services.py +72 -0
  26. isa_model/inference/backends/openai_client.py +130 -0
  27. isa_model/inference/backends/replicate_client.py +197 -0
  28. isa_model/inference/backends/third_party_services.py +239 -0
  29. isa_model/inference/backends/triton_client.py +97 -0
  30. isa_model/inference/base.py +46 -0
  31. isa_model/inference/client_sdk/__init__.py +0 -0
  32. isa_model/inference/client_sdk/client.py +134 -0
  33. isa_model/inference/client_sdk/client_data_std.py +34 -0
  34. isa_model/inference/client_sdk/client_sdk_schema.py +16 -0
  35. isa_model/inference/client_sdk/exceptions.py +0 -0
  36. isa_model/inference/engine/triton/model_repository/bge/1/model.py +174 -0
  37. isa_model/inference/engine/triton/model_repository/gemma/1/model.py +250 -0
  38. isa_model/inference/engine/triton/model_repository/llama/1/model.py +76 -0
  39. isa_model/inference/engine/triton/model_repository/whisper/1/model.py +195 -0
  40. isa_model/inference/providers/__init__.py +19 -0
  41. isa_model/inference/providers/base_provider.py +30 -0
  42. isa_model/inference/providers/model_cache_manager.py +341 -0
  43. isa_model/inference/providers/ollama_provider.py +73 -0
  44. isa_model/inference/providers/openai_provider.py +87 -0
  45. isa_model/inference/providers/replicate_provider.py +94 -0
  46. isa_model/inference/providers/triton_provider.py +439 -0
  47. isa_model/inference/providers/vllm_provider.py +0 -0
  48. isa_model/inference/providers/yyds_provider.py +83 -0
  49. isa_model/inference/services/__init__.py +14 -0
  50. isa_model/inference/services/audio/fish_speech/handler.py +215 -0
  51. isa_model/inference/services/audio/runpod_tts_fish_service.py +212 -0
  52. isa_model/inference/services/audio/triton_speech_service.py +138 -0
  53. isa_model/inference/services/audio/whisper_service.py +186 -0
  54. isa_model/inference/services/audio/yyds_audio_service.py +71 -0
  55. isa_model/inference/services/base_service.py +106 -0
  56. isa_model/inference/services/base_tts_service.py +66 -0
  57. isa_model/inference/services/embedding/bge_service.py +183 -0
  58. isa_model/inference/services/embedding/ollama_embed_service.py +85 -0
  59. isa_model/inference/services/embedding/ollama_rerank_service.py +118 -0
  60. isa_model/inference/services/embedding/onnx_rerank_service.py +73 -0
  61. isa_model/inference/services/llm/__init__.py +16 -0
  62. isa_model/inference/services/llm/gemma_service.py +143 -0
  63. isa_model/inference/services/llm/llama_service.py +143 -0
  64. isa_model/inference/services/llm/ollama_llm_service.py +108 -0
  65. isa_model/inference/services/llm/openai_llm_service.py +129 -0
  66. isa_model/inference/services/llm/replicate_llm_service.py +179 -0
  67. isa_model/inference/services/llm/triton_llm_service.py +230 -0
  68. isa_model/inference/services/others/table_transformer_service.py +61 -0
  69. isa_model/inference/services/vision/__init__.py +12 -0
  70. isa_model/inference/services/vision/helpers/image_utils.py +58 -0
  71. isa_model/inference/services/vision/helpers/text_splitter.py +46 -0
  72. isa_model/inference/services/vision/ollama_vision_service.py +60 -0
  73. isa_model/inference/services/vision/replicate_vision_service.py +241 -0
  74. isa_model/inference/services/vision/triton_vision_service.py +199 -0
  75. isa_model/inference/services/vision/yyds_vision_service.py +80 -0
  76. isa_model/inference/utils/conversion/bge_rerank_convert.py +73 -0
  77. isa_model/inference/utils/conversion/onnx_converter.py +0 -0
  78. isa_model/inference/utils/conversion/torch_converter.py +0 -0
  79. isa_model/scripts/inference_tracker.py +283 -0
  80. isa_model/scripts/mlflow_manager.py +379 -0
  81. isa_model/scripts/model_registry.py +465 -0
  82. isa_model/scripts/start_mlflow.py +95 -0
  83. isa_model/scripts/training_tracker.py +257 -0
  84. isa_model/training/engine/llama_factory/__init__.py +39 -0
  85. isa_model/training/engine/llama_factory/config.py +115 -0
  86. isa_model/training/engine/llama_factory/data_adapter.py +284 -0
  87. isa_model/training/engine/llama_factory/examples/__init__.py +6 -0
  88. isa_model/training/engine/llama_factory/examples/finetune_with_tracking.py +185 -0
  89. isa_model/training/engine/llama_factory/examples/rlhf_with_tracking.py +163 -0
  90. isa_model/training/engine/llama_factory/factory.py +331 -0
  91. isa_model/training/engine/llama_factory/rl.py +254 -0
  92. isa_model/training/engine/llama_factory/trainer.py +171 -0
  93. isa_model/training/image_model/configs/create_config.py +37 -0
  94. isa_model/training/image_model/configs/create_flux_config.py +26 -0
  95. isa_model/training/image_model/configs/create_lora_config.py +21 -0
  96. isa_model/training/image_model/prepare_massed_compute.py +97 -0
  97. isa_model/training/image_model/prepare_upload.py +17 -0
  98. isa_model/training/image_model/raw_data/create_captions.py +16 -0
  99. isa_model/training/image_model/raw_data/create_lora_captions.py +20 -0
  100. isa_model/training/image_model/raw_data/pre_processing.py +200 -0
  101. isa_model/training/image_model/train/train.py +42 -0
  102. isa_model/training/image_model/train/train_flux.py +41 -0
  103. isa_model/training/image_model/train/train_lora.py +57 -0
  104. isa_model/training/image_model/train_main.py +25 -0
  105. isa_model/training/llm_model/annotation/annotation_schema.py +47 -0
  106. isa_model/training/llm_model/annotation/processors/annotation_processor.py +126 -0
  107. isa_model/training/llm_model/annotation/storage/dataset_manager.py +131 -0
  108. isa_model/training/llm_model/annotation/storage/dataset_schema.py +44 -0
  109. isa_model/training/llm_model/annotation/tests/test_annotation_flow.py +109 -0
  110. isa_model/training/llm_model/annotation/tests/test_minio copy.py +113 -0
  111. isa_model/training/llm_model/annotation/tests/test_minio_upload.py +43 -0
  112. isa_model/training/llm_model/annotation/views/annotation_controller.py +158 -0
  113. isa_model-0.1.0.dist-info/METADATA +116 -0
  114. isa_model-0.1.0.dist-info/RECORD +117 -0
  115. isa_model-0.1.0.dist-info/WHEEL +5 -0
  116. isa_model-0.1.0.dist-info/licenses/LICENSE +21 -0
  117. isa_model-0.1.0.dist-info/top_level.txt +1 -0
isa_model/inference/backends/Pytorch/gemma_backend.py
@@ -0,0 +1,167 @@
+ import os
+ import logging
+ import torch
+ from typing import Dict, List, Any, Optional, Union
+
+ logger = logging.getLogger(__name__)
+
+
+ class GemmaBackend:
+     """
+     PyTorch backend for the Gemma LLM model.
+     """
+
+     def __init__(self, model_path: Optional[str] = None, device: str = "auto"):
+         """
+         Initialize the Gemma backend.
+
+         Args:
+             model_path: Path to the model
+             device: Device to run the model on ("cpu", "cuda", or "auto")
+         """
+         self.model_path = model_path or os.environ.get("GEMMA_MODEL_PATH", "/models/Gemma3-4B")
+         self.device = device if device != "auto" else ("cuda" if torch.cuda.is_available() else "cpu")
+         self.model = None
+         self.tokenizer = None
+         self._loaded = False
+
+         # Default generation config
+         self.default_config = {
+             "max_new_tokens": 512,
+             "temperature": 0.7,
+             "top_p": 0.9,
+             "top_k": 50,
+             "repetition_penalty": 1.1,
+             "do_sample": True
+         }
+
+         self.logger = logger
+
+     def load(self) -> None:
+         """
+         Load the model and tokenizer.
+         """
+         if self._loaded:
+             return
+
+         try:
+             from transformers import AutoModelForCausalLM, AutoTokenizer
+
+             # Load tokenizer
+             self.logger.info(f"Loading Gemma tokenizer from {self.model_path}")
+             self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
+
+             # Load model
+             self.logger.info(f"Loading Gemma model on {self.device}")
+             if self.device == "cpu":
+                 self.model = AutoModelForCausalLM.from_pretrained(
+                     self.model_path,
+                     torch_dtype=torch.float32,
+                     low_cpu_mem_usage=True,
+                     device_map="auto"
+                 )
+             else:  # cuda
+                 self.model = AutoModelForCausalLM.from_pretrained(
+                     self.model_path,
+                     torch_dtype=torch.float16,  # Use half precision on GPU
+                     device_map="auto"
+                 )
+
+             self.model.eval()
+             self._loaded = True
+             self.logger.info("Gemma model loaded successfully")
+
+         except Exception as e:
+             self.logger.error(f"Failed to load Gemma model: {str(e)}")
+             raise
+
+     def unload(self) -> None:
+         """
+         Unload the model and tokenizer.
+         """
+         if not self._loaded:
+             return
+
+         self.model = None
+         self.tokenizer = None
+         self._loaded = False
+
+         # Force garbage collection
+         import gc
+         gc.collect()
+
+         if self.device == "cuda":
+             torch.cuda.empty_cache()
+
+         self.logger.info("Gemma model unloaded")
+
+     def generate(self,
+                  prompt: str,
+                  system_prompt: Optional[str] = None,
+                  generation_config: Optional[Dict[str, Any]] = None) -> str:
+         """
+         Generate text from a prompt.
+
+         Args:
+             prompt: User prompt
+             system_prompt: System prompt to control model behavior
+             generation_config: Configuration for text generation
+
+         Returns:
+             Generated text
+         """
+         if not self._loaded:
+             self.load()
+
+         # Get generation config
+         config = self.default_config.copy()
+         if generation_config:
+             config.update(generation_config)
+
+         try:
+             # Format the prompt with system prompt if provided
+             if system_prompt:
+                 # Gemma uses a specific format for system prompts
+                 formatted_prompt = f"<bos><start_of_turn>system\n{system_prompt}<end_of_turn>\n<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model"
+             else:
+                 formatted_prompt = f"<bos><start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model"
+
+             # Tokenize the prompt
+             inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.device)
+
+             # Generate text
+             with torch.no_grad():
+                 outputs = self.model.generate(
+                     inputs.input_ids,
+                     attention_mask=inputs.attention_mask,
+                     pad_token_id=self.tokenizer.eos_token_id,
+                     **config
+                 )
+
+             # Decode the generated text
+             generated_text = self.tokenizer.decode(
+                 outputs[0][inputs.input_ids.shape[1]:],
+                 skip_special_tokens=True
+             )
+
+             return generated_text.strip()
+
+         except Exception as e:
+             self.logger.error(f"Error during Gemma text generation: {str(e)}")
+             raise
+
+     def get_model_info(self) -> Dict[str, Any]:
+         """
+         Get information about the model.
+
+         Returns:
+             Dictionary containing model information
+         """
+         return {
+             "name": "gemma3-4b",
+             "type": "llm",
+             "device": self.device,
+             "path": self.model_path,
+             "loaded": self._loaded,
+             "default_config": self.default_config
+         }
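
A minimal usage sketch for the backend above (illustrative, not part of the packaged code); it assumes the transformers package is installed and a local Gemma checkpoint exists at the code's default path:

    from isa_model.inference.backends.Pytorch.gemma_backend import GemmaBackend

    backend = GemmaBackend(model_path="/models/Gemma3-4B", device="auto")
    backend.load()
    reply = backend.generate(
        prompt="Give a one-sentence summary of model backends.",
        system_prompt="You are a concise assistant.",
        generation_config={"max_new_tokens": 128, "temperature": 0.2},
    )
    print(reply)
    backend.unload()  # drops the model and clears the CUDA cache on GPU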
isa_model/inference/backends/Pytorch/llama_backend.py
@@ -0,0 +1,166 @@
+ import os
+ import logging
+ import torch
+ from typing import Dict, List, Any, Optional, Union
+
+ logger = logging.getLogger(__name__)
+
+
+ class LlamaBackend:
+     """
+     PyTorch backend for the Llama LLM model.
+     """
+
+     def __init__(self, model_path: Optional[str] = None, device: str = "auto"):
+         """
+         Initialize the Llama backend.
+
+         Args:
+             model_path: Path to the model
+             device: Device to run the model on ("cpu", "cuda", or "auto")
+         """
+         self.model_path = model_path or os.environ.get("LLAMA_MODEL_PATH", "/models/Llama3-8B")
+         self.device = device if device != "auto" else ("cuda" if torch.cuda.is_available() else "cpu")
+         self.model = None
+         self.tokenizer = None
+         self._loaded = False
+
+         # Default generation config
+         self.default_config = {
+             "max_new_tokens": 512,
+             "temperature": 0.7,
+             "top_p": 0.9,
+             "top_k": 50,
+             "repetition_penalty": 1.1,
+             "do_sample": True
+         }
+
+         self.logger = logger
+
+     def load(self) -> None:
+         """
+         Load the model and tokenizer.
+         """
+         if self._loaded:
+             return
+
+         try:
+             from transformers import AutoModelForCausalLM, AutoTokenizer
+
+             # Load tokenizer
+             self.logger.info(f"Loading Llama tokenizer from {self.model_path}")
+             self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
+
+             # Load model
+             self.logger.info(f"Loading Llama model on {self.device}")
+             if self.device == "cpu":
+                 self.model = AutoModelForCausalLM.from_pretrained(
+                     self.model_path,
+                     torch_dtype=torch.float32,
+                     low_cpu_mem_usage=True,
+                     device_map="auto"
+                 )
+             else:  # cuda
+                 self.model = AutoModelForCausalLM.from_pretrained(
+                     self.model_path,
+                     torch_dtype=torch.float16,  # Use half precision on GPU
+                     device_map="auto"
+                 )
+
+             self.model.eval()
+             self._loaded = True
+             self.logger.info("Llama model loaded successfully")
+
+         except Exception as e:
+             self.logger.error(f"Failed to load Llama model: {str(e)}")
+             raise
+
+     def unload(self) -> None:
+         """
+         Unload the model and tokenizer.
+         """
+         if not self._loaded:
+             return
+
+         self.model = None
+         self.tokenizer = None
+         self._loaded = False
+
+         # Force garbage collection
+         import gc
+         gc.collect()
+
+         if self.device == "cuda":
+             torch.cuda.empty_cache()
+
+         self.logger.info("Llama model unloaded")
+
+     def generate(self,
+                  prompt: str,
+                  system_prompt: Optional[str] = None,
+                  generation_config: Optional[Dict[str, Any]] = None) -> str:
+         """
+         Generate text from a prompt.
+
+         Args:
+             prompt: User prompt
+             system_prompt: System prompt to control model behavior
+             generation_config: Configuration for text generation
+
+         Returns:
+             Generated text
+         """
+         if not self._loaded:
+             self.load()
+
+         # Get generation config
+         config = self.default_config.copy()
+         if generation_config:
+             config.update(generation_config)
+
+         try:
+             # Format the prompt with system prompt if provided
+             if system_prompt:
+                 formatted_prompt = f"<|system|>\n{system_prompt}\n<|user|>\n{prompt}\n<|assistant|>"
+             else:
+                 formatted_prompt = f"<|user|>\n{prompt}\n<|assistant|>"
+
+             # Tokenize the prompt
+             inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.device)
+
+             # Generate text
+             with torch.no_grad():
+                 outputs = self.model.generate(
+                     inputs.input_ids,
+                     attention_mask=inputs.attention_mask,
+                     pad_token_id=self.tokenizer.eos_token_id,
+                     **config
+                 )
+
+             # Decode the generated text
+             generated_text = self.tokenizer.decode(
+                 outputs[0][inputs.input_ids.shape[1]:],
+                 skip_special_tokens=True
+             )
+
+             return generated_text.strip()
+
+         except Exception as e:
+             self.logger.error(f"Error during Llama text generation: {str(e)}")
+             raise
+
+     def get_model_info(self) -> Dict[str, Any]:
+         """
+         Get information about the model.
+
+         Returns:
+             Dictionary containing model information
+         """
+         return {
+             "name": "llama3-8b",
+             "type": "llm",
+             "device": self.device,
+             "path": self.model_path,
+             "loaded": self._loaded,
+             "default_config": self.default_config
+         }
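
A comparable sketch for LlamaBackend (illustrative, not part of the packaged code), relying on the LLAMA_MODEL_PATH fallback the constructor already implements; the path shown is the code's own default:

    import os
    from isa_model.inference.backends.Pytorch.llama_backend import LlamaBackend

    os.environ.setdefault("LLAMA_MODEL_PATH", "/models/Llama3-8B")
    backend = LlamaBackend(device="auto")
    print(backend.get_model_info())  # name, type, device, path, loaded flag, default config
    reply = backend.generate("List three uses of a model registry.")  # generate() lazily calls load()
    print(reply)
    backend.unload()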
isa_model/inference/backends/Pytorch/whisper_backend.py
@@ -0,0 +1,194 @@
+ import os
+ import io
+ import torch
+ import logging
+ import numpy as np
+ from typing import Dict, Any, Optional, Union, BinaryIO
+
+ logger = logging.getLogger(__name__)
+
+
+ class WhisperBackend:
+     """
+     PyTorch backend for the Whisper speech-to-text model.
+     """
+
+     def __init__(self, model_path: Optional[str] = None, device: str = "auto"):
+         """
+         Initialize the Whisper backend.
+
+         Args:
+             model_path: Path to the model
+             device: Device to run the model on ("cpu", "cuda", or "auto")
+         """
+         self.model_path = model_path or os.environ.get("WHISPER_MODEL_PATH", "/models/Whisper-tiny")
+         self.device = device if device != "auto" else ("cuda" if torch.cuda.is_available() else "cpu")
+         self.model = None
+         self.processor = None
+         self._loaded = False
+
+         # Default configuration
+         self.config = {
+             "language": "en",
+             "task": "transcribe",
+             "sampling_rate": 16000,
+             "chunk_length_s": 30,
+             "batch_size": 16
+         }
+
+         self.logger = logger
+
+     def load(self) -> None:
+         """
+         Load the model and processor.
+         """
+         if self._loaded:
+             return
+
+         try:
+             from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
+
+             # Load processor
+             self.logger.info(f"Loading Whisper processor from {self.model_path}")
+             self.processor = AutoProcessor.from_pretrained(self.model_path)
+
+             # Load model
+             self.logger.info(f"Loading Whisper model on {self.device}")
+             if self.device == "cpu":
+                 self.model = AutoModelForSpeechSeq2Seq.from_pretrained(
+                     self.model_path,
+                     torch_dtype=torch.float32,
+                     low_cpu_mem_usage=True,
+                     device_map="auto"
+                 )
+             else:  # cuda
+                 self.model = AutoModelForSpeechSeq2Seq.from_pretrained(
+                     self.model_path,
+                     torch_dtype=torch.float16,  # Use half precision on GPU
+                     device_map="auto"
+                 )
+
+             self.model.eval()
+             self._loaded = True
+             self.logger.info("Whisper model loaded successfully")
+
+         except Exception as e:
+             self.logger.error(f"Failed to load Whisper model: {str(e)}")
+             raise
+
+     def unload(self) -> None:
+         """
+         Unload the model and processor.
+         """
+         if not self._loaded:
+             return
+
+         self.model = None
+         self.processor = None
+         self._loaded = False
+
+         # Force garbage collection
+         import gc
+         gc.collect()
+
+         if self.device == "cuda":
+             torch.cuda.empty_cache()
+
+         self.logger.info("Whisper model unloaded")
+
+     def transcribe(self,
+                    audio: Union[np.ndarray, str, BinaryIO, bytes],
+                    language: str = "en",
+                    **kwargs) -> str:
+         """
+         Transcribe audio to text.
+
+         Args:
+             audio: Audio input (numpy array, file path, file-like object, or bytes)
+             language: Language code (e.g., "en", "fr")
+             kwargs: Additional keyword arguments to override config
+
+         Returns:
+             Transcribed text
+         """
+         if not self._loaded:
+             self.load()
+
+         # Process audio to get numpy array
+         audio_array = self._process_audio_input(audio)
+
+         # Update config with kwargs
+         config = self.config.copy()
+         config.update(kwargs)
+         config["language"] = language
+
+         try:
+             # Process audio with processor
+             inputs = self.processor(
+                 audio_array,
+                 sampling_rate=config["sampling_rate"],
+                 return_tensors="pt"
+             ).to(self.device)
+
+             # Generate transcription
+             with torch.no_grad():
+                 output = self.model.generate(
+                     **inputs,
+                     language=config["language"],
+                     task=config["task"]
+                 )
+
+             # Decode the output
+             transcription = self.processor.batch_decode(
+                 output,
+                 skip_special_tokens=True
+             )[0]
+
+             return transcription
+
+         except Exception as e:
+             self.logger.error(f"Error during Whisper transcription: {str(e)}")
+             raise
+
+     def _process_audio_input(self, audio: Union[np.ndarray, str, BinaryIO, bytes]) -> np.ndarray:
+         """
+         Process different types of audio inputs into a numpy array.
+
+         Args:
+             audio: Audio input (numpy array, file path, file-like object, or bytes)
+
+         Returns:
+             Numpy array of the audio
+         """
+         if isinstance(audio, np.ndarray):
+             return audio
+
+         try:
+             import librosa
+
+             if isinstance(audio, str):
+                 # File path
+                 y, sr = librosa.load(audio, sr=self.config["sampling_rate"])
+                 return y
+
+             elif isinstance(audio, (io.IOBase, BinaryIO)):
+                 # File-like object
+                 audio.seek(0)
+                 y, sr = librosa.load(audio, sr=self.config["sampling_rate"])
+                 return y
+
+             elif isinstance(audio, bytes):
+                 # Bytes
+                 with io.BytesIO(audio) as audio_bytes:
+                     y, sr = librosa.load(audio_bytes, sr=self.config["sampling_rate"])
+                     return y
+
+             else:
+                 raise ValueError(f"Unsupported audio type: {type(audio)}")
+
+         except ImportError:
+             self.logger.error("librosa not installed. Please install with: pip install librosa")
+             raise
+         except Exception as e:
+             self.logger.error(f"Error processing audio: {str(e)}")
+             raise
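
A short sketch of the transcription entry point above (illustrative, not part of the packaged code); sample.wav is a placeholder file, and librosa is required for any input that is not already a numpy array:

    from isa_model.inference.backends.Pytorch.whisper_backend import WhisperBackend

    stt = WhisperBackend(device="auto")
    text = stt.transcribe("sample.wav", language="en")  # file path, decoded via librosa
    with open("sample.wav", "rb") as f:
        text_from_bytes = stt.transcribe(f.read(), language="en")  # raw bytes are wrapped in BytesIO internally
    stt.unload()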
isa_model/inference/backends/__init__.py
@@ -0,0 +1,53 @@
+ """
+ Backend services for isa_model inference.
+
+ Three types of backend services:
+ 1. Local Services: Services running locally (e.g., Ollama)
+ 2. Container Services: Docker/K8s deployed services (e.g., Triton, vLLM)
+ 3. Third-party Services: External API services with wrappers
+ """
+
+ from .base_backend_client import BaseBackendClient
+ from .triton_client import TritonBackendClient, TritonClient
+
+ # Local Services
+ from .local_services import OllamaBackendClient, LocalModelServerClient
+
+ # Container Services
+ from .container_services import (
+     VLLMBackendClient,
+     TensorFlowServingClient,
+     KubernetesServiceClient
+ )
+
+ # Third-party Services
+ from .third_party_services import (
+     OpenAIClient,
+     AnthropicClient,
+     CohereClient,
+     AzureOpenAIClient,
+     GoogleAIClient
+ )
+
+ __all__ = [
+     # Base
+     "BaseBackendClient",
+     "TritonBackendClient",
+     "TritonClient",  # Backward compatibility
+
+     # Local Services
+     "OllamaBackendClient",
+     "LocalModelServerClient",
+
+     # Container Services
+     "VLLMBackendClient",
+     "TensorFlowServingClient",
+     "KubernetesServiceClient",
+
+     # Third-party Services
+     "OpenAIClient",
+     "AnthropicClient",
+     "CohereClient",
+     "AzureOpenAIClient",
+     "GoogleAIClient",
+ ]
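
The exports above group clients by where the model actually runs; a hedged sketch of pulling one name from each category (constructor arguments are omitted because they differ per client and are not shown in this hunk):

    from isa_model.inference.backends import (
        OllamaBackendClient,  # 1. local service
        VLLMBackendClient,    # 2. container service (Docker/K8s)
        OpenAIClient,         # 3. third-party API wrapper
    )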
isa_model/inference/backends/base_backend_client.py
@@ -0,0 +1,26 @@
+ """
+ Base backend client interface for all AI service backends.
+ Defines the common interface that all backend clients must implement.
+ """
+
+ from abc import ABC, abstractmethod
+ from typing import Dict, Any, AsyncGenerator, Optional
+
+
+ class BaseBackendClient(ABC):
+     """Abstract base class for all backend clients"""
+
+     def __init__(self, *args, **kwargs):
+         """Initialize backend client"""
+         pass
+
+     @abstractmethod
+     async def health_check(self) -> bool:
+         """Check if the backend service is healthy"""
+         pass
+
+     async def close(self):
+         """Close any open connections"""
+         pass
+
+
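
A minimal concrete subclass of the interface above (illustrative, not part of the packaged code), targeting a hypothetical HTTP endpoint and using aiohttp purely as an example client library:

    import aiohttp

    from isa_model.inference.backends.base_backend_client import BaseBackendClient


    class HttpBackendClient(BaseBackendClient):
        """Example client that treats a 200 response from /health as healthy."""

        def __init__(self, base_url: str):
            super().__init__()
            self.base_url = base_url.rstrip("/")

        async def health_check(self) -> bool:
            try:
                async with aiohttp.ClientSession() as session:
                    async with session.get(f"{self.base_url}/health") as resp:
                        return resp.status == 200
            except aiohttp.ClientError:
                return False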