isa-model 0.1.0-py3-none-any.whl → 0.1.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +1 -1
- isa_model/core/model_registry.py +273 -46
- isa_model/deployment/gpu_fp16_ds8/models/deepseek_r1/1/model.py +120 -0
- isa_model/deployment/gpu_fp16_ds8/scripts/download_model.py +18 -0
- isa_model/deployment/gpu_int8_ds8/app/server.py +66 -0
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +43 -0
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +35 -0
- isa_model/eval/__init__.py +56 -0
- isa_model/eval/benchmarks.py +469 -0
- isa_model/eval/factory.py +582 -0
- isa_model/eval/metrics.py +628 -0
- isa_model/inference/ai_factory.py +98 -93
- isa_model/inference/providers/openai_provider.py +21 -7
- isa_model/inference/providers/replicate_provider.py +18 -5
- isa_model/inference/providers/triton_provider.py +1 -1
- isa_model/inference/services/audio/base_stt_service.py +91 -0
- isa_model/inference/services/audio/base_tts_service.py +136 -0
- isa_model/inference/services/audio/{yyds_audio_service.py → openai_tts_service.py} +4 -4
- isa_model/inference/services/embedding/ollama_embed_service.py +48 -36
- isa_model/inference/services/llm/__init__.py +0 -4
- isa_model/inference/services/llm/base_llm_service.py +134 -0
- isa_model/inference/services/llm/ollama_llm_service.py +1 -10
- isa_model/inference/services/llm/openai_llm_service.py +70 -61
- isa_model/inference/services/vision/__init__.py +1 -1
- isa_model/inference/services/vision/ollama_vision_service.py +4 -4
- isa_model/inference/services/vision/{yyds_vision_service.py → openai_vision_service.py} +5 -5
- isa_model/inference/services/vision/replicate_image_gen_service.py +185 -0
- isa_model/training/__init__.py +44 -0
- isa_model/training/factory.py +393 -0
- isa_model-0.1.1.dist-info/METADATA +327 -0
- {isa_model-0.1.0.dist-info → isa_model-0.1.1.dist-info}/RECORD +35 -60
- isa_model/deployment/mlflow_gateway/__init__.py +0 -8
- isa_model/deployment/mlflow_gateway/start_gateway.py +0 -65
- isa_model/deployment/unified_multimodal_client.py +0 -341
- isa_model/inference/adapter/triton_adapter.py +0 -453
- isa_model/inference/backends/Pytorch/bge_embed_backend.py +0 -188
- isa_model/inference/backends/Pytorch/gemma_backend.py +0 -167
- isa_model/inference/backends/Pytorch/llama_backend.py +0 -166
- isa_model/inference/backends/Pytorch/whisper_backend.py +0 -194
- isa_model/inference/backends/__init__.py +0 -53
- isa_model/inference/backends/base_backend_client.py +0 -26
- isa_model/inference/backends/container_services.py +0 -104
- isa_model/inference/backends/local_services.py +0 -72
- isa_model/inference/backends/openai_client.py +0 -130
- isa_model/inference/backends/replicate_client.py +0 -197
- isa_model/inference/backends/third_party_services.py +0 -239
- isa_model/inference/backends/triton_client.py +0 -97
- isa_model/inference/client_sdk/client.py +0 -134
- isa_model/inference/client_sdk/client_data_std.py +0 -34
- isa_model/inference/client_sdk/client_sdk_schema.py +0 -16
- isa_model/inference/client_sdk/exceptions.py +0 -0
- isa_model/inference/engine/triton/model_repository/bge/1/model.py +0 -174
- isa_model/inference/engine/triton/model_repository/gemma/1/model.py +0 -250
- isa_model/inference/engine/triton/model_repository/llama/1/model.py +0 -76
- isa_model/inference/engine/triton/model_repository/whisper/1/model.py +0 -195
- isa_model/inference/providers/vllm_provider.py +0 -0
- isa_model/inference/providers/yyds_provider.py +0 -83
- isa_model/inference/services/audio/fish_speech/handler.py +0 -215
- isa_model/inference/services/audio/runpod_tts_fish_service.py +0 -212
- isa_model/inference/services/audio/triton_speech_service.py +0 -138
- isa_model/inference/services/audio/whisper_service.py +0 -186
- isa_model/inference/services/base_tts_service.py +0 -66
- isa_model/inference/services/embedding/bge_service.py +0 -183
- isa_model/inference/services/embedding/ollama_rerank_service.py +0 -118
- isa_model/inference/services/embedding/onnx_rerank_service.py +0 -73
- isa_model/inference/services/llm/gemma_service.py +0 -143
- isa_model/inference/services/llm/llama_service.py +0 -143
- isa_model/inference/services/llm/replicate_llm_service.py +0 -179
- isa_model/inference/services/llm/triton_llm_service.py +0 -230
- isa_model/inference/services/vision/replicate_vision_service.py +0 -241
- isa_model/inference/services/vision/triton_vision_service.py +0 -199
- isa_model-0.1.0.dist-info/METADATA +0 -116
- /isa_model/inference/{client_sdk/__init__.py → services/embedding/openai_embed_service.py} +0 -0
- {isa_model-0.1.0.dist-info → isa_model-0.1.1.dist-info}/WHEEL +0 -0
- {isa_model-0.1.0.dist-info → isa_model-0.1.1.dist-info}/licenses/LICENSE +0 -0
- {isa_model-0.1.0.dist-info → isa_model-0.1.1.dist-info}/top_level.txt +0 -0
isa_model/deployment/unified_multimodal_client.py
@@ -1,341 +0,0 @@
-#!/usr/bin/env python3
-"""
-Unified Multimodal Client
-
-This client provides a unified interface to different model types and modalities,
-abstracting away the complexity of different backends and deployment strategies.
-
-Features:
-- Text generation (chat completion)
-- Image generation
-- Audio transcription
-- Embeddings
-
-Usage:
-    from isa_model.deployment.unified_multimodal_client import UnifiedClient
-
-    client = UnifiedClient()
-
-    # Text generation
-    response = client.chat_completion("What is MLflow?")
-
-    # Image generation
-    image_data = client.generate_image("A beautiful mountain landscape")
-
-    # Audio transcription
-    transcription = client.transcribe_audio(audio_base64)
-
-    # Embeddings
-    embeddings = client.get_embeddings("This is a test sentence.")
-"""
-
-import os
-import json
-import base64
-import requests
-import tempfile
-from typing import List, Dict, Any, Optional, Union
-from dataclasses import dataclass
-from PIL import Image
-import io
-
-@dataclass
-class DeploymentConfig:
-    """Deployment configuration for a model type"""
-    name: str
-    endpoint: str
-    api_key: Optional[str] = None
-
-class UnifiedClient:
-    """Unified client for multimodal AI models"""
-
-    def __init__(self, adapter_url: str = "http://localhost:8300"):
-        """Initialize the client with the adapter URL"""
-        self.adapter_url = adapter_url
-
-        # Configure deployment endpoints - directly to multimodal adapter
-        self.deployments = {
-            "text": DeploymentConfig(
-                name="default",
-                endpoint=f"{adapter_url}/v1/chat/completions"
-            ),
-            "image": DeploymentConfig(
-                name="default",
-                endpoint=f"{adapter_url}/v1/images/generations"
-            ),
-            "audio": DeploymentConfig(
-                name="default",
-                endpoint=f"{adapter_url}/v1/audio/transcriptions"
-            ),
-            "embeddings": DeploymentConfig(
-                name="default",
-                endpoint=f"{adapter_url}/v1/embeddings"
-            )
-        }
-
-    def _make_request(self,
-                      deployment_type: str,
-                      payload: Dict[str, Any],
-                      files: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
-        """Make a request to the specified deployment type"""
-        if deployment_type not in self.deployments:
-            raise ValueError(f"Unsupported deployment type: {deployment_type}")
-
-        deployment = self.deployments[deployment_type]
-
-        headers = {
-            "Content-Type": "application/json"
-        }
-
-        if deployment.api_key:
-            headers["Authorization"] = f"Bearer {deployment.api_key}"
-
-        try:
-            if files:
-                # For multipart/form-data requests
-                response = requests.post(
-                    deployment.endpoint,
-                    data=payload,
-                    files=files
-                )
-            else:
-                # Ensure model is included in the payload
-                if "model" not in payload:
-                    payload["model"] = deployment.name
-
-                # For JSON requests
-                response = requests.post(
-                    deployment.endpoint,
-                    json=payload,
-                    headers=headers
-                )
-
-            response.raise_for_status()
-            return response.json()
-
-        except Exception as e:
-            print(f"Error calling {deployment_type} endpoint: {str(e)}")
-            print(f"Response: {response.text if 'response' in locals() else 'No response'}")
-            raise
-
-    def chat_completion(self,
-                        prompt: str,
-                        system_prompt: Optional[str] = None,
-                        max_tokens: int = 100,
-                        temperature: float = 0.7) -> str:
-        """Generate a chat completion response"""
-        messages = []
-
-        if system_prompt:
-            messages.append({
-                "role": "system",
-                "content": system_prompt
-            })
-
-        messages.append({
-            "role": "user",
-            "content": prompt
-        })
-
-        payload = {
-            "messages": messages,
-            "max_tokens": max_tokens,
-            "temperature": temperature
-        }
-
-        response = self._make_request("text", payload)
-
-        if "choices" in response and len(response["choices"]) > 0:
-            return response["choices"][0]["message"]["content"]
-        else:
-            return "Error: No response generated"
-
-    def generate_image(self,
-                       prompt: str,
-                       n: int = 1,
-                       size: str = "1024x1024") -> str:
-        """Generate an image from a text prompt"""
-        payload = {
-            "prompt": prompt,
-            "n": n,
-            "size": size
-        }
-
-        response = self._make_request("image", payload)
-
-        if "data" in response and len(response["data"]) > 0:
-            # Return the base64 data URL
-            return response["data"][0]["url"]
-        else:
-            return "Error: No image generated"
-
-    def save_image(self, image_data_url: str, output_path: str) -> None:
-        """Save a base64 image data URL to a file"""
-        if image_data_url.startswith("data:image"):
-            # Extract the base64 part from the data URL
-            base64_data = image_data_url.split(",")[1]
-
-            # Decode the base64 data
-            image_data = base64.b64decode(base64_data)
-
-            # Save the image
-            with open(output_path, "wb") as f:
-                f.write(image_data)
-
-            print(f"Image saved to {output_path}")
-        else:
-            raise ValueError("Invalid image data URL format")
-
-    def transcribe_audio(self,
-                         audio_data: Union[str, bytes],
-                         language: str = "en") -> str:
-        """
-        Transcribe audio to text
-
-        Parameters:
-        - audio_data: Either a base64 encoded string or raw bytes
-        - language: Language code
-
-        Returns:
-        - Transcribed text
-        """
-        # Convert bytes to base64 if needed
-        if isinstance(audio_data, bytes):
-            audio_base64 = base64.b64encode(audio_data).decode("utf-8")
-        else:
-            # Assume it's already base64 encoded
-            audio_base64 = audio_data
-
-        payload = {
-            "file": audio_base64,
-            "language": language
-        }
-
-        response = self._make_request("audio", payload)
-
-        if "text" in response:
-            return response["text"]
-        else:
-            return "Error: No transcription generated"
-
-    def transcribe_audio_file(self,
-                              file_path: str,
-                              language: str = "en") -> str:
-        """
-        Transcribe an audio file to text
-
-        Parameters:
-        - file_path: Path to the audio file
-        - language: Language code
-
-        Returns:
-        - Transcribed text
-        """
-        with open(file_path, "rb") as f:
-            audio_data = f.read()
-
-        return self.transcribe_audio(audio_data, language)
-
-    def get_embeddings(self,
-                       text: Union[str, List[str]]) -> List[List[float]]:
-        """
-        Get embeddings for text or a list of texts
-
-        Parameters:
-        - text: Either a single string or a list of strings
-
-        Returns:
-        - List of embedding vectors
-        """
-        payload = {
-            "input": text
-        }
-
-        response = self._make_request("embeddings", payload)
-
-        if "data" in response:
-            return [item["embedding"] for item in response["data"]]
-        else:
-            return []
-
-    def similarity(self, text1: str, text2: str) -> float:
-        """
-        Calculate the cosine similarity between two texts
-
-        Parameters:
-        - text1: First text
-        - text2: Second text
-
-        Returns:
-        - Cosine similarity (0-1)
-        """
-        import numpy as np
-
-        # Get embeddings for both texts
-        embeddings = self.get_embeddings([text1, text2])
-
-        if len(embeddings) != 2:
-            raise ValueError("Failed to get embeddings for both texts")
-
-        # Calculate cosine similarity
-        embedding1 = np.array(embeddings[0])
-        embedding2 = np.array(embeddings[1])
-
-        cos_sim = np.dot(embedding1, embedding2) / (np.linalg.norm(embedding1) * np.linalg.norm(embedding2))
-        return float(cos_sim)
-
-    def health_check(self) -> bool:
-        """Check if the adapter is healthy"""
-        try:
-            response = requests.get(f"{self.adapter_url}/health")
-            return response.status_code == 200
-        except Exception as e:
-            print(f"Health check failed: {str(e)}")
-            return False
-
-if __name__ == "__main__":
-    # Test the client
-    client = UnifiedClient()
-
-    print("\n===== Unified Multimodal Client Demo =====")
-
-    # Check health
-    if not client.health_check():
-        print("Adapter is not healthy. Make sure it's running.")
-        exit(1)
-
-    # Test chat completion
-    print("\nTesting chat completion...")
-    response = client.chat_completion(
-        "What are the key benefits of MLflow?",
-        system_prompt="You are a helpful AI assistant specializing in machine learning.",
-        max_tokens=150
-    )
-    print(f"\nResponse: {response}")
-
-    # Test embeddings
-    print("\nTesting embeddings...")
-    embeddings = client.get_embeddings("What is MLflow?")
-    print(f"Embedding dimensionality: {len(embeddings[0])}")
-    print(f"First 5 values: {embeddings[0][:5]}")
-
-    # Test similarity
-    print("\nTesting similarity...")
-    similarity = client.similarity(
-        "MLflow is a platform for managing machine learning workflows.",
-        "MLflow helps data scientists track experiments and deploy models."
-    )
-    print(f"Similarity: {similarity:.4f}")
-
-    # Test image generation
-    print("\nTesting image generation...")
-    image_url = client.generate_image("A beautiful mountain landscape")
-    print(f"Image URL: {image_url[:30]}...")
-
-    # Test audio transcription
-    print("\nTesting audio transcription...")
-    dummy_audio = base64.b64encode(b"dummy audio data").decode("utf-8")
-    transcription = client.transcribe_audio(dummy_audio)
-    print(f"Transcription: {transcription}")
-
-    print("\n===== Demo Complete =====")