isa-model 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76):
  1. isa_model/__init__.py +1 -1
  2. isa_model/core/model_registry.py +273 -46
  3. isa_model/deployment/gpu_fp16_ds8/models/deepseek_r1/1/model.py +120 -0
  4. isa_model/deployment/gpu_fp16_ds8/scripts/download_model.py +18 -0
  5. isa_model/deployment/gpu_int8_ds8/app/server.py +66 -0
  6. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +43 -0
  7. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +35 -0
  8. isa_model/eval/__init__.py +56 -0
  9. isa_model/eval/benchmarks.py +469 -0
  10. isa_model/eval/factory.py +582 -0
  11. isa_model/eval/metrics.py +628 -0
  12. isa_model/inference/ai_factory.py +98 -93
  13. isa_model/inference/providers/openai_provider.py +21 -7
  14. isa_model/inference/providers/replicate_provider.py +18 -5
  15. isa_model/inference/providers/triton_provider.py +1 -1
  16. isa_model/inference/services/audio/base_stt_service.py +91 -0
  17. isa_model/inference/services/audio/base_tts_service.py +136 -0
  18. isa_model/inference/services/audio/{yyds_audio_service.py → openai_tts_service.py} +4 -4
  19. isa_model/inference/services/embedding/ollama_embed_service.py +48 -36
  20. isa_model/inference/services/llm/__init__.py +0 -4
  21. isa_model/inference/services/llm/base_llm_service.py +134 -0
  22. isa_model/inference/services/llm/ollama_llm_service.py +1 -10
  23. isa_model/inference/services/llm/openai_llm_service.py +70 -61
  24. isa_model/inference/services/vision/__init__.py +1 -1
  25. isa_model/inference/services/vision/ollama_vision_service.py +4 -4
  26. isa_model/inference/services/vision/{yyds_vision_service.py → openai_vision_service.py} +5 -5
  27. isa_model/inference/services/vision/replicate_image_gen_service.py +185 -0
  28. isa_model/training/__init__.py +44 -0
  29. isa_model/training/factory.py +393 -0
  30. isa_model-0.2.0.dist-info/METADATA +327 -0
  31. {isa_model-0.1.0.dist-info → isa_model-0.2.0.dist-info}/RECORD +35 -60
  32. isa_model/deployment/mlflow_gateway/__init__.py +0 -8
  33. isa_model/deployment/mlflow_gateway/start_gateway.py +0 -65
  34. isa_model/deployment/unified_multimodal_client.py +0 -341
  35. isa_model/inference/adapter/triton_adapter.py +0 -453
  36. isa_model/inference/backends/Pytorch/bge_embed_backend.py +0 -188
  37. isa_model/inference/backends/Pytorch/gemma_backend.py +0 -167
  38. isa_model/inference/backends/Pytorch/llama_backend.py +0 -166
  39. isa_model/inference/backends/Pytorch/whisper_backend.py +0 -194
  40. isa_model/inference/backends/__init__.py +0 -53
  41. isa_model/inference/backends/base_backend_client.py +0 -26
  42. isa_model/inference/backends/container_services.py +0 -104
  43. isa_model/inference/backends/local_services.py +0 -72
  44. isa_model/inference/backends/openai_client.py +0 -130
  45. isa_model/inference/backends/replicate_client.py +0 -197
  46. isa_model/inference/backends/third_party_services.py +0 -239
  47. isa_model/inference/backends/triton_client.py +0 -97
  48. isa_model/inference/client_sdk/client.py +0 -134
  49. isa_model/inference/client_sdk/client_data_std.py +0 -34
  50. isa_model/inference/client_sdk/client_sdk_schema.py +0 -16
  51. isa_model/inference/client_sdk/exceptions.py +0 -0
  52. isa_model/inference/engine/triton/model_repository/bge/1/model.py +0 -174
  53. isa_model/inference/engine/triton/model_repository/gemma/1/model.py +0 -250
  54. isa_model/inference/engine/triton/model_repository/llama/1/model.py +0 -76
  55. isa_model/inference/engine/triton/model_repository/whisper/1/model.py +0 -195
  56. isa_model/inference/providers/vllm_provider.py +0 -0
  57. isa_model/inference/providers/yyds_provider.py +0 -83
  58. isa_model/inference/services/audio/fish_speech/handler.py +0 -215
  59. isa_model/inference/services/audio/runpod_tts_fish_service.py +0 -212
  60. isa_model/inference/services/audio/triton_speech_service.py +0 -138
  61. isa_model/inference/services/audio/whisper_service.py +0 -186
  62. isa_model/inference/services/base_tts_service.py +0 -66
  63. isa_model/inference/services/embedding/bge_service.py +0 -183
  64. isa_model/inference/services/embedding/ollama_rerank_service.py +0 -118
  65. isa_model/inference/services/embedding/onnx_rerank_service.py +0 -73
  66. isa_model/inference/services/llm/gemma_service.py +0 -143
  67. isa_model/inference/services/llm/llama_service.py +0 -143
  68. isa_model/inference/services/llm/replicate_llm_service.py +0 -179
  69. isa_model/inference/services/llm/triton_llm_service.py +0 -230
  70. isa_model/inference/services/vision/replicate_vision_service.py +0 -241
  71. isa_model/inference/services/vision/triton_vision_service.py +0 -199
  72. isa_model-0.1.0.dist-info/METADATA +0 -116
  73. /isa_model/inference/{client_sdk/__init__.py → services/embedding/openai_embed_service.py} +0 -0
  74. {isa_model-0.1.0.dist-info → isa_model-0.2.0.dist-info}/WHEEL +0 -0
  75. {isa_model-0.1.0.dist-info → isa_model-0.2.0.dist-info}/licenses/LICENSE +0 -0
  76. {isa_model-0.1.0.dist-info → isa_model-0.2.0.dist-info}/top_level.txt +0 -0
isa_model/deployment/unified_multimodal_client.py (file removed in 0.2.0)
@@ -1,341 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Unified Multimodal Client
4
-
5
- This client provides a unified interface to different model types and modalities,
6
- abstracting away the complexity of different backends and deployment strategies.
7
-
8
- Features:
9
- - Text generation (chat completion)
10
- - Image generation
11
- - Audio transcription
12
- - Embeddings
13
-
14
- Usage:
15
- from isa_model.deployment.unified_multimodal_client import UnifiedClient
16
-
17
- client = UnifiedClient()
18
-
19
- # Text generation
20
- response = client.chat_completion("What is MLflow?")
21
-
22
- # Image generation
23
- image_data = client.generate_image("A beautiful mountain landscape")
24
-
25
- # Audio transcription
26
- transcription = client.transcribe_audio(audio_base64)
27
-
28
- # Embeddings
29
- embeddings = client.get_embeddings("This is a test sentence.")
30
- """
31
-
32
- import os
33
- import json
34
- import base64
35
- import requests
36
- import tempfile
37
- from typing import List, Dict, Any, Optional, Union
38
- from dataclasses import dataclass
39
- from PIL import Image
40
- import io
41
-
42
- @dataclass
43
- class DeploymentConfig:
44
- """Deployment configuration for a model type"""
45
- name: str
46
- endpoint: str
47
- api_key: Optional[str] = None
48
-
49
- class UnifiedClient:
50
- """Unified client for multimodal AI models"""
51
-
52
- def __init__(self, adapter_url: str = "http://localhost:8300"):
53
- """Initialize the client with the adapter URL"""
54
- self.adapter_url = adapter_url
55
-
56
- # Configure deployment endpoints - directly to multimodal adapter
57
- self.deployments = {
58
- "text": DeploymentConfig(
59
- name="default",
60
- endpoint=f"{adapter_url}/v1/chat/completions"
61
- ),
62
- "image": DeploymentConfig(
63
- name="default",
64
- endpoint=f"{adapter_url}/v1/images/generations"
65
- ),
66
- "audio": DeploymentConfig(
67
- name="default",
68
- endpoint=f"{adapter_url}/v1/audio/transcriptions"
69
- ),
70
- "embeddings": DeploymentConfig(
71
- name="default",
72
- endpoint=f"{adapter_url}/v1/embeddings"
73
- )
74
- }
75
-
76
- def _make_request(self,
77
- deployment_type: str,
78
- payload: Dict[str, Any],
79
- files: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
80
- """Make a request to the specified deployment type"""
81
- if deployment_type not in self.deployments:
82
- raise ValueError(f"Unsupported deployment type: {deployment_type}")
83
-
84
- deployment = self.deployments[deployment_type]
85
-
86
- headers = {
87
- "Content-Type": "application/json"
88
- }
89
-
90
- if deployment.api_key:
91
- headers["Authorization"] = f"Bearer {deployment.api_key}"
92
-
93
- try:
94
- if files:
95
- # For multipart/form-data requests
96
- response = requests.post(
97
- deployment.endpoint,
98
- data=payload,
99
- files=files
100
- )
101
- else:
102
- # Ensure model is included in the payload
103
- if "model" not in payload:
104
- payload["model"] = deployment.name
105
-
106
- # For JSON requests
107
- response = requests.post(
108
- deployment.endpoint,
109
- json=payload,
110
- headers=headers
111
- )
112
-
113
- response.raise_for_status()
114
- return response.json()
115
-
116
- except Exception as e:
117
- print(f"Error calling {deployment_type} endpoint: {str(e)}")
118
- print(f"Response: {response.text if 'response' in locals() else 'No response'}")
119
- raise
120
-
121
- def chat_completion(self,
122
- prompt: str,
123
- system_prompt: Optional[str] = None,
124
- max_tokens: int = 100,
125
- temperature: float = 0.7) -> str:
126
- """Generate a chat completion response"""
127
- messages = []
128
-
129
- if system_prompt:
130
- messages.append({
131
- "role": "system",
132
- "content": system_prompt
133
- })
134
-
135
- messages.append({
136
- "role": "user",
137
- "content": prompt
138
- })
139
-
140
- payload = {
141
- "messages": messages,
142
- "max_tokens": max_tokens,
143
- "temperature": temperature
144
- }
145
-
146
- response = self._make_request("text", payload)
147
-
148
- if "choices" in response and len(response["choices"]) > 0:
149
- return response["choices"][0]["message"]["content"]
150
- else:
151
- return "Error: No response generated"
152
-
153
- def generate_image(self,
154
- prompt: str,
155
- n: int = 1,
156
- size: str = "1024x1024") -> str:
157
- """Generate an image from a text prompt"""
158
- payload = {
159
- "prompt": prompt,
160
- "n": n,
161
- "size": size
162
- }
163
-
164
- response = self._make_request("image", payload)
165
-
166
- if "data" in response and len(response["data"]) > 0:
167
- # Return the base64 data URL
168
- return response["data"][0]["url"]
169
- else:
170
- return "Error: No image generated"
171
-
172
- def save_image(self, image_data_url: str, output_path: str) -> None:
173
- """Save a base64 image data URL to a file"""
174
- if image_data_url.startswith("data:image"):
175
- # Extract the base64 part from the data URL
176
- base64_data = image_data_url.split(",")[1]
177
-
178
- # Decode the base64 data
179
- image_data = base64.b64decode(base64_data)
180
-
181
- # Save the image
182
- with open(output_path, "wb") as f:
183
- f.write(image_data)
184
-
185
- print(f"Image saved to {output_path}")
186
- else:
187
- raise ValueError("Invalid image data URL format")
188
-
189
- def transcribe_audio(self,
190
- audio_data: Union[str, bytes],
191
- language: str = "en") -> str:
192
- """
193
- Transcribe audio to text
194
-
195
- Parameters:
196
- - audio_data: Either a base64 encoded string or raw bytes
197
- - language: Language code
198
-
199
- Returns:
200
- - Transcribed text
201
- """
202
- # Convert bytes to base64 if needed
203
- if isinstance(audio_data, bytes):
204
- audio_base64 = base64.b64encode(audio_data).decode("utf-8")
205
- else:
206
- # Assume it's already base64 encoded
207
- audio_base64 = audio_data
208
-
209
- payload = {
210
- "file": audio_base64,
211
- "language": language
212
- }
213
-
214
- response = self._make_request("audio", payload)
215
-
216
- if "text" in response:
217
- return response["text"]
218
- else:
219
- return "Error: No transcription generated"
220
-
221
- def transcribe_audio_file(self,
222
- file_path: str,
223
- language: str = "en") -> str:
224
- """
225
- Transcribe an audio file to text
226
-
227
- Parameters:
228
- - file_path: Path to the audio file
229
- - language: Language code
230
-
231
- Returns:
232
- - Transcribed text
233
- """
234
- with open(file_path, "rb") as f:
235
- audio_data = f.read()
236
-
237
- return self.transcribe_audio(audio_data, language)
238
-
239
- def get_embeddings(self,
240
- text: Union[str, List[str]]) -> List[List[float]]:
241
- """
242
- Get embeddings for text or a list of texts
243
-
244
- Parameters:
245
- - text: Either a single string or a list of strings
246
-
247
- Returns:
248
- - List of embedding vectors
249
- """
250
- payload = {
251
- "input": text
252
- }
253
-
254
- response = self._make_request("embeddings", payload)
255
-
256
- if "data" in response:
257
- return [item["embedding"] for item in response["data"]]
258
- else:
259
- return []
260
-
261
- def similarity(self, text1: str, text2: str) -> float:
262
- """
263
- Calculate the cosine similarity between two texts
264
-
265
- Parameters:
266
- - text1: First text
267
- - text2: Second text
268
-
269
- Returns:
270
- - Cosine similarity (0-1)
271
- """
272
- import numpy as np
273
-
274
- # Get embeddings for both texts
275
- embeddings = self.get_embeddings([text1, text2])
276
-
277
- if len(embeddings) != 2:
278
- raise ValueError("Failed to get embeddings for both texts")
279
-
280
- # Calculate cosine similarity
281
- embedding1 = np.array(embeddings[0])
282
- embedding2 = np.array(embeddings[1])
283
-
284
- cos_sim = np.dot(embedding1, embedding2) / (np.linalg.norm(embedding1) * np.linalg.norm(embedding2))
285
- return float(cos_sim)
286
-
287
- def health_check(self) -> bool:
288
- """Check if the adapter is healthy"""
289
- try:
290
- response = requests.get(f"{self.adapter_url}/health")
291
- return response.status_code == 200
292
- except Exception as e:
293
- print(f"Health check failed: {str(e)}")
294
- return False
295
-
296
- if __name__ == "__main__":
297
- # Test the client
298
- client = UnifiedClient()
299
-
300
- print("\n===== Unified Multimodal Client Demo =====")
301
-
302
- # Check health
303
- if not client.health_check():
304
- print("Adapter is not healthy. Make sure it's running.")
305
- exit(1)
306
-
307
- # Test chat completion
308
- print("\nTesting chat completion...")
309
- response = client.chat_completion(
310
- "What are the key benefits of MLflow?",
311
- system_prompt="You are a helpful AI assistant specializing in machine learning.",
312
- max_tokens=150
313
- )
314
- print(f"\nResponse: {response}")
315
-
316
- # Test embeddings
317
- print("\nTesting embeddings...")
318
- embeddings = client.get_embeddings("What is MLflow?")
319
- print(f"Embedding dimensionality: {len(embeddings[0])}")
320
- print(f"First 5 values: {embeddings[0][:5]}")
321
-
322
- # Test similarity
323
- print("\nTesting similarity...")
324
- similarity = client.similarity(
325
- "MLflow is a platform for managing machine learning workflows.",
326
- "MLflow helps data scientists track experiments and deploy models."
327
- )
328
- print(f"Similarity: {similarity:.4f}")
329
-
330
- # Test image generation
331
- print("\nTesting image generation...")
332
- image_url = client.generate_image("A beautiful mountain landscape")
333
- print(f"Image URL: {image_url[:30]}...")
334
-
335
- # Test audio transcription
336
- print("\nTesting audio transcription...")
337
- dummy_audio = base64.b64encode(b"dummy audio data").decode("utf-8")
338
- transcription = client.transcribe_audio(dummy_audio)
339
- print(f"Transcription: {transcription}")
340
-
341
- print("\n===== Demo Complete =====")