isa-model 0.0.2__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. isa_model/__init__.py +1 -1
  2. isa_model/core/model_manager.py +69 -4
  3. isa_model/core/model_registry.py +273 -46
  4. isa_model/core/storage/hf_storage.py +419 -0
  5. isa_model/deployment/__init__.py +52 -0
  6. isa_model/deployment/core/__init__.py +34 -0
  7. isa_model/deployment/core/deployment_config.py +356 -0
  8. isa_model/deployment/core/deployment_manager.py +549 -0
  9. isa_model/deployment/core/isa_deployment_service.py +401 -0
  10. isa_model/eval/factory.py +381 -140
  11. isa_model/inference/ai_factory.py +427 -236
  12. isa_model/inference/billing_tracker.py +406 -0
  13. isa_model/inference/providers/base_provider.py +51 -4
  14. isa_model/inference/providers/ml_provider.py +50 -0
  15. isa_model/inference/providers/ollama_provider.py +37 -18
  16. isa_model/inference/providers/openai_provider.py +65 -36
  17. isa_model/inference/providers/replicate_provider.py +42 -30
  18. isa_model/inference/services/audio/base_stt_service.py +21 -2
  19. isa_model/inference/services/audio/openai_realtime_service.py +353 -0
  20. isa_model/inference/services/audio/openai_stt_service.py +252 -0
  21. isa_model/inference/services/audio/openai_tts_service.py +149 -9
  22. isa_model/inference/services/audio/replicate_tts_service.py +239 -0
  23. isa_model/inference/services/base_service.py +36 -1
  24. isa_model/inference/services/embedding/base_embed_service.py +112 -0
  25. isa_model/inference/services/embedding/ollama_embed_service.py +28 -2
  26. isa_model/inference/services/embedding/openai_embed_service.py +223 -0
  27. isa_model/inference/services/llm/__init__.py +2 -0
  28. isa_model/inference/services/llm/base_llm_service.py +158 -86
  29. isa_model/inference/services/llm/llm_adapter.py +414 -0
  30. isa_model/inference/services/llm/ollama_llm_service.py +252 -63
  31. isa_model/inference/services/llm/openai_llm_service.py +231 -93
  32. isa_model/inference/services/llm/triton_llm_service.py +481 -0
  33. isa_model/inference/services/ml/base_ml_service.py +78 -0
  34. isa_model/inference/services/ml/sklearn_ml_service.py +140 -0
  35. isa_model/inference/services/vision/__init__.py +3 -3
  36. isa_model/inference/services/vision/base_image_gen_service.py +161 -0
  37. isa_model/inference/services/vision/base_vision_service.py +177 -0
  38. isa_model/inference/services/vision/helpers/image_utils.py +4 -3
  39. isa_model/inference/services/vision/ollama_vision_service.py +151 -17
  40. isa_model/inference/services/vision/openai_vision_service.py +275 -41
  41. isa_model/inference/services/vision/replicate_image_gen_service.py +278 -118
  42. isa_model/training/__init__.py +62 -32
  43. isa_model/training/cloud/__init__.py +22 -0
  44. isa_model/training/cloud/job_orchestrator.py +402 -0
  45. isa_model/training/cloud/runpod_trainer.py +454 -0
  46. isa_model/training/cloud/storage_manager.py +482 -0
  47. isa_model/training/core/__init__.py +23 -0
  48. isa_model/training/core/config.py +181 -0
  49. isa_model/training/core/dataset.py +222 -0
  50. isa_model/training/core/trainer.py +720 -0
  51. isa_model/training/core/utils.py +213 -0
  52. isa_model/training/factory.py +229 -198
  53. isa_model-0.3.1.dist-info/METADATA +465 -0
  54. isa_model-0.3.1.dist-info/RECORD +91 -0
  55. isa_model/core/model_router.py +0 -226
  56. isa_model/core/model_version.py +0 -0
  57. isa_model/core/resource_manager.py +0 -202
  58. isa_model/deployment/gpu_fp16_ds8/models/deepseek_r1/1/model.py +0 -120
  59. isa_model/deployment/gpu_fp16_ds8/scripts/download_model.py +0 -18
  60. isa_model/training/engine/llama_factory/__init__.py +0 -39
  61. isa_model/training/engine/llama_factory/config.py +0 -115
  62. isa_model/training/engine/llama_factory/data_adapter.py +0 -284
  63. isa_model/training/engine/llama_factory/examples/__init__.py +0 -6
  64. isa_model/training/engine/llama_factory/examples/finetune_with_tracking.py +0 -185
  65. isa_model/training/engine/llama_factory/examples/rlhf_with_tracking.py +0 -163
  66. isa_model/training/engine/llama_factory/factory.py +0 -331
  67. isa_model/training/engine/llama_factory/rl.py +0 -254
  68. isa_model/training/engine/llama_factory/trainer.py +0 -171
  69. isa_model/training/image_model/configs/create_config.py +0 -37
  70. isa_model/training/image_model/configs/create_flux_config.py +0 -26
  71. isa_model/training/image_model/configs/create_lora_config.py +0 -21
  72. isa_model/training/image_model/prepare_massed_compute.py +0 -97
  73. isa_model/training/image_model/prepare_upload.py +0 -17
  74. isa_model/training/image_model/raw_data/create_captions.py +0 -16
  75. isa_model/training/image_model/raw_data/create_lora_captions.py +0 -20
  76. isa_model/training/image_model/raw_data/pre_processing.py +0 -200
  77. isa_model/training/image_model/train/train.py +0 -42
  78. isa_model/training/image_model/train/train_flux.py +0 -41
  79. isa_model/training/image_model/train/train_lora.py +0 -57
  80. isa_model/training/image_model/train_main.py +0 -25
  81. isa_model-0.0.2.dist-info/METADATA +0 -327
  82. isa_model-0.0.2.dist-info/RECORD +0 -92
  83. isa_model-0.0.2.dist-info/licenses/LICENSE +0 -21
  84. /isa_model/training/{llm_model/annotation → annotation}/annotation_schema.py +0 -0
  85. /isa_model/training/{llm_model/annotation → annotation}/processors/annotation_processor.py +0 -0
  86. /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_manager.py +0 -0
  87. /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_schema.py +0 -0
  88. /isa_model/training/{llm_model/annotation → annotation}/tests/test_annotation_flow.py +0 -0
  89. /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio copy.py +0 -0
  90. /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio_upload.py +0 -0
  91. /isa_model/training/{llm_model/annotation → annotation}/views/annotation_controller.py +0 -0
  92. {isa_model-0.0.2.dist-info → isa_model-0.3.1.dist-info}/WHEEL +0 -0
  93. {isa_model-0.0.2.dist-info → isa_model-0.3.1.dist-info}/top_level.txt +0 -0
@@ -1,80 +1,314 @@
- from typing import Dict, Any, Union
+ from typing import Dict, Any, Union, List, Optional, BinaryIO
+ import base64
+ import aiohttp
  from openai import AsyncOpenAI
  from tenacity import retry, stop_after_attempt, wait_exponential
- from isa_model.inference.services.base_service import BaseService
+ from isa_model.inference.services.vision.base_vision_service import BaseVisionService
  from isa_model.inference.providers.base_provider import BaseProvider
- from .helpers.image_utils import compress_image, encode_image_to_base64
+ from isa_model.inference.billing_tracker import ServiceType
  import logging

  logger = logging.getLogger(__name__)

- class OpenAIVisionService(BaseService):
-     """Vision model service wrapper for YYDS"""
+ class OpenAIVisionService(BaseVisionService):
+     """OpenAI Vision service using gpt-4.1-nano with vision capabilities"""

-     def __init__(self, provider: 'BaseProvider', model_name: str):
+     def __init__(self, provider: 'BaseProvider', model_name: str = "gpt-4.1-nano"):
          super().__init__(provider, model_name)
-         # Initialize the AsyncOpenAI client
-         self._client = AsyncOpenAI(
-             api_key=self.config.get('api_key'),
-             base_url=self.config.get('base_url')
-         )
-         self.max_tokens = self.config.get('max_tokens', 1000)
-         self.temperature = self.config.get('temperature', 0.7)
+
+         # Get full configuration from provider (including sensitive data)
+         provider_config = provider.get_full_config()
+
+         # Initialize AsyncOpenAI client with provider configuration
+         try:
+             if not provider_config.get("api_key"):
+                 raise ValueError("OpenAI API key not found in provider configuration")
+
+             self._client = AsyncOpenAI(
+                 api_key=provider_config["api_key"],
+                 base_url=provider_config.get("base_url", "https://api.openai.com/v1"),
+                 organization=provider_config.get("organization")
+             )
+
+             logger.info(f"Initialized OpenAIVisionService with model {self.model_name}")
+
+         except Exception as e:
+             logger.error(f"Failed to initialize OpenAI client: {e}")
+             raise ValueError(f"Failed to initialize OpenAI client. Check your API key configuration: {e}") from e
+
+         self.max_tokens = provider_config.get('max_tokens', 1000)
+         self.temperature = provider_config.get('temperature', 0.7)

      @property
      def client(self) -> AsyncOpenAI:
-         """Get the underlying OpenAI client"""
+         """Get the underlying OpenAI client"""
          return self._client
+     async def _download_image(self, image_url: str) -> bytes:
+         """Download image from URL"""
+         async with aiohttp.ClientSession() as session:
+             async with session.get(image_url) as response:
+                 if response.status == 200:
+                     return await response.read()
+                 else:
+                     raise ValueError(f"Failed to download image from {image_url}: {response.status}")
+
+     def _encode_image(self, image_path_or_data: Union[str, bytes, BinaryIO]) -> str:
+         """Encode image to base64"""
+         if isinstance(image_path_or_data, str):
+             # If it's a file path
+             with open(image_path_or_data, "rb") as image_file:
+                 return base64.b64encode(image_file.read()).decode("utf-8")
+         elif hasattr(image_path_or_data, 'read'):
+             # If it's a file-like object (BinaryIO)
+             data = image_path_or_data.read()  # type: ignore
+             if isinstance(data, bytes):
+                 return base64.b64encode(data).decode("utf-8")
+             else:
+                 raise ValueError("File-like object did not return bytes")
+         else:
+             # If it's bytes data
+             return base64.b64encode(image_path_or_data).decode("utf-8")  # type: ignore
+
      @retry(
          stop=stop_after_attempt(3),
          wait=wait_exponential(multiplier=1, min=4, max=10),
          reraise=True
      )
-     async def analyze_image(self, image_data: Union[bytes, str], query: str) -> str:
-         """Analyze an image and return the result
+     async def analyze_image(
+         self,
+         image: Union[str, BinaryIO],
+         prompt: Optional[str] = None,
+         max_tokens: int = 1000
+     ) -> Dict[str, Any]:
+         """
+         Analyze image and provide description or answer questions

          Args:
-             image_data: Image data, either raw bytes or an already base64-encoded string
-             query: The query text
+             image: Path to image file, URL, or image data
+             prompt: Optional text prompt/question about the image
+             max_tokens: Maximum tokens in response

          Returns:
-             str: The analysis result
+             Dict containing analysis results
          """
          try:
-             # Process the image data
-             if isinstance(image_data, bytes):
-                 # Compress and encode the image
-                 compressed_image = compress_image(image_data)
-                 image_b64 = encode_image_to_base64(compressed_image)
+             # Handle different input types
+             if isinstance(image, str):
+                 if image.startswith(('http://', 'https://')):
+                     # Download image from URL
+                     image_bytes = await self._download_image(image)
+                     base64_image = self._encode_image(image_bytes)
+                 else:
+                     # File path
+                     base64_image = self._encode_image(image)
              else:
-                 image_b64 = image_data
-
-             # Strip any existing base64 prefix
-             if 'base64,' in image_b64:
-                 image_b64 = image_b64.split('base64,')[1]
-
-             # Create the request using the AsyncOpenAI client
-             response = await self._client.chat.completions.create(
-                 model=self.model_name,
-                 messages=[{
+                 # BinaryIO or bytes data
+                 if hasattr(image, 'read'):
+                     image_data = image.read()
+                 else:
+                     image_data = image
+                 base64_image = self._encode_image(image_data)
+
+             # Use default prompt if none provided
+             if prompt is None:
+                 prompt = "Please describe what you see in this image in detail."
+
+             # Use the standard chat completions API with vision
+             messages = [
+                 {
                      "role": "user",
                      "content": [
-                         {"type": "text", "text": query},
+                         {"type": "text", "text": prompt},
                          {
                              "type": "image_url",
                              "image_url": {
-                                 "url": f"data:image/jpeg;base64,{image_b64}"
+                                 "url": f"data:image/jpeg;base64,{base64_image}",
+                                 "detail": "auto"
                              }
-                         }
-                     ]
-                 }],
-                 max_tokens=self.max_tokens,
+                         },
+                     ],
+                 }
+             ]
+
+             response = await self._client.chat.completions.create(  # type: ignore
+                 model=self.model_name,
+                 messages=messages,  # type: ignore
+                 max_tokens=max_tokens,
                  temperature=self.temperature
              )

-             return response.choices[0].message.content
+             # Track usage for billing
+             if response.usage:
+                 self._track_usage(
+                     service_type=ServiceType.VISION,
+                     operation="image_analysis",
+                     input_tokens=response.usage.prompt_tokens,
+                     output_tokens=response.usage.completion_tokens,
+                     metadata={"prompt": prompt[:100], "model": self.model_name}
+                 )
+
+             content = response.choices[0].message.content or ""
+
+             return {
+                 "text": content,
+                 "confidence": 1.0,  # OpenAI doesn't provide confidence scores
+                 "detected_objects": [],  # Would need separate object detection
+                 "metadata": {
+                     "model": self.model_name,
+                     "prompt": prompt,
+                     "tokens_used": response.usage.total_tokens if response.usage else 0
+                 }
+             }

          except Exception as e:
              logger.error(f"Error in image analysis: {e}")
              raise
+
+     async def analyze_images(
+         self,
+         images: List[Union[str, BinaryIO]],
+         prompt: Optional[str] = None,
+         max_tokens: int = 1000
+     ) -> List[Dict[str, Any]]:
+         """Analyze multiple images"""
+         results = []
+         for image in images:
+             result = await self.analyze_image(image, prompt, max_tokens)
+             results.append(result)
+         return results
+
+     async def describe_image(
+         self,
+         image: Union[str, BinaryIO],
+         detail_level: str = "medium"
+     ) -> Dict[str, Any]:
+         """Generate detailed description of image"""
+         detail_prompts = {
+             "low": "Briefly describe what you see in this image.",
+             "medium": "Describe what you see in this image in detail, including objects, colors, and scene.",
+             "high": "Provide a comprehensive and detailed description of this image, including all visible objects, their positions, colors, textures, lighting, composition, and any text or symbols present."
+         }
+
+         prompt = detail_prompts.get(detail_level, detail_prompts["medium"])
+         result = await self.analyze_image(image, prompt, 1500)
+
+         return {
+             "description": result["text"],
+             "objects": [],  # Would need object detection API
+             "scene": result["text"],  # Use same description
+             "colors": [],  # Would need color analysis
+             "detail_level": detail_level,
+             "metadata": result["metadata"]
+         }
+
+     async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
+         """Extract text from image (OCR)"""
+         prompt = "Extract all text visible in this image. Provide only the text content, maintaining the original structure and formatting as much as possible."
+         result = await self.analyze_image(image, prompt, 1000)
+
+         return {
+             "text": result["text"],
+             "confidence": 1.0,
+             "bounding_boxes": [],  # OpenAI vision doesn't provide bounding boxes
+             "language": "unknown",  # Would need language detection
+             "metadata": result["metadata"]
+         }
+
+     async def detect_objects(
+         self,
+         image: Union[str, BinaryIO],
+         confidence_threshold: float = 0.5
+     ) -> Dict[str, Any]:
+         """Detect objects in image"""
+         prompt = "List all objects visible in this image. For each object, provide the object name and a brief description of its location in the image."
+         result = await self.analyze_image(image, prompt, 1000)
+
+         # Parse the response to extract object information
+         objects = []
+         lines = result["text"].split('\n')
+         for line in lines:
+             line = line.strip()
+             if line and not line.startswith(('In this image', 'The image shows', 'I can see')):
+                 objects.append({
+                     "label": line,
+                     "confidence": 1.0  # OpenAI doesn't provide confidence scores
+                 })
+
+         return {
+             "objects": objects,
+             "count": len(objects),
+             "bounding_boxes": [],  # Not available with current API
+             "metadata": result["metadata"]
+         }
+
+     async def classify_image(
+         self,
+         image: Union[str, BinaryIO],
+         categories: Optional[List[str]] = None
+     ) -> Dict[str, Any]:
+         """Classify image into categories"""
+         if categories:
+             category_list = ", ".join(categories)
+             prompt = f"Classify this image into one of these categories: {category_list}. Respond with only the most appropriate category name."
+         else:
+             prompt = "What category best describes this image? Provide a single category name."
+
+         result = await self.analyze_image(image, prompt, 100)
+         category = result["text"].strip()
+
+         return {
+             "category": category,
+             "confidence": 1.0,
+             "all_predictions": [{"category": category, "confidence": 1.0}],
+             "metadata": result["metadata"]
+         }
+
+     async def compare_images(
+         self,
+         image1: Union[str, BinaryIO],
+         image2: Union[str, BinaryIO]
+     ) -> Dict[str, Any]:
+         """Compare two images for similarity"""
+         # For now, analyze both images separately and compare descriptions
+         result1 = await self.analyze_image(image1, "Describe this image in detail.")
+         result2 = await self.analyze_image(image2, "Describe this image in detail.")
+
+         # Use LLM to compare the descriptions
+         comparison_prompt = f"Compare these two image descriptions and provide a similarity analysis:\n\nImage 1: {result1['text']}\n\nImage 2: {result2['text']}\n\nProvide: 1) A similarity score from 0.0 to 1.0, 2) Key differences, 3) Common elements."
+
+         comparison_result = await self._client.chat.completions.create(
+             model=self.model_name,
+             messages=[{"role": "user", "content": comparison_prompt}],
+             max_tokens=500,
+             temperature=0.3
+         )
+
+         comparison_text = comparison_result.choices[0].message.content or ""
+
+         return {
+             "similarity_score": 0.5,  # Would need better parsing to extract actual score
+             "differences": comparison_text,
+             "common_elements": comparison_text,
+             "metadata": {
+                 "model": self.model_name,
+                 "comparison_method": "description_based"
+             }
+         }
+
+     def get_supported_formats(self) -> List[str]:
+         """Get list of supported image formats"""
+         return ['jpg', 'jpeg', 'png', 'gif', 'webp']
+
+     def get_max_image_size(self) -> Dict[str, int]:
+         """Get maximum supported image dimensions"""
+         return {
+             "width": 2048,
+             "height": 2048,
+             "file_size_mb": 20
+         }
+
+     async def close(self):
+         """Clean up resources"""
+         if hasattr(self._client, 'close'):
+             await self._client.close()
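
For orientation, the sketch below shows how the rewritten service might be driven end to end. It is a minimal example based only on the signatures visible in this diff; the OpenAIProvider import path appears in the file list above, but its constructor arguments (a config dict carrying api_key) and the surrounding setup are assumptions, not confirmed API.

import asyncio

from isa_model.inference.providers.openai_provider import OpenAIProvider
from isa_model.inference.services.vision.openai_vision_service import OpenAIVisionService

async def main():
    # Assumed construction: this diff does not show how OpenAIProvider is
    # built, so a config-dict style init with an API key is hypothetical here.
    provider = OpenAIProvider(config={"api_key": "sk-..."})
    service = OpenAIVisionService(provider, model_name="gpt-4.1-nano")
    try:
        # analyze_image accepts a file path, URL, or file-like object and
        # returns a dict with "text", "confidence", and "metadata" keys.
        result = await service.analyze_image(
            "photos/example.jpg",
            prompt="What objects are visible in this picture?",
        )
        print(result["text"])

        # extract_text reuses analyze_image with an OCR-oriented prompt.
        ocr = await service.extract_text("photos/example.jpg")
        print(ocr["text"])
    finally:
        # close() shuts down the underlying AsyncOpenAI client.
        await service.close()

asyncio.run(main())

Note that billing appears to be handled inside analyze_image itself (via the inherited _track_usage hook and ServiceType.VISION), so callers should not need any extra bookkeeping.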