isa-model 0.2.0__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. isa_model/__init__.py +1 -1
  2. isa_model/core/storage/hf_storage.py +419 -0
  3. isa_model/deployment/__init__.py +52 -0
  4. isa_model/deployment/core/__init__.py +34 -0
  5. isa_model/deployment/core/deployment_config.py +356 -0
  6. isa_model/deployment/core/deployment_manager.py +549 -0
  7. isa_model/deployment/core/isa_deployment_service.py +401 -0
  8. isa_model/eval/factory.py +381 -140
  9. isa_model/inference/ai_factory.py +142 -240
  10. isa_model/inference/providers/ml_provider.py +50 -0
  11. isa_model/inference/services/audio/openai_tts_service.py +104 -3
  12. isa_model/inference/services/embedding/base_embed_service.py +112 -0
  13. isa_model/inference/services/embedding/ollama_embed_service.py +28 -2
  14. isa_model/inference/services/llm/__init__.py +2 -0
  15. isa_model/inference/services/llm/base_llm_service.py +111 -1
  16. isa_model/inference/services/llm/ollama_llm_service.py +234 -26
  17. isa_model/inference/services/llm/openai_llm_service.py +243 -28
  18. isa_model/inference/services/llm/triton_llm_service.py +481 -0
  19. isa_model/inference/services/ml/base_ml_service.py +78 -0
  20. isa_model/inference/services/ml/sklearn_ml_service.py +140 -0
  21. isa_model/inference/services/vision/__init__.py +3 -3
  22. isa_model/inference/services/vision/base_image_gen_service.py +161 -0
  23. isa_model/inference/services/vision/base_vision_service.py +177 -0
  24. isa_model/inference/services/vision/ollama_vision_service.py +143 -17
  25. isa_model/inference/services/vision/replicate_image_gen_service.py +139 -7
  26. isa_model/training/__init__.py +62 -32
  27. isa_model/training/cloud/__init__.py +22 -0
  28. isa_model/training/cloud/job_orchestrator.py +402 -0
  29. isa_model/training/cloud/runpod_trainer.py +454 -0
  30. isa_model/training/cloud/storage_manager.py +482 -0
  31. isa_model/training/core/__init__.py +23 -0
  32. isa_model/training/core/config.py +181 -0
  33. isa_model/training/core/dataset.py +222 -0
  34. isa_model/training/core/trainer.py +720 -0
  35. isa_model/training/core/utils.py +213 -0
  36. isa_model/training/factory.py +229 -198
  37. isa_model-0.2.9.dist-info/METADATA +465 -0
  38. isa_model-0.2.9.dist-info/RECORD +86 -0
  39. isa_model/core/model_router.py +0 -226
  40. isa_model/core/model_version.py +0 -0
  41. isa_model/core/resource_manager.py +0 -202
  42. isa_model/deployment/gpu_fp16_ds8/models/deepseek_r1/1/model.py +0 -120
  43. isa_model/deployment/gpu_fp16_ds8/scripts/download_model.py +0 -18
  44. isa_model/training/engine/llama_factory/__init__.py +0 -39
  45. isa_model/training/engine/llama_factory/config.py +0 -115
  46. isa_model/training/engine/llama_factory/data_adapter.py +0 -284
  47. isa_model/training/engine/llama_factory/examples/__init__.py +0 -6
  48. isa_model/training/engine/llama_factory/examples/finetune_with_tracking.py +0 -185
  49. isa_model/training/engine/llama_factory/examples/rlhf_with_tracking.py +0 -163
  50. isa_model/training/engine/llama_factory/factory.py +0 -331
  51. isa_model/training/engine/llama_factory/rl.py +0 -254
  52. isa_model/training/engine/llama_factory/trainer.py +0 -171
  53. isa_model/training/image_model/configs/create_config.py +0 -37
  54. isa_model/training/image_model/configs/create_flux_config.py +0 -26
  55. isa_model/training/image_model/configs/create_lora_config.py +0 -21
  56. isa_model/training/image_model/prepare_massed_compute.py +0 -97
  57. isa_model/training/image_model/prepare_upload.py +0 -17
  58. isa_model/training/image_model/raw_data/create_captions.py +0 -16
  59. isa_model/training/image_model/raw_data/create_lora_captions.py +0 -20
  60. isa_model/training/image_model/raw_data/pre_processing.py +0 -200
  61. isa_model/training/image_model/train/train.py +0 -42
  62. isa_model/training/image_model/train/train_flux.py +0 -41
  63. isa_model/training/image_model/train/train_lora.py +0 -57
  64. isa_model/training/image_model/train_main.py +0 -25
  65. isa_model-0.2.0.dist-info/METADATA +0 -327
  66. isa_model-0.2.0.dist-info/RECORD +0 -92
  67. isa_model-0.2.0.dist-info/licenses/LICENSE +0 -21
  68. /isa_model/training/{llm_model/annotation → annotation}/annotation_schema.py +0 -0
  69. /isa_model/training/{llm_model/annotation → annotation}/processors/annotation_processor.py +0 -0
  70. /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_manager.py +0 -0
  71. /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_schema.py +0 -0
  72. /isa_model/training/{llm_model/annotation → annotation}/tests/test_annotation_flow.py +0 -0
  73. /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio copy.py +0 -0
  74. /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio_upload.py +0 -0
  75. /isa_model/training/{llm_model/annotation → annotation}/views/annotation_controller.py +0 -0
  76. {isa_model-0.2.0.dist-info → isa_model-0.2.9.dist-info}/WHEEL +0 -0
  77. {isa_model-0.2.0.dist-info → isa_model-0.2.9.dist-info}/top_level.txt +0 -0
@@ -6,7 +6,7 @@ Vision服务包
6
6
  包含所有视觉相关服务模块
7
7
  """
8
8
 
9
- # 导出ReplicateVisionService
10
- from isa_model.inference.services.vision.replicate_image_gen_service import ReplicateVisionService
9
+ # 导出ReplicateImageGenService
10
+ from isa_model.inference.services.vision.replicate_image_gen_service import ReplicateImageGenService
11
11
 
12
- __all__ = ["ReplicateVisionService"]
12
+ __all__ = ["ReplicateImageGenService"]
@@ -0,0 +1,161 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Dict, Any, List, Union, Optional, BinaryIO
3
+ from isa_model.inference.services.base_service import BaseService
4
+
5
class BaseImageGenService(BaseService):
    """Contract for services that generate images from prompts."""

    @abstractmethod
    async def generate_image(
        self,
        prompt: str,
        negative_prompt: Optional[str] = None,
        width: int = 512,
        height: int = 512,
        num_inference_steps: int = 20,
        guidance_scale: float = 7.5,
        seed: Optional[int] = None
    ) -> Dict[str, Any]:
        """
        Produce one image for a text prompt.

        Args:
            prompt: What the image should show.
            negative_prompt: What the image should not show.
            width: Requested width, in pixels.
            height: Requested height, in pixels.
            num_inference_steps: How many denoising steps to run.
            guidance_scale: Strength of prompt adherence.
            seed: Random seed; fix it to get reproducible output.

        Returns:
            A dict with the keys:
                - image_data: Binary image data or a PIL Image
                - format: Image format, e.g. 'png' or 'jpg'
                - width: Width of the produced image
                - height: Height of the produced image
                - seed: Seed that was used
        """

    @abstractmethod
    async def generate_images(
        self,
        prompt: str,
        num_images: int = 1,
        negative_prompt: Optional[str] = None,
        width: int = 512,
        height: int = 512,
        num_inference_steps: int = 20,
        guidance_scale: float = 7.5,
        seed: Optional[int] = None
    ) -> List[Dict[str, Any]]:
        """
        Produce several images for the same text prompt.

        Args:
            prompt: What the images should show.
            num_images: How many images to produce.
            negative_prompt: What the images should not show.
            width: Requested width, in pixels.
            height: Requested height, in pixels.
            num_inference_steps: How many denoising steps to run.
            guidance_scale: Strength of prompt adherence.
            seed: Random seed; fix it to get reproducible output.

        Returns:
            A list of generation result dicts.
        """

    @abstractmethod
    async def generate_image_to_file(
        self,
        prompt: str,
        output_path: str,
        negative_prompt: Optional[str] = None,
        width: int = 512,
        height: int = 512,
        num_inference_steps: int = 20,
        guidance_scale: float = 7.5,
        seed: Optional[int] = None
    ) -> Dict[str, Any]:
        """
        Produce an image and write it straight to disk.

        Args:
            prompt: What the image should show.
            output_path: Where the generated image is saved.
            negative_prompt: What the image should not show.
            width: Requested width, in pixels.
            height: Requested height, in pixels.
            num_inference_steps: How many denoising steps to run.
            guidance_scale: Strength of prompt adherence.
            seed: Random seed; fix it to get reproducible output.

        Returns:
            A dict with the keys:
                - file_path: Path of the saved image
                - width: Image width
                - height: Image height
                - seed: Seed that was used
        """

    @abstractmethod
    async def image_to_image(
        self,
        prompt: str,
        init_image: Union[str, BinaryIO],
        strength: float = 0.8,
        negative_prompt: Optional[str] = None,
        num_inference_steps: int = 20,
        guidance_scale: float = 7.5,
        seed: Optional[int] = None
    ) -> Dict[str, Any]:
        """
        Produce an image derived from an existing image plus a prompt.

        Args:
            prompt: The modifications wanted.
            init_image: Path to the starting image, or its data.
            strength: How far to move away from the starting image
                (0.0-1.0).
            negative_prompt: What the result should not show.
            num_inference_steps: How many denoising steps to run.
            guidance_scale: Strength of prompt adherence.
            seed: Random seed; fix it to get reproducible output.

        Returns:
            A dict holding the generation results.
        """

    @abstractmethod
    def get_supported_sizes(self) -> List[Dict[str, int]]:
        """
        Report the image dimensions the service supports.

        Returns:
            A list of dicts, each with 'width' and 'height' keys.
        """

    @abstractmethod
    def get_model_info(self) -> Dict[str, Any]:
        """
        Report details of the underlying image generation model.

        Returns:
            A dict with the keys:
                - name: Model name
                - max_width: Largest supported width
                - max_height: Largest supported height
                - supports_negative_prompt: Whether negative prompts work
                - supports_img2img: Whether image-to-image works
        """

    @abstractmethod
    async def close(self):
        """Release any resources held by the service."""
@@ -0,0 +1,177 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Dict, Any, List, Union, Optional, BinaryIO
3
+ from isa_model.inference.services.base_service import BaseService
4
+
5
class BaseVisionService(BaseService):
    """Contract for services that interpret the content of images."""

    @abstractmethod
    async def analyze_image(
        self,
        image: Union[str, BinaryIO],
        prompt: Optional[str] = None,
        max_tokens: int = 1000
    ) -> Dict[str, Any]:
        """
        Describe an image or answer a question about it.

        Args:
            image: Path to an image file, or the image data.
            prompt: Optional question or instruction about the image.
            max_tokens: Upper bound on tokens in the response.

        Returns:
            A dict with the keys:
                - text: The description or answer
                - confidence: Confidence score (if available)
                - detected_objects: Detected objects (if available)
                - metadata: Extra information about the analysis
        """

    @abstractmethod
    async def analyze_images(
        self,
        images: List[Union[str, BinaryIO]],
        prompt: Optional[str] = None,
        max_tokens: int = 1000
    ) -> List[Dict[str, Any]]:
        """
        Analyze a batch of images.

        Args:
            images: Image paths or image data, one entry per image.
            prompt: Optional question or instruction about the images.
            max_tokens: Upper bound on tokens in the response.

        Returns:
            A list of analysis result dicts.
        """

    @abstractmethod
    async def describe_image(
        self,
        image: Union[str, BinaryIO],
        detail_level: str = "medium"
    ) -> Dict[str, Any]:
        """
        Write a description of an image.

        Args:
            image: Path to an image file, or the image data.
            detail_level: Amount of detail ("low", "medium", "high").

        Returns:
            A dict with the keys:
                - description: The text description
                - objects: Detected objects
                - scene: Scene description
                - colors: Dominant colors
        """

    @abstractmethod
    async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
        """
        Read the text contained in an image (OCR).

        Args:
            image: Path to an image file, or the image data.

        Returns:
            A dict with the keys:
                - text: The extracted text
                - confidence: Overall confidence score
                - bounding_boxes: Text regions with coordinates
                  (if available)
                - language: Detected language (if available)
        """

    @abstractmethod
    async def detect_objects(
        self,
        image: Union[str, BinaryIO],
        confidence_threshold: float = 0.5
    ) -> Dict[str, Any]:
        """
        Find the objects present in an image.

        Args:
            image: Path to an image file, or the image data.
            confidence_threshold: Lowest confidence a detection may have.

        Returns:
            A dict with the keys:
                - objects: Detected objects with labels and confidence
                - count: How many objects were detected
                - bounding_boxes: Object locations (if available)
        """

    @abstractmethod
    async def classify_image(
        self,
        image: Union[str, BinaryIO],
        categories: Optional[List[str]] = None
    ) -> Dict[str, Any]:
        """
        Assign an image to a category.

        Args:
            image: Path to an image file, or the image data.
            categories: Optional list of candidate categories.

        Returns:
            A dict with the keys:
                - category: The top predicted category
                - confidence: Confidence score
                - all_predictions: Every prediction with its score
        """

    @abstractmethod
    async def compare_images(
        self,
        image1: Union[str, BinaryIO],
        image2: Union[str, BinaryIO]
    ) -> Dict[str, Any]:
        """
        Judge how similar two images are.

        Args:
            image1: Path or data of the first image.
            image2: Path or data of the second image.

        Returns:
            A dict with the keys:
                - similarity_score: Numerical similarity score
                - differences: Description of the key differences
                - common_elements: Description of what the images share
        """

    @abstractmethod
    def get_supported_formats(self) -> List[str]:
        """
        Report the image formats the service accepts.

        Returns:
            File extensions, e.g. ['jpg', 'png', 'gif'].
        """

    @abstractmethod
    def get_max_image_size(self) -> Dict[str, int]:
        """
        Report the largest image dimensions the service accepts.

        Returns:
            A dict with 'width' and 'height' keys for the maximum size.
        """

    @abstractmethod
    async def close(self):
        """Release any resources held by the service."""
@@ -2,15 +2,15 @@ import os
2
2
  import json
3
3
  import base64
4
4
  import ollama
5
- from typing import Dict, Any, Union
5
+ from typing import Dict, Any, Union, List, Optional, BinaryIO
6
6
  from tenacity import retry, stop_after_attempt, wait_exponential
7
- from isa_model.inference.services.base_service import BaseService
7
+ from isa_model.inference.services.vision.base_vision_service import BaseVisionService
8
8
  from isa_model.inference.providers.base_provider import BaseProvider
9
9
  import logging
10
10
 
11
11
  logger = logging.getLogger(__name__)
12
12
 
13
- class OllamaVisionService(BaseService):
13
+ class OllamaVisionService(BaseVisionService):
14
14
  """Vision model service wrapper for Ollama using base64 encoded images"""
15
15
 
16
16
  def __init__(self, provider: 'BaseProvider', model_name: str = 'gemma3:4b'):
@@ -18,30 +18,38 @@ class OllamaVisionService(BaseService):
18
18
  self.max_tokens = self.config.get('max_tokens', 1000)
19
19
  self.temperature = self.config.get('temperature', 0.7)
20
20
 
21
def _get_image_data(self, image: Union[str, bytes, BinaryIO]) -> bytes:
    """Return the raw bytes of an image given in any supported form.

    Args:
        image: A filesystem path (``str``), in-memory image content
            (``bytes``, ``bytearray`` or ``memoryview``), or an object
            with a ``read()`` method such as an open binary file.

    Returns:
        The image content as ``bytes``.

    Raises:
        OSError: If ``image`` is a path that cannot be opened or read.
        AttributeError: If ``image`` is none of the above and has no
            ``read()`` method.
    """
    # In-memory bytes were accepted by the previous ``analyze_image``
    # signature; pass them through instead of failing on ``.read()``.
    if isinstance(image, (bytes, bytearray, memoryview)):
        return bytes(image)

    if isinstance(image, str):
        with open(image, 'rb') as handle:
            return handle.read()

    return image.read()
28
+
21
29
  @retry(
22
30
  stop=stop_after_attempt(3),
23
31
  wait=wait_exponential(multiplier=1, min=4, max=10),
24
32
  reraise=True
25
33
  )
26
- async def analyze_image(self, image_data: Union[bytes, str], query: str) -> str:
27
- """分析图片并返回结果
28
-
29
- Args:
30
- image_data: 图片数据,可以是 bytes 或图片路径字符串
31
- query: 查询文本
32
-
33
- Returns:
34
- str: 分析结果
34
+ async def analyze_image(
35
+ self,
36
+ image: Union[str, BinaryIO],
37
+ prompt: Optional[str] = None,
38
+ max_tokens: int = 1000
39
+ ) -> Dict[str, Any]:
40
+ """
41
+ Analyze image and provide description or answer questions
35
42
  """
36
43
  try:
37
- # 如果是文件路径,读取文件内容
38
- if isinstance(image_data, str):
39
- with open(image_data, 'rb') as f:
40
- image_data = f.read()
44
+ # 获取图像数据
45
+ image_data = self._get_image_data(image)
41
46
 
42
47
  # 转换为base64
43
48
  image_base64 = base64.b64encode(image_data).decode('utf-8')
44
49
 
50
+ # 使用默认提示词如果没有提供
51
+ query = prompt or "请描述这张图片的内容。"
52
+
45
53
  # 使用 ollama 库直接调用
46
54
  response = ollama.chat(
47
55
  model=self.model_name,
@@ -52,9 +60,127 @@ class OllamaVisionService(BaseService):
52
60
  }]
53
61
  )
54
62
 
55
- return response['message']['content']
63
+ content = response['message']['content']
64
+
65
+ return {
66
+ "text": content,
67
+ "confidence": 1.0, # Ollama doesn't provide confidence scores
68
+ "detected_objects": [], # Basic implementation
69
+ "metadata": {
70
+ "model": self.model_name,
71
+ "prompt": query
72
+ }
73
+ }
56
74
 
57
75
  except Exception as e:
58
76
  logger.error(f"Error in image analysis: {e}")
59
77
  raise
60
78
 
79
async def analyze_images(
    self,
    images: List[Union[str, BinaryIO]],
    prompt: Optional[str] = None,
    max_tokens: int = 1000
) -> List[Dict[str, Any]]:
    """Analyze several images, one after another.

    Each image is handed to ``analyze_image`` with the same ``prompt``
    and ``max_tokens``; the next call starts only after the previous one
    has finished.

    Args:
        images: Image paths or image data, one entry per image.
        prompt: Optional question or instruction applied to every image.
        max_tokens: Forwarded unchanged to ``analyze_image``.

    Returns:
        The ``analyze_image`` results, in the same order as ``images``.
    """
    return [
        await self.analyze_image(item, prompt, max_tokens)
        for item in images
    ]
91
+
92
async def describe_image(
    self,
    image: Union[str, BinaryIO],
    detail_level: str = "medium"
) -> Dict[str, Any]:
    """Describe an image at the requested level of detail.

    A fixed prompt is chosen for ``detail_level`` and passed to
    ``analyze_image``; a level other than "low", "medium" or "high"
    uses the "medium" prompt.

    Args:
        image: Path to an image file, or the image data.
        detail_level: One of "low", "medium" or "high".

    Returns:
        A dict with ``description`` (the model's text) plus ``objects``,
        ``scene`` and ``colors``, which are fixed placeholder values.
    """
    prompt_by_level = {
        "low": "简单描述这张图片。",
        "medium": "详细描述这张图片的内容、颜色、物体和场景。",
        "high": "非常详细地描述这张图片,包括所有可见的物体、颜色、纹理、场景、情感和任何其他细节。"
    }

    try:
        chosen_prompt = prompt_by_level[detail_level]
    except KeyError:
        # Unknown level: fall back to the medium prompt.
        chosen_prompt = prompt_by_level["medium"]

    analysis = await self.analyze_image(image, chosen_prompt)

    # Only the description comes from the model; the rest are placeholders.
    description = analysis["text"]
    return {
        "description": description,
        "objects": [],
        "scene": "未知",
        "colors": []
    }
113
+
114
async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
    """Ask the model to transcribe the text visible in an image.

    Args:
        image: Path to an image file, or the image data.

    Returns:
        A dict with ``text`` (the model's answer) plus ``confidence``,
        ``bounding_boxes`` and ``language``, which are fixed placeholder
        values.
    """
    ocr_result = await self.analyze_image(image, "提取图片中的所有文字内容。")
    transcribed = ocr_result["text"]

    # Only the text comes from the model; the rest are placeholders.
    return dict(
        text=transcribed,
        confidence=1.0,
        bounding_boxes=[],
        language="未知",
    )
124
+
125
async def detect_objects(
    self,
    image: Union[str, BinaryIO],
    confidence_threshold: float = 0.5
) -> Dict[str, Any]:
    """List the objects the model reports seeing in an image.

    The model is asked for a free-text list and its answer is parsed on
    a best-effort basis: each non-empty line becomes one object, with
    leading bullets or numbering removed. Lines ending in a colon are
    treated as headings and skipped. The model returns no scores or
    locations, so every object gets confidence 1.0 and no bounding boxes
    are produced.

    Args:
        image: Path to an image file, or the image data.
        confidence_threshold: Lowest confidence an object may have to be
            kept. As all confidences are 1.0, only a threshold above 1.0
            removes anything.

    Returns:
        A dict with the keys:
            - objects: List of ``{"label": str, "confidence": float}``
            - count: Number of entries in ``objects``
            - bounding_boxes: Always an empty list
            - raw_text: The model's unparsed answer
    """
    import re  # local import: only this method needs it

    result = await self.analyze_image(image, "识别并列出图片中的所有物体。")
    raw_text = result["text"] or ""

    # A bullet needs trailing whitespace so markdown like "**cat**" is
    # left alone; numbering may end in ASCII or full-width punctuation.
    list_marker = re.compile(r"^\s*(?:[-*•·]\s+|\d+\s*[.、)）]\s*)")

    objects = []
    for line in raw_text.splitlines():
        label = list_marker.sub("", line).strip()
        if not label or label.endswith((":", ":")):
            continue  # blank line or heading such as "Objects:"
        candidate = {"label": label, "confidence": 1.0}
        if candidate["confidence"] >= confidence_threshold:
            objects.append(candidate)

    return {
        "objects": objects,
        "count": len(objects),
        "bounding_boxes": [],
        "raw_text": raw_text
    }
138
+
139
async def classify_image(
    self,
    image: Union[str, BinaryIO],
    categories: Optional[List[str]] = None
) -> Dict[str, Any]:
    """Ask the model which category an image belongs to.

    Args:
        image: Path to an image file, or the image data.
        categories: Optional candidate categories. When given and
            non-empty they are listed in the prompt; otherwise the model
            is asked an open question.

    Returns:
        A dict with ``category`` (the model's answer text, unparsed),
        ``confidence`` (always 1.0) and ``all_predictions`` (a single
        entry repeating that category and confidence).
    """
    if not categories:
        question = "这张图片属于什么类别?"
    else:
        joined = "、".join(categories)
        question = f"将这张图片分类到以下类别之一:{joined}"

    answer = await self.analyze_image(image, question)
    label = answer["text"]

    only_prediction = {"category": label, "confidence": 1.0}
    return {
        "category": label,
        "confidence": 1.0,
        "all_predictions": [only_prediction]
    }
158
+
159
async def compare_images(
    self,
    image1: Union[str, BinaryIO],
    image2: Union[str, BinaryIO]
) -> Dict[str, Any]:
    """Estimate how similar two images are from their descriptions.

    Each image is described separately by the model, and the two texts
    are compared with ``difflib.SequenceMatcher``. The score therefore
    measures how alike the descriptions are, not the pixels.

    Args:
        image1: Path or data of the first image.
        image2: Path or data of the second image.

    Returns:
        A dict with the keys:
            - similarity_score: Ratio in [0.0, 1.0] between the two
              descriptions (1.0 means identical text)
            - differences: Placeholder text; not yet analysed
            - common_elements: Placeholder text; not yet analysed
            - descriptions: The two description texts, in argument order
    """
    from difflib import SequenceMatcher  # local import: only used here

    first = await self.analyze_image(image1, "描述这张图片。")
    second = await self.analyze_image(image2, "描述这张图片。")
    text1 = first["text"] or ""
    text2 = second["text"] or ""

    # autojunk=False stops long descriptions being scored differently
    # by the "popular element" heuristic.
    score = SequenceMatcher(None, text1, text2, autojunk=False).ratio()

    return {
        "similarity_score": score,
        "differences": "需要进一步分析",
        "common_elements": "需要进一步分析",
        "descriptions": [text1, text2]
    }
174
+
175
def get_supported_formats(self) -> List[str]:
    """Return the image file extensions this service reports as supported.

    Returns:
        A new list of lowercase extensions without a leading dot.
    """
    extensions = ('jpg', 'jpeg', 'png', 'gif', 'bmp', 'webp')
    return list(extensions)
178
+
179
def get_max_image_size(self) -> Dict[str, int]:
    """Return the largest image dimensions this service reports.

    Returns:
        A dict with 'width' and 'height' keys, both 4096.
    """
    max_side = 4096
    return {"width": max_side, "height": max_side}
182
+
183
async def close(self):
    """Release resources held by the service.

    This is a no-op: the method does nothing and returns ``None``.
    """
    return None
186
+