isa-model 0.0.2__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +1 -1
- isa_model/core/model_manager.py +69 -4
- isa_model/core/model_registry.py +273 -46
- isa_model/core/storage/hf_storage.py +419 -0
- isa_model/deployment/__init__.py +52 -0
- isa_model/deployment/core/__init__.py +34 -0
- isa_model/deployment/core/deployment_config.py +356 -0
- isa_model/deployment/core/deployment_manager.py +549 -0
- isa_model/deployment/core/isa_deployment_service.py +401 -0
- isa_model/eval/factory.py +381 -140
- isa_model/inference/ai_factory.py +427 -236
- isa_model/inference/billing_tracker.py +406 -0
- isa_model/inference/providers/base_provider.py +51 -4
- isa_model/inference/providers/ml_provider.py +50 -0
- isa_model/inference/providers/ollama_provider.py +37 -18
- isa_model/inference/providers/openai_provider.py +65 -36
- isa_model/inference/providers/replicate_provider.py +42 -30
- isa_model/inference/services/audio/base_stt_service.py +21 -2
- isa_model/inference/services/audio/openai_realtime_service.py +353 -0
- isa_model/inference/services/audio/openai_stt_service.py +252 -0
- isa_model/inference/services/audio/openai_tts_service.py +149 -9
- isa_model/inference/services/audio/replicate_tts_service.py +239 -0
- isa_model/inference/services/base_service.py +36 -1
- isa_model/inference/services/embedding/base_embed_service.py +112 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +28 -2
- isa_model/inference/services/embedding/openai_embed_service.py +223 -0
- isa_model/inference/services/llm/__init__.py +2 -0
- isa_model/inference/services/llm/base_llm_service.py +158 -86
- isa_model/inference/services/llm/llm_adapter.py +414 -0
- isa_model/inference/services/llm/ollama_llm_service.py +252 -63
- isa_model/inference/services/llm/openai_llm_service.py +231 -93
- isa_model/inference/services/llm/triton_llm_service.py +481 -0
- isa_model/inference/services/ml/base_ml_service.py +78 -0
- isa_model/inference/services/ml/sklearn_ml_service.py +140 -0
- isa_model/inference/services/vision/__init__.py +3 -3
- isa_model/inference/services/vision/base_image_gen_service.py +161 -0
- isa_model/inference/services/vision/base_vision_service.py +177 -0
- isa_model/inference/services/vision/helpers/image_utils.py +4 -3
- isa_model/inference/services/vision/ollama_vision_service.py +151 -17
- isa_model/inference/services/vision/openai_vision_service.py +275 -41
- isa_model/inference/services/vision/replicate_image_gen_service.py +278 -118
- isa_model/training/__init__.py +62 -32
- isa_model/training/cloud/__init__.py +22 -0
- isa_model/training/cloud/job_orchestrator.py +402 -0
- isa_model/training/cloud/runpod_trainer.py +454 -0
- isa_model/training/cloud/storage_manager.py +482 -0
- isa_model/training/core/__init__.py +23 -0
- isa_model/training/core/config.py +181 -0
- isa_model/training/core/dataset.py +222 -0
- isa_model/training/core/trainer.py +720 -0
- isa_model/training/core/utils.py +213 -0
- isa_model/training/factory.py +229 -198
- isa_model-0.3.1.dist-info/METADATA +465 -0
- isa_model-0.3.1.dist-info/RECORD +91 -0
- isa_model/core/model_router.py +0 -226
- isa_model/core/model_version.py +0 -0
- isa_model/core/resource_manager.py +0 -202
- isa_model/deployment/gpu_fp16_ds8/models/deepseek_r1/1/model.py +0 -120
- isa_model/deployment/gpu_fp16_ds8/scripts/download_model.py +0 -18
- isa_model/training/engine/llama_factory/__init__.py +0 -39
- isa_model/training/engine/llama_factory/config.py +0 -115
- isa_model/training/engine/llama_factory/data_adapter.py +0 -284
- isa_model/training/engine/llama_factory/examples/__init__.py +0 -6
- isa_model/training/engine/llama_factory/examples/finetune_with_tracking.py +0 -185
- isa_model/training/engine/llama_factory/examples/rlhf_with_tracking.py +0 -163
- isa_model/training/engine/llama_factory/factory.py +0 -331
- isa_model/training/engine/llama_factory/rl.py +0 -254
- isa_model/training/engine/llama_factory/trainer.py +0 -171
- isa_model/training/image_model/configs/create_config.py +0 -37
- isa_model/training/image_model/configs/create_flux_config.py +0 -26
- isa_model/training/image_model/configs/create_lora_config.py +0 -21
- isa_model/training/image_model/prepare_massed_compute.py +0 -97
- isa_model/training/image_model/prepare_upload.py +0 -17
- isa_model/training/image_model/raw_data/create_captions.py +0 -16
- isa_model/training/image_model/raw_data/create_lora_captions.py +0 -20
- isa_model/training/image_model/raw_data/pre_processing.py +0 -200
- isa_model/training/image_model/train/train.py +0 -42
- isa_model/training/image_model/train/train_flux.py +0 -41
- isa_model/training/image_model/train/train_lora.py +0 -57
- isa_model/training/image_model/train_main.py +0 -25
- isa_model-0.0.2.dist-info/METADATA +0 -327
- isa_model-0.0.2.dist-info/RECORD +0 -92
- isa_model-0.0.2.dist-info/licenses/LICENSE +0 -21
- /isa_model/training/{llm_model/annotation → annotation}/annotation_schema.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/processors/annotation_processor.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_manager.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_schema.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_annotation_flow.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio copy.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio_upload.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/views/annotation_controller.py +0 -0
- {isa_model-0.0.2.dist-info → isa_model-0.3.1.dist-info}/WHEEL +0 -0
- {isa_model-0.0.2.dist-info → isa_model-0.3.1.dist-info}/top_level.txt +0 -0
@@ -6,7 +6,7 @@ Vision服务包
|
|
6
6
|
包含所有视觉相关服务模块
|
7
7
|
"""
|
8
8
|
|
9
|
-
# 导出
|
10
|
-
from isa_model.inference.services.vision.replicate_image_gen_service import
|
9
|
+
# 导出ReplicateImageGenService
|
10
|
+
from isa_model.inference.services.vision.replicate_image_gen_service import ReplicateImageGenService
|
11
11
|
|
12
|
-
__all__ = ["
|
12
|
+
__all__ = ["ReplicateImageGenService"]
|
@@ -0,0 +1,161 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import Dict, Any, List, Union, Optional, BinaryIO
|
3
|
+
from isa_model.inference.services.base_service import BaseService
|
4
|
+
|
5
|
+
class BaseImageGenService(BaseService):
    """Abstract interface shared by all image generation services."""

    @abstractmethod
    async def generate_image(
        self,
        prompt: str,
        negative_prompt: Optional[str] = None,
        width: int = 512,
        height: int = 512,
        num_inference_steps: int = 20,
        guidance_scale: float = 7.5,
        seed: Optional[int] = None
    ) -> Dict[str, Any]:
        """Produce one image from a text prompt.

        Args:
            prompt: Text describing the image to create.
            negative_prompt: Text describing what the image should not contain.
            width: Requested width in pixels.
            height: Requested height in pixels.
            num_inference_steps: Number of denoising steps.
            guidance_scale: How strongly the result should follow the prompt.
            seed: Random seed, for reproducible output.

        Returns:
            A dict with the keys:
                - image_data: binary image data or a PIL Image
                - format: image format such as 'png' or 'jpg'
                - width: actual image width
                - height: actual image height
                - seed: seed used for the generation
        """
        ...

    @abstractmethod
    async def generate_images(
        self,
        prompt: str,
        num_images: int = 1,
        negative_prompt: Optional[str] = None,
        width: int = 512,
        height: int = 512,
        num_inference_steps: int = 20,
        guidance_scale: float = 7.5,
        seed: Optional[int] = None
    ) -> List[Dict[str, Any]]:
        """Produce several images from a single text prompt.

        Args:
            prompt: Text describing the image to create.
            num_images: How many images to generate.
            negative_prompt: Text describing what the image should not contain.
            width: Requested width in pixels.
            height: Requested height in pixels.
            num_inference_steps: Number of denoising steps.
            guidance_scale: How strongly the result should follow the prompt.
            seed: Random seed, for reproducible output.

        Returns:
            A list of generation result dicts.
        """
        ...

    @abstractmethod
    async def generate_image_to_file(
        self,
        prompt: str,
        output_path: str,
        negative_prompt: Optional[str] = None,
        width: int = 512,
        height: int = 512,
        num_inference_steps: int = 20,
        guidance_scale: float = 7.5,
        seed: Optional[int] = None
    ) -> Dict[str, Any]:
        """Produce an image and write it straight to a file.

        Args:
            prompt: Text describing the image to create.
            output_path: Where the generated image is saved.
            negative_prompt: Text describing what the image should not contain.
            width: Requested width in pixels.
            height: Requested height in pixels.
            num_inference_steps: Number of denoising steps.
            guidance_scale: How strongly the result should follow the prompt.
            seed: Random seed, for reproducible output.

        Returns:
            A dict with the keys:
                - file_path: path of the saved image
                - width: image width
                - height: image height
                - seed: seed used for the generation
        """
        ...

    @abstractmethod
    async def image_to_image(
        self,
        prompt: str,
        init_image: Union[str, BinaryIO],
        strength: float = 0.8,
        negative_prompt: Optional[str] = None,
        num_inference_steps: int = 20,
        guidance_scale: float = 7.5,
        seed: Optional[int] = None
    ) -> Dict[str, Any]:
        """Produce a new image from an existing image plus a prompt.

        Args:
            prompt: Text describing the desired modifications.
            init_image: Path to the starting image, or the image data itself.
            strength: How much to transform the starting image (0.0-1.0).
            negative_prompt: Text describing what to avoid.
            num_inference_steps: Number of denoising steps.
            guidance_scale: How strongly the result should follow the prompt.
            seed: Random seed, for reproducible output.

        Returns:
            A dict holding the generation results.
        """
        ...

    @abstractmethod
    def get_supported_sizes(self) -> List[Dict[str, int]]:
        """Report the image dimensions the service supports.

        Returns:
            A list of dicts, each with 'width' and 'height' keys.
        """
        ...

    @abstractmethod
    def get_model_info(self) -> Dict[str, Any]:
        """Report details about the underlying image generation model.

        Returns:
            A dict with the keys:
                - name: model name
                - max_width: maximum supported width
                - max_height: maximum supported height
                - supports_negative_prompt: whether negative prompts work
                - supports_img2img: whether image-to-image works
        """
        ...

    @abstractmethod
    async def close(self):
        """Release any resources held by the service."""
        ...
|
@@ -0,0 +1,177 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import Dict, Any, List, Union, Optional, BinaryIO
|
3
|
+
from isa_model.inference.services.base_service import BaseService
|
4
|
+
|
5
|
+
class BaseVisionService(BaseService):
    """Abstract interface shared by all vision understanding services."""

    @abstractmethod
    async def analyze_image(
        self,
        image: Union[str, BinaryIO],
        prompt: Optional[str] = None,
        max_tokens: int = 1000
    ) -> Dict[str, Any]:
        """Describe an image or answer a question about it.

        Args:
            image: Path to an image file, or the image data itself.
            prompt: Optional question or instruction about the image.
            max_tokens: Upper bound on tokens in the response.

        Returns:
            A dict with the keys:
                - text: description of, or answer about, the image
                - confidence: confidence score (if available)
                - detected_objects: list of detected objects (if available)
                - metadata: extra information about the analysis
        """
        ...

    @abstractmethod
    async def analyze_images(
        self,
        images: List[Union[str, BinaryIO]],
        prompt: Optional[str] = None,
        max_tokens: int = 1000
    ) -> List[Dict[str, Any]]:
        """Analyze several images.

        Args:
            images: Image paths or image data objects.
            prompt: Optional question or instruction about the images.
            max_tokens: Upper bound on tokens in the response.

        Returns:
            A list of analysis result dicts.
        """
        ...

    @abstractmethod
    async def describe_image(
        self,
        image: Union[str, BinaryIO],
        detail_level: str = "medium"
    ) -> Dict[str, Any]:
        """Produce a detailed description of an image.

        Args:
            image: Path to an image file, or the image data itself.
            detail_level: One of "low", "medium" or "high".

        Returns:
            A dict with the keys:
                - description: detailed text description
                - objects: list of detected objects
                - scene: scene description
                - colors: dominant colors
        """
        ...

    @abstractmethod
    async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
        """Read the text contained in an image (OCR).

        Args:
            image: Path to an image file, or the image data itself.

        Returns:
            A dict with the keys:
                - text: extracted text
                - confidence: overall confidence score
                - bounding_boxes: text regions with coordinates (if available)
                - language: detected language (if available)
        """
        ...

    @abstractmethod
    async def detect_objects(
        self,
        image: Union[str, BinaryIO],
        confidence_threshold: float = 0.5
    ) -> Dict[str, Any]:
        """Find the objects present in an image.

        Args:
            image: Path to an image file, or the image data itself.
            confidence_threshold: Minimum confidence for a detection.

        Returns:
            A dict with the keys:
                - objects: detected objects with labels and confidence
                - count: number of objects detected
                - bounding_boxes: object locations (if available)
        """
        ...

    @abstractmethod
    async def classify_image(
        self,
        image: Union[str, BinaryIO],
        categories: Optional[List[str]] = None
    ) -> Dict[str, Any]:
        """Assign an image to a category.

        Args:
            image: Path to an image file, or the image data itself.
            categories: Optional list of specific categories to consider.

        Returns:
            A dict with the keys:
                - category: top predicted category
                - confidence: confidence score
                - all_predictions: every prediction with its score
        """
        ...

    @abstractmethod
    async def compare_images(
        self,
        image1: Union[str, BinaryIO],
        image2: Union[str, BinaryIO]
    ) -> Dict[str, Any]:
        """Compare two images for similarity.

        Args:
            image1: Path or data of the first image.
            image2: Path or data of the second image.

        Returns:
            A dict with the keys:
                - similarity_score: numerical similarity score
                - differences: description of the key differences
                - common_elements: description of the common elements
        """
        ...

    @abstractmethod
    def get_supported_formats(self) -> List[str]:
        """Report the image formats the service accepts.

        Returns:
            A list of file extensions, e.g. ['jpg', 'png', 'gif'].
        """
        ...

    @abstractmethod
    def get_max_image_size(self) -> Dict[str, int]:
        """Report the largest image dimensions the service accepts.

        Returns:
            A dict with 'width' and 'height' keys.
        """
        ...

    @abstractmethod
    async def close(self):
        """Release any resources held by the service."""
        ...
|
@@ -2,9 +2,10 @@ from io import BytesIO
|
|
2
2
|
from PIL import Image
|
3
3
|
from typing import Union
|
4
4
|
import base64
|
5
|
-
from app.config.config_manager import config_manager
|
5
|
+
# from app.config.config_manager import config_manager # Commented out to fix import
|
6
|
+
import logging
|
6
7
|
|
7
|
-
logger =
|
8
|
+
logger = logging.getLogger(__name__)
|
8
9
|
|
9
10
|
def compress_image(image_data: Union[bytes, BytesIO], max_size: int = 1024) -> bytes:
|
10
11
|
"""压缩图片以减小大小
|
@@ -30,7 +31,7 @@ def compress_image(image_data: Union[bytes, BytesIO], max_size: int = 1024) -> b
|
|
30
31
|
# 计算新尺寸,保持宽高比
|
31
32
|
ratio = max_size / max(img.size)
|
32
33
|
if ratio < 1:
|
33
|
-
new_size =
|
34
|
+
new_size = (int(img.size[0] * ratio), int(img.size[1] * ratio))
|
34
35
|
img = img.resize(new_size, Image.Resampling.LANCZOS)
|
35
36
|
|
36
37
|
# 保存压缩后的图片
|
@@ -2,15 +2,16 @@ import os
|
|
2
2
|
import json
|
3
3
|
import base64
|
4
4
|
import ollama
|
5
|
-
from typing import Dict, Any, Union
|
5
|
+
from typing import Dict, Any, Union, List, Optional, BinaryIO
|
6
6
|
from tenacity import retry, stop_after_attempt, wait_exponential
|
7
|
-
from isa_model.inference.services.
|
7
|
+
from isa_model.inference.services.vision.base_vision_service import BaseVisionService
|
8
8
|
from isa_model.inference.providers.base_provider import BaseProvider
|
9
9
|
import logging
|
10
|
+
import requests
|
10
11
|
|
11
12
|
logger = logging.getLogger(__name__)
|
12
13
|
|
13
|
-
class OllamaVisionService(
|
14
|
+
class OllamaVisionService(BaseVisionService):
|
14
15
|
"""Vision model service wrapper for Ollama using base64 encoded images"""
|
15
16
|
|
16
17
|
def __init__(self, provider: 'BaseProvider', model_name: str = 'gemma3:4b'):
|
@@ -18,30 +19,45 @@ class OllamaVisionService(BaseService):
|
|
18
19
|
self.max_tokens = self.config.get('max_tokens', 1000)
|
19
20
|
self.temperature = self.config.get('temperature', 0.7)
|
20
21
|
|
22
|
+
def _get_image_data(
    self,
    image: Union[str, bytes, bytearray, BinaryIO],
    timeout: float = 30.0
) -> bytes:
    """Return the raw bytes of an image.

    Args:
        image: One of
            - a string starting with ``http://`` or ``https://``, which is
              downloaded;
            - any other string, which is opened as a local file path;
            - ``bytes`` / ``bytearray``, which is returned as ``bytes``;
            - any other object, whose ``read()`` result is returned.
        timeout: Seconds to wait for the HTTP download before giving up.

    Returns:
        The image content as bytes.

    Raises:
        requests.RequestException: If the download fails, times out, or
            returns an error status.
        OSError: If the local file cannot be opened or read.
    """
    # Already-loaded image data needs no I/O; previously this fell through
    # to ``image.read()`` and failed with AttributeError.
    if isinstance(image, (bytes, bytearray)):
        return bytes(image)

    if isinstance(image, str):
        if image.startswith(('http://', 'https://')):
            # Without an explicit timeout, requests can wait indefinitely on
            # an unresponsive server.
            response = requests.get(image, timeout=timeout)
            response.raise_for_status()
            return response.content
        # Any other string is treated as a local file path.
        with open(image, 'rb') as f:
            return f.read()

    # File-like object.
    return image.read()
|
36
|
+
|
21
37
|
@retry(
|
22
38
|
stop=stop_after_attempt(3),
|
23
39
|
wait=wait_exponential(multiplier=1, min=4, max=10),
|
24
40
|
reraise=True
|
25
41
|
)
|
26
|
-
async def analyze_image(
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
str: 分析结果
|
42
|
+
async def analyze_image(
|
43
|
+
self,
|
44
|
+
image: Union[str, BinaryIO],
|
45
|
+
prompt: Optional[str] = None,
|
46
|
+
max_tokens: int = 1000
|
47
|
+
) -> Dict[str, Any]:
|
48
|
+
"""
|
49
|
+
Analyze image and provide description or answer questions
|
35
50
|
"""
|
36
51
|
try:
|
37
|
-
#
|
38
|
-
|
39
|
-
with open(image_data, 'rb') as f:
|
40
|
-
image_data = f.read()
|
52
|
+
# 获取图像数据
|
53
|
+
image_data = self._get_image_data(image)
|
41
54
|
|
42
55
|
# 转换为base64
|
43
56
|
image_base64 = base64.b64encode(image_data).decode('utf-8')
|
44
57
|
|
58
|
+
# 使用默认提示词如果没有提供
|
59
|
+
query = prompt or "请描述这张图片的内容。"
|
60
|
+
|
45
61
|
# 使用 ollama 库直接调用
|
46
62
|
response = ollama.chat(
|
47
63
|
model=self.model_name,
|
@@ -52,9 +68,127 @@ class OllamaVisionService(BaseService):
|
|
52
68
|
}]
|
53
69
|
)
|
54
70
|
|
55
|
-
|
71
|
+
content = response['message']['content']
|
72
|
+
|
73
|
+
return {
|
74
|
+
"text": content,
|
75
|
+
"confidence": 1.0, # Ollama doesn't provide confidence scores
|
76
|
+
"detected_objects": [], # Basic implementation
|
77
|
+
"metadata": {
|
78
|
+
"model": self.model_name,
|
79
|
+
"prompt": query
|
80
|
+
}
|
81
|
+
}
|
56
82
|
|
57
83
|
except Exception as e:
|
58
84
|
logger.error(f"Error in image analysis: {e}")
|
59
85
|
raise
|
60
86
|
|
87
|
+
async def analyze_images(
    self,
    images: List[Union[str, BinaryIO]],
    prompt: Optional[str] = None,
    max_tokens: int = 1000
) -> List[Dict[str, Any]]:
    """Run ``analyze_image`` over each image and collect the results.

    Args:
        images: Image paths or image data objects.
        prompt: Optional prompt forwarded unchanged for every image.
        max_tokens: Forwarded unchanged for every image.

    Returns:
        One result dict per input image, in input order.
    """
    # Each image is awaited before the next one starts, so ordering is kept.
    return [
        await self.analyze_image(item, prompt, max_tokens)
        for item in images
    ]
|
99
|
+
|
100
|
+
async def describe_image(
    self,
    image: Union[str, BinaryIO],
    detail_level: str = "medium"
) -> Dict[str, Any]:
    """Describe an image at the requested level of detail.

    Args:
        image: Path to an image file, or the image data itself.
        detail_level: "low", "medium" or "high"; any other value uses the
            "medium" prompt.

    Returns:
        A dict with ``description`` (the model's text) plus placeholder
        values for ``objects``, ``scene`` and ``colors``.
    """
    # Prompt sent to the model for each supported detail level.
    level_prompts = {
        "low": "简单描述这张图片。",
        "medium": "详细描述这张图片的内容、颜色、物体和场景。",
        "high": "非常详细地描述这张图片,包括所有可见的物体、颜色、纹理、场景、情感和任何其他细节。",
    }
    fallback = level_prompts["medium"]
    analysis = await self.analyze_image(
        image, level_prompts.get(detail_level, fallback)
    )

    # Only the description comes from the model; the rest are placeholders.
    return dict(
        description=analysis["text"],
        objects=[],
        scene="未知",
        colors=[],
    )
|
121
|
+
|
122
|
+
async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
    """Ask the model to read out all text in an image (OCR).

    Args:
        image: Path to an image file, or the image data itself.

    Returns:
        A dict with ``text`` (the model's answer), a fixed ``confidence`` of
        1.0, and placeholder ``bounding_boxes`` and ``language`` values.
    """
    ocr = await self.analyze_image(image, "提取图片中的所有文字内容。")

    payload = {"text": ocr["text"]}
    # The remaining fields are fixed placeholders, not model output.
    payload.update(confidence=1.0, bounding_boxes=[], language="未知")
    return payload
|
132
|
+
|
133
|
+
async def detect_objects(
    self,
    image: Union[str, BinaryIO],
    confidence_threshold: float = 0.5
) -> Dict[str, Any]:
    """Ask the model to list the objects visible in an image.

    The model answers in free text, which is not parsed into structured
    detections here, so ``objects``, ``count`` and ``bounding_boxes`` stay
    empty placeholders. The answer itself is returned under ``raw_text`` so
    the inference result is not thrown away.

    Args:
        image: Path to an image file, or the image data itself.
        confidence_threshold: Currently unused; no per-object confidence is
            produced by this implementation.

    Returns:
        A dict with the keys:
            - objects: always an empty list
            - count: always 0
            - bounding_boxes: always an empty list
            - raw_text: the model's unparsed answer
    """
    result = await self.analyze_image(image, "识别并列出图片中的所有物体。")

    return {
        "objects": [],  # placeholder until the answer is parsed
        "count": 0,
        "bounding_boxes": [],
        "raw_text": result["text"],
    }
|
146
|
+
|
147
|
+
async def classify_image(
    self,
    image: Union[str, BinaryIO],
    categories: Optional[List[str]] = None
) -> Dict[str, Any]:
    """Ask the model which category an image belongs to.

    Args:
        image: Path to an image file, or the image data itself.
        categories: Optional candidate categories. When given and non-empty
            they are listed in the prompt; otherwise the model is asked an
            open question.

    Returns:
        A dict with ``category`` (the model's text), a fixed ``confidence``
        of 1.0, and ``all_predictions`` holding that single prediction.
    """
    question = (
        f"将这张图片分类到以下类别之一:{'、'.join(categories)}"
        if categories
        else "这张图片属于什么类别?"
    )
    label = (await self.analyze_image(image, question))["text"]

    # The model's answer is used verbatim as the single prediction.
    top_prediction = {"category": label, "confidence": 1.0}
    return {
        "category": label,
        "confidence": 1.0,
        "all_predictions": [top_prediction],
    }
|
166
|
+
|
167
|
+
async def compare_images(
    self,
    image1: Union[str, BinaryIO],
    image2: Union[str, BinaryIO]
) -> Dict[str, Any]:
    """Describe two images separately and return a placeholder comparison.

    No real comparison is computed: ``similarity_score``, ``differences``
    and ``common_elements`` are fixed placeholder values. The two model
    descriptions are returned under ``descriptions`` so the inference
    results are not thrown away and callers can compare them themselves.

    Args:
        image1: Path or data of the first image.
        image2: Path or data of the second image.

    Returns:
        A dict with the keys:
            - similarity_score: always 0.5
            - differences: fixed placeholder text
            - common_elements: fixed placeholder text
            - descriptions: [description of image1, description of image2]
    """
    # Each image is described on its own, first image1 and then image2.
    result1 = await self.analyze_image(image1, "描述这张图片。")
    result2 = await self.analyze_image(image2, "描述这张图片。")

    return {
        "similarity_score": 0.5,  # placeholder, not derived from the images
        "differences": "需要进一步分析",
        "common_elements": "需要进一步分析",
        "descriptions": [result1["text"], result2["text"]],
    }
|
182
|
+
|
183
|
+
def get_supported_formats(self) -> List[str]:
    """Return the image file extensions this service reports as supported."""
    extensions = ('jpg', 'jpeg', 'png', 'gif', 'bmp', 'webp')
    # Hand back a fresh list so callers may mutate it freely.
    return list(extensions)
|
186
|
+
|
187
|
+
def get_max_image_size(self) -> Dict[str, int]:
    """Return the maximum image width and height this service reports."""
    max_side = 4096
    return dict(width=max_side, height=max_side)
|
190
|
+
|
191
|
+
async def close(self):
    """Release resources; this implementation has nothing to clean up."""
    return None
|
194
|
+
|