isa-model 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +30 -1
- isa_model/client.py +770 -0
- isa_model/core/config/__init__.py +16 -0
- isa_model/core/config/config_manager.py +514 -0
- isa_model/core/config.py +426 -0
- isa_model/core/models/model_billing_tracker.py +476 -0
- isa_model/core/models/model_manager.py +399 -0
- isa_model/core/models/model_repo.py +343 -0
- isa_model/core/pricing_manager.py +426 -0
- isa_model/core/services/__init__.py +19 -0
- isa_model/core/services/intelligent_model_selector.py +547 -0
- isa_model/core/types.py +291 -0
- isa_model/deployment/__init__.py +2 -0
- isa_model/deployment/cloud/__init__.py +9 -0
- isa_model/deployment/cloud/modal/__init__.py +10 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +766 -0
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +406 -0
- isa_model/deployment/cloud/modal/register_models.py +321 -0
- isa_model/deployment/runtime/deployed_service.py +338 -0
- isa_model/deployment/services/__init__.py +9 -0
- isa_model/deployment/services/auto_deploy_vision_service.py +537 -0
- isa_model/deployment/services/model_service.py +332 -0
- isa_model/deployment/services/service_monitor.py +356 -0
- isa_model/deployment/services/service_registry.py +527 -0
- isa_model/eval/__init__.py +80 -44
- isa_model/eval/config/__init__.py +10 -0
- isa_model/eval/config/evaluation_config.py +108 -0
- isa_model/eval/evaluators/__init__.py +18 -0
- isa_model/eval/evaluators/base_evaluator.py +503 -0
- isa_model/eval/evaluators/llm_evaluator.py +472 -0
- isa_model/eval/factory.py +417 -709
- isa_model/eval/infrastructure/__init__.py +24 -0
- isa_model/eval/infrastructure/experiment_tracker.py +466 -0
- isa_model/eval/metrics.py +191 -21
- isa_model/inference/ai_factory.py +187 -387
- isa_model/inference/providers/modal_provider.py +109 -0
- isa_model/inference/providers/yyds_provider.py +108 -0
- isa_model/inference/services/__init__.py +2 -1
- isa_model/inference/services/audio/base_stt_service.py +65 -1
- isa_model/inference/services/audio/base_tts_service.py +75 -1
- isa_model/inference/services/audio/openai_stt_service.py +189 -151
- isa_model/inference/services/audio/openai_tts_service.py +12 -10
- isa_model/inference/services/audio/replicate_tts_service.py +61 -56
- isa_model/inference/services/base_service.py +55 -55
- isa_model/inference/services/embedding/base_embed_service.py +65 -1
- isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
- isa_model/inference/services/embedding/openai_embed_service.py +8 -10
- isa_model/inference/services/helpers/stacked_config.py +148 -0
- isa_model/inference/services/img/__init__.py +18 -0
- isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -35
- isa_model/inference/services/img/flux_professional_service.py +603 -0
- isa_model/inference/services/img/helpers/base_stacked_service.py +274 -0
- isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +210 -69
- isa_model/inference/services/llm/__init__.py +3 -3
- isa_model/inference/services/llm/base_llm_service.py +519 -35
- isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +40 -0
- isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
- isa_model/inference/services/llm/ollama_llm_service.py +150 -15
- isa_model/inference/services/llm/openai_llm_service.py +134 -31
- isa_model/inference/services/llm/yyds_llm_service.py +255 -0
- isa_model/inference/services/vision/__init__.py +38 -4
- isa_model/inference/services/vision/base_vision_service.py +241 -96
- isa_model/inference/services/vision/disabled/isA_vision_service.py +500 -0
- isa_model/inference/services/vision/doc_analysis_service.py +640 -0
- isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
- isa_model/inference/services/vision/helpers/image_utils.py +272 -3
- isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
- isa_model/inference/services/vision/openai_vision_service.py +109 -170
- isa_model/inference/services/vision/replicate_vision_service.py +508 -0
- isa_model/inference/services/vision/ui_analysis_service.py +823 -0
- isa_model/scripts/register_models.py +370 -0
- isa_model/scripts/register_models_with_embeddings.py +510 -0
- isa_model/serving/__init__.py +19 -0
- isa_model/serving/api/__init__.py +10 -0
- isa_model/serving/api/fastapi_server.py +89 -0
- isa_model/serving/api/middleware/__init__.py +9 -0
- isa_model/serving/api/middleware/request_logger.py +88 -0
- isa_model/serving/api/routes/__init__.py +5 -0
- isa_model/serving/api/routes/health.py +82 -0
- isa_model/serving/api/routes/llm.py +19 -0
- isa_model/serving/api/routes/ui_analysis.py +223 -0
- isa_model/serving/api/routes/unified.py +202 -0
- isa_model/serving/api/routes/vision.py +19 -0
- isa_model/serving/api/schemas/__init__.py +17 -0
- isa_model/serving/api/schemas/common.py +33 -0
- isa_model/serving/api/schemas/ui_analysis.py +78 -0
- {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/METADATA +4 -1
- isa_model-0.3.6.dist-info/RECORD +147 -0
- isa_model/core/model_manager.py +0 -208
- isa_model/core/model_registry.py +0 -342
- isa_model/inference/billing_tracker.py +0 -406
- isa_model/inference/services/llm/triton_llm_service.py +0 -481
- isa_model/inference/services/vision/ollama_vision_service.py +0 -194
- isa_model-0.3.4.dist-info/RECORD +0 -91
- /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
- /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
- {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/WHEEL +0 -0
- {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/top_level.txt +0 -0
isa_model/inference/services/llm/triton_llm_service.py (file removed)

@@ -1,481 +0,0 @@

````python
"""
Triton LLM Service

Provides LLM-specific functionality using Triton Inference Server as the backend.
Integrates with the existing TritonProvider for low-level operations.
"""

import logging
from typing import Dict, Any, List, Optional, Union, AsyncGenerator
import json
import asyncio

from ..base_service import BaseService
from ...providers.triton_provider import TritonProvider
from ...base import ModelType, Capability

logger = logging.getLogger(__name__)


class TritonLLMService(BaseService):
    """
    LLM service using Triton Inference Server.

    This service provides high-level LLM operations like text generation,
    chat completion, and streaming responses using Triton as the backend.

    Features:
    - Text generation with customizable parameters
    - Chat completion with conversation context
    - Streaming responses for real-time interaction
    - Multiple model support
    - Automatic model loading and management
    - Integration with model registry

    Example:
        ```python
        from isa_model.inference.services.llm import TritonLLMService

        # Initialize service
        service = TritonLLMService({
            "triton_url": "localhost:8001",
            "default_model": "gemma-4b-alpaca"
        })

        # Generate text
        response = await service.generate_text(
            prompt="What is artificial intelligence?",
            model_name="gemma-4b-alpaca",
            max_tokens=100
        )

        # Chat completion
        messages = [
            {"role": "user", "content": "Hello, how are you?"}
        ]
        response = await service.chat_completion(
            messages=messages,
            model_name="gemma-4b-alpaca"
        )

        # Streaming generation
        async for chunk in service.generate_text_stream(
            prompt="Tell me a story",
            model_name="gemma-4b-alpaca"
        ):
            print(chunk["text"], end="")
        ```
    """

    def __init__(self, config: Dict[str, Any]):
        """
        Initialize Triton LLM service.

        Args:
            config: Service configuration including Triton connection details
        """
        super().__init__(config)

        # Initialize Triton provider
        self.triton_provider = TritonProvider(config)

        # Service configuration
        self.default_model = config.get("default_model", "model")
        self.max_tokens_limit = config.get("max_tokens_limit", 2048)
        self.temperature_default = config.get("temperature_default", 0.7)
        self.top_p_default = config.get("top_p_default", 0.9)
        self.top_k_default = config.get("top_k_default", 50)

        # Chat templates
        self.chat_templates = {
            "gemma": self._format_gemma_chat,
            "llama": self._format_llama_chat,
            "default": self._format_default_chat
        }

        logger.info(f"TritonLLMService initialized with default model: {self.default_model}")

    async def initialize(self) -> bool:
        """Initialize the service and check Triton connectivity"""
        try:
            # Check if Triton server is live
            if not self.triton_provider.is_server_live():
                logger.error("Triton server is not live")
                return False

            # Check if default model is ready
            if not self.triton_provider.is_model_ready(self.default_model):
                logger.warning(f"Default model {self.default_model} is not ready")

            logger.info("TritonLLMService initialized successfully")
            return True

        except Exception as e:
            logger.error(f"Failed to initialize TritonLLMService: {e}")
            return False

    async def generate_text(self,
                            prompt: str,
                            model_name: Optional[str] = None,
                            max_tokens: int = 100,
                            temperature: float = None,
                            top_p: float = None,
                            top_k: int = None,
                            stop_sequences: Optional[List[str]] = None,
                            system_prompt: Optional[str] = None,
                            **kwargs) -> Dict[str, Any]:
        """
        Generate text using the specified model.

        Args:
            prompt: Input text prompt
            model_name: Name of the model to use (uses default if not specified)
            max_tokens: Maximum number of tokens to generate
            temperature: Sampling temperature (0.0 to 1.0)
            top_p: Top-p sampling parameter
            top_k: Top-k sampling parameter
            stop_sequences: List of sequences to stop generation
            system_prompt: System prompt for instruction-following models
            **kwargs: Additional generation parameters

        Returns:
            Dictionary containing generated text and metadata
        """
        try:
            # Use default model if not specified
            model_name = model_name or self.default_model

            # Validate parameters
            max_tokens = min(max_tokens, self.max_tokens_limit)
            temperature = temperature if temperature is not None else self.temperature_default
            top_p = top_p if top_p is not None else self.top_p_default
            top_k = top_k if top_k is not None else self.top_k_default

            # Prepare generation parameters
            params = {
                "temperature": temperature,
                "max_tokens": max_tokens,
                "top_p": top_p,
                "top_k": top_k,
                **kwargs
            }

            if system_prompt:
                params["system_prompt"] = system_prompt

            if stop_sequences:
                params["stop_sequences"] = stop_sequences

            logger.debug(f"Generating text with model {model_name}, prompt length: {len(prompt)}")

            # Call Triton provider
            result = await self.triton_provider.completions(
                prompt=prompt,
                model_name=model_name,
                params=params
            )

            if "error" in result:
                logger.error(f"Text generation failed: {result['error']}")
                return {
                    "success": False,
                    "error": result["error"],
                    "model_name": model_name
                }

            # Format response
            response = {
                "success": True,
                "text": result["completion"],
                "model_name": model_name,
                "usage": result.get("metadata", {}).get("token_usage", {}),
                "parameters": {
                    "temperature": temperature,
                    "max_tokens": max_tokens,
                    "top_p": top_p,
                    "top_k": top_k
                }
            }

            logger.debug(f"Text generation completed, output length: {len(response['text'])}")
            return response

        except Exception as e:
            logger.error(f"Error in generate_text: {e}")
            return {
                "success": False,
                "error": str(e),
                "model_name": model_name or self.default_model
            }

    async def chat_completion(self,
                              messages: List[Dict[str, str]],
                              model_name: Optional[str] = None,
                              max_tokens: int = 100,
                              temperature: float = None,
                              top_p: float = None,
                              top_k: int = None,
                              stop_sequences: Optional[List[str]] = None,
                              **kwargs) -> Dict[str, Any]:
        """
        Generate chat completion using conversation messages.

        Args:
            messages: List of message dictionaries with 'role' and 'content'
            model_name: Name of the model to use
            max_tokens: Maximum number of tokens to generate
            temperature: Sampling temperature
            top_p: Top-p sampling parameter
            top_k: Top-k sampling parameter
            stop_sequences: List of sequences to stop generation
            **kwargs: Additional parameters

        Returns:
            Dictionary containing the assistant's response and metadata
        """
        try:
            # Use default model if not specified
            model_name = model_name or self.default_model

            # Format messages into a prompt
            prompt = self._format_chat_messages(messages, model_name)

            logger.debug(f"Chat completion with {len(messages)} messages, model: {model_name}")

            # Generate response
            result = await self.generate_text(
                prompt=prompt,
                model_name=model_name,
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                top_k=top_k,
                stop_sequences=stop_sequences,
                **kwargs
            )

            if not result["success"]:
                return result

            # Format as chat completion response
            response = {
                "success": True,
                "message": {
                    "role": "assistant",
                    "content": result["text"]
                },
                "model_name": model_name,
                "usage": result.get("usage", {}),
                "parameters": result.get("parameters", {})
            }

            logger.debug("Chat completion completed successfully")
            return response

        except Exception as e:
            logger.error(f"Error in chat_completion: {e}")
            return {
                "success": False,
                "error": str(e),
                "model_name": model_name or self.default_model
            }

    async def generate_text_stream(self,
                                   prompt: str,
                                   model_name: Optional[str] = None,
                                   max_tokens: int = 100,
                                   temperature: float = None,
                                   top_p: float = None,
                                   top_k: int = None,
                                   stop_sequences: Optional[List[str]] = None,
                                   **kwargs) -> AsyncGenerator[Dict[str, Any], None]:
        """
        Generate text with streaming response.

        Args:
            prompt: Input text prompt
            model_name: Name of the model to use
            max_tokens: Maximum number of tokens to generate
            temperature: Sampling temperature
            top_p: Top-p sampling parameter
            top_k: Top-k sampling parameter
            stop_sequences: List of sequences to stop generation
            **kwargs: Additional parameters

        Yields:
            Dictionary chunks containing partial text and metadata
        """
        try:
            # For now, simulate streaming by chunking the complete response
            # TODO: Implement true streaming when Triton supports it

            result = await self.generate_text(
                prompt=prompt,
                model_name=model_name,
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                top_k=top_k,
                stop_sequences=stop_sequences,
                **kwargs
            )

            if not result["success"]:
                yield {
                    "success": False,
                    "error": result["error"],
                    "model_name": model_name or self.default_model
                }
                return

            # Simulate streaming by yielding chunks
            text = result["text"]
            chunk_size = 10  # Characters per chunk

            for i in range(0, len(text), chunk_size):
                chunk = text[i:i + chunk_size]

                yield {
                    "success": True,
                    "text": chunk,
                    "is_complete": i + chunk_size >= len(text),
                    "model_name": model_name or self.default_model
                }

                # Small delay to simulate streaming
                await asyncio.sleep(0.05)

        except Exception as e:
            logger.error(f"Error in generate_text_stream: {e}")
            yield {
                "success": False,
                "error": str(e),
                "model_name": model_name or self.default_model
            }

    async def get_model_info(self, model_name: str) -> Dict[str, Any]:
        """Get information about a specific model"""
        try:
            if not self.triton_provider.is_model_ready(model_name):
                return {
                    "success": False,
                    "error": f"Model {model_name} is not ready"
                }

            metadata = self.triton_provider.get_model_metadata(model_name)
            config = self.triton_provider.get_model_config(model_name)

            return {
                "success": True,
                "model_name": model_name,
                "metadata": metadata,
                "config": config,
                "is_ready": True
            }

        except Exception as e:
            logger.error(f"Error getting model info for {model_name}: {e}")
            return {
                "success": False,
                "error": str(e),
                "model_name": model_name
            }

    async def list_available_models(self) -> List[str]:
        """List all available models"""
        try:
            return self.triton_provider.get_models(ModelType.LLM)
        except Exception as e:
            logger.error(f"Error listing models: {e}")
            return []

    def _format_chat_messages(self, messages: List[Dict[str, str]], model_name: str) -> str:
        """Format chat messages into a prompt based on model type"""
        # Determine chat template based on model name
        template_key = "default"
        if "gemma" in model_name.lower():
            template_key = "gemma"
        elif "llama" in model_name.lower():
            template_key = "llama"

        formatter = self.chat_templates.get(template_key, self.chat_templates["default"])
        return formatter(messages)

    def _format_gemma_chat(self, messages: List[Dict[str, str]]) -> str:
        """Format messages for Gemma models"""
        formatted = ""

        for message in messages:
            role = message["role"]
            content = message["content"]

            if role == "system":
                formatted += f"<start_of_turn>system\n{content}<end_of_turn>\n"
            elif role == "user":
                formatted += f"<start_of_turn>user\n{content}<end_of_turn>\n"
            elif role == "assistant":
                formatted += f"<start_of_turn>model\n{content}<end_of_turn>\n"

        # Add the start token for the assistant response
        formatted += "<start_of_turn>model\n"

        return formatted

    def _format_llama_chat(self, messages: List[Dict[str, str]]) -> str:
        """Format messages for Llama models"""
        formatted = "<s>"

        for message in messages:
            role = message["role"]
            content = message["content"]

            if role == "system":
                formatted += f"[INST] <<SYS>>\n{content}\n<</SYS>>\n\n"
            elif role == "user":
                if formatted.endswith("<s>"):
                    formatted += f"[INST] {content} [/INST]"
                else:
                    formatted += f"<s>[INST] {content} [/INST]"
            elif role == "assistant":
                formatted += f" {content} </s>"

        return formatted

    def _format_default_chat(self, messages: List[Dict[str, str]]) -> str:
        """Default chat formatting"""
        formatted = ""

        for message in messages:
            role = message["role"]
            content = message["content"]

            if role == "system":
                formatted += f"System: {content}\n\n"
            elif role == "user":
                formatted += f"User: {content}\n\n"
            elif role == "assistant":
                formatted += f"Assistant: {content}\n\n"

        # Add prompt for assistant response
        formatted += "Assistant:"

        return formatted

    def get_capabilities(self) -> List[Capability]:
        """Get service capabilities"""
        return [
            Capability.CHAT,
            Capability.COMPLETION
        ]

    def get_supported_models(self) -> List[str]:
        """Get list of supported model types"""
        return [
            "gemma-2-2b-it",
            "gemma-2-4b-it",
            "gemma-2-7b-it",
            "llama-2-7b-chat",
            "llama-2-13b-chat",
            "mistral-7b-instruct",
            "custom-models"  # Support for custom deployed models
        ]
````
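For readers skimming the diff, the most model-specific piece of the removed service is its prompt templating. The sketch below re-implements only the Gemma branch as a free function so it can be run without Triton or isa_model installed; the name `format_gemma_chat` and the sample messages are ours, not the package's, but the produced string matches `_format_gemma_chat` above for the three roles it handled.

```python
# Standalone sketch of the removed Gemma chat template (system/user/assistant only).
def format_gemma_chat(messages):
    role_map = {"system": "system", "user": "user", "assistant": "model"}
    formatted = ""
    for message in messages:
        formatted += f"<start_of_turn>{role_map[message['role']]}\n{message['content']}<end_of_turn>\n"
    # The service always appended the opening token for the model's reply.
    return formatted + "<start_of_turn>model\n"

print(format_gemma_chat([
    {"role": "system", "content": "You are concise."},
    {"role": "user", "content": "Hello, how are you?"},
]))
# <start_of_turn>system
# You are concise.<end_of_turn>
# <start_of_turn>user
# Hello, how are you?<end_of_turn>
# <start_of_turn>model
```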
isa_model/inference/services/vision/ollama_vision_service.py (file removed)

@@ -1,194 +0,0 @@

````python
import os
import json
import base64
import ollama
from typing import Dict, Any, Union, List, Optional, BinaryIO
from tenacity import retry, stop_after_attempt, wait_exponential
from isa_model.inference.services.vision.base_vision_service import BaseVisionService
from isa_model.inference.providers.base_provider import BaseProvider
import logging
import requests

logger = logging.getLogger(__name__)

class OllamaVisionService(BaseVisionService):
    """Vision model service wrapper for Ollama using base64 encoded images"""

    def __init__(self, provider: 'BaseProvider', model_name: str = 'gemma3:4b'):
        super().__init__(provider, model_name)
        self.max_tokens = self.config.get('max_tokens', 1000)
        self.temperature = self.config.get('temperature', 0.7)

    def _get_image_data(self, image: Union[str, BinaryIO]) -> bytes:
        """Fetch image data from a local file path or a URL"""
        if isinstance(image, str):
            # Check if it's a URL
            if image.startswith(('http://', 'https://')):
                response = requests.get(image)
                response.raise_for_status()
                return response.content
            else:
                # Local file path
                with open(image, 'rb') as f:
                    return f.read()
        else:
            return image.read()

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10),
        reraise=True
    )
    async def analyze_image(
        self,
        image: Union[str, BinaryIO],
        prompt: Optional[str] = None,
        max_tokens: int = 1000
    ) -> Dict[str, Any]:
        """
        Analyze image and provide description or answer questions
        """
        try:
            # Fetch the image data
            image_data = self._get_image_data(image)

            # Convert to base64
            image_base64 = base64.b64encode(image_data).decode('utf-8')

            # Fall back to a default prompt if none was provided
            query = prompt or "请描述这张图片的内容。"

            # Call the ollama library directly
            response = ollama.chat(
                model=self.model_name,
                messages=[{
                    'role': 'user',
                    'content': query,
                    'images': [image_base64]
                }]
            )

            content = response['message']['content']

            return {
                "text": content,
                "confidence": 1.0,  # Ollama doesn't provide confidence scores
                "detected_objects": [],  # Basic implementation
                "metadata": {
                    "model": self.model_name,
                    "prompt": query
                }
            }

        except Exception as e:
            logger.error(f"Error in image analysis: {e}")
            raise

    async def analyze_images(
        self,
        images: List[Union[str, BinaryIO]],
        prompt: Optional[str] = None,
        max_tokens: int = 1000
    ) -> List[Dict[str, Any]]:
        """Analyze multiple images"""
        results = []
        for image in images:
            result = await self.analyze_image(image, prompt, max_tokens)
            results.append(result)
        return results

    async def describe_image(
        self,
        image: Union[str, BinaryIO],
        detail_level: str = "medium"
    ) -> Dict[str, Any]:
        """Generate detailed description of image"""
        prompts = {
            "low": "简单描述这张图片。",
            "medium": "详细描述这张图片的内容、颜色、物体和场景。",
            "high": "非常详细地描述这张图片,包括所有可见的物体、颜色、纹理、场景、情感和任何其他细节。"
        }

        prompt = prompts.get(detail_level, prompts["medium"])
        result = await self.analyze_image(image, prompt)

        return {
            "description": result["text"],
            "objects": [],  # Basic implementation
            "scene": "未知",  # Basic implementation
            "colors": []  # Basic implementation
        }

    async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
        """Extract text from image (OCR)"""
        result = await self.analyze_image(image, "提取图片中的所有文字内容。")

        return {
            "text": result["text"],
            "confidence": 1.0,
            "bounding_boxes": [],  # Basic implementation
            "language": "未知"  # Basic implementation
        }

    async def detect_objects(
        self,
        image: Union[str, BinaryIO],
        confidence_threshold: float = 0.5
    ) -> Dict[str, Any]:
        """Detect objects in image"""
        result = await self.analyze_image(image, "识别并列出图片中的所有物体。")

        return {
            "objects": [],  # Basic implementation - would need parsing
            "count": 0,
            "bounding_boxes": []
        }

    async def classify_image(
        self,
        image: Union[str, BinaryIO],
        categories: Optional[List[str]] = None
    ) -> Dict[str, Any]:
        """Classify image into categories"""
        if categories:
            category_str = "、".join(categories)
            prompt = f"将这张图片分类到以下类别之一:{category_str}"
        else:
            prompt = "这张图片属于什么类别?"

        result = await self.analyze_image(image, prompt)

        return {
            "category": result["text"],
            "confidence": 1.0,
            "all_predictions": [{"category": result["text"], "confidence": 1.0}]
        }

    async def compare_images(
        self,
        image1: Union[str, BinaryIO],
        image2: Union[str, BinaryIO]
    ) -> Dict[str, Any]:
        """Compare two images for similarity"""
        # For now, analyze each image separately and compare descriptions
        result1 = await self.analyze_image(image1, "描述这张图片。")
        result2 = await self.analyze_image(image2, "描述这张图片。")

        return {
            "similarity_score": 0.5,  # Basic implementation
            "differences": "需要进一步分析",
            "common_elements": "需要进一步分析"
        }

    def get_supported_formats(self) -> List[str]:
        """Get list of supported image formats"""
        return ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'webp']

    def get_max_image_size(self) -> Dict[str, int]:
        """Get maximum supported image dimensions"""
        return {"width": 4096, "height": 4096}

    async def close(self):
        """Cleanup resources"""
        pass
````
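The core of the removed Ollama vision wrapper was a single `ollama.chat` call with a base64-encoded image, as seen in `analyze_image` above. Below is a minimal standalone sketch of that flow, assuming a locally running Ollama server with a vision-capable model pulled; the helper name `describe_local_image`, the model tag, and the file path are placeholders, not part of the package.

```python
import base64
import ollama  # direct dependency of the removed service

def describe_local_image(path: str, model: str = "gemma3:4b") -> str:
    # Read and base64-encode the image, mirroring _get_image_data + b64encode above.
    with open(path, "rb") as f:
        image_b64 = base64.b64encode(f.read()).decode("utf-8")
    response = ollama.chat(
        model=model,
        messages=[{
            "role": "user",
            "content": "Describe this image.",
            "images": [image_b64],
        }],
    )
    # Same response indexing the removed service used.
    return response["message"]["content"]

# Usage (requires a running Ollama server with the model pulled):
# print(describe_local_image("photo.jpg"))
```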