isa-model 0.3.4-py3-none-any.whl → 0.3.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. isa_model/__init__.py +30 -1
  2. isa_model/client.py +770 -0
  3. isa_model/core/config/__init__.py +16 -0
  4. isa_model/core/config/config_manager.py +514 -0
  5. isa_model/core/config.py +426 -0
  6. isa_model/core/models/model_billing_tracker.py +476 -0
  7. isa_model/core/models/model_manager.py +399 -0
  8. isa_model/core/models/model_repo.py +343 -0
  9. isa_model/core/pricing_manager.py +426 -0
  10. isa_model/core/services/__init__.py +19 -0
  11. isa_model/core/services/intelligent_model_selector.py +547 -0
  12. isa_model/core/types.py +291 -0
  13. isa_model/deployment/__init__.py +2 -0
  14. isa_model/deployment/cloud/__init__.py +9 -0
  15. isa_model/deployment/cloud/modal/__init__.py +10 -0
  16. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +766 -0
  17. isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
  18. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +406 -0
  19. isa_model/deployment/cloud/modal/register_models.py +321 -0
  20. isa_model/deployment/runtime/deployed_service.py +338 -0
  21. isa_model/deployment/services/__init__.py +9 -0
  22. isa_model/deployment/services/auto_deploy_vision_service.py +537 -0
  23. isa_model/deployment/services/model_service.py +332 -0
  24. isa_model/deployment/services/service_monitor.py +356 -0
  25. isa_model/deployment/services/service_registry.py +527 -0
  26. isa_model/eval/__init__.py +80 -44
  27. isa_model/eval/config/__init__.py +10 -0
  28. isa_model/eval/config/evaluation_config.py +108 -0
  29. isa_model/eval/evaluators/__init__.py +18 -0
  30. isa_model/eval/evaluators/base_evaluator.py +503 -0
  31. isa_model/eval/evaluators/llm_evaluator.py +472 -0
  32. isa_model/eval/factory.py +417 -709
  33. isa_model/eval/infrastructure/__init__.py +24 -0
  34. isa_model/eval/infrastructure/experiment_tracker.py +466 -0
  35. isa_model/eval/metrics.py +191 -21
  36. isa_model/inference/ai_factory.py +187 -387
  37. isa_model/inference/providers/modal_provider.py +109 -0
  38. isa_model/inference/providers/yyds_provider.py +108 -0
  39. isa_model/inference/services/__init__.py +2 -1
  40. isa_model/inference/services/audio/base_stt_service.py +65 -1
  41. isa_model/inference/services/audio/base_tts_service.py +75 -1
  42. isa_model/inference/services/audio/openai_stt_service.py +189 -151
  43. isa_model/inference/services/audio/openai_tts_service.py +12 -10
  44. isa_model/inference/services/audio/replicate_tts_service.py +61 -56
  45. isa_model/inference/services/base_service.py +55 -55
  46. isa_model/inference/services/embedding/base_embed_service.py +65 -1
  47. isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
  48. isa_model/inference/services/embedding/openai_embed_service.py +8 -10
  49. isa_model/inference/services/helpers/stacked_config.py +148 -0
  50. isa_model/inference/services/img/__init__.py +18 -0
  51. isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -35
  52. isa_model/inference/services/img/flux_professional_service.py +603 -0
  53. isa_model/inference/services/img/helpers/base_stacked_service.py +274 -0
  54. isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +210 -69
  55. isa_model/inference/services/llm/__init__.py +3 -3
  56. isa_model/inference/services/llm/base_llm_service.py +519 -35
  57. isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +40 -0
  58. isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
  59. isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
  60. isa_model/inference/services/llm/ollama_llm_service.py +150 -15
  61. isa_model/inference/services/llm/openai_llm_service.py +134 -31
  62. isa_model/inference/services/llm/yyds_llm_service.py +255 -0
  63. isa_model/inference/services/vision/__init__.py +38 -4
  64. isa_model/inference/services/vision/base_vision_service.py +241 -96
  65. isa_model/inference/services/vision/disabled/isA_vision_service.py +500 -0
  66. isa_model/inference/services/vision/doc_analysis_service.py +640 -0
  67. isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
  68. isa_model/inference/services/vision/helpers/image_utils.py +272 -3
  69. isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
  70. isa_model/inference/services/vision/openai_vision_service.py +109 -170
  71. isa_model/inference/services/vision/replicate_vision_service.py +508 -0
  72. isa_model/inference/services/vision/ui_analysis_service.py +823 -0
  73. isa_model/scripts/register_models.py +370 -0
  74. isa_model/scripts/register_models_with_embeddings.py +510 -0
  75. isa_model/serving/__init__.py +19 -0
  76. isa_model/serving/api/__init__.py +10 -0
  77. isa_model/serving/api/fastapi_server.py +89 -0
  78. isa_model/serving/api/middleware/__init__.py +9 -0
  79. isa_model/serving/api/middleware/request_logger.py +88 -0
  80. isa_model/serving/api/routes/__init__.py +5 -0
  81. isa_model/serving/api/routes/health.py +82 -0
  82. isa_model/serving/api/routes/llm.py +19 -0
  83. isa_model/serving/api/routes/ui_analysis.py +223 -0
  84. isa_model/serving/api/routes/unified.py +202 -0
  85. isa_model/serving/api/routes/vision.py +19 -0
  86. isa_model/serving/api/schemas/__init__.py +17 -0
  87. isa_model/serving/api/schemas/common.py +33 -0
  88. isa_model/serving/api/schemas/ui_analysis.py +78 -0
  89. {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/METADATA +4 -1
  90. isa_model-0.3.6.dist-info/RECORD +147 -0
  91. isa_model/core/model_manager.py +0 -208
  92. isa_model/core/model_registry.py +0 -342
  93. isa_model/inference/billing_tracker.py +0 -406
  94. isa_model/inference/services/llm/triton_llm_service.py +0 -481
  95. isa_model/inference/services/vision/ollama_vision_service.py +0 -194
  96. isa_model-0.3.4.dist-info/RECORD +0 -91
  97. /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
  98. /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
  99. {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/WHEEL +0 -0
  100. {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/top_level.txt +0 -0
isa_model/inference/services/llm/triton_llm_service.py (deleted)
@@ -1,481 +0,0 @@
- """
- Triton LLM Service
-
- Provides LLM-specific functionality using Triton Inference Server as the backend.
- Integrates with the existing TritonProvider for low-level operations.
- """
-
- import logging
- from typing import Dict, Any, List, Optional, Union, AsyncGenerator
- import json
- import asyncio
-
- from ..base_service import BaseService
- from ...providers.triton_provider import TritonProvider
- from ...base import ModelType, Capability
-
- logger = logging.getLogger(__name__)
-
-
- class TritonLLMService(BaseService):
-     """
-     LLM service using Triton Inference Server.
-
-     This service provides high-level LLM operations like text generation,
-     chat completion, and streaming responses using Triton as the backend.
-
-     Features:
-     - Text generation with customizable parameters
-     - Chat completion with conversation context
-     - Streaming responses for real-time interaction
-     - Multiple model support
-     - Automatic model loading and management
-     - Integration with model registry
-
-     Example:
-         ```python
-         from isa_model.inference.services.llm import TritonLLMService
-
-         # Initialize service
-         service = TritonLLMService({
-             "triton_url": "localhost:8001",
-             "default_model": "gemma-4b-alpaca"
-         })
-
-         # Generate text
-         response = await service.generate_text(
-             prompt="What is artificial intelligence?",
-             model_name="gemma-4b-alpaca",
-             max_tokens=100
-         )
-
-         # Chat completion
-         messages = [
-             {"role": "user", "content": "Hello, how are you?"}
-         ]
-         response = await service.chat_completion(
-             messages=messages,
-             model_name="gemma-4b-alpaca"
-         )
-
-         # Streaming generation
-         async for chunk in service.generate_text_stream(
-             prompt="Tell me a story",
-             model_name="gemma-4b-alpaca"
-         ):
-             print(chunk["text"], end="")
-         ```
-     """
-
-     def __init__(self, config: Dict[str, Any]):
-         """
-         Initialize Triton LLM service.
-
-         Args:
-             config: Service configuration including Triton connection details
-         """
-         super().__init__(config)
-
-         # Initialize Triton provider
-         self.triton_provider = TritonProvider(config)
-
-         # Service configuration
-         self.default_model = config.get("default_model", "model")
-         self.max_tokens_limit = config.get("max_tokens_limit", 2048)
-         self.temperature_default = config.get("temperature_default", 0.7)
-         self.top_p_default = config.get("top_p_default", 0.9)
-         self.top_k_default = config.get("top_k_default", 50)
-
-         # Chat templates
-         self.chat_templates = {
-             "gemma": self._format_gemma_chat,
-             "llama": self._format_llama_chat,
-             "default": self._format_default_chat
-         }
-
-         logger.info(f"TritonLLMService initialized with default model: {self.default_model}")
-
-     async def initialize(self) -> bool:
-         """Initialize the service and check Triton connectivity"""
-         try:
-             # Check if Triton server is live
-             if not self.triton_provider.is_server_live():
-                 logger.error("Triton server is not live")
-                 return False
-
-             # Check if default model is ready
-             if not self.triton_provider.is_model_ready(self.default_model):
-                 logger.warning(f"Default model {self.default_model} is not ready")
-
-             logger.info("TritonLLMService initialized successfully")
-             return True
-
-         except Exception as e:
-             logger.error(f"Failed to initialize TritonLLMService: {e}")
-             return False
-
-     async def generate_text(self,
-                             prompt: str,
-                             model_name: Optional[str] = None,
-                             max_tokens: int = 100,
-                             temperature: float = None,
-                             top_p: float = None,
-                             top_k: int = None,
-                             stop_sequences: Optional[List[str]] = None,
-                             system_prompt: Optional[str] = None,
-                             **kwargs) -> Dict[str, Any]:
-         """
-         Generate text using the specified model.
-
-         Args:
-             prompt: Input text prompt
-             model_name: Name of the model to use (uses default if not specified)
-             max_tokens: Maximum number of tokens to generate
-             temperature: Sampling temperature (0.0 to 1.0)
-             top_p: Top-p sampling parameter
-             top_k: Top-k sampling parameter
-             stop_sequences: List of sequences to stop generation
-             system_prompt: System prompt for instruction-following models
-             **kwargs: Additional generation parameters
-
-         Returns:
-             Dictionary containing generated text and metadata
-         """
-         try:
-             # Use default model if not specified
-             model_name = model_name or self.default_model
-
-             # Validate parameters
-             max_tokens = min(max_tokens, self.max_tokens_limit)
-             temperature = temperature if temperature is not None else self.temperature_default
-             top_p = top_p if top_p is not None else self.top_p_default
-             top_k = top_k if top_k is not None else self.top_k_default
-
-             # Prepare generation parameters
-             params = {
-                 "temperature": temperature,
-                 "max_tokens": max_tokens,
-                 "top_p": top_p,
-                 "top_k": top_k,
-                 **kwargs
-             }
-
-             if system_prompt:
-                 params["system_prompt"] = system_prompt
-
-             if stop_sequences:
-                 params["stop_sequences"] = stop_sequences
-
-             logger.debug(f"Generating text with model {model_name}, prompt length: {len(prompt)}")
-
-             # Call Triton provider
-             result = await self.triton_provider.completions(
-                 prompt=prompt,
-                 model_name=model_name,
-                 params=params
-             )
-
-             if "error" in result:
-                 logger.error(f"Text generation failed: {result['error']}")
-                 return {
-                     "success": False,
-                     "error": result["error"],
-                     "model_name": model_name
-                 }
-
-             # Format response
-             response = {
-                 "success": True,
-                 "text": result["completion"],
-                 "model_name": model_name,
-                 "usage": result.get("metadata", {}).get("token_usage", {}),
-                 "parameters": {
-                     "temperature": temperature,
-                     "max_tokens": max_tokens,
-                     "top_p": top_p,
-                     "top_k": top_k
-                 }
-             }
-
-             logger.debug(f"Text generation completed, output length: {len(response['text'])}")
-             return response
-
-         except Exception as e:
-             logger.error(f"Error in generate_text: {e}")
-             return {
-                 "success": False,
-                 "error": str(e),
-                 "model_name": model_name or self.default_model
-             }
-
-     async def chat_completion(self,
-                               messages: List[Dict[str, str]],
-                               model_name: Optional[str] = None,
-                               max_tokens: int = 100,
-                               temperature: float = None,
-                               top_p: float = None,
-                               top_k: int = None,
-                               stop_sequences: Optional[List[str]] = None,
-                               **kwargs) -> Dict[str, Any]:
-         """
-         Generate chat completion using conversation messages.
-
-         Args:
-             messages: List of message dictionaries with 'role' and 'content'
-             model_name: Name of the model to use
-             max_tokens: Maximum number of tokens to generate
-             temperature: Sampling temperature
-             top_p: Top-p sampling parameter
-             top_k: Top-k sampling parameter
-             stop_sequences: List of sequences to stop generation
-             **kwargs: Additional parameters
-
-         Returns:
-             Dictionary containing the assistant's response and metadata
-         """
-         try:
-             # Use default model if not specified
-             model_name = model_name or self.default_model
-
-             # Format messages into a prompt
-             prompt = self._format_chat_messages(messages, model_name)
-
-             logger.debug(f"Chat completion with {len(messages)} messages, model: {model_name}")
-
-             # Generate response
-             result = await self.generate_text(
-                 prompt=prompt,
-                 model_name=model_name,
-                 max_tokens=max_tokens,
-                 temperature=temperature,
-                 top_p=top_p,
-                 top_k=top_k,
-                 stop_sequences=stop_sequences,
-                 **kwargs
-             )
-
-             if not result["success"]:
-                 return result
-
-             # Format as chat completion response
-             response = {
-                 "success": True,
-                 "message": {
-                     "role": "assistant",
-                     "content": result["text"]
-                 },
-                 "model_name": model_name,
-                 "usage": result.get("usage", {}),
-                 "parameters": result.get("parameters", {})
-             }
-
-             logger.debug("Chat completion completed successfully")
-             return response
-
-         except Exception as e:
-             logger.error(f"Error in chat_completion: {e}")
-             return {
-                 "success": False,
-                 "error": str(e),
-                 "model_name": model_name or self.default_model
-             }
-
-     async def generate_text_stream(self,
-                                    prompt: str,
-                                    model_name: Optional[str] = None,
-                                    max_tokens: int = 100,
-                                    temperature: float = None,
-                                    top_p: float = None,
-                                    top_k: int = None,
-                                    stop_sequences: Optional[List[str]] = None,
-                                    **kwargs) -> AsyncGenerator[Dict[str, Any], None]:
-         """
-         Generate text with streaming response.
-
-         Args:
-             prompt: Input text prompt
-             model_name: Name of the model to use
-             max_tokens: Maximum number of tokens to generate
-             temperature: Sampling temperature
-             top_p: Top-p sampling parameter
-             top_k: Top-k sampling parameter
-             stop_sequences: List of sequences to stop generation
-             **kwargs: Additional parameters
-
-         Yields:
-             Dictionary chunks containing partial text and metadata
-         """
-         try:
-             # For now, simulate streaming by chunking the complete response
-             # TODO: Implement true streaming when Triton supports it
-
-             result = await self.generate_text(
-                 prompt=prompt,
-                 model_name=model_name,
-                 max_tokens=max_tokens,
-                 temperature=temperature,
-                 top_p=top_p,
-                 top_k=top_k,
-                 stop_sequences=stop_sequences,
-                 **kwargs
-             )
-
-             if not result["success"]:
-                 yield {
-                     "success": False,
-                     "error": result["error"],
-                     "model_name": model_name or self.default_model
-                 }
-                 return
-
-             # Simulate streaming by yielding chunks
-             text = result["text"]
-             chunk_size = 10  # Characters per chunk
-
-             for i in range(0, len(text), chunk_size):
-                 chunk = text[i:i + chunk_size]
-
-                 yield {
-                     "success": True,
-                     "text": chunk,
-                     "is_complete": i + chunk_size >= len(text),
-                     "model_name": model_name or self.default_model
-                 }
-
-                 # Small delay to simulate streaming
-                 await asyncio.sleep(0.05)
-
-         except Exception as e:
-             logger.error(f"Error in generate_text_stream: {e}")
-             yield {
-                 "success": False,
-                 "error": str(e),
-                 "model_name": model_name or self.default_model
-             }
-
-     async def get_model_info(self, model_name: str) -> Dict[str, Any]:
-         """Get information about a specific model"""
-         try:
-             if not self.triton_provider.is_model_ready(model_name):
-                 return {
-                     "success": False,
-                     "error": f"Model {model_name} is not ready"
-                 }
-
-             metadata = self.triton_provider.get_model_metadata(model_name)
-             config = self.triton_provider.get_model_config(model_name)
-
-             return {
-                 "success": True,
-                 "model_name": model_name,
-                 "metadata": metadata,
-                 "config": config,
-                 "is_ready": True
-             }
-
-         except Exception as e:
-             logger.error(f"Error getting model info for {model_name}: {e}")
-             return {
-                 "success": False,
-                 "error": str(e),
-                 "model_name": model_name
-             }
-
-     async def list_available_models(self) -> List[str]:
-         """List all available models"""
-         try:
-             return self.triton_provider.get_models(ModelType.LLM)
-         except Exception as e:
-             logger.error(f"Error listing models: {e}")
-             return []
-
-     def _format_chat_messages(self, messages: List[Dict[str, str]], model_name: str) -> str:
-         """Format chat messages into a prompt based on model type"""
-         # Determine chat template based on model name
-         template_key = "default"
-         if "gemma" in model_name.lower():
-             template_key = "gemma"
-         elif "llama" in model_name.lower():
-             template_key = "llama"
-
-         formatter = self.chat_templates.get(template_key, self.chat_templates["default"])
-         return formatter(messages)
-
-     def _format_gemma_chat(self, messages: List[Dict[str, str]]) -> str:
-         """Format messages for Gemma models"""
-         formatted = ""
-
-         for message in messages:
-             role = message["role"]
-             content = message["content"]
-
-             if role == "system":
-                 formatted += f"<start_of_turn>system\n{content}<end_of_turn>\n"
-             elif role == "user":
-                 formatted += f"<start_of_turn>user\n{content}<end_of_turn>\n"
-             elif role == "assistant":
-                 formatted += f"<start_of_turn>model\n{content}<end_of_turn>\n"
-
-         # Add the start token for the assistant response
-         formatted += "<start_of_turn>model\n"
-
-         return formatted
-
-     def _format_llama_chat(self, messages: List[Dict[str, str]]) -> str:
-         """Format messages for Llama models"""
-         formatted = "<s>"
-
-         for message in messages:
-             role = message["role"]
-             content = message["content"]
-
-             if role == "system":
-                 formatted += f"[INST] <<SYS>>\n{content}\n<</SYS>>\n\n"
-             elif role == "user":
-                 if formatted.endswith("<s>"):
-                     formatted += f"[INST] {content} [/INST]"
-                 else:
-                     formatted += f"<s>[INST] {content} [/INST]"
-             elif role == "assistant":
-                 formatted += f" {content} </s>"
-
-         return formatted
-
-     def _format_default_chat(self, messages: List[Dict[str, str]]) -> str:
-         """Default chat formatting"""
-         formatted = ""
-
-         for message in messages:
-             role = message["role"]
-             content = message["content"]
-
-             if role == "system":
-                 formatted += f"System: {content}\n\n"
-             elif role == "user":
-                 formatted += f"User: {content}\n\n"
-             elif role == "assistant":
-                 formatted += f"Assistant: {content}\n\n"
-
-         # Add prompt for assistant response
-         formatted += "Assistant:"
-
-         return formatted
-
-     def get_capabilities(self) -> List[Capability]:
-         """Get service capabilities"""
-         return [
-             Capability.CHAT,
-             Capability.COMPLETION
-         ]
-
-     def get_supported_models(self) -> List[str]:
-         """Get list of supported model types"""
-         return [
-             "gemma-2-2b-it",
-             "gemma-2-4b-it",
-             "gemma-2-7b-it",
-             "llama-2-7b-chat",
-             "llama-2-13b-chat",
-             "mistral-7b-instruct",
-             "custom-models"  # Support for custom deployed models
-         ]
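
The only model-specific logic in this removed file is the chat templating. For readers tracking the removal, here is a minimal standalone reproduction of the `_format_gemma_chat` behavior shown above (an illustration of the deleted code, not an isa_model API):

```python
# Standalone reproduction of the removed _format_gemma_chat, for illustration.
def format_gemma_chat(messages: list[dict[str, str]]) -> str:
    # Gemma uses <start_of_turn>/<end_of_turn> markers; the "assistant"
    # role maps to Gemma's "model" turn.
    role_map = {"system": "system", "user": "user", "assistant": "model"}
    formatted = ""
    for message in messages:
        turn = role_map.get(message["role"])
        if turn:
            formatted += f"<start_of_turn>{turn}\n{message['content']}<end_of_turn>\n"
    # Leave the model turn open so the backend generates the reply
    formatted += "<start_of_turn>model\n"
    return formatted

print(format_gemma_chat([{"role": "user", "content": "Hello"}]))
# <start_of_turn>user
# Hello<end_of_turn>
# <start_of_turn>model
```

The trailing open `<start_of_turn>model\n` is what cues the backend to continue generating as the assistant.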
isa_model/inference/services/vision/ollama_vision_service.py (deleted)
@@ -1,194 +0,0 @@
- import os
- import json
- import base64
- import ollama
- from typing import Dict, Any, Union, List, Optional, BinaryIO
- from tenacity import retry, stop_after_attempt, wait_exponential
- from isa_model.inference.services.vision.base_vision_service import BaseVisionService
- from isa_model.inference.providers.base_provider import BaseProvider
- import logging
- import requests
-
- logger = logging.getLogger(__name__)
-
- class OllamaVisionService(BaseVisionService):
-     """Vision model service wrapper for Ollama using base64 encoded images"""
-
-     def __init__(self, provider: 'BaseProvider', model_name: str = 'gemma3:4b'):
-         super().__init__(provider, model_name)
-         self.max_tokens = self.config.get('max_tokens', 1000)
-         self.temperature = self.config.get('temperature', 0.7)
-
-     def _get_image_data(self, image: Union[str, BinaryIO]) -> bytes:
-         """Fetch image data; supports local files and URLs"""
-         if isinstance(image, str):
-             # Check if it's a URL
-             if image.startswith(('http://', 'https://')):
-                 response = requests.get(image)
-                 response.raise_for_status()
-                 return response.content
-             else:
-                 # Local file path
-                 with open(image, 'rb') as f:
-                     return f.read()
-         else:
-             return image.read()
-
-     @retry(
-         stop=stop_after_attempt(3),
-         wait=wait_exponential(multiplier=1, min=4, max=10),
-         reraise=True
-     )
-     async def analyze_image(
-         self,
-         image: Union[str, BinaryIO],
-         prompt: Optional[str] = None,
-         max_tokens: int = 1000
-     ) -> Dict[str, Any]:
-         """
-         Analyze image and provide description or answer questions
-         """
-         try:
-             # Fetch the image data
-             image_data = self._get_image_data(image)
-
-             # Convert to base64
-             image_base64 = base64.b64encode(image_data).decode('utf-8')
-
-             # Use a default prompt if none was provided
-             query = prompt or "Describe the content of this image."
-
-             # Call the ollama library directly
-             response = ollama.chat(
-                 model=self.model_name,
-                 messages=[{
-                     'role': 'user',
-                     'content': query,
-                     'images': [image_base64]
-                 }]
-             )
-
-             content = response['message']['content']
-
-             return {
-                 "text": content,
-                 "confidence": 1.0,  # Ollama doesn't provide confidence scores
-                 "detected_objects": [],  # Basic implementation
-                 "metadata": {
-                     "model": self.model_name,
-                     "prompt": query
-                 }
-             }
-
-         except Exception as e:
-             logger.error(f"Error in image analysis: {e}")
-             raise
-
-     async def analyze_images(
-         self,
-         images: List[Union[str, BinaryIO]],
-         prompt: Optional[str] = None,
-         max_tokens: int = 1000
-     ) -> List[Dict[str, Any]]:
-         """Analyze multiple images"""
-         results = []
-         for image in images:
-             result = await self.analyze_image(image, prompt, max_tokens)
-             results.append(result)
-         return results
-
-     async def describe_image(
-         self,
-         image: Union[str, BinaryIO],
-         detail_level: str = "medium"
-     ) -> Dict[str, Any]:
-         """Generate detailed description of image"""
-         prompts = {
-             "low": "Briefly describe this image.",
-             "medium": "Describe in detail this image's content, colors, objects, and scene.",
-             "high": "Describe this image in great detail, including all visible objects, colors, textures, scene, mood, and any other details."
-         }
-
-         prompt = prompts.get(detail_level, prompts["medium"])
-         result = await self.analyze_image(image, prompt)
-
-         return {
-             "description": result["text"],
-             "objects": [],  # Basic implementation
-             "scene": "unknown",  # Basic implementation
-             "colors": []  # Basic implementation
-         }
-
-     async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
-         """Extract text from image (OCR)"""
-         result = await self.analyze_image(image, "Extract all text content from this image.")
-
-         return {
-             "text": result["text"],
-             "confidence": 1.0,
-             "bounding_boxes": [],  # Basic implementation
-             "language": "unknown"  # Basic implementation
-         }
-
-     async def detect_objects(
-         self,
-         image: Union[str, BinaryIO],
-         confidence_threshold: float = 0.5
-     ) -> Dict[str, Any]:
-         """Detect objects in image"""
-         result = await self.analyze_image(image, "Identify and list all objects in this image.")
-
-         return {
-             "objects": [],  # Basic implementation - would need parsing
-             "count": 0,
-             "bounding_boxes": []
-         }
-
-     async def classify_image(
-         self,
-         image: Union[str, BinaryIO],
-         categories: Optional[List[str]] = None
-     ) -> Dict[str, Any]:
-         """Classify image into categories"""
-         if categories:
-             category_str = ", ".join(categories)
-             prompt = f"Classify this image into one of the following categories: {category_str}"
-         else:
-             prompt = "What category does this image belong to?"
-
-         result = await self.analyze_image(image, prompt)
-
-         return {
-             "category": result["text"],
-             "confidence": 1.0,
-             "all_predictions": [{"category": result["text"], "confidence": 1.0}]
-         }
-
-     async def compare_images(
-         self,
-         image1: Union[str, BinaryIO],
-         image2: Union[str, BinaryIO]
-     ) -> Dict[str, Any]:
-         """Compare two images for similarity"""
-         # For now, analyze each image separately and compare descriptions
-         result1 = await self.analyze_image(image1, "Describe this image.")
-         result2 = await self.analyze_image(image2, "Describe this image.")
-
-         return {
-             "similarity_score": 0.5,  # Basic implementation
-             "differences": "Needs further analysis",
-             "common_elements": "Needs further analysis"
-         }
-
-     def get_supported_formats(self) -> List[str]:
-         """Get list of supported image formats"""
-         return ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'webp']
-
-     def get_max_image_size(self) -> Dict[str, int]:
-         """Get maximum supported image dimensions"""
-         return {"width": 4096, "height": 4096}
-
-     async def close(self):
-         """Cleanup resources"""
-         pass
-
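
For anyone pinned to 0.3.4, a hypothetical usage sketch of the removed `OllamaVisionService`, based only on the signatures visible in the hunk above; the `provider` argument stands in for an already-configured `BaseProvider` instance, whose construction is not shown in this diff:

```python
# Hypothetical sketch against isa-model 0.3.4; this class is gone in 0.3.6.
# `provider` is assumed to be a configured BaseProvider supplied by the caller.
import asyncio

from isa_model.inference.services.vision.ollama_vision_service import OllamaVisionService


async def demo(provider) -> None:
    service = OllamaVisionService(provider, model_name="gemma3:4b")
    try:
        # analyze_image accepts a local path, an http(s) URL, or a binary stream
        result = await service.analyze_image("photo.jpg", prompt="What is in this image?")
        print(result["text"])
    finally:
        await service.close()

# asyncio.run(demo(provider))  # run with a real provider instance
```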