isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +40 -17
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/storage/hf_storage.py +1 -1
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
isa_model/inference/services/llm/ollama_llm_service.py:

@@ -3,6 +3,7 @@ import httpx
 import json
 from typing import Dict, Any, List, Union, AsyncGenerator, Optional, Callable
 from isa_model.inference.services.llm.base_llm_service import BaseLLMService
+from isa_model.core.config.config_manager import ConfigManager
 
 logger = logging.getLogger(__name__)
 
@@ -16,7 +17,10 @@ class OllamaLLMService(BaseLLMService):
         provider_config = self.get_provider_config()
 
         # Create HTTP client for Ollama API
-
+        config_manager = ConfigManager()
+        # Use Consul discovery with fallback
+        default_base_url = config_manager.get_ollama_url()
+        base_url = provider_config.get("base_url", default_base_url)
         timeout = provider_config.get("timeout", 60)
 
         self.client = httpx.AsyncClient(
@@ -34,7 +38,10 @@ class OllamaLLMService(BaseLLMService):
         """Ensure the HTTP client is available and not closed"""
         if not hasattr(self, 'client') or not self.client or self.client.is_closed:
             provider_config = self.get_provider_config()
-
+            config_manager = ConfigManager()
+            # Use Consul discovery with fallback
+            default_base_url = config_manager.get_ollama_url()
+            base_url = provider_config.get("base_url", default_base_url)
             timeout = provider_config.get("timeout", 60)
             self.client = httpx.AsyncClient(base_url=base_url, timeout=timeout)
 
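The effect of the Ollama change is that an explicit `base_url` in the provider config still wins; only when it is absent does the service ask `ConfigManager` for a Consul-discovered URL (with a built-in fallback). A minimal sketch of that resolution order, not part of the diff, using only the names introduced above:

# Sketch only: mirrors the fallback order added in 0.4.3 for the Ollama client.
from isa_model.core.config.config_manager import ConfigManager

provider_config = {}  # no explicit base_url configured
config_manager = ConfigManager()
default_base_url = config_manager.get_ollama_url()            # Consul discovery with fallback
base_url = provider_config.get("base_url", default_base_url)  # explicit config still takes precedence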
isa_model/inference/services/llm/openai_llm_service.py:

@@ -20,6 +20,8 @@ class OpenAILLMService(BaseLLMService):
 
         # Check if this is an O-series reasoning model
         self.is_reasoning_model = model_name.startswith("o4-") or model_name.startswith("o3-")
+        self.uses_completion_tokens = self.is_reasoning_model or model_name.startswith("gpt-5")
+        self.requires_default_temperature = self.is_reasoning_model or model_name.startswith("gpt-5")
         self.supports_deep_research = "deep-search" in model_name or "deep-research" in model_name
 
         # Get configuration from centralized config manager
@@ -36,7 +38,9 @@ class OpenAILLMService(BaseLLMService):
         self.client = AsyncOpenAI(
             api_key=provider_config["api_key"],
             base_url=provider_config.get("api_base_url", "https://api.openai.com/v1"),
-            organization=provider_config.get("organization")
+            organization=provider_config.get("organization"),
+            timeout=10.0,  # 10 second timeout for first token (much faster than 600s default)
+            max_retries=2  # Retry on timeout
         )
 
         logger.info(f"Initialized OpenAILLMService with model {self.model_name} and endpoint {self.client.base_url}")
@@ -70,6 +74,8 @@ class OpenAILLMService(BaseLLMService):
 
         # Copy OpenAI-specific attributes
         bound_service.is_reasoning_model = self.is_reasoning_model
+        bound_service.uses_completion_tokens = self.uses_completion_tokens
+        bound_service.requires_default_temperature = self.requires_default_temperature
         bound_service.supports_deep_research = self.supports_deep_research
 
         # Copy base class attributes
@@ -103,7 +109,7 @@ class OpenAILLMService(BaseLLMService):
 
         return bound_service
 
-    async def astream(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
+    async def astream(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False, **extra_kwargs) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
         """
         True streaming method - yields tokens one by one as they arrive
 
@@ -121,19 +127,19 @@ class OpenAILLMService(BaseLLMService):
             if use_responses_api:
                 logger.info(f"Using Responses API streaming for {self.model_name}")
                 # Use Responses API streaming
-                async for chunk in self._astream_responses_api(input_data, show_reasoning):
+                async for chunk in self._astream_responses_api(input_data, show_reasoning, **extra_kwargs):
                     yield chunk
             else:
                 logger.debug(f"Using Chat Completions API streaming for {self.model_name}")
                 # Use Chat Completions API streaming
-                async for chunk in self._astream_chat_completions_api(input_data):
+                async for chunk in self._astream_chat_completions_api(input_data, **extra_kwargs):
                     yield chunk
 
         except Exception as e:
             logger.error(f"Error in astream: {e}")
             raise
 
-    async def _astream_responses_api(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
+    async def _astream_responses_api(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False, **extra_kwargs) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
         """Stream using Responses API for reasoning models and deep research models"""
         try:
             # Use adapter manager to prepare messages
@@ -228,7 +234,7 @@ class OpenAILLMService(BaseLLMService):
             logger.error(f"Error in _astream_responses_api: {e}")
             raise
 
-    async def _astream_chat_completions_api(self, input_data: Union[str, List[Dict[str, str]], Any]) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
+    async def _astream_chat_completions_api(self, input_data: Union[str, List[Dict[str, str]], Any], **extra_kwargs) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
         """Stream using Chat Completions API for standard models"""
         try:
             # Use adapter manager to prepare messages
@@ -242,13 +248,13 @@ class OpenAILLMService(BaseLLMService):
                 "stream": True
             }
 
-            # O4 models only support temperature=1 (default)
-            if not self.
+            # O4 and GPT-5 models only support temperature=1 (default)
+            if not self.requires_default_temperature:
                 kwargs["temperature"] = provider_config.get("temperature", 0.7)
 
-            # O4 models use max_completion_tokens instead of max_tokens
+            # O4 and GPT-5 models use max_completion_tokens instead of max_tokens
             max_tokens_value = provider_config.get("max_tokens", 1024)
-            if self.
+            if self.uses_completion_tokens:
                 kwargs["max_completion_tokens"] = max_tokens_value
             else:
                 kwargs["max_tokens"] = max_tokens_value
@@ -259,6 +265,11 @@ class OpenAILLMService(BaseLLMService):
                 kwargs["tools"] = tool_schemas
                 kwargs["tool_choice"] = "auto"
 
+            # Add response_format if specified (for JSON mode)
+            if 'response_format' in extra_kwargs:
+                kwargs['response_format'] = extra_kwargs['response_format']
+                logger.debug(f"Using response_format in streaming: {extra_kwargs['response_format']}")
+
             # Stream tokens and detect tool calls
             content_chunks = []
             tool_calls_accumulator = {}  # Track complete tool calls by ID
@@ -360,13 +371,14 @@ class OpenAILLMService(BaseLLMService):
             logger.error(f"Error in _astream_chat_completions_api: {e}")
             raise
 
-    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False) -> Union[str, Any]:
+    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False, **extra_kwargs) -> Union[str, Any]:
         """
         Unified invoke method for all input types
 
         Args:
             input_data: Input messages or text
             show_reasoning: If True and model supports it, show reasoning process using Responses API
+            **extra_kwargs: Additional parameters to pass to the API (e.g., response_format)
         """
         try:
             # Use adapter manager to prepare messages
@@ -385,13 +397,13 @@ class OpenAILLMService(BaseLLMService):
                 "messages": messages
             }
 
-            # O4 models only support temperature=1 (default)
-            if not self.
+            # O4 and GPT-5 models only support temperature=1 (default)
+            if not self.requires_default_temperature:
                 kwargs["temperature"] = provider_config.get("temperature", 0.7)
 
-            # O4 models use max_completion_tokens instead of max_tokens
+            # O4 and GPT-5 models use max_completion_tokens instead of max_tokens
             max_tokens_value = provider_config.get("max_tokens", 1024)
-            if self.
+            if self.uses_completion_tokens:
                 kwargs["max_completion_tokens"] = max_tokens_value
             else:
                 kwargs["max_tokens"] = max_tokens_value
@@ -403,11 +415,16 @@ class OpenAILLMService(BaseLLMService):
                 if not use_responses_api:  # Responses API handles tool choice differently
                     kwargs["tool_choice"] = "auto"
 
+            # Add response_format if specified (for JSON mode)
+            if 'response_format' in extra_kwargs:
+                kwargs['response_format'] = extra_kwargs['response_format']
+                logger.debug(f"Using response_format: {extra_kwargs['response_format']}")
+
             # Handle streaming vs non-streaming
             if self.streaming:
                 # TRUE STREAMING MODE - collect all chunks from the stream
                 content_chunks = []
-                async for token in self.astream(input_data, show_reasoning=show_reasoning):
+                async for token in self.astream(input_data, show_reasoning=show_reasoning, **extra_kwargs):
                     if isinstance(token, str):
                         content_chunks.append(token)
                     elif isinstance(token, dict) and "result" in token:
isa_model/inference/services/llm/yyds_llm_service.py:

@@ -63,16 +63,19 @@ class YydsLLMService(BaseLLMService):
 
         return bound_service
 
-    async def astream(self, input_data: Union[str, List[Dict[str, str]], Any]) -> AsyncGenerator[str, None]:
+    async def astream(self, input_data: Union[str, List[Dict[str, str]], Any], **kwargs) -> AsyncGenerator[str, None]:
         """
         True streaming method - yields tokens one by one as they arrive
 
         Args:
             input_data: Same as ainvoke
+            **kwargs: Additional parameters (will filter out unsupported ones)
 
         Yields:
             Individual tokens as they arrive from the API
         """
+        # Remove parameters that yyds doesn't support
+        kwargs.pop('show_reasoning', None)  # OpenAI-specific parameter
         try:
             # Use adapter manager to prepare messages
             messages = self._prepare_messages(input_data)
@@ -115,8 +118,11 @@ class YydsLLMService(BaseLLMService):
             logger.error(f"Error in astream: {e}")
             raise
 
-    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
+    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any], **kwargs) -> Union[str, Any]:
         """Unified invoke method for all input types"""
+        # Remove parameters that yyds doesn't support
+        kwargs.pop('show_reasoning', None)  # OpenAI-specific parameter
+        kwargs.pop('task', None)  # Handled internally
         try:
             # Use adapter manager to prepare messages
             messages = self._prepare_messages(input_data)
isa_model/inference/services/vision/__init__.py:

@@ -31,6 +31,21 @@ except ImportError:
     OllamaVisionService = None
     OLLAMA_VISION_AVAILABLE = False
 
+# Computer Vision specialized services
+try:
+    from .vgg16_vision_service import VGG16VisionService
+    VGG16_VISION_AVAILABLE = True
+except ImportError:
+    VGG16VisionService = None
+    VGG16_VISION_AVAILABLE = False
+
+try:
+    from .blip_vision_service import BLIPVisionService
+    BLIP_VISION_AVAILABLE = True
+except ImportError:
+    BLIPVisionService = None
+    BLIP_VISION_AVAILABLE = False
+
 __all__ = [
     "BaseVisionService",
     "OpenAIVisionService",
@@ -43,4 +58,10 @@ if ISA_VISION_AVAILABLE:
     __all__.append("ISAVisionService")
 
 if OLLAMA_VISION_AVAILABLE:
-    __all__.append("OllamaVisionService")
+    __all__.append("OllamaVisionService")
+
+if VGG16_VISION_AVAILABLE:
+    __all__.append("VGG16VisionService")
+
+if BLIP_VISION_AVAILABLE:
+    __all__.append("BLIPVisionService")
isa_model/inference/services/vision/blip_vision_service.py (new file):

@@ -0,0 +1,359 @@
+#!/usr/bin/env python3
+"""
+BLIP Vision Service
+Computer vision service using BLIP for image captioning and description
+Based on the notebook implementation
+"""
+
+import os
+import logging
+from typing import Dict, List, Any, Optional, Union, BinaryIO
+from PIL import Image
+import io
+
+from .base_vision_service import BaseVisionService
+
+logger = logging.getLogger(__name__)
+
+def _lazy_import_blip_deps():
+    """Lazy import BLIP dependencies"""
+    try:
+        import torch
+        import tensorflow as tf
+        from transformers import BlipProcessor, BlipForConditionalGeneration
+
+        return {
+            'torch': torch,
+            'tf': tf,
+            'BlipProcessor': BlipProcessor,
+            'BlipForConditionalGeneration': BlipForConditionalGeneration,
+            'available': True
+        }
+    except ImportError as e:
+        logger.warning(f"BLIP dependencies not available: {e}")
+        return {'available': False}
+
+class BLIPVisionService(BaseVisionService):
+    """
+    BLIP-based vision service for image captioning and description
+    Provides an alternative implementation to VLM-based captioning
+    """
+
+    def __init__(self, model_name: str = "Salesforce/blip-image-captioning-base"):
+        """
+        Initialize BLIP vision service
+
+        Args:
+            model_name: Hugging Face model name for BLIP
+        """
+        super().__init__()
+
+        self.model_name = model_name
+        self.processor = None
+        self.model = None
+
+        # Lazy load dependencies
+        self.blip_components = _lazy_import_blip_deps()
+
+        if not self.blip_components['available']:
+            raise ImportError("BLIP dependencies (transformers, torch) are required")
+
+        # Load BLIP model
+        self._load_blip_model()
+
+    def _load_blip_model(self):
+        """Load BLIP model and processor"""
+        try:
+            # Load the pretrained BLIP processor and model
+            self.processor = self.blip_components['BlipProcessor'].from_pretrained(self.model_name)
+            self.model = self.blip_components['BlipForConditionalGeneration'].from_pretrained(self.model_name)
+
+            logger.info(f"BLIP model loaded: {self.model_name}")
+
+        except Exception as e:
+            logger.error(f"Error loading BLIP model: {e}")
+            raise
+
+    def _preprocess_image(self, image: Union[str, BinaryIO]) -> Image.Image:
+        """
+        Preprocess image for BLIP input
+
+        Args:
+            image: Image path or binary data
+
+        Returns:
+            PIL Image in RGB format
+        """
+        try:
+            # Handle different image input types
+            if isinstance(image, str):
+                # File path
+                pil_image = Image.open(image).convert('RGB')
+            elif hasattr(image, 'read'):
+                # Binary IO
+                image_data = image.read()
+                pil_image = Image.open(io.BytesIO(image_data)).convert('RGB')
+            else:
+                raise ValueError("Unsupported image format")
+
+            return pil_image
+
+        except Exception as e:
+            logger.error(f"Error preprocessing image: {e}")
+            raise
+
+    def _generate_text(self, image: Image.Image, prompt: str) -> str:
+        """
+        Generate text for image using BLIP
+
+        Args:
+            image: PIL Image
+            prompt: Text prompt for generation
+
+        Returns:
+            Generated text
+        """
+        try:
+            # Prepare inputs for BLIP model
+            inputs = self.processor(images=image, text=prompt, return_tensors="pt")
+
+            # Generate text output
+            output = self.model.generate(**inputs)
+
+            # Decode output
+            result = self.processor.decode(output[0], skip_special_tokens=True)
+
+            return result
+
+        except Exception as e:
+            logger.error(f"Error generating text: {e}")
+            raise
+
+    async def describe_image(self,
+                             image: Union[str, BinaryIO],
+                             detail_level: str = "medium") -> Dict[str, Any]:
+        """
+        Generate description for image using BLIP
+
+        Args:
+            image: Image path or binary data
+            detail_level: Level of detail (not used in BLIP, maintained for compatibility)
+
+        Returns:
+            Description results
+        """
+        try:
+            # Preprocess image
+            pil_image = self._preprocess_image(image)
+
+            # Generate caption using BLIP
+            prompt = "This is a picture of"  # Following notebook implementation
+            caption = self._generate_text(pil_image, prompt)
+
+            return {
+                "task": "describe",
+                "service": "BLIPVisionService",
+                "description": caption,
+                "detail_level": detail_level,
+                "model_type": "BLIP",
+                "prompt_used": prompt,
+                "success": True
+            }
+
+        except Exception as e:
+            logger.error(f"Error describing image: {e}")
+            return {
+                "error": str(e),
+                "service": "BLIPVisionService",
+                "success": False
+            }
+
+    async def analyze_image(self,
+                            image: Union[str, BinaryIO],
+                            prompt: Optional[str] = None,
+                            max_tokens: int = 1000) -> Dict[str, Any]:
+        """
+        Analyze image using BLIP
+
+        Args:
+            image: Image path or binary data
+            prompt: Optional custom prompt
+            max_tokens: Not used for BLIP
+
+        Returns:
+            Analysis results
+        """
+        try:
+            # Preprocess image
+            pil_image = self._preprocess_image(image)
+
+            # Use custom prompt or default
+            if prompt:
+                analysis_prompt = prompt
+            else:
+                analysis_prompt = "This is a detailed photo showing"  # For summary-like analysis
+
+            # Generate analysis using BLIP
+            analysis_text = self._generate_text(pil_image, analysis_prompt)
+
+            return {
+                "task": "analyze",
+                "service": "BLIPVisionService",
+                "text": analysis_text,
+                "model_type": "BLIP",
+                "prompt_used": analysis_prompt,
+                "success": True
+            }
+
+        except Exception as e:
+            logger.error(f"Error analyzing image: {e}")
+            return {
+                "error": str(e),
+                "service": "BLIPVisionService",
+                "success": False
+            }
+
+    async def generate_caption(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
+        """
+        Generate caption for image (Task 9 from notebook)
+
+        Args:
+            image: Image path or binary data
+
+        Returns:
+            Caption results
+        """
+        try:
+            # Preprocess image
+            pil_image = self._preprocess_image(image)
+
+            # Generate caption
+            prompt = "This is a picture of"  # Following notebook
+            caption = self._generate_text(pil_image, prompt)
+
+            return {
+                "task": "caption",
+                "service": "BLIPVisionService",
+                "caption": caption,
+                "model_type": "BLIP",
+                "success": True
+            }
+
+        except Exception as e:
+            logger.error(f"Error generating caption: {e}")
+            return {
+                "error": str(e),
+                "service": "BLIPVisionService",
+                "success": False
+            }
+
+    async def generate_summary(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
+        """
+        Generate summary for image (Task 10 from notebook)
+
+        Args:
+            image: Image path or binary data
+
+        Returns:
+            Summary results
+        """
+        try:
+            # Preprocess image
+            pil_image = self._preprocess_image(image)
+
+            # Generate summary
+            prompt = "This is a detailed photo showing"  # Following notebook
+            summary = self._generate_text(pil_image, prompt)
+
+            return {
+                "task": "summary",
+                "service": "BLIPVisionService",
+                "summary": summary,
+                "model_type": "BLIP",
+                "success": True
+            }
+
+        except Exception as e:
+            logger.error(f"Error generating summary: {e}")
+            return {
+                "error": str(e),
+                "service": "BLIPVisionService",
+                "success": False
+            }
+
+    async def batch_generate(self,
+                             images: List[Union[str, BinaryIO]],
+                             task: str = "caption") -> Dict[str, Any]:
+        """
+        Generate captions or summaries for multiple images
+
+        Args:
+            images: List of image paths or binary data
+            task: Task type ("caption" or "summary")
+
+        Returns:
+            Batch generation results
+        """
+        try:
+            results = []
+            errors = []
+
+            for i, image in enumerate(images):
+                try:
+                    if task == "caption":
+                        result = await self.generate_caption(image)
+                    elif task == "summary":
+                        result = await self.generate_summary(image)
+                    else:
+                        raise ValueError(f"Unsupported task: {task}")
+
+                    if result.get("success"):
+                        results.append({
+                            "index": i,
+                            "image": str(image) if isinstance(image, str) else f"binary_image_{i}",
+                            **result
+                        })
+                    else:
+                        errors.append({
+                            "index": i,
+                            "image": str(image) if isinstance(image, str) else f"binary_image_{i}",
+                            "error": result.get("error", "Unknown error")
+                        })
+
+                except Exception as e:
+                    errors.append({
+                        "index": i,
+                        "image": str(image) if isinstance(image, str) else f"binary_image_{i}",
+                        "error": str(e)
+                    })
+
+            return {
+                "task": f"batch_{task}",
+                "service": "BLIPVisionService",
+                "total_images": len(images),
+                "successful": len(results),
+                "failed": len(errors),
+                "results": results,
+                "errors": errors,
+                "success": True
+            }
+
+        except Exception as e:
+            logger.error(f"Error in batch generation: {e}")
+            return {
+                "error": str(e),
+                "service": "BLIPVisionService",
+                "success": False
+            }
+
+    def get_service_info(self) -> Dict[str, Any]:
+        """Get service information"""
+        return {
+            "service_name": "BLIPVisionService",
+            "model_name": self.model_name,
+            "model_type": "BLIP",
+            "capabilities": ["describe", "analyze", "caption", "summary", "batch_generate"],
+            "model_loaded": self.model is not None,
+            "processor_loaded": self.processor is not None,
+            "dependencies_available": self.blip_components['available']
+        }
isa_model/inference/services/vision/helpers/image_utils.py:

@@ -59,25 +59,28 @@ def get_image_data(image: Union[str, BinaryIO]) -> bytes:
 
 def compress_image(image_data: Union[bytes, BytesIO], max_size: int = 1024) -> bytes:
     """压缩图片以减小大小
-    
+
     Args:
         image_data: 图片数据,可以是 bytes 或 BytesIO
         max_size: 最大尺寸(像素)
-    
+
     Returns:
         bytes: 压缩后的图片数据
     """
     try:
+        # Ensure max_size is int (type safety)
+        max_size = int(max_size)
+
         # 如果输入是 bytes,转换为 BytesIO
         if isinstance(image_data, bytes):
             image_data = BytesIO(image_data)
-        
+
         img = Image.open(image_data)
-        
+
         # 转换为 RGB 模式(如果需要)
         if img.mode in ('RGBA', 'P'):
             img = img.convert('RGB')
-        
+
         # 计算新尺寸,保持宽高比
         ratio = max_size / max(img.size)
         if ratio < 1: