isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +40 -17
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/storage/hf_storage.py +1 -1
  26. isa_model/core/types.py +1 -0
  27. isa_model/deployment/__init__.py +5 -48
  28. isa_model/deployment/core/__init__.py +2 -31
  29. isa_model/deployment/core/deployment_manager.py +1278 -370
  30. isa_model/deployment/local/__init__.py +31 -0
  31. isa_model/deployment/local/config.py +248 -0
  32. isa_model/deployment/local/gpu_gateway.py +607 -0
  33. isa_model/deployment/local/health_checker.py +428 -0
  34. isa_model/deployment/local/provider.py +586 -0
  35. isa_model/deployment/local/tensorrt_service.py +621 -0
  36. isa_model/deployment/local/transformers_service.py +644 -0
  37. isa_model/deployment/local/vllm_service.py +527 -0
  38. isa_model/deployment/modal/__init__.py +8 -0
  39. isa_model/deployment/modal/config.py +136 -0
  40. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  41. isa_model/deployment/modal/services/__init__.py +3 -0
  42. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  43. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  44. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  45. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  46. isa_model/deployment/modal/services/video/__init__.py +1 -0
  47. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  48. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  49. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  50. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  51. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  52. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  53. isa_model/deployment/storage/__init__.py +5 -0
  54. isa_model/deployment/storage/deployment_repository.py +824 -0
  55. isa_model/deployment/triton/__init__.py +10 -0
  56. isa_model/deployment/triton/config.py +196 -0
  57. isa_model/deployment/triton/configs/__init__.py +1 -0
  58. isa_model/deployment/triton/provider.py +512 -0
  59. isa_model/deployment/triton/scripts/__init__.py +1 -0
  60. isa_model/deployment/triton/templates/__init__.py +1 -0
  61. isa_model/inference/__init__.py +47 -1
  62. isa_model/inference/ai_factory.py +137 -10
  63. isa_model/inference/legacy_services/__init__.py +21 -0
  64. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  65. isa_model/inference/legacy_services/model_service.py +573 -0
  66. isa_model/inference/legacy_services/model_serving.py +717 -0
  67. isa_model/inference/legacy_services/model_training.py +561 -0
  68. isa_model/inference/models/__init__.py +21 -0
  69. isa_model/inference/models/inference_config.py +551 -0
  70. isa_model/inference/models/inference_record.py +675 -0
  71. isa_model/inference/models/performance_models.py +714 -0
  72. isa_model/inference/repositories/__init__.py +9 -0
  73. isa_model/inference/repositories/inference_repository.py +828 -0
  74. isa_model/inference/services/audio/base_stt_service.py +184 -11
  75. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  76. isa_model/inference/services/custom_model_manager.py +277 -0
  77. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  78. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  79. isa_model/inference/services/llm/__init__.py +10 -2
  80. isa_model/inference/services/llm/base_llm_service.py +335 -24
  81. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  82. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  83. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  84. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  85. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  86. isa_model/inference/services/llm/local_llm_service.py +747 -0
  87. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  88. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  89. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  90. isa_model/inference/services/vision/__init__.py +22 -1
  91. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  92. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  93. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  94. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  95. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  96. isa_model/serving/api/cache_manager.py +245 -0
  97. isa_model/serving/api/dependencies/__init__.py +1 -0
  98. isa_model/serving/api/dependencies/auth.py +194 -0
  99. isa_model/serving/api/dependencies/database.py +139 -0
  100. isa_model/serving/api/error_handlers.py +284 -0
  101. isa_model/serving/api/fastapi_server.py +172 -22
  102. isa_model/serving/api/middleware/auth.py +8 -2
  103. isa_model/serving/api/middleware/security.py +23 -33
  104. isa_model/serving/api/middleware/tenant_context.py +414 -0
  105. isa_model/serving/api/routes/analytics.py +4 -1
  106. isa_model/serving/api/routes/config.py +645 -0
  107. isa_model/serving/api/routes/deployment_billing.py +315 -0
  108. isa_model/serving/api/routes/deployments.py +138 -2
  109. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  110. isa_model/serving/api/routes/health.py +32 -12
  111. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  112. isa_model/serving/api/routes/local_deployments.py +448 -0
  113. isa_model/serving/api/routes/tenants.py +575 -0
  114. isa_model/serving/api/routes/unified.py +680 -18
  115. isa_model/serving/api/routes/webhooks.py +479 -0
  116. isa_model/serving/api/startup.py +68 -54
  117. isa_model/utils/gpu_utils.py +311 -0
  118. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
  119. isa_model-0.4.3.dist-info/RECORD +193 -0
  120. isa_model/core/storage/minio_storage.py +0 -0
  121. isa_model/deployment/cloud/__init__.py +0 -9
  122. isa_model/deployment/cloud/modal/__init__.py +0 -10
  123. isa_model/deployment/core/deployment_config.py +0 -356
  124. isa_model/deployment/core/isa_deployment_service.py +0 -401
  125. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  126. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  127. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  128. isa_model/deployment/runtime/deployed_service.py +0 -338
  129. isa_model/deployment/services/__init__.py +0 -9
  130. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  131. isa_model/deployment/services/model_service.py +0 -332
  132. isa_model/deployment/services/service_monitor.py +0 -356
  133. isa_model/deployment/services/service_registry.py +0 -527
  134. isa_model/eval/__init__.py +0 -92
  135. isa_model/eval/benchmarks/__init__.py +0 -27
  136. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  137. isa_model/eval/benchmarks.py +0 -701
  138. isa_model/eval/config/__init__.py +0 -10
  139. isa_model/eval/config/evaluation_config.py +0 -108
  140. isa_model/eval/evaluators/__init__.py +0 -24
  141. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  142. isa_model/eval/evaluators/base_evaluator.py +0 -503
  143. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  144. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  145. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  146. isa_model/eval/example_evaluation.py +0 -395
  147. isa_model/eval/factory.py +0 -798
  148. isa_model/eval/infrastructure/__init__.py +0 -24
  149. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  150. isa_model/eval/isa_benchmarks.py +0 -700
  151. isa_model/eval/isa_integration.py +0 -582
  152. isa_model/eval/metrics.py +0 -951
  153. isa_model/eval/tests/unit/test_basic.py +0 -396
  154. isa_model/serving/api/routes/evaluations.py +0 -579
  155. isa_model/training/__init__.py +0 -168
  156. isa_model/training/annotation/annotation_schema.py +0 -47
  157. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  158. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  159. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  160. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  161. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  162. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  163. isa_model/training/annotation/views/annotation_controller.py +0 -158
  164. isa_model/training/cloud/__init__.py +0 -22
  165. isa_model/training/cloud/job_orchestrator.py +0 -402
  166. isa_model/training/cloud/runpod_trainer.py +0 -454
  167. isa_model/training/cloud/storage_manager.py +0 -482
  168. isa_model/training/core/__init__.py +0 -26
  169. isa_model/training/core/config.py +0 -181
  170. isa_model/training/core/dataset.py +0 -222
  171. isa_model/training/core/trainer.py +0 -720
  172. isa_model/training/core/utils.py +0 -213
  173. isa_model/training/examples/intelligent_training_example.py +0 -281
  174. isa_model/training/factory.py +0 -424
  175. isa_model/training/intelligent/__init__.py +0 -25
  176. isa_model/training/intelligent/decision_engine.py +0 -643
  177. isa_model/training/intelligent/intelligent_factory.py +0 -888
  178. isa_model/training/intelligent/knowledge_base.py +0 -751
  179. isa_model/training/intelligent/resource_optimizer.py +0 -839
  180. isa_model/training/intelligent/task_classifier.py +0 -576
  181. isa_model/training/storage/__init__.py +0 -24
  182. isa_model/training/storage/core_integration.py +0 -439
  183. isa_model/training/storage/training_repository.py +0 -552
  184. isa_model/training/storage/training_storage.py +0 -628
  185. isa_model-0.4.0.dist-info/RECORD +0 -182
  186. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  187. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  188. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  189. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  190. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  191. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  192. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  193. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  194. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  195. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  196. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  197. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  198. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  199. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
isa_model/inference/services/llm/ollama_llm_service.py

@@ -3,6 +3,7 @@ import httpx
 import json
 from typing import Dict, Any, List, Union, AsyncGenerator, Optional, Callable
 from isa_model.inference.services.llm.base_llm_service import BaseLLMService
+from isa_model.core.config.config_manager import ConfigManager
 
 logger = logging.getLogger(__name__)
 
@@ -16,7 +17,10 @@ class OllamaLLMService(BaseLLMService):
         provider_config = self.get_provider_config()
 
         # Create HTTP client for Ollama API
-        base_url = provider_config.get("base_url", "http://localhost:11434")
+        config_manager = ConfigManager()
+        # Use Consul discovery with fallback
+        default_base_url = config_manager.get_ollama_url()
+        base_url = provider_config.get("base_url", default_base_url)
         timeout = provider_config.get("timeout", 60)
 
         self.client = httpx.AsyncClient(
@@ -34,7 +38,10 @@ class OllamaLLMService(BaseLLMService):
         """Ensure the HTTP client is available and not closed"""
         if not hasattr(self, 'client') or not self.client or self.client.is_closed:
             provider_config = self.get_provider_config()
-            base_url = provider_config.get("base_url", "http://localhost:11434")
+            config_manager = ConfigManager()
+            # Use Consul discovery with fallback
+            default_base_url = config_manager.get_ollama_url()
+            base_url = provider_config.get("base_url", default_base_url)
             timeout = provider_config.get("timeout", 60)
             self.client = httpx.AsyncClient(base_url=base_url, timeout=timeout)
 
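The Ollama hunks above replace the hard-coded `http://localhost:11434` default with `ConfigManager.get_ollama_url()`, backed by the new `core/discovery/consul_discovery.py` module. The resolution order this implies — explicit provider config first, then service discovery, then a static default — can be sketched as follows; the resolver internals here are assumptions for illustration, not the package's actual implementation:

```python
# Minimal sketch of the base-URL resolution order implied by the diff above.
# The Consul lookup and env-var fallback are assumptions for illustration.
import os
from typing import Callable, Optional

def resolve_ollama_url(provider_config: dict,
                       discover: Optional[Callable[[str], Optional[str]]] = None) -> str:
    """Resolve the Ollama base URL: explicit config > discovery > static default."""
    # 1. An explicit per-provider override always wins.
    if provider_config.get("base_url"):
        return provider_config["base_url"]
    # 2. Ask service discovery (e.g. Consul) when a resolver is available.
    if discover is not None:
        try:
            url = discover("ollama")
            if url:
                return url
        except Exception:
            pass  # fall back to static defaults if discovery is down
    # 3. Static fallback: environment variable, then the historical default.
    return os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
```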
isa_model/inference/services/llm/openai_llm_service.py

@@ -20,6 +20,8 @@ class OpenAILLMService(BaseLLMService):
 
         # Check if this is an O-series reasoning model
         self.is_reasoning_model = model_name.startswith("o4-") or model_name.startswith("o3-")
+        self.uses_completion_tokens = self.is_reasoning_model or model_name.startswith("gpt-5")
+        self.requires_default_temperature = self.is_reasoning_model or model_name.startswith("gpt-5")
         self.supports_deep_research = "deep-search" in model_name or "deep-research" in model_name
 
         # Get configuration from centralized config manager
@@ -36,7 +38,9 @@ class OpenAILLMService(BaseLLMService):
         self.client = AsyncOpenAI(
             api_key=provider_config["api_key"],
             base_url=provider_config.get("api_base_url", "https://api.openai.com/v1"),
-            organization=provider_config.get("organization")
+            organization=provider_config.get("organization"),
+            timeout=10.0,  # 10 second timeout for first token (much faster than 600s default)
+            max_retries=2  # Retry on timeout
         )
 
         logger.info(f"Initialized OpenAILLMService with model {self.model_name} and endpoint {self.client.base_url}")
@@ -70,6 +74,8 @@ class OpenAILLMService(BaseLLMService):
 
         # Copy OpenAI-specific attributes
         bound_service.is_reasoning_model = self.is_reasoning_model
+        bound_service.uses_completion_tokens = self.uses_completion_tokens
+        bound_service.requires_default_temperature = self.requires_default_temperature
         bound_service.supports_deep_research = self.supports_deep_research
 
         # Copy base class attributes
@@ -103,7 +109,7 @@ class OpenAILLMService(BaseLLMService):
 
         return bound_service
 
-    async def astream(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
+    async def astream(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False, **extra_kwargs) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
         """
         True streaming method - yields tokens one by one as they arrive
 
@@ -121,19 +127,19 @@ class OpenAILLMService(BaseLLMService):
             if use_responses_api:
                 logger.info(f"Using Responses API streaming for {self.model_name}")
                 # Use Responses API streaming
-                async for chunk in self._astream_responses_api(input_data, show_reasoning):
+                async for chunk in self._astream_responses_api(input_data, show_reasoning, **extra_kwargs):
                     yield chunk
             else:
                 logger.debug(f"Using Chat Completions API streaming for {self.model_name}")
                 # Use Chat Completions API streaming
-                async for chunk in self._astream_chat_completions_api(input_data):
+                async for chunk in self._astream_chat_completions_api(input_data, **extra_kwargs):
                     yield chunk
 
         except Exception as e:
             logger.error(f"Error in astream: {e}")
             raise
 
-    async def _astream_responses_api(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
+    async def _astream_responses_api(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False, **extra_kwargs) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
         """Stream using Responses API for reasoning models and deep research models"""
         try:
             # Use adapter manager to prepare messages
@@ -228,7 +234,7 @@ class OpenAILLMService(BaseLLMService):
             logger.error(f"Error in _astream_responses_api: {e}")
             raise
 
-    async def _astream_chat_completions_api(self, input_data: Union[str, List[Dict[str, str]], Any]) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
+    async def _astream_chat_completions_api(self, input_data: Union[str, List[Dict[str, str]], Any], **extra_kwargs) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
         """Stream using Chat Completions API for standard models"""
         try:
             # Use adapter manager to prepare messages
@@ -242,13 +248,13 @@ class OpenAILLMService(BaseLLMService):
                 "stream": True
             }
 
-            # O4 models only support temperature=1 (default)
-            if not self.is_reasoning_model:
+            # O4 and GPT-5 models only support temperature=1 (default)
+            if not self.requires_default_temperature:
                 kwargs["temperature"] = provider_config.get("temperature", 0.7)
 
-            # O4 models use max_completion_tokens instead of max_tokens
+            # O4 and GPT-5 models use max_completion_tokens instead of max_tokens
             max_tokens_value = provider_config.get("max_tokens", 1024)
-            if self.is_reasoning_model:
+            if self.uses_completion_tokens:
                 kwargs["max_completion_tokens"] = max_tokens_value
             else:
                 kwargs["max_tokens"] = max_tokens_value
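The `uses_completion_tokens` / `requires_default_temperature` pair splits what 0.4.0 expressed with a single `is_reasoning_model` check, because GPT-5 models share the O-series parameter restrictions without matching the `o3-`/`o4-` prefixes. A condensed sketch of the gating as it appears in both the streaming and non-streaming paths:

```python
# Condensed sketch of the request-parameter gating shown in this diff.
def build_completion_kwargs(model_name: str, provider_config: dict) -> dict:
    is_reasoning = model_name.startswith(("o3-", "o4-"))
    uses_completion_tokens = is_reasoning or model_name.startswith("gpt-5")
    requires_default_temperature = uses_completion_tokens

    kwargs: dict = {"model": model_name}
    # O-series and GPT-5 models only accept the default temperature (1).
    if not requires_default_temperature:
        kwargs["temperature"] = provider_config.get("temperature", 0.7)
    # They also take max_completion_tokens in place of max_tokens.
    max_tokens_value = provider_config.get("max_tokens", 1024)
    if uses_completion_tokens:
        kwargs["max_completion_tokens"] = max_tokens_value
    else:
        kwargs["max_tokens"] = max_tokens_value
    return kwargs

# build_completion_kwargs("gpt-5-mini", {})
# -> {'model': 'gpt-5-mini', 'max_completion_tokens': 1024}
```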
@@ -259,6 +265,11 @@ class OpenAILLMService(BaseLLMService):
                 kwargs["tools"] = tool_schemas
                 kwargs["tool_choice"] = "auto"
 
+            # Add response_format if specified (for JSON mode)
+            if 'response_format' in extra_kwargs:
+                kwargs['response_format'] = extra_kwargs['response_format']
+                logger.debug(f"Using response_format in streaming: {extra_kwargs['response_format']}")
+
             # Stream tokens and detect tool calls
             content_chunks = []
             tool_calls_accumulator = {}  # Track complete tool calls by ID
@@ -360,13 +371,14 @@ class OpenAILLMService(BaseLLMService):
             logger.error(f"Error in _astream_chat_completions_api: {e}")
             raise
 
-    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False) -> Union[str, Any]:
+    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False, **extra_kwargs) -> Union[str, Any]:
         """
         Unified invoke method for all input types
 
         Args:
             input_data: Input messages or text
             show_reasoning: If True and model supports it, show reasoning process using Responses API
+            **extra_kwargs: Additional parameters to pass to the API (e.g., response_format)
         """
         try:
             # Use adapter manager to prepare messages
@@ -385,13 +397,13 @@ class OpenAILLMService(BaseLLMService):
                 "messages": messages
             }
 
-            # O4 models only support temperature=1 (default)
-            if not self.is_reasoning_model:
+            # O4 and GPT-5 models only support temperature=1 (default)
+            if not self.requires_default_temperature:
                 kwargs["temperature"] = provider_config.get("temperature", 0.7)
 
-            # O4 models use max_completion_tokens instead of max_tokens
+            # O4 and GPT-5 models use max_completion_tokens instead of max_tokens
             max_tokens_value = provider_config.get("max_tokens", 1024)
-            if self.is_reasoning_model:
+            if self.uses_completion_tokens:
                 kwargs["max_completion_tokens"] = max_tokens_value
             else:
                 kwargs["max_tokens"] = max_tokens_value
@@ -403,11 +415,16 @@ class OpenAILLMService(BaseLLMService):
             if not use_responses_api:  # Responses API handles tool choice differently
                 kwargs["tool_choice"] = "auto"
 
+            # Add response_format if specified (for JSON mode)
+            if 'response_format' in extra_kwargs:
+                kwargs['response_format'] = extra_kwargs['response_format']
+                logger.debug(f"Using response_format: {extra_kwargs['response_format']}")
+
             # Handle streaming vs non-streaming
             if self.streaming:
                 # TRUE STREAMING MODE - collect all chunks from the stream
                 content_chunks = []
-                async for token in self.astream(input_data, show_reasoning=show_reasoning):
+                async for token in self.astream(input_data, show_reasoning=show_reasoning, **extra_kwargs):
                     if isinstance(token, str):
                         content_chunks.append(token)
                     elif isinstance(token, dict) and "result" in token:
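With `**extra_kwargs` threaded through `ainvoke`, `astream`, and both API paths, a caller can now request OpenAI's JSON mode end to end. A hypothetical caller-side sketch (the constructor arguments are assumptions; JSON mode also requires the word "JSON" to appear in the prompt):

```python
# Hypothetical usage of the new response_format passthrough.
import asyncio
from isa_model.inference.services.llm.openai_llm_service import OpenAILLMService

async def main():
    service = OpenAILLMService(model_name="gpt-4o-mini")  # assumed constructor
    result = await service.ainvoke(
        "List three primary colors as a JSON object.",
        response_format={"type": "json_object"},  # forwarded via extra_kwargs
    )
    print(result)

asyncio.run(main())
```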
isa_model/inference/services/llm/yyds_llm_service.py

@@ -63,16 +63,19 @@ class YydsLLMService(BaseLLMService):
 
         return bound_service
 
-    async def astream(self, input_data: Union[str, List[Dict[str, str]], Any]) -> AsyncGenerator[str, None]:
+    async def astream(self, input_data: Union[str, List[Dict[str, str]], Any], **kwargs) -> AsyncGenerator[str, None]:
         """
         True streaming method - yields tokens one by one as they arrive
 
         Args:
             input_data: Same as ainvoke
+            **kwargs: Additional parameters (will filter out unsupported ones)
 
         Yields:
             Individual tokens as they arrive from the API
         """
+        # Remove parameters that yyds doesn't support
+        kwargs.pop('show_reasoning', None)  # OpenAI-specific parameter
         try:
             # Use adapter manager to prepare messages
             messages = self._prepare_messages(input_data)
@@ -115,8 +118,11 @@ class YydsLLMService(BaseLLMService):
             logger.error(f"Error in astream: {e}")
             raise
 
-    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
+    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any], **kwargs) -> Union[str, Any]:
         """Unified invoke method for all input types"""
+        # Remove parameters that yyds doesn't support
+        kwargs.pop('show_reasoning', None)  # OpenAI-specific parameter
+        kwargs.pop('task', None)  # Handled internally
         try:
             # Use adapter manager to prepare messages
             messages = self._prepare_messages(input_data)
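Rather than growing matching parameters, the yyds service accepts arbitrary kwargs and drops the ones its upstream API cannot handle, so callers can pass one kwargs dict to any LLM service without branching. The filter-then-forward pattern in isolation:

```python
# The filter-then-forward pattern used above, in isolation.
UNSUPPORTED_PARAMS = ("show_reasoning", "task")  # OpenAI-specific / internal

def filter_kwargs(kwargs: dict) -> dict:
    """Drop parameters this provider cannot forward to its API."""
    return {k: v for k, v in kwargs.items() if k not in UNSUPPORTED_PARAMS}

print(filter_kwargs({"temperature": 0.2, "show_reasoning": True}))
# -> {'temperature': 0.2}
```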
isa_model/inference/services/vision/__init__.py

@@ -31,6 +31,21 @@ except ImportError:
     OllamaVisionService = None
     OLLAMA_VISION_AVAILABLE = False
 
+# Computer Vision specialized services
+try:
+    from .vgg16_vision_service import VGG16VisionService
+    VGG16_VISION_AVAILABLE = True
+except ImportError:
+    VGG16VisionService = None
+    VGG16_VISION_AVAILABLE = False
+
+try:
+    from .blip_vision_service import BLIPVisionService
+    BLIP_VISION_AVAILABLE = True
+except ImportError:
+    BLIPVisionService = None
+    BLIP_VISION_AVAILABLE = False
+
 __all__ = [
     "BaseVisionService",
     "OpenAIVisionService",
@@ -43,4 +58,10 @@ if ISA_VISION_AVAILABLE:
     __all__.append("ISAVisionService")
 
 if OLLAMA_VISION_AVAILABLE:
-    __all__.append("OllamaVisionService")
+    __all__.append("OllamaVisionService")
+
+if VGG16_VISION_AVAILABLE:
+    __all__.append("VGG16VisionService")
+
+if BLIP_VISION_AVAILABLE:
+    __all__.append("BLIPVisionService")
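Because the heavy torch/transformers stack may be absent, downstream code is expected to gate on the `*_AVAILABLE` flags rather than import the service classes directly. A hypothetical caller:

```python
# Hypothetical caller: pick a local captioning backend only if it imported.
from isa_model.inference.services import vision

if getattr(vision, "BLIP_VISION_AVAILABLE", False):
    service = vision.BLIPVisionService()
else:
    raise RuntimeError("Install torch and transformers to use BLIP captioning")
```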
isa_model/inference/services/vision/blip_vision_service.py (new file)

@@ -0,0 +1,359 @@
+#!/usr/bin/env python3
+"""
+BLIP Vision Service
+Computer vision service using BLIP for image captioning and description
+Based on the notebook implementation
+"""
+
+import os
+import logging
+from typing import Dict, List, Any, Optional, Union, BinaryIO
+from PIL import Image
+import io
+
+from .base_vision_service import BaseVisionService
+
+logger = logging.getLogger(__name__)
+
+def _lazy_import_blip_deps():
+    """Lazy import BLIP dependencies"""
+    try:
+        import torch
+        import tensorflow as tf
+        from transformers import BlipProcessor, BlipForConditionalGeneration
+
+        return {
+            'torch': torch,
+            'tf': tf,
+            'BlipProcessor': BlipProcessor,
+            'BlipForConditionalGeneration': BlipForConditionalGeneration,
+            'available': True
+        }
+    except ImportError as e:
+        logger.warning(f"BLIP dependencies not available: {e}")
+        return {'available': False}
+
+class BLIPVisionService(BaseVisionService):
+    """
+    BLIP-based vision service for image captioning and description
+    Provides an alternative implementation to VLM-based captioning
+    """
+
+    def __init__(self, model_name: str = "Salesforce/blip-image-captioning-base"):
+        """
+        Initialize BLIP vision service
+
+        Args:
+            model_name: Hugging Face model name for BLIP
+        """
+        super().__init__()
+
+        self.model_name = model_name
+        self.processor = None
+        self.model = None
+
+        # Lazy load dependencies
+        self.blip_components = _lazy_import_blip_deps()
+
+        if not self.blip_components['available']:
+            raise ImportError("BLIP dependencies (transformers, torch) are required")
+
+        # Load BLIP model
+        self._load_blip_model()
+
+    def _load_blip_model(self):
+        """Load BLIP model and processor"""
+        try:
+            # Load the pretrained BLIP processor and model
+            self.processor = self.blip_components['BlipProcessor'].from_pretrained(self.model_name)
+            self.model = self.blip_components['BlipForConditionalGeneration'].from_pretrained(self.model_name)
+
+            logger.info(f"BLIP model loaded: {self.model_name}")
+
+        except Exception as e:
+            logger.error(f"Error loading BLIP model: {e}")
+            raise
+
+    def _preprocess_image(self, image: Union[str, BinaryIO]) -> Image.Image:
+        """
+        Preprocess image for BLIP input
+
+        Args:
+            image: Image path or binary data
+
+        Returns:
+            PIL Image in RGB format
+        """
+        try:
+            # Handle different image input types
+            if isinstance(image, str):
+                # File path
+                pil_image = Image.open(image).convert('RGB')
+            elif hasattr(image, 'read'):
+                # Binary IO
+                image_data = image.read()
+                pil_image = Image.open(io.BytesIO(image_data)).convert('RGB')
+            else:
+                raise ValueError("Unsupported image format")
+
+            return pil_image
+
+        except Exception as e:
+            logger.error(f"Error preprocessing image: {e}")
+            raise
+
+    def _generate_text(self, image: Image.Image, prompt: str) -> str:
+        """
+        Generate text for image using BLIP
+
+        Args:
+            image: PIL Image
+            prompt: Text prompt for generation
+
+        Returns:
+            Generated text
+        """
+        try:
+            # Prepare inputs for BLIP model
+            inputs = self.processor(images=image, text=prompt, return_tensors="pt")
+
+            # Generate text output
+            output = self.model.generate(**inputs)
+
+            # Decode output
+            result = self.processor.decode(output[0], skip_special_tokens=True)
+
+            return result
+
+        except Exception as e:
+            logger.error(f"Error generating text: {e}")
+            raise
+
+    async def describe_image(self,
+                             image: Union[str, BinaryIO],
+                             detail_level: str = "medium") -> Dict[str, Any]:
+        """
+        Generate description for image using BLIP
+
+        Args:
+            image: Image path or binary data
+            detail_level: Level of detail (not used in BLIP, maintained for compatibility)
+
+        Returns:
+            Description results
+        """
+        try:
+            # Preprocess image
+            pil_image = self._preprocess_image(image)
+
+            # Generate caption using BLIP
+            prompt = "This is a picture of"  # Following notebook implementation
+            caption = self._generate_text(pil_image, prompt)
+
+            return {
+                "task": "describe",
+                "service": "BLIPVisionService",
+                "description": caption,
+                "detail_level": detail_level,
+                "model_type": "BLIP",
+                "prompt_used": prompt,
+                "success": True
+            }
+
+        except Exception as e:
+            logger.error(f"Error describing image: {e}")
+            return {
+                "error": str(e),
+                "service": "BLIPVisionService",
+                "success": False
+            }
+
+    async def analyze_image(self,
+                            image: Union[str, BinaryIO],
+                            prompt: Optional[str] = None,
+                            max_tokens: int = 1000) -> Dict[str, Any]:
+        """
+        Analyze image using BLIP
+
+        Args:
+            image: Image path or binary data
+            prompt: Optional custom prompt
+            max_tokens: Not used for BLIP
+
+        Returns:
+            Analysis results
+        """
+        try:
+            # Preprocess image
+            pil_image = self._preprocess_image(image)
+
+            # Use custom prompt or default
+            if prompt:
+                analysis_prompt = prompt
+            else:
+                analysis_prompt = "This is a detailed photo showing"  # For summary-like analysis
+
+            # Generate analysis using BLIP
+            analysis_text = self._generate_text(pil_image, analysis_prompt)
+
+            return {
+                "task": "analyze",
+                "service": "BLIPVisionService",
+                "text": analysis_text,
+                "model_type": "BLIP",
+                "prompt_used": analysis_prompt,
+                "success": True
+            }
+
+        except Exception as e:
+            logger.error(f"Error analyzing image: {e}")
+            return {
+                "error": str(e),
+                "service": "BLIPVisionService",
+                "success": False
+            }
+
+    async def generate_caption(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
+        """
+        Generate caption for image (Task 9 from notebook)
+
+        Args:
+            image: Image path or binary data
+
+        Returns:
+            Caption results
+        """
+        try:
+            # Preprocess image
+            pil_image = self._preprocess_image(image)
+
+            # Generate caption
+            prompt = "This is a picture of"  # Following notebook
+            caption = self._generate_text(pil_image, prompt)
+
+            return {
+                "task": "caption",
+                "service": "BLIPVisionService",
+                "caption": caption,
+                "model_type": "BLIP",
+                "success": True
+            }
+
+        except Exception as e:
+            logger.error(f"Error generating caption: {e}")
+            return {
+                "error": str(e),
+                "service": "BLIPVisionService",
+                "success": False
+            }
+
+    async def generate_summary(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
+        """
+        Generate summary for image (Task 10 from notebook)
+
+        Args:
+            image: Image path or binary data
+
+        Returns:
+            Summary results
+        """
+        try:
+            # Preprocess image
+            pil_image = self._preprocess_image(image)
+
+            # Generate summary
+            prompt = "This is a detailed photo showing"  # Following notebook
+            summary = self._generate_text(pil_image, prompt)
+
+            return {
+                "task": "summary",
+                "service": "BLIPVisionService",
+                "summary": summary,
+                "model_type": "BLIP",
+                "success": True
+            }
+
+        except Exception as e:
+            logger.error(f"Error generating summary: {e}")
+            return {
+                "error": str(e),
+                "service": "BLIPVisionService",
+                "success": False
+            }
+
+    async def batch_generate(self,
+                             images: List[Union[str, BinaryIO]],
+                             task: str = "caption") -> Dict[str, Any]:
+        """
+        Generate captions or summaries for multiple images
+
+        Args:
+            images: List of image paths or binary data
+            task: Task type ("caption" or "summary")
+
+        Returns:
+            Batch generation results
+        """
+        try:
+            results = []
+            errors = []
+
+            for i, image in enumerate(images):
+                try:
+                    if task == "caption":
+                        result = await self.generate_caption(image)
+                    elif task == "summary":
+                        result = await self.generate_summary(image)
+                    else:
+                        raise ValueError(f"Unsupported task: {task}")
+
+                    if result.get("success"):
+                        results.append({
+                            "index": i,
+                            "image": str(image) if isinstance(image, str) else f"binary_image_{i}",
+                            **result
+                        })
+                    else:
+                        errors.append({
+                            "index": i,
+                            "image": str(image) if isinstance(image, str) else f"binary_image_{i}",
+                            "error": result.get("error", "Unknown error")
+                        })
+
+                except Exception as e:
+                    errors.append({
+                        "index": i,
+                        "image": str(image) if isinstance(image, str) else f"binary_image_{i}",
+                        "error": str(e)
+                    })
+
+            return {
+                "task": f"batch_{task}",
+                "service": "BLIPVisionService",
+                "total_images": len(images),
+                "successful": len(results),
+                "failed": len(errors),
+                "results": results,
+                "errors": errors,
+                "success": True
+            }
+
+        except Exception as e:
+            logger.error(f"Error in batch generation: {e}")
+            return {
+                "error": str(e),
+                "service": "BLIPVisionService",
+                "success": False
+            }
+
+    def get_service_info(self) -> Dict[str, Any]:
+        """Get service information"""
+        return {
+            "service_name": "BLIPVisionService",
+            "model_name": self.model_name,
+            "model_type": "BLIP",
+            "capabilities": ["describe", "analyze", "caption", "summary", "batch_generate"],
+            "model_loaded": self.model is not None,
+            "processor_loaded": self.processor is not None,
+            "dependencies_available": self.blip_components['available']
+        }
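A minimal usage sketch for the new service. Note that `_lazy_import_blip_deps` imports tensorflow alongside torch and transformers, so all three must be installed even though the ImportError message names only two; the first call also downloads the model weights from Hugging Face:

```python
# Minimal usage sketch for BLIPVisionService ("photo.jpg" is a placeholder).
import asyncio
from isa_model.inference.services.vision.blip_vision_service import BLIPVisionService

async def main():
    service = BLIPVisionService()  # Salesforce/blip-image-captioning-base
    result = await service.generate_caption("photo.jpg")
    if result["success"]:
        print(result["caption"])

asyncio.run(main())
```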
isa_model/inference/services/vision/helpers/image_utils.py (comments translated from Chinese; the -/+ pairs on blank lines are trailing-whitespace cleanups)

@@ -59,25 +59,28 @@ def get_image_data(image: Union[str, BinaryIO]) -> bytes:
 
 def compress_image(image_data: Union[bytes, BytesIO], max_size: int = 1024) -> bytes:
     """Compress an image to reduce its size
-
+
     Args:
         image_data: image data, as bytes or BytesIO
         max_size: maximum dimension (pixels)
-
+
     Returns:
         bytes: the compressed image data
     """
     try:
+        # Ensure max_size is int (type safety)
+        max_size = int(max_size)
+
         # If the input is bytes, convert it to BytesIO
         if isinstance(image_data, bytes):
             image_data = BytesIO(image_data)
-
+
         img = Image.open(image_data)
-
+
         # Convert to RGB mode (if needed)
         if img.mode in ('RGBA', 'P'):
             img = img.convert('RGB')
-
+
         # Compute the new size, preserving the aspect ratio
         ratio = max_size / max(img.size)
         if ratio < 1:
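The section cuts off inside `compress_image` at the aspect-ratio check. For context, a hedged sketch of how such a compressor typically finishes; the resampling filter and output format/quality are assumptions, not necessarily what the package uses:

```python
# Hedged sketch of the remainder of compress_image: downscale when the
# image exceeds max_size, then re-encode. Filter/quality are assumptions.
from io import BytesIO
from PIL import Image

def compress_image_sketch(image_data: bytes, max_size: int = 1024) -> bytes:
    img = Image.open(BytesIO(image_data))
    if img.mode in ('RGBA', 'P'):
        img = img.convert('RGB')
    ratio = max_size / max(img.size)
    if ratio < 1:  # only downscale, never upscale
        new_size = (int(img.width * ratio), int(img.height * ratio))
        img = img.resize(new_size, Image.LANCZOS)
    buf = BytesIO()
    img.save(buf, format="JPEG", quality=85)
    return buf.getvalue()
```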