isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import os
2
3
  import aiohttp
3
4
  from typing import Dict, Any, List, Union, Optional, BinaryIO
4
5
  from openai import AsyncOpenAI
@@ -46,7 +47,7 @@ class OpenAISTTService(BaseSTTService):
46
47
  wait=wait_exponential(multiplier=1, min=4, max=10),
47
48
  reraise=True
48
49
  )
49
- async def transcribe(self, audio_file: Union[str, BinaryIO], language: Optional[str] = None, prompt: Optional[str] = None) -> Dict[str, Any]:
50
+ async def transcribe(self, audio_file: Union[str, BinaryIO, bytes], language: Optional[str] = None, prompt: Optional[str] = None, **kwargs) -> Dict[str, Any]:
50
51
  """
51
52
  Transcribe audio file to text using OpenAI's Whisper model.
52
53
 
@@ -72,13 +73,54 @@ class OpenAISTTService(BaseSTTService):
72
73
  if prompt:
73
74
  transcription_params["prompt"] = prompt
74
75
 
75
- # Handle file input
76
- if isinstance(audio_file, str):
77
- with open(audio_file, "rb") as f:
78
- transcription = await self.client.audio.transcriptions.create(
79
- file=f,
80
- **transcription_params
81
- )
76
+ # Handle file input - support bytes, base64 strings, file paths, and file objects
77
+ if isinstance(audio_file, bytes):
78
+ # Handle bytes data directly
79
+ logger.info(f"Processing bytes audio data ({len(audio_file)} bytes)")
80
+ from io import BytesIO
81
+ audio_buffer = BytesIO(audio_file)
82
+
83
+ # Use filename from kwargs if provided, otherwise default to .mp3
84
+ filename = kwargs.get('filename', 'audio.mp3')
85
+ if filename and not filename.endswith(('.mp3', '.wav', '.m4a', '.flac', '.ogg', '.webm', '.mp4')):
86
+ filename += '.mp3' # Add extension if missing
87
+ audio_buffer.name = filename
88
+ logger.info(f"Using filename: {filename}")
89
+ transcription = await self.client.audio.transcriptions.create(
90
+ file=audio_buffer,
91
+ **transcription_params
92
+ )
93
+ elif isinstance(audio_file, str):
94
+ # Check if it's a base64 string or file path
95
+ if len(audio_file) > 100 and not os.path.exists(audio_file):
96
+ # Likely a base64 string
97
+ try:
98
+ import base64
99
+ from io import BytesIO
100
+ logger.info(f"Attempting to decode base64 audio data (length: {len(audio_file)})")
101
+ audio_data = base64.b64decode(audio_file)
102
+ audio_buffer = BytesIO(audio_data)
103
+ audio_buffer.name = "audio.wav" # OpenAI needs a filename hint
104
+ logger.info(f"Successfully decoded base64 to {len(audio_data)} bytes")
105
+ transcription = await self.client.audio.transcriptions.create(
106
+ file=audio_buffer,
107
+ **transcription_params
108
+ )
109
+ except Exception as e:
110
+ # If base64 decoding fails, treat as file path
111
+ logger.error(f"Base64 decoding failed: {e}, treating as file path")
112
+ with open(audio_file, "rb") as f:
113
+ transcription = await self.client.audio.transcriptions.create(
114
+ file=f,
115
+ **transcription_params
116
+ )
117
+ else:
118
+ # Regular file path
119
+ with open(audio_file, "rb") as f:
120
+ transcription = await self.client.audio.transcriptions.create(
121
+ file=f,
122
+ **transcription_params
123
+ )
82
124
  else:
83
125
  transcription = await self.client.audio.transcriptions.create(
84
126
  file=audio_file,
@@ -121,7 +163,7 @@ class OpenAISTTService(BaseSTTService):
121
163
  wait=wait_exponential(multiplier=1, min=4, max=10),
122
164
  reraise=True
123
165
  )
124
- async def translate(self, audio_file: Union[str, BinaryIO]) -> Dict[str, Any]:
166
+ async def translate(self, audio_file: Union[str, BinaryIO, bytes]) -> Dict[str, Any]:
125
167
  """
126
168
  Translate audio file to English text using OpenAI's Whisper model.
127
169
 
@@ -185,7 +227,7 @@ class OpenAISTTService(BaseSTTService):
185
227
  logger.error(f"Translation failed: {e}")
186
228
  raise
187
229
 
188
- async def transcribe_batch(self, audio_files: List[Union[str, BinaryIO]], language: Optional[str] = None, prompt: Optional[str] = None) -> List[Dict[str, Any]]:
230
+ async def transcribe_batch(self, audio_files: List[Union[str, BinaryIO, bytes]], language: Optional[str] = None, prompt: Optional[str] = None) -> List[Dict[str, Any]]:
189
231
  """
190
232
  Transcribe multiple audio files in batch.
191
233
 
@@ -212,7 +254,7 @@ class OpenAISTTService(BaseSTTService):
212
254
 
213
255
  return results
214
256
 
215
- async def detect_language(self, audio_file: Union[str, BinaryIO]) -> Dict[str, Any]:
257
+ async def detect_language(self, audio_file: Union[str, BinaryIO, bytes]) -> Dict[str, Any]:
216
258
  """
217
259
  Detect the language of an audio file.
218
260
 
@@ -66,7 +66,8 @@ class BaseService(ABC):
66
66
  output_tokens=output_tokens
67
67
  )
68
68
 
69
- # Track usage through model manager
69
+ # Track usage through both systems (legacy and new)
70
+ # Legacy detailed tracking (will be phased out)
70
71
  self.model_manager.billing_tracker.track_model_usage(
71
72
  model_id=self.model_name,
72
73
  operation_type="inference",
@@ -80,6 +81,21 @@ class BaseService(ABC):
80
81
  cost_usd=cost_usd,
81
82
  metadata=metadata
82
83
  )
84
+
85
+ # New aggregated statistics tracking
86
+ self.model_manager.statistics_tracker.track_usage(
87
+ model_id=self.model_name,
88
+ provider=self.provider_name,
89
+ service_type=service_type if isinstance(service_type, str) else service_type.value,
90
+ operation_type="inference",
91
+ operation=operation,
92
+ input_tokens=input_tokens,
93
+ output_tokens=output_tokens,
94
+ input_units=input_units,
95
+ output_units=output_units,
96
+ cost_usd=cost_usd or 0.0,
97
+ metadata=metadata
98
+ )
83
99
  except Exception as e:
84
100
  # Don't let billing tracking break the service
85
101
  import logging
@@ -0,0 +1,277 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Custom Model Manager - Handles registration and management of custom trained models
6
+ Provides integration for models trained through ISA Model training pipeline
7
+ """
8
+
9
+ import logging
10
+ import json
11
+ import os
12
+ from typing import Dict, List, Any, Optional
13
+ from datetime import datetime
14
+ from dataclasses import dataclass, asdict
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
@dataclass
class CustomModelInfo:
    """Record describing a single custom model entry in the registry.

    The first nine fields are required; ``performance_metrics`` and
    ``deployment_config`` are optional and default to ``None``.
    """
    model_id: str
    model_name: str
    # Kind of model, e.g. 'text', 'vision', 'audio', etc.
    model_type: str
    provider: str
    # Base model this one was fine-tuned from.
    base_model: str
    training_date: str
    # Local path or HuggingFace repo.
    model_path: str
    metadata: Dict[str, Any]
    capabilities: List[str]
    performance_metrics: Optional[Dict[str, float]] = None
    deployment_config: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return every field of this record as a plain dictionary.

        Conversion is delegated to ``dataclasses.asdict``.
        """
        as_mapping = asdict(self)
        return as_mapping
35
+
36
class CustomModelManager:
    """
    Manages custom trained models in the ISA Model ecosystem.

    Keeps an in-memory mapping of ``model_id`` -> ``CustomModelInfo`` and
    mirrors it to a JSON registry file. Handles registration, discovery,
    and lookup of custom models.
    """

    def __init__(self, models_registry_path: Optional[str] = None):
        """
        Create the manager and load (or initialise) the registry.

        Args:
            models_registry_path: Path of the JSON registry file. When omitted
                or empty, ``~/.isa_model/custom_models.json`` is used.
        """
        self.models_registry_path = models_registry_path or os.path.join(
            os.path.expanduser("~"), ".isa_model", "custom_models.json"
        )
        self._models: Dict[str, "CustomModelInfo"] = {}
        self._load_models_registry()

    def _load_models_registry(self):
        """
        Load the custom models registry from file.

        If the registry file does not exist, a default registry is created
        and saved. Entries that cannot be turned into a ``CustomModelInfo``
        (missing or unexpected keys, non-mapping entries) are skipped with a
        warning so that one bad entry does not discard the whole registry.
        Any other failure (unreadable file, invalid JSON, unexpected layout)
        is logged and leaves the manager with an empty registry.
        """
        if not os.path.exists(self.models_registry_path):
            # Create default registry with some ISA models
            self._create_default_registry()
            return

        try:
            with open(self.models_registry_path, 'r', encoding='utf-8') as f:
                models_data = json.load(f)

            for model_data in models_data.get('models', []):
                try:
                    model_info = CustomModelInfo(**model_data)
                except TypeError as e:
                    # Raised for wrong/missing fields or a non-mapping entry.
                    logger.warning(f"Skipping invalid custom model entry: {e}")
                    continue
                self._models[model_info.model_id] = model_info

            logger.info(f"Loaded {len(self._models)} custom models from registry")
        except Exception as e:
            logger.warning(f"Failed to load models registry: {e}")
            self._models = {}

    def _create_default_registry(self):
        """Populate the registry with the built-in ISA models and save it."""
        default_models = [
            CustomModelInfo(
                model_id="isa-llm-service",
                model_name="ISA LLM Service",
                model_type="text",
                provider="isa",
                base_model="DialoGPT-small",
                training_date="2024-12-19",
                model_path="modal://isa-llm-inference",
                metadata={
                    "description": "ISA custom LLM service with fallback support",
                    "parameters": "124M",
                    "context_length": 1024,
                    "languages": ["en", "zh"]
                },
                capabilities=["chat", "text_generation", "conversation"],
                performance_metrics={
                    "perplexity": 3.2,
                    "bleu_score": 0.75,
                    "response_time_ms": 850
                },
                deployment_config={
                    "platform": "modal",
                    "gpu_type": "A10G",
                    "memory_gb": 16,
                    "concurrent_requests": 5
                }
            ),
            CustomModelInfo(
                model_id="xenodennis/dialoGPT-small-20241219-v1",
                model_name="ISA Fine-tuned DialoGPT",
                model_type="text",
                provider="huggingface",
                base_model="microsoft/DialoGPT-small",
                training_date="2024-12-19",
                model_path="xenodennis/dialoGPT-small-20241219-v1",
                metadata={
                    "description": "DialoGPT model fine-tuned with ISA training pipeline",
                    "parameters": "124M",
                    "trainable_parameters": "294K (LoRA)",
                    "training_steps": 1000,
                    "languages": ["en", "zh"]
                },
                capabilities=["chat", "text_generation", "dialogue"],
                performance_metrics={
                    "final_loss": 2.1234,
                    "eval_loss": 2.3456,
                    "training_time_minutes": 15
                }
            ),
            CustomModelInfo(
                model_id="isa-custom-embeddings",
                model_name="ISA Custom Embeddings",
                model_type="embedding",
                provider="isa",
                base_model="sentence-transformers/all-MiniLM-L6-v2",
                training_date="2024-12-19",
                model_path="local://models/isa-embeddings",
                metadata={
                    "description": "Custom embeddings trained on ISA domain data",
                    "dimensions": 384,
                    "max_sequence_length": 512
                },
                capabilities=["embed", "similarity", "clustering"]
            )
        ]

        for model in default_models:
            self._models[model.model_id] = model

        self._save_models_registry()
        logger.info(f"Created default registry with {len(default_models)} models")

    def _save_models_registry(self):
        """
        Save the models registry to file.

        Failures are logged and swallowed, so callers are not interrupted
        when the registry cannot be written.
        """
        try:
            # A bare filename has an empty dirname, and os.makedirs("") raises;
            # only create the parent directory when the path actually has one.
            registry_dir = os.path.dirname(self.models_registry_path)
            if registry_dir:
                os.makedirs(registry_dir, exist_ok=True)

            registry_data = {
                "version": "1.0",
                "last_updated": datetime.now().isoformat(),
                "models": [model.to_dict() for model in self._models.values()]
            }

            with open(self.models_registry_path, 'w', encoding='utf-8') as f:
                json.dump(registry_data, f, indent=2, ensure_ascii=False)

            logger.debug(f"Saved models registry to {self.models_registry_path}")
        except Exception as e:
            logger.error(f"Failed to save models registry: {e}")

    def register_model(self, model_info: "CustomModelInfo") -> bool:
        """
        Register a new custom model (replacing any entry with the same id).

        Returns:
            True once the model is stored in memory and a save was attempted;
            False if an exception escaped while doing so. Note that a failed
            write is only logged by ``_save_models_registry`` and does not
            change the return value.
        """
        try:
            self._models[model_info.model_id] = model_info
            self._save_models_registry()
            logger.info(f"Registered custom model: {model_info.model_id}")
            return True
        except Exception as e:
            logger.error(f"Failed to register model {model_info.model_id}: {e}")
            return False

    def unregister_model(self, model_id: str) -> bool:
        """
        Unregister a custom model.

        Returns:
            True if the model was present and removed, False otherwise.
        """
        if model_id not in self._models:
            return False

        del self._models[model_id]
        self._save_models_registry()
        logger.info(f"Unregistered custom model: {model_id}")
        return True

    def get_model(self, model_id: str) -> Optional["CustomModelInfo"]:
        """Return the model registered under ``model_id``, or None."""
        return self._models.get(model_id)

    def list_models(
        self,
        model_type: Optional[str] = None,
        provider: Optional[str] = None,
    ) -> List["CustomModelInfo"]:
        """
        List custom models with optional filtering.

        Args:
            model_type: When truthy, keep only models with this ``model_type``.
            provider: When truthy, keep only models with this ``provider``.
        """
        models = list(self._models.values())

        if model_type:
            models = [m for m in models if m.model_type == model_type]

        if provider:
            models = [m for m in models if m.provider == provider]

        return models

    def get_models_for_api(self) -> List[Dict[str, Any]]:
        """
        Get models in API format for model listing.

        Each entry carries the id, type, provider, description, capabilities,
        base model and training date, plus a ``performance`` key when the
        model has performance metrics.
        """
        api_models = []

        for model in self._models.values():
            api_model = {
                "model_id": model.model_id,
                "service_type": model.model_type,
                "provider": model.provider,
                "description": model.metadata.get("description", ""),
                "capabilities": model.capabilities,
                "custom": True,
                "base_model": model.base_model,
                "training_date": model.training_date
            }

            # Add performance metrics if available
            if model.performance_metrics:
                api_model["performance"] = model.performance_metrics

            api_models.append(api_model)

        return api_models

    def search_models(self, query: str) -> List["CustomModelInfo"]:
        """
        Search custom models by a case-insensitive substring query.

        The query is matched against the model id, name, description and
        capabilities joined into one lowercase string.
        """
        query_lower = query.lower()
        matching_models = []

        for model in self._models.values():
            # Search in model_id, name, description, and capabilities
            searchable_text = (
                f"{model.model_id} {model.model_name} "
                f"{model.metadata.get('description', '')} "
                f"{' '.join(model.capabilities)}"
            ).lower()

            if query_lower in searchable_text:
                matching_models.append(model)

        return matching_models

    def get_deployment_config(self, model_id: str) -> Optional[Dict[str, Any]]:
        """Return the deployment configuration of a model, or None if unknown."""
        model = self.get_model(model_id)
        return model.deployment_config if model else None

    def update_performance_metrics(self, model_id: str, metrics: Dict[str, float]) -> bool:
        """
        Replace the performance metrics of a model and save the registry.

        Returns:
            True if the model exists, False otherwise.
        """
        model = self.get_model(model_id)
        if not model:
            return False

        model.performance_metrics = metrics
        self._save_models_registry()
        return True

    def get_provider_models(self, provider: str) -> List["CustomModelInfo"]:
        """Get all models for a specific provider."""
        return [model for model in self._models.values() if model.provider == provider]

    def get_stats(self) -> Dict[str, Any]:
        """
        Get statistics about custom models.

        Returns:
            Dict with the total model count, per-type counts, per-provider
            counts and the registry path.
        """
        models_by_type: Dict[str, int] = {}
        models_by_provider: Dict[str, int] = {}

        for model in self._models.values():
            models_by_type[model.model_type] = models_by_type.get(model.model_type, 0) + 1
            models_by_provider[model.provider] = models_by_provider.get(model.provider, 0) + 1

        return {
            "total_models": len(self._models),
            "models_by_type": models_by_type,
            "models_by_provider": models_by_provider,
            "registry_path": self.models_registry_path
        }
268
+
269
# Module-wide manager instance; stays None until first requested.
_custom_model_manager = None


def get_custom_model_manager() -> CustomModelManager:
    """Return the shared CustomModelManager, constructing it on first call."""
    global _custom_model_manager
    if _custom_model_manager is not None:
        return _custom_model_manager
    _custom_model_manager = CustomModelManager()
    return _custom_model_manager
@@ -0,0 +1,13 @@
1
+ """
2
+ Embedding Services - Text and Document Embedding Services
3
+ """
4
+
5
+ from .base_embed_service import BaseEmbedService
6
+ from .openai_embed_service import OpenAIEmbedService
7
+ from .ollama_embed_service import OllamaEmbedService
8
+
9
+ __all__ = [
10
+ 'BaseEmbedService',
11
+ 'OpenAIEmbedService',
12
+ 'OllamaEmbedService'
13
+ ]
@@ -36,17 +36,29 @@ class BaseEmbedService(BaseService):
36
36
  if not isinstance(input_data, list):
37
37
  input_data = [input_data]
38
38
  return await self.create_text_embeddings(input_data)
39
- elif task == "chunk_and_embed":
39
+ elif task in ["chunk", "chunk_and_embed"]:
40
40
  if isinstance(input_data, list):
41
- raise ValueError("chunk_and_embed task requires single text input")
42
- return await self.create_chunks(input_data, kwargs.get("metadata"))
41
+ raise ValueError("chunk task requires single text input")
42
+ return await self.create_chunks(input_data, **kwargs)
43
43
  elif task == "similarity":
44
+ # Support both text-based and embedding-based similarity
45
+ candidates = kwargs.get("candidates")
44
46
  embedding1 = kwargs.get("embedding1")
45
47
  embedding2 = kwargs.get("embedding2")
46
- if not embedding1 or not embedding2:
47
- raise ValueError("similarity task requires embedding1 and embedding2 parameters")
48
- similarity = await self.compute_similarity(embedding1, embedding2)
49
- return {"similarity": similarity}
48
+
49
+ if candidates:
50
+ # Text-based similarity - compute embeddings first
51
+ if isinstance(input_data, list):
52
+ raise ValueError("similarity task with candidates requires single query text")
53
+ # Remove candidates from kwargs to avoid duplicate parameter
54
+ similarity_kwargs = {k: v for k, v in kwargs.items() if k != 'candidates'}
55
+ return await self._text_similarity_search(input_data, candidates, **similarity_kwargs)
56
+ elif embedding1 and embedding2:
57
+ # Direct embedding similarity
58
+ similarity = await self.compute_similarity(embedding1, embedding2)
59
+ return {"similarity": similarity}
60
+ else:
61
+ raise ValueError("similarity task requires either 'candidates' parameter or both 'embedding1' and 'embedding2' parameters")
50
62
  elif task == "find_similar":
51
63
  query_embedding = kwargs.get("query_embedding")
52
64
  candidate_embeddings = kwargs.get("candidate_embeddings")
@@ -57,6 +69,21 @@ class BaseEmbedService(BaseService):
57
69
  candidate_embeddings,
58
70
  kwargs.get("top_k", 5)
59
71
  )
72
+
73
+ # ==================== Reranking tasks ====================
74
+ elif task in ["rerank", "rerank_documents", "document_ranking"]:
75
+ query = kwargs.get("query") or input_data
76
+ documents = kwargs.get("documents")
77
+ if not documents:
78
+ raise ValueError("rerank task requires documents parameter")
79
+ if isinstance(query, list):
80
+ raise ValueError("rerank task requires single query string")
81
+ return await self.rerank_documents(
82
+ query=query,
83
+ documents=documents,
84
+ top_k=kwargs.get("top_k"),
85
+ return_documents=kwargs.get("return_documents", True)
86
+ )
60
87
  else:
61
88
  raise NotImplementedError(f"{self.__class__.__name__} does not support task: {task}")
62
89
 
@@ -67,7 +94,51 @@ class BaseEmbedService(BaseService):
67
94
  Returns:
68
95
  List of supported task names
69
96
  """
70
- return ["embed", "embed_batch", "chunk_and_embed", "similarity", "find_similar"]
97
+ return ["embed", "embed_batch", "chunk", "chunk_and_embed", "similarity", "find_similar", "rerank", "rerank_documents", "document_ranking"]
98
+
99
async def _text_similarity_search(self, query_text: str, candidates: List[str], **kwargs) -> Dict[str, Any]:
    """
    Rank candidate texts by embedding similarity to a query text.

    Embeds the query and every candidate, delegates the ranking to
    ``self.find_similar_texts`` and maps each hit back to its source text.

    Args:
        query_text: Query text
        candidates: List of candidate texts
        **kwargs: Additional parameters. Recognised keys:
            top_k: Maximum number of hits requested from
                ``find_similar_texts``. Missing or ``None`` means
                "all candidates".
            threshold: Minimum similarity a hit must reach to be kept.
                Applied after ``top_k``, so fewer than ``top_k`` hits
                may be returned.

    Returns:
        Dictionary with keys ``similar_documents`` (list of
        ``{"text", "similarity", "index"}`` dicts), ``query``,
        ``total_candidates`` and ``returned_count``.
    """
    # Nothing to rank: answer locally instead of sending an empty batch
    # to the embedding backend.
    if not candidates:
        return {
            "similar_documents": [],
            "query": query_text,
            "total_candidates": 0,
            "returned_count": 0
        }

    # An explicit top_k=None (e.g. an unset optional request field) must
    # behave like an absent key; dict.get's default only covers absence.
    top_k = kwargs.get("top_k")
    if top_k is None:
        top_k = len(candidates)

    # Get embeddings for query and candidates
    query_embedding = await self.create_text_embedding(query_text)
    candidate_embeddings = await self.create_text_embeddings(candidates)

    # Find similar texts; each hit is read below as a mapping with
    # "index" and "similarity" entries.
    similar_results = await self.find_similar_texts(
        query_embedding,
        candidate_embeddings,
        top_k
    )

    # Apply threshold if specified
    threshold = kwargs.get("threshold")
    if threshold is not None:
        similar_results = [r for r in similar_results if r["similarity"] >= threshold]

    # Convert to expected format with text content
    similar_documents = [
        {
            "text": candidates[result["index"]],
            "similarity": result["similarity"],
            "index": result["index"]
        }
        for result in similar_results
    ]

    return {
        "similar_documents": similar_documents,
        "query": query_text,
        "total_candidates": len(candidates),
        "returned_count": len(similar_documents)
    }
71
142
 
72
143
  @abstractmethod
73
144
  async def create_text_embedding(self, text: str) -> List[float]:
@@ -170,6 +241,38 @@ class BaseEmbedService(BaseService):
170
241
  """
171
242
  pass
172
243
 
244
async def rerank_documents(
    self,
    query: str,
    documents: List[str],
    top_k: Optional[int] = None,
    return_documents: bool = True
) -> Dict[str, Any]:
    """
    Rerank documents based on relevance to query

    Default implementation does not rerank anything and does not raise:
    it returns a failure dictionary saying that reranking is not
    supported by this service class. Override in subclasses that
    support reranking.

    Args:
        query: Search query string
        documents: List of documents to rerank
        top_k: Number of top results to return (None = all)
        return_documents: Whether to include document text in results

    Returns:
        For this default implementation, a dictionary containing:
        - success: Always False
        - error: Message naming the class that lacks reranking support
        - provider: ``self.provider_name`` if set, else 'unknown'
        - service: ``self.model_name`` if set, else 'unknown'

        The arguments are accepted for interface compatibility only and
        are not read here.
    """
    return {
        'success': False,
        'error': f'Reranking not supported by {self.__class__.__name__}',
        # getattr with a default: these attributes are not set in this
        # block, so their absence must not raise.
        'provider': getattr(self, 'provider_name', 'unknown'),
        'service': getattr(self, 'model_name', 'unknown')
    }
275
+
173
276
  @abstractmethod
174
277
  async def close(self):
175
278
  """Cleanup resources"""