isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228)
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
The diff below covers isa_model/inference/services/llm/base_llm_service.py (+361 -26):

@@ -1,9 +1,12 @@
 from abc import ABC, abstractmethod
 from typing import Dict, Any, List, Union, Optional, AsyncGenerator, Callable
 import logging
+import json
 
 from isa_model.inference.services.base_service import BaseService
 from isa_model.inference.services.llm.helpers.llm_adapter import AdapterManager
+from isa_model.inference.services.llm.helpers.llm_utils import TokenCounter, TextProcessor, ResponseParser, LLMMetrics
+from isa_model.inference.services.llm.helpers.llm_prompts import LLMPrompts, LLMPromptTemplates
 
 logger = logging.getLogger(__name__)
 
@@ -18,6 +21,12 @@ class BaseLLMService(BaseService):
         # Initialize the adapter manager
         self.adapter_manager = AdapterManager()
 
+        # Initialize helper utilities (optional, can be overridden by specific services)
+        self.token_counter = TokenCounter(model_name)
+        self.text_processor = TextProcessor()
+        self.response_parser = ResponseParser()
+        self.llm_prompts = LLMPrompts()
+
         # Get config from provider
         provider_config = self.get_provider_config()
         self.streaming = provider_config.get("streaming", False)
@@ -28,6 +37,9 @@ class BaseLLMService(BaseService):
         self,
         input_data: Union[str, List[Dict[str, str]], Any],
         task: Optional[str] = None,
+        show_reasoning: bool = False,
+        output_format: Optional[str] = None,
+        json_schema: Optional[Dict] = None,
         **kwargs
     ) -> Dict[str, Any]:
         """
@@ -39,86 +51,219 @@ class BaseLLMService(BaseService):
                 - list: message history [{"role": "user", "content": "hello"}]
                 - Any: LangChain message objects or other formats
             task: task type; supports multiple kinds of LLM tasks
+            output_format: Output format ("json", "markdown", "code", etc.)
+            json_schema: JSON schema for structured output validation
             **kwargs: task-specific extra parameters
 
         Returns:
-            Dict containing task results
+            Dict containing task results (optionally formatted as JSON)
         """
         task = task or "chat"
 
+        # Store formatting options for use by specific task methods
+        format_options = {
+            "output_format": output_format,
+            "json_schema": json_schema,
+            "repair_attempts": kwargs.get("repair_attempts", 3)
+        }
+
+        # Execute task and apply formatting
+        result = None
+
         # ==================== Chat tasks ====================
         if task == "chat":
-            return await self.chat(input_data, kwargs.get("max_tokens", self.max_tokens))
+            # Pass all kwargs to ainvoke for better parameter support (like response_format)
+            result_raw = await self.ainvoke(input_data, show_reasoning=show_reasoning, **kwargs)
+            # Wrap in chat response format, preserving AIMessage objects with tool_calls
+            if hasattr(result_raw, 'tool_calls'):
+                # This is an AIMessage with tool_calls - preserve the entire object
+                result = {"message": result_raw}
+            elif hasattr(result_raw, 'content'):
+                # Regular AIMessage without tool_calls - extract content
+                content = result_raw.content
+                result = {"message": content}
+            else:
+                # Plain string response
+                content = str(result_raw)
+                result = {"message": content}
         elif task == "complete":
-            return await self.complete_text(input_data, kwargs.get("max_tokens", self.max_tokens))
+            result = await self.complete_text(input_data, kwargs.get("max_tokens", self.max_tokens))
         elif task == "instruct":
-            return await self.instruct(input_data, kwargs.get("instruction"), kwargs.get("max_tokens", self.max_tokens))
+            result = await self.instruct(input_data, kwargs.get("instruction"), kwargs.get("max_tokens", self.max_tokens))
 
         # ==================== Text generation tasks ====================
         elif task == "generate":
-            return await self.generate_text(input_data, kwargs.get("max_tokens", self.max_tokens))
+            result = await self.generate_text(input_data, kwargs.get("max_tokens", self.max_tokens))
         elif task == "rewrite":
-            return await self.rewrite_text(input_data, kwargs.get("style"), kwargs.get("tone"))
+            result = await self.rewrite_text(input_data, kwargs.get("style"), kwargs.get("tone"))
         elif task == "summarize":
-            return await self.summarize_text(input_data, kwargs.get("max_length"), kwargs.get("style"))
+            result = await self.summarize_text(input_data, kwargs.get("max_length"), kwargs.get("style"))
         elif task == "translate":
-            return await self.translate_text(input_data, kwargs.get("target_language"), kwargs.get("source_language"))
+            target_language = kwargs.get("target_language")
+            if not target_language:
+                raise ValueError("target_language is required for translate task")
+            result = await self.translate_text(input_data, target_language, kwargs.get("source_language"))
 
         # ==================== Analysis tasks ====================
         elif task == "analyze":
-            return await self.analyze_text(input_data, kwargs.get("analysis_type"))
+            result = await self.analyze_text(input_data, kwargs.get("analysis_type"))
         elif task == "classify":
-            return await self.classify_text(input_data, kwargs.get("categories"))
+            result = await self.classify_text(input_data, kwargs.get("categories"))
         elif task == "extract":
-            return await self.extract_information(input_data, kwargs.get("extract_type"))
+            result = await self.extract_information(input_data, kwargs.get("extract_type"))
         elif task == "sentiment":
-            return await self.analyze_sentiment(input_data)
+            # Always use chat with appropriate prompt for sentiment analysis
+            if output_format == "json":
+                # Create JSON-formatted prompt
+                json_prompt = self.create_json_prompt(
+                    f"Please analyze the sentiment of the following text: {input_data}",
+                    json_schema or {
+                        "type": "object",
+                        "properties": {
+                            "sentiment": {"type": "string", "enum": ["positive", "negative", "neutral"]},
+                            "confidence": {"type": "number", "minimum": 0, "maximum": 1},
+                            "explanation": {"type": "string"}
+                        },
+                        "required": ["sentiment"]
+                    }
+                )
+                result = await self.chat(json_prompt, show_reasoning=show_reasoning)
+            else:
+                # Use simple chat prompt for sentiment analysis
+                sentiment_prompt = f"Please analyze the sentiment of the following text and classify it as positive, negative, or neutral:\n\n{input_data}\n\nSentiment:"
+                result = await self.chat(sentiment_prompt, show_reasoning=show_reasoning)
 
         # ==================== Coding tasks ====================
         elif task == "code":
-            return await self.generate_code(input_data, kwargs.get("language"), kwargs.get("style"))
+            # Always use chat with appropriate prompt for code generation
+            language = kwargs.get("language", "")
+            style = kwargs.get("style", "")
+
+            code_prompt = f"Please write code"
+            if language:
+                code_prompt += f" in {language}"
+            code_prompt += f" for the following requirement:\n\n{input_data}\n\n"
+
+            if style:
+                code_prompt += f"Style requirements: {style}\n\n"
+
+            code_prompt += "Please provide clean, working code with comments."
+
+            result = await self.chat(code_prompt, show_reasoning=show_reasoning)
         elif task == "explain_code":
-            return await self.explain_code(input_data, kwargs.get("language"))
+            result = await self.explain_code(input_data, kwargs.get("language"))
         elif task == "debug_code":
-            return await self.debug_code(input_data, kwargs.get("language"))
+            result = await self.debug_code(input_data, kwargs.get("language"))
         elif task == "refactor_code":
-            return await self.refactor_code(input_data, kwargs.get("language"), kwargs.get("improvements"))
+            result = await self.refactor_code(input_data, kwargs.get("language"), kwargs.get("improvements"))
 
         # ==================== Reasoning tasks ====================
         elif task == "reason":
-            return await self.reason_about(input_data, kwargs.get("reasoning_type"))
+            # Always use chat with appropriate prompt for reasoning
+            reasoning_type = kwargs.get("reasoning_type", "")
+
+            reason_prompt = f"Please analyze and explain the reasoning behind the following question or topic"
+            if reasoning_type:
+                reason_prompt += f" using {reasoning_type} reasoning"
+            reason_prompt += f":\n\n{input_data}\n\n"
+            reason_prompt += "Provide a clear, step-by-step explanation of your reasoning process."
+
+            result = await self.chat(reason_prompt, show_reasoning=show_reasoning)
         elif task == "solve":
-            return await self.solve_problem(input_data, kwargs.get("problem_type"))
+            # Always use chat with appropriate prompt for problem solving
+            problem_type = kwargs.get("problem_type", "")
+
+            solve_prompt = f"Please solve the following problem"
+            if problem_type:
+                solve_prompt += f" (type: {problem_type})"
+            solve_prompt += f":\n\n{input_data}\n\n"
+            solve_prompt += "Provide a clear solution with step-by-step explanation."
+
+            result = await self.chat(solve_prompt, show_reasoning=show_reasoning)
         elif task == "plan":
-            return await self.create_plan(input_data, kwargs.get("plan_type"))
+            result = await self.create_plan(input_data, kwargs.get("plan_type"))
+        elif task == "deep_research":
+            result = await self.deep_research(input_data, kwargs.get("research_type"), kwargs.get("search_enabled", True))
 
         # ==================== Tool-calling tasks ====================
         elif task == "tool_call":
-            return await self.call_tools(input_data, kwargs.get("available_tools"))
+            result = await self.call_tools(input_data, kwargs.get("available_tools"))
         elif task == "function_call":
-            return await self.call_function(input_data, kwargs.get("function_name"), kwargs.get("parameters"))
+            function_name = kwargs.get("function_name")
+            if not function_name:
+                raise ValueError("function_name is required for function_call task")
+            result = await self.call_function(input_data, function_name, kwargs.get("parameters"))
 
         else:
             raise NotImplementedError(f"{self.__class__.__name__} does not support task: {task}")
+
+        # Apply output formatting if requested
+        if result is not None and output_format:
+            # Extract the raw response for formatting
+            # If result is a dict with 'message' key, use the message for formatting
+            format_input = result
+            if isinstance(result, dict) and 'message' in result:
+                format_input = result['message']
+
+            formatted_result = self.format_structured_output(
+                response=format_input,
+                output_format=output_format,
+                schema=json_schema,
+                repair_attempts=format_options.get("repair_attempts", 3)
+            )
+
+            # If formatting succeeded, return formatted result
+            if formatted_result.get("success", False):
+                return {
+                    "result": formatted_result["data"],
+                    "formatted": True,
+                    "format": output_format,
+                    "original": result
+                }
+            else:
+                # If formatting failed, return original with error info
+                return {
+                    "result": result,
+                    "formatted": False,
+                    "format_errors": formatted_result.get("errors", []),
+                    "original": result
+                }
+
+        # Return unformatted result
+        return result if result is not None else {"message": "Task completed but returned no result"}
 
     # ==================== Chat methods ====================
 
     async def chat(
         self,
         input_data: Union[str, List[Dict[str, str]], Any],
-        max_tokens: Optional[int] = None
+        max_tokens: Optional[int] = None,
+        show_reasoning: bool = False
     ) -> Dict[str, Any]:
         """
-        Chat - must be implemented by the provider
+        Chat - delegates to the ainvoke method
 
         Args:
             input_data: input messages
             max_tokens: maximum number of tokens to generate
+            show_reasoning: whether to expose the reasoning process
 
         Returns:
             Dict containing chat response
         """
-        raise NotImplementedError(f"{self.__class__.__name__} does not support chat task")
+        result = await self.ainvoke(input_data, show_reasoning=show_reasoning)
+        # Ensure we return a proper response structure
+        if result is None:
+            logger.warning("ainvoke returned None - this may indicate an implementation issue")
+            return {"message": ""}
+
+        # Extract content if it's an AIMessage object
+        if hasattr(result, 'content'):
+            content = result.content
+        else:
+            content = str(result)
+
+        return {"message": content}
 
     # ==================== Text generation methods ====================
 
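The hunk above turns `chat` from an abstract provider hook into a thin wrapper over `ainvoke`, and routes every `invoke(task=...)` branch through a shared `result` envelope. A minimal consumption sketch, assuming only what the diff shows (`service` is any concrete `BaseLLMService` subclass; the helper function and its name are hypothetical):

```python
# Hypothetical helper illustrating the new chat-task contract.
async def chat_once(service, prompt: str) -> str:
    resp = await service.invoke(prompt, task="chat")
    msg = resp["message"]
    # Per the diff: `msg` is a plain string unless the model returned an
    # AIMessage carrying tool_calls, in which case the whole object is kept.
    return msg if isinstance(msg, str) else getattr(msg, "content", str(msg))
```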
@@ -303,6 +448,17 @@ class BaseLLMService(BaseService):
         """
         raise NotImplementedError(f"{self.__class__.__name__} does not support create_plan task")
 
+    async def deep_research(
+        self,
+        input_data: Union[str, Any],
+        research_type: Optional[str] = None,
+        search_enabled: bool = True
+    ) -> Dict[str, Any]:
+        """
+        Deep research - task dedicated to O-series models; supports web search and in-depth analysis
+        """
+        raise NotImplementedError(f"{self.__class__.__name__} does not support deep_research task")
+
     # ==================== Tool-calling methods ====================
 
     async def call_tools(
@@ -354,7 +510,7 @@ class BaseLLMService(BaseService):
         """Convert message formats via the adapter manager"""
         return self.adapter_manager.convert_messages(input_data)
 
-    def _format_response(self, response: str, original_input: Any) -> Union[str, Any]:
+    def _format_response(self, response: Union[str, Any], original_input: Any) -> Union[str, Any]:
         """Format the response via the adapter manager"""
         return self.adapter_manager.format_response(response, original_input)
 
@@ -379,7 +535,7 @@ class BaseLLMService(BaseService):
         pass
 
     @abstractmethod
-    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
+    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False) -> Union[str, Any]:
         """
         Universal async invocation method that handles different input types
 
@@ -388,6 +544,7 @@ class BaseLLMService(BaseService):
                 - str: Simple text prompt
                 - list: Message history like [{"role": "user", "content": "hello"}]
                 - Any: LangChain message objects or other formats
+            show_reasoning: If True and model supports it, show reasoning process
 
         Returns:
             Model response (string for simple cases, object for complex cases)
@@ -491,6 +648,183 @@ class BaseLLMService(BaseService):
             )
             return 0.0
 
+    # ==================== JSON OUTPUT AND FORMATTING METHODS ====================
+
+    def format_structured_output(
+        self,
+        response: Union[str, Any],
+        output_format: str = "json",
+        schema: Optional[Dict] = None,
+        repair_attempts: int = 3
+    ) -> Dict[str, Any]:
+        """
+        Format response as structured output (JSON, etc.)
+
+        Args:
+            response: Raw response from model
+            output_format: Desired output format ("json", "code", "structured")
+            schema: Optional JSON schema for validation
+            repair_attempts: Number of JSON repair attempts
+
+        Returns:
+            Dict with formatted output and metadata
+        """
+        if output_format == "json":
+            if isinstance(response, str):
+                return self.text_processor.extract_json_from_text(response, schema, repair_attempts)
+            else:
+                # Handle response objects with content attribute
+                content = getattr(response, 'content', str(response))
+                return self.text_processor.extract_json_from_text(content, schema, repair_attempts)
+
+        elif output_format == "code":
+            content = response if isinstance(response, str) else getattr(response, 'content', str(response))
+            code_blocks = self.text_processor.extract_code_blocks(content)
+            return {
+                "success": True,
+                "data": code_blocks,
+                "method": "code_block_extraction",
+                "errors": []
+            }
+
+        elif output_format == "structured":
+            # Use ResponseParser for general structured parsing
+            content = response if isinstance(response, str) else getattr(response, 'content', str(response))
+            parsed = self.response_parser.parse_structured_response(content, "json")
+            if parsed:
+                return {
+                    "success": True,
+                    "data": parsed,
+                    "method": "structured_parsing",
+                    "errors": []
+                }
+            else:
+                return {
+                    "success": False,
+                    "data": content,
+                    "method": "raw_fallback",
+                    "errors": ["Failed to parse as structured output"]
+                }
+
+        # Fallback: return raw response
+        return {
+            "success": True,
+            "data": response,
+            "method": "raw_output",
+            "errors": []
+        }
+
+    def create_json_prompt(
+        self,
+        base_prompt: str,
+        json_schema: Optional[Dict] = None,
+        output_instructions: Optional[str] = None
+    ) -> str:
+        """
+        Create a prompt that requests JSON output
+
+        Args:
+            base_prompt: The base prompt content
+            json_schema: Optional JSON schema to include in prompt
+            output_instructions: Custom output format instructions
+
+        Returns:
+            Enhanced prompt requesting JSON output
+        """
+        if output_instructions:
+            json_instruction = output_instructions
+        else:
+            json_instruction = LLMPromptTemplates.OUTPUT_FORMATS["json"]
+
+        if json_schema:
+            schema_text = f"\n\nPlease format your response according to this JSON schema:\n```json\n{json.dumps(json_schema, indent=2)}\n```"
+            return f"{base_prompt}{schema_text}\n\n{json_instruction}"
+        else:
+            return f"{base_prompt}\n\n{json_instruction}"
+
+    def create_structured_prompt(
+        self,
+        task_type: str,
+        content: str,
+        output_format: str = "json",
+        **kwargs
+    ) -> str:
+        """
+        Create a structured prompt using LLMPrompts templates
+
+        Args:
+            task_type: Type of task (from LLMPrompts methods)
+            content: Main content/input
+            output_format: Desired output format
+            **kwargs: Additional arguments for the prompt template
+
+        Returns:
+            Formatted prompt string
+        """
+        try:
+            # Get the appropriate prompt template
+            if hasattr(self.llm_prompts, f"{task_type}_prompt"):
+                method = getattr(self.llm_prompts, f"{task_type}_prompt")
+                base_prompt = method(content, **kwargs)
+            else:
+                # Fallback to generic prompt
+                base_prompt = f"Please {task_type} the following:\n\n{content}"
+
+            # Add output format instructions
+            if output_format in LLMPromptTemplates.OUTPUT_FORMATS:
+                format_instruction = LLMPromptTemplates.OUTPUT_FORMATS[output_format]
+                return f"{base_prompt}\n\n{format_instruction}"
+
+            return base_prompt
+
+        except Exception as e:
+            logger.warning(f"Failed to create structured prompt: {e}")
+            return f"Please {task_type} the following:\n\n{content}"
+
+    def count_tokens(self, text: Union[str, List[Dict[str, str]]]) -> int:
+        """
+        Count tokens in text or message list
+
+        Args:
+            text: String or message list to count tokens for
+
+        Returns:
+            Number of tokens
+        """
+        if isinstance(text, str):
+            return self.token_counter.count_tokens(text)
+        elif isinstance(text, list):
+            return self.token_counter.count_messages_tokens(text)
+        else:
+            return self.token_counter.count_tokens(str(text))
+
+    def truncate_to_token_limit(self, text: str, max_tokens: int) -> str:
+        """
+        Truncate text to fit within token limit
+
+        Args:
+            text: Text to truncate
+            max_tokens: Maximum number of tokens
+
+        Returns:
+            Truncated text
+        """
+        return self.token_counter.truncate_text(text, max_tokens)
+
+    def split_text_by_tokens(self, text: str, chunk_size: int, overlap: int = 0) -> List[str]:
+        """
+        Split text into chunks by token count
+
+        Args:
+            text: Text to split
+            chunk_size: Size of each chunk in tokens
+            overlap: Number of overlapping tokens between chunks
+
+        Returns:
+            List of text chunks
+        """
+        return self.token_counter.split_text_by_tokens(text, chunk_size, overlap)
+
     # ==================== METADATA AND UTILITY METHODS ====================
 
     def get_supported_tasks(self) -> List[str]:
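The helpers added in this hunk compose: `create_json_prompt` embeds a schema plus JSON-output instructions into the prompt, and `format_structured_output` tries to recover validated JSON from the completion. A hedged sketch of the round trip (the function, its name, and the schema are illustrative; `TextProcessor`'s repair behavior is taken from the diff, not verified):

```python
# Illustrative round trip through the new JSON helpers.
async def extract_primes(service):
    schema = {
        "type": "object",
        "properties": {"primes": {"type": "array", "items": {"type": "integer"}}},
        "required": ["primes"],
    }
    # Embed the schema and JSON-output instructions into the prompt text.
    prompt = service.create_json_prompt("List three prime numbers.", json_schema=schema)
    raw = await service.ainvoke(prompt)
    # Pull validated JSON back out, with up to 3 repair attempts by default.
    parsed = service.format_structured_output(raw, output_format="json", schema=schema)
    return parsed["data"] if parsed.get("success") else None
```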
@@ -527,6 +861,7 @@ class BaseLLMService(BaseService):
             'reason_about': 'reason',
             'solve_problem': 'solve',
             'create_plan': 'plan',
+            'deep_research': 'deep_research',
             # Tool calling
             'call_tools': 'tool_call',
             'call_function': 'function_call'
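End to end, a caller that requests structured output sees the envelope assembled at the bottom of the `invoke` hunk. A sketch of consuming it (key names are taken verbatim from the diff; everything else is assumed):

```python
# Hypothetical consumer of the formatted-output envelope.
async def sentiment_of(service, text: str):
    result = await service.invoke(text, task="sentiment", output_format="json")
    if result.get("formatted"):
        # Success: {"result": <parsed JSON>, "formatted": True,
        #           "format": "json", "original": <raw chat response>}
        return result["result"].get("sentiment")
    # Failure keeps the raw result and adds "format_errors" for debugging.
    return None
```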