isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
isa_model/inference/services/llm/base_llm_service.py

@@ -1,9 +1,12 @@
 from abc import ABC, abstractmethod
 from typing import Dict, Any, List, Union, Optional, AsyncGenerator, Callable
 import logging
+import json
 
 from isa_model.inference.services.base_service import BaseService
 from isa_model.inference.services.llm.helpers.llm_adapter import AdapterManager
+from isa_model.inference.services.llm.helpers.llm_utils import TokenCounter, TextProcessor, ResponseParser, LLMMetrics
+from isa_model.inference.services.llm.helpers.llm_prompts import LLMPrompts, LLMPromptTemplates
 
 logger = logging.getLogger(__name__)
 
@@ -18,6 +21,12 @@ class BaseLLMService(BaseService):
         # Initialize the adapter manager
         self.adapter_manager = AdapterManager()
 
+        # Initialize helper utilities (optional, can be overridden by specific services)
+        self.token_counter = TokenCounter(model_name)
+        self.text_processor = TextProcessor()
+        self.response_parser = ResponseParser()
+        self.llm_prompts = LLMPrompts()
+
         # Get config from provider
         provider_config = self.get_provider_config()
         self.streaming = provider_config.get("streaming", False)
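
The init comment above says these helpers "can be overridden by specific services". A hedged sketch of what such an override could look like; `MyTokenCounter` and `MyLLMService` are hypothetical, and the `model_name` constructor argument is assumed from the `TokenCounter(model_name)` call in the hunk:

```python
from isa_model.inference.services.llm.base_llm_service import BaseLLMService

class MyTokenCounter:
    """Hypothetical drop-in counter; only the methods the base class calls matter."""
    def __init__(self, model_name: str):
        self.model_name = model_name
    def count_tokens(self, text: str) -> int:
        return max(1, len(text) // 4)  # crude 4-chars-per-token heuristic

class MyLLMService(BaseLLMService):
    # (any abstract members of the base class still need implementations before instantiation)
    def __init__(self, model_name: str, **kwargs):
        super().__init__(model_name=model_name, **kwargs)  # constructor signature assumed
        self.token_counter = MyTokenCounter(model_name)    # replaces the default helper
```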
@@ -29,6 +38,8 @@ class BaseLLMService(BaseService):
         input_data: Union[str, List[Dict[str, str]], Any],
         task: Optional[str] = None,
         show_reasoning: bool = False,
+        output_format: Optional[str] = None,
+        json_schema: Optional[Dict] = None,
         **kwargs
     ) -> Dict[str, Any]:
         """
@@ -40,75 +51,186 @@ class BaseLLMService(BaseService):
                 - list: message history [{"role": "user", "content": "hello"}]
                 - Any: LangChain message objects or other formats
             task: task type; multiple LLM tasks are supported
+            output_format: Output format ("json", "markdown", "code", etc.)
+            json_schema: JSON schema for structured output validation
             **kwargs: task-specific additional parameters
 
         Returns:
-            Dict containing task results
+            Dict containing task results (optionally formatted as JSON)
         """
         task = task or "chat"
 
+        # Store formatting options for use by specific task methods
+        format_options = {
+            "output_format": output_format,
+            "json_schema": json_schema,
+            "repair_attempts": kwargs.get("repair_attempts", 3)
+        }
+
+        # Execute task and apply formatting
+        result = None
+
         # ==================== Chat tasks ====================
         if task == "chat":
-            return await self.chat(input_data, kwargs.get("max_tokens", self.max_tokens), show_reasoning=show_reasoning)
+            # Pass all kwargs to ainvoke for better parameter support (like response_format)
+            result_raw = await self.ainvoke(input_data, show_reasoning=show_reasoning, **kwargs)
+            # Wrap in chat response format, preserving AIMessage objects with tool_calls
+            if hasattr(result_raw, 'tool_calls'):
+                # This is an AIMessage with tool_calls - preserve the entire object
+                result = {"message": result_raw}
+            elif hasattr(result_raw, 'content'):
+                # Regular AIMessage without tool_calls - extract content
+                content = result_raw.content
+                result = {"message": content}
+            else:
+                # Plain string response
+                content = str(result_raw)
+                result = {"message": content}
         elif task == "complete":
-            return await self.complete_text(input_data, kwargs.get("max_tokens", self.max_tokens))
+            result = await self.complete_text(input_data, kwargs.get("max_tokens", self.max_tokens))
         elif task == "instruct":
-            return await self.instruct(input_data, kwargs.get("instruction"), kwargs.get("max_tokens", self.max_tokens))
+            result = await self.instruct(input_data, kwargs.get("instruction"), kwargs.get("max_tokens", self.max_tokens))
 
         # ==================== Text generation tasks ====================
         elif task == "generate":
-            return await self.generate_text(input_data, kwargs.get("max_tokens", self.max_tokens))
+            result = await self.generate_text(input_data, kwargs.get("max_tokens", self.max_tokens))
         elif task == "rewrite":
-            return await self.rewrite_text(input_data, kwargs.get("style"), kwargs.get("tone"))
+            result = await self.rewrite_text(input_data, kwargs.get("style"), kwargs.get("tone"))
         elif task == "summarize":
-            return await self.summarize_text(input_data, kwargs.get("max_length"), kwargs.get("style"))
+            result = await self.summarize_text(input_data, kwargs.get("max_length"), kwargs.get("style"))
         elif task == "translate":
             target_language = kwargs.get("target_language")
             if not target_language:
                 raise ValueError("target_language is required for translate task")
-            return await self.translate_text(input_data, target_language, kwargs.get("source_language"))
+            result = await self.translate_text(input_data, target_language, kwargs.get("source_language"))
 
         # ==================== Analysis tasks ====================
         elif task == "analyze":
-            return await self.analyze_text(input_data, kwargs.get("analysis_type"))
+            result = await self.analyze_text(input_data, kwargs.get("analysis_type"))
         elif task == "classify":
-            return await self.classify_text(input_data, kwargs.get("categories"))
+            result = await self.classify_text(input_data, kwargs.get("categories"))
         elif task == "extract":
-            return await self.extract_information(input_data, kwargs.get("extract_type"))
+            result = await self.extract_information(input_data, kwargs.get("extract_type"))
         elif task == "sentiment":
-            return await self.analyze_sentiment(input_data)
+            # Always use chat with appropriate prompt for sentiment analysis
+            if output_format == "json":
+                # Create JSON-formatted prompt
+                json_prompt = self.create_json_prompt(
+                    f"Please analyze the sentiment of the following text: {input_data}",
+                    json_schema or {
+                        "type": "object",
+                        "properties": {
+                            "sentiment": {"type": "string", "enum": ["positive", "negative", "neutral"]},
+                            "confidence": {"type": "number", "minimum": 0, "maximum": 1},
+                            "explanation": {"type": "string"}
+                        },
+                        "required": ["sentiment"]
+                    }
+                )
+                result = await self.chat(json_prompt, show_reasoning=show_reasoning)
+            else:
+                # Use simple chat prompt for sentiment analysis
+                sentiment_prompt = f"Please analyze the sentiment of the following text and classify it as positive, negative, or neutral:\n\n{input_data}\n\nSentiment:"
+                result = await self.chat(sentiment_prompt, show_reasoning=show_reasoning)
 
         # ==================== Coding tasks ====================
         elif task == "code":
-            return await self.generate_code(input_data, kwargs.get("language"), kwargs.get("style"))
+            # Always use chat with appropriate prompt for code generation
+            language = kwargs.get("language", "")
+            style = kwargs.get("style", "")
+
+            code_prompt = f"Please write code"
+            if language:
+                code_prompt += f" in {language}"
+            code_prompt += f" for the following requirement:\n\n{input_data}\n\n"
+
+            if style:
+                code_prompt += f"Style requirements: {style}\n\n"
+
+            code_prompt += "Please provide clean, working code with comments."
+
+            result = await self.chat(code_prompt, show_reasoning=show_reasoning)
         elif task == "explain_code":
-            return await self.explain_code(input_data, kwargs.get("language"))
+            result = await self.explain_code(input_data, kwargs.get("language"))
         elif task == "debug_code":
-            return await self.debug_code(input_data, kwargs.get("language"))
+            result = await self.debug_code(input_data, kwargs.get("language"))
        elif task == "refactor_code":
-            return await self.refactor_code(input_data, kwargs.get("language"), kwargs.get("improvements"))
+            result = await self.refactor_code(input_data, kwargs.get("language"), kwargs.get("improvements"))
 
         # ==================== Reasoning tasks ====================
         elif task == "reason":
-            return await self.reason_about(input_data, kwargs.get("reasoning_type"))
+            # Always use chat with appropriate prompt for reasoning
+            reasoning_type = kwargs.get("reasoning_type", "")
+
+            reason_prompt = f"Please analyze and explain the reasoning behind the following question or topic"
+            if reasoning_type:
+                reason_prompt += f" using {reasoning_type} reasoning"
+            reason_prompt += f":\n\n{input_data}\n\n"
+            reason_prompt += "Provide a clear, step-by-step explanation of your reasoning process."
+
+            result = await self.chat(reason_prompt, show_reasoning=show_reasoning)
         elif task == "solve":
-            return await self.solve_problem(input_data, kwargs.get("problem_type"))
+            # Always use chat with appropriate prompt for problem solving
+            problem_type = kwargs.get("problem_type", "")
+
+            solve_prompt = f"Please solve the following problem"
+            if problem_type:
+                solve_prompt += f" (type: {problem_type})"
+            solve_prompt += f":\n\n{input_data}\n\n"
+            solve_prompt += "Provide a clear solution with step-by-step explanation."
+
+            result = await self.chat(solve_prompt, show_reasoning=show_reasoning)
         elif task == "plan":
-            return await self.create_plan(input_data, kwargs.get("plan_type"))
+            result = await self.create_plan(input_data, kwargs.get("plan_type"))
         elif task == "deep_research":
-            return await self.deep_research(input_data, kwargs.get("research_type"), kwargs.get("search_enabled", True))
+            result = await self.deep_research(input_data, kwargs.get("research_type"), kwargs.get("search_enabled", True))
 
         # ==================== Tool-calling tasks ====================
         elif task == "tool_call":
-            return await self.call_tools(input_data, kwargs.get("available_tools"))
+            result = await self.call_tools(input_data, kwargs.get("available_tools"))
         elif task == "function_call":
             function_name = kwargs.get("function_name")
             if not function_name:
                 raise ValueError("function_name is required for function_call task")
-            return await self.call_function(input_data, function_name, kwargs.get("parameters"))
+            result = await self.call_function(input_data, function_name, kwargs.get("parameters"))
 
         else:
             raise NotImplementedError(f"{self.__class__.__name__} does not support task: {task}")
+
+        # Apply output formatting if requested
+        if result is not None and output_format:
+            # Extract the raw response for formatting
+            # If result is a dict with 'message' key, use the message for formatting
+            format_input = result
+            if isinstance(result, dict) and 'message' in result:
+                format_input = result['message']
+
+            formatted_result = self.format_structured_output(
+                response=format_input,
+                output_format=output_format,
+                schema=json_schema,
+                repair_attempts=format_options.get("repair_attempts", 3)
+            )
+
+            # If formatting succeeded, return formatted result
+            if formatted_result.get("success", False):
+                return {
+                    "result": formatted_result["data"],
+                    "formatted": True,
+                    "format": output_format,
+                    "original": result
+                }
+            else:
+                # If formatting failed, return original with error info
+                return {
+                    "result": result,
+                    "formatted": False,
+                    "format_errors": formatted_result.get("errors", []),
+                    "original": result
+                }
+
+        # Return unformatted result
+        return result if result is not None else {"message": "Task completed but returned no result"}
 
     # ==================== Chat methods ====================
 
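With this rewrite, a single dispatcher call can request schema-validated JSON without the caller touching the formatting helpers directly. A hedged usage sketch, assuming the method whose body is shown above is `invoke` on some `BaseLLMService` subclass; the `AIFactory().get_llm()` accessor is illustrative only (the factory module exists in this release, but its exact API is not shown in this diff):

```python
import asyncio
from isa_model.inference.ai_factory import AIFactory  # module from this release; exact API assumed

async def main():
    llm = AIFactory().get_llm()  # hypothetical accessor; any BaseLLMService subclass works
    result = await llm.invoke(   # method name assumed from the signature hunk above
        "I love this product, it works perfectly!",
        task="sentiment",
        output_format="json",
        json_schema={
            "type": "object",
            "properties": {"sentiment": {"type": "string"}},
            "required": ["sentiment"],
        },
    )
    if result.get("formatted"):
        print(result["result"])         # parsed dict, validated against the schema
    else:
        print(result["format_errors"])  # formatting failed; raw response kept in result["original"]

asyncio.run(main())
```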
@@ -119,7 +241,7 @@ class BaseLLMService(BaseService):
         show_reasoning: bool = False
     ) -> Dict[str, Any]:
         """
-        Chat - the provider must implement this
+        Chat - delegates to the ainvoke method
 
         Args:
             input_data: input message
@@ -129,7 +251,19 @@ class BaseLLMService(BaseService):
         Returns:
             Dict containing chat response
         """
-        raise NotImplementedError(f"{self.__class__.__name__} does not support chat task")
+        result = await self.ainvoke(input_data, show_reasoning=show_reasoning)
+        # Ensure we return a proper response structure
+        if result is None:
+            logger.warning("ainvoke returned None - this may indicate an implementation issue")
+            return {"message": ""}
+
+        # Extract content if it's an AIMessage object
+        if hasattr(result, 'content'):
+            content = result.content
+        else:
+            content = str(result)
+
+        return {"message": content}
 
     # ==================== Text generation methods ====================
 
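`chat` is no longer abstract-by-exception: it now wraps whatever `ainvoke` returns, so the minimum viable subclass only has to implement `ainvoke`. A minimal sketch under that assumption; `EchoLLMService` is illustrative, and the `ainvoke` signature is inferred from the call sites in the hunks above:

```python
from isa_model.inference.services.llm.base_llm_service import BaseLLMService

class EchoLLMService(BaseLLMService):
    """Illustrative subclass: with the new base class, implementing ainvoke is enough."""
    async def ainvoke(self, input_data, show_reasoning: bool = False, **kwargs):
        # A real service would call its provider API here; we just echo the input.
        return f"echo: {input_data}"

# Given the wrapper above:
#   await service.chat("hi")  ->  {"message": "echo: hi"}
#   (a None return from ainvoke logs a warning and yields {"message": ""} instead)
```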
@@ -514,6 +648,183 @@ class BaseLLMService(BaseService):
         )
         return 0.0
 
+    # ==================== JSON OUTPUT AND FORMATTING METHODS ====================
+
+    def format_structured_output(
+        self,
+        response: Union[str, Any],
+        output_format: str = "json",
+        schema: Optional[Dict] = None,
+        repair_attempts: int = 3
+    ) -> Dict[str, Any]:
+        """
+        Format response as structured output (JSON, etc.)
+
+        Args:
+            response: Raw response from model
+            output_format: Desired output format ("json", "code", "structured")
+            schema: Optional JSON schema for validation
+            repair_attempts: Number of JSON repair attempts
+
+        Returns:
+            Dict with formatted output and metadata
+        """
+        if output_format == "json":
+            if isinstance(response, str):
+                return self.text_processor.extract_json_from_text(response, schema, repair_attempts)
+            else:
+                # Handle response objects with content attribute
+                content = getattr(response, 'content', str(response))
+                return self.text_processor.extract_json_from_text(content, schema, repair_attempts)
+
+        elif output_format == "code":
+            content = response if isinstance(response, str) else getattr(response, 'content', str(response))
+            code_blocks = self.text_processor.extract_code_blocks(content)
+            return {
+                "success": True,
+                "data": code_blocks,
+                "method": "code_block_extraction",
+                "errors": []
+            }
+
+        elif output_format == "structured":
+            # Use ResponseParser for general structured parsing
+            content = response if isinstance(response, str) else getattr(response, 'content', str(response))
+            parsed = self.response_parser.parse_structured_response(content, "json")
+            if parsed:
+                return {
+                    "success": True,
+                    "data": parsed,
+                    "method": "structured_parsing",
+                    "errors": []
+                }
+            else:
+                return {
+                    "success": False,
+                    "data": content,
+                    "method": "raw_fallback",
+                    "errors": ["Failed to parse as structured output"]
+                }
+
+        # Fallback: return raw response
+        return {
+            "success": True,
+            "data": response,
+            "method": "raw_output",
+            "errors": []
+        }
+
+    def create_json_prompt(
+        self,
+        base_prompt: str,
+        json_schema: Optional[Dict] = None,
+        output_instructions: Optional[str] = None
+    ) -> str:
+        """
+        Create a prompt that requests JSON output
+
+        Args:
+            base_prompt: The base prompt content
+            json_schema: Optional JSON schema to include in prompt
+            output_instructions: Custom output format instructions
+
+        Returns:
+            Enhanced prompt requesting JSON output
+        """
+        if output_instructions:
+            json_instruction = output_instructions
+        else:
+            json_instruction = LLMPromptTemplates.OUTPUT_FORMATS["json"]
+
+        if json_schema:
+            schema_text = f"\n\nPlease format your response according to this JSON schema:\n```json\n{json.dumps(json_schema, indent=2)}\n```"
+            return f"{base_prompt}{schema_text}\n\n{json_instruction}"
+        else:
+            return f"{base_prompt}\n\n{json_instruction}"
+
+    def create_structured_prompt(
+        self,
+        task_type: str,
+        content: str,
+        output_format: str = "json",
+        **kwargs
+    ) -> str:
+        """
+        Create a structured prompt using LLMPrompts templates
+
+        Args:
+            task_type: Type of task (from LLMPrompts methods)
+            content: Main content/input
+            output_format: Desired output format
+            **kwargs: Additional arguments for the prompt template
+
+        Returns:
+            Formatted prompt string
+        """
+        try:
+            # Get the appropriate prompt template
+            if hasattr(self.llm_prompts, f"{task_type}_prompt"):
+                method = getattr(self.llm_prompts, f"{task_type}_prompt")
+                base_prompt = method(content, **kwargs)
+            else:
+                # Fallback to generic prompt
+                base_prompt = f"Please {task_type} the following:\n\n{content}"
+
+            # Add output format instructions
+            if output_format in LLMPromptTemplates.OUTPUT_FORMATS:
+                format_instruction = LLMPromptTemplates.OUTPUT_FORMATS[output_format]
+                return f"{base_prompt}\n\n{format_instruction}"
+
+            return base_prompt
+
+        except Exception as e:
+            logger.warning(f"Failed to create structured prompt: {e}")
+            return f"Please {task_type} the following:\n\n{content}"
+
+    def count_tokens(self, text: Union[str, List[Dict[str, str]]]) -> int:
+        """
+        Count tokens in text or message list
+
+        Args:
+            text: String or message list to count tokens for
+
+        Returns:
+            Number of tokens
+        """
+        if isinstance(text, str):
+            return self.token_counter.count_tokens(text)
+        elif isinstance(text, list):
+            return self.token_counter.count_messages_tokens(text)
+        else:
+            return self.token_counter.count_tokens(str(text))
+
+    def truncate_to_token_limit(self, text: str, max_tokens: int) -> str:
+        """
+        Truncate text to fit within token limit
+
+        Args:
+            text: Text to truncate
+            max_tokens: Maximum number of tokens
+
+        Returns:
+            Truncated text
+        """
+        return self.token_counter.truncate_text(text, max_tokens)
+
+    def split_text_by_tokens(self, text: str, chunk_size: int, overlap: int = 0) -> List[str]:
+        """
+        Split text into chunks by token count
+
+        Args:
+            text: Text to split
+            chunk_size: Size of each chunk in tokens
+            overlap: Number of overlapping tokens between chunks
+
+        Returns:
+            List of text chunks
+        """
+        return self.token_counter.split_text_by_tokens(text, chunk_size, overlap)
+
     # ==================== METADATA AND UTILITY METHODS ====================
 
     def get_supported_tasks(self) -> List[str]:
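
For reference, the new public surface added by this hunk can be exercised directly. A hedged sketch against a configured `BaseLLMService` instance `llm`; return shapes follow the docstrings above, while the actual parsing behavior is delegated to the `TextProcessor`/`TokenCounter` helpers imported at the top of the file:

```python
# `llm` is any configured BaseLLMService subclass instance.
raw = 'Here you go: {"sentiment": "positive", "confidence": 0.93}'

parsed = llm.format_structured_output(raw, output_format="json")
if parsed["success"]:
    data = parsed["data"]  # dict extracted (and repaired if needed) from the text

prompt = llm.create_json_prompt(
    "Summarize the 0.4.4 release in one sentence",
    json_schema={"type": "object", "properties": {"summary": {"type": "string"}}},
)

n_tokens = llm.count_tokens(prompt)                 # int; also accepts message lists
clipped = llm.truncate_to_token_limit(prompt, 256)  # str cut to at most 256 tokens
chunks = llm.split_text_by_tokens(prompt, chunk_size=128, overlap=16)  # List[str]
```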