isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +35 -80
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
- isa_model-0.4.4.dist-info/RECORD +180 -0
- isa_model/core/security/secrets.py +0 -358
- isa_model/core/storage/hf_storage.py +0 -419
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
--- a/isa_model/inference/services/llm/base_llm_service.py
+++ b/isa_model/inference/services/llm/base_llm_service.py
@@ -1,9 +1,12 @@
 from abc import ABC, abstractmethod
 from typing import Dict, Any, List, Union, Optional, AsyncGenerator, Callable
 import logging
+import json
 
 from isa_model.inference.services.base_service import BaseService
 from isa_model.inference.services.llm.helpers.llm_adapter import AdapterManager
+from isa_model.inference.services.llm.helpers.llm_utils import TokenCounter, TextProcessor, ResponseParser, LLMMetrics
+from isa_model.inference.services.llm.helpers.llm_prompts import LLMPrompts, LLMPromptTemplates
 
 logger = logging.getLogger(__name__)
 
@@ -18,6 +21,12 @@ class BaseLLMService(BaseService):
         # Initialize the adapter manager
         self.adapter_manager = AdapterManager()
 
+        # Initialize helper utilities (optional, can be overridden by specific services)
+        self.token_counter = TokenCounter(model_name)
+        self.text_processor = TextProcessor()
+        self.response_parser = ResponseParser()
+        self.llm_prompts = LLMPrompts()
+
         # Get config from provider
         provider_config = self.get_provider_config()
         self.streaming = provider_config.get("streaming", False)
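The helpers wired up here are surfaced through thin wrapper methods added near the end of this diff (count_tokens, truncate_to_token_limit, split_text_by_tokens). A minimal sketch of the intended token-budgeting flow; `svc` and `report_text` are hypothetical stand-ins (any concrete BaseLLMService subclass and caller-supplied text), not part of the diff:

    # Sketch only: `svc` and `report_text` are assumed, not defined in this diff.
    prompt = "Summarize the following report:\n\n" + report_text

    # Both wrappers delegate to the TokenCounter instance created above.
    if svc.count_tokens(prompt) > 4000:
        prompt = svc.truncate_to_token_limit(prompt, max_tokens=4000)

    # Or fan out over token-sized chunks with a small overlap between them.
    chunks = svc.split_text_by_tokens(report_text, chunk_size=512, overlap=32)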
@@ -29,6 +38,8 @@ class BaseLLMService(BaseService):
         input_data: Union[str, List[Dict[str, str]], Any],
         task: Optional[str] = None,
         show_reasoning: bool = False,
+        output_format: Optional[str] = None,
+        json_schema: Optional[Dict] = None,
         **kwargs
     ) -> Dict[str, Any]:
         """
@@ -40,75 +51,186 @@ class BaseLLMService(BaseService):
                 - list: message history, e.g. [{"role": "user", "content": "hello"}]
                 - Any: LangChain message objects or other formats
             task: task type; multiple LLM tasks are supported
+            output_format: Output format ("json", "markdown", "code", etc.)
+            json_schema: JSON schema for structured output validation
             **kwargs: task-specific extra parameters
 
         Returns:
-            Dict containing task results
+            Dict containing task results (optionally formatted as JSON)
         """
         task = task or "chat"
 
+        # Store formatting options for use by specific task methods
+        format_options = {
+            "output_format": output_format,
+            "json_schema": json_schema,
+            "repair_attempts": kwargs.get("repair_attempts", 3)
+        }
+
+        # Execute task and apply formatting
+        result = None
+
         # ==================== Chat tasks ====================
         if task == "chat":
-
+            # Pass all kwargs to ainvoke for better parameter support (like response_format)
+            result_raw = await self.ainvoke(input_data, show_reasoning=show_reasoning, **kwargs)
+            # Wrap in chat response format, preserving AIMessage objects with tool_calls
+            if hasattr(result_raw, 'tool_calls'):
+                # This is an AIMessage with tool_calls - preserve the entire object
+                result = {"message": result_raw}
+            elif hasattr(result_raw, 'content'):
+                # Regular AIMessage without tool_calls - extract content
+                content = result_raw.content
+                result = {"message": content}
+            else:
+                # Plain string response
+                content = str(result_raw)
+                result = {"message": content}
         elif task == "complete":
-
+            result = await self.complete_text(input_data, kwargs.get("max_tokens", self.max_tokens))
         elif task == "instruct":
-
+            result = await self.instruct(input_data, kwargs.get("instruction"), kwargs.get("max_tokens", self.max_tokens))
 
         # ==================== Text generation tasks ====================
         elif task == "generate":
-
+            result = await self.generate_text(input_data, kwargs.get("max_tokens", self.max_tokens))
         elif task == "rewrite":
-
+            result = await self.rewrite_text(input_data, kwargs.get("style"), kwargs.get("tone"))
         elif task == "summarize":
-
+            result = await self.summarize_text(input_data, kwargs.get("max_length"), kwargs.get("style"))
         elif task == "translate":
             target_language = kwargs.get("target_language")
             if not target_language:
                 raise ValueError("target_language is required for translate task")
-
+            result = await self.translate_text(input_data, target_language, kwargs.get("source_language"))
 
         # ==================== Analysis tasks ====================
         elif task == "analyze":
-
+            result = await self.analyze_text(input_data, kwargs.get("analysis_type"))
         elif task == "classify":
-
+            result = await self.classify_text(input_data, kwargs.get("categories"))
         elif task == "extract":
-
+            result = await self.extract_information(input_data, kwargs.get("extract_type"))
         elif task == "sentiment":
-
+            # Always use chat with appropriate prompt for sentiment analysis
+            if output_format == "json":
+                # Create JSON-formatted prompt
+                json_prompt = self.create_json_prompt(
+                    f"Please analyze the sentiment of the following text: {input_data}",
+                    json_schema or {
+                        "type": "object",
+                        "properties": {
+                            "sentiment": {"type": "string", "enum": ["positive", "negative", "neutral"]},
+                            "confidence": {"type": "number", "minimum": 0, "maximum": 1},
+                            "explanation": {"type": "string"}
+                        },
+                        "required": ["sentiment"]
+                    }
+                )
+                result = await self.chat(json_prompt, show_reasoning=show_reasoning)
+            else:
+                # Use simple chat prompt for sentiment analysis
+                sentiment_prompt = f"Please analyze the sentiment of the following text and classify it as positive, negative, or neutral:\n\n{input_data}\n\nSentiment:"
+                result = await self.chat(sentiment_prompt, show_reasoning=show_reasoning)
 
         # ==================== Coding tasks ====================
         elif task == "code":
-
+            # Always use chat with appropriate prompt for code generation
+            language = kwargs.get("language", "")
+            style = kwargs.get("style", "")
+
+            code_prompt = f"Please write code"
+            if language:
+                code_prompt += f" in {language}"
+            code_prompt += f" for the following requirement:\n\n{input_data}\n\n"
+
+            if style:
+                code_prompt += f"Style requirements: {style}\n\n"
+
+            code_prompt += "Please provide clean, working code with comments."
+
+            result = await self.chat(code_prompt, show_reasoning=show_reasoning)
         elif task == "explain_code":
-
+            result = await self.explain_code(input_data, kwargs.get("language"))
         elif task == "debug_code":
-
+            result = await self.debug_code(input_data, kwargs.get("language"))
         elif task == "refactor_code":
-
+            result = await self.refactor_code(input_data, kwargs.get("language"), kwargs.get("improvements"))
 
         # ==================== Reasoning tasks ====================
         elif task == "reason":
-
+            # Always use chat with appropriate prompt for reasoning
+            reasoning_type = kwargs.get("reasoning_type", "")
+
+            reason_prompt = f"Please analyze and explain the reasoning behind the following question or topic"
+            if reasoning_type:
+                reason_prompt += f" using {reasoning_type} reasoning"
+            reason_prompt += f":\n\n{input_data}\n\n"
+            reason_prompt += "Provide a clear, step-by-step explanation of your reasoning process."
+
+            result = await self.chat(reason_prompt, show_reasoning=show_reasoning)
         elif task == "solve":
-
+            # Always use chat with appropriate prompt for problem solving
+            problem_type = kwargs.get("problem_type", "")
+
+            solve_prompt = f"Please solve the following problem"
+            if problem_type:
+                solve_prompt += f" (type: {problem_type})"
+            solve_prompt += f":\n\n{input_data}\n\n"
+            solve_prompt += "Provide a clear solution with step-by-step explanation."
+
+            result = await self.chat(solve_prompt, show_reasoning=show_reasoning)
         elif task == "plan":
-
+            result = await self.create_plan(input_data, kwargs.get("plan_type"))
         elif task == "deep_research":
-
+            result = await self.deep_research(input_data, kwargs.get("research_type"), kwargs.get("search_enabled", True))
 
         # ==================== Tool-calling tasks ====================
         elif task == "tool_call":
-
+            result = await self.call_tools(input_data, kwargs.get("available_tools"))
         elif task == "function_call":
             function_name = kwargs.get("function_name")
             if not function_name:
                 raise ValueError("function_name is required for function_call task")
-
+            result = await self.call_function(input_data, function_name, kwargs.get("parameters"))
 
         else:
             raise NotImplementedError(f"{self.__class__.__name__} does not support task: {task}")
+
+        # Apply output formatting if requested
+        if result is not None and output_format:
+            # Extract the raw response for formatting
+            # If result is a dict with 'message' key, use the message for formatting
+            format_input = result
+            if isinstance(result, dict) and 'message' in result:
+                format_input = result['message']
+
+            formatted_result = self.format_structured_output(
+                response=format_input,
+                output_format=output_format,
+                schema=json_schema,
+                repair_attempts=format_options.get("repair_attempts", 3)
+            )
+
+            # If formatting succeeded, return formatted result
+            if formatted_result.get("success", False):
+                return {
+                    "result": formatted_result["data"],
+                    "formatted": True,
+                    "format": output_format,
+                    "original": result
+                }
+            else:
+                # If formatting failed, return original with error info
+                return {
+                    "result": result,
+                    "formatted": False,
+                    "format_errors": formatted_result.get("errors", []),
+                    "original": result
+                }
+
+        # Return unformatted result
+        return result if result is not None else {"message": "Task completed but returned no result"}
 
     # ==================== Chat methods ====================
 
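Taken together with the new output_format and json_schema parameters, the formatting block above changes the return shape whenever formatting is requested: result/formatted/format/original on success, with format_errors added on failure. A hedged sketch of the resulting call pattern, assuming the surrounding method is the service's invoke entry point and `svc` is a concrete subclass (the schema is purely illustrative):

    resp = await svc.invoke(
        "I loved the new release, but setup was painful.",
        task="sentiment",
        output_format="json",
        json_schema={
            "type": "object",
            "properties": {"sentiment": {"type": "string"}},
            "required": ["sentiment"],
        },
    )
    if resp.get("formatted"):
        print(resp["result"]["sentiment"])  # parsed (and repaired, if needed) JSON
    else:
        print(resp["format_errors"])        # formatting failed; raw output stays in resp["result"]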
@@ -119,7 +241,7 @@ class BaseLLMService(BaseService):
         show_reasoning: bool = False
     ) -> Dict[str, Any]:
         """
-        Chat -
+        Chat - delegates to the ainvoke method
 
         Args:
             input_data: input message
@@ -129,7 +251,19 @@ class BaseLLMService(BaseService):
         Returns:
             Dict containing chat response
         """
-
+        result = await self.ainvoke(input_data, show_reasoning=show_reasoning)
+        # Ensure we return a proper response structure
+        if result is None:
+            logger.warning("ainvoke returned None - this may indicate an implementation issue")
+            return {"message": ""}
+
+        # Extract content if it's an AIMessage object
+        if hasattr(result, 'content'):
+            content = result.content
+        else:
+            content = str(result)
+
+        return {"message": content}
 
     # ==================== Text generation methods ====================
 
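One nuance that follows from the two hunks above: the chat() wrapper always reduces the reply to {"message": <string>}, while the task == "chat" branch of the dispatcher preserves the whole AIMessage when tool_calls are present. A small sketch of the difference, under the same `svc` assumption (handle_tool_calls is a hypothetical handler, not part of the package):

    plain = await svc.chat("What's the weather in Paris?")
    print(plain["message"])                # always a plain string here

    routed = await svc.invoke("What's the weather in Paris?", task="chat")
    msg = routed["message"]
    if hasattr(msg, "tool_calls"):         # AIMessage preserved by the dispatcher
        handle_tool_calls(msg.tool_calls)  # hypothetical handler
    else:
        print(msg)                         # plain text content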
@@ -514,6 +648,183 @@ class BaseLLMService(BaseService):
             )
             return 0.0
 
+    # ==================== JSON OUTPUT AND FORMATTING METHODS ====================
+
+    def format_structured_output(
+        self,
+        response: Union[str, Any],
+        output_format: str = "json",
+        schema: Optional[Dict] = None,
+        repair_attempts: int = 3
+    ) -> Dict[str, Any]:
+        """
+        Format response as structured output (JSON, etc.)
+
+        Args:
+            response: Raw response from model
+            output_format: Desired output format ("json", "code", "structured")
+            schema: Optional JSON schema for validation
+            repair_attempts: Number of JSON repair attempts
+
+        Returns:
+            Dict with formatted output and metadata
+        """
+        if output_format == "json":
+            if isinstance(response, str):
+                return self.text_processor.extract_json_from_text(response, schema, repair_attempts)
+            else:
+                # Handle response objects with content attribute
+                content = getattr(response, 'content', str(response))
+                return self.text_processor.extract_json_from_text(content, schema, repair_attempts)
+
+        elif output_format == "code":
+            content = response if isinstance(response, str) else getattr(response, 'content', str(response))
+            code_blocks = self.text_processor.extract_code_blocks(content)
+            return {
+                "success": True,
+                "data": code_blocks,
+                "method": "code_block_extraction",
+                "errors": []
+            }
+
+        elif output_format == "structured":
+            # Use ResponseParser for general structured parsing
+            content = response if isinstance(response, str) else getattr(response, 'content', str(response))
+            parsed = self.response_parser.parse_structured_response(content, "json")
+            if parsed:
+                return {
+                    "success": True,
+                    "data": parsed,
+                    "method": "structured_parsing",
+                    "errors": []
+                }
+            else:
+                return {
+                    "success": False,
+                    "data": content,
+                    "method": "raw_fallback",
+                    "errors": ["Failed to parse as structured output"]
+                }
+
+        # Fallback: return raw response
+        return {
+            "success": True,
+            "data": response,
+            "method": "raw_output",
+            "errors": []
+        }
+
+    def create_json_prompt(
+        self,
+        base_prompt: str,
+        json_schema: Optional[Dict] = None,
+        output_instructions: Optional[str] = None
+    ) -> str:
+        """
+        Create a prompt that requests JSON output
+
+        Args:
+            base_prompt: The base prompt content
+            json_schema: Optional JSON schema to include in prompt
+            output_instructions: Custom output format instructions
+
+        Returns:
+            Enhanced prompt requesting JSON output
+        """
+        if output_instructions:
+            json_instruction = output_instructions
+        else:
+            json_instruction = LLMPromptTemplates.OUTPUT_FORMATS["json"]
+
+        if json_schema:
+            schema_text = f"\n\nPlease format your response according to this JSON schema:\n```json\n{json.dumps(json_schema, indent=2)}\n```"
+            return f"{base_prompt}{schema_text}\n\n{json_instruction}"
+        else:
+            return f"{base_prompt}\n\n{json_instruction}"
+
+    def create_structured_prompt(
+        self,
+        task_type: str,
+        content: str,
+        output_format: str = "json",
+        **kwargs
+    ) -> str:
+        """
+        Create a structured prompt using LLMPrompts templates
+
+        Args:
+            task_type: Type of task (from LLMPrompts methods)
+            content: Main content/input
+            output_format: Desired output format
+            **kwargs: Additional arguments for the prompt template
+
+        Returns:
+            Formatted prompt string
+        """
+        try:
+            # Get the appropriate prompt template
+            if hasattr(self.llm_prompts, f"{task_type}_prompt"):
+                method = getattr(self.llm_prompts, f"{task_type}_prompt")
+                base_prompt = method(content, **kwargs)
+            else:
+                # Fallback to generic prompt
+                base_prompt = f"Please {task_type} the following:\n\n{content}"
+
+            # Add output format instructions
+            if output_format in LLMPromptTemplates.OUTPUT_FORMATS:
+                format_instruction = LLMPromptTemplates.OUTPUT_FORMATS[output_format]
+                return f"{base_prompt}\n\n{format_instruction}"
+
+            return base_prompt
+
+        except Exception as e:
+            logger.warning(f"Failed to create structured prompt: {e}")
+            return f"Please {task_type} the following:\n\n{content}"
+
+    def count_tokens(self, text: Union[str, List[Dict[str, str]]]) -> int:
+        """
+        Count tokens in text or message list
+
+        Args:
+            text: String or message list to count tokens for
+
+        Returns:
+            Number of tokens
+        """
+        if isinstance(text, str):
+            return self.token_counter.count_tokens(text)
+        elif isinstance(text, list):
+            return self.token_counter.count_messages_tokens(text)
+        else:
+            return self.token_counter.count_tokens(str(text))
+
+    def truncate_to_token_limit(self, text: str, max_tokens: int) -> str:
+        """
+        Truncate text to fit within token limit
+
+        Args:
+            text: Text to truncate
+            max_tokens: Maximum number of tokens
+
+        Returns:
+            Truncated text
+        """
+        return self.token_counter.truncate_text(text, max_tokens)
+
+    def split_text_by_tokens(self, text: str, chunk_size: int, overlap: int = 0) -> List[str]:
+        """
+        Split text into chunks by token count
+
+        Args:
+            text: Text to split
+            chunk_size: Size of each chunk in tokens
+            overlap: Number of overlapping tokens between chunks
+
+        Returns:
+            List of text chunks
+        """
+        return self.token_counter.split_text_by_tokens(text, chunk_size, overlap)
+
     # ==================== METADATA AND UTILITY METHODS ====================
 
     def get_supported_tasks(self) -> List[str]: