isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
Diff for isa_model/inference/services/llm/base_llm_service.py (+361 -26):

```diff
@@ -1,9 +1,12 @@
 from abc import ABC, abstractmethod
 from typing import Dict, Any, List, Union, Optional, AsyncGenerator, Callable
 import logging
+import json
 
 from isa_model.inference.services.base_service import BaseService
 from isa_model.inference.services.llm.helpers.llm_adapter import AdapterManager
+from isa_model.inference.services.llm.helpers.llm_utils import TokenCounter, TextProcessor, ResponseParser, LLMMetrics
+from isa_model.inference.services.llm.helpers.llm_prompts import LLMPrompts, LLMPromptTemplates
 
 logger = logging.getLogger(__name__)
 
```
```diff
@@ -18,6 +21,12 @@ class BaseLLMService(BaseService):
         # Initialize the adapter manager
         self.adapter_manager = AdapterManager()
 
+        # Initialize helper utilities (optional, can be overridden by specific services)
+        self.token_counter = TokenCounter(model_name)
+        self.text_processor = TextProcessor()
+        self.response_parser = ResponseParser()
+        self.llm_prompts = LLMPrompts()
+
         # Get config from provider
         provider_config = self.get_provider_config()
         self.streaming = provider_config.get("streaming", False)
```
```diff
@@ -28,6 +37,9 @@ class BaseLLMService(BaseService):
         self,
         input_data: Union[str, List[Dict[str, str]], Any],
         task: Optional[str] = None,
+        show_reasoning: bool = False,
+        output_format: Optional[str] = None,
+        json_schema: Optional[Dict] = None,
         **kwargs
     ) -> Dict[str, Any]:
         """
```
```diff
@@ -39,86 +51,219 @@
                 - list: message history, e.g. [{"role": "user", "content": "hello"}]
                 - Any: LangChain message objects or other formats
             task: task type; multiple LLM tasks are supported
+            output_format: Output format ("json", "markdown", "code", etc.)
+            json_schema: JSON schema for structured output validation
             **kwargs: task-specific additional parameters
 
         Returns:
-            Dict containing task results
+            Dict containing task results (optionally formatted as JSON)
         """
         task = task or "chat"
 
+        # Store formatting options for use by specific task methods
+        format_options = {
+            "output_format": output_format,
+            "json_schema": json_schema,
+            "repair_attempts": kwargs.get("repair_attempts", 3)
+        }
+
+        # Execute task and apply formatting
+        result = None
+
         # ==================== Chat tasks ====================
         if task == "chat":
-
+            # Pass all kwargs to ainvoke for better parameter support (like response_format)
+            result_raw = await self.ainvoke(input_data, show_reasoning=show_reasoning, **kwargs)
+            # Wrap in chat response format, preserving AIMessage objects with tool_calls
+            if hasattr(result_raw, 'tool_calls'):
+                # This is an AIMessage with tool_calls - preserve the entire object
+                result = {"message": result_raw}
+            elif hasattr(result_raw, 'content'):
+                # Regular AIMessage without tool_calls - extract content
+                content = result_raw.content
+                result = {"message": content}
+            else:
+                # Plain string response
+                content = str(result_raw)
+                result = {"message": content}
         elif task == "complete":
-
+            result = await self.complete_text(input_data, kwargs.get("max_tokens", self.max_tokens))
         elif task == "instruct":
-
+            result = await self.instruct(input_data, kwargs.get("instruction"), kwargs.get("max_tokens", self.max_tokens))
 
         # ==================== Text generation tasks ====================
         elif task == "generate":
-
+            result = await self.generate_text(input_data, kwargs.get("max_tokens", self.max_tokens))
         elif task == "rewrite":
-
+            result = await self.rewrite_text(input_data, kwargs.get("style"), kwargs.get("tone"))
         elif task == "summarize":
-
+            result = await self.summarize_text(input_data, kwargs.get("max_length"), kwargs.get("style"))
         elif task == "translate":
-
+            target_language = kwargs.get("target_language")
+            if not target_language:
+                raise ValueError("target_language is required for translate task")
+            result = await self.translate_text(input_data, target_language, kwargs.get("source_language"))
 
         # ==================== Analysis tasks ====================
         elif task == "analyze":
-
+            result = await self.analyze_text(input_data, kwargs.get("analysis_type"))
         elif task == "classify":
-
+            result = await self.classify_text(input_data, kwargs.get("categories"))
         elif task == "extract":
-
+            result = await self.extract_information(input_data, kwargs.get("extract_type"))
         elif task == "sentiment":
-
+            # Always use chat with appropriate prompt for sentiment analysis
+            if output_format == "json":
+                # Create JSON-formatted prompt
+                json_prompt = self.create_json_prompt(
+                    f"Please analyze the sentiment of the following text: {input_data}",
+                    json_schema or {
+                        "type": "object",
+                        "properties": {
+                            "sentiment": {"type": "string", "enum": ["positive", "negative", "neutral"]},
+                            "confidence": {"type": "number", "minimum": 0, "maximum": 1},
+                            "explanation": {"type": "string"}
+                        },
+                        "required": ["sentiment"]
+                    }
+                )
+                result = await self.chat(json_prompt, show_reasoning=show_reasoning)
+            else:
+                # Use simple chat prompt for sentiment analysis
+                sentiment_prompt = f"Please analyze the sentiment of the following text and classify it as positive, negative, or neutral:\n\n{input_data}\n\nSentiment:"
+                result = await self.chat(sentiment_prompt, show_reasoning=show_reasoning)
 
         # ==================== Coding tasks ====================
         elif task == "code":
-
+            # Always use chat with appropriate prompt for code generation
+            language = kwargs.get("language", "")
+            style = kwargs.get("style", "")
+
+            code_prompt = f"Please write code"
+            if language:
+                code_prompt += f" in {language}"
+            code_prompt += f" for the following requirement:\n\n{input_data}\n\n"
+
+            if style:
+                code_prompt += f"Style requirements: {style}\n\n"
+
+            code_prompt += "Please provide clean, working code with comments."
+
+            result = await self.chat(code_prompt, show_reasoning=show_reasoning)
         elif task == "explain_code":
-
+            result = await self.explain_code(input_data, kwargs.get("language"))
         elif task == "debug_code":
-
+            result = await self.debug_code(input_data, kwargs.get("language"))
         elif task == "refactor_code":
-
+            result = await self.refactor_code(input_data, kwargs.get("language"), kwargs.get("improvements"))
 
         # ==================== Reasoning tasks ====================
         elif task == "reason":
-
+            # Always use chat with appropriate prompt for reasoning
+            reasoning_type = kwargs.get("reasoning_type", "")
+
+            reason_prompt = f"Please analyze and explain the reasoning behind the following question or topic"
+            if reasoning_type:
+                reason_prompt += f" using {reasoning_type} reasoning"
+            reason_prompt += f":\n\n{input_data}\n\n"
+            reason_prompt += "Provide a clear, step-by-step explanation of your reasoning process."
+
+            result = await self.chat(reason_prompt, show_reasoning=show_reasoning)
         elif task == "solve":
-
+            # Always use chat with appropriate prompt for problem solving
+            problem_type = kwargs.get("problem_type", "")
+
+            solve_prompt = f"Please solve the following problem"
+            if problem_type:
+                solve_prompt += f" (type: {problem_type})"
+            solve_prompt += f":\n\n{input_data}\n\n"
+            solve_prompt += "Provide a clear solution with step-by-step explanation."
+
+            result = await self.chat(solve_prompt, show_reasoning=show_reasoning)
         elif task == "plan":
-
+            result = await self.create_plan(input_data, kwargs.get("plan_type"))
+        elif task == "deep_research":
+            result = await self.deep_research(input_data, kwargs.get("research_type"), kwargs.get("search_enabled", True))
 
         # ==================== Tool-calling tasks ====================
         elif task == "tool_call":
-
+            result = await self.call_tools(input_data, kwargs.get("available_tools"))
         elif task == "function_call":
-
+            function_name = kwargs.get("function_name")
+            if not function_name:
+                raise ValueError("function_name is required for function_call task")
+            result = await self.call_function(input_data, function_name, kwargs.get("parameters"))
 
         else:
             raise NotImplementedError(f"{self.__class__.__name__} does not support task: {task}")
+
+        # Apply output formatting if requested
+        if result is not None and output_format:
+            # Extract the raw response for formatting
+            # If result is a dict with 'message' key, use the message for formatting
+            format_input = result
+            if isinstance(result, dict) and 'message' in result:
+                format_input = result['message']
+
+            formatted_result = self.format_structured_output(
+                response=format_input,
+                output_format=output_format,
+                schema=json_schema,
+                repair_attempts=format_options.get("repair_attempts", 3)
+            )
+
+            # If formatting succeeded, return formatted result
+            if formatted_result.get("success", False):
+                return {
+                    "result": formatted_result["data"],
+                    "formatted": True,
+                    "format": output_format,
+                    "original": result
+                }
+            else:
+                # If formatting failed, return original with error info
+                return {
+                    "result": result,
+                    "formatted": False,
+                    "format_errors": formatted_result.get("errors", []),
+                    "original": result
+                }
+
+        # Return unformatted result
+        return result if result is not None else {"message": "Task completed but returned no result"}
 
     # ==================== Chat methods ====================
 
     async def chat(
         self,
         input_data: Union[str, List[Dict[str, str]], Any],
-        max_tokens: Optional[int] = None
+        max_tokens: Optional[int] = None,
+        show_reasoning: bool = False
     ) -> Dict[str, Any]:
         """
-        Chat -
+        Chat - delegates to the ainvoke method
 
         Args:
             input_data: input messages
             max_tokens: maximum number of tokens to generate
+            show_reasoning: whether to show the reasoning process
 
         Returns:
             Dict containing chat response
         """
-
+        result = await self.ainvoke(input_data, show_reasoning=show_reasoning)
+        # Ensure we return a proper response structure
+        if result is None:
+            logger.warning("ainvoke returned None - this may indicate an implementation issue")
+            return {"message": ""}
+
+        # Extract content if it's an AIMessage object
+        if hasattr(result, 'content'):
+            content = result.content
+        else:
+            content = str(result)
+
+        return {"message": content}
 
     # ==================== Text generation methods ====================
 
```
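Taken together, this hunk turns `invoke()` into a task dispatcher with optional structured output: every branch fills `result`, and a non-empty `output_format` routes it through `format_structured_output()` before returning. A minimal usage sketch follows; `get_llm_service()` is an assumed helper standing in for however the application obtains a concrete `BaseLLMService` subclass, and is not part of this diff.

```python
import asyncio

async def main():
    svc = get_llm_service()  # assumed helper, not part of this diff

    result = await svc.invoke(
        "The new release fixed every bug I reported. Fantastic work!",
        task="sentiment",
        output_format="json",  # routes the reply through format_structured_output()
    )

    if result.get("formatted"):
        print(result["result"])         # parsed JSON, e.g. {"sentiment": "positive", ...}
    else:
        print(result["format_errors"])  # parsing failed; raw reply kept under "original"

asyncio.run(main())
```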
```diff
@@ -303,6 +448,17 @@ class BaseLLMService(BaseService):
         """
         raise NotImplementedError(f"{self.__class__.__name__} does not support create_plan task")
 
+    async def deep_research(
+        self,
+        input_data: Union[str, Any],
+        research_type: Optional[str] = None,
+        search_enabled: bool = True
+    ) -> Dict[str, Any]:
+        """
+        Deep research - a task specific to O-series models, supporting web search and in-depth analysis
+        """
+        raise NotImplementedError(f"{self.__class__.__name__} does not support deep_research task")
+
     # ==================== Tool-calling methods ====================
 
     async def call_tools(
```
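The base implementation only defines the contract, so `task="deep_research"` (dispatched in the `invoke()` hunk above) succeeds only on subclasses that override this stub. A rough call sketch, with the service instance `svc` assumed:

```python
# Inside an async function; svc must be a subclass that overrides deep_research().
report = await svc.invoke(
    "Survey the licensing landscape for open-weight LLMs",
    task="deep_research",
    research_type="survey",  # forwarded as kwargs.get("research_type")
    search_enabled=True,     # forwarded as kwargs.get("search_enabled", True)
)
```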
```diff
@@ -354,7 +510,7 @@ class BaseLLMService(BaseService):
         """Convert message formats using the adapter manager"""
         return self.adapter_manager.convert_messages(input_data)
 
-    def _format_response(self, response: str, original_input: Any) -> Union[str, Any]:
+    def _format_response(self, response: Union[str, Any], original_input: Any) -> Union[str, Any]:
         """Format the response using the adapter manager"""
         return self.adapter_manager.format_response(response, original_input)
 
```
```diff
@@ -379,7 +535,7 @@ class BaseLLMService(BaseService):
         pass
 
     @abstractmethod
-    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
+    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False) -> Union[str, Any]:
         """
         Universal async invocation method that handles different input types
 
```
```diff
@@ -388,6 +544,7 @@ class BaseLLMService(BaseService):
             - str: Simple text prompt
             - list: Message history like [{"role": "user", "content": "hello"}]
             - Any: LangChain message objects or other formats
+            show_reasoning: If True and model supports it, show reasoning process
 
         Returns:
             Model response (string for simple cases, object for complex cases)
```
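Concrete services now have to accept the `show_reasoning` flag in their `ainvoke` override. Below is a toy subclass illustrating only the new signature; the echo logic is invented for this sketch, and any remaining abstract methods of `BaseLLMService` are assumed to be satisfied elsewhere.

```python
from typing import Any, Dict, List, Union

from isa_model.inference.services.llm.base_llm_service import BaseLLMService

class EchoLLMService(BaseLLMService):  # illustrative subclass only
    async def ainvoke(
        self,
        input_data: Union[str, List[Dict[str, str]], Any],
        show_reasoning: bool = False,
    ) -> Union[str, Any]:
        # A real service would call its provider API here; this just echoes.
        text = input_data if isinstance(input_data, str) else str(input_data)
        reply = f"echo: {text}"
        if show_reasoning:
            reply = "[reasoning trace would go here]\n" + reply
        # Reuse the adapter manager so LangChain inputs get matching outputs.
        return self._format_response(reply, input_data)
```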
```diff
@@ -491,6 +648,183 @@ class BaseLLMService(BaseService):
         )
         return 0.0
 
+    # ==================== JSON OUTPUT AND FORMATTING METHODS ====================
+
+    def format_structured_output(
+        self,
+        response: Union[str, Any],
+        output_format: str = "json",
+        schema: Optional[Dict] = None,
+        repair_attempts: int = 3
+    ) -> Dict[str, Any]:
+        """
+        Format response as structured output (JSON, etc.)
+
+        Args:
+            response: Raw response from model
+            output_format: Desired output format ("json", "code", "structured")
+            schema: Optional JSON schema for validation
+            repair_attempts: Number of JSON repair attempts
+
+        Returns:
+            Dict with formatted output and metadata
+        """
+        if output_format == "json":
+            if isinstance(response, str):
+                return self.text_processor.extract_json_from_text(response, schema, repair_attempts)
+            else:
+                # Handle response objects with content attribute
+                content = getattr(response, 'content', str(response))
+                return self.text_processor.extract_json_from_text(content, schema, repair_attempts)
+
+        elif output_format == "code":
+            content = response if isinstance(response, str) else getattr(response, 'content', str(response))
+            code_blocks = self.text_processor.extract_code_blocks(content)
+            return {
+                "success": True,
+                "data": code_blocks,
+                "method": "code_block_extraction",
+                "errors": []
+            }
+
+        elif output_format == "structured":
+            # Use ResponseParser for general structured parsing
+            content = response if isinstance(response, str) else getattr(response, 'content', str(response))
+            parsed = self.response_parser.parse_structured_response(content, "json")
+            if parsed:
+                return {
+                    "success": True,
+                    "data": parsed,
+                    "method": "structured_parsing",
+                    "errors": []
+                }
+            else:
+                return {
+                    "success": False,
+                    "data": content,
+                    "method": "raw_fallback",
+                    "errors": ["Failed to parse as structured output"]
+                }
+
+        # Fallback: return raw response
+        return {
+            "success": True,
+            "data": response,
+            "method": "raw_output",
+            "errors": []
+        }
+
+    def create_json_prompt(
+        self,
+        base_prompt: str,
+        json_schema: Optional[Dict] = None,
+        output_instructions: Optional[str] = None
+    ) -> str:
+        """
+        Create a prompt that requests JSON output
+
+        Args:
+            base_prompt: The base prompt content
+            json_schema: Optional JSON schema to include in prompt
+            output_instructions: Custom output format instructions
+
+        Returns:
+            Enhanced prompt requesting JSON output
+        """
+        if output_instructions:
+            json_instruction = output_instructions
+        else:
+            json_instruction = LLMPromptTemplates.OUTPUT_FORMATS["json"]
+
+        if json_schema:
+            schema_text = f"\n\nPlease format your response according to this JSON schema:\n```json\n{json.dumps(json_schema, indent=2)}\n```"
+            return f"{base_prompt}{schema_text}\n\n{json_instruction}"
+        else:
+            return f"{base_prompt}\n\n{json_instruction}"
+
+    def create_structured_prompt(
+        self,
+        task_type: str,
+        content: str,
+        output_format: str = "json",
+        **kwargs
+    ) -> str:
+        """
+        Create a structured prompt using LLMPrompts templates
+
+        Args:
+            task_type: Type of task (from LLMPrompts methods)
+            content: Main content/input
+            output_format: Desired output format
+            **kwargs: Additional arguments for the prompt template
+
+        Returns:
+            Formatted prompt string
+        """
+        try:
+            # Get the appropriate prompt template
+            if hasattr(self.llm_prompts, f"{task_type}_prompt"):
+                method = getattr(self.llm_prompts, f"{task_type}_prompt")
+                base_prompt = method(content, **kwargs)
+            else:
+                # Fallback to generic prompt
+                base_prompt = f"Please {task_type} the following:\n\n{content}"
+
+            # Add output format instructions
+            if output_format in LLMPromptTemplates.OUTPUT_FORMATS:
+                format_instruction = LLMPromptTemplates.OUTPUT_FORMATS[output_format]
+                return f"{base_prompt}\n\n{format_instruction}"
+
+            return base_prompt
+
+        except Exception as e:
+            logger.warning(f"Failed to create structured prompt: {e}")
+            return f"Please {task_type} the following:\n\n{content}"
+
+    def count_tokens(self, text: Union[str, List[Dict[str, str]]]) -> int:
+        """
+        Count tokens in text or message list
+
+        Args:
+            text: String or message list to count tokens for
+
+        Returns:
+            Number of tokens
+        """
+        if isinstance(text, str):
+            return self.token_counter.count_tokens(text)
+        elif isinstance(text, list):
+            return self.token_counter.count_messages_tokens(text)
+        else:
+            return self.token_counter.count_tokens(str(text))
+
+    def truncate_to_token_limit(self, text: str, max_tokens: int) -> str:
+        """
+        Truncate text to fit within token limit
+
+        Args:
+            text: Text to truncate
+            max_tokens: Maximum number of tokens
+
+        Returns:
+            Truncated text
+        """
+        return self.token_counter.truncate_text(text, max_tokens)
+
+    def split_text_by_tokens(self, text: str, chunk_size: int, overlap: int = 0) -> List[str]:
+        """
+        Split text into chunks by token count
+
+        Args:
+            text: Text to split
+            chunk_size: Size of each chunk in tokens
+            overlap: Number of overlapping tokens between chunks
+
+        Returns:
+            List of text chunks
+        """
+        return self.token_counter.split_text_by_tokens(text, chunk_size, overlap)
+
     # ==================== METADATA AND UTILITY METHODS ====================
 
     def get_supported_tasks(self) -> List[str]:
```
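These helpers are ordinary instance methods, so they can also be called directly, outside `invoke()`. A rough sketch against an already-constructed service instance (`svc` is assumed):

```python
raw = 'Model said: {"status": "ok", "items": [1, 2, 3]}'

parsed = svc.format_structured_output(raw, output_format="json")
if parsed["success"]:
    print(parsed["data"])    # the extracted JSON payload
else:
    print(parsed["errors"])

# The token utilities delegate to the TokenCounter created in __init__:
n = svc.count_tokens([{"role": "user", "content": "hello"}])
clipped = svc.truncate_to_token_limit("a very long document ...", max_tokens=256)
chunks = svc.split_text_by_tokens("a very long document ...", chunk_size=512, overlap=32)
```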
```diff
@@ -527,6 +861,7 @@ class BaseLLMService(BaseService):
             'reason_about': 'reason',
             'solve_problem': 'solve',
             'create_plan': 'plan',
+            'deep_research': 'deep_research',
             # Tool-calling tasks
             'call_tools': 'tool_call',
             'call_function': 'function_call'
```