agentscope-runtime 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in the public registry.
- agentscope_runtime/common/container_clients/__init__.py +0 -0
- agentscope_runtime/{sandbox/manager → common}/container_clients/kubernetes_client.py +546 -6
- agentscope_runtime/engine/__init__.py +12 -0
- agentscope_runtime/engine/agents/agentscope_agent.py +130 -10
- agentscope_runtime/engine/agents/agno_agent.py +8 -10
- agentscope_runtime/engine/agents/langgraph_agent.py +52 -9
- agentscope_runtime/engine/app/__init__.py +6 -0
- agentscope_runtime/engine/app/agent_app.py +239 -0
- agentscope_runtime/engine/app/base_app.py +181 -0
- agentscope_runtime/engine/app/celery_mixin.py +92 -0
- agentscope_runtime/engine/deployers/__init__.py +13 -0
- agentscope_runtime/engine/deployers/adapter/responses/__init__.py +0 -0
- agentscope_runtime/engine/deployers/adapter/responses/response_api_adapter_utils.py +2890 -0
- agentscope_runtime/engine/deployers/adapter/responses/response_api_agent_adapter.py +51 -0
- agentscope_runtime/engine/deployers/adapter/responses/response_api_protocol_adapter.py +314 -0
- agentscope_runtime/engine/deployers/base.py +1 -0
- agentscope_runtime/engine/deployers/cli_fc_deploy.py +203 -0
- agentscope_runtime/engine/deployers/kubernetes_deployer.py +272 -0
- agentscope_runtime/engine/deployers/local_deployer.py +414 -501
- agentscope_runtime/engine/deployers/modelstudio_deployer.py +838 -0
- agentscope_runtime/engine/deployers/utils/__init__.py +0 -0
- agentscope_runtime/engine/deployers/utils/deployment_modes.py +14 -0
- agentscope_runtime/engine/deployers/utils/docker_image_utils/__init__.py +8 -0
- agentscope_runtime/engine/deployers/utils/docker_image_utils/docker_image_builder.py +429 -0
- agentscope_runtime/engine/deployers/utils/docker_image_utils/dockerfile_generator.py +240 -0
- agentscope_runtime/engine/deployers/utils/docker_image_utils/runner_image_factory.py +306 -0
- agentscope_runtime/engine/deployers/utils/package_project_utils.py +1163 -0
- agentscope_runtime/engine/deployers/utils/service_utils/__init__.py +9 -0
- agentscope_runtime/engine/deployers/utils/service_utils/fastapi_factory.py +1064 -0
- agentscope_runtime/engine/deployers/utils/service_utils/fastapi_templates.py +157 -0
- agentscope_runtime/engine/deployers/utils/service_utils/process_manager.py +268 -0
- agentscope_runtime/engine/deployers/utils/service_utils/service_config.py +75 -0
- agentscope_runtime/engine/deployers/utils/service_utils/service_factory.py +220 -0
- agentscope_runtime/engine/deployers/utils/service_utils/standalone_main.py.j2 +211 -0
- agentscope_runtime/engine/deployers/utils/wheel_packager.py +389 -0
- agentscope_runtime/engine/helpers/agent_api_builder.py +651 -0
- agentscope_runtime/engine/runner.py +76 -35
- agentscope_runtime/engine/schemas/agent_schemas.py +112 -2
- agentscope_runtime/engine/schemas/embedding.py +37 -0
- agentscope_runtime/engine/schemas/modelstudio_llm.py +310 -0
- agentscope_runtime/engine/schemas/oai_llm.py +538 -0
- agentscope_runtime/engine/schemas/realtime.py +254 -0
- agentscope_runtime/engine/services/tablestore_memory_service.py +4 -1
- agentscope_runtime/engine/tracing/__init__.py +9 -3
- agentscope_runtime/engine/tracing/asyncio_util.py +24 -0
- agentscope_runtime/engine/tracing/base.py +66 -34
- agentscope_runtime/engine/tracing/local_logging_handler.py +45 -31
- agentscope_runtime/engine/tracing/message_util.py +528 -0
- agentscope_runtime/engine/tracing/tracing_metric.py +20 -8
- agentscope_runtime/engine/tracing/tracing_util.py +130 -0
- agentscope_runtime/engine/tracing/wrapper.py +794 -169
- agentscope_runtime/sandbox/box/base/base_sandbox.py +2 -1
- agentscope_runtime/sandbox/box/browser/browser_sandbox.py +2 -1
- agentscope_runtime/sandbox/box/dummy/dummy_sandbox.py +2 -1
- agentscope_runtime/sandbox/box/filesystem/filesystem_sandbox.py +2 -1
- agentscope_runtime/sandbox/box/gui/gui_sandbox.py +2 -1
- agentscope_runtime/sandbox/box/training_box/training_box.py +0 -42
- agentscope_runtime/sandbox/client/http_client.py +52 -18
- agentscope_runtime/sandbox/constant.py +3 -0
- agentscope_runtime/sandbox/custom/custom_sandbox.py +2 -1
- agentscope_runtime/sandbox/custom/example.py +2 -1
- agentscope_runtime/sandbox/enums.py +0 -1
- agentscope_runtime/sandbox/manager/sandbox_manager.py +29 -22
- agentscope_runtime/sandbox/model/container.py +6 -0
- agentscope_runtime/sandbox/registry.py +1 -1
- agentscope_runtime/sandbox/tools/tool.py +4 -0
- agentscope_runtime/version.py +1 -1
- {agentscope_runtime-0.1.6.dist-info → agentscope_runtime-0.2.0.dist-info}/METADATA +103 -59
- {agentscope_runtime-0.1.6.dist-info → agentscope_runtime-0.2.0.dist-info}/RECORD +87 -52
- {agentscope_runtime-0.1.6.dist-info → agentscope_runtime-0.2.0.dist-info}/entry_points.txt +1 -0
- /agentscope_runtime/{sandbox/manager/container_clients → common}/__init__.py +0 -0
- /agentscope_runtime/{sandbox/manager → common}/collections/__init__.py +0 -0
- /agentscope_runtime/{sandbox/manager → common}/collections/base_mapping.py +0 -0
- /agentscope_runtime/{sandbox/manager → common}/collections/base_queue.py +0 -0
- /agentscope_runtime/{sandbox/manager → common}/collections/base_set.py +0 -0
- /agentscope_runtime/{sandbox/manager → common}/collections/in_memory_mapping.py +0 -0
- /agentscope_runtime/{sandbox/manager → common}/collections/in_memory_queue.py +0 -0
- /agentscope_runtime/{sandbox/manager → common}/collections/in_memory_set.py +0 -0
- /agentscope_runtime/{sandbox/manager → common}/collections/redis_mapping.py +0 -0
- /agentscope_runtime/{sandbox/manager → common}/collections/redis_queue.py +0 -0
- /agentscope_runtime/{sandbox/manager → common}/collections/redis_set.py +0 -0
- /agentscope_runtime/{sandbox/manager → common}/container_clients/agentrun_client.py +0 -0
- /agentscope_runtime/{sandbox/manager → common}/container_clients/base_client.py +0 -0
- /agentscope_runtime/{sandbox/manager → common}/container_clients/docker_client.py +0 -0
- {agentscope_runtime-0.1.6.dist-info → agentscope_runtime-0.2.0.dist-info}/WHEEL +0 -0
- {agentscope_runtime-0.1.6.dist-info → agentscope_runtime-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {agentscope_runtime-0.1.6.dist-info → agentscope_runtime-0.2.0.dist-info}/top_level.txt +0 -0
agentscope_runtime/engine/runner.py

@@ -1,27 +1,33 @@
 # -*- coding: utf-8 -*-
 import uuid
-from typing import Optional, List, AsyncGenerator, Any
 from contextlib import AsyncExitStack
+from typing import Optional, List, AsyncGenerator, Any, Union, Dict
 
-from
-
-
+from agentscope_runtime.engine.deployers.utils.service_utils import (
+    ServicesConfig,
+)
 from .agents import Agent
-from .schemas.context import Context
 from .deployers import (
     DeployManager,
     LocalDeployManager,
 )
+from .deployers.adapter.protocol_adapter import ProtocolAdapter
 from .schemas.agent_schemas import (
     Event,
     AgentRequest,
     RunStatus,
     AgentResponse,
+    SequenceNumberGenerator,
 )
+from .schemas.context import Context
 from .services.context_manager import ContextManager
 from .services.environment_manager import EnvironmentManager
 from .tracing import TraceType
 from .tracing.wrapper import trace
+from .tracing.message_util import (
+    merge_agent_response,
+    get_agent_response_finish_reason,
+)
 
 
 class Runner:

@@ -40,7 +46,9 @@ class Runner:
         """
         self._agent = agent
         self._environment_manager = environment_manager
-        self._context_manager =
+        self._context_manager = (
+            context_manager or ContextManager()
+        )  # Add default context manager
         self._deploy_managers = {}
         self._exit_stack = AsyncExitStack()
 

@@ -77,37 +85,62 @@ class Runner:
         endpoint_path: str = "/process",
         stream: bool = True,
         protocol_adapters: Optional[list[ProtocolAdapter]] = None,
+        requirements: Optional[Union[str, List[str]]] = None,
+        extra_packages: Optional[List[str]] = None,
+        base_image: str = "python:3.9-slim",
+        environment: Optional[Dict[str, str]] = None,
+        runtime_config: Optional[Dict] = None,
+        services_config: Optional[Union[ServicesConfig, dict]] = None,
+        **kwargs,
     ):
         """
         Deploys the agent as a service.
 
         Args:
-            protocol_adapters: protocol adapters
            deploy_manager: Deployment manager to handle service deployment
            endpoint_path: API endpoint path for the processing function
            stream: If start a streaming service
+            protocol_adapters: protocol adapters
+            requirements: PyPI dependencies
+            extra_packages: User code directory/file path
+            base_image: Docker base image (for containerized deployment)
+            environment: Environment variables dict
+            runtime_config: Runtime configuration dict
+            services_config: Services configuration dict
+            **kwargs: Additional arguments passed to deployment manager
        Returns:
            URL of the deployed service
 
        Raises:
            RuntimeError: If deployment fails
        """
-        if stream:
-            deploy_func = self.stream_query
-        else:
-            deploy_func = self.query
        deploy_result = await deploy_manager.deploy(
-
+            runner=self,
            endpoint_path=endpoint_path,
+            stream=stream,
            protocol_adapters=protocol_adapters,
+            requirements=requirements,
+            extra_packages=extra_packages,
+            base_image=base_image,
+            environment=environment,
+            runtime_config=runtime_config,
+            services_config=services_config,
+            **kwargs,
        )
+
+        # TODO: add redis or other persistant method
        self._deploy_managers[deploy_manager.deploy_id] = deploy_result
        return deploy_result
 
-    @trace(
+    @trace(
+        TraceType.AGENT_STEP,
+        trace_name="agent_step",
+        merge_output_func=merge_agent_response,
+        get_finish_reason_func=get_agent_response_finish_reason,
+    )
     async def stream_query(  # pylint:disable=unused-argument
         self,
-        request: AgentRequest,
+        request: Union[AgentRequest, dict],
         user_id: Optional[str] = None,
         tools: Optional[List] = None,
         **kwargs: Any,
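The deploy() surface gains packaging and runtime options that are forwarded to the deploy manager, which now receives the runner itself instead of a pre-selected query function. A minimal call sketch, assuming an existing Agent instance (`my_agent` is a placeholder) and a default-constructed LocalDeployManager; the requirement, package, and environment values are illustrative only:

import asyncio

from agentscope_runtime.engine.runner import Runner
from agentscope_runtime.engine.deployers import LocalDeployManager


async def main():
    # context_manager may now be omitted; a default ContextManager is created.
    runner = Runner(agent=my_agent)  # my_agent: placeholder Agent instance
    deploy_result = await runner.deploy(
        deploy_manager=LocalDeployManager(),  # ctor args omitted for brevity
        endpoint_path="/process",
        stream=True,
        # New 0.2.0 packaging/runtime knobs, forwarded through deploy():
        requirements=["requests>=2.31"],
        extra_packages=["./my_project"],
        base_image="python:3.9-slim",
        environment={"DASHSCOPE_API_KEY": "sk-..."},
    )
    print(deploy_result)


asyncio.run(main())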
agentscope_runtime/engine/runner.py (continued)

@@ -115,13 +148,25 @@ class Runner:
         """
         Streams the agent.
         """
+        if isinstance(request, dict):
+            request = AgentRequest(**request)
+
+        seq_gen = SequenceNumberGenerator()
+
+        # Initial response
         response = AgentResponse()
-        yield response
+        yield seq_gen.yield_with_sequence(response)
 
+        # Set to in-progress status
         response.in_progress()
-        yield response
+        yield seq_gen.yield_with_sequence(response)
+
+        if user_id is None:
+            if getattr(request, "user_id", None):
+                user_id = request.user_id
+            else:
+                user_id = ""  # Default user id
 
-        user_id = user_id or str(uuid.uuid4())
         session_id = request.session_id or str(uuid.uuid4())
         request_input = request.input
         session = await self._context_manager.compose_session(

@@ -167,36 +212,32 @@ class Runner:
             request_input=request_input,
         )
 
-        sequence_number = 0
         async for event in context.agent.run_async(context):
             if (
                 event.status == RunStatus.Completed
                 and event.object == "message"
             ):
                 response.add_new_message(event)
-                event
-                yield event
-                sequence_number += 1
+                yield seq_gen.yield_with_sequence(event)
 
         await context.context_manager.append(
             session=context.session,
             event_output=response.output,
         )
-        response.
-        yield response.completed()
+        yield seq_gen.yield_with_sequence(response.completed())
 
-
-
-
-
-
-
-
-
-
-
-
-
+    # TODO: will be added before 2025/11/30
+    # @trace(TraceType.AGENT_STEP)
+    # async def query(  # pylint:disable=unused-argument
+    #     self,
+    #     message: List[dict],
+    #     session_id: Optional[str] = None,
+    #     **kwargs: Any,
+    # ) -> ChatCompletion:
+    #     """
+    #     Streams the agent.
+    #     """
+    #     return self._agent.query(message, session_id)
 
     # TODO: should be sync method?
     async def stop(
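stream_query() now accepts a plain dict (coerced to AgentRequest) and stamps every yielded event with a sequence number. A consumption sketch, assuming a runner built as above; the message payload shape follows the AgentRequest/TextContent conventions and is illustrative:

async def consume(runner):
    request = {
        "session_id": "demo-session",
        "input": [
            {
                "role": "user",
                "content": [{"type": "text", "text": "Hello"}],
            },
        ],
    }
    async for event in runner.stream_query(request=request):
        # sequence_number is assigned by SequenceNumberGenerator (defined
        # in agent_schemas.py, see below).
        print(event.sequence_number, event.object, event.status)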
agentscope_runtime/engine/schemas/agent_schemas.py

@@ -27,6 +27,7 @@ class MessageType:
     MCP_APPROVAL_REQUEST = "mcp_approval_request"
     MCP_TOOL_CALL = "mcp_call"
     MCP_APPROVAL_RESPONSE = "mcp_approval_response"
+    REASONING = "reasoning"
     HEARTBEAT = "heartbeat"
     ERROR = "error"
 

@@ -45,6 +46,8 @@ class ContentType:
     DATA = "data"
     IMAGE = "image"
     AUDIO = "audio"
+    FILE = "file"
+    REFUSAL = "refusal"
 
 
 class Role:

@@ -66,6 +69,8 @@ class RunStatus:
     Failed = "failed"
     Rejected = "rejected"
     Unknown = "unknown"
+    Queued = "queued"
+    Incomplete = "incomplete"
 
 
 class FunctionParameters(BaseModel):

@@ -282,6 +287,63 @@ class DataContent(Content):
     """The data content."""
 
 
+class AudioContent(Content):
+    type: Literal[ContentType.AUDIO] = ContentType.AUDIO
+    """The type of the content part."""
+
+    data: Optional[str] = None
+    """The audio data details."""
+
+    format: Optional[str] = None
+    """
+    The format of the audio data.
+    """
+
+
+class FileContent(Content):
+    type: Literal[ContentType.FILE] = ContentType.FILE
+    """The type of the content part."""
+
+    file_url: Optional[str] = None
+    """The file URL details."""
+
+    file_id: Optional[str] = None
+    """The file ID details."""
+
+    filename: Optional[str] = None
+    """The file name details."""
+
+    file_data: Optional[str] = None
+    """The file data details."""
+
+
+class RefusalContent(Content):
+    type: Literal[ContentType.REFUSAL] = ContentType.REFUSAL
+    """The type of the content part."""
+
+    refusal: Optional[str] = None
+    """The refusal content."""
+
+
+class ToolCall(BaseModel):
+    arguments: str
+    """A JSON string of the arguments to pass to the function."""
+
+    call_id: str
+    """The unique ID of the function tool call generated by the model."""
+
+    name: str
+    """The name of the function to run."""
+
+
+class ToolCallOutput(BaseModel):
+    call_id: str
+    """The unique ID of the function tool call generated by the model."""
+
+    output: str
+    """A JSON string of the output of the function tool call."""
+
+
 AgentRole: TypeAlias = Literal[
     Role.ASSISTANT,
     Role.SYSTEM,

@@ -289,9 +351,15 @@ AgentRole: TypeAlias = Literal[
     Role.TOOL,
 ]
 
-
 AgentContent = Annotated[
-    Union[
+    Union[
+        TextContent,
+        ImageContent,
+        DataContent,
+        AudioContent,
+        FileContent,
+        RefusalContent,
+    ],
     Field(discriminator="type"),
 ]
 
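With the three new content models in the discriminated union, a payload is routed to the right class by its "type" tag. A sketch using pydantic v2's TypeAdapter, assuming the inherited Content fields remain optional as they are for the existing content types; the payload values are illustrative:

from pydantic import TypeAdapter

from agentscope_runtime.engine.schemas.agent_schemas import AgentContent

adapter = TypeAdapter(AgentContent)

file_part = adapter.validate_python(
    {"type": "file", "file_id": "file-123", "filename": "report.pdf"},
)
refusal_part = adapter.validate_python(
    {"type": "refusal", "refusal": "I can't help with that."},
)
print(type(file_part).__name__)     # FileContent
print(type(refusal_part).__name__)  # RefusalContent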
agentscope_runtime/engine/schemas/agent_schemas.py (continued)

@@ -684,6 +752,48 @@ class AgentResponse(BaseResponse):
     """conversation id for dialog"""
 
 
+class SequenceNumberGenerator:
+    """
+    A simple sequence number generator for streaming events.
+
+    This class encapsulates the logic for generating sequential numbers,
+    making the code more maintainable and less error-prone.
+    """
+
+    def __init__(self, start: int = 0):
+        """
+        Initialize the generator with a starting number.
+
+        Args:
+            start: The starting sequence number (default: 0)
+        """
+        self._current = start
+
+    def next(self) -> int:
+        """
+        Get the next sequence number and increment the counter.
+
+        Returns:
+            The current sequence number before incrementing
+        """
+        current = self._current
+        self._current += 1
+        return current
+
+    def yield_with_sequence(self, event: Event) -> Event:
+        """
+        Set the sequence number on an event and increment the counter.
+
+        Args:
+            event: The event to set the sequence number on
+
+        Returns:
+            The same event with sequence number set
+        """
+        event.sequence_number = self.next()
+        return event
+
+
 def convert_to_openai_tool_call(function: FunctionCall):
     return {
         "id": function.get("call_id", None),
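SequenceNumberGenerator is a plain counter: next() returns the current value and post-increments, and yield_with_sequence() stamps that value onto an event before handing it back. Direct usage of the class as defined above:

from agentscope_runtime.engine.schemas.agent_schemas import (
    SequenceNumberGenerator,
)

gen = SequenceNumberGenerator()
assert gen.next() == 0
assert gen.next() == 1

gen = SequenceNumberGenerator(start=100)
assert gen.next() == 100
# In stream_query, each Event/AgentResponse is passed through
# gen.yield_with_sequence(event) before being yielded to the caller.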
agentscope_runtime/engine/schemas/embedding.py (new file)

@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+from typing import List, Optional, Literal
+
+from openai.types import Embedding
+from pydantic import BaseModel
+
+
+class Usage(BaseModel):
+    prompt_tokens: Optional[int] = None
+    """The number of tokens used by the prompt."""
+
+    total_tokens: Optional[int] = None
+    """The total number of tokens used by the request."""
+
+    input_tokens: Optional[int] = None
+
+    text_count: Optional[int] = None
+
+    image_count: Optional[int] = None
+
+    video_count: Optional[int] = None
+
+    duration: Optional[float] = None
+
+
+class EmbeddingResponse(BaseModel):
+    data: List[Embedding]
+    """The list of embeddings generated by the model."""
+
+    model: str
+    """The name of the model used to generate the embedding."""
+
+    object: Literal["list"]
+    """The object type, which is always "list"."""
+
+    usage: Usage
+    """The usage information for the request."""
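A construction sketch for the new embedding schema; the model name is illustrative, and openai.types.Embedding is the standard OpenAI SDK model with embedding, index, and object fields:

from openai.types import Embedding

from agentscope_runtime.engine.schemas.embedding import (
    EmbeddingResponse,
    Usage,
)

resp = EmbeddingResponse(
    data=[Embedding(embedding=[0.1, 0.2, 0.3], index=0, object="embedding")],
    model="text-embedding-v3",  # illustrative model name
    object="list",
    usage=Usage(total_tokens=3, input_tokens=3),
)
print(resp.model_dump_json(indent=2))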
agentscope_runtime/engine/schemas/modelstudio_llm.py (new file)

@@ -0,0 +1,310 @@
+# -*- coding: utf-8 -*-
+import os
+from typing import List, Literal, Optional, Union
+
+from openai.types.chat import ChatCompletion, ChatCompletionChunk
+from pydantic import (
+    BaseModel,
+    StrictInt,
+    field_validator,
+    Field,
+)
+
+from .oai_llm import (
+    Parameters,
+    OpenAIMessage,
+)
+
+
+class KnowledgeHolder(BaseModel):
+    source: str
+    """The source identifier or URL where the knowledge was retrieved from."""
+
+    content: str
+    """The actual content or knowledge text retrieved from the source."""
+
+
+class IntentionOptions(BaseModel):
+    white_list: List[str] = Field(default_factory=list)
+    """A list of allowed intentions that can be processed."""
+
+    black_list: List[str] = Field(default_factory=list)
+    """A list of blocked intentions that should not be processed."""
+
+    search_model: str = "search_v6"
+    """The search model version to use for intentions recognition."""
+
+    intensity: Optional[int] = None
+    """The intensity level for intentions matching and processing."""
+
+    scene_id: Optional[str] = None
+    """The scene identifier for context-aware intentions processing."""
+
+
+class SearchOptions(BaseModel):
+    """
+    Search Options on Modelstudio platform for knowledge retrieval and web
+    search.
+    """
+
+    enable_source: bool = False
+    """Whether to include source information in search results."""
+
+    enable_citation: bool = False
+    """Whether to include citation information for retrieved content."""
+
+    enable_readpage: bool = False
+    """Whether to enable full page reading for web content."""
+
+    enable_online_read: bool = False
+    """Whether to enable online reading capabilities for real-time content."""
+
+    citation_format: str = "[<number>]"
+    """The format string for citations in the response."""
+
+    search_strategy: Literal[
+        "standard",
+        "pro_ultra",
+        "pro",
+        "lite",
+        "pro_max",
+        "image",
+        "turbo",
+        "max",
+    ] = "turbo"
+    """The search strategy to use ('standard', 'pro_ultra',
+    'pro', 'lite','pro_max', 'image','turbo','max'). """
+
+    forced_search: bool = False
+    """Whether to force search even when cached results are available."""
+
+    prepend_search_result: bool = False
+    """Whether to prepend search results to the response."""
+
+    enable_search_extension: bool = False
+    """Whether to enable extended search capabilities."""
+
+    item_cnt: int = 20000
+    """The maximum number of items to retrieve in search results."""
+
+    top_n: int = 0
+    """The number of top results to return (0 means return all)."""
+
+    intention_options: Union[IntentionOptions, None] = IntentionOptions()
+    """Options for intentions recognition and processing during search."""
+
+
+# maximum chunk size from knowledge base [1, 20]
+PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MIN = int(
+    os.getenv(
+        "PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MIN",
+        "1",
+    ),
+)
+PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MAX = int(
+    os.getenv(
+        "PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MAX",
+        "20",
+    ),
+)
+
+
+class RagOptions(BaseModel):
+    model_config = {"populate_by_name": True}
+
+    class FallbackOptions(BaseModel):
+        default_response_type: Optional[str] = "llm"
+        """The type of default response when RAG fails ('llm', 'template',
+        'none'). """
+
+        default_response: Optional[str] = ""
+        """The default response text to use when RAG fails."""
+
+    class RewriteOptions(BaseModel):
+        model_name: Optional[str] = None
+        """The model name to use for rewriting."""
+
+        class_name: Optional[str] = None
+        """The class name to use for rewriting."""
+
+    class RerankOptions(BaseModel):
+        model_name: Optional[str] = None
+        """The model name to use for reranking."""
+
+    workspace_id: Optional[str] = ""
+    """The modelstudio workspace id"""
+
+    replaced_word: str = "${documents}"
+    """The placeholder word in prompts that will be replaced with retrieved
+    documents. """
+
+    index_names: Optional[List[str]] = Field(default_factory=list)
+    """List of index names to use for document processing and retrieval."""
+
+    pipeline_ids: Optional[List[str]] = Field(default_factory=list)
+    """List of pipeline IDs to use for document processing and retrieval."""
+
+    file_ids: Optional[List[str]] = Field(
+        default_factory=list,
+        alias="file_id_list",
+    )
+    """List of specific file IDs to search within."""
+
+    prompt_strategy: Optional[str] = Field(
+        default="topK",
+        alias="prompt_strategy_name",
+    )
+    """The strategy for selecting and organizing retrieved content in
+    prompts. """
+
+    maximum_allowed_chunk_num: Optional[int] = 5
+    """The maximum number of document chunks to include in the context."""
+
+    maximum_allowed_length: Optional[int] = 2000
+    """The maximum total length of retrieved content in characters."""
+
+    enable_citation: bool = Field(
+        default=False,
+        alias="prompt_enable_citation",
+    )
+    """Whether to include citation information for retrieved documents."""
+
+    fallback_options: Optional[FallbackOptions] = None
+    """Options for handling cases when RAG retrieval fails."""
+
+    enable_web_search: bool = False
+    """Whether to enable web search as part of the RAG pipeline."""
+
+    session_file_ids: Optional[List[str]] = Field(default_factory=list)
+    """List of file IDs that are specific to the current session."""
+
+    dense_similarity_top_k: Optional[int] = 100
+    """The number of most similar dense vectors to retrieve."""
+
+    sparse_similarity_top_k: Optional[int] = 100
+    """The number of most similar sparse vectors to retrieve."""
+
+    enable_rewrite: Optional[bool] = None
+    """Whether to enable content rewrite during RAG."""
+
+    rewrite: Optional[List[RewriteOptions]] = None
+    """Options for content rewrite."""
+
+    enable_reranking: Optional[bool] = None
+    """Whether to enable content reranking."""
+
+    rerank_min_score: Optional[float] = None
+    """The minimum score threshold for content reranking."""
+
+    rerank_top_n: Optional[int] = None
+    """The number of top results to return for content reranking."""
+
+    rerank: Optional[List[RerankOptions]] = None
+
+    enable_reject_filter: Optional[bool] = None
+    """Whether to enable content rejection filtering."""
+
+    reject_filter_type: Optional[str] = None
+    """The type of content rejection filter to use."""
+
+    reject_filter_model_name: Optional[str] = None
+    """The name of the model to use for content rejection filtering."""
+
+    reject_filter_prompt: Optional[str] = None
+    """The prompt to use for content rejection filtering."""
+
+    enable_agg_search: Optional[bool] = None
+    """Whether to enable aggregation search."""
+
+    enable_hybrid_gen: Optional[bool] = None
+    """Whether to enable hybrid generations."""
+
+    @field_validator("prompt_strategy")
+    def prompt_strategy_check(self, value: str) -> str:
+        if value:
+            value = value.lower()
+            if value in ["topk", "top_k"]:
+                return "topK"
+        return value
+
+    @field_validator("maximum_allowed_chunk_num")
+    def maximum_allowed_chunk_num_check(self, value: int) -> int:
+        if value < int(PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MIN) or value > int(
+            PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MAX,
+        ):
+            raise KeyError(
+                f"Range of maximum_allowed_chunk_num should be "
+                f"[{PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MIN}, "
+                f"{PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MAX}]",
+            )
+        return value
+
+
+class ModelstudioParameters(Parameters):
+    """
+    Parameters for Modelstudio platform, extending the base Parameters with
+    Modelstudio-specific options.
+    """
+
+    repetition_penalty: Union[float, None] = None
+    """Penalty for repeating tokens. Higher values reduce repetition."""
+
+    length_penalty: Union[float, None] = None
+    """Penalty applied to longer sequences. Affects the length of generated
+    text. """
+
+    top_k: Union[StrictInt, None] = None
+    """The number of highest probability vocabulary tokens to keep for top-k
+    filtering."""
+
+    min_tokens: Optional[int] = None
+    """The minimum number of tokens to generate before stopping."""
+
+    result_format: Literal["text", "message"] = "message"
+    """The format of the response ('text' for plain text, 'message' for
+    structured message) """
+
+    incremental_output: bool = False
+    """Whether to return incremental output during generations."""
+
+    # Search
+    enable_search: bool = False
+    """Whether to enable search capabilities for knowledge retrieval."""
+
+    search_options: Optional[SearchOptions] = SearchOptions()
+    """Configuration options for search functionality."""
+
+    # RAG
+    enable_rag: bool = False  # RAGs of modelstudio assistant service
+    """Whether to enable Retrieval-Augmented Generation (RAG) for the
+    Modelstudio assistant service. """
+
+    rag_options: Union[RagOptions, None] = None
+    """Configuration options for RAG functionality."""
+
+    selected_model: Optional[str] = "qwen-max"
+    """The selected model name to use for generations."""
+
+    # Intention
+    intention_options: Optional[IntentionOptions] = None
+    """Options for intentions recognition and processing."""
+
+    # MCP Servers
+    mcp_config_file: Optional[str] = None
+    """Path to the MCP (Model Context Protocol) configuration file."""
+
+
+class ModelstudioChatRequest(ModelstudioParameters):
+    messages: List[OpenAIMessage]
+    """A list of messages comprising the conversation so far."""
+
+    model: str
+    """ID of the model to use for the chat completion."""
+
+
+class ModelstudioChatResponse(ChatCompletion):
+    pass
+
+
+class ModelstudioChatCompletionChunk(ChatCompletionChunk):
+    pass