agentscope-runtime 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. agentscope_runtime/common/container_clients/__init__.py +0 -0
  2. agentscope_runtime/{sandbox/manager → common}/container_clients/kubernetes_client.py +546 -6
  3. agentscope_runtime/engine/__init__.py +12 -0
  4. agentscope_runtime/engine/agents/agentscope_agent.py +130 -10
  5. agentscope_runtime/engine/agents/agno_agent.py +8 -10
  6. agentscope_runtime/engine/agents/langgraph_agent.py +52 -9
  7. agentscope_runtime/engine/app/__init__.py +6 -0
  8. agentscope_runtime/engine/app/agent_app.py +239 -0
  9. agentscope_runtime/engine/app/base_app.py +181 -0
  10. agentscope_runtime/engine/app/celery_mixin.py +92 -0
  11. agentscope_runtime/engine/deployers/__init__.py +13 -0
  12. agentscope_runtime/engine/deployers/adapter/responses/__init__.py +0 -0
  13. agentscope_runtime/engine/deployers/adapter/responses/response_api_adapter_utils.py +2890 -0
  14. agentscope_runtime/engine/deployers/adapter/responses/response_api_agent_adapter.py +51 -0
  15. agentscope_runtime/engine/deployers/adapter/responses/response_api_protocol_adapter.py +314 -0
  16. agentscope_runtime/engine/deployers/base.py +1 -0
  17. agentscope_runtime/engine/deployers/cli_fc_deploy.py +203 -0
  18. agentscope_runtime/engine/deployers/kubernetes_deployer.py +272 -0
  19. agentscope_runtime/engine/deployers/local_deployer.py +414 -501
  20. agentscope_runtime/engine/deployers/modelstudio_deployer.py +838 -0
  21. agentscope_runtime/engine/deployers/utils/__init__.py +0 -0
  22. agentscope_runtime/engine/deployers/utils/deployment_modes.py +14 -0
  23. agentscope_runtime/engine/deployers/utils/docker_image_utils/__init__.py +8 -0
  24. agentscope_runtime/engine/deployers/utils/docker_image_utils/docker_image_builder.py +429 -0
  25. agentscope_runtime/engine/deployers/utils/docker_image_utils/dockerfile_generator.py +240 -0
  26. agentscope_runtime/engine/deployers/utils/docker_image_utils/runner_image_factory.py +306 -0
  27. agentscope_runtime/engine/deployers/utils/package_project_utils.py +1163 -0
  28. agentscope_runtime/engine/deployers/utils/service_utils/__init__.py +9 -0
  29. agentscope_runtime/engine/deployers/utils/service_utils/fastapi_factory.py +1064 -0
  30. agentscope_runtime/engine/deployers/utils/service_utils/fastapi_templates.py +157 -0
  31. agentscope_runtime/engine/deployers/utils/service_utils/process_manager.py +268 -0
  32. agentscope_runtime/engine/deployers/utils/service_utils/service_config.py +75 -0
  33. agentscope_runtime/engine/deployers/utils/service_utils/service_factory.py +220 -0
  34. agentscope_runtime/engine/deployers/utils/service_utils/standalone_main.py.j2 +211 -0
  35. agentscope_runtime/engine/deployers/utils/wheel_packager.py +389 -0
  36. agentscope_runtime/engine/helpers/agent_api_builder.py +651 -0
  37. agentscope_runtime/engine/runner.py +76 -35
  38. agentscope_runtime/engine/schemas/agent_schemas.py +112 -2
  39. agentscope_runtime/engine/schemas/embedding.py +37 -0
  40. agentscope_runtime/engine/schemas/modelstudio_llm.py +310 -0
  41. agentscope_runtime/engine/schemas/oai_llm.py +538 -0
  42. agentscope_runtime/engine/schemas/realtime.py +254 -0
  43. agentscope_runtime/engine/services/tablestore_memory_service.py +4 -1
  44. agentscope_runtime/engine/tracing/__init__.py +9 -3
  45. agentscope_runtime/engine/tracing/asyncio_util.py +24 -0
  46. agentscope_runtime/engine/tracing/base.py +66 -34
  47. agentscope_runtime/engine/tracing/local_logging_handler.py +45 -31
  48. agentscope_runtime/engine/tracing/message_util.py +528 -0
  49. agentscope_runtime/engine/tracing/tracing_metric.py +20 -8
  50. agentscope_runtime/engine/tracing/tracing_util.py +130 -0
  51. agentscope_runtime/engine/tracing/wrapper.py +794 -169
  52. agentscope_runtime/sandbox/box/base/base_sandbox.py +2 -1
  53. agentscope_runtime/sandbox/box/browser/browser_sandbox.py +2 -1
  54. agentscope_runtime/sandbox/box/dummy/dummy_sandbox.py +2 -1
  55. agentscope_runtime/sandbox/box/filesystem/filesystem_sandbox.py +2 -1
  56. agentscope_runtime/sandbox/box/gui/gui_sandbox.py +2 -1
  57. agentscope_runtime/sandbox/box/training_box/training_box.py +0 -42
  58. agentscope_runtime/sandbox/client/http_client.py +52 -18
  59. agentscope_runtime/sandbox/constant.py +3 -0
  60. agentscope_runtime/sandbox/custom/custom_sandbox.py +2 -1
  61. agentscope_runtime/sandbox/custom/example.py +2 -1
  62. agentscope_runtime/sandbox/enums.py +0 -1
  63. agentscope_runtime/sandbox/manager/sandbox_manager.py +29 -22
  64. agentscope_runtime/sandbox/model/container.py +6 -0
  65. agentscope_runtime/sandbox/registry.py +1 -1
  66. agentscope_runtime/sandbox/tools/tool.py +4 -0
  67. agentscope_runtime/version.py +1 -1
  68. {agentscope_runtime-0.1.6.dist-info → agentscope_runtime-0.2.0.dist-info}/METADATA +103 -59
  69. {agentscope_runtime-0.1.6.dist-info → agentscope_runtime-0.2.0.dist-info}/RECORD +87 -52
  70. {agentscope_runtime-0.1.6.dist-info → agentscope_runtime-0.2.0.dist-info}/entry_points.txt +1 -0
  71. /agentscope_runtime/{sandbox/manager/container_clients → common}/__init__.py +0 -0
  72. /agentscope_runtime/{sandbox/manager → common}/collections/__init__.py +0 -0
  73. /agentscope_runtime/{sandbox/manager → common}/collections/base_mapping.py +0 -0
  74. /agentscope_runtime/{sandbox/manager → common}/collections/base_queue.py +0 -0
  75. /agentscope_runtime/{sandbox/manager → common}/collections/base_set.py +0 -0
  76. /agentscope_runtime/{sandbox/manager → common}/collections/in_memory_mapping.py +0 -0
  77. /agentscope_runtime/{sandbox/manager → common}/collections/in_memory_queue.py +0 -0
  78. /agentscope_runtime/{sandbox/manager → common}/collections/in_memory_set.py +0 -0
  79. /agentscope_runtime/{sandbox/manager → common}/collections/redis_mapping.py +0 -0
  80. /agentscope_runtime/{sandbox/manager → common}/collections/redis_queue.py +0 -0
  81. /agentscope_runtime/{sandbox/manager → common}/collections/redis_set.py +0 -0
  82. /agentscope_runtime/{sandbox/manager → common}/container_clients/agentrun_client.py +0 -0
  83. /agentscope_runtime/{sandbox/manager → common}/container_clients/base_client.py +0 -0
  84. /agentscope_runtime/{sandbox/manager → common}/container_clients/docker_client.py +0 -0
  85. {agentscope_runtime-0.1.6.dist-info → agentscope_runtime-0.2.0.dist-info}/WHEEL +0 -0
  86. {agentscope_runtime-0.1.6.dist-info → agentscope_runtime-0.2.0.dist-info}/licenses/LICENSE +0 -0
  87. {agentscope_runtime-0.1.6.dist-info → agentscope_runtime-0.2.0.dist-info}/top_level.txt +0 -0
agentscope_runtime/engine/runner.py

@@ -1,27 +1,33 @@
 # -*- coding: utf-8 -*-
 import uuid
-from typing import Optional, List, AsyncGenerator, Any
 from contextlib import AsyncExitStack
+from typing import Optional, List, AsyncGenerator, Any, Union, Dict
 
-from openai.types.chat import ChatCompletion
-
-from .deployers.adapter.protocol_adapter import ProtocolAdapter
+from agentscope_runtime.engine.deployers.utils.service_utils import (
+    ServicesConfig,
+)
 from .agents import Agent
-from .schemas.context import Context
 from .deployers import (
     DeployManager,
     LocalDeployManager,
 )
+from .deployers.adapter.protocol_adapter import ProtocolAdapter
 from .schemas.agent_schemas import (
     Event,
     AgentRequest,
     RunStatus,
     AgentResponse,
+    SequenceNumberGenerator,
 )
+from .schemas.context import Context
 from .services.context_manager import ContextManager
 from .services.environment_manager import EnvironmentManager
 from .tracing import TraceType
 from .tracing.wrapper import trace
+from .tracing.message_util import (
+    merge_agent_response,
+    get_agent_response_finish_reason,
+)
 
 
 class Runner:
@@ -40,7 +46,9 @@ class Runner:
         """
         self._agent = agent
         self._environment_manager = environment_manager
-        self._context_manager = context_manager
+        self._context_manager = (
+            context_manager or ContextManager()
+        )  # Add default context manager
         self._deploy_managers = {}
         self._exit_stack = AsyncExitStack()
 
@@ -77,37 +85,62 @@ class Runner:
         endpoint_path: str = "/process",
         stream: bool = True,
         protocol_adapters: Optional[list[ProtocolAdapter]] = None,
+        requirements: Optional[Union[str, List[str]]] = None,
+        extra_packages: Optional[List[str]] = None,
+        base_image: str = "python:3.9-slim",
+        environment: Optional[Dict[str, str]] = None,
+        runtime_config: Optional[Dict] = None,
+        services_config: Optional[Union[ServicesConfig, dict]] = None,
+        **kwargs,
     ):
         """
         Deploys the agent as a service.
 
         Args:
-            protocol_adapters: protocol adapters
            deploy_manager: Deployment manager to handle service deployment
            endpoint_path: API endpoint path for the processing function
            stream: If start a streaming service
+            protocol_adapters: protocol adapters
+            requirements: PyPI dependencies
+            extra_packages: User code directory/file path
+            base_image: Docker base image (for containerized deployment)
+            environment: Environment variables dict
+            runtime_config: Runtime configuration dict
+            services_config: Services configuration dict
+            **kwargs: Additional arguments passed to deployment manager
        Returns:
            URL of the deployed service
 
        Raises:
            RuntimeError: If deployment fails
        """
-        if stream:
-            deploy_func = self.stream_query
-        else:
-            deploy_func = self.query
        deploy_result = await deploy_manager.deploy(
-            deploy_func,
+            runner=self,
            endpoint_path=endpoint_path,
+            stream=stream,
            protocol_adapters=protocol_adapters,
+            requirements=requirements,
+            extra_packages=extra_packages,
+            base_image=base_image,
+            environment=environment,
+            runtime_config=runtime_config,
+            services_config=services_config,
+            **kwargs,
        )
+
+        # TODO: add redis or other persistant method
        self._deploy_managers[deploy_manager.deploy_id] = deploy_result
        return deploy_result
 
-    @trace(TraceType.AGENT_STEP)
+    @trace(
+        TraceType.AGENT_STEP,
+        trace_name="agent_step",
+        merge_output_func=merge_agent_response,
+        get_finish_reason_func=get_agent_response_finish_reason,
+    )
    async def stream_query(  # pylint:disable=unused-argument
        self,
-        request: AgentRequest,
+        request: Union[AgentRequest, dict],
        user_id: Optional[str] = None,
        tools: Optional[List] = None,
        **kwargs: Any,
@@ -115,13 +148,25 @@ class Runner:
        """
        Streams the agent.
        """
+        if isinstance(request, dict):
+            request = AgentRequest(**request)
+
+        seq_gen = SequenceNumberGenerator()
+
+        # Initial response
        response = AgentResponse()
-        yield response
+        yield seq_gen.yield_with_sequence(response)
 
+        # Set to in-progress status
        response.in_progress()
-        yield response
+        yield seq_gen.yield_with_sequence(response)
+
+        if user_id is None:
+            if getattr(request, "user_id", None):
+                user_id = request.user_id
+            else:
+                user_id = ""  # Default user id
 
-        user_id = user_id or str(uuid.uuid4())
        session_id = request.session_id or str(uuid.uuid4())
        request_input = request.input
        session = await self._context_manager.compose_session(
@@ -167,36 +212,32 @@ class Runner:
            request_input=request_input,
        )
 
-        sequence_number = 0
        async for event in context.agent.run_async(context):
            if (
                event.status == RunStatus.Completed
                and event.object == "message"
            ):
                response.add_new_message(event)
-                event.sequence_number = sequence_number
-                yield event
-                sequence_number += 1
+                yield seq_gen.yield_with_sequence(event)
 
        await context.context_manager.append(
            session=context.session,
            event_output=response.output,
        )
-        response.sequence_number = sequence_number
-        yield response.completed()
+        yield seq_gen.yield_with_sequence(response.completed())
 
-    @trace(TraceType.AGENT_STEP)
-    async def query(  # pylint:disable=unused-argument
-        self,
-        message: List[dict],
-        session_id: Optional[str] = None,
-        **kwargs: Any,
-    ) -> ChatCompletion:
-        """
-        Streams the agent.
-        """
-        # TODO: fix this @zhicheng
-        return self._agent.query(message, session_id)
+    # TODO: will be added before 2025/11/30
+    # @trace(TraceType.AGENT_STEP)
+    # async def query(  # pylint:disable=unused-argument
+    #     self,
+    #     message: List[dict],
+    #     session_id: Optional[str] = None,
+    #     **kwargs: Any,
+    # ) -> ChatCompletion:
+    #     """
+    #     Streams the agent.
+    #     """
+    #     return self._agent.query(message, session_id)
 
    # TODO: should be sync method?
    async def stop(
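
The reworked deploy() no longer picks between stream_query and query itself; it hands the whole runner to the deploy manager together with packaging options (requirements, extra_packages, base_image, environment, runtime_config, services_config). A minimal usage sketch under assumptions: the agent object is left abstract, and the no-argument LocalDeployManager constructor is assumed, not taken from this diff.

import asyncio

from agentscope_runtime.engine.runner import Runner
from agentscope_runtime.engine.deployers import LocalDeployManager


async def main(agent):
    # `agent` is any engine Agent implementation; its construction is out
    # of scope for this sketch.
    runner = Runner(agent=agent)
    result = await runner.deploy(
        deploy_manager=LocalDeployManager(),  # assumed no-arg constructor
        endpoint_path="/process",
        stream=True,
        requirements=["requests>=2.31"],    # PyPI dependencies
        extra_packages=["./my_agent_pkg"],  # hypothetical user code path
        base_image="python:3.9-slim",
        environment={"LOG_LEVEL": "info"},
    )
    print(result)  # URL of the deployed service


# asyncio.run(main(my_agent))  # supply a concrete agent here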
agentscope_runtime/engine/schemas/agent_schemas.py

@@ -27,6 +27,7 @@ class MessageType:
     MCP_APPROVAL_REQUEST = "mcp_approval_request"
     MCP_TOOL_CALL = "mcp_call"
     MCP_APPROVAL_RESPONSE = "mcp_approval_response"
+    REASONING = "reasoning"
     HEARTBEAT = "heartbeat"
     ERROR = "error"
 
@@ -45,6 +46,8 @@ class ContentType:
     DATA = "data"
     IMAGE = "image"
     AUDIO = "audio"
+    FILE = "file"
+    REFUSAL = "refusal"
 
 
 class Role:
@@ -66,6 +69,8 @@ class RunStatus:
     Failed = "failed"
     Rejected = "rejected"
     Unknown = "unknown"
+    Queued = "queued"
+    Incomplete = "incomplete"
 
 
 class FunctionParameters(BaseModel):
@@ -282,6 +287,63 @@ class DataContent(Content):
     """The data content."""
 
 
+class AudioContent(Content):
+    type: Literal[ContentType.AUDIO] = ContentType.AUDIO
+    """The type of the content part."""
+
+    data: Optional[str] = None
+    """The audio data details."""
+
+    format: Optional[str] = None
+    """
+    The format of the audio data.
+    """
+
+
+class FileContent(Content):
+    type: Literal[ContentType.FILE] = ContentType.FILE
+    """The type of the content part."""
+
+    file_url: Optional[str] = None
+    """The file URL details."""
+
+    file_id: Optional[str] = None
+    """The file ID details."""
+
+    filename: Optional[str] = None
+    """The file name details."""
+
+    file_data: Optional[str] = None
+    """The file data details."""
+
+
+class RefusalContent(Content):
+    type: Literal[ContentType.REFUSAL] = ContentType.REFUSAL
+    """The type of the content part."""
+
+    refusal: Optional[str] = None
+    """The refusal content."""
+
+
+class ToolCall(BaseModel):
+    arguments: str
+    """A JSON string of the arguments to pass to the function."""
+
+    call_id: str
+    """The unique ID of the function tool call generated by the model."""
+
+    name: str
+    """The name of the function to run."""
+
+
+class ToolCallOutput(BaseModel):
+    call_id: str
+    """The unique ID of the function tool call generated by the model."""
+
+    output: str
+    """A JSON string of the output of the function tool call."""
+
+
 AgentRole: TypeAlias = Literal[
     Role.ASSISTANT,
     Role.SYSTEM,
@@ -289,9 +351,15 @@ AgentRole: TypeAlias = Literal[
     Role.TOOL,
 ]
 
-
 AgentContent = Annotated[
-    Union[TextContent, ImageContent, DataContent],
+    Union[
+        TextContent,
+        ImageContent,
+        DataContent,
+        AudioContent,
+        FileContent,
+        RefusalContent,
+    ],
     Field(discriminator="type"),
 ]
 
@@ -684,6 +752,48 @@ class AgentResponse(BaseResponse):
     """conversation id for dialog"""
 
 
+class SequenceNumberGenerator:
+    """
+    A simple sequence number generator for streaming events.
+
+    This class encapsulates the logic for generating sequential numbers,
+    making the code more maintainable and less error-prone.
+    """
+
+    def __init__(self, start: int = 0):
+        """
+        Initialize the generator with a starting number.
+
+        Args:
+            start: The starting sequence number (default: 0)
+        """
+        self._current = start
+
+    def next(self) -> int:
+        """
+        Get the next sequence number and increment the counter.
+
+        Returns:
+            The current sequence number before incrementing
+        """
+        current = self._current
+        self._current += 1
+        return current
+
+    def yield_with_sequence(self, event: Event) -> Event:
+        """
+        Set the sequence number on an event and increment the counter.
+
+        Args:
+            event: The event to set the sequence number on
+
+        Returns:
+            The same event with sequence number set
+        """
+        event.sequence_number = self.next()
+        return event
+
+
 def convert_to_openai_tool_call(function: FunctionCall):
     return {
         "id": function.get("call_id", None),
agentscope_runtime/engine/schemas/embedding.py (new file)

@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+from typing import List, Optional, Literal
+
+from openai.types import Embedding
+from pydantic import BaseModel
+
+
+class Usage(BaseModel):
+    prompt_tokens: Optional[int] = None
+    """The number of tokens used by the prompt."""
+
+    total_tokens: Optional[int] = None
+    """The total number of tokens used by the request."""
+
+    input_tokens: Optional[int] = None
+
+    text_count: Optional[int] = None
+
+    image_count: Optional[int] = None
+
+    video_count: Optional[int] = None
+
+    duration: Optional[float] = None
+
+
+class EmbeddingResponse(BaseModel):
+    data: List[Embedding]
+    """The list of embeddings generated by the model."""
+
+    model: str
+    """The name of the model used to generate the embedding."""
+
+    object: Literal["list"]
+    """The object type, which is always "list"."""
+
+    usage: Usage
+    """The usage information for the request."""
agentscope_runtime/engine/schemas/modelstudio_llm.py (new file)

@@ -0,0 +1,310 @@
+# -*- coding: utf-8 -*-
+import os
+from typing import List, Literal, Optional, Union
+
+from openai.types.chat import ChatCompletion, ChatCompletionChunk
+from pydantic import (
+    BaseModel,
+    StrictInt,
+    field_validator,
+    Field,
+)
+
+from .oai_llm import (
+    Parameters,
+    OpenAIMessage,
+)
+
+
+class KnowledgeHolder(BaseModel):
+    source: str
+    """The source identifier or URL where the knowledge was retrieved from."""
+
+    content: str
+    """The actual content or knowledge text retrieved from the source."""
+
+
+class IntentionOptions(BaseModel):
+    white_list: List[str] = Field(default_factory=list)
+    """A list of allowed intentions that can be processed."""
+
+    black_list: List[str] = Field(default_factory=list)
+    """A list of blocked intentions that should not be processed."""
+
+    search_model: str = "search_v6"
+    """The search model version to use for intentions recognition."""
+
+    intensity: Optional[int] = None
+    """The intensity level for intentions matching and processing."""
+
+    scene_id: Optional[str] = None
+    """The scene identifier for context-aware intentions processing."""
+
+
+class SearchOptions(BaseModel):
+    """
+    Search Options on Modelstudio platform for knowledge retrieval and web
+    search.
+    """
+
+    enable_source: bool = False
+    """Whether to include source information in search results."""
+
+    enable_citation: bool = False
+    """Whether to include citation information for retrieved content."""
+
+    enable_readpage: bool = False
+    """Whether to enable full page reading for web content."""
+
+    enable_online_read: bool = False
+    """Whether to enable online reading capabilities for real-time content."""
+
+    citation_format: str = "[<number>]"
+    """The format string for citations in the response."""
+
+    search_strategy: Literal[
+        "standard",
+        "pro_ultra",
+        "pro",
+        "lite",
+        "pro_max",
+        "image",
+        "turbo",
+        "max",
+    ] = "turbo"
+    """The search strategy to use ('standard', 'pro_ultra',
+    'pro', 'lite', 'pro_max', 'image', 'turbo', 'max')."""
+
+    forced_search: bool = False
+    """Whether to force search even when cached results are available."""
+
+    prepend_search_result: bool = False
+    """Whether to prepend search results to the response."""
+
+    enable_search_extension: bool = False
+    """Whether to enable extended search capabilities."""
+
+    item_cnt: int = 20000
+    """The maximum number of items to retrieve in search results."""
+
+    top_n: int = 0
+    """The number of top results to return (0 means return all)."""
+
+    intention_options: Union[IntentionOptions, None] = IntentionOptions()
+    """Options for intentions recognition and processing during search."""
+
+
+# maximum chunk size from knowledge base [1, 20]
+PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MIN = int(
+    os.getenv(
+        "PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MIN",
+        "1",
+    ),
+)
+PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MAX = int(
+    os.getenv(
+        "PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MAX",
+        "20",
+    ),
+)
+
+
+class RagOptions(BaseModel):
+    model_config = {"populate_by_name": True}
+
+    class FallbackOptions(BaseModel):
+        default_response_type: Optional[str] = "llm"
+        """The type of default response when RAG fails ('llm', 'template',
+        'none')."""
+
+        default_response: Optional[str] = ""
+        """The default response text to use when RAG fails."""
+
+    class RewriteOptions(BaseModel):
+        model_name: Optional[str] = None
+        """The model name to use for rewriting."""
+
+        class_name: Optional[str] = None
+        """The class name to use for rewriting."""
+
+    class RerankOptions(BaseModel):
+        model_name: Optional[str] = None
+        """The model name to use for reranking."""
+
+    workspace_id: Optional[str] = ""
+    """The modelstudio workspace id"""
+
+    replaced_word: str = "${documents}"
+    """The placeholder word in prompts that will be replaced with retrieved
+    documents."""
+
+    index_names: Optional[List[str]] = Field(default_factory=list)
+    """List of index names to use for document processing and retrieval."""
+
+    pipeline_ids: Optional[List[str]] = Field(default_factory=list)
+    """List of pipeline IDs to use for document processing and retrieval."""
+
+    file_ids: Optional[List[str]] = Field(
+        default_factory=list,
+        alias="file_id_list",
+    )
+    """List of specific file IDs to search within."""
+
+    prompt_strategy: Optional[str] = Field(
+        default="topK",
+        alias="prompt_strategy_name",
+    )
+    """The strategy for selecting and organizing retrieved content in
+    prompts."""
+
+    maximum_allowed_chunk_num: Optional[int] = 5
+    """The maximum number of document chunks to include in the context."""
+
+    maximum_allowed_length: Optional[int] = 2000
+    """The maximum total length of retrieved content in characters."""
+
+    enable_citation: bool = Field(
+        default=False,
+        alias="prompt_enable_citation",
+    )
+    """Whether to include citation information for retrieved documents."""
+
+    fallback_options: Optional[FallbackOptions] = None
+    """Options for handling cases when RAG retrieval fails."""
+
+    enable_web_search: bool = False
+    """Whether to enable web search as part of the RAG pipeline."""
+
+    session_file_ids: Optional[List[str]] = Field(default_factory=list)
+    """List of file IDs that are specific to the current session."""
+
+    dense_similarity_top_k: Optional[int] = 100
+    """The number of most similar dense vectors to retrieve."""
+
+    sparse_similarity_top_k: Optional[int] = 100
+    """The number of most similar sparse vectors to retrieve."""
+
+    enable_rewrite: Optional[bool] = None
+    """Whether to enable content rewrite during RAG."""
+
+    rewrite: Optional[List[RewriteOptions]] = None
+    """Options for content rewrite."""
+
+    enable_reranking: Optional[bool] = None
+    """Whether to enable content reranking."""
+
+    rerank_min_score: Optional[float] = None
+    """The minimum score threshold for content reranking."""
+
+    rerank_top_n: Optional[int] = None
+    """The number of top results to return for content reranking."""
+
+    rerank: Optional[List[RerankOptions]] = None
+
+    enable_reject_filter: Optional[bool] = None
+    """Whether to enable content rejection filtering."""
+
+    reject_filter_type: Optional[str] = None
+    """The type of content rejection filter to use."""
+
+    reject_filter_model_name: Optional[str] = None
+    """The name of the model to use for content rejection filtering."""
+
+    reject_filter_prompt: Optional[str] = None
+    """The prompt to use for content rejection filtering."""
+
+    enable_agg_search: Optional[bool] = None
+    """Whether to enable aggregation search."""
+
+    enable_hybrid_gen: Optional[bool] = None
+    """Whether to enable hybrid generations."""
+
+    @field_validator("prompt_strategy")
+    def prompt_strategy_check(self, value: str) -> str:
+        if value:
+            value = value.lower()
+            if value in ["topk", "top_k"]:
+                return "topK"
+        return value
+
+    @field_validator("maximum_allowed_chunk_num")
+    def maximum_allowed_chunk_num_check(self, value: int) -> int:
+        if value < int(PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MIN) or value > int(
+            PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MAX,
+        ):
+            raise KeyError(
+                f"Range of maximum_allowed_chunk_num should be "
+                f"[{PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MIN}, "
+                f"{PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MAX}]",
+            )
+        return value
+
+
+class ModelstudioParameters(Parameters):
+    """
+    Parameters for Modelstudio platform, extending the base Parameters with
+    Modelstudio-specific options.
+    """
+
+    repetition_penalty: Union[float, None] = None
+    """Penalty for repeating tokens. Higher values reduce repetition."""
+
+    length_penalty: Union[float, None] = None
+    """Penalty applied to longer sequences. Affects the length of generated
+    text."""
+
+    top_k: Union[StrictInt, None] = None
+    """The number of highest probability vocabulary tokens to keep for top-k
+    filtering."""
+
+    min_tokens: Optional[int] = None
+    """The minimum number of tokens to generate before stopping."""
+
+    result_format: Literal["text", "message"] = "message"
+    """The format of the response ('text' for plain text, 'message' for
+    structured message)."""
+
+    incremental_output: bool = False
+    """Whether to return incremental output during generations."""
+
+    # Search
+    enable_search: bool = False
+    """Whether to enable search capabilities for knowledge retrieval."""
+
+    search_options: Optional[SearchOptions] = SearchOptions()
+    """Configuration options for search functionality."""
+
+    # RAG
+    enable_rag: bool = False  # RAGs of modelstudio assistant service
+    """Whether to enable Retrieval-Augmented Generation (RAG) for the
+    Modelstudio assistant service."""
+
+    rag_options: Union[RagOptions, None] = None
+    """Configuration options for RAG functionality."""
+
+    selected_model: Optional[str] = "qwen-max"
+    """The selected model name to use for generations."""
+
+    # Intention
+    intention_options: Optional[IntentionOptions] = None
+    """Options for intentions recognition and processing."""
+
+    # MCP Servers
+    mcp_config_file: Optional[str] = None
+    """Path to the MCP (Model Context Protocol) configuration file."""
+
+
+class ModelstudioChatRequest(ModelstudioParameters):
+    messages: List[OpenAIMessage]
+    """A list of messages comprising the conversation so far."""
+
+    model: str
+    """ID of the model to use for the chat completion."""
+
+
+class ModelstudioChatResponse(ChatCompletion):
+    pass
+
+
+class ModelstudioChatCompletionChunk(ChatCompletionChunk):
+    pass
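
A sketch of a ModelstudioChatRequest that turns on both web search and RAG, based on the schema above. It assumes the message dict validates against OpenAIMessage (defined in the unseen oai_llm module); the pipeline ID and other values are illustrative only.

from agentscope_runtime.engine.schemas.modelstudio_llm import (
    ModelstudioChatRequest,
    RagOptions,
    SearchOptions,
)

request = ModelstudioChatRequest(
    model="qwen-max",
    messages=[{"role": "user", "content": "Summarize this quarter's report."}],
    enable_search=True,
    search_options=SearchOptions(
        search_strategy="turbo",  # one of the Literal strategies above
        enable_citation=True,
    ),
    enable_rag=True,
    rag_options=RagOptions(
        pipeline_ids=["pipeline-123"],  # hypothetical pipeline ID
        maximum_allowed_chunk_num=5,    # validator keeps this in [1, 20]
        enable_citation=True,           # populate_by_name allows field names
    ),
)
print(request.model_dump(exclude_none=True))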