agentscope-runtime 0.2.0b1__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. agentscope_runtime/adapters/__init__.py +0 -0
  2. agentscope_runtime/adapters/agentscope/__init__.py +0 -0
  3. agentscope_runtime/adapters/agentscope/long_term_memory/__init__.py +6 -0
  4. agentscope_runtime/adapters/agentscope/long_term_memory/_long_term_memory_adapter.py +258 -0
  5. agentscope_runtime/adapters/agentscope/memory/__init__.py +6 -0
  6. agentscope_runtime/adapters/agentscope/memory/_memory_adapter.py +152 -0
  7. agentscope_runtime/adapters/agentscope/message.py +535 -0
  8. agentscope_runtime/adapters/agentscope/stream.py +506 -0
  9. agentscope_runtime/adapters/agentscope/tool/__init__.py +9 -0
  10. agentscope_runtime/adapters/agentscope/tool/sandbox_tool.py +69 -0
  11. agentscope_runtime/adapters/agentscope/tool/tool.py +233 -0
  12. agentscope_runtime/adapters/autogen/__init__.py +0 -0
  13. agentscope_runtime/adapters/autogen/tool/__init__.py +7 -0
  14. agentscope_runtime/adapters/autogen/tool/tool.py +211 -0
  15. agentscope_runtime/adapters/text/__init__.py +0 -0
  16. agentscope_runtime/adapters/text/stream.py +29 -0
  17. agentscope_runtime/common/collections/redis_mapping.py +4 -1
  18. agentscope_runtime/common/container_clients/fc_client.py +855 -0
  19. agentscope_runtime/common/container_clients/kubernetes_client.py +6 -13
  20. agentscope_runtime/common/utils/__init__.py +0 -0
  21. agentscope_runtime/common/utils/lazy_loader.py +57 -0
  22. agentscope_runtime/engine/__init__.py +25 -18
  23. agentscope_runtime/engine/app/agent_app.py +161 -91
  24. agentscope_runtime/engine/app/base_app.py +4 -118
  25. agentscope_runtime/engine/constant.py +8 -0
  26. agentscope_runtime/engine/deployers/__init__.py +8 -0
  27. agentscope_runtime/engine/deployers/adapter/__init__.py +2 -0
  28. agentscope_runtime/engine/deployers/adapter/a2a/a2a_adapter_utils.py +0 -21
  29. agentscope_runtime/engine/deployers/adapter/a2a/a2a_protocol_adapter.py +28 -9
  30. agentscope_runtime/engine/deployers/adapter/responses/__init__.py +2 -0
  31. agentscope_runtime/engine/deployers/adapter/responses/response_api_adapter_utils.py +5 -2
  32. agentscope_runtime/engine/deployers/adapter/responses/response_api_protocol_adapter.py +1 -1
  33. agentscope_runtime/engine/deployers/agentrun_deployer.py +2541 -0
  34. agentscope_runtime/engine/deployers/cli_fc_deploy.py +1 -1
  35. agentscope_runtime/engine/deployers/kubernetes_deployer.py +9 -21
  36. agentscope_runtime/engine/deployers/local_deployer.py +47 -74
  37. agentscope_runtime/engine/deployers/modelstudio_deployer.py +216 -50
  38. agentscope_runtime/engine/deployers/utils/app_runner_utils.py +29 -0
  39. agentscope_runtime/engine/deployers/utils/detached_app.py +510 -0
  40. agentscope_runtime/engine/deployers/utils/docker_image_utils/__init__.py +1 -1
  41. agentscope_runtime/engine/deployers/utils/docker_image_utils/dockerfile_generator.py +1 -1
  42. agentscope_runtime/engine/deployers/utils/docker_image_utils/{runner_image_factory.py → image_factory.py} +121 -61
  43. agentscope_runtime/engine/deployers/utils/package.py +693 -0
  44. agentscope_runtime/engine/deployers/utils/service_utils/__init__.py +0 -5
  45. agentscope_runtime/engine/deployers/utils/service_utils/fastapi_factory.py +301 -282
  46. agentscope_runtime/engine/deployers/utils/service_utils/fastapi_templates.py +2 -4
  47. agentscope_runtime/engine/deployers/utils/service_utils/process_manager.py +23 -1
  48. agentscope_runtime/engine/deployers/utils/templates/app_main.py.j2 +84 -0
  49. agentscope_runtime/engine/deployers/utils/templates/runner_main.py.j2 +95 -0
  50. agentscope_runtime/engine/deployers/utils/{service_utils → templates}/standalone_main.py.j2 +0 -45
  51. agentscope_runtime/engine/deployers/utils/wheel_packager.py +119 -18
  52. agentscope_runtime/engine/helpers/runner.py +40 -0
  53. agentscope_runtime/engine/runner.py +171 -130
  54. agentscope_runtime/engine/schemas/agent_schemas.py +114 -3
  55. agentscope_runtime/engine/schemas/modelstudio_llm.py +4 -2
  56. agentscope_runtime/engine/schemas/oai_llm.py +23 -23
  57. agentscope_runtime/engine/schemas/response_api.py +65 -0
  58. agentscope_runtime/engine/schemas/session.py +24 -0
  59. agentscope_runtime/engine/services/__init__.py +0 -9
  60. agentscope_runtime/engine/services/agent_state/__init__.py +16 -0
  61. agentscope_runtime/engine/services/agent_state/redis_state_service.py +113 -0
  62. agentscope_runtime/engine/services/agent_state/state_service.py +179 -0
  63. agentscope_runtime/engine/services/memory/__init__.py +24 -0
  64. agentscope_runtime/engine/services/{mem0_memory_service.py → memory/mem0_memory_service.py} +17 -13
  65. agentscope_runtime/engine/services/{memory_service.py → memory/memory_service.py} +28 -7
  66. agentscope_runtime/engine/services/{redis_memory_service.py → memory/redis_memory_service.py} +1 -1
  67. agentscope_runtime/engine/services/{reme_personal_memory_service.py → memory/reme_personal_memory_service.py} +9 -6
  68. agentscope_runtime/engine/services/{reme_task_memory_service.py → memory/reme_task_memory_service.py} +2 -2
  69. agentscope_runtime/engine/services/{tablestore_memory_service.py → memory/tablestore_memory_service.py} +16 -19
  70. agentscope_runtime/engine/services/sandbox/__init__.py +13 -0
  71. agentscope_runtime/engine/services/{sandbox_service.py → sandbox/sandbox_service.py} +86 -71
  72. agentscope_runtime/engine/services/session_history/__init__.py +23 -0
  73. agentscope_runtime/engine/services/{redis_session_history_service.py → session_history/redis_session_history_service.py} +3 -2
  74. agentscope_runtime/engine/services/{session_history_service.py → session_history/session_history_service.py} +44 -34
  75. agentscope_runtime/engine/services/{tablestore_session_history_service.py → session_history/tablestore_session_history_service.py} +14 -19
  76. agentscope_runtime/engine/services/utils/tablestore_service_utils.py +2 -2
  77. agentscope_runtime/engine/tracing/base.py +10 -9
  78. agentscope_runtime/engine/tracing/message_util.py +1 -1
  79. agentscope_runtime/engine/tracing/tracing_util.py +7 -2
  80. agentscope_runtime/engine/tracing/wrapper.py +49 -31
  81. agentscope_runtime/sandbox/__init__.py +10 -2
  82. agentscope_runtime/sandbox/box/agentbay/__init__.py +4 -0
  83. agentscope_runtime/sandbox/box/agentbay/agentbay_sandbox.py +559 -0
  84. agentscope_runtime/sandbox/box/base/base_sandbox.py +12 -0
  85. agentscope_runtime/sandbox/box/browser/browser_sandbox.py +115 -11
  86. agentscope_runtime/sandbox/box/cloud/__init__.py +4 -0
  87. agentscope_runtime/sandbox/box/cloud/cloud_sandbox.py +254 -0
  88. agentscope_runtime/sandbox/box/filesystem/filesystem_sandbox.py +66 -0
  89. agentscope_runtime/sandbox/box/gui/gui_sandbox.py +42 -0
  90. agentscope_runtime/sandbox/box/mobile/__init__.py +4 -0
  91. agentscope_runtime/sandbox/box/mobile/box/__init__.py +0 -0
  92. agentscope_runtime/sandbox/box/mobile/mobile_sandbox.py +216 -0
  93. agentscope_runtime/sandbox/box/training_box/training_box.py +2 -44
  94. agentscope_runtime/sandbox/client/http_client.py +1 -0
  95. agentscope_runtime/sandbox/enums.py +2 -1
  96. agentscope_runtime/sandbox/manager/sandbox_manager.py +15 -2
  97. agentscope_runtime/sandbox/manager/server/app.py +12 -0
  98. agentscope_runtime/sandbox/manager/server/config.py +19 -0
  99. agentscope_runtime/sandbox/model/manager_config.py +79 -2
  100. agentscope_runtime/sandbox/utils.py +0 -18
  101. agentscope_runtime/tools/RAGs/__init__.py +0 -0
  102. agentscope_runtime/tools/RAGs/modelstudio_rag.py +377 -0
  103. agentscope_runtime/tools/RAGs/modelstudio_rag_lite.py +219 -0
  104. agentscope_runtime/tools/__init__.py +119 -0
  105. agentscope_runtime/tools/_constants.py +18 -0
  106. agentscope_runtime/tools/alipay/__init__.py +4 -0
  107. agentscope_runtime/tools/alipay/base.py +334 -0
  108. agentscope_runtime/tools/alipay/payment.py +835 -0
  109. agentscope_runtime/tools/alipay/subscribe.py +551 -0
  110. agentscope_runtime/tools/base.py +264 -0
  111. agentscope_runtime/tools/cli/__init__.py +0 -0
  112. agentscope_runtime/tools/cli/modelstudio_mcp_server.py +78 -0
  113. agentscope_runtime/tools/generations/__init__.py +75 -0
  114. agentscope_runtime/tools/generations/async_image_to_video.py +350 -0
  115. agentscope_runtime/tools/generations/async_image_to_video_wan25.py +366 -0
  116. agentscope_runtime/tools/generations/async_speech_to_video.py +422 -0
  117. agentscope_runtime/tools/generations/async_text_to_video.py +320 -0
  118. agentscope_runtime/tools/generations/async_text_to_video_wan25.py +334 -0
  119. agentscope_runtime/tools/generations/image_edit.py +208 -0
  120. agentscope_runtime/tools/generations/image_edit_wan25.py +193 -0
  121. agentscope_runtime/tools/generations/image_generation.py +202 -0
  122. agentscope_runtime/tools/generations/image_generation_wan25.py +201 -0
  123. agentscope_runtime/tools/generations/image_style_repaint.py +208 -0
  124. agentscope_runtime/tools/generations/image_to_video.py +233 -0
  125. agentscope_runtime/tools/generations/qwen_image_edit.py +205 -0
  126. agentscope_runtime/tools/generations/qwen_image_generation.py +214 -0
  127. agentscope_runtime/tools/generations/qwen_text_to_speech.py +154 -0
  128. agentscope_runtime/tools/generations/speech_to_text.py +260 -0
  129. agentscope_runtime/tools/generations/speech_to_video.py +314 -0
  130. agentscope_runtime/tools/generations/text_to_video.py +221 -0
  131. agentscope_runtime/tools/mcp_wrapper.py +215 -0
  132. agentscope_runtime/tools/realtime_clients/__init__.py +13 -0
  133. agentscope_runtime/tools/realtime_clients/asr_client.py +27 -0
  134. agentscope_runtime/tools/realtime_clients/azure_asr_client.py +195 -0
  135. agentscope_runtime/tools/realtime_clients/azure_tts_client.py +383 -0
  136. agentscope_runtime/tools/realtime_clients/modelstudio_asr_client.py +151 -0
  137. agentscope_runtime/tools/realtime_clients/modelstudio_tts_client.py +199 -0
  138. agentscope_runtime/tools/realtime_clients/realtime_tool.py +55 -0
  139. agentscope_runtime/tools/realtime_clients/tts_client.py +33 -0
  140. agentscope_runtime/tools/searches/__init__.py +3 -0
  141. agentscope_runtime/tools/searches/modelstudio_search.py +877 -0
  142. agentscope_runtime/tools/searches/modelstudio_search_lite.py +310 -0
  143. agentscope_runtime/tools/utils/__init__.py +0 -0
  144. agentscope_runtime/tools/utils/api_key_util.py +45 -0
  145. agentscope_runtime/tools/utils/crypto_utils.py +99 -0
  146. agentscope_runtime/tools/utils/mcp_util.py +35 -0
  147. agentscope_runtime/version.py +1 -1
  148. {agentscope_runtime-0.2.0b1.dist-info → agentscope_runtime-1.0.0.dist-info}/METADATA +244 -168
  149. agentscope_runtime-1.0.0.dist-info/RECORD +240 -0
  150. {agentscope_runtime-0.2.0b1.dist-info → agentscope_runtime-1.0.0.dist-info}/entry_points.txt +1 -0
  151. agentscope_runtime/engine/agents/__init__.py +0 -2
  152. agentscope_runtime/engine/agents/agentscope_agent.py +0 -488
  153. agentscope_runtime/engine/agents/agno_agent.py +0 -222
  154. agentscope_runtime/engine/agents/autogen_agent.py +0 -250
  155. agentscope_runtime/engine/agents/base_agent.py +0 -29
  156. agentscope_runtime/engine/agents/langgraph_agent.py +0 -59
  157. agentscope_runtime/engine/agents/utils.py +0 -53
  158. agentscope_runtime/engine/deployers/utils/package_project_utils.py +0 -1163
  159. agentscope_runtime/engine/deployers/utils/service_utils/service_config.py +0 -75
  160. agentscope_runtime/engine/deployers/utils/service_utils/service_factory.py +0 -220
  161. agentscope_runtime/engine/helpers/helper.py +0 -179
  162. agentscope_runtime/engine/schemas/context.py +0 -54
  163. agentscope_runtime/engine/services/context_manager.py +0 -164
  164. agentscope_runtime/engine/services/environment_manager.py +0 -50
  165. agentscope_runtime/engine/services/manager.py +0 -174
  166. agentscope_runtime/engine/services/rag_service.py +0 -195
  167. agentscope_runtime/engine/services/tablestore_rag_service.py +0 -143
  168. agentscope_runtime/sandbox/tools/__init__.py +0 -12
  169. agentscope_runtime/sandbox/tools/base/__init__.py +0 -8
  170. agentscope_runtime/sandbox/tools/base/tool.py +0 -52
  171. agentscope_runtime/sandbox/tools/browser/__init__.py +0 -57
  172. agentscope_runtime/sandbox/tools/browser/tool.py +0 -597
  173. agentscope_runtime/sandbox/tools/filesystem/__init__.py +0 -32
  174. agentscope_runtime/sandbox/tools/filesystem/tool.py +0 -319
  175. agentscope_runtime/sandbox/tools/function_tool.py +0 -321
  176. agentscope_runtime/sandbox/tools/gui/__init__.py +0 -7
  177. agentscope_runtime/sandbox/tools/gui/tool.py +0 -77
  178. agentscope_runtime/sandbox/tools/mcp_tool.py +0 -195
  179. agentscope_runtime/sandbox/tools/sandbox_tool.py +0 -104
  180. agentscope_runtime/sandbox/tools/tool.py +0 -238
  181. agentscope_runtime/sandbox/tools/utils.py +0 -68
  182. agentscope_runtime-0.2.0b1.dist-info/RECORD +0 -183
  183. {agentscope_runtime-0.2.0b1.dist-info → agentscope_runtime-1.0.0.dist-info}/WHEEL +0 -0
  184. {agentscope_runtime-0.2.0b1.dist-info → agentscope_runtime-1.0.0.dist-info}/licenses/LICENSE +0 -0
  185. {agentscope_runtime-0.2.0b1.dist-info → agentscope_runtime-1.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,877 @@
1
+ # -*- coding: utf-8 -*-
2
+ # pylint:disable=line-too-long, unused-argument, redefined-outer-name
3
+ # pylint:disable=consider-using-enumerate,too-many-branches,too-many-statements
4
+ # pylint:disable=too-many-nested-blocks
5
+
6
import copy
import datetime
import json
import logging
import os
import random
import re
import time
import uuid
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple, Union

import aiohttp
import dashscope
from pydantic import BaseModel, Field

from ..base import Tool
from ...engine.schemas.modelstudio_llm import (
    KnowledgeHolder,
    OpenAIMessage,
    SearchOptions,
)
from ...engine.tracing import trace
28
+
29
# Default value of ``SearchInput.search_timeout`` (seconds).
SEARCH_TIMEOUT = 5
# Sent as the "page" and "rows" fields of the search payload.
SEARCH_PAGE = 1
SEARCH_ROWS = 10
# Matches an opening or closing tag whose name begins with a, span, em or br,
# together with one optional space on either side of the tag.
_HTML_TAG_RE = re.compile(r" ?</?(a|span|em|br).*?> ?")

# Maps a search strategy name to the backend "scene" and the timeout that is
# forwarded in the "__d_head_qto" payload header (presumably milliseconds --
# TODO confirm against the service documentation).
SEARCH_STRATEGY_SETTING = {
    strategy: {"scene": scene, "timeout": timeout}
    for strategy, scene, timeout in (
        ("lite", "dolphin_search_bailian_lite", 3000),
        ("standard", "dolphin_search_bailian_standard", 3000),
        ("pro", "dolphin_search_bailian_pro", 5000),
        ("pro_max", "dolphin_search_bailian_proMax", 5000),
        ("pro_ultra", "dolphin_search_bailian_proUltra", 6000),
        ("image", "dolphin_search_360_image", 3000),
        ("turbo", "dolphin_search_bailian_turbo", 5000),
        ("max", "dolphin_search_bailian_max", 5000),
    )
}
SEARCH_URL = "https://dashscope.aliyuncs.com/api/v1/indices/plugin/web_search"
45
+
46
+
47
class SearchInput(BaseModel):
    """Request model accepted by the Modelstudio search tool."""

    # Conversation messages; the content of the last message is used as the
    # query and the preceding messages are sent as query history.
    messages: List[Union[OpenAIMessage, Dict]] = Field(
        ...,
        description="user query in the format of Message",
    )
    # Either a SearchOptions instance or a dict of its fields.
    search_options: Union[SearchOptions, Dict] = Field(
        SearchOptions(),
        description=" Search options",
    )
    # Handed to the field validator that filters each result document.
    search_output_rules: dict = Field(
        {},
        description="Search output rules for formatting the search result",
    )
    # Used as the total timeout of the HTTP request to the search service.
    search_timeout: int = Field(
        SEARCH_TIMEOUT,
        description="Search timeout in seconds",
    )
    # The value "image" switches payload and output to image search.
    type: Optional[str] = Field(None, description="Search type")
69
+
70
+
71
class SearchOutput(BaseModel):
    """
    Search Output.

    Both fields are required: an instance cannot be created without
    explicitly providing ``search_result`` and ``search_info``.
    """

    # Formatted text assembled from the search items (for image searches, a
    # JSON-encoded list of image URLs).
    search_result: str = Field(
        ...,
        description="Search result in the format of string",
    )
    # Carries "extra_tool_info" and, when sources are enabled,
    # "search_results" with the per-document source entries.
    search_info: dict = Field(
        ...,
        description="Additional information about the search operation result",
    )
84
+
85
+
86
# for local use only
class SearchItem(BaseModel):
    """Normalized view of one search result document.

    Instances are built from the raw documents returned by the search
    service and are only used inside this module.
    """

    title: str = ""  # document "title"
    image: str = ""  # document "image"; returned for image searches
    body: str = ""  # document "snippet"
    href: str = ""  # document "url"
    time: int = 0  # timestamp parsed from "timestamp_format"; 0 if unknown
    exclusive: bool = False
    relevance: float = 0  # important items have higher scores
    original_order: int = -1  # for stable sort
    source: str = ""  # document "hostname"
    host_logo: str = ""  # document "hostlogo"
    web_main_body: str = ""  # document "web_main_body"
    csi_checked: bool = False  # document "_csi_checked"
100
+
101
+
102
+ class ModelstudioSearch(Tool[SearchInput, SearchOutput]):
103
+ """
104
+ Search tool that calls DashScope for LLM search results.
105
+ """
106
+
107
+ description = (
108
+ "中文搜索可用于查询百科知识、时事新闻、天气。但它不适用于解决编程问题。它仅收录中文信息,不收录英文资料。" # noqa E501
109
+ )
110
+
111
+ name = "modelstudio_search_pro"
112
+
113
@trace(trace_type="SEARCH", trace_name="modelstudio_search")
async def _arun(self, args: SearchInput, **kwargs: Any) -> SearchOutput:
    """Run a web search through DashScope and format the results.

    The method builds the request payload, posts it to ``SEARCH_URL``,
    converts the returned documents into ``SearchItem`` objects and renders
    them into a single result string.

    Args:
        args: SearchInput containing user messages, search options, output
            rules, and timeout settings. ``args.search_options`` is
            replaced in place by a ``SearchOptions`` instance when it was
            supplied as a dict.
        **kwargs: Additional keyword arguments including:
            - request_id: Optional request ID for tracking; a random UUID
              is used when the key is absent
            - user_id: Required user ID from Modelstudio platform
            - use_green_net: Whether to use green network (defaults to
              True)
            - trace_event: Optional trace event for logging

    Returns:
        SearchOutput containing the formatted search result string and
        additional search information. When the search step raises, an
        empty output (``search_result=""``, ``search_info={}``) is
        returned instead.

    Raises:
        ValueError: If user_id is not provided, or if no DashScope API key
            is available from ``DASHSCOPE_API_KEY`` or ``dashscope.api_key``.
    """
    if not isinstance(args.search_options, SearchOptions):
        args.search_options = SearchOptions(**args.search_options)
    request_id = kwargs.get("request_id", str(uuid.uuid4()))
    user_id = kwargs.get("user_id", None)
    if user_id is None:
        raise ValueError(
            "user_id is required for search component, "
            "please find it on Modelstudio platform",
        )
    use_green_net = kwargs.get("use_green_net", True)
    trace_event = kwargs.pop("trace_event", None)

    # call search engine to get search result
    payload: dict = ModelstudioSearch.generate_search_payload(
        search_input=args,
        search_options=args.search_options,
        search_payload={},
        request_id=request_id,
        use_green_net=use_green_net,
        user_id=user_id,
    )

    # Fail with a clear message instead of a TypeError from "Bearer " + None.
    api_key = os.getenv("DASHSCOPE_API_KEY", dashscope.api_key)
    if not api_key:
        raise ValueError(
            "DashScope API key is required for search component, please "
            "set the DASHSCOPE_API_KEY environment variable",
        )
    header = {
        "Content-Type": "application/json",
        "Accept-Encoding": "utf-8",
        "Authorization": "Bearer " + api_key,
    }
    payload_string = json.dumps(payload)
    # NOTE(review): "context" is not read again in this method; presumably
    # it is kept for tracing purposes -- confirm before removing.
    kwargs["context"] = {
        "payload": payload_string,
        "search_strategy": args.search_options.search_strategy,
        "timeout": args.search_timeout,
    }
    try:
        (
            search_result,
            extra_tool_info,
        ) = await ModelstudioSearch.dashscope_search_kernel(
            url=SEARCH_URL,
            payload=payload_string,
            headers=header,
            timeout=args.search_timeout,
        )
        if trace_event:
            trace_event.on_log(
                "",
                step_suffix="results",
                payload={
                    "search_result": search_result,
                    "extra_tool_info": extra_tool_info,
                },
            )
    except Exception:
        logging.getLogger(__name__).exception(
            "modelstudio search failed, returning an empty result",
        )
        # Both SearchOutput fields are required, so they must be passed
        # explicitly; a bare SearchOutput() would itself raise.
        return SearchOutput(search_result="", search_info={})

    # post process search results
    (
        search_items,
        search_info,
    ) = ModelstudioSearch.post_process_search_detail(
        search_results=search_result,
        extra_tool_info=extra_tool_info,
        search_options=args.search_options,
        search_output_rules=args.search_output_rules,
    )

    # post process search string
    search_string = ModelstudioSearch.post_process_search_string(
        search_input=args,
        search_items=search_items,
        search_options=args.search_options,
    )

    return SearchOutput(
        search_result=search_string,
        search_info=search_info,
    )
221
+
222
@staticmethod
def generate_search_payload(
    search_input: SearchInput,
    search_options: Union[SearchOptions, Dict],
    search_payload: Dict,
    request_id: str,
    use_green_net: bool,
    **kwargs: Any,
) -> Dict:
    """Generate the payload for DashScope search API request.

    The last message supplies the query text (for list-style content, the
    first non-empty ``text`` part); all earlier messages are sent as query
    history. When ``search_payload`` is non-empty, a deep copy of its
    ``"payload"`` entry is updated; otherwise a new payload is built from
    the module defaults and the selected search strategy.

    Args:
        search_input: SearchInput containing user messages and search
            configuration.
        search_options: SearchOptions or dict containing search strategy
            and other search-related settings.
        search_payload: Existing payload dict to modify, or empty dict
            (or None) for new payload. The argument itself is not mutated.
        request_id: Unique request identifier for tracking.
        use_green_net: Whether to enable content inspection/filtering.
        **kwargs: Additional keyword arguments including:
            - user_id: Required user ID for the search request
            - is_xinwen_label: Whether to set news search intention

    Returns:
        Dict: The complete payload ready for API request, containing
        scene, query, user info, and configuration parameters.

    Raises:
        KeyError: If a new payload is built and the search strategy is not
            a key of ``SEARCH_STRATEGY_SETTING``.
    """
    user_id = kwargs.get("user_id")
    is_xinwen_label = kwargs.get("is_xinwen_label", False)
    if isinstance(search_options, dict):
        search_options = SearchOptions(**search_options)
    search_strategy = search_options.search_strategy
    messages = ModelstudioSearch.preprocess_messages(search_input.messages)
    query = messages[-1].content
    string_query = ""
    if isinstance(query, list):
        for item in query:
            part = item if isinstance(item, dict) else item.model_dump()
            text = part.get("text")
            if text:
                string_query = text
                break
    elif query is not None:
        # A message without content yields an empty query, not a crash.
        string_query = query

    history = [message.model_dump() for message in messages[:-1]]
    tool_use = search_options.enable_search_extension
    if search_payload:
        payload = copy.deepcopy(search_payload.get("payload") or {})
        payload["rid"] = request_id
        payload["uq"] = string_query.strip()
        payload["uid"] = user_id
        # The supplied payload may lack the config section; create it so
        # the assignments below cannot raise KeyError.
        custom_config = payload.setdefault("customConfigInfo", {})
        custom_config["qpMultiQueryHistory"] = history
    else:
        # refactor based on
        # https://project.aone.alibaba-inc.com/v2/project/2018866/req/62839770 # noqa E501
        strategy_setting = SEARCH_STRATEGY_SETTING[search_strategy]
        custom_config = {
            "qpToolPlan": False,
            "readpage": False,
            "readpageConfig": {
                "onlyCache": False,
                "topK": 10,
                "tokens": 4000,
            },
            "qpMultiQueryHistory": history,
        }
        payload = {
            "scene": strategy_setting["scene"],
            "uid": user_id,
            "uq": string_query.strip(),
            "rid": request_id,
            "fields": [],
            "page": int(SEARCH_PAGE),
            "rows": int(SEARCH_ROWS),
            "customConfigInfo": custom_config,
            "headers": {
                "__d_head_qto": strategy_setting["timeout"],
            },
        }

    if use_green_net:
        custom_config["inspection"] = use_green_net

    if tool_use:
        custom_config["qpToolPlan"] = tool_use

    if is_xinwen_label:
        custom_config["searchIntention"] = ["xinwen"]

    if search_input.type == "image":
        payload["type"] = search_input.type
        custom_config["qpMultiQuery"] = False
    return payload
320
+
321
@staticmethod
async def dashscope_search_kernel(
    url: str,
    payload: str,
    headers: Dict,
    timeout: int,
    **kwargs: Any,
) -> Tuple[List, List]:
    """Execute the core search request to DashScope API.

    Posts ``payload`` to ``url`` and, when the JSON response carries
    ``status == 0``, extracts the result documents and the optional tool
    results. The call is best-effort: any failure (network error, timeout,
    unexpected response shape) is logged and whatever was collected up to
    that point -- normally two empty lists -- is returned.

    Args:
        url: The DashScope search API endpoint URL.
        payload: JSON string containing the search request payload.
        headers: HTTP headers for the request including authorization.
        timeout: Request timeout in seconds (total time for the request).
        **kwargs: Additional keyword arguments (unused).

    Returns:
        Tuple containing:
            - List of search result documents (``data.docs``)
            - List of extra tool information (``data.extras.toolResult``)
    """
    extra_tool_info = []
    results_list = []

    try:
        timeout_config = aiohttp.ClientTimeout(total=timeout)
        async with aiohttp.ClientSession(
            timeout=timeout_config,
        ) as session:
            async with session.post(
                url,
                headers=headers,
                data=payload,
            ) as response:
                results = await response.json()
                if results["status"] == 0:
                    extra_tool_info = results["data"]["extras"].get(
                        "toolResult",
                        [],
                    )
                    results_list = results["data"]["docs"]
    except Exception as e:
        # Deliberately swallowed: callers receive empty results on failure.
        logging.getLogger(__name__).warning(
            "DashScope search request failed: %s",
            e,
            exc_info=True,
        )

    return results_list, extra_tool_info
371
+
372
@staticmethod
def post_process_search_detail(
    search_results: List,
    extra_tool_info: List,
    search_options: Union[SearchOptions, Dict],
    search_output_rules: Dict,
    **kwargs: Any,
) -> Tuple[List[SearchItem], Dict]:
    """Process and validate search results into structured format.

    Each raw document is checked by a ``FieldValidator`` built from
    ``search_output_rules``; documents for which the validator returns a
    falsy value are dropped. A document that cannot be converted is
    logged and skipped without affecting the remaining documents.

    Args:
        search_results: List of raw search result documents from API.
        extra_tool_info: Additional tool information from the search
            response.
        search_options: SearchOptions or dict containing search
            configuration.
        search_output_rules: Dict containing validation rules for
            filtering results.
        **kwargs: Additional keyword arguments (unused).

    Returns:
        Tuple containing:
            - List of processed SearchItem objects
            - Dict with ``"extra_tool_info"`` and, when
              ``enable_source`` is True, ``"search_results"`` holding the
              validated source entries with a 1-based ``"index"``
    """
    if isinstance(search_options, dict):
        search_options = SearchOptions(**search_options)
    field_validator = FieldValidator(search_output_rules)
    enable_source = search_options.enable_source
    search_items = []
    logger = logging.getLogger(__name__)

    def convert_to_timestamp(
        input_val: Any,
        time_format: str = "%Y-%m-%d %H:%M:%S",
    ) -> int:
        """Convert various time formats to timestamp.

        Args:
            input_val: Time value in various formats (int, float, string).
            time_format: Expected string time format for parsing.

        Returns:
            Unix timestamp as integer, or 0 if conversion fails.
        """
        if isinstance(input_val, (int, float)):
            return int(input_val)
        if not isinstance(input_val, str):
            # e.g. an explicit null in the response document
            return 0
        text = input_val.strip()
        try:
            if text.isdigit():
                # Assume the timestamp string consists entirely of digits.
                return int(text)
            parsed = datetime.datetime.strptime(text, time_format)
            return int(parsed.timestamp())
        except (ValueError, OverflowError, OSError):
            # If the timestamp format is incorrect, return 0.
            return 0

    docs = search_results if isinstance(search_results, (list, tuple)) else []
    for doc in docs:
        # Convert each document on its own so that one malformed entry
        # does not discard every document that follows it.
        try:
            candidate = {
                "url": doc.get("url", "") or "",
                "title": doc.get("title", "") or "",
                "icon": doc.get("hostlogo", "") or "",
                "site_name": doc.get("hostname", "") or "",
                "image": doc.get("image", "") or "",
            }
            if not field_validator.validate(candidate):
                continue
            search_items.append(
                SearchItem(
                    title=doc.get("title", "") or "",
                    body=doc.get("snippet", "") or "",
                    href=doc.get("url", "") or "",
                    time=convert_to_timestamp(
                        doc.get("timestamp_format", "0"),
                    ),
                    source=doc.get("hostname", "") or "",
                    relevance=doc.get("_score", 0.0) or 0.0,
                    host_logo=doc.get("hostlogo", "") or "",
                    web_main_body=doc.get("web_main_body", "") or "",
                    image=doc.get("image", "") or "",
                    csi_checked=doc.get("_csi_checked", False) or False,
                ),
            )
        except Exception as e:
            logger.warning(
                "Skipping malformed search result: %s",
                e,
                exc_info=True,
            )

    for order, item in enumerate(search_items):
        item.original_order = order
        # Strip first so surrounding whitespace is removed rather than
        # percent-encoded; an empty URL becomes the "expired_url" marker.
        item.href = item.href.strip().replace(" ", "%20") or "expired_url"
        item.href = item.href.replace("chatm6.sm.cn", "quark.sm.cn")

    search_info = {"extra_tool_info": extra_tool_info}
    if enable_source is True:
        raw_results = []
        index = 1
        if isinstance(search_results, list):
            for doc in search_results:
                if not isinstance(doc, dict):
                    continue
                # Documents explicitly marked as unchecked are not listed
                # as sources; a missing flag counts as checked here.
                if not doc.get("_csi_checked", True):
                    continue
                source_entry = {
                    "url": doc.get("url", "") or "",
                    "title": doc.get("title", "") or "",
                    "index": index,
                    "icon": doc.get("hostlogo", "") or "",
                    "site_name": doc.get("hostname", "") or "",
                }
                filtered_entry = field_validator.validate(source_entry)
                if filtered_entry:
                    raw_results.append(filtered_entry)
                    index += 1
        search_info["search_results"] = raw_results
    return search_items, search_info
499
+
500
+ @staticmethod
501
+ def post_process_search_string(
502
+ search_input: SearchInput,
503
+ search_items: List[SearchItem],
504
+ search_options: Union[SearchOptions, Dict],
505
+ **kwargs: Any,
506
+ ) -> str:
507
+ if isinstance(search_options, dict):
508
+ search_options = SearchOptions(**search_options)
509
+ citation_format = search_options.citation_format
510
+ search_strategy = search_options.search_strategy
511
+ enable_citation = search_options.enable_citation
512
+ enable_source = search_options.enable_source
513
+ query = ModelstudioSearch.preprocess_messages(search_input.messages)[
514
+ -1
515
+ ].content
516
+
517
+ # Determine whether it is an image search
518
+ if search_input.type == "image":
519
+ images = []
520
+ top_n = (
521
+ int(os.getenv("TOP_N", "5"))
522
+ if search_options.top_n == 0
523
+ else search_options.top_n
524
+ )
525
+ image_count = (
526
+ top_n if len(search_items) > top_n else len(search_items)
527
+ )
528
+ for index in range(image_count):
529
+ image_url = search_items[index].image
530
+ images.append(image_url)
531
+ text_result_str = json.dumps(images)
532
+ return text_result_str
533
+
534
+ timestamp_templates = [
535
+ "(搜索结果收录于{}年{}月{}日)",
536
+ "({}年{}月{}日)",
537
+ "(来自{}年{}月{}日的资料)",
538
+ "({}年{}月{}日的资料)",
539
+ "(该信息的时间戳是{}年{}月{}日)",
540
+ "(资料日期为{}年{}月{}日)",
541
+ "(消息于{}年{}月{}日发布)",
542
+ "(发布时间是{}年{}月{}日)",
543
+ "(撰于{}年{}月{}日)",
544
+ "(截至{}年{}月{}日)",
545
+ ]
546
+ random.shuffle(timestamp_templates)
547
+
548
+ cnt_char = 0
549
+ text_result = []
550
+ other_text_result = []
551
+ search_top = kwargs.get("web_main_body_cnt", 3)
552
+ search_nlp_total_char = search_options.item_cnt
553
+
554
+ nlp_web_main_body_cnt = 0
555
+
556
+ def _rm_html(text: str) -> str:
557
+ text = text.replace("\xa0", " ")
558
+ text = text.replace(
559
+ "\t",
560
+ "",
561
+ ) # quark uses \t to split chinese words
562
+ text = text.replace("...", "……")
563
+ text = _HTML_TAG_RE.sub("", text)
564
+ text = text.strip()
565
+ if text.endswith("……"):
566
+ text = text[: -len("……")]
567
+ return text
568
+
569
+ for i, item in enumerate(search_items):
570
+ if item.time > 0:
571
+ t = time.localtime(item.time)
572
+ if i < len(timestamp_templates):
573
+ k = i
574
+ else:
575
+ k = random.randint(0, len(timestamp_templates) - 1)
576
+ text_timestamp = timestamp_templates[k].format(
577
+ t.tm_year,
578
+ t.tm_mon,
579
+ t.tm_mday,
580
+ )
581
+ else:
582
+ text_timestamp = ""
583
+
584
+ if (
585
+ len(item.body) < len(item.web_main_body)
586
+ and nlp_web_main_body_cnt < search_top
587
+ ):
588
+ nlp_web_main_body_cnt += 1
589
+ content = item.web_main_body
590
+ else:
591
+ content = item.body
592
+ snippet = f"{_rm_html(item.title)}\n{_rm_html(content)}".strip()
593
+ text_snippet = snippet.replace("\n", "\\n")
594
+ text_result_cur = text_snippet[:] + text_timestamp
595
+
596
+ # Place into corresponding collection based on whether it
597
+ # passes the check
598
+ if item.csi_checked:
599
+ text_result.append(text_result_cur)
600
+ else:
601
+ other_text_result.append(text_result_cur)
602
+
603
+ cnt_char += len(snippet)
604
+ if cnt_char > search_nlp_total_char:
605
+ # Currently limit search characters to 4k.
606
+ break
607
+
608
+ text_result_str = ""
609
+ match = re.search("<number>", citation_format)
610
+ if not match:
611
+ citation_format = "[<number>]" # Fallback for incorrect input
612
+ if enable_citation and enable_source:
613
+ for i in range(len(text_result)):
614
+ cite_form = re.sub("<number>", str(i + 1), citation_format)
615
+ text_result[i] = cite_form + text_result[i] + "\n\n"
616
+ text_result_str += text_result[i]
617
+ if len(text_result_str) > search_nlp_total_char:
618
+ break
619
+
620
+ if other_text_result:
621
+ # 1. Content removed by the green-net filter will not have
622
+ # [ref_x] citation numbers.
623
+ # 2. After normally citing web pages, add "## Other Internet
624
+ # Information:" and put the content removed by the
625
+ # green-net filter here.
626
+ text_result_str += "## 其他互联网信息:\n\n```"
627
+ for i, text in enumerate(other_text_result):
628
+ text = text + "\n\n"
629
+ text_result_str += text
630
+ if len(text_result_str) > search_nlp_total_char:
631
+ break
632
+ text_result_str += "```\n"
633
+ return text_result_str
634
+
635
+ text_result_str = "\n\n".join(text_result).strip()
636
+ while (
637
+ len(text_result) > 1
638
+ and len(text_result_str) > search_nlp_total_char
639
+ ):
640
+ text_result.pop(-1)
641
+ text_result_str = "\n\n".join(text_result).strip()
642
+
643
+ if search_strategy == "pro_ultra":
644
+ text_result_str = (
645
+ text_result_str.strip()
646
+ + f"# # 参考大纲\n\n{query}\n# 输出要求\n\n请做出有深度的回答,"
647
+ f"不少于1000字,回答时引用上述内容中的细节。"
648
+ )
649
+
650
+ return text_result_str
651
+
652
@staticmethod
def preprocess_messages(
    messages: List[Union[OpenAIMessage, Dict]],
) -> List[Union[OpenAIMessage, Dict]]:
    """Cut the conversation off right after its most recent user message.

    The messages are scanned from the end towards the start. As soon as a
    message whose ``role`` equals ``"user"`` is found, everything up to and
    including that message is returned.

    Args:
        messages: Conversation history. Entries may be ``OpenAIMessage``
            instances or plain dicts that can be unpacked into one.

    Returns:
        A slice of ``messages`` ending at the last user message. The slice
        holds the caller's original entries; dict entries are not replaced
        by the objects built while inspecting them.

    Raises:
        RuntimeError: If no entry has the ``"user"`` role (this includes an
            empty ``messages`` list).
    """
    for end in range(len(messages), 0, -1):
        candidate = messages[end - 1]
        # Dict payloads are parsed only so that ``role`` can be read.
        parsed = (
            OpenAIMessage(**candidate)
            if isinstance(candidate, dict)
            else candidate
        )
        if parsed.role == "user":
            return messages[:end]
    raise RuntimeError("Input unknown")
662
+
663
@staticmethod
def build_knowledge_for_search(
    search_output: SearchOutput,
    **kwargs: Any,
) -> List[KnowledgeHolder]:
    """Convert a search output into knowledge entries for the prompt.

    Args:
        search_output: Supplies ``search_result`` (stringified and used as
            the main knowledge text) and ``search_info``, whose optional
            ``"extra_tool_info"`` list carries per-tool results.
        **kwargs: Optional settings read here: ``search_strategy``
            (default ``"pro_max"``), ``enable_citation`` and
            ``enable_source`` (both default ``False``),
            ``citation_format`` and ``query``.

    Returns:
        Knowledge holders in this order: the search text (only when it is
        non-empty, with citation instructions appended when both
        ``enable_citation`` and ``enable_source`` are truthy), then the
        current Beijing date/time, then the extra-tool prompt.
    """
    default_citation_format = "[<number>]"
    search_strategy = kwargs.get("search_strategy", "pro_max")

    # Text placed in front of each recognised tool's result.
    tool_prefixes = {
        "oil_price": "\n 油价信息:",
        "gold_price": "\n 金价信息:",
        "exchange": "\n 汇率信息:",
        "stock": "\n 股市信息:",
        "silver_price": "\n 银价信息:",
        "weather": "\n 天气信息:",
        "calendar": "\n 万年历信息:",
    }

    def tool_call_knowledge(_tool_output: List) -> str:
        """Join the results of recognised tools under a fixed header."""
        # NOTE(review): the trailing "#以下根据实际返回选择" looks like an
        # authoring note that ended up inside the prompt text; it is kept
        # unchanged here - confirm before editing the prompt.
        prompt = """以下通过权威渠道的实时信息可能有助于你回答问题,请优先参考:#以下根据实际返回选择"""  # noqa E501
        for item in _tool_output:
            if "result" not in item:
                continue
            tool = item.get("tool", "")
            prefix = tool_prefixes.get(tool) if isinstance(tool, str) else None
            if prefix is not None:
                prompt += prefix + item.get("result", "")
        return prompt

    def get_current_date_str() -> str:
        """Describe the current date, weekday and time in UTC+8."""
        # Timezone-aware replacement for the deprecated ``utcnow()``.
        beijing_tz = datetime.timezone(datetime.timedelta(hours=8))
        now = datetime.datetime.now(beijing_tz)
        weekday = "一二三四五六日"[now.weekday()]
        return (
            f"当前时间:{now.year}年{now.month}月{now.day}日,星期"
            f"{weekday}{now.hour}时{now.minute}分。"
        )

    knowledge = []

    # Add time to all app requests.
    search_text = f"{search_output.search_result}".strip()
    if search_text:
        enable_citation = kwargs.get("enable_citation", False)
        enable_source = kwargs.get("enable_source", False)
        if enable_source and enable_citation:
            citation_format = kwargs.get("citation_format", "")
            # Use the same fallback as the result formatter so that the
            # instructions match the markers already present in the text.
            if (
                not isinstance(citation_format, str)
                or "<number>" not in citation_format
            ):
                citation_format = default_citation_format

            def cite(marker: str) -> str:
                return citation_format.replace("<number>", marker)

            if search_strategy == "pro_ultra":
                search_text += (
                    f'# # 参考大纲\n\n{kwargs.get("query", "")}\n# 输出要求\n\n请做出有深度的回答,不少于1000字,回答时引用上述内容中的细节,并在引用处使用如`'  # noqa E501
                    + cite("1")
                    + cite("2")
                    + "`, 的格式标记来源,每一处引用最多引用1个来源。"
                )
            else:
                search_text += (
                    "输出要求\n\n请在回答时引用上述内容,并在引用处使用 `"
                    + citation_format
                    + "` 的格式标记来源,如果有多个来源,则用多个[]来表示,如`"  # noqa E501
                    + cite("1")
                    + cite("2")
                    + "`,如果回答没有引用上述内容则不用输出角标,禁止输出`"
                    + cite("无")
                    + "`或者`"
                    + cite("not_found")
                    + "`"
                )
        knowledge.append(
            KnowledgeHolder(source="你的知识库", content=search_text),
        )

    extra_prompt = tool_call_knowledge(
        search_output.search_info.get("extra_tool_info", []),
    )
    # The helper always returns at least its header line, so this branch
    # currently runs for every request.
    if extra_prompt:
        knowledge.append(
            KnowledgeHolder(
                source="系统",
                content=get_current_date_str(),
            ),
        )
        knowledge.append(
            KnowledgeHolder(source="你的知识库", content=extra_prompt),
        )
    return knowledge
785
+
786
+
787
+ # for validator search item only
788
class ValidationMode(Enum):
    """Per-field handling policies interpreted by ``FieldValidator``.

    ``FieldValidator.validate`` compares rule entries against the member
    *names* (for example ``"AVOID_EMPTY"``), not against the lower-case
    values defined here.
    """

    # Keep the field whenever its value is not None.
    NORMAL = "normal"
    # Keep the field only when its value is not None, "" or [].
    AVOID_EMPTY = "avoid_empty"
    # Never copy the field to the output.
    EXCLUDE = "exclude"
    # Raise ValueError when the value is None, otherwise keep the field.
    FORCE = "force"
    # Drop the whole item when the value is None, "" or [].
    DROPOUT_ENTIRE_IF_MISSING = "dropout_entire_if_missing"
    # Drop the whole item when the value starts with a listed prefix.
    FILTER_ITEMS_FROM_LIST = "filter_items_from_list"
795
+
796
+
797
class FieldValidator:
    """Filter a dict field by field according to ``ValidationMode`` rules.

    ``modes`` maps a field name to a rule. A rule is either a single mode or
    a dict ``{mode: argument}`` holding several modes that are applied in
    order. A mode is given as a ``ValidationMode`` member name (for example
    ``"AVOID_EMPTY"``) or as the member itself. Anything unrecognised is
    handled as NORMAL.
    """

    # Values regarded as "missing" by the emptiness-based modes.
    _EMPTY_VALUES = (None, "", [])

    def __init__(self, modes: Optional[Dict] = None) -> None:
        """Store the rules; anything but a non-empty dict means no rules."""
        self.modes = modes if isinstance(modes, dict) and modes else {}

    @staticmethod
    def _mode_name(mode: Any) -> Any:
        """Return the member name for enum members, else ``mode`` itself."""
        return mode.name if isinstance(mode, ValidationMode) else mode

    @classmethod
    def _is_empty(cls, value: Any) -> bool:
        """Tell whether ``value`` equals None, "" or []."""
        return value in cls._EMPTY_VALUES

    @staticmethod
    def _has_blocked_prefix(value: Any, prefixes: Any) -> bool:
        """Tell whether a non-empty string value starts with a listed prefix.

        Non-string values and non-string prefixes are ignored instead of
        raising from ``startswith``.
        """
        if not isinstance(value, str) or not value:
            return False
        if not isinstance(prefixes, list):
            return False
        return any(
            isinstance(prefix, str) and value.startswith(prefix)
            for prefix in prefixes
        )

    def _apply_rule(
        self,
        key: Any,
        value: Any,
        mode_name: Any,
        mode_arg: Any,
        output_dict: dict,
    ) -> bool:
        """Apply one rule to one field.

        Returns:
            ``False`` when the whole item has to be discarded, otherwise
            ``True``.

        Raises:
            ValueError: For a FORCE rule whose value is ``None``.
        """
        if mode_name == ValidationMode.DROPOUT_ENTIRE_IF_MISSING.name:
            if self._is_empty(value):
                return False
            output_dict[key] = value
        elif mode_name == ValidationMode.AVOID_EMPTY.name:
            if not self._is_empty(value):
                output_dict[key] = value
        elif mode_name == ValidationMode.EXCLUDE.name:
            pass  # Do not add the key to output_dict
        elif mode_name == ValidationMode.FORCE.name:
            if value is None:
                raise ValueError(
                    f"Key '{key}' is required but not provided.",
                )
            output_dict[key] = value
        elif mode_name == ValidationMode.FILTER_ITEMS_FROM_LIST.name:
            # NOTE(review): a value that passes the filter is not copied to
            # the output by this rule alone; combine it with another mode in
            # the same rule dict to keep the field. Existing behavior, kept.
            return not self._has_blocked_prefix(value, mode_arg)
        elif value is not None:  # NORMAL behavior: keep it if it exists
            output_dict[key] = value
        return True

    def validate(self, input_dict: dict) -> dict:
        """Return the filtered copy of ``input_dict``.

        Fields that have a rule are handled by that rule. Fields without a
        rule are copied unchanged.

        Returns:
            The filtered dict, or ``{}`` when a DROPOUT_ENTIRE_IF_MISSING or
            FILTER_ITEMS_FROM_LIST rule rejects the item.

        Raises:
            ValueError: When a FORCE field is missing or ``None``.
        """
        output_dict = {}

        for key, mode in self.modes.items():
            value = input_dict.get(key)

            if isinstance(mode, dict):
                rules = [
                    (self._mode_name(name), arg) for name, arg in mode.items()
                ]
            else:
                name = self._mode_name(mode)
                # A bare mode carries no prefix list, so there is nothing to
                # filter against; it is handled as NORMAL.
                if name == ValidationMode.FILTER_ITEMS_FROM_LIST.name:
                    name = ValidationMode.NORMAL.name
                rules = [(name, None)]

            for mode_name, mode_arg in rules:
                keep_item = self._apply_rule(
                    key,
                    value,
                    mode_name,
                    mode_arg,
                    output_dict,
                )
                if not keep_item:
                    return {}

        # Add keys with NORMAL mode if not explicitly defined in modes
        for key, value in input_dict.items():
            if key not in self.modes:
                output_dict[key] = value

        return output_dict