kweaver-dolphin 0.1.0 (kweaver_dolphin-0.1.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. DolphinLanguageSDK/__init__.py +58 -0
  2. dolphin/__init__.py +62 -0
  3. dolphin/cli/__init__.py +20 -0
  4. dolphin/cli/args/__init__.py +9 -0
  5. dolphin/cli/args/parser.py +567 -0
  6. dolphin/cli/builtin_agents/__init__.py +22 -0
  7. dolphin/cli/commands/__init__.py +4 -0
  8. dolphin/cli/interrupt/__init__.py +8 -0
  9. dolphin/cli/interrupt/handler.py +205 -0
  10. dolphin/cli/interrupt/keyboard.py +82 -0
  11. dolphin/cli/main.py +49 -0
  12. dolphin/cli/multimodal/__init__.py +34 -0
  13. dolphin/cli/multimodal/clipboard.py +327 -0
  14. dolphin/cli/multimodal/handler.py +249 -0
  15. dolphin/cli/multimodal/image_processor.py +214 -0
  16. dolphin/cli/multimodal/input_parser.py +149 -0
  17. dolphin/cli/runner/__init__.py +8 -0
  18. dolphin/cli/runner/runner.py +989 -0
  19. dolphin/cli/ui/__init__.py +10 -0
  20. dolphin/cli/ui/console.py +2795 -0
  21. dolphin/cli/ui/input.py +340 -0
  22. dolphin/cli/ui/layout.py +425 -0
  23. dolphin/cli/ui/stream_renderer.py +302 -0
  24. dolphin/cli/utils/__init__.py +8 -0
  25. dolphin/cli/utils/helpers.py +135 -0
  26. dolphin/cli/utils/version.py +49 -0
  27. dolphin/core/__init__.py +107 -0
  28. dolphin/core/agent/__init__.py +10 -0
  29. dolphin/core/agent/agent_state.py +69 -0
  30. dolphin/core/agent/base_agent.py +970 -0
  31. dolphin/core/code_block/__init__.py +0 -0
  32. dolphin/core/code_block/agent_init_block.py +0 -0
  33. dolphin/core/code_block/assign_block.py +98 -0
  34. dolphin/core/code_block/basic_code_block.py +1865 -0
  35. dolphin/core/code_block/explore_block.py +1327 -0
  36. dolphin/core/code_block/explore_block_v2.py +712 -0
  37. dolphin/core/code_block/explore_strategy.py +672 -0
  38. dolphin/core/code_block/judge_block.py +220 -0
  39. dolphin/core/code_block/prompt_block.py +32 -0
  40. dolphin/core/code_block/skill_call_deduplicator.py +291 -0
  41. dolphin/core/code_block/tool_block.py +129 -0
  42. dolphin/core/common/__init__.py +17 -0
  43. dolphin/core/common/constants.py +176 -0
  44. dolphin/core/common/enums.py +1173 -0
  45. dolphin/core/common/exceptions.py +133 -0
  46. dolphin/core/common/multimodal.py +539 -0
  47. dolphin/core/common/object_type.py +165 -0
  48. dolphin/core/common/output_format.py +432 -0
  49. dolphin/core/common/types.py +36 -0
  50. dolphin/core/config/__init__.py +16 -0
  51. dolphin/core/config/global_config.py +1289 -0
  52. dolphin/core/config/ontology_config.py +133 -0
  53. dolphin/core/context/__init__.py +12 -0
  54. dolphin/core/context/context.py +1580 -0
  55. dolphin/core/context/context_manager.py +161 -0
  56. dolphin/core/context/var_output.py +82 -0
  57. dolphin/core/context/variable_pool.py +356 -0
  58. dolphin/core/context_engineer/__init__.py +41 -0
  59. dolphin/core/context_engineer/config/__init__.py +5 -0
  60. dolphin/core/context_engineer/config/settings.py +402 -0
  61. dolphin/core/context_engineer/core/__init__.py +7 -0
  62. dolphin/core/context_engineer/core/budget_manager.py +327 -0
  63. dolphin/core/context_engineer/core/context_assembler.py +583 -0
  64. dolphin/core/context_engineer/core/context_manager.py +637 -0
  65. dolphin/core/context_engineer/core/tokenizer_service.py +260 -0
  66. dolphin/core/context_engineer/example/incremental_example.py +267 -0
  67. dolphin/core/context_engineer/example/traditional_example.py +334 -0
  68. dolphin/core/context_engineer/services/__init__.py +5 -0
  69. dolphin/core/context_engineer/services/compressor.py +399 -0
  70. dolphin/core/context_engineer/utils/__init__.py +6 -0
  71. dolphin/core/context_engineer/utils/context_utils.py +441 -0
  72. dolphin/core/context_engineer/utils/message_formatter.py +270 -0
  73. dolphin/core/context_engineer/utils/token_utils.py +139 -0
  74. dolphin/core/coroutine/__init__.py +15 -0
  75. dolphin/core/coroutine/context_snapshot.py +154 -0
  76. dolphin/core/coroutine/context_snapshot_profile.py +922 -0
  77. dolphin/core/coroutine/context_snapshot_store.py +268 -0
  78. dolphin/core/coroutine/execution_frame.py +145 -0
  79. dolphin/core/coroutine/execution_state_registry.py +161 -0
  80. dolphin/core/coroutine/resume_handle.py +101 -0
  81. dolphin/core/coroutine/step_result.py +101 -0
  82. dolphin/core/executor/__init__.py +18 -0
  83. dolphin/core/executor/debug_controller.py +630 -0
  84. dolphin/core/executor/dolphin_executor.py +1063 -0
  85. dolphin/core/executor/executor.py +624 -0
  86. dolphin/core/flags/__init__.py +27 -0
  87. dolphin/core/flags/definitions.py +49 -0
  88. dolphin/core/flags/manager.py +113 -0
  89. dolphin/core/hook/__init__.py +95 -0
  90. dolphin/core/hook/expression_evaluator.py +499 -0
  91. dolphin/core/hook/hook_dispatcher.py +380 -0
  92. dolphin/core/hook/hook_types.py +248 -0
  93. dolphin/core/hook/isolated_variable_pool.py +284 -0
  94. dolphin/core/interfaces.py +53 -0
  95. dolphin/core/llm/__init__.py +0 -0
  96. dolphin/core/llm/llm.py +495 -0
  97. dolphin/core/llm/llm_call.py +100 -0
  98. dolphin/core/llm/llm_client.py +1285 -0
  99. dolphin/core/llm/message_sanitizer.py +120 -0
  100. dolphin/core/logging/__init__.py +20 -0
  101. dolphin/core/logging/logger.py +526 -0
  102. dolphin/core/message/__init__.py +8 -0
  103. dolphin/core/message/compressor.py +749 -0
  104. dolphin/core/parser/__init__.py +8 -0
  105. dolphin/core/parser/parser.py +405 -0
  106. dolphin/core/runtime/__init__.py +10 -0
  107. dolphin/core/runtime/runtime_graph.py +926 -0
  108. dolphin/core/runtime/runtime_instance.py +446 -0
  109. dolphin/core/skill/__init__.py +14 -0
  110. dolphin/core/skill/context_retention.py +157 -0
  111. dolphin/core/skill/skill_function.py +686 -0
  112. dolphin/core/skill/skill_matcher.py +282 -0
  113. dolphin/core/skill/skillkit.py +700 -0
  114. dolphin/core/skill/skillset.py +72 -0
  115. dolphin/core/trajectory/__init__.py +10 -0
  116. dolphin/core/trajectory/recorder.py +189 -0
  117. dolphin/core/trajectory/trajectory.py +522 -0
  118. dolphin/core/utils/__init__.py +9 -0
  119. dolphin/core/utils/cache_kv.py +212 -0
  120. dolphin/core/utils/tools.py +340 -0
  121. dolphin/lib/__init__.py +93 -0
  122. dolphin/lib/debug/__init__.py +8 -0
  123. dolphin/lib/debug/visualizer.py +409 -0
  124. dolphin/lib/memory/__init__.py +28 -0
  125. dolphin/lib/memory/async_processor.py +220 -0
  126. dolphin/lib/memory/llm_calls.py +195 -0
  127. dolphin/lib/memory/manager.py +78 -0
  128. dolphin/lib/memory/sandbox.py +46 -0
  129. dolphin/lib/memory/storage.py +245 -0
  130. dolphin/lib/memory/utils.py +51 -0
  131. dolphin/lib/ontology/__init__.py +12 -0
  132. dolphin/lib/ontology/basic/__init__.py +0 -0
  133. dolphin/lib/ontology/basic/base.py +102 -0
  134. dolphin/lib/ontology/basic/concept.py +130 -0
  135. dolphin/lib/ontology/basic/object.py +11 -0
  136. dolphin/lib/ontology/basic/relation.py +63 -0
  137. dolphin/lib/ontology/datasource/__init__.py +27 -0
  138. dolphin/lib/ontology/datasource/datasource.py +66 -0
  139. dolphin/lib/ontology/datasource/oracle_datasource.py +338 -0
  140. dolphin/lib/ontology/datasource/sql.py +845 -0
  141. dolphin/lib/ontology/mapping.py +177 -0
  142. dolphin/lib/ontology/ontology.py +733 -0
  143. dolphin/lib/ontology/ontology_context.py +16 -0
  144. dolphin/lib/ontology/ontology_manager.py +107 -0
  145. dolphin/lib/skill_results/__init__.py +31 -0
  146. dolphin/lib/skill_results/cache_backend.py +559 -0
  147. dolphin/lib/skill_results/result_processor.py +181 -0
  148. dolphin/lib/skill_results/result_reference.py +179 -0
  149. dolphin/lib/skill_results/skillkit_hook.py +324 -0
  150. dolphin/lib/skill_results/strategies.py +328 -0
  151. dolphin/lib/skill_results/strategy_registry.py +150 -0
  152. dolphin/lib/skillkits/__init__.py +44 -0
  153. dolphin/lib/skillkits/agent_skillkit.py +155 -0
  154. dolphin/lib/skillkits/cognitive_skillkit.py +82 -0
  155. dolphin/lib/skillkits/env_skillkit.py +250 -0
  156. dolphin/lib/skillkits/mcp_adapter.py +616 -0
  157. dolphin/lib/skillkits/mcp_skillkit.py +771 -0
  158. dolphin/lib/skillkits/memory_skillkit.py +650 -0
  159. dolphin/lib/skillkits/noop_skillkit.py +31 -0
  160. dolphin/lib/skillkits/ontology_skillkit.py +89 -0
  161. dolphin/lib/skillkits/plan_act_skillkit.py +452 -0
  162. dolphin/lib/skillkits/resource/__init__.py +52 -0
  163. dolphin/lib/skillkits/resource/models/__init__.py +6 -0
  164. dolphin/lib/skillkits/resource/models/skill_config.py +109 -0
  165. dolphin/lib/skillkits/resource/models/skill_meta.py +127 -0
  166. dolphin/lib/skillkits/resource/resource_skillkit.py +393 -0
  167. dolphin/lib/skillkits/resource/skill_cache.py +215 -0
  168. dolphin/lib/skillkits/resource/skill_loader.py +395 -0
  169. dolphin/lib/skillkits/resource/skill_validator.py +406 -0
  170. dolphin/lib/skillkits/resource_skillkit.py +11 -0
  171. dolphin/lib/skillkits/search_skillkit.py +163 -0
  172. dolphin/lib/skillkits/sql_skillkit.py +274 -0
  173. dolphin/lib/skillkits/system_skillkit.py +509 -0
  174. dolphin/lib/skillkits/vm_skillkit.py +65 -0
  175. dolphin/lib/utils/__init__.py +9 -0
  176. dolphin/lib/utils/data_process.py +207 -0
  177. dolphin/lib/utils/handle_progress.py +178 -0
  178. dolphin/lib/utils/security.py +139 -0
  179. dolphin/lib/utils/text_retrieval.py +462 -0
  180. dolphin/lib/vm/__init__.py +11 -0
  181. dolphin/lib/vm/env_executor.py +895 -0
  182. dolphin/lib/vm/python_session_manager.py +453 -0
  183. dolphin/lib/vm/vm.py +610 -0
  184. dolphin/sdk/__init__.py +60 -0
  185. dolphin/sdk/agent/__init__.py +12 -0
  186. dolphin/sdk/agent/agent_factory.py +236 -0
  187. dolphin/sdk/agent/dolphin_agent.py +1106 -0
  188. dolphin/sdk/api/__init__.py +4 -0
  189. dolphin/sdk/runtime/__init__.py +8 -0
  190. dolphin/sdk/runtime/env.py +363 -0
  191. dolphin/sdk/skill/__init__.py +10 -0
  192. dolphin/sdk/skill/global_skills.py +706 -0
  193. dolphin/sdk/skill/traditional_toolkit.py +260 -0
  194. kweaver_dolphin-0.1.0.dist-info/METADATA +521 -0
  195. kweaver_dolphin-0.1.0.dist-info/RECORD +199 -0
  196. kweaver_dolphin-0.1.0.dist-info/WHEEL +5 -0
  197. kweaver_dolphin-0.1.0.dist-info/entry_points.txt +27 -0
  198. kweaver_dolphin-0.1.0.dist-info/licenses/LICENSE.txt +201 -0
  199. kweaver_dolphin-0.1.0.dist-info/top_level.txt +2 -0
dolphin/core/code_block/explore_block.py (new file)
@@ -0,0 +1,1327 @@
+ """ExploreBlock Code Block Implementation
+
+ Supports two tool calling modes:
+ - prompt mode: call tools in the prompt using =># format
+ - tool_call mode (default): use LLM's native tool_call capability
+
+ Control which mode to use via the mode parameter:
+ - mode="prompt": use PromptStrategy
+ - mode="tool_call" (default): use ToolCallStrategy
+
+ Design document: docs/design/architecture/explore_block_merge.md
+ """
+
+ from __future__ import annotations
+
+ import asyncio
+ import json
+ import traceback
+ from datetime import datetime
+ from typing import Any, AsyncGenerator, Dict, List, Optional
+
+ from dolphin.core.code_block.basic_code_block import BasicCodeBlock
+
+ # Hook imports
+ from dolphin.core.hook import (
+     HookConfig,
+     OnStopContext,
+     HookResult,
+     HookDispatcher,
+     HookValidationError,
+     parse_hook_config,
+ )
+ from dolphin.core.context_engineer.config.settings import BuildInBucket
+ from dolphin.lib.skillkits.system_skillkit import SystemFunctions
+
+ from dolphin.core.common.enums import (
+     CategoryBlock,
+     MessageRole,
+     Messages,
+     TypeStage,
+     StreamItem,
+ )
+ from dolphin.core.common.constants import (
+     MAX_SKILL_CALL_TIMES,
+     get_msg_duplicate_skill_call,
+ )
+ from dolphin.core.context.context import Context
+ from dolphin.core.logging.logger import console, console_skill_response
+ from dolphin.core.utils.tools import ToolInterrupt
+ from dolphin.core.llm.llm_client import LLMClient
+ from dolphin.core.context.var_output import SourceType
+ from dolphin.core.logging.logger import get_logger
+ from dolphin.lib.skillkits.cognitive_skillkit import CognitiveSkillkit
+ from dolphin.core.code_block.explore_strategy import (
+     ExploreStrategy,
+     PromptStrategy,
+     ToolCallStrategy,
+     ToolCall,
+ )
+ from dolphin.core.code_block.skill_call_deduplicator import (
+     DefaultSkillCallDeduplicator,
+ )
+ from dolphin.core import flags
+
+ logger = get_logger("code_block.explore_block")
+
+
+ class ExploreBlock(BasicCodeBlock):
+     """Explore code block implementation
+
+     Supports two modes:
+     - mode="prompt": uses PromptStrategy
+     - mode="tool_call" (default): uses ToolCallStrategy
+
+     Args:
+         context: context object
+         debug_infos: debug information (optional)
+         tools_format: tool description format, "short"/"medium"/"full"
+
+     Note:
+         The mode parameter can only be specified via the DPH syntax
+         /explore/(mode="..."), not via the constructor, to avoid priority
+         ambiguity. The default is "tool_call"; parse_block_content() updates
+         the mode after parsing the DPH parameters.
+     """
+
+     def __init__(
+         self,
+         context: Context,
+         debug_infos: Optional[dict] = None,
+         tools_format: str = "medium",
+     ):
+         super().__init__(context)
+
+         self.llm_client = LLMClient(self.context)
+         self.debug_infos = debug_infos
+         self.tools_format = tools_format
+
+         # Mode control: defaults to "tool_call"; parse_block_content() updates
+         # it after parsing the DPH parameters.
+         self.mode = "tool_call"
+         self.strategy = self._create_strategy()
+
+         # Deduplication is controlled by the block parameter; it takes effect
+         # in execute()/continue_exploration().
+         self.enable_skill_deduplicator = getattr(
+             self, "enable_skill_deduplicator", True
+         )
+
+         # State variables
+         self.times = 0
+         self.should_stop_exploration = False
+         self.no_tool_call_count = 0  # Count consecutive rounds without tool calls
+         self.pending_content = None  # Store content without tool_call for merging
+
+         # Session-level tool call batch counter for stable ID generation.
+         # Incremented each time the LLM returns tool calls (per batch, not per tool).
+         self.session_tool_call_counter = 0
+
+         # Hook-based verify attributes
+         self.on_stop: Optional[HookConfig] = None
+         self.current_attempt: int = 0
+         self.hook_history: List[Dict[str, Any]] = []
+         self._last_hook_result: Optional[HookResult] = None
+
+     def _create_strategy(self) -> ExploreStrategy:
+         """Create the corresponding strategy instance according to mode."""
+         if self.mode == "prompt":
+             return PromptStrategy()
+         else:  # tool_call
+             return ToolCallStrategy(tools_format=self.tools_format)
+
+     def parse_block_content(self, content: str, category=None, replace_variables=True):
+         """Override the parent class method to update mode and strategy after parsing DPH syntax.
+
+         According to the design document docs/design/architecture/explore_block_merge.md:
+         - /explore/(mode="tool_call", ...) should use ToolCallStrategy
+         - /explore/(mode="prompt", ...) should use PromptStrategy
+         - Default mode is "tool_call"
+         """
+         # Call parent class parsing
+         super().parse_block_content(content, category, replace_variables)
+
+         # Get mode from parsed arguments
+         parsed_mode = self.params.get("mode", None)
+
+         if parsed_mode is not None:
+             # Validate mode values
+             if parsed_mode not in ["prompt", "tool_call"]:
+                 raise ValueError(
+                     f"Invalid mode: {parsed_mode}, must be 'prompt' or 'tool_call'"
+                 )
+
+             # If mode differs from the current one, update mode and strategy
+             if parsed_mode != self.mode:
+                 self.mode = parsed_mode
+                 self.strategy = self._create_strategy()
+
+     async def execute(
+         self,
+         content,
+         category: CategoryBlock = CategoryBlock.EXPLORE,
+         replace_variables=True,
+     ) -> AsyncGenerator[Dict[str, Any], None]:
+         """Execute exploration code block"""
+         # Call the parent class's execute method
+         async for _ in super().execute(content, category, replace_variables):
+             pass
+
+         # Parse on_stop hook configuration from params
+         self._parse_hook_config()
+
+         assert self.recorder, "recorder is None"
+
+         # Compatible with older versions: output the entire progress content
+         self.recorder.set_output_dump_process(True)
+
+         self.block_start_log("explore")
+
+         # Enable or disable the skill call deduplicator based on configuration
+         # (enabled by default; can be disabled via enable_skill_deduplicator)
+         if hasattr(self, "enable_skill_deduplicator"):
+             self.strategy.set_deduplicator_enabled(self.enable_skill_deduplicator)
+
+         # Save the current system prompt to context so it can be inherited in
+         # multi-turn conversations.
+         if getattr(self, "system_prompt", None):
+             self.context.set_last_system_prompt(self.system_prompt)
+
+         # Save the current skills configuration to context so it can be inherited
+         # in multi-turn conversations.
+         if getattr(self, "skills", None):
+             self.context.set_last_skills(self.skills)
+
+         # Save the current mode to context so it can be inherited in multi-turn conversations.
+         if getattr(self, "mode", None):
+             self.context.set_last_explore_mode(self.mode)
+
+         # Build initial messages
+         self._make_init_messages()
+
+         async for ret in self._execute_main():
+             yield ret
+
+         # Update history and clean up buckets after execution
+         self._update_history_and_cleanup()
+
+     def _parse_hook_config(self) -> None:
+         """Parse on_stop hook configuration from params."""
+         on_stop_value = self.params.get("on_stop", None)
+         if on_stop_value is not None:
+             try:
+                 self.on_stop = parse_hook_config(on_stop_value)
+                 logger.debug(f"Parsed on_stop config: {self.on_stop}")
+             except HookValidationError as e:
+                 logger.error(f"Invalid on_stop configuration: {e}")
+                 raise
+
+     async def _execute_main(self) -> AsyncGenerator[Dict[str, Any], None]:
+         """Unified execution entry point (standard execution + on_stop retry verification)."""
+         if not self.on_stop:
+             async for ret in self._stream_exploration_with_assignment():
+                 yield ret
+             return
+
+         max_attempts = self.on_stop.max_retries + 1
+         last_output: Optional[Dict[str, Any]] = None
+         last_hook_result: Optional[HookResult] = None
+
+         for attempt_idx in range(max_attempts):
+             self.current_attempt = attempt_idx + 1
+
+             if attempt_idx > 0:
+                 self._reset_for_retry()
+
+             logger.info(
+                 f"Hook verify attempt {self.current_attempt}/{max_attempts}"
+             )
+
+             async for ret in self._stream_exploration_with_assignment():
+                 last_output = ret
+                 yield ret
+
+             last_hook_result = await self._trigger_on_stop_hook(last_output or {})
+             self._last_hook_result = last_hook_result
+             self._record_hook_attempt(self.current_attempt, last_output or {}, last_hook_result)
+
+             if last_hook_result.passed:
+                 logger.info(
+                     f"Hook verify passed with score: {last_hook_result.score}"
+                 )
+                 yield self._build_hook_enriched_result(
+                     last_output or {},
+                     last_hook_result,
+                     verified=True,
+                 )
+                 return
+
+             if (not last_hook_result.retry) or (attempt_idx >= max_attempts - 1):
+                 logger.info(
+                     f"Hook verify stopped: retry={last_hook_result.retry}, "
+                     f"attempt={attempt_idx+1}/{max_attempts}"
+                 )
+                 break
+
+             if last_hook_result.feedback:
+                 self._inject_feedback(
+                     last_hook_result.feedback,
+                     last_hook_result.score,
+                     attempt_idx + 1,
+                 )
+                 logger.debug(
+                     "Injected feedback for retry: "
+                     f"{last_hook_result.feedback[:100]}..."
+                 )
+
+         assert last_hook_result is not None
+         logger.info(
+             f"Hook verify failed after {self.current_attempt} attempts, "
+             f"final score: {last_hook_result.score}"
+         )
+         yield self._build_hook_enriched_result(
+             last_output or {},
+             last_hook_result,
+             verified=False,
+         )
+
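
The verify-retry contract implemented by _execute_main above, reduced to a standalone sketch. Function and parameter names here are illustrative; only the passed/retry/feedback fields and the max_retries + 1 attempt count come from the code itself:

    from typing import Any, Awaitable, Callable, Dict, Optional, Tuple

    async def verify_with_retries(
        run_once: Callable[[], Awaitable[Dict[str, Any]]],
        verify: Callable[[Dict[str, Any]], Awaitable[Any]],
        inject_feedback: Callable[[str], None],
        max_retries: int,
    ) -> Tuple[Dict[str, Any], Optional[Any], bool]:
        output: Dict[str, Any] = {}
        result: Optional[Any] = None
        for attempt in range(max_retries + 1):      # max_attempts = max_retries + 1
            output = await run_once()               # one full exploration pass
            result = await verify(output)           # the on_stop hook decides
            if result.passed:
                return output, result, True         # accepted: stop retrying
            if not result.retry or attempt == max_retries:
                break                               # hook vetoed retry, or out of attempts
            if result.feedback:
                inject_feedback(result.feedback)    # feedback becomes a USER message
        return output, result, False
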
+     def _reset_for_retry(self) -> None:
+         """Reset exploration state before retry (preserving message history)."""
+         self.should_stop_exploration = False
+         self.times = 0
+         self.no_tool_call_count = 0
+         self.strategy.reset_deduplicator()
+
+     async def _stream_exploration_with_assignment(
+         self,
+     ) -> AsyncGenerator[Dict[str, Any], None]:
+         """Execute exploration with streaming yield, maintaining assign_type output logic."""
+         has_add = False if self.assign_type == ">>" else None
+
+         while True:
+             self.context.check_user_interrupt()
+
+             async for ret in self._explore_once(no_cache=True):
+                 has_add = self._write_output_var(ret, has_add)
+                 yield ret
+
+             if not self._should_continue_explore():
+                 break
+
+     def _write_output_var(
+         self,
+         ret: Dict[str, Any],
+         has_add: Optional[bool],
+     ) -> Optional[bool]:
+         """Write to output_var based on assign_type and return updated has_add flag."""
+         if self.assign_type == ">>":
+             if has_add:
+                 self.context.update_var_output(
+                     self.output_var, ret, SourceType.EXPLORE
+                 )
+             else:
+                 self.context.append_var_output(
+                     self.output_var, ret, SourceType.EXPLORE
+                 )
+                 has_add = True
+         elif self.assign_type == "->":
+             self.context.set_var_output(self.output_var, ret, SourceType.EXPLORE)
+         return has_add
+
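
In plain terms, _write_output_var gives "->" overwrite semantics and ">>" append-then-revise semantics: the first streamed item is appended, and every later item revises that same entry (assuming update_var_output revises the previously appended entry). A self-contained sketch with the context API replaced by a plain list/value:

    def simulate_assign(assign_type: str, items: list):
        """Toy model of the has_add handling above."""
        appended: list = []
        latest = None
        has_add = False if assign_type == ">>" else None
        for ret in items:
            if assign_type == ">>":
                if has_add:
                    appended[-1] = ret      # update_var_output: revise the current entry
                else:
                    appended.append(ret)    # append_var_output: first write
                    has_add = True
            elif assign_type == "->":
                latest = ret                # set_var_output: overwrite
        return appended if assign_type == ">>" else latest

    assert simulate_assign("->", [1, 2, 3]) == 3
    assert simulate_assign(">>", [1, 2, 3]) == [3]
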
+     async def _trigger_on_stop_hook(self, output: Dict[str, Any]) -> HookResult:
+         """Trigger the on_stop hook and return result.
+
+         This method builds the OnStopContext from the exploration output and
+         dispatches it to the configured hook handler (expression or agent).
+
+         Note: Agent-based verification (@verifier) is not yet supported in v1.
+         Currently only expression-based handlers are functional. When agent
+         support is added, the runtime parameter will be properly initialized.
+
+         Args:
+             output: The exploration output containing answer, think, etc.
+
+         Returns:
+             HookResult from hook execution, or a degraded result on timeout/error.
+         """
+         # Build hook context from output
+         context = OnStopContext(
+             attempt=self.current_attempt,
+             stage="explore",
+             answer=self._extract_answer(output),
+             think=self._extract_think(output),
+             steps=self.times,
+             tool_calls=self._collect_tool_calls(),
+         )
+
+         # Dispatch hook with timeout protection
+         dispatcher = HookDispatcher(
+             config=self.on_stop,
+             context=context,
+             variable_pool=self.context.variable_pool,
+             # TODO: Pass runtime when agent-based verification is implemented.
+             # Agent verification requires runtime to load and execute .dph files.
+             runtime=None,
+         )
+
+         # Apply timeout protection to prevent hook execution from blocking indefinitely.
+         # Use agent_timeout from HookConfig (default: 60s). Keep backward-compatible fallback.
+         timeout_seconds = getattr(self.on_stop, "agent_timeout", 60)
+
+         try:
+             return await asyncio.wait_for(
+                 dispatcher.dispatch(),
+                 timeout=timeout_seconds
+             )
+         except asyncio.TimeoutError:
+             logger.warning(
+                 f"Hook dispatch timeout after {timeout_seconds}s, "
+                 f"returning degraded result"
+             )
+             return HookResult(
+                 score=0.0,
+                 passed=False,
+                 feedback=None,
+                 retry=False,
+                 breakdown=None,
+                 error=f"Hook execution timeout after {timeout_seconds}s",
+                 error_type="timeout",
+                 execution_status="timeout",
+             )
+
+     def _extract_answer(self, output: Optional[Dict[str, Any]]) -> str:
+         """Extract answer from output dict."""
+         if not output:
+             return ""
+         if isinstance(output, dict):
+             return output.get("answer", "") or output.get("block_answer", "")
+         if isinstance(output, list) and len(output) > 0:
+             last = output[-1]
+             if isinstance(last, dict):
+                 return last.get("answer", "") or last.get("block_answer", "")
+         return str(output) if output else ""
+
+     def _extract_think(self, output: Optional[Dict[str, Any]]) -> str:
+         """Extract thinking process from output dict."""
+         if not output:
+             return ""
+         if isinstance(output, dict):
+             return output.get("think", "")
+         if isinstance(output, list) and len(output) > 0:
+             last = output[-1]
+             if isinstance(last, dict):
+                 return last.get("think", "")
+         return ""
+
+     def _collect_tool_calls(self) -> List[Dict[str, Any]]:
+         """Collect tool calls made during exploration."""
+         return self.strategy.get_tool_call_history()
+
+     def _record_hook_attempt(
+         self,
+         attempt: int,
+         output: Dict[str, Any],
+         hook_result: HookResult
+     ) -> None:
+         """Record hook attempt to history for trajectory tracking."""
+         record = {
+             "attempt": attempt,
+             "timestamp": datetime.now().isoformat(),
+             "score": hook_result.score,
+             "passed": hook_result.passed,
+             "feedback": hook_result.feedback,
+             "retry": hook_result.retry,
+         }
+         if hook_result.breakdown:
+             record["breakdown"] = hook_result.breakdown
+         if hook_result.error:
+             record["error"] = hook_result.error
+             record["error_type"] = hook_result.error_type
+
+         self.hook_history.append(record)
+
+     def _inject_feedback(self, feedback: str, score: float, attempt: int) -> None:
+         """Inject feedback as user message to scratchpad.
+
+         Args:
+             feedback: Feedback message from hook
+             score: Current score
+             attempt: Current attempt number
+         """
+         formatted = f"""[Verification Failed - Please Improve]
+ Score: {score:.2f} / Target: {self.on_stop.threshold:.2f}
+ Attempt: {attempt}
+
+ Feedback:
+ {feedback}
+
+ Please reconsider your approach and improve your answer based on the feedback above.
+ """
+         # Add feedback as user message to scratchpad
+         feedback_messages = Messages()
+         feedback_messages.add_message(formatted, MessageRole.USER)
+         self.context.add_bucket(
+             BuildInBucket.SCRATCHPAD.value,
+             feedback_messages,
+         )
+
+     def _build_hook_enriched_result(
+         self,
+         output: Dict[str, Any],
+         hook_result: HookResult,
+         verified: bool
+     ) -> Dict[str, Any]:
+         """Build result enriched with hook information.
+
+         Args:
+             output: Original exploration output
+             hook_result: Last hook result
+             verified: Whether verification passed
+
+         Returns:
+             Enriched result dict
+         """
+         result = output.copy() if isinstance(output, dict) else {"answer": output}
+
+         # Add hook-related fields
+         result["score"] = hook_result.score
+         result["passed"] = verified
+         result["attempts"] = self.current_attempt
+         result["hook_history"] = self.hook_history.copy()
+
+         if hook_result.feedback:
+             result["feedback"] = hook_result.feedback
+
+         if hook_result.error:
+             result["verification_error"] = hook_result.error
+             result["verification_status"] = hook_result.execution_status
+         else:
+             result["verification_status"] = "success"
+
+         return result
+
+     def _make_init_messages(self):
+         """Build initialization messages"""
+         skillkit = self.get_skillkit()
+         system_message = self.strategy.make_system_message(
+             skillkit=skillkit,
+             system_prompt=self.system_prompt,
+             tools_format=self.tools_format,
+         )
+
+         # Add system message
+         if len(system_message.strip()) > 0 and self.context.context_manager:
+             self.context.add_bucket(
+                 BuildInBucket.SYSTEM.value,
+                 system_message,
+                 message_role=MessageRole.SYSTEM,
+             )
+
+         # Add user question
+         if self.content and self.context.context_manager:
+             self.context.add_bucket(
+                 BuildInBucket.QUERY.value,
+                 self.content,
+             )
+
+         # Process historical messages
+         history_messages = self._make_history_messages()
+         if (
+             self.history
+             and history_messages is not None
+             and not history_messages.empty()
+             and self.context.context_manager
+         ):
+             self.context.set_history_bucket(history_messages)
+
+     def _make_history_messages(self) -> Optional[Messages]:
+         """Build history messages"""
+         if isinstance(self.history, bool):
+             use_history_flag = self.history
+         else:
+             use_history_flag = str(self.history).lower() == "true"
+
+         if use_history_flag:
+             history_messages = self.context.get_history_messages()
+             return history_messages or Messages()
+         return None
+
+     async def _explore_once(self, no_cache: bool = False):
+         """Perform one exploration"""
+         self.context.debug(
+             f"explore[{self.output_var}] messages[{self.context.get_messages().str_summary()}] "
+             f"length[{self.context.get_messages().length()}]"
+         )
+
+         # Check if there is a tool call for interruption recovery
+         if self._has_pending_tool_call():
+             async for ret in self._handle_resumed_tool_call():
+                 yield ret
+         else:
+             async for ret in self._handle_new_tool_call(no_cache):
+                 yield ret
+
+     def _has_pending_tool_call(self) -> bool:
+         """Check if there is a pending tool call (interrupt recovery)."""
+         intervention_tmp_key = "intervention_explore_block_vars"
+         return (
+             intervention_tmp_key in self.context.get_all_variables().keys()
+             and "tool" in self.context.get_all_variables().keys()
+         )
+
+     async def _handle_resumed_tool_call(self):
+         """Handle a tool call resumed after an interrupt."""
+         intervention_tmp_key = "intervention_explore_block_vars"
+
+         # Get the content of the saved temporary variables
+         intervention_vars = self.context.get_var_value(intervention_tmp_key)
+         self.context.delete_variable(intervention_tmp_key)
+
+         # Restore the complete message context
+         saved_messages = intervention_vars.get("prompt")
+         if saved_messages is not None:
+             msgs = Messages()
+             msgs.extend_plain_messages(saved_messages)
+             self.context.set_messages(msgs)
+
+         input_dict = self.context.get_var_value("tool")
+         function_name = input_dict["tool_name"]
+         raw_tool_args = input_dict["tool_args"]
+         function_params_json = {arg["key"]: arg["value"] for arg in raw_tool_args}
+
+         if self.recorder:
+             self.recorder.update(
+                 stage=TypeStage.SKILL,
+                 source_type=SourceType.EXPLORE,
+                 skill_name=function_name,
+                 skill_type=self.context.get_skill_type(function_name),
+                 skill_args=function_params_json,
+             )
+         self.context.delete_variable("tool")
+
+         return_answer = {}
+         try:
+             props = {"intervention": False}
+             have_answer = False
+
+             async for resp in self.skill_run(
+                 skill_name=function_name,
+                 source_type=SourceType.EXPLORE,
+                 skill_params_json=function_params_json,
+                 props=props,
+             ):
+                 if (
+                     isinstance(resp, dict)
+                     and "answer" in resp
+                     and isinstance(resp["answer"], dict)
+                     and "answer" in resp["answer"]
+                 ):
+                     return_answer["answer"] = resp["answer"].get("answer", "")
+                     return_answer["think"] = resp["answer"].get("think", "")
+                     if "block_answer" in resp:
+                         return_answer["block_answer"] = resp.get("block_answer", "")
+                 else:
+                     if self.recorder:
+                         self.recorder.update(
+                             item={"answer": resp, "block_answer": resp},
+                             stage=TypeStage.SKILL,
+                             source_type=SourceType.EXPLORE,
+                             skill_name=function_name,
+                             skill_type=self.context.get_skill_type(function_name),
+                             skill_args=function_params_json,
+                         )
+                 have_answer = True
+                 yield self.recorder.get_progress_answers() if self.recorder else None
+
+             console_skill_response(
+                 skill_name=function_name,
+                 response=self.recorder.get_answer() if self.recorder else "",
+                 max_length=1024,
+             )
+
+             if not have_answer and self.recorder:
+                 self.recorder.update(
+                     item=f"Calling {function_name} tool did not return proper results, need to call again.",
+                     source_type=SourceType.EXPLORE,
+                 )
+         except ToolInterrupt as e:
+             if "tool" in self.context.get_all_variables().keys():
+                 self.context.delete_variable("tool")
+             yield self.recorder.get_progress_answers() if self.recorder else None
+             raise e
+         except Exception as e:
+             logger.error(f"Error calling tool, error type: {type(e)}")
+             logger.error(f"Error details: {str(e)}")
+             return_answer["think"] = (
+                 f"Error occurred when calling {function_name} tool, need to call again. Error message: {str(e)}"
+             )
+             return_answer["answer"] = (
+                 f"Error occurred when calling {function_name} tool, need to call again. Error message: {str(e)}"
+             )
+
+         return_answer["status"] = "completed"
+         yield [return_answer]
+
+         # Add the tool response message
+         tool_response, metadata = self._process_skill_result_with_hook(function_name)
+
+         # Extract tool_call_id
+         tool_call_id = self._extract_tool_call_id()
+         if not tool_call_id:
+             tool_call_id = f"call_{function_name}_{self.times}"
+
+         self.strategy.append_tool_response_message(
+             self.context, tool_call_id, str(tool_response), metadata
+         )
+
+     async def _handle_new_tool_call(self, no_cache: bool):
+         """Handle a new tool call.
+
+         Supports both single and multiple tool calls based on the
+         ENABLE_PARALLEL_TOOL_CALLS feature flag.
+         """
+         # Use the current counter value; it is only incremented when tool calls are detected
+         current_counter = self.session_tool_call_counter
+
+         # Regenerate the system message to include dynamically loaded tools
+         current_skillkit = self.get_skillkit()
+         updated_system_message = self.strategy.make_system_message(
+             skillkit=current_skillkit,
+             system_prompt=self.system_prompt,
+             tools_format=self.tools_format,
+         )
+
+         # Update the SYSTEM bucket
+         if len(updated_system_message.strip()) > 0 and self.context.context_manager:
+             self.context.add_bucket(
+                 BuildInBucket.SYSTEM.value,
+                 updated_system_message,
+                 message_role=MessageRole.SYSTEM,
+             )
+
+         # Get LLM messages
+         llm_messages = self.context.context_manager.to_dph_messages()
+
+         # Always re-fetch the skillkit to include dynamically loaded tools
+         llm_params = self.strategy.get_llm_params(
+             messages=llm_messages,
+             model=self.model,
+             skillkit=current_skillkit,  # Use the current skillkit
+             tool_choice=getattr(self, "tool_choice", None),  # Consistent with V2: used only when explicitly specified by the user
+             no_cache=no_cache,
+         )
+
+         # Create a stream renderer for live markdown (CLI layer)
+         renderer = None
+         on_chunk = None
+         if self.context.is_cli_mode():
+             try:
+                 from dolphin.cli.ui.stream_renderer import LiveStreamRenderer
+
+                 renderer = LiveStreamRenderer(verbose=self.context.is_verbose())
+                 renderer.start()
+                 on_chunk = renderer.on_chunk
+             except ImportError:
+                 pass
+
+         try:
+             # Initialize stream_item
+             stream_item = StreamItem()
+             async for stream_item in self.llm_chat_stream(
+                 llm_params=llm_params,
+                 recorder=self.recorder,
+                 content=self.content if self.content else "",
+                 early_stop_on_tool_call=True,
+                 on_stream_chunk=on_chunk,
+                 session_counter=current_counter,  # Pass the counter for stable ID generation
+             ):
+                 # Use the strategy's has_valid_tool_call method, compatible with both prompt and tool_call modes
+                 if not self.strategy.has_valid_tool_call(stream_item, self.context):
+                     yield self.recorder.get_progress_answers() if self.recorder else None
+                 else:
+                     # In tool_call mode, wait for the complete tool call (including arguments);
+                     # in prompt mode, detect_tool_call parses the complete arguments
+                     tool_call = self.strategy.detect_tool_call(stream_item, self.context)
+                     if tool_call is not None:
+                         # For tool_call mode, ensure the arguments are completely received
+                         if self.mode == "tool_call" and not stream_item.has_complete_tool_call():
+                             # tool_name received but tool_args not complete yet; keep waiting
+                             yield self.recorder.get_progress_answers() if self.recorder else None
+                             continue
+
+                         logger.debug(
+                             f"explore[{self.output_var}] find skill call [{tool_call.name}]"
+                         )
+                         break
+         except Exception as e:
+             # Handle UserInterrupt: save partial output to context before re-raising.
+             # This preserves the LLM's partial output in the scratchpad, so that on
+             # resume the LLM can see what it was outputting before the interruption.
+             from dolphin.core.common.exceptions import UserInterrupt
+
+             if isinstance(e, UserInterrupt):
+                 if stream_item and stream_item.answer:
+                     self._append_assistant_message(stream_item.answer)
+                     logger.debug(f"UserInterrupt: saved partial output ({len(stream_item.answer)} chars) to context")
+             raise
+         finally:
+             if renderer:
+                 renderer.stop()
+
+         console("\n", verbose=self.context.is_verbose())
+
+         if self.times >= MAX_SKILL_CALL_TIMES:
+             self.context.warn(
+                 f"max skill call times reached {MAX_SKILL_CALL_TIMES} times, answer[{stream_item.to_dict()}]"
+             )
+         else:
+             self.times += 1
+
+         if self.recorder:
+             self.recorder.update(
+                 item=stream_item,
+                 raw_output=stream_item.answer,
+                 is_completed=True,
+                 source_type=SourceType.EXPLORE,
+             )
+         yield self.recorder.get_progress_answers() if self.recorder else None
+
+         # Detect tool calls based on the feature flag
+         if flags.is_enabled(flags.ENABLE_PARALLEL_TOOL_CALLS):
+             tool_calls = self.strategy.detect_tool_calls(stream_item, self.context)
+         else:
+             single = self.strategy.detect_tool_call(stream_item, self.context)
+             tool_calls = [single] if single else []
+
+         if not tool_calls:
+             # No tool call detected: stop immediately.
+             # If there is pending content, merge it before adding.
+             if self.pending_content:
+                 # Merge the pending content and the current content
+                 combined_content = self.pending_content + "\n\n" + stream_item.answer
+                 self._append_assistant_message(combined_content)
+                 self.context.debug(f"Added after merging pending content, total length: {len(combined_content)}")
+                 self.pending_content = None
+             else:
+                 # No pending content; add the current answer directly
+                 self._append_assistant_message(stream_item.answer)
+                 self.context.debug(f"no valid skill call, answer[{stream_item.answer}]")
+
+             # Stop exploration immediately
+             self.should_stop_exploration = True
+             self.context.debug("No tool call, stopping exploration")
+             return
+
+         # Reset the no-tool-call count (this round has a tool call)
+         self.no_tool_call_count = 0
+
+         # Increment the session counter only when tool calls are actually detected.
+         # This ensures stable ID generation without gaps.
+         self.session_tool_call_counter += 1
+
+         # If there is pending content, merge it with the current tool_call
+         if self.pending_content:
+             self.context.debug("Detected pending content, will merge with tool_call")
+             # Merge the pending content with the current tool_call content
+             if stream_item.answer:
+                 stream_item.answer = self.pending_content + "\n" + stream_item.answer
+             else:
+                 stream_item.answer = self.pending_content
+             self.pending_content = None
+
+         # Log detected tool calls (use info level for significant multi-tool events)
+         if len(tool_calls) > 1:
+             logger.info(
+                 f"explore[{self.output_var}] detected {len(tool_calls)} tool calls: "
+                 f"{[tc.name for tc in tool_calls]}"
+             )
+
+         # Add the tool calls message and execute.
+         #
+         # Execution path selection:
+         # - Multiple tool calls (flag enabled + len > 1): use the new multi-tool-call path
+         #   with append_tool_calls_message() and _execute_tool_calls_sequential()
+         # - Single tool call (or flag disabled): use the existing single-tool-call path
+         #   for maximum backward compatibility, even when the flag is enabled but only
+         #   one tool call is returned by the LLM
+         if flags.is_enabled(flags.ENABLE_PARALLEL_TOOL_CALLS) and len(tool_calls) > 1:
+             # Multiple tool calls: use the new methods
+             self.strategy.append_tool_calls_message(
+                 self.context, stream_item, tool_calls
+             )
+             async for ret in self._execute_tool_calls_sequential(stream_item, tool_calls):
+                 yield ret
+         else:
+             # Single tool call (or flag disabled): use the existing methods for backward compatibility
+             tool_call = tool_calls[0]
+
+             # Deduplicator
+             deduplicator = self.strategy.get_deduplicator()
+
+             # Check for duplicate calls
+             skill_call_for_dedup = (tool_call.name, tool_call.arguments)
+             if not deduplicator.is_duplicate(skill_call_for_dedup):
+                 # Add the tool call message
+                 self.strategy.append_tool_call_message(
+                     self.context, stream_item, tool_call
+                 )
+                 deduplicator.add(skill_call_for_dedup)
+
+                 async for ret in self._execute_tool_call(stream_item, tool_call):
+                     yield ret
+             else:
+                 await self._handle_duplicate_tool_call(tool_call, stream_item)
+
+     async def _execute_tool_call(self, stream_item: StreamItem, tool_call: ToolCall):
+         """Execute tool call"""
+         # Checkpoint: Check user interrupt before executing tool
+         self.context.check_user_interrupt()
+
+         intervention_tmp_key = "intervention_explore_block_vars"
+
+         # Ensure tool response message will definitely be added
+         tool_response_added = False
+         answer_content = ""
+         metadata = None
+
+         try:
+             intervention_vars = {
+                 "prompt": self.context.get_messages().get_messages_as_dict(),
+                 "tool_name": tool_call.name,
+                 "cur_llm_stream_answer": stream_item.answer,
+                 "all_answer": stream_item.answer,
+             }
+
+             self.context.set_variable(intervention_tmp_key, intervention_vars)
+
+             async for resp in self.skill_run(
+                 source_type=SourceType.EXPLORE,
+                 skill_name=tool_call.name,
+                 skill_params_json=tool_call.arguments or {},
+             ):
+                 yield self.recorder.get_progress_answers() if self.recorder else None
+
+             # Update deduplicator results
+             deduplicator = self.strategy.get_deduplicator()
+             deduplicator.add(
+                 (tool_call.name, tool_call.arguments),
+                 self.recorder.get_answer() if self.recorder else None,
+             )
+
+             # Add tool response message
+             tool_response, metadata = self._process_skill_result_with_hook(tool_call.name)
+
+             answer_content = (
+                 tool_response
+                 if tool_response is not None
+                 and not CognitiveSkillkit.is_cognitive_skill(tool_call.name)
+                 else ""
+             )
+
+             if len(answer_content) > self.context.get_max_answer_len():
+                 answer_content = answer_content[
+                     : self.context.get_max_answer_len()
+                 ] + "(... too long, truncated to {})".format(
+                     self.context.get_max_answer_len()
+                 )
+
+             self.strategy.append_tool_response_message(
+                 self.context, tool_call.id, answer_content, metadata
+             )
+             tool_response_added = True
+
+         except ToolInterrupt as e:
+             self._handle_tool_interrupt(e, tool_call.name)
+             # Add tool response even if interrupted (maintain context integrity)
+             answer_content = f"Tool execution interrupted: {str(e)}"
+             self.strategy.append_tool_response_message(
+                 self.context, tool_call.id, answer_content, metadata
+             )
+             tool_response_added = True
+             raise e
+         except Exception as e:
+             self._handle_tool_execution_error(e, tool_call.name)
+             # Add tool response even if error occurs (maintain context integrity)
+             answer_content = f"Tool execution error: {str(e)}"
+             self.strategy.append_tool_response_message(
+                 self.context, tool_call.id, answer_content
+             )
+             tool_response_added = True
+         finally:
+             # Ensure tool response message is always added (core fix)
+             if not tool_response_added:
+                 self.strategy.append_tool_response_message(
+                     self.context, tool_call.id, answer_content
+                 )
+
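
The invariant _execute_tool_call maintains, isolated into a sketch: every tool call gets exactly one response message appended, whether the call succeeds, raises, or is interrupted, so the message history never contains a tool call without a matching response. Names below are illustrative, and InterruptedError stands in for ToolInterrupt:

    async def call_with_guaranteed_response(run_tool, append_response, call_id: str):
        added = False
        try:
            result = await run_tool()
            append_response(call_id, str(result))                     # success path
            added = True
        except InterruptedError:
            append_response(call_id, "Tool execution interrupted")    # still respond
            added = True
            raise                                                     # interrupts propagate
        except Exception as exc:
            append_response(call_id, f"Tool execution error: {exc}")  # report, don't raise
            added = True
        finally:
            if not added:                                             # safety net (core fix)
                append_response(call_id, "")
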
+     async def _execute_tool_calls_sequential(
+         self,
+         stream_item: StreamItem,
+         tool_calls: List[ToolCall]
+     ):
+         """Sequentially execute multiple tool calls (by index order).
+
+         Note: "parallel" in OpenAI terminology means the model decides multiple tool
+         calls in one turn, not that Dolphin executes them concurrently. This method
+         executes tool calls one after another in index order.
+
+         Error Handling Strategy (based on OpenAI best practices):
+         - Non-critical failures: Continue with remaining tools, log errors
+         - ToolInterrupt: Propagate immediately (critical user or system interrupt)
+         - Malformed arguments: Skip the tool call with error response, continue others
+
+         This approach provides graceful degradation while maintaining context integrity.
+         Each tool's response (success or error) is added to context for LLM visibility.
+
+         Args:
+             stream_item: The streaming response item containing the tool calls
+             tool_calls: List of ToolCall objects to execute
+
+         Yields:
+             Progress updates from each tool execution
+         """
+         # Track execution statistics for debugging
+         total_calls = len(tool_calls)
+         successful_calls = 0
+         failed_calls = 0
+         deduplicator = self.strategy.get_deduplicator()
+
+         for i, tool_call in enumerate(tool_calls):
+             logger.debug(f"Executing tool call {i+1}/{total_calls}: {tool_call.name}")
+
+             # Skip tool calls with unparseable JSON arguments
+             # (arguments is None when JSON parsing failed during streaming)
+             if tool_call.arguments is None:
+                 failed_calls += 1
+                 self.context.error(
+                     f"Tool call {tool_call.name} (id={tool_call.id}) skipped: "
+                     f"JSON arguments failed to parse."
+                 )
+                 # Add an error response to maintain context integrity.
+                 # This allows the LLM to see the failure and potentially retry.
+                 self.strategy.append_tool_response_message(
+                     self.context,
+                     tool_call.id,
+                     f"Error: Failed to parse JSON arguments for tool {tool_call.name}",
+                     metadata={"error": True}
+                 )
+                 continue
+
+             # Deduplicate to avoid repeated executions (side effects / cost).
+             skill_call_for_dedup = (tool_call.name, tool_call.arguments)
+             if deduplicator.is_duplicate(skill_call_for_dedup):
+                 failed_calls += 1
+                 self.context.warn(
+                     f"Duplicate tool call skipped: {deduplicator.get_call_key(skill_call_for_dedup)}"
+                 )
+                 self.strategy.append_tool_response_message(
+                     self.context,
+                     tool_call.id,
+                     f"Skipped duplicate tool call: {tool_call.name}",
+                     metadata={"duplicate": True},
+                 )
+                 continue
+             deduplicator.add(skill_call_for_dedup)
+
+             # Execute the tool call
+             try:
+                 async for ret in self._execute_tool_call(stream_item, tool_call):
+                     yield ret
+                 successful_calls += 1
+             except ToolInterrupt as e:
+                 # ToolInterrupt is critical - propagate immediately
+                 # (e.g., user cancellation, system limit reached)
+                 logger.info(
+                     f"Tool execution interrupted at {i+1}/{total_calls}, "
+                     f"completed: {successful_calls}, failed: {failed_calls}"
+                 )
+                 raise e
+             except Exception as e:
+                 # Non-critical failure: log and continue with remaining tools.
+                 # The response message is already added in _execute_tool_call's exception handler.
+                 failed_calls += 1
+                 self.context.error(
+                     f"Tool call {tool_call.name} failed: {e}, continuing with remaining tools"
+                 )
+
+         # Log execution summary for debugging
+         if failed_calls > 0:
+             logger.warning(
+                 f"Multiple tool calls completed with errors: "
+                 f"{successful_calls}/{total_calls} successful, {failed_calls} failed"
+             )
+
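
The degradation policy described in the docstring above, condensed: calls with unparseable arguments or duplicates are answered with an explanatory response and skipped, ordinary failures are logged and the loop continues, and only interrupts abort the batch. A sketch (InterruptedError again stands in for ToolInterrupt; the .id/.name/.arguments shape follows ToolCall from explore_strategy):

    async def run_batch(tool_calls, execute_one, respond):
        ok = failed = 0
        for call in tool_calls:
            if call.arguments is None:      # JSON arguments never parsed
                failed += 1
                respond(call.id, f"Error: failed to parse arguments for {call.name}")
                continue
            try:
                await execute_one(call)
                ok += 1
            except InterruptedError:        # critical: abort the whole batch
                raise
            except Exception:
                failed += 1                 # non-critical: keep going
        return ok, failed
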
+     async def _handle_duplicate_tool_call(self, tool_call: ToolCall, stream_item: StreamItem):
+         """Handling Duplicate Tool Calls"""
+         message = get_msg_duplicate_skill_call()
+         self._append_assistant_message(message)
+
+         if self.recorder:
+             self.recorder.update(
+                 item={"answer": message, "think": ""},
+                 raw_output=stream_item.answer,
+                 source_type=SourceType.EXPLORE,
+             )
+
+         deduplicator = self.strategy.get_deduplicator()
+         self.context.warn(
+             f"Duplicate skill call detected: {deduplicator.get_call_key((tool_call.name, tool_call.arguments))}"
+         )
+
+     def _handle_tool_interrupt(self, e: Exception, tool_name: str):
+         """Handle a tool interrupt"""
+         self.context.info(f"tool interrupt in call {tool_name} tool")
+         if "tool" in self.context.get_all_variables().keys():
+             self.context.delete_variable("tool")
+
+     def _handle_tool_execution_error(self, e: Exception, tool_name: str):
+         """Handling tool execution errors"""
+         error_trace = traceback.format_exc()
+         self.context.error(
+             f"error in call {tool_name} tool, error type: {type(e)}, error info: {str(e)}, error trace: {error_trace}"
+         )
+
+     def _should_continue_explore(self) -> bool:
+         """Check whether to continue the next exploration.
+
+         Termination conditions:
+         1. The maximum number of tool calls has been reached
+         2. The number of repeated tool calls exceeds the limit
+         3. The last round produced no tool call
+         """
+         # 1. If the maximum number of calls has been reached, stop exploring
+         if self.times >= MAX_SKILL_CALL_TIMES:
+             return False
+
+         # 2. Check for repeated calls exceeding the limit
+         deduplicator = self.strategy.get_deduplicator()
+         if hasattr(deduplicator, 'skillcalls') and deduplicator.skillcalls:
+             recent_calls = list(deduplicator.skillcalls.values())
+             if (
+                 recent_calls
+                 and max(recent_calls) >= DefaultSkillCallDeduplicator.MAX_DUPLICATE_COUNT
+             ):
+                 return False
+
+         # 3. Stop exploring when there was no tool call
+         if self.should_stop_exploration:
+             return False
+
+         return True
+
+     def _process_skill_result_with_hook(self, skill_name: str) -> tuple[str | None, dict]:
+         """Process skill results using skillkit_hook"""
+         # Get the skill object
+         skill = self.context.get_skill(skill_name)
+         if not skill:
+             skill = SystemFunctions.getSkill(skill_name)
+
+         # Get the last stage as reference
+         last_stage = self.recorder.getProgress().get_last_stage()
+         reference = last_stage.get_raw_output() if last_stage else None
+
+         # Process results using skillkit_hook (handles dynamic tools automatically)
+         if reference and self.skillkit_hook and self.context.has_skillkit_hook():
+             # Use the new hook to get context-optimized content
+             content, metadata = self.skillkit_hook.on_before_send_to_context(
+                 reference_id=reference.reference_id,
+                 skill=skill,
+                 skillkit_name=type(skill.owner_skillkit).__name__ if skill.owner_skillkit else "",
+                 resource_skill_path=getattr(skill, 'resource_skill_path', None),
+             )
+             return content, metadata
+         return self.recorder.getProgress().get_step_answers(), {}
+
+     def _append_assistant_message(self, content: str):
+         """Add an assistant message to the context"""
+         scratchpad_messages = Messages()
+         scratchpad_messages.add_message(content, MessageRole.ASSISTANT)
+         self.context.add_bucket(
+             BuildInBucket.SCRATCHPAD.value,
+             scratchpad_messages,
+         )
+
+     def _extract_tool_call_id(self) -> str | None:
+         """Extract the tool call ID from the message history"""
+         messages_with_calls = self.context.get_messages_with_tool_calls()
+         if messages_with_calls:
+             last_call_msg = messages_with_calls[-1]
+             if last_call_msg.tool_calls:
+                 return last_call_msg.tool_calls[0].get("id")
+         return None
+
+     # ===================== continue_exploration method =====================
+
+     async def continue_exploration(
+         self,
+         model: Optional[str] = None,
+         use_history: bool = True,
+         preserve_context: bool = False,
+         **kwargs
+     ) -> AsyncGenerator[Dict[str, Any], None]:
+         """Continue exploring based on the existing context (multi-turn dialogue scenario).
+
+         This method reuses the message history, variable pool, and other state from
+         the current context, and executes a new exploration session to handle the
+         user's follow-up input.
+
+         Args:
+             model: Name of the model; if None, use the model from the previous session in context
+             use_history: Whether to use historical messages, default is True
+             preserve_context: If True, skip reset_for_block() to preserve scratchpad content.
+                 Use this when resuming from UserInterrupt to keep the conversation context.
+             **kwargs: Additional parameters
+
+         Yields:
+             Execution results
+         """
+         # continue_exploration bypasses BasicCodeBlock.execute(), so we must align with
+         # normal block semantics by resetting transient buckets before assembling messages.
+         # Otherwise, the previous round's SCRATCHPAD/SYSTEM/QUERY may leak and crowd out SYSTEM/HISTORY.
+         # Exception: when preserve_context=True (e.g., resuming from UserInterrupt), skip the reset.
+         if self.context and not preserve_context:
+             self.context.reset_for_block()
+
+         # 1. Resolve parameters
+         self.history = use_history
+         self.model = self._resolve_model(model)
+         self.content = self._resolve_content(kwargs)
+         self.output_var = kwargs.get("output_var", "result")
+         self.assign_type = kwargs.get("assign_type", "->")
+
+         # 2. Resolve inherited configurations
+         self._resolve_skills(kwargs)
+         self._resolve_mode(kwargs)
+         self._resolve_system_prompt(kwargs)
+         self._apply_deduplicator_config(kwargs)
+
+         # 3. Reset exploration status
+         self.times = 0
+         self.should_stop_exploration = False
+         self.no_tool_call_count = 0
+         self.pending_content = None  # Reset pending content
+
+         # 4. Setup buckets
+         self._setup_system_bucket()
+         if self.content and self.context.context_manager:
+             if preserve_context:
+                 # When preserving context (e.g., resuming from UserInterrupt),
+                 # add user input to SCRATCHPAD to maintain correct temporal order.
+                 # The bucket order is: SYSTEM -> HISTORY -> QUERY -> SCRATCHPAD.
+                 # If we added to QUERY, the user's new input would appear BEFORE the
+                 # previous conversation in SCRATCHPAD, which is wrong.
+                 self.context.add_user_message(
+                     self.content,
+                     bucket=BuildInBucket.SCRATCHPAD.value
+                 )
+             else:
+                 # Use add_user_message instead of add_bucket to properly handle
+                 # multimodal content (List[Dict]). add_user_message correctly wraps
+                 # content in a Messages object which supports multimodal content.
+                 self.context.add_user_message(
+                     self.content,
+                     bucket=BuildInBucket.QUERY.value
+                 )
+
+         history_messages = self._make_history_messages()
+         if (
+             self.history
+             and history_messages is not None
+             and not history_messages.empty()
+             and self.context.context_manager
+         ):
+             self.context.set_history_bucket(history_messages)
+
+         # 5. Run exploration loop
+         while True:
+             async for ret in self._explore_once(no_cache=True):
+                 yield ret
+
+             if not self._should_continue_explore():
+                 break
+
+         # 6. Cleanup
+         self._update_history_and_cleanup()
+
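
A hedged usage sketch for multi-turn continuation. Parameter names come from the signature and the _resolve_* helpers below; block construction and the first execute() round are elided:

    async def follow_up(block: ExploreBlock, question: str):
        async for progress in block.continue_exploration(
            content=question,         # picked up by _resolve_content
            use_history=True,         # rebuild the HISTORY bucket
            preserve_context=False,   # True only when resuming from a UserInterrupt
        ):
            ...                       # streamed progress, same shape as execute()
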
+     # ===================== continue_exploration helpers =====================
+
+     def _resolve_model(self, model: Optional[str]) -> str:
+         """Resolve model name from parameter or context."""
+         if model:
+             return model
+         return self.context.get_last_model_name() or ""
+
+     def _resolve_content(self, kwargs: dict):
+         """Resolve user content from kwargs or context.
+
+         Returns:
+             str for plain text, or List[Dict] for multimodal content
+         """
+         user_content = kwargs.get("content", "")
+
+         # If content is already provided (either str or multimodal List[Dict]), return it
+         if user_content:
+             return user_content
+         # Otherwise try to get it from the context bucket
+         if self.context.context_manager:
+             bucket = self.context.context_manager.state.buckets.get(
+                 BuildInBucket.QUERY.value
+             )
+             if bucket:
+                 user_content = bucket._get_content_text()
+         return user_content
+
+     def _resolve_skills(self, kwargs: dict):
+         """Resolve skills configuration from kwargs or inherit from context."""
+         if "skills" in kwargs:
+             self.skills = kwargs["skills"]
+         elif "tools" in kwargs:
+             self.skills = kwargs["tools"]
+         else:
+             last_skills = self.context.get_last_skills()
+             if last_skills is not None:
+                 self.skills = last_skills
+
+         if getattr(self, "skills", None):
+             self.context.set_last_skills(self.skills)
+
+     def _resolve_mode(self, kwargs: dict):
+         """Resolve exploration mode from kwargs or inherit from context."""
+         if "mode" in kwargs:
+             new_mode = kwargs["mode"]
+             if new_mode in ["prompt", "tool_call"] and new_mode != self.mode:
+                 self.mode = new_mode
+                 self.strategy = self._create_strategy()
+         else:
+             last_mode = self.context.get_last_explore_mode()
+             if last_mode is not None and last_mode != self.mode:
+                 self.mode = last_mode
+                 self.strategy = self._create_strategy()
+
+         if getattr(self, "mode", None):
+             self.context.set_last_explore_mode(self.mode)
+
+     def _resolve_system_prompt(self, kwargs: dict):
+         """Resolve system prompt from kwargs or inherit from context."""
+         if "system_prompt" in kwargs:
+             self.system_prompt = kwargs.get("system_prompt") or ""
+         else:
+             last_system_prompt = self.context.get_last_system_prompt()
+             if (not getattr(self, "system_prompt", None)) and last_system_prompt:
+                 self.system_prompt = last_system_prompt
+
+         if getattr(self, "system_prompt", None):
+             self.context.set_last_system_prompt(self.system_prompt)
+
+     def _setup_system_bucket(self):
+         """Rebuild the system bucket for multi-turn exploration (reset_for_block may have cleared it)."""
+         skillkit = self.get_skillkit()
+         system_message = self.strategy.make_system_message(
+             skillkit=skillkit,
+             system_prompt=getattr(self, "system_prompt", "") or "",
+             tools_format=self.tools_format,
+         )
+
+         if len(system_message.strip()) > 0 and self.context.context_manager:
+             self.context.add_bucket(
+                 BuildInBucket.SYSTEM.value,
+                 system_message,
+                 message_role=MessageRole.SYSTEM,
+             )
+
+     def _apply_deduplicator_config(self, kwargs: dict):
+         """Apply skill deduplicator configuration."""
+         if "enable_skill_deduplicator" in kwargs:
+             self.enable_skill_deduplicator = kwargs["enable_skill_deduplicator"]
+         if hasattr(self, "enable_skill_deduplicator"):
+             self.strategy.set_deduplicator_enabled(self.enable_skill_deduplicator)