lybic-guiagents 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lybic-guiagents might be problematic. Click here for more details.

@@ -1,3 +1,4 @@
1
+ import asyncio
1
2
  import json
2
3
  import logging
3
4
  import os
@@ -19,9 +20,32 @@ from gui_agents.utils.common_utils import (
19
20
  agent_log_to_string,
20
21
  )
21
22
  from gui_agents.tools.tools import Tools
23
+ from gui_agents.agents.stream_manager import stream_manager
22
24
 
23
25
  logger = logging.getLogger("desktopenv.agent")
24
26
 
27
+ def load_config():
28
+ """
29
+ Load tool configurations from the repository's tools/tools_config.json and produce a mapping keyed by tool name.
30
+
31
+ Returns:
32
+ tuple: (tools_config, tools_dict) where `tools_config` is the parsed JSON object from tools_config.json, and `tools_dict` is a dict mapping each tool's `tool_name` to a dict with `provider` and `model`.
33
+ """
34
+ # Load tools configuration from tools_config.json
35
+ tools_config_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "tools", "tools_config.json")
36
+ with open(tools_config_path, "r") as f:
37
+ tools_config = json.load(f)
38
+ print(f"Loaded tools configuration from: {tools_config_path}")
39
+ tools_dict = {}
40
+ for tool in tools_config["tools"]:
41
+ tool_name = tool["tool_name"]
42
+ tools_dict[tool_name] = {
43
+ "provider": tool["provider"],
44
+ "model": tool["model_name"]
45
+ }
46
+ print(f"Tools configuration: {tools_dict}")
47
+ return tools_config,tools_dict
48
+
25
49
  class UIAgent:
26
50
  """Base class for UI automation agents"""
27
51
 
@@ -37,18 +61,28 @@ class UIAgent:
37
61
  self.platform = platform
38
62
 
39
63
  def reset(self) -> None:
40
- """Reset agent state"""
64
+ """
65
+ Reset the agent to its initial internal state.
66
+
67
+ Performs any subclass-specific reinitialization needed so the agent is ready to start a new task or episode.
68
+ """
41
69
  pass
42
70
 
43
- def predict(self, instruction: str, observation: Dict) -> Tuple[Dict, List[str]]|None:
44
- """Generate next action prediction
71
+ def _send_stream_message(self, task_id: str, stage: str, message: str) -> None:
72
+ """
73
+ Safely send stream message to task stream.
74
+ """
75
+ if not task_id:
76
+ return
45
77
 
46
- Args:
47
- instruction: Natural language instruction
48
- observation: Current UI state observation
78
+ stream_manager.add_message_threadsafe(task_id, stage, message)
49
79
 
80
+ def predict(self, instruction: str, observation: Dict) -> Tuple[Dict, List[str]]|None:
81
+ """
82
+ Produce the next agent information and action sequence for the given instruction and current observation.
83
+
50
84
  Returns:
51
- Tuple containing agent info dictionary and list of actions
85
+ (info, actions) where `info` is a dictionary containing planner, executor and evaluator metadata (including subtask metadata and statuses) and `actions` is a list of action strings to execute; returns `None` if no prediction is available.
52
86
  """
53
87
  pass
54
88
 
@@ -84,16 +118,15 @@ class AgentS2(UIAgent):
84
118
  kb_release_tag: str = "v0.2.2",
85
119
  enable_takeover: bool = False,
86
120
  enable_search: bool = True,
121
+ tools_config: dict | None = None,
87
122
  ):
88
- """Initialize AgentS2
89
-
90
- Args:
91
- platform: Operating system platform (darwin, linux, windows)
92
- memory_root_path: Path to memory directory. Defaults to current working directory.
93
- memory_folder_name: Name of memory folder. Defaults to "kb_s2".
94
- kb_release_tag: Release tag for knowledge base. Defaults to "v0.2.2".
95
- enable_takeover: Whether to enable user takeover functionality. Defaults to False.
96
- enable_search: Whether to enable web search functionality. Defaults to True.
123
+ """
124
+ Initialize an AgentS2 instance and prepare its tools and local knowledge base.
125
+
126
+ If `tools_config` is provided, build `Tools_dict` mapping each `tool_name` to its config (renaming `model_name` to `model` and removing `tool_name`). If `tools_config` is not provided, load configuration via `load_config()`. Ensure a platform-specific knowledge base directory exists under `memory_root_path/memory_folder_name` (creating it if missing). Sets initial attributes (platform, screen_size, memory paths, flags) and initializes internal state via `reset()`.
127
+
128
+ Parameters:
129
+ tools_config (dict | None): Optional pre-loaded tools configuration; when present it is transformed into `Tools_dict`. Omit to load configuration from disk.
97
130
  """
98
131
  super().__init__(
99
132
  platform,
@@ -105,20 +138,24 @@ class AgentS2(UIAgent):
105
138
  self.screen_size = screen_size
106
139
  self.enable_takeover = enable_takeover
107
140
  self.enable_search = enable_search
141
+ self.task_id = None # Will be set when task starts
108
142
 
109
- # Load tools configuration from tools_config.json
110
- tools_config_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "tools", "tools_config.json")
111
- with open(tools_config_path, "r") as f:
112
- self.tools_config = json.load(f)
113
- print(f"Loaded tools configuration from: {tools_config_path}")
143
+ if tools_config is not None:
144
+ self.tools_config = tools_config
145
+ # Create the dictionary mapping from the list-based config
114
146
  self.Tools_dict = {}
115
147
  for tool in self.tools_config["tools"]:
116
148
  tool_name = tool["tool_name"]
117
- self.Tools_dict[tool_name] = {
118
- "provider": tool["provider"],
119
- "model": tool["model_name"]
120
- }
121
- print(f"Tools configuration: {self.Tools_dict}")
149
+ # Create a copy of the tool's config to avoid modifying the original
150
+ config_copy = tool.copy()
151
+ # Rename 'model_name' to 'model' for consistency in downstream use
152
+ if 'model_name' in config_copy:
153
+ config_copy['model'] = config_copy.pop('model_name')
154
+ # Remove tool_name as it's now the key
155
+ config_copy.pop('tool_name', None)
156
+ self.Tools_dict[tool_name] = config_copy
157
+ else:
158
+ self.tools_config, self.Tools_dict = load_config()
122
159
 
123
160
  # Initialize agent's knowledge base path
124
161
  self.local_kb_path = os.path.join(
@@ -138,16 +175,22 @@ class AgentS2(UIAgent):
138
175
  self.reset()
139
176
 
140
177
  def reset(self) -> None:
141
- """Reset agent state and initialize components"""
142
- # Initialize core components
178
+ """
179
+ Reinitialize core components and reset the agent's runtime state.
143
180
 
181
+ Recreates the Manager, Worker, and Grounding components using the agent's current configuration,
182
+ resets planning/execution flags and counters, clears subtask-related state, reloads the shared
183
+ global state from the registry, and propagates the agent's task_id to the components when present.
184
+ """
185
+ # Initialize core components
186
+
144
187
  self.manager = Manager(
145
188
  Tools_dict=self.Tools_dict,
146
189
  local_kb_path=self.local_kb_path,
147
190
  platform=self.platform,
148
191
  enable_search=self.enable_search, # Pass global switch to Manager
149
192
  )
150
-
193
+
151
194
  self.worker = Worker(
152
195
  Tools_dict=self.Tools_dict,
153
196
  local_kb_path=self.local_kb_path,
@@ -178,6 +221,25 @@ class AgentS2(UIAgent):
178
221
  self.subtask_status: str = "Start"
179
222
  self.global_state: GlobalState = Registry.get("GlobalStateStore") # type: ignore
180
223
 
224
+ # Pass task_id to components
225
+ if self.task_id:
226
+ self.manager.task_id = self.task_id
227
+ self.worker.task_id = self.task_id
228
+
229
def set_task_id(self, task_id: str) -> None:
    """
    Record the task identifier on the agent and hand it to the manager and worker.

    Parameters:
        task_id (str): Identifier of the current task. It is stored on `self.task_id` and
            copied to `self.manager.task_id` and `self.worker.task_id` for each of those
            components that exists and is truthy.
    """
    self.task_id = task_id
    # A component may be missing (attribute not set yet) or falsy; skip it in either case.
    for component_name in ("manager", "worker"):
        component = getattr(self, component_name, None)
        if component:
            component.task_id = task_id
242
+
181
243
  def reset_executor_state(self) -> None:
182
244
  """Reset executor and step counter"""
183
245
  self.worker.reset()
@@ -185,6 +247,19 @@ class AgentS2(UIAgent):
185
247
 
186
248
  def predict(self, instruction: str, observation: Dict) -> Tuple[Dict, List[str]]:
187
249
  # Initialize the three info dictionaries
250
+ """
251
+ Produce the next executor actions and diagnostic information for the current task step.
252
+
253
+ This method coordinates planning, subtask selection, action generation, grounding (code extraction and execution), and status updates. It may trigger replanning, advance to the next subtask, mark subtasks as completed or failed, and emit stream messages and logs. The returned info merges planner, executor, and evaluator metadata and includes current subtask details.
254
+
255
+ Parameters:
256
+ instruction (str): The user or system instruction describing the task to accomplish; forwarded to the manager/worker as the task utterance.
257
+ observation (Dict): Current environment observation/state used for grounding and coordinate assignment.
258
+
259
+ Returns:
260
+ info (Dict): A merged dictionary containing planner_info, executor_info, evaluator_info and the keys `subtask`, `subtask_info`, and `subtask_status`.
261
+ actions (List[Dict]): List of action dictionaries produced for execution (may include actions with type "DONE", failure indicators, or other executor-generated actions).
262
+ """
188
263
  planner_info = {}
189
264
  executor_info = {}
190
265
  evaluator_info = {
@@ -209,6 +284,10 @@ class AgentS2(UIAgent):
209
284
  # If replan is true, generate a new plan. True at start, after a failed plan, or after subtask completion
210
285
  if self.requires_replan:
211
286
  logger.info("(RE)PLANNING...")
287
+
288
+ # Stream planning start message
289
+ self._send_stream_message(self.task_id, "planning", f"Start planning task steps (Step {self.step_count + 1})...")
290
+
212
291
  Manager_info, self.subtasks = self.manager.get_action_queue(
213
292
  Tu=self.global_state.get_Tu(),
214
293
  observation=self.global_state.get_obs_for_manager(),
@@ -224,6 +303,9 @@ class AgentS2(UIAgent):
224
303
  self.search_query = Manager_info["search_query"]
225
304
  else:
226
305
  self.search_query = ""
306
+
307
+ # Stream planning completion message
308
+ self._send_stream_message(self.task_id, "planning", f"Planning completed, {len(self.subtasks)} subtasks generated")
227
309
  get_action_queue_time = time.time() - manager_start
228
310
  logger.info(f"[Timing] manager.get_action_queue execution time: {get_action_queue_time:.2f} seconds")
229
311
  self.global_state.log_operation(
@@ -253,8 +335,10 @@ class AgentS2(UIAgent):
253
335
  "reflection": "agent.done()",
254
336
  }
255
337
  actions = [{"type": "DONE"}]
256
-
257
- # 记录任务完成
338
+
339
+ # Stream task completion message
340
+ self._send_stream_message(self.task_id, "completion", "🎉 Mission Completed! All subtasks have been successfully executed")
341
+
258
342
  self.global_state.log_operation(
259
343
  module="agent",
260
344
  operation="task_complete",
@@ -272,40 +356,58 @@ class AgentS2(UIAgent):
272
356
  logger.info(f"REMAINING SUBTASKS FROM GLOBAL STATE: {self.global_state.get_remaining_subtasks()}")
273
357
  self.needs_next_subtask = False
274
358
  self.subtask_status = "Start"
275
-
359
+
360
+ # Stream current subtask message
361
+ if self.current_subtask is not None:
362
+ self._send_stream_message(self.task_id, "subtask", f"Start executing subtasks: {self.current_subtask.name}")
363
+ else:
364
+ self._send_stream_message(self.task_id, "subtask", "Start executing a new subtask")
365
+
276
366
  self.global_state.log_operation(
277
367
  module="agent",
278
368
  operation="current_subtask",
279
369
  data={
280
- "content": str(self.current_subtask),
370
+ "content": str(self.current_subtask) if self.current_subtask is not None else "No active subtask",
281
371
  "status": "start"
282
372
  }
283
373
  )
284
374
 
285
375
  worker_start_time = time.time()
286
-
376
+
377
+ # Stream action generation start message
378
+ self._send_stream_message(self.task_id, "thinking", "Generating execution actions...")
379
+
287
380
  # get the next action from the worker
381
+ # Handle case where current_subtask might be None
382
+ subtask_name = self.current_subtask.name if self.current_subtask is not None else "No active subtask"
383
+ subtask_info = self.current_subtask.info if self.current_subtask is not None else ""
384
+
288
385
  executor_info = self.worker.generate_next_action(
289
386
  Tu=instruction,
290
387
  search_query=self.search_query,
291
- subtask=self.current_subtask.name, # type: ignore
292
- subtask_info=self.current_subtask.info, # type: ignore
388
+ subtask=subtask_name,
389
+ subtask_info=subtask_info,
293
390
  future_tasks=self.global_state.get_remaining_subtasks(),
294
391
  done_task=self.global_state.get_completed_subtasks(),
295
392
  obs=self.global_state.get_obs_for_manager(),
296
393
  )
297
-
394
+
298
395
  worker_execution_time = time.time() - worker_start_time
299
-
396
+
300
397
  self.global_state.log_operation(
301
398
  module="agent",
302
399
  operation="worker_execution",
303
400
  data={
304
401
  "duration": worker_execution_time,
305
- "subtask": self.current_subtask.name # type: ignore
402
+ "subtask": self.current_subtask.name if self.current_subtask is not None else "No active subtask" # type: ignore
306
403
  }
307
404
  )
308
405
 
406
+ # Stream action plan message
407
+ if self.task_id and "executor_plan" in executor_info:
408
+ plan_preview = executor_info["executor_plan"][:100] + "..." if len(executor_info["executor_plan"]) > 100 else executor_info["executor_plan"]
409
+ self._send_stream_message(self.task_id, "action_plan", f"Generate an execution plan: {plan_preview}")
410
+
309
411
  try:
310
412
  grounding_start_time = time.time()
311
413
  current_width, current_height = self.global_state.get_screen_size()
@@ -345,6 +447,11 @@ class AgentS2(UIAgent):
345
447
 
346
448
  actions = [exec_code]
347
449
 
450
+ # Stream action execution message
451
+ if actions:
452
+ action_type = actions[0].get("type", "unknown")
453
+ self._send_stream_message(self.task_id, "action", f"Execute an action: {action_type}")
454
+
348
455
  self.step_count += 1
349
456
 
350
457
  # set the should_send_action flag to True if the executor returns an action
@@ -356,15 +463,22 @@ class AgentS2(UIAgent):
356
463
  self.needs_next_subtask = True
357
464
 
358
465
  # assign the failed subtask
359
- self.global_state.add_failed_subtask(self.current_subtask) # type: ignore
466
+ if self.current_subtask is not None:
467
+ self.global_state.add_failed_subtask(self.current_subtask) # type: ignore
360
468
  self.failure_subtask = self.global_state.get_latest_failed_subtask()
361
-
469
+
470
+ # Stream failure message
471
+ if self.current_subtask is not None:
472
+ self._send_stream_message(self.task_id, "error", f"Subtask execution failed: {self.current_subtask.name}, will re-plan")
473
+ else:
474
+ self._send_stream_message(self.task_id, "error", "Subtask execution failed and will be re-planned")
475
+
362
476
  # 记录失败的子任务
363
477
  self.global_state.log_operation(
364
478
  module="agent",
365
479
  operation="subtask_failed",
366
480
  data={
367
- "content": str(self.current_subtask),
481
+ "content": str(self.current_subtask) if self.current_subtask is not None else "Unknown subtask",
368
482
  "status": "failed"
369
483
  }
370
484
  )
@@ -381,14 +495,22 @@ class AgentS2(UIAgent):
381
495
  self.requires_replan = True
382
496
  self.needs_next_subtask = True
383
497
  self.failure_subtask = None
384
- self.global_state.add_completed_subtask(self.current_subtask) # type: ignore
385
-
498
+ # add completed subtask only if it exists
499
+ if self.current_subtask is not None:
500
+ self.global_state.add_completed_subtask(self.current_subtask) # type: ignore
501
+
502
+ # Stream subtask completion message
503
+ if self.current_subtask is not None:
504
+ self._send_stream_message(self.task_id, "subtask_complete", f"✅ Subtask completed: {self.current_subtask.name}")
505
+ else:
506
+ self._send_stream_message(self.task_id, "subtask_complete", "✅ Subtask completed")
507
+
386
508
  # 记录完成的子任务
387
509
  self.global_state.log_operation(
388
510
  module="agent",
389
511
  operation="subtask_completed",
390
512
  data={
391
- "content": str(self.current_subtask),
513
+ "content": str(self.current_subtask) if self.current_subtask is not None else "Unknown subtask",
392
514
  "status": "completed"
393
515
  }
394
516
  )
@@ -414,13 +536,24 @@ class AgentS2(UIAgent):
414
536
  for k, v in d.items()
415
537
  }
416
538
  }
417
- info.update(
418
- {
419
- "subtask": self.current_subtask.name, # type: ignore
420
- "subtask_info": self.current_subtask.info, # type: ignore
421
- "subtask_status": self.subtask_status,
422
- }
423
- )
539
+ # Handle case where current_subtask might be None
540
+ if self.current_subtask is not None:
541
+ info.update(
542
+ {
543
+ "subtask": self.current_subtask.name, # type: ignore
544
+ "subtask_info": self.current_subtask.info, # type: ignore
545
+ "subtask_status": self.subtask_status,
546
+ }
547
+ )
548
+ else:
549
+ # Handle None case - provide default values
550
+ info.update(
551
+ {
552
+ "subtask": "No active subtask",
553
+ "subtask_info": "",
554
+ "subtask_status": "no_subtask",
555
+ }
556
+ )
424
557
 
425
558
  # 记录predict函数总执行时间
426
559
  predict_total_time = time.time() - predict_start_time
@@ -538,18 +671,23 @@ class AgentSFast(UIAgent):
538
671
  enable_takeover: bool = False,
539
672
  enable_search: bool = True,
540
673
  enable_reflection: bool = True,
674
+ tools_config: dict | None = None,
541
675
  # enable_reflection: bool = False,
542
676
  ):
543
- """Initialize AgentSFast
544
-
545
- Args:
546
- platform: Operating system platform (darwin, linux, windows)
547
- memory_root_path: Path to memory directory. Defaults to current working directory.
548
- memory_folder_name: Name of memory folder. Defaults to "kb_s2".
549
- kb_release_tag: Release tag for knowledge base. Defaults to "v0.2.2".
550
- enable_takeover: Whether to enable user takeover functionality. Defaults to False.
551
- enable_search: Whether to enable web search functionality. Defaults to True.
552
- enable_reflection: Whether to enable reflection functionality. Defaults to True.
677
+ """
678
+ Create and initialize an AgentSFast instance, configuring tools, memory paths, and optional features.
679
+
680
+ Parameters:
681
+ platform (str): Operating system platform identifier (e.g., "darwin", "linux", "windows"); used to scope platform-specific knowledge base.
682
+ screen_size (List[int]): Screen width and height used for grounding calculations.
683
+ memory_root_path (str): Root directory for agent memory storage.
684
+ memory_folder_name (str): Subfolder name under memory_root_path for this agent's knowledge base.
685
+ kb_release_tag (str): Knowledge base release tag used for bookkeeping or compatibility.
686
+ enable_takeover (bool): If True, enable user takeover capabilities in the fast action generator.
687
+ enable_search (bool): If True, enable web/search-related features when registering tools.
688
+ enable_reflection (bool): If True, enable trajectory reflection and a reflection agent to summarize agent behavior.
689
+ tools_config (dict | None): Optional pre-loaded tools configuration; if omitted, configuration is loaded from disk.
690
+
553
691
  """
554
692
  super().__init__(
555
693
  platform,
@@ -562,20 +700,24 @@ class AgentSFast(UIAgent):
562
700
  self.enable_takeover = enable_takeover
563
701
  self.enable_search = enable_search
564
702
  self.enable_reflection = enable_reflection
703
+ self.task_id = None # Will be set when task starts
565
704
 
566
- # Load tools configuration from tools_config.json
567
- tools_config_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "tools", "tools_config.json")
568
- with open(tools_config_path, "r") as f:
569
- self.tools_config = json.load(f)
570
- print(f"Loaded tools configuration from: {tools_config_path}")
705
+ if tools_config is not None:
706
+ self.tools_config = tools_config
707
+ # Create the dictionary mapping from the list-based config
571
708
  self.Tools_dict = {}
572
709
  for tool in self.tools_config["tools"]:
573
710
  tool_name = tool["tool_name"]
574
- self.Tools_dict[tool_name] = {
575
- "provider": tool["provider"],
576
- "model": tool["model_name"]
577
- }
578
- print(f"Tools configuration: {self.Tools_dict}")
711
+ # Create a copy of the tool's config to avoid modifying the original
712
+ config_copy = tool.copy()
713
+ # Rename 'model_name' to 'model' for consistency in downstream use
714
+ if 'model_name' in config_copy:
715
+ config_copy['model'] = config_copy.pop('model_name')
716
+ # Remove tool_name as it's now the key
717
+ config_copy.pop('tool_name', None)
718
+ self.Tools_dict[tool_name] = config_copy
719
+ else:
720
+ self.tools_config, self.Tools_dict = load_config()
579
721
 
580
722
  # Initialize agent's knowledge base path
581
723
  self.local_kb_path = os.path.join(
@@ -594,21 +736,25 @@ class AgentSFast(UIAgent):
594
736
  self.reset()
595
737
 
596
738
  def reset(self) -> None:
597
- """Reset agent state and initialize components"""
739
+ """
740
+ Reinitialize the fast-agent components and reset internal runtime state.
741
+
742
+ Initializes and registers the fast action generator tool (and traj_reflector if reflection is enabled), configures search/auth parameters from tool configuration, creates or updates the grounding subsystem with resolved grounding dimensions, resets counters and runtime references (step_count, turn_count, latest_action, global_state), and propagates the current task_id to any registered tools.
743
+ """
598
744
  # Initialize the fast action generator tool
599
745
  self.fast_action_generator = Tools()
600
746
  self.fast_action_generator_tool = "fast_action_generator_with_takeover" if self.enable_takeover else "fast_action_generator"
601
-
747
+
602
748
  # Get tool configuration from tools_config
603
749
  tool_config = None
604
750
  for tool in self.tools_config["tools"]:
605
751
  if tool["tool_name"] == self.fast_action_generator_tool:
606
752
  tool_config = tool
607
753
  break
608
-
754
+
609
755
  # Prepare tool parameters
610
756
  tool_params = {}
611
-
757
+
612
758
  # First check global search switch
613
759
  if not self.enable_search:
614
760
  # If global search is disabled, force disable search for this tool
@@ -622,15 +768,28 @@ class AgentSFast(UIAgent):
622
768
  tool_params["enable_search"] = enable_search
623
769
  tool_params["search_provider"] = tool_config.get("search_provider", "bocha")
624
770
  tool_params["search_model"] = tool_config.get("search_model", "")
625
-
771
+
626
772
  logger.info(f"Configuring {self.fast_action_generator_tool} with search enabled: {enable_search} (from config)")
627
-
628
- # Register the tool with parameters
773
+
774
+ # Get base config from Tools_dict
775
+ tool_config = self.Tools_dict[self.fast_action_generator_tool].copy()
776
+ provider = tool_config.get("provider")
777
+ model = tool_config.get("model")
778
+
779
+ # Merge with search-related parameters
780
+ all_params = {**tool_config, **tool_params}
781
+
782
+ auth_keys = ['api_key', 'base_url', 'endpoint_url', 'azure_endpoint', 'api_version']
783
+ for key in auth_keys:
784
+ if key in all_params:
785
+ logger.info(f"AgentSFast.reset: Setting {key} for fast_action_generator_tool")
786
+
787
+ # Register the tool with all parameters
629
788
  self.fast_action_generator.register_tool(
630
- self.fast_action_generator_tool,
631
- self.Tools_dict[self.fast_action_generator_tool]["provider"],
632
- self.Tools_dict[self.fast_action_generator_tool]["model"],
633
- **tool_params
789
+ self.fast_action_generator_tool,
790
+ provider,
791
+ model,
792
+ **all_params
634
793
  )
635
794
 
636
795
  if self.enable_reflection:
@@ -660,15 +819,38 @@ class AgentSFast(UIAgent):
660
819
  self.global_state: GlobalState = Registry.get("GlobalStateStore") # type: ignore
661
820
  self.latest_action = None
662
821
 
663
- def predict(self, instruction: str, observation: Dict) -> Tuple[Dict, List[str]]:
664
- """Generate next action prediction using only the fast_action_generator tool
822
+ # Pass task_id to tools if available
823
+ if self.task_id:
824
+ self.fast_action_generator.task_id = self.task_id
825
+ if self.enable_reflection and hasattr(self, 'reflection_agent'):
826
+ self.reflection_agent.task_id = self.task_id
665
827
 
666
- Args:
667
- instruction: Natural language instruction
668
- observation: Current UI state observation
828
def set_task_id(self, task_id: str) -> None:
    """
    Record the task identifier on the agent and hand it to the tools that carry it.

    Parameters:
        task_id (str): Identifier of the active task. It is stored on `self.task_id` and
            copied to `fast_action_generator.task_id` and `reflection_agent.task_id` for
            each of those components that exists and is truthy.
    """
    self.task_id = task_id
    # A component may be missing (attribute not set yet) or falsy; skip it in either case.
    for component_name in ("fast_action_generator", "reflection_agent"):
        component = getattr(self, component_name, None)
        if component:
            component.task_id = task_id
669
842
 
843
+ def predict(self, instruction: str, observation: Dict) -> Tuple[Dict, List[str]]:
844
+ """
845
+ Generate the next executor plan and corresponding actions using the configured fast action generator.
846
+
847
+ Parameters:
848
+ instruction (str): Natural language task description.
849
+ observation (Dict): Current UI state; must include a "screenshot" entry with the screen image.
850
+
670
851
  Returns:
671
- Tuple containing agent info dictionary and list of actions
852
+ executor_info (dict): Contains at least the keys `executor_plan` (raw plan text), `reflection` (reflection text or empty string), and `plan_code` (the latest extracted/used action code).
853
+ actions (List[dict]): List of action dictionaries produced by grounding execution; typically a single action dict describing the operation to perform.
672
854
  """
673
855
  import time
674
856
  predict_start_time = time.time()
@@ -725,11 +907,14 @@ class AgentSFast(UIAgent):
725
907
  generator_message = textwrap.dedent(f"""
726
908
  Task Description: {instruction}
727
909
  """)
728
-
910
+
729
911
  generator_message += f"\n\nPlease refer to the agent log to understand the progress and context of the task so far.\n{agent_log}"
730
912
 
731
913
  fast_action_start_time = time.time()
732
-
914
+
915
+ # Stream action generation start message
916
+ self._send_stream_message(self.task_id, "thinking", "Generating execution actions quickly...")
917
+
733
918
  plan, total_tokens, cost_string = self.fast_action_generator.execute_tool(
734
919
  self.fast_action_generator_tool,
735
920
  {
@@ -738,9 +923,9 @@ class AgentSFast(UIAgent):
738
923
  }
739
924
  )
740
925
  self.fast_action_generator.reset(self.fast_action_generator_tool)
741
-
926
+
742
927
  fast_action_execution_time = time.time() - fast_action_start_time
743
-
928
+
744
929
  self.global_state.log_operation(
745
930
  module="agent",
746
931
  operation="fast_action_execution",
@@ -750,7 +935,12 @@ class AgentSFast(UIAgent):
750
935
  "cost": cost_string
751
936
  }
752
937
  )
753
-
938
+
939
+ # Stream action plan message
940
+ if self.task_id:
941
+ plan_preview = plan[:100] + "..." if len(plan) > 100 else plan
942
+ self._send_stream_message(self.task_id, "action_plan", f"Quickly generate execution plans: {plan_preview}")
943
+
754
944
  logger.info("Fast Action Plan: %s", plan)
755
945
 
756
946
  current_width, current_height = self.global_state.get_screen_size()
@@ -809,13 +999,18 @@ class AgentSFast(UIAgent):
809
999
 
810
1000
  self.step_count += 1
811
1001
  self.turn_count += 1
812
-
1002
+
1003
+ # Stream action execution message
1004
+ if actions:
1005
+ action_type = actions[0].get("type", "unknown")
1006
+ self._send_stream_message(self.task_id, "action", f"Execute an action: {action_type}")
1007
+
813
1008
  executor_info = {
814
1009
  "executor_plan": plan,
815
1010
  "reflection": reflection or "",
816
1011
  "plan_code": self.latest_action
817
1012
  }
818
-
1013
+
819
1014
  predict_total_time = time.time() - predict_start_time
820
1015
  self.global_state.log_operation(
821
1016
  module="agent",
@@ -827,4 +1022,4 @@ class AgentSFast(UIAgent):
827
1022
  }
828
1023
  )
829
1024
 
830
- return executor_info, actions
1025
+ return executor_info, actions