autoglm-gui 1.5.0__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. AutoGLM_GUI/agents/glm/agent.py +6 -1
  2. AutoGLM_GUI/agents/mai/agent.py +3 -0
  3. AutoGLM_GUI/agents/stream_runner.py +7 -2
  4. AutoGLM_GUI/api/agents.py +26 -1
  5. AutoGLM_GUI/api/history.py +27 -1
  6. AutoGLM_GUI/models/history.py +45 -1
  7. AutoGLM_GUI/scheduler_manager.py +52 -6
  8. AutoGLM_GUI/schemas.py +12 -0
  9. AutoGLM_GUI/static/assets/{about-BQm96DAl.js → about-CfwX1Cmc.js} +1 -1
  10. AutoGLM_GUI/static/assets/{alert-dialog-B42XxGPR.js → alert-dialog-CtGlN2IJ.js} +1 -1
  11. AutoGLM_GUI/static/assets/chat-BYa-foUI.js +129 -0
  12. AutoGLM_GUI/static/assets/{circle-alert-D4rSJh37.js → circle-alert-t08bEMPO.js} +1 -1
  13. AutoGLM_GUI/static/assets/{dialog-DZ78cEcj.js → dialog-FNwZJFwk.js} +1 -1
  14. AutoGLM_GUI/static/assets/eye-D0UPWCWC.js +1 -0
  15. AutoGLM_GUI/static/assets/history-CRo95B7i.js +1 -0
  16. AutoGLM_GUI/static/assets/{index-CmZSnDqc.js → index-BaLMSqd3.js} +1 -1
  17. AutoGLM_GUI/static/assets/{index-CssG-3TH.js → index-CTHbFvKl.js} +5 -5
  18. AutoGLM_GUI/static/assets/index-CV7jGxGm.css +1 -0
  19. AutoGLM_GUI/static/assets/{label-BCUzE_nm.js → label-DJFevVmr.js} +1 -1
  20. AutoGLM_GUI/static/assets/{logs-eoFxn5of.js → logs-RW09DyYY.js} +1 -1
  21. AutoGLM_GUI/static/assets/{popover-DLsuV5Sx.js → popover--JTJrE5v.js} +1 -1
  22. AutoGLM_GUI/static/assets/{scheduled-tasks-MyqGJvy_.js → scheduled-tasks-DTRKsQXF.js} +1 -1
  23. AutoGLM_GUI/static/assets/{square-pen-zGWYrdfj.js → square-pen-CPK_K680.js} +1 -1
  24. AutoGLM_GUI/static/assets/{textarea-BX6y7uM5.js → textarea-PRmVnWq5.js} +1 -1
  25. AutoGLM_GUI/static/assets/{workflows-CYFs6ssC.js → workflows-CdcsAoaT.js} +1 -1
  26. AutoGLM_GUI/static/index.html +2 -2
  27. {autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/METADATA +49 -7
  28. {autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/RECORD +31 -70
  29. AutoGLM_GUI/device_adapter.py +0 -263
  30. AutoGLM_GUI/static/assets/chat-C0L2gQYG.js +0 -129
  31. AutoGLM_GUI/static/assets/history-DFBv7TGc.js +0 -1
  32. AutoGLM_GUI/static/assets/index-Bzyv2yQ2.css +0 -1
  33. mai_agent/base.py +0 -137
  34. mai_agent/mai_grounding_agent.py +0 -263
  35. mai_agent/mai_naivigation_agent.py +0 -526
  36. mai_agent/prompt.py +0 -148
  37. mai_agent/unified_memory.py +0 -67
  38. mai_agent/utils.py +0 -73
  39. phone_agent/__init__.py +0 -12
  40. phone_agent/actions/__init__.py +0 -5
  41. phone_agent/actions/handler.py +0 -400
  42. phone_agent/actions/handler_ios.py +0 -278
  43. phone_agent/adb/__init__.py +0 -51
  44. phone_agent/adb/connection.py +0 -358
  45. phone_agent/adb/device.py +0 -253
  46. phone_agent/adb/input.py +0 -108
  47. phone_agent/adb/screenshot.py +0 -108
  48. phone_agent/agent.py +0 -253
  49. phone_agent/agent_ios.py +0 -277
  50. phone_agent/config/__init__.py +0 -53
  51. phone_agent/config/apps.py +0 -227
  52. phone_agent/config/apps_harmonyos.py +0 -256
  53. phone_agent/config/apps_ios.py +0 -339
  54. phone_agent/config/i18n.py +0 -81
  55. phone_agent/config/prompts.py +0 -80
  56. phone_agent/config/prompts_en.py +0 -79
  57. phone_agent/config/prompts_zh.py +0 -82
  58. phone_agent/config/timing.py +0 -167
  59. phone_agent/device_factory.py +0 -166
  60. phone_agent/hdc/__init__.py +0 -53
  61. phone_agent/hdc/connection.py +0 -384
  62. phone_agent/hdc/device.py +0 -269
  63. phone_agent/hdc/input.py +0 -145
  64. phone_agent/hdc/screenshot.py +0 -127
  65. phone_agent/model/__init__.py +0 -5
  66. phone_agent/model/client.py +0 -290
  67. phone_agent/xctest/__init__.py +0 -47
  68. phone_agent/xctest/connection.py +0 -379
  69. phone_agent/xctest/device.py +0 -472
  70. phone_agent/xctest/input.py +0 -311
  71. phone_agent/xctest/screenshot.py +0 -226
  72. {autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/WHEEL +0 -0
  73. {autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/entry_points.txt +0 -0
  74. {autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/licenses/LICENSE +0 -0
mai_agent/unified_memory.py DELETED
@@ -1,67 +0,0 @@
1
- # Copyright (c) 2025, Alibaba Cloud and its affiliates;
2
- # Licensed under the Apache License, Version 2.0 (the "License");
3
- # you may not use this file except in compliance with the License.
4
- # You may obtain a copy of the License at
5
- #
6
- # http://www.apache.org/licenses/LICENSE-2.0
7
- #
8
- # Unless required by applicable law or agreed to in writing, software
9
- # distributed under the License is distributed on an "AS IS" BASIS,
10
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
- # See the License for the specific language governing permissions and
12
- # limitations under the License.
13
-
14
- """Unified memory structures for trajectory tracking."""
15
-
16
- from dataclasses import dataclass, field
17
- from typing import Any, Dict, List, Optional
18
-
19
- from PIL import Image
20
-
21
-
22
@dataclass
class TrajStep:
    """A single recorded step of an agent trajectory.

    The first nine fields are required and must be supplied in declaration
    order when constructing positionally; the last two are optional.
    """

    # PIL image of the screen at this step.
    screenshot: Image.Image
    # Accessibility tree data for the screen (may be None).
    accessibility_tree: Optional[Dict[str, Any]]
    # Raw model prediction/response.
    prediction: str
    # Parsed action dictionary.
    action: Dict[str, Any]
    # Conclusion or summary of the step.
    conclusion: str
    # The model's reasoning/thinking process.
    thought: str
    # Index of this step within the trajectory.
    step_index: int
    # Type of agent that produced this step.
    agent_type: str
    # Name of the model used.
    model_name: str
    # Original screenshot as bytes, kept for compatibility.
    screenshot_bytes: Optional[bytes] = None
    # Structured action with metadata.
    structured_action: Optional[Dict[str, Any]] = None
52
-
53
-
54
@dataclass
class TrajMemory:
    """The full trajectory recorded for one task.

    Every instance gets its own ``steps`` list (built by ``default_factory``),
    so steps appended to one memory never show up in another.
    """

    # The goal/instruction for this trajectory.
    task_goal: str
    # Unique identifier for the task.
    task_id: str
    # Trajectory steps; empty unless provided.
    steps: List[TrajStep] = field(default_factory=list)
mai_agent/utils.py DELETED
@@ -1,73 +0,0 @@
1
- # Copyright (c) 2025, Alibaba Cloud and its affiliates;
2
- # Licensed under the Apache License, Version 2.0 (the "License");
3
- # you may not use this file except in compliance with the License.
4
- # You may obtain a copy of the License at
5
- #
6
- # http://www.apache.org/licenses/LICENSE-2.0
7
- #
8
- # Unless required by applicable law or agreed to in writing, software
9
- # distributed under the License is distributed on an "AS IS" BASIS,
10
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
- # See the License for the specific language governing permissions and
12
- # limitations under the License.
13
-
14
- """Utility functions for image processing and conversion."""
15
-
16
- import base64
17
- from io import BytesIO
18
- from typing import Union, Optional, Tuple, Dict, Any
19
-
20
- from PIL import Image
21
- from PIL import ImageDraw
22
-
23
-
24
def safe_pil_to_bytes(image: Union[Image.Image, bytes]) -> bytes:
    """Return *image* as raw bytes.

    Args:
        image: A PIL image, which is encoded as PNG, or a ``bytes`` object,
            which is returned unchanged.

    Returns:
        The PNG-encoded image data, or the input bytes as-is.

    Raises:
        TypeError: If *image* is neither a PIL image nor ``bytes``.
    """
    if isinstance(image, Image.Image):
        png_buffer = BytesIO()
        image.save(png_buffer, format="PNG")
        return png_buffer.getvalue()

    if isinstance(image, bytes):
        return image

    raise TypeError(f"Expected PIL Image or bytes, got {type(image)}")
33
-
34
-
35
def pil_to_base64(image: Image.Image) -> str:
    """Encode *image* as PNG and return the data as a base64 text string."""
    png_stream = BytesIO()
    image.save(png_stream, format="PNG")
    png_bytes = png_stream.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode("utf-8")
39
-
40
-
41
def save_screenshot(screenshot: Image.Image, path: str) -> None:
    """Write *screenshot* to *path* and report the location on stdout."""
    screenshot.save(path)
    notice = f"Screenshot saved in {path}"
    print(notice)
44
-
45
-
46
def extract_click_coordinates(action: Dict[str, Any]) -> Tuple[float, float]:
    """Return the first two entries of ``action["coordinate"]`` as ``(x, y)``.

    Args:
        action: Action dictionary holding an indexable ``"coordinate"`` value.

    Returns:
        A 2-tuple built from elements 0 and 1 of the coordinate value; any
        further elements are ignored.

    Raises:
        TypeError: If the ``"coordinate"`` key is absent (the looked-up value
            is then ``None``, which cannot be indexed).
    """
    coordinate = action.get("coordinate")
    return (coordinate[0], coordinate[1])
51
-
52
-
53
- # Function to draw points on an image
54
def draw_clicks_on_image(
    image_path: str,
    click_coords: Tuple[float, float],
    output_path: Optional[str] = None,
) -> Image.Image:
    """Open an image and mark a click position on it with a filled red circle.

    Args:
        image_path: Path of the image file to open.
        click_coords: ``(x, y)`` position of the click. If either component
            is ``None`` no marker is drawn.
        output_path: When given (and non-empty), the resulting image is also
            saved to this path via ``save_screenshot``.

    Returns:
        The opened image, with the marker drawn on it when coordinates were
        available.
    """
    image = Image.open(image_path)
    draw = ImageDraw.Draw(image)

    (x, y) = click_coords
    radius = 20
    # Test against None instead of truthiness: 0 is a valid coordinate (a
    # click on the left or top edge) and must still get a marker.
    if x is not None and y is not None:
        draw.ellipse(
            (x - radius, y - radius, x + radius, y + radius),
            fill="red",
            outline="red",
        )

    # Save the modified image only when a destination was requested.
    if output_path:
        save_screenshot(image, output_path)
    return image
phone_agent/__init__.py DELETED
@@ -1,12 +0,0 @@
1
- """
2
- Phone Agent - An AI-powered phone automation framework.
3
-
4
- This package provides tools for automating Android and iOS phone interactions
5
- using AI models for visual understanding and decision making.
6
- """
7
-
8
- from phone_agent.agent import PhoneAgent
9
- from phone_agent.agent_ios import IOSPhoneAgent
10
-
11
- __version__ = "0.1.0"
12
- __all__ = ["PhoneAgent", "IOSPhoneAgent"]
phone_agent/actions/__init__.py DELETED
@@ -1,5 +0,0 @@
1
- """Action handling module for Phone Agent."""
2
-
3
- from phone_agent.actions.handler import ActionHandler, ActionResult
4
-
5
- __all__ = ["ActionHandler", "ActionResult"]
phone_agent/actions/handler.py DELETED
@@ -1,400 +0,0 @@
1
- """Action handler for processing AI model outputs."""
2
-
3
- import ast
4
- import subprocess
5
- import time
6
- from dataclasses import dataclass
7
- from typing import Any, Callable
8
-
9
- from phone_agent.config.timing import TIMING_CONFIG
10
- from phone_agent.device_factory import get_device_factory
11
-
12
-
13
- @dataclass
14
- class ActionResult:
15
- """Result of an action execution."""
16
-
17
- success: bool
18
- should_finish: bool
19
- message: str | None = None
20
- requires_confirmation: bool = False
21
-
22
-
23
- class ActionHandler:
24
- """
25
- Handles execution of actions from AI model output.
26
-
27
- Args:
28
- device_id: Optional ADB device ID for multi-device setups.
29
- confirmation_callback: Optional callback for sensitive action confirmation.
30
- Should return True to proceed, False to cancel.
31
- takeover_callback: Optional callback for takeover requests (login, captcha).
32
- """
33
-
34
- def __init__(
35
- self,
36
- device_id: str | None = None,
37
- confirmation_callback: Callable[[str], bool] | None = None,
38
- takeover_callback: Callable[[str], None] | None = None,
39
- ):
40
- self.device_id = device_id
41
- self.confirmation_callback = confirmation_callback or self._default_confirmation
42
- self.takeover_callback = takeover_callback or self._default_takeover
43
-
44
- def execute(
45
- self, action: dict[str, Any], screen_width: int, screen_height: int
46
- ) -> ActionResult:
47
- """
48
- Execute an action from the AI model.
49
-
50
- Args:
51
- action: The action dictionary from the model.
52
- screen_width: Current screen width in pixels.
53
- screen_height: Current screen height in pixels.
54
-
55
- Returns:
56
- ActionResult indicating success and whether to finish.
57
- """
58
- action_type = action.get("_metadata")
59
-
60
- if action_type == "finish":
61
- return ActionResult(
62
- success=True, should_finish=True, message=action.get("message")
63
- )
64
-
65
- if action_type != "do":
66
- return ActionResult(
67
- success=False,
68
- should_finish=True,
69
- message=f"Unknown action type: {action_type}",
70
- )
71
-
72
- action_name = action.get("action")
73
- handler_method = self._get_handler(action_name)
74
-
75
- if handler_method is None:
76
- return ActionResult(
77
- success=False,
78
- should_finish=False,
79
- message=f"Unknown action: {action_name}",
80
- )
81
-
82
- try:
83
- return handler_method(action, screen_width, screen_height)
84
- except Exception as e:
85
- return ActionResult(
86
- success=False, should_finish=False, message=f"Action failed: {e}"
87
- )
88
-
89
- def _get_handler(self, action_name: str) -> Callable | None:
90
- """Get the handler method for an action."""
91
- handlers = {
92
- "Launch": self._handle_launch,
93
- "Tap": self._handle_tap,
94
- "Type": self._handle_type,
95
- "Type_Name": self._handle_type,
96
- "Swipe": self._handle_swipe,
97
- "Back": self._handle_back,
98
- "Home": self._handle_home,
99
- "Double Tap": self._handle_double_tap,
100
- "Long Press": self._handle_long_press,
101
- "Wait": self._handle_wait,
102
- "Take_over": self._handle_takeover,
103
- "Note": self._handle_note,
104
- "Call_API": self._handle_call_api,
105
- "Interact": self._handle_interact,
106
- }
107
- return handlers.get(action_name)
108
-
109
- def _convert_relative_to_absolute(
110
- self, element: list[int], screen_width: int, screen_height: int
111
- ) -> tuple[int, int]:
112
- """Convert relative coordinates (0-1000) to absolute pixels."""
113
- x = int(element[0] / 1000 * screen_width)
114
- y = int(element[1] / 1000 * screen_height)
115
- return x, y
116
-
117
- def _handle_launch(self, action: dict, width: int, height: int) -> ActionResult:
118
- """Handle app launch action."""
119
- app_name = action.get("app")
120
- if not app_name:
121
- return ActionResult(False, False, "No app name specified")
122
-
123
- device_factory = get_device_factory()
124
- success = device_factory.launch_app(app_name, self.device_id)
125
- if success:
126
- return ActionResult(True, False)
127
- return ActionResult(False, False, f"App not found: {app_name}")
128
-
129
- def _handle_tap(self, action: dict, width: int, height: int) -> ActionResult:
130
- """Handle tap action."""
131
- element = action.get("element")
132
- if not element:
133
- return ActionResult(False, False, "No element coordinates")
134
-
135
- x, y = self._convert_relative_to_absolute(element, width, height)
136
-
137
- # Check for sensitive operation
138
- if "message" in action:
139
- if not self.confirmation_callback(action["message"]):
140
- return ActionResult(
141
- success=False,
142
- should_finish=True,
143
- message="User cancelled sensitive operation",
144
- )
145
-
146
- device_factory = get_device_factory()
147
- device_factory.tap(x, y, self.device_id)
148
- return ActionResult(True, False)
149
-
150
- def _handle_type(self, action: dict, width: int, height: int) -> ActionResult:
151
- """Handle text input action."""
152
- text = action.get("text", "")
153
-
154
- device_factory = get_device_factory()
155
-
156
- # Switch to ADB keyboard
157
- original_ime = device_factory.detect_and_set_adb_keyboard(self.device_id)
158
- time.sleep(TIMING_CONFIG.action.keyboard_switch_delay)
159
-
160
- # Clear existing text and type new text
161
- device_factory.clear_text(self.device_id)
162
- time.sleep(TIMING_CONFIG.action.text_clear_delay)
163
-
164
- # Handle multiline text by splitting on newlines
165
- device_factory.type_text(text, self.device_id)
166
- time.sleep(TIMING_CONFIG.action.text_input_delay)
167
-
168
- # Restore original keyboard
169
- device_factory.restore_keyboard(original_ime, self.device_id)
170
- time.sleep(TIMING_CONFIG.action.keyboard_restore_delay)
171
-
172
- return ActionResult(True, False)
173
-
174
- def _handle_swipe(self, action: dict, width: int, height: int) -> ActionResult:
175
- """Handle swipe action."""
176
- start = action.get("start")
177
- end = action.get("end")
178
-
179
- if not start or not end:
180
- return ActionResult(False, False, "Missing swipe coordinates")
181
-
182
- start_x, start_y = self._convert_relative_to_absolute(start, width, height)
183
- end_x, end_y = self._convert_relative_to_absolute(end, width, height)
184
-
185
- device_factory = get_device_factory()
186
- device_factory.swipe(start_x, start_y, end_x, end_y, device_id=self.device_id)
187
- return ActionResult(True, False)
188
-
189
- def _handle_back(self, action: dict, width: int, height: int) -> ActionResult:
190
- """Handle back button action."""
191
- device_factory = get_device_factory()
192
- device_factory.back(self.device_id)
193
- return ActionResult(True, False)
194
-
195
- def _handle_home(self, action: dict, width: int, height: int) -> ActionResult:
196
- """Handle home button action."""
197
- device_factory = get_device_factory()
198
- device_factory.home(self.device_id)
199
- return ActionResult(True, False)
200
-
201
- def _handle_double_tap(self, action: dict, width: int, height: int) -> ActionResult:
202
- """Handle double tap action."""
203
- element = action.get("element")
204
- if not element:
205
- return ActionResult(False, False, "No element coordinates")
206
-
207
- x, y = self._convert_relative_to_absolute(element, width, height)
208
- device_factory = get_device_factory()
209
- device_factory.double_tap(x, y, self.device_id)
210
- return ActionResult(True, False)
211
-
212
- def _handle_long_press(self, action: dict, width: int, height: int) -> ActionResult:
213
- """Handle long press action."""
214
- element = action.get("element")
215
- if not element:
216
- return ActionResult(False, False, "No element coordinates")
217
-
218
- x, y = self._convert_relative_to_absolute(element, width, height)
219
- device_factory = get_device_factory()
220
- device_factory.long_press(x, y, device_id=self.device_id)
221
- return ActionResult(True, False)
222
-
223
- def _handle_wait(self, action: dict, width: int, height: int) -> ActionResult:
224
- """Handle wait action."""
225
- duration_str = action.get("duration", "1 seconds")
226
- try:
227
- duration = float(duration_str.replace("seconds", "").strip())
228
- except ValueError:
229
- duration = 1.0
230
-
231
- time.sleep(duration)
232
- return ActionResult(True, False)
233
-
234
- def _handle_takeover(self, action: dict, width: int, height: int) -> ActionResult:
235
- """Handle takeover request (login, captcha, etc.)."""
236
- message = action.get("message", "User intervention required")
237
- self.takeover_callback(message)
238
- return ActionResult(True, False)
239
-
240
- def _handle_note(self, action: dict, width: int, height: int) -> ActionResult:
241
- """Handle note action (placeholder for content recording)."""
242
- # This action is typically used for recording page content
243
- # Implementation depends on specific requirements
244
- return ActionResult(True, False)
245
-
246
- def _handle_call_api(self, action: dict, width: int, height: int) -> ActionResult:
247
- """Handle API call action (placeholder for summarization)."""
248
- # This action is typically used for content summarization
249
- # Implementation depends on specific requirements
250
- return ActionResult(True, False)
251
-
252
- def _handle_interact(self, action: dict, width: int, height: int) -> ActionResult:
253
- """Handle interaction request (user choice needed)."""
254
- # This action signals that user input is needed
255
- return ActionResult(True, False, message="User interaction required")
256
-
257
- def _send_keyevent(self, keycode: str) -> None:
258
- """Send a keyevent to the device."""
259
- from phone_agent.device_factory import DeviceType, get_device_factory
260
- from phone_agent.hdc.connection import _run_hdc_command
261
-
262
- device_factory = get_device_factory()
263
-
264
- # Handle HDC devices with HarmonyOS-specific keyEvent command
265
- if device_factory.device_type == DeviceType.HDC:
266
- hdc_prefix = ["hdc", "-t", self.device_id] if self.device_id else ["hdc"]
267
-
268
- # Map common keycodes to HarmonyOS keyEvent codes
269
- # KEYCODE_ENTER (66) -> 2054 (HarmonyOS Enter key code)
270
- if keycode == "KEYCODE_ENTER" or keycode == "66":
271
- _run_hdc_command(
272
- hdc_prefix + ["shell", "uitest", "uiInput", "keyEvent", "2054"],
273
- capture_output=True,
274
- text=True,
275
- )
276
- else:
277
- # For other keys, try to use the numeric code directly
278
- # If keycode is a string like "KEYCODE_ENTER", convert it
279
- try:
280
- # Try to extract numeric code from string or use as-is
281
- if keycode.startswith("KEYCODE_"):
282
- # For now, only handle ENTER, other keys may need mapping
283
- if "ENTER" in keycode:
284
- _run_hdc_command(
285
- hdc_prefix
286
- + ["shell", "uitest", "uiInput", "keyEvent", "2054"],
287
- capture_output=True,
288
- text=True,
289
- )
290
- else:
291
- # Fallback to ADB-style command for unsupported keys
292
- subprocess.run(
293
- hdc_prefix + ["shell", "input", "keyevent", keycode],
294
- capture_output=True,
295
- text=True,
296
- )
297
- else:
298
- # Assume it's a numeric code
299
- _run_hdc_command(
300
- hdc_prefix
301
- + ["shell", "uitest", "uiInput", "keyEvent", str(keycode)],
302
- capture_output=True,
303
- text=True,
304
- )
305
- except Exception:
306
- # Fallback to ADB-style command
307
- subprocess.run(
308
- hdc_prefix + ["shell", "input", "keyevent", keycode],
309
- capture_output=True,
310
- text=True,
311
- )
312
- else:
313
- # ADB devices use standard input keyevent command
314
- cmd_prefix = ["adb", "-s", self.device_id] if self.device_id else ["adb"]
315
- subprocess.run(
316
- cmd_prefix + ["shell", "input", "keyevent", keycode],
317
- capture_output=True,
318
- text=True,
319
- )
320
-
321
- @staticmethod
322
- def _default_confirmation(message: str) -> bool:
323
- """Default confirmation callback using console input."""
324
- response = input(f"Sensitive operation: {message}\nConfirm? (Y/N): ")
325
- return response.upper() == "Y"
326
-
327
- @staticmethod
328
- def _default_takeover(message: str) -> None:
329
- """Default takeover callback using console input."""
330
- input(f"{message}\nPress Enter after completing manual operation...")
331
-
332
-
333
def parse_action(response: str) -> dict[str, Any]:
    """
    Parse action from model response.

    Three response shapes are recognised after stripping whitespace:

    * ``do(action="Type", text="...")`` / ``do(action="Type_Name", ...)``:
      the text is cut out by slicing rather than parsed, and the action is
      always reported as ``"Type"``.
    * any other ``do(...)`` call: keyword arguments are read with ``ast``
      (literal values only); positional arguments are ignored.
    * ``finish(message="...")``: the message is cut out by slicing.

    Args:
        response: Raw response string from the model.

    Returns:
        Parsed action dictionary with a ``"_metadata"`` entry of ``"do"`` or
        ``"finish"``.

    Raises:
        ValueError: If the response cannot be parsed. The underlying error is
            attached as ``__cause__``.
    """
    print(f"Parsing action: {response}")
    try:
        response = response.strip()

        if response.startswith(('do(action="Type"', 'do(action="Type_Name"')):
            # Slice off the opening quote and the trailing `")` instead of
            # evaluating, so the text is taken verbatim.
            text = response.split("text=", 1)[1][1:-2]
            return {"_metadata": "do", "action": "Type", "text": text}

        if response.startswith("do"):
            # Use AST parsing instead of eval for safety
            try:
                # Escape special characters (newlines, tabs, etc.) for valid Python syntax
                response = response.replace("\n", "\\n")
                response = response.replace("\r", "\\r")
                response = response.replace("\t", "\\t")

                tree = ast.parse(response, mode="eval")
                if not isinstance(tree.body, ast.Call):
                    raise ValueError("Expected a function call")

                # Extract keyword arguments safely
                action = {"_metadata": "do"}
                for keyword in tree.body.keywords:
                    action[keyword.arg] = ast.literal_eval(keyword.value)
                return action
            except (SyntaxError, ValueError) as e:
                raise ValueError(f"Failed to parse do() action: {e}") from e

        if response.startswith("finish"):
            return {
                "_metadata": "finish",
                "message": response.replace("finish(message=", "")[1:-2],
            }

        raise ValueError(f"Failed to parse action: {response}")
    except Exception as e:
        # Every failure, including the ValueErrors raised above, leaves this
        # function as a ValueError with the original error chained.
        raise ValueError(f"Failed to parse action: {e}") from e
389
-
390
-
391
def do(**kwargs) -> dict[str, Any]:
    """Build a 'do' action: the given keyword arguments plus ``_metadata="do"``."""
    return {**kwargs, "_metadata": "do"}
395
-
396
-
397
def finish(**kwargs) -> dict[str, Any]:
    """Build a 'finish' action: the given keyword arguments plus ``_metadata="finish"``."""
    return {**kwargs, "_metadata": "finish"}