autoglm-gui 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in the public registry.
Files changed (120)
  1. AutoGLM_GUI/__init__.py +11 -0
  2. AutoGLM_GUI/__main__.py +26 -8
  3. AutoGLM_GUI/actions/__init__.py +6 -0
  4. AutoGLM_GUI/actions/handler.py +196 -0
  5. AutoGLM_GUI/actions/types.py +15 -0
  6. AutoGLM_GUI/adb/__init__.py +53 -0
  7. AutoGLM_GUI/adb/apps.py +227 -0
  8. AutoGLM_GUI/adb/connection.py +323 -0
  9. AutoGLM_GUI/adb/device.py +171 -0
  10. AutoGLM_GUI/adb/input.py +67 -0
  11. AutoGLM_GUI/adb/screenshot.py +11 -0
  12. AutoGLM_GUI/adb/timing.py +167 -0
  13. AutoGLM_GUI/adb_plus/keyboard_installer.py +4 -2
  14. AutoGLM_GUI/adb_plus/qr_pair.py +8 -8
  15. AutoGLM_GUI/adb_plus/screenshot.py +22 -1
  16. AutoGLM_GUI/adb_plus/serial.py +38 -20
  17. AutoGLM_GUI/adb_plus/touch.py +4 -9
  18. AutoGLM_GUI/agents/__init__.py +51 -0
  19. AutoGLM_GUI/agents/events.py +19 -0
  20. AutoGLM_GUI/agents/factory.py +153 -0
  21. AutoGLM_GUI/agents/glm/__init__.py +7 -0
  22. AutoGLM_GUI/agents/glm/agent.py +292 -0
  23. AutoGLM_GUI/agents/glm/message_builder.py +81 -0
  24. AutoGLM_GUI/agents/glm/parser.py +110 -0
  25. AutoGLM_GUI/agents/glm/prompts_en.py +77 -0
  26. AutoGLM_GUI/agents/glm/prompts_zh.py +75 -0
  27. AutoGLM_GUI/agents/mai/__init__.py +28 -0
  28. AutoGLM_GUI/agents/mai/agent.py +405 -0
  29. AutoGLM_GUI/agents/mai/parser.py +254 -0
  30. AutoGLM_GUI/agents/mai/prompts.py +103 -0
  31. AutoGLM_GUI/agents/mai/traj_memory.py +91 -0
  32. AutoGLM_GUI/agents/protocols.py +27 -0
  33. AutoGLM_GUI/agents/stream_runner.py +188 -0
  34. AutoGLM_GUI/api/__init__.py +71 -11
  35. AutoGLM_GUI/api/agents.py +190 -229
  36. AutoGLM_GUI/api/control.py +9 -6
  37. AutoGLM_GUI/api/devices.py +112 -28
  38. AutoGLM_GUI/api/health.py +13 -0
  39. AutoGLM_GUI/api/history.py +78 -0
  40. AutoGLM_GUI/api/layered_agent.py +306 -181
  41. AutoGLM_GUI/api/mcp.py +11 -10
  42. AutoGLM_GUI/api/media.py +64 -1
  43. AutoGLM_GUI/api/scheduled_tasks.py +98 -0
  44. AutoGLM_GUI/api/version.py +23 -10
  45. AutoGLM_GUI/api/workflows.py +2 -1
  46. AutoGLM_GUI/config.py +72 -14
  47. AutoGLM_GUI/config_manager.py +98 -27
  48. AutoGLM_GUI/device_adapter.py +263 -0
  49. AutoGLM_GUI/device_manager.py +248 -29
  50. AutoGLM_GUI/device_protocol.py +266 -0
  51. AutoGLM_GUI/devices/__init__.py +49 -0
  52. AutoGLM_GUI/devices/adb_device.py +200 -0
  53. AutoGLM_GUI/devices/mock_device.py +185 -0
  54. AutoGLM_GUI/devices/remote_device.py +177 -0
  55. AutoGLM_GUI/exceptions.py +3 -3
  56. AutoGLM_GUI/history_manager.py +164 -0
  57. AutoGLM_GUI/i18n.py +81 -0
  58. AutoGLM_GUI/metrics.py +13 -20
  59. AutoGLM_GUI/model/__init__.py +5 -0
  60. AutoGLM_GUI/model/message_builder.py +69 -0
  61. AutoGLM_GUI/model/types.py +24 -0
  62. AutoGLM_GUI/models/__init__.py +10 -0
  63. AutoGLM_GUI/models/history.py +96 -0
  64. AutoGLM_GUI/models/scheduled_task.py +71 -0
  65. AutoGLM_GUI/parsers/__init__.py +22 -0
  66. AutoGLM_GUI/parsers/base.py +50 -0
  67. AutoGLM_GUI/parsers/phone_parser.py +58 -0
  68. AutoGLM_GUI/phone_agent_manager.py +118 -367
  69. AutoGLM_GUI/platform_utils.py +31 -2
  70. AutoGLM_GUI/prompt_config.py +15 -0
  71. AutoGLM_GUI/prompts/__init__.py +32 -0
  72. AutoGLM_GUI/scheduler_manager.py +304 -0
  73. AutoGLM_GUI/schemas.py +272 -63
  74. AutoGLM_GUI/scrcpy_stream.py +159 -37
  75. AutoGLM_GUI/server.py +3 -1
  76. AutoGLM_GUI/socketio_server.py +114 -29
  77. AutoGLM_GUI/state.py +10 -30
  78. AutoGLM_GUI/static/assets/{about-DeclntHg.js → about-BQm96DAl.js} +1 -1
  79. AutoGLM_GUI/static/assets/alert-dialog-B42XxGPR.js +1 -0
  80. AutoGLM_GUI/static/assets/chat-C0L2gQYG.js +129 -0
  81. AutoGLM_GUI/static/assets/circle-alert-D4rSJh37.js +1 -0
  82. AutoGLM_GUI/static/assets/dialog-DZ78cEcj.js +45 -0
  83. AutoGLM_GUI/static/assets/history-DFBv7TGc.js +1 -0
  84. AutoGLM_GUI/static/assets/index-Bzyv2yQ2.css +1 -0
  85. AutoGLM_GUI/static/assets/{index-zQ4KKDHt.js → index-CmZSnDqc.js} +1 -1
  86. AutoGLM_GUI/static/assets/index-CssG-3TH.js +11 -0
  87. AutoGLM_GUI/static/assets/label-BCUzE_nm.js +1 -0
  88. AutoGLM_GUI/static/assets/logs-eoFxn5of.js +1 -0
  89. AutoGLM_GUI/static/assets/popover-DLsuV5Sx.js +1 -0
  90. AutoGLM_GUI/static/assets/scheduled-tasks-MyqGJvy_.js +1 -0
  91. AutoGLM_GUI/static/assets/square-pen-zGWYrdfj.js +1 -0
  92. AutoGLM_GUI/static/assets/textarea-BX6y7uM5.js +1 -0
  93. AutoGLM_GUI/static/assets/workflows-CYFs6ssC.js +1 -0
  94. AutoGLM_GUI/static/index.html +2 -2
  95. AutoGLM_GUI/types.py +142 -0
  96. {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/METADATA +178 -92
  97. autoglm_gui-1.5.0.dist-info/RECORD +157 -0
  98. mai_agent/base.py +137 -0
  99. mai_agent/mai_grounding_agent.py +263 -0
  100. mai_agent/mai_naivigation_agent.py +526 -0
  101. mai_agent/prompt.py +148 -0
  102. mai_agent/unified_memory.py +67 -0
  103. mai_agent/utils.py +73 -0
  104. AutoGLM_GUI/api/dual_model.py +0 -311
  105. AutoGLM_GUI/dual_model/__init__.py +0 -53
  106. AutoGLM_GUI/dual_model/decision_model.py +0 -664
  107. AutoGLM_GUI/dual_model/dual_agent.py +0 -917
  108. AutoGLM_GUI/dual_model/protocols.py +0 -354
  109. AutoGLM_GUI/dual_model/vision_model.py +0 -442
  110. AutoGLM_GUI/mai_ui_adapter/agent_wrapper.py +0 -291
  111. AutoGLM_GUI/phone_agent_patches.py +0 -146
  112. AutoGLM_GUI/static/assets/chat-Iut2yhSw.js +0 -125
  113. AutoGLM_GUI/static/assets/dialog-BfdcBs1x.js +0 -45
  114. AutoGLM_GUI/static/assets/index-5hCCwHA7.css +0 -1
  115. AutoGLM_GUI/static/assets/index-DHF1NZh0.js +0 -12
  116. AutoGLM_GUI/static/assets/workflows-xiplap-r.js +0 -1
  117. autoglm_gui-1.4.0.dist-info/RECORD +0 -100
  118. {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/WHEEL +0 -0
  119. {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/entry_points.txt +0 -0
  120. {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,291 +0,0 @@
- """MAI-UI PhoneAgent wrapper for compatibility with AutoGLM-GUI interface."""
-
- from dataclasses import dataclass
- from typing import Any, Callable, Optional
-
- from phone_agent.agent import AgentConfig, StepResult
- from phone_agent.actions.handler import ActionHandler
- from phone_agent.model import ModelConfig
-
- from AutoGLM_GUI.logger import logger
- from AutoGLM_GUI.mai_ui.mai_navigation_agent import MAIUINaivigationAgent
- from AutoGLM_GUI.mai_ui_adapter.action_adapter import MAIUIActionAdapter
-
-
- @dataclass
- class MAIUIConfig:
-     """MAI-UI specific configuration."""
-
-     history_n: int = 3
-     temperature: float = 0.0
-     top_k: int = -1
-     top_p: float = 1.0
-     max_tokens: int = 2048
-
-
- class MAIUIPhoneAgent:
-     """
-     MAI-UI Agent wrapper that implements the PhoneAgent interface.
-
-     This wrapper allows MAI-UI agents to be used transparently in place of
-     the standard PhoneAgent, providing compatibility with the existing
-     PhoneAgentManager and API infrastructure.
-
-     Usage:
-         agent = MAIUIPhoneAgent(
-             model_config=model_config,
-             agent_config=agent_config,
-         )
-         result = agent.run("Open WeChat")
-     """
-
-     def __init__(
-         self,
-         model_config: ModelConfig,
-         agent_config: AgentConfig,
-         mai_config: Optional[MAIUIConfig] = None,
-         takeover_callback: Optional[Callable[[str], None]] = None,
-     ):
-         """
-         Initialize MAI-UI PhoneAgent wrapper.
-
-         Args:
-             model_config: Model configuration (base_url, api_key, model_name).
-             agent_config: Agent configuration (device_id, max_steps, etc.).
-             mai_config: MAI-UI specific configuration.
-             takeover_callback: Callback for takeover requests.
-         """
-         self.model_config = model_config
-         self.agent_config = agent_config
-         self.mai_config = mai_config or MAIUIConfig()
-
-         # Create MAI-UI navigation agent
-         self._mai_agent = MAIUINaivigationAgent(
-             llm_base_url=model_config.base_url,
-             model_name=model_config.model_name,
-             api_key=model_config.api_key,
-             runtime_conf={
-                 "history_n": self.mai_config.history_n,
-                 "temperature": self.mai_config.temperature,
-                 "top_k": self.mai_config.top_k,
-                 "top_p": self.mai_config.top_p,
-                 "max_tokens": self.mai_config.max_tokens,
-             },
-         )
-
-         # Action adapter and handler
-         self._action_adapter = MAIUIActionAdapter()
-         self.action_handler = ActionHandler(
-             device_id=agent_config.device_id,
-             takeover_callback=takeover_callback,
-         )
-
-         # PhoneAgent-compatible state
-         self._context: list[dict[str, Any]] = []
-         self._step_count = 0
-         self._current_task: str = ""
-
-         # For model_client compatibility (used by streaming patches)
-         self.model_client = _DummyModelClient()
-
-         # Debug: Print model configuration for troubleshooting
-         logger.info("=" * 60)
-         logger.info("[MAI-UI Agent] Initialization")
-         logger.info(f" Device ID: {agent_config.device_id}")
-         logger.info(f" Base URL: {model_config.base_url}")
-         logger.info(f" Model: {model_config.model_name}")
-         logger.info("=" * 60)
-
-     def run(self, task: str) -> str:
-         """
-         Execute a complete task.
-
-         Args:
-             task: Natural language task description.
-
-         Returns:
-             Final message from the agent.
-         """
-         self.reset()
-         self._current_task = task
-
-         # First step
-         result = self._execute_step(task, is_first=True)
-
-         if result.finished:
-             return result.message or "Task completed"
-
-         # Continue until finished or max steps reached
-         while self._step_count < self.agent_config.max_steps:
-             result = self._execute_step(is_first=False)
-
-             if result.finished:
-                 return result.message or "Task completed"
-
-         return "Max steps reached"
-
-     def step(self, task: Optional[str] = None) -> StepResult:
-         """
-         Execute a single step.
-
-         Args:
-             task: Task description (required for first step).
-
-         Returns:
-             StepResult with step details.
-         """
-         is_first = len(self._context) == 0
-
-         if is_first:
-             if not task:
-                 raise ValueError("Task is required for the first step")
-             self._current_task = task
-
-         return self._execute_step(task, is_first)
-
-     def _execute_step(
-         self, user_prompt: Optional[str] = None, is_first: bool = False
-     ) -> StepResult:
-         """Execute a single step of the agent loop."""
-         from phone_agent.device_factory import get_device_factory
-         from PIL import Image
-         from io import BytesIO
-
-         self._step_count += 1
-         logger.info(f"[MAI-UI] Executing step {self._step_count}")
-
-         # Get screenshot
-         device_factory = get_device_factory()
-         screenshot = device_factory.get_screenshot(self.agent_config.device_id)
-
-         # Convert base64 to PIL Image
-         import base64
-
-         image_bytes = base64.b64decode(screenshot.base64_data)
-         pil_image = Image.open(BytesIO(image_bytes))
-
-         # Build observation
-         obs = {
-             "screenshot": pil_image,
-             "accessibility_tree": None,
-         }
-
-         # Get instruction
-         instruction = user_prompt or self._current_task
-
-         # Call MAI-UI predict
-         try:
-             raw_response, action_json = self._mai_agent.predict(
-                 instruction=instruction,
-                 obs=obs,
-             )
-         except Exception as e:
-             logger.error(f"[MAI-UI] Predict failed: {e}")
-             return StepResult(
-                 success=False,
-                 finished=True,
-                 action=None,
-                 thinking="",
-                 message=f"Prediction failed: {e}",
-             )
-
-         # Check for error
-         if action_json.get("action") is None:
-             logger.error("[MAI-UI] Invalid action returned")
-             return StepResult(
-                 success=False,
-                 finished=True,
-                 action=None,
-                 thinking="",
-                 message="Invalid action from model",
-             )
-
-         # Get thinking from trajectory
-         thinking = ""
-         if self._mai_agent.traj_memory.steps:
-             last_step = self._mai_agent.traj_memory.steps[-1]
-             thinking = last_step.thought or ""
-
-         # Convert action to AutoGLM-GUI format
-         converted_action = self._action_adapter.convert(action_json)
-         logger.debug(f"[MAI-UI] Converted action: {converted_action}")
-
-         # Check if finished (terminate action)
-         if converted_action.get("_metadata") == "finish":
-             return StepResult(
-                 success=True,
-                 finished=True,
-                 action=converted_action,
-                 thinking=thinking,
-                 message=converted_action.get("message", "Task completed"),
-             )
-
-         # Execute action
-         try:
-             result = self.action_handler.execute(
-                 converted_action,
-                 screenshot.width,
-                 screenshot.height,
-             )
-         except Exception as e:
-             logger.error(f"[MAI-UI] Action execution failed: {e}")
-             return StepResult(
-                 success=False,
-                 finished=False,
-                 action=converted_action,
-                 thinking=thinking,
-                 message=f"Action failed: {e}",
-             )
-
-         # Update context for compatibility
-         self._context.append(
-             {
-                 "step": self._step_count,
-                 "action": action_json,
-                 "converted_action": converted_action,
-                 "result": result.success,
-                 "thinking": thinking,
-             }
-         )
-
-         return StepResult(
-             success=result.success,
-             finished=result.should_finish,
-             action=converted_action,
-             thinking=thinking,
-             message=result.message,
-         )
-
-     def reset(self) -> None:
-         """Reset agent state for a new task."""
-         self._context = []
-         self._step_count = 0
-         self._current_task = ""
-         self._mai_agent.reset()
-         logger.debug("[MAI-UI] Agent reset")
-
-     @property
-     def step_count(self) -> int:
-         """Get current step count."""
-         return self._step_count
-
-     @property
-     def context(self) -> list[dict[str, Any]]:
-         """Get conversation context (for compatibility)."""
-         return self._context.copy()
-
-
- class _DummyModelClient:
-     """
-     Dummy model client for compatibility with streaming patches.
-
-     The actual model calls are handled by MAI-UI agent internally.
-     This exists to satisfy code that expects model_client attribute.
-     """
-
-     def request(self, messages: list, **kwargs) -> Any:
-         """Dummy request method - should not be called directly."""
-         raise NotImplementedError(
-             "MAIUIPhoneAgent handles model calls internally. "
-             "Do not call model_client.request() directly."
-         )
@@ -1,146 +0,0 @@
- """
- Monkey patches for phone_agent to add streaming functionality.
-
- This module patches the upstream phone_agent code without modifying the original files.
- """
-
- from typing import Any, Callable
-
- from phone_agent.model import ModelClient
-
-
- # Store original methods
- _original_model_request = ModelClient.request
-
-
- def _patched_model_request(
-     self,
-     messages: list[dict[str, Any]],
-     on_thinking_chunk: Callable[[str], None] | None = None,
- ) -> Any:
-     """
-     Patched version of ModelClient.request that supports streaming thinking chunks.
-
-     This wraps the original request method and adds callback support for thinking chunks.
-     """
-     import time
-
-     from phone_agent.model.client import ModelResponse
-
-     # Start timing
-     start_time = time.time()
-     time_to_first_token = None
-     time_to_thinking_end = None
-
-     stream = self.client.chat.completions.create(
-         messages=messages,
-         model=self.config.model_name,
-         max_tokens=self.config.max_tokens,
-         temperature=self.config.temperature,
-         top_p=self.config.top_p,
-         frequency_penalty=self.config.frequency_penalty,
-         extra_body=self.config.extra_body,
-         stream=True,
-     )
-
-     raw_content = ""
-     buffer = ""  # Buffer to hold content that might be part of a marker
-     action_markers = ["finish(message=", "do(action="]
-     in_action_phase = False  # Track if we've entered the action phase
-     first_token_received = False
-
-     for chunk in stream:
-         if len(chunk.choices) == 0:
-             continue
-         if chunk.choices[0].delta.content is not None:
-             content = chunk.choices[0].delta.content
-             raw_content += content
-
-             # Record time to first token
-             if not first_token_received:
-                 time_to_first_token = time.time() - start_time
-                 first_token_received = True
-
-             if in_action_phase:
-                 # Already in action phase, just accumulate content without printing
-                 continue
-
-             buffer += content
-
-             # Check if any marker is fully present in buffer
-             marker_found = False
-             for marker in action_markers:
-                 if marker in buffer:
-                     # Marker found, print everything before it
-                     thinking_part = buffer.split(marker, 1)[0]
-                     print(thinking_part, end="", flush=True)
-                     if on_thinking_chunk:
-                         on_thinking_chunk(thinking_part)
-                     print()  # Print newline after thinking is complete
-                     in_action_phase = True
-                     marker_found = True
-
-                     # Record time to thinking end
-                     if time_to_thinking_end is None:
-                         time_to_thinking_end = time.time() - start_time
-
-                     break
-
-             if marker_found:
-                 continue  # Continue to collect remaining content
-
-             # Check if buffer ends with a prefix of any marker
-             # If so, don't print yet (wait for more content)
-             is_potential_marker = False
-             for marker in action_markers:
-                 for i in range(1, len(marker)):
-                     if buffer.endswith(marker[:i]):
-                         is_potential_marker = True
-                         break
-                 if is_potential_marker:
-                     break
-
-             if not is_potential_marker:
-                 # Safe to print the buffer
-                 print(buffer, end="", flush=True)
-                 if on_thinking_chunk:
-                     on_thinking_chunk(buffer)
-                 buffer = ""
-
-     # Calculate total time
-     total_time = time.time() - start_time
-
-     # Parse thinking and action from response
-     thinking, action = self._parse_response(raw_content)
-
-     # Print performance metrics
-     from phone_agent.config.i18n import get_message
-
-     lang = self.config.lang
-     print()
-     print("=" * 50)
-     print(f"⏱️ {get_message('performance_metrics', lang)}:")
-     print("-" * 50)
-     if time_to_first_token is not None:
-         print(f"{get_message('time_to_first_token', lang)}: {time_to_first_token:.3f}s")
-     if time_to_thinking_end is not None:
-         print(
-             f"{get_message('time_to_thinking_end', lang)}: {time_to_thinking_end:.3f}s"
-         )
-     print(f"{get_message('total_inference_time', lang)}: {total_time:.3f}s")
-     print("=" * 50)
-
-     return ModelResponse(
-         thinking=thinking,
-         action=action,
-         raw_content=raw_content,
-         time_to_first_token=time_to_first_token,
-         time_to_thinking_end=time_to_thinking_end,
-         total_time=total_time,
-     )
-
-
- def apply_patches():
-     """Apply all monkey patches to phone_agent."""
-     # Patch ModelClient.request to support streaming callbacks
-     ModelClient.request = _patched_model_request