autoglm-gui 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. AutoGLM_GUI/__init__.py +11 -0
  2. AutoGLM_GUI/__main__.py +26 -4
  3. AutoGLM_GUI/actions/__init__.py +6 -0
  4. AutoGLM_GUI/actions/handler.py +196 -0
  5. AutoGLM_GUI/actions/types.py +15 -0
  6. AutoGLM_GUI/adb/__init__.py +53 -0
  7. AutoGLM_GUI/adb/apps.py +227 -0
  8. AutoGLM_GUI/adb/connection.py +323 -0
  9. AutoGLM_GUI/adb/device.py +171 -0
  10. AutoGLM_GUI/adb/input.py +67 -0
  11. AutoGLM_GUI/adb/screenshot.py +11 -0
  12. AutoGLM_GUI/adb/timing.py +167 -0
  13. AutoGLM_GUI/adb_plus/keyboard_installer.py +4 -2
  14. AutoGLM_GUI/adb_plus/screenshot.py +22 -1
  15. AutoGLM_GUI/adb_plus/serial.py +38 -20
  16. AutoGLM_GUI/adb_plus/touch.py +4 -9
  17. AutoGLM_GUI/agents/__init__.py +43 -12
  18. AutoGLM_GUI/agents/events.py +19 -0
  19. AutoGLM_GUI/agents/factory.py +31 -38
  20. AutoGLM_GUI/agents/glm/__init__.py +7 -0
  21. AutoGLM_GUI/agents/glm/agent.py +292 -0
  22. AutoGLM_GUI/agents/glm/message_builder.py +81 -0
  23. AutoGLM_GUI/agents/glm/parser.py +110 -0
  24. AutoGLM_GUI/agents/glm/prompts_en.py +77 -0
  25. AutoGLM_GUI/agents/glm/prompts_zh.py +75 -0
  26. AutoGLM_GUI/agents/mai/__init__.py +28 -0
  27. AutoGLM_GUI/agents/mai/agent.py +405 -0
  28. AutoGLM_GUI/agents/mai/parser.py +254 -0
  29. AutoGLM_GUI/agents/mai/prompts.py +103 -0
  30. AutoGLM_GUI/agents/mai/traj_memory.py +91 -0
  31. AutoGLM_GUI/agents/protocols.py +12 -8
  32. AutoGLM_GUI/agents/stream_runner.py +188 -0
  33. AutoGLM_GUI/api/__init__.py +40 -21
  34. AutoGLM_GUI/api/agents.py +157 -240
  35. AutoGLM_GUI/api/control.py +9 -6
  36. AutoGLM_GUI/api/devices.py +102 -12
  37. AutoGLM_GUI/api/history.py +78 -0
  38. AutoGLM_GUI/api/layered_agent.py +67 -15
  39. AutoGLM_GUI/api/media.py +64 -1
  40. AutoGLM_GUI/api/scheduled_tasks.py +98 -0
  41. AutoGLM_GUI/config.py +81 -0
  42. AutoGLM_GUI/config_manager.py +68 -51
  43. AutoGLM_GUI/device_manager.py +248 -29
  44. AutoGLM_GUI/device_protocol.py +1 -1
  45. AutoGLM_GUI/devices/adb_device.py +5 -10
  46. AutoGLM_GUI/devices/mock_device.py +4 -2
  47. AutoGLM_GUI/devices/remote_device.py +8 -3
  48. AutoGLM_GUI/history_manager.py +164 -0
  49. AutoGLM_GUI/i18n.py +81 -0
  50. AutoGLM_GUI/model/__init__.py +5 -0
  51. AutoGLM_GUI/model/message_builder.py +69 -0
  52. AutoGLM_GUI/model/types.py +24 -0
  53. AutoGLM_GUI/models/__init__.py +10 -0
  54. AutoGLM_GUI/models/history.py +96 -0
  55. AutoGLM_GUI/models/scheduled_task.py +71 -0
  56. AutoGLM_GUI/parsers/__init__.py +22 -0
  57. AutoGLM_GUI/parsers/base.py +50 -0
  58. AutoGLM_GUI/parsers/phone_parser.py +58 -0
  59. AutoGLM_GUI/phone_agent_manager.py +62 -396
  60. AutoGLM_GUI/platform_utils.py +26 -0
  61. AutoGLM_GUI/prompt_config.py +15 -0
  62. AutoGLM_GUI/prompts/__init__.py +32 -0
  63. AutoGLM_GUI/scheduler_manager.py +304 -0
  64. AutoGLM_GUI/schemas.py +234 -72
  65. AutoGLM_GUI/scrcpy_stream.py +142 -24
  66. AutoGLM_GUI/socketio_server.py +100 -27
  67. AutoGLM_GUI/static/assets/{about-_XNhzQZX.js → about-BQm96DAl.js} +1 -1
  68. AutoGLM_GUI/static/assets/alert-dialog-B42XxGPR.js +1 -0
  69. AutoGLM_GUI/static/assets/chat-C0L2gQYG.js +129 -0
  70. AutoGLM_GUI/static/assets/circle-alert-D4rSJh37.js +1 -0
  71. AutoGLM_GUI/static/assets/dialog-DZ78cEcj.js +45 -0
  72. AutoGLM_GUI/static/assets/history-DFBv7TGc.js +1 -0
  73. AutoGLM_GUI/static/assets/index-Bzyv2yQ2.css +1 -0
  74. AutoGLM_GUI/static/assets/{index-Cy8TmmHV.js → index-CmZSnDqc.js} +1 -1
  75. AutoGLM_GUI/static/assets/index-CssG-3TH.js +11 -0
  76. AutoGLM_GUI/static/assets/label-BCUzE_nm.js +1 -0
  77. AutoGLM_GUI/static/assets/logs-eoFxn5of.js +1 -0
  78. AutoGLM_GUI/static/assets/popover-DLsuV5Sx.js +1 -0
  79. AutoGLM_GUI/static/assets/scheduled-tasks-MyqGJvy_.js +1 -0
  80. AutoGLM_GUI/static/assets/square-pen-zGWYrdfj.js +1 -0
  81. AutoGLM_GUI/static/assets/textarea-BX6y7uM5.js +1 -0
  82. AutoGLM_GUI/static/assets/workflows-CYFs6ssC.js +1 -0
  83. AutoGLM_GUI/static/index.html +2 -2
  84. AutoGLM_GUI/types.py +17 -0
  85. {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.0.dist-info}/METADATA +137 -130
  86. autoglm_gui-1.5.0.dist-info/RECORD +157 -0
  87. AutoGLM_GUI/agents/mai_adapter.py +0 -627
  88. AutoGLM_GUI/api/dual_model.py +0 -317
  89. AutoGLM_GUI/dual_model/__init__.py +0 -53
  90. AutoGLM_GUI/dual_model/decision_model.py +0 -664
  91. AutoGLM_GUI/dual_model/dual_agent.py +0 -917
  92. AutoGLM_GUI/dual_model/protocols.py +0 -354
  93. AutoGLM_GUI/dual_model/vision_model.py +0 -442
  94. AutoGLM_GUI/mai_ui_adapter/agent_wrapper.py +0 -291
  95. AutoGLM_GUI/phone_agent_patches.py +0 -147
  96. AutoGLM_GUI/static/assets/chat-DwJpiAWf.js +0 -126
  97. AutoGLM_GUI/static/assets/dialog-B3uW4T8V.js +0 -45
  98. AutoGLM_GUI/static/assets/index-Cpv2gSF1.css +0 -1
  99. AutoGLM_GUI/static/assets/index-UYYauTly.js +0 -12
  100. AutoGLM_GUI/static/assets/workflows-Du_de-dt.js +0 -1
  101. autoglm_gui-1.4.1.dist-info/RECORD +0 -117
  102. {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.0.dist-info}/WHEEL +0 -0
  103. {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.0.dist-info}/entry_points.txt +0 -0
  104. {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.0.dist-info}/licenses/LICENSE +0 -0

AutoGLM_GUI/mai_ui_adapter/agent_wrapper.py
@@ -1,291 +0,0 @@
- """MAI-UI PhoneAgent wrapper for compatibility with AutoGLM-GUI interface."""
-
- from dataclasses import dataclass
- from typing import Any, Callable, Optional
-
- from phone_agent.agent import AgentConfig, StepResult
- from phone_agent.actions.handler import ActionHandler
- from phone_agent.model import ModelConfig
-
- from AutoGLM_GUI.logger import logger
- from AutoGLM_GUI.mai_ui.mai_navigation_agent import MAIUINaivigationAgent # type: ignore[import-not-found]
- from AutoGLM_GUI.mai_ui_adapter.action_adapter import MAIUIActionAdapter # type: ignore[import-not-found]
-
-
- @dataclass
- class MAIUIConfig:
-     """MAI-UI specific configuration."""
-
-     history_n: int = 3
-     temperature: float = 0.0
-     top_k: int = -1
-     top_p: float = 1.0
-     max_tokens: int = 2048
-
-
- class MAIUIPhoneAgent:
-     """
-     MAI-UI Agent wrapper that implements the PhoneAgent interface.
-
-     This wrapper allows MAI-UI agents to be used transparently in place of
-     the standard PhoneAgent, providing compatibility with the existing
-     PhoneAgentManager and API infrastructure.
-
-     Usage:
-         agent = MAIUIPhoneAgent(
-             model_config=model_config,
-             agent_config=agent_config,
-         )
-         result = agent.run("Open WeChat")
-     """
-
-     def __init__(
-         self,
-         model_config: ModelConfig,
-         agent_config: AgentConfig,
-         mai_config: Optional[MAIUIConfig] = None,
-         takeover_callback: Optional[Callable[[str], None]] = None,
-     ):
-         """
-         Initialize MAI-UI PhoneAgent wrapper.
-
-         Args:
-             model_config: Model configuration (base_url, api_key, model_name).
-             agent_config: Agent configuration (device_id, max_steps, etc.).
-             mai_config: MAI-UI specific configuration.
-             takeover_callback: Callback for takeover requests.
-         """
-         self.model_config = model_config
-         self.agent_config = agent_config
-         self.mai_config = mai_config or MAIUIConfig()
-
-         # Create MAI-UI navigation agent
-         self._mai_agent = MAIUINaivigationAgent(
-             llm_base_url=model_config.base_url,
-             model_name=model_config.model_name,
-             api_key=model_config.api_key,
-             runtime_conf={
-                 "history_n": self.mai_config.history_n,
-                 "temperature": self.mai_config.temperature,
-                 "top_k": self.mai_config.top_k,
-                 "top_p": self.mai_config.top_p,
-                 "max_tokens": self.mai_config.max_tokens,
-             },
-         )
-
-         # Action adapter and handler
-         self._action_adapter = MAIUIActionAdapter()
-         self.action_handler = ActionHandler(
-             device_id=agent_config.device_id,
-             takeover_callback=takeover_callback,
-         )
-
-         # PhoneAgent-compatible state
-         self._context: list[dict[str, Any]] = []
-         self._step_count = 0
-         self._current_task: str = ""
-
-         # For model_client compatibility (used by streaming patches)
-         self.model_client = _DummyModelClient()
-
-         # Debug: Print model configuration for troubleshooting
-         logger.info("=" * 60)
-         logger.info("[MAI-UI Agent] Initialization")
-         logger.info(f" Device ID: {agent_config.device_id}")
-         logger.info(f" Base URL: {model_config.base_url}")
-         logger.info(f" Model: {model_config.model_name}")
-         logger.info("=" * 60)
-
-     def run(self, task: str) -> str:
-         """
-         Execute a complete task.
-
-         Args:
-             task: Natural language task description.
-
-         Returns:
-             Final message from the agent.
-         """
-         self.reset()
-         self._current_task = task
-
-         # First step
-         result = self._execute_step(task, is_first=True)
-
-         if result.finished:
-             return result.message or "Task completed"
-
-         # Continue until finished or max steps reached
-         while self._step_count < self.agent_config.max_steps:
-             result = self._execute_step(is_first=False)
-
-             if result.finished:
-                 return result.message or "Task completed"
-
-         return "Max steps reached"
-
-     def step(self, task: Optional[str] = None) -> StepResult:
-         """
-         Execute a single step.
-
-         Args:
-             task: Task description (required for first step).
-
-         Returns:
-             StepResult with step details.
-         """
-         is_first = len(self._context) == 0
-
-         if is_first:
-             if not task:
-                 raise ValueError("Task is required for the first step")
-             self._current_task = task
-
-         return self._execute_step(task, is_first)
-
-     def _execute_step(
-         self, user_prompt: Optional[str] = None, is_first: bool = False
-     ) -> StepResult:
-         """Execute a single step of the agent loop."""
-         from phone_agent.device_factory import get_device_factory
-         from PIL import Image
-         from io import BytesIO
-
-         self._step_count += 1
-         logger.info(f"[MAI-UI] Executing step {self._step_count}")
-
-         # Get screenshot
-         device_factory = get_device_factory()
-         screenshot = device_factory.get_screenshot(self.agent_config.device_id)
-
-         # Convert base64 to PIL Image
-         import base64
-
-         image_bytes = base64.b64decode(screenshot.base64_data)
-         pil_image = Image.open(BytesIO(image_bytes))
-
-         # Build observation
-         obs = {
-             "screenshot": pil_image,
-             "accessibility_tree": None,
-         }
-
-         # Get instruction
-         instruction = user_prompt or self._current_task
-
-         # Call MAI-UI predict
-         try:
-             raw_response, action_json = self._mai_agent.predict(
-                 instruction=instruction,
-                 obs=obs,
-             )
-         except Exception as e:
-             logger.error(f"[MAI-UI] Predict failed: {e}")
-             return StepResult(
-                 success=False,
-                 finished=True,
-                 action=None,
-                 thinking="",
-                 message=f"Prediction failed: {e}",
-             )
-
-         # Check for error
-         if action_json.get("action") is None:
-             logger.error("[MAI-UI] Invalid action returned")
-             return StepResult(
-                 success=False,
-                 finished=True,
-                 action=None,
-                 thinking="",
-                 message="Invalid action from model",
-             )
-
-         # Get thinking from trajectory
-         thinking = ""
-         if self._mai_agent.traj_memory.steps:
-             last_step = self._mai_agent.traj_memory.steps[-1]
-             thinking = last_step.thought or ""
-
-         # Convert action to AutoGLM-GUI format
-         converted_action = self._action_adapter.convert(action_json)
-         logger.debug(f"[MAI-UI] Converted action: {converted_action}")
-
-         # Check if finished (terminate action)
-         if converted_action.get("_metadata") == "finish":
-             return StepResult(
-                 success=True,
-                 finished=True,
-                 action=converted_action,
-                 thinking=thinking,
-                 message=converted_action.get("message", "Task completed"),
-             )
-
-         # Execute action
-         try:
-             result = self.action_handler.execute(
-                 converted_action,
-                 screenshot.width,
-                 screenshot.height,
-             )
-         except Exception as e:
-             logger.error(f"[MAI-UI] Action execution failed: {e}")
-             return StepResult(
-                 success=False,
-                 finished=False,
-                 action=converted_action,
-                 thinking=thinking,
-                 message=f"Action failed: {e}",
-             )
-
-         # Update context for compatibility
-         self._context.append(
-             {
-                 "step": self._step_count,
-                 "action": action_json,
-                 "converted_action": converted_action,
-                 "result": result.success,
-                 "thinking": thinking,
-             }
-         )
-
-         return StepResult(
-             success=result.success,
-             finished=result.should_finish,
-             action=converted_action,
-             thinking=thinking,
-             message=result.message,
-         )
-
-     def reset(self) -> None:
-         """Reset agent state for a new task."""
-         self._context = []
-         self._step_count = 0
-         self._current_task = ""
-         self._mai_agent.reset()
-         logger.debug("[MAI-UI] Agent reset")
-
-     @property
-     def step_count(self) -> int:
-         """Get current step count."""
-         return self._step_count
-
-     @property
-     def context(self) -> list[dict[str, Any]]:
-         """Get conversation context (for compatibility)."""
-         return self._context.copy()
-
-
- class _DummyModelClient:
-     """
-     Dummy model client for compatibility with streaming patches.
-
-     The actual model calls are handled by MAI-UI agent internally.
-     This exists to satisfy code that expects model_client attribute.
-     """
-
-     def request(self, messages: list, **kwargs) -> Any:
-         """Dummy request method - should not be called directly."""
-         raise NotImplementedError(
-             "MAIUIPhoneAgent handles model calls internally. "
-             "Do not call model_client.request() directly."
-         )

AutoGLM_GUI/phone_agent_patches.py
@@ -1,147 +0,0 @@
- """
- Monkey patches for phone_agent to add streaming functionality.
-
- This module patches the upstream phone_agent code without modifying the original files.
- """
-
- from typing import Any, Callable
-
- from phone_agent.model import ModelClient
- from phone_agent.model.client import ModelResponse
-
-
- # Store original methods
- _original_model_request = ModelClient.request
-
-
- def _patched_model_request(
-     self,
-     messages: list[dict[str, Any]],
-     on_thinking_chunk: Callable[[str], None] | None = None,
- ) -> ModelResponse:
-     """
-     Patched version of ModelClient.request that supports streaming thinking chunks.
-
-     This wraps the original request method and adds callback support for thinking chunks.
-     """
-     import time
-
-     from phone_agent.model.client import ModelResponse
-
-     # Start timing
-     start_time = time.time()
-     time_to_first_token = None
-     time_to_thinking_end = None
-
-     stream = self.client.chat.completions.create(
-         messages=messages,
-         model=self.config.model_name,
-         max_tokens=self.config.max_tokens,
-         temperature=self.config.temperature,
-         top_p=self.config.top_p,
-         frequency_penalty=self.config.frequency_penalty,
-         extra_body=self.config.extra_body,
-         stream=True,
-     )
-
-     raw_content = ""
-     buffer = "" # Buffer to hold content that might be part of a marker
-     action_markers = ["finish(message=", "do(action="]
-     in_action_phase = False # Track if we've entered the action phase
-     first_token_received = False
-
-     for chunk in stream:
-         if len(chunk.choices) == 0:
-             continue
-         if chunk.choices[0].delta.content is not None:
-             content = chunk.choices[0].delta.content
-             raw_content += content
-
-             # Record time to first token
-             if not first_token_received:
-                 time_to_first_token = time.time() - start_time
-                 first_token_received = True
-
-             if in_action_phase:
-                 # Already in action phase, just accumulate content without printing
-                 continue
-
-             buffer += content
-
-             # Check if any marker is fully present in buffer
-             marker_found = False
-             for marker in action_markers:
-                 if marker in buffer:
-                     # Marker found, print everything before it
-                     thinking_part = buffer.split(marker, 1)[0]
-                     print(thinking_part, end="", flush=True)
-                     if on_thinking_chunk:
-                         on_thinking_chunk(thinking_part)
-                     print() # Print newline after thinking is complete
-                     in_action_phase = True
-                     marker_found = True
-
-                     # Record time to thinking end
-                     if time_to_thinking_end is None:
-                         time_to_thinking_end = time.time() - start_time
-
-                     break
-
-             if marker_found:
-                 continue # Continue to collect remaining content
-
-             # Check if buffer ends with a prefix of any marker
-             # If so, don't print yet (wait for more content)
-             is_potential_marker = False
-             for marker in action_markers:
-                 for i in range(1, len(marker)):
-                     if buffer.endswith(marker[:i]):
-                         is_potential_marker = True
-                         break
-                 if is_potential_marker:
-                     break
-
-             if not is_potential_marker:
-                 # Safe to print the buffer
-                 print(buffer, end="", flush=True)
-                 if on_thinking_chunk:
-                     on_thinking_chunk(buffer)
-                 buffer = ""
-
-     # Calculate total time
-     total_time = time.time() - start_time
-
-     # Parse thinking and action from response
-     thinking, action = self._parse_response(raw_content)
-
-     # Print performance metrics
-     from phone_agent.config.i18n import get_message
-
-     lang = self.config.lang
-     print()
-     print("=" * 50)
-     print(f"⏱️ {get_message('performance_metrics', lang)}:")
-     print("-" * 50)
-     if time_to_first_token is not None:
-         print(f"{get_message('time_to_first_token', lang)}: {time_to_first_token:.3f}s")
-     if time_to_thinking_end is not None:
-         print(
-             f"{get_message('time_to_thinking_end', lang)}: {time_to_thinking_end:.3f}s"
-         )
-     print(f"{get_message('total_inference_time', lang)}: {total_time:.3f}s")
-     print("=" * 50)
-
-     return ModelResponse(
-         thinking=thinking,
-         action=action,
-         raw_content=raw_content,
-         time_to_first_token=time_to_first_token,
-         time_to_thinking_end=time_to_thinking_end,
-         total_time=total_time,
-     )
-
-
- def apply_patches():
-     """Apply all monkey patches to phone_agent."""
-     # Patch ModelClient.request to support streaming callbacks
-     ModelClient.request = _patched_model_request
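
For context on how the removed patch was consumed, here is a minimal, hypothetical usage sketch (not part of the diff). It assumes a phone_agent ModelClient instance `client` already constructed from a ModelConfig elsewhere; that constructor and the caller's `messages` payload are not shown in this diff.

    # Hypothetical wiring of the removed 1.4.1 streaming patch, for reference only.
    from AutoGLM_GUI.phone_agent_patches import apply_patches

    apply_patches()  # swaps ModelClient.request for the streaming variant above

    chunks: list[str] = []

    # `client` is an assumed, pre-built ModelClient; `messages` is the caller's chat
    # payload. Thinking text is delivered to the callback as it streams, until one of
    # the action markers ("finish(message=" or "do(action=") appears in the output.
    response = client.request(messages, on_thinking_chunk=chunks.append)
    print("".join(chunks), response.action)

In 1.5.0 this monkey patch is removed; it is presumably superseded by the new AutoGLM_GUI/agents/stream_runner.py added in this release.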