autoglm-gui 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AutoGLM_GUI/__main__.py +0 -4
- AutoGLM_GUI/adb_plus/qr_pair.py +8 -8
- AutoGLM_GUI/agents/__init__.py +20 -0
- AutoGLM_GUI/agents/factory.py +160 -0
- AutoGLM_GUI/agents/mai_adapter.py +627 -0
- AutoGLM_GUI/agents/protocols.py +23 -0
- AutoGLM_GUI/api/__init__.py +48 -7
- AutoGLM_GUI/api/agents.py +61 -17
- AutoGLM_GUI/api/devices.py +12 -18
- AutoGLM_GUI/api/dual_model.py +15 -9
- AutoGLM_GUI/api/health.py +13 -0
- AutoGLM_GUI/api/layered_agent.py +239 -166
- AutoGLM_GUI/api/mcp.py +11 -10
- AutoGLM_GUI/api/version.py +23 -10
- AutoGLM_GUI/api/workflows.py +2 -1
- AutoGLM_GUI/config_manager.py +55 -1
- AutoGLM_GUI/device_adapter.py +263 -0
- AutoGLM_GUI/device_protocol.py +266 -0
- AutoGLM_GUI/devices/__init__.py +49 -0
- AutoGLM_GUI/devices/adb_device.py +205 -0
- AutoGLM_GUI/devices/mock_device.py +183 -0
- AutoGLM_GUI/devices/remote_device.py +172 -0
- AutoGLM_GUI/dual_model/decision_model.py +4 -4
- AutoGLM_GUI/exceptions.py +3 -3
- AutoGLM_GUI/mai_ui_adapter/agent_wrapper.py +2 -2
- AutoGLM_GUI/metrics.py +13 -20
- AutoGLM_GUI/phone_agent_manager.py +219 -134
- AutoGLM_GUI/phone_agent_patches.py +2 -1
- AutoGLM_GUI/platform_utils.py +5 -2
- AutoGLM_GUI/schemas.py +47 -0
- AutoGLM_GUI/scrcpy_stream.py +17 -13
- AutoGLM_GUI/server.py +3 -1
- AutoGLM_GUI/socketio_server.py +16 -4
- AutoGLM_GUI/state.py +10 -30
- AutoGLM_GUI/static/assets/{about-DeclntHg.js → about-_XNhzQZX.js} +1 -1
- AutoGLM_GUI/static/assets/chat-DwJpiAWf.js +126 -0
- AutoGLM_GUI/static/assets/{dialog-BfdcBs1x.js → dialog-B3uW4T8V.js} +3 -3
- AutoGLM_GUI/static/assets/index-Cpv2gSF1.css +1 -0
- AutoGLM_GUI/static/assets/{index-zQ4KKDHt.js → index-Cy8TmmHV.js} +1 -1
- AutoGLM_GUI/static/assets/{index-DHF1NZh0.js → index-UYYauTly.js} +6 -6
- AutoGLM_GUI/static/assets/{workflows-xiplap-r.js → workflows-Du_de-dt.js} +1 -1
- AutoGLM_GUI/static/index.html +2 -2
- AutoGLM_GUI/types.py +125 -0
- {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.4.1.dist-info}/METADATA +83 -4
- {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.4.1.dist-info}/RECORD +54 -37
- mai_agent/base.py +137 -0
- mai_agent/mai_grounding_agent.py +263 -0
- mai_agent/mai_naivigation_agent.py +526 -0
- mai_agent/prompt.py +148 -0
- mai_agent/unified_memory.py +67 -0
- mai_agent/utils.py +73 -0
- AutoGLM_GUI/config.py +0 -23
- AutoGLM_GUI/static/assets/chat-Iut2yhSw.js +0 -125
- AutoGLM_GUI/static/assets/index-5hCCwHA7.css +0 -1
- {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.4.1.dist-info}/WHEEL +0 -0
- {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.4.1.dist-info}/entry_points.txt +0 -0
- {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.4.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# Copyright (c) 2025, Alibaba Cloud and its affiliates;
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
"""Unified memory structures for trajectory tracking."""
|
|
15
|
+
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from typing import Any, Dict, List, Optional
|
|
18
|
+
|
|
19
|
+
from PIL import Image
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class TrajStep:
    """
    One recorded step of an agent trajectory.

    The first nine fields are required; ``screenshot_bytes`` and
    ``structured_action`` are optional and default to ``None``.

    Attributes:
        screenshot: PIL image of the screen at this step.
        accessibility_tree: Accessibility tree data for the screen, if any.
        prediction: Raw prediction/response text returned by the model.
        action: Action dictionary parsed from the prediction.
        conclusion: Conclusion or summary recorded for this step.
        thought: The model's reasoning/thinking for this step.
        step_index: Index of this step within the trajectory.
        agent_type: Type of the agent that produced this step.
        model_name: Name of the model that was used.
        screenshot_bytes: Original screenshot as bytes (kept for compatibility).
        structured_action: Structured action together with its metadata.
    """

    # Required fields (declaration order is the positional constructor order).
    screenshot: Image.Image
    accessibility_tree: Optional[Dict[str, Any]]
    prediction: str
    action: Dict[str, Any]
    conclusion: str
    thought: str
    step_index: int
    agent_type: str
    model_name: str

    # Optional fields.
    screenshot_bytes: Optional[bytes] = field(default=None)
    structured_action: Optional[Dict[str, Any]] = field(default=None)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass
class TrajMemory:
    """
    Holds the full trajectory recorded for one task.

    Attributes:
        task_goal: Goal/instruction the trajectory is working towards.
        task_id: Unique identifier of the task.
        steps: Trajectory steps recorded so far; each instance starts with
            its own empty list.
    """

    task_goal: str
    task_id: str
    # default_factory gives every instance a fresh list instead of a shared one.
    steps: List[TrajStep] = field(default_factory=list)
|
mai_agent/utils.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# Copyright (c) 2025, Alibaba Cloud and its affiliates;
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
"""Utility functions for image processing and conversion."""
|
|
15
|
+
|
|
16
|
+
import base64
|
|
17
|
+
from io import BytesIO
|
|
18
|
+
from typing import Union, Optional, Tuple, Dict, Any
|
|
19
|
+
|
|
20
|
+
from PIL import Image
|
|
21
|
+
from PIL import ImageDraw
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def safe_pil_to_bytes(image: Union[Image.Image, bytes]) -> bytes:
    """
    Return ``image`` as raw bytes.

    Args:
        image: Either a PIL image or an already-encoded ``bytes`` object.

    Returns:
        The input unchanged when it is ``bytes``; otherwise the PIL image
        encoded as PNG.

    Raises:
        TypeError: If ``image`` is neither a PIL image nor ``bytes``.
    """
    # Already bytes: nothing to convert.
    if isinstance(image, bytes):
        return image

    if not isinstance(image, Image.Image):
        raise TypeError(f"Expected PIL Image or bytes, got {type(image)}")

    # Serialise the PIL image into an in-memory PNG.
    png_buffer = BytesIO()
    image.save(png_buffer, format="PNG")
    return png_buffer.getvalue()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def pil_to_base64(image: Image.Image) -> str:
    """
    Encode a PIL image as a base64 string of its PNG representation.

    Args:
        image: The PIL image to encode.

    Returns:
        The base64 text (decoded as UTF-8) of the PNG-encoded image.
    """
    png_stream = BytesIO()
    image.save(png_stream, format="PNG")
    encoded = base64.b64encode(png_stream.getvalue())
    return encoded.decode("utf-8")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def save_screenshot(screenshot: Image.Image, path: str) -> None:
    """
    Save ``screenshot`` to ``path`` and print a confirmation line.

    Args:
        screenshot: The PIL image to write.
        path: Destination passed straight to ``Image.save``.
    """
    screenshot.save(path)
    confirmation = f"Screenshot saved in {path}"
    print(confirmation)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def extract_click_coordinates(action: Dict[str, Any]) -> Tuple[float, float]:
    """
    Return the ``(x, y)`` pair stored under the action's ``"coordinate"`` key.

    Args:
        action: Action dictionary; its ``"coordinate"`` value is indexed at
            positions 0 and 1.

    Returns:
        A tuple of the first two elements of ``action["coordinate"]``.

    Raises:
        TypeError: If ``"coordinate"`` is absent, because ``dict.get`` then
            yields ``None``, which cannot be subscripted.
        IndexError: If the coordinate sequence has fewer than two elements.
    """
    coordinate = action.get("coordinate")
    # Index explicitly (rather than unpack) so extra elements are ignored.
    return (coordinate[0], coordinate[1])
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Function to draw points on an image
|
|
54
|
+
def draw_clicks_on_image(
    image_path: str,
    click_coords: Tuple[float, float],
    output_path: Optional[str] = None,
) -> Image.Image:
    """
    Open an image and mark a click position on it with a filled red circle.

    Args:
        image_path: Path of the image to open.
        click_coords: ``(x, y)`` position of the click. If either component
            is ``None`` nothing is drawn.
        output_path: When truthy, the annotated image is also written there
            via ``save_screenshot``.

    Returns:
        The opened (and possibly annotated) PIL image.
    """
    image = Image.open(image_path)
    draw = ImageDraw.Draw(image)

    (x, y) = click_coords
    radius = 20
    # Test against None instead of truthiness: 0 is a valid pixel coordinate
    # (left/top edge), and `if x and y` silently skipped such clicks.
    if x is not None and y is not None:
        draw.ellipse(
            (x - radius, y - radius, x + radius, y + radius),
            fill="red",
            outline="red",
        )

    # Save the modified image only when a destination was supplied.
    if output_path:
        save_screenshot(image, output_path)
    return image
|
AutoGLM_GUI/config.py
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
"""Application configuration singleton."""
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
from dataclasses import dataclass
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
@dataclass
class AppConfig:
    """Global application configuration."""

    base_url: str = ""
    model_name: str = "autoglm-phone-9b"
    api_key: str = "EMPTY"

    def refresh_from_env(self):
        """Refresh the configuration from environment variables (for reload mode).

        Each field is overwritten by its ``AUTOGLM_*`` environment variable
        when that variable is set; otherwise the current value is kept.
        """
        env_by_field = (
            ("base_url", "AUTOGLM_BASE_URL"),
            ("model_name", "AUTOGLM_MODEL_NAME"),
            ("api_key", "AUTOGLM_API_KEY"),
        )
        for attr_name, env_name in env_by_field:
            # The current value is the fallback, so unset variables change nothing.
            current = getattr(self, attr_name)
            setattr(self, attr_name, os.getenv(env_name, current))


# Global singleton instance
config = AppConfig()
|