autoglm-gui 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AutoGLM_GUI/__init__.py +11 -0
- AutoGLM_GUI/__main__.py +26 -4
- AutoGLM_GUI/actions/__init__.py +6 -0
- AutoGLM_GUI/actions/handler.py +196 -0
- AutoGLM_GUI/actions/types.py +15 -0
- AutoGLM_GUI/adb/__init__.py +53 -0
- AutoGLM_GUI/adb/apps.py +227 -0
- AutoGLM_GUI/adb/connection.py +323 -0
- AutoGLM_GUI/adb/device.py +171 -0
- AutoGLM_GUI/adb/input.py +67 -0
- AutoGLM_GUI/adb/screenshot.py +11 -0
- AutoGLM_GUI/adb/timing.py +167 -0
- AutoGLM_GUI/adb_plus/keyboard_installer.py +4 -2
- AutoGLM_GUI/adb_plus/screenshot.py +22 -1
- AutoGLM_GUI/adb_plus/serial.py +38 -20
- AutoGLM_GUI/adb_plus/touch.py +4 -9
- AutoGLM_GUI/agents/__init__.py +43 -12
- AutoGLM_GUI/agents/events.py +19 -0
- AutoGLM_GUI/agents/factory.py +31 -38
- AutoGLM_GUI/agents/glm/__init__.py +7 -0
- AutoGLM_GUI/agents/glm/agent.py +292 -0
- AutoGLM_GUI/agents/glm/message_builder.py +81 -0
- AutoGLM_GUI/agents/glm/parser.py +110 -0
- AutoGLM_GUI/agents/glm/prompts_en.py +77 -0
- AutoGLM_GUI/agents/glm/prompts_zh.py +75 -0
- AutoGLM_GUI/agents/mai/__init__.py +28 -0
- AutoGLM_GUI/agents/mai/agent.py +405 -0
- AutoGLM_GUI/agents/mai/parser.py +254 -0
- AutoGLM_GUI/agents/mai/prompts.py +103 -0
- AutoGLM_GUI/agents/mai/traj_memory.py +91 -0
- AutoGLM_GUI/agents/protocols.py +12 -8
- AutoGLM_GUI/agents/stream_runner.py +188 -0
- AutoGLM_GUI/api/__init__.py +40 -21
- AutoGLM_GUI/api/agents.py +157 -240
- AutoGLM_GUI/api/control.py +9 -6
- AutoGLM_GUI/api/devices.py +102 -12
- AutoGLM_GUI/api/history.py +78 -0
- AutoGLM_GUI/api/layered_agent.py +67 -15
- AutoGLM_GUI/api/media.py +64 -1
- AutoGLM_GUI/api/scheduled_tasks.py +98 -0
- AutoGLM_GUI/config.py +81 -0
- AutoGLM_GUI/config_manager.py +68 -51
- AutoGLM_GUI/device_manager.py +248 -29
- AutoGLM_GUI/device_protocol.py +1 -1
- AutoGLM_GUI/devices/adb_device.py +5 -10
- AutoGLM_GUI/devices/mock_device.py +4 -2
- AutoGLM_GUI/devices/remote_device.py +8 -3
- AutoGLM_GUI/history_manager.py +164 -0
- AutoGLM_GUI/i18n.py +81 -0
- AutoGLM_GUI/model/__init__.py +5 -0
- AutoGLM_GUI/model/message_builder.py +69 -0
- AutoGLM_GUI/model/types.py +24 -0
- AutoGLM_GUI/models/__init__.py +10 -0
- AutoGLM_GUI/models/history.py +96 -0
- AutoGLM_GUI/models/scheduled_task.py +71 -0
- AutoGLM_GUI/parsers/__init__.py +22 -0
- AutoGLM_GUI/parsers/base.py +50 -0
- AutoGLM_GUI/parsers/phone_parser.py +58 -0
- AutoGLM_GUI/phone_agent_manager.py +62 -396
- AutoGLM_GUI/platform_utils.py +26 -0
- AutoGLM_GUI/prompt_config.py +15 -0
- AutoGLM_GUI/prompts/__init__.py +32 -0
- AutoGLM_GUI/scheduler_manager.py +304 -0
- AutoGLM_GUI/schemas.py +234 -72
- AutoGLM_GUI/scrcpy_stream.py +142 -24
- AutoGLM_GUI/socketio_server.py +100 -27
- AutoGLM_GUI/static/assets/{about-_XNhzQZX.js → about-BQm96DAl.js} +1 -1
- AutoGLM_GUI/static/assets/alert-dialog-B42XxGPR.js +1 -0
- AutoGLM_GUI/static/assets/chat-C0L2gQYG.js +129 -0
- AutoGLM_GUI/static/assets/circle-alert-D4rSJh37.js +1 -0
- AutoGLM_GUI/static/assets/dialog-DZ78cEcj.js +45 -0
- AutoGLM_GUI/static/assets/history-DFBv7TGc.js +1 -0
- AutoGLM_GUI/static/assets/index-Bzyv2yQ2.css +1 -0
- AutoGLM_GUI/static/assets/{index-Cy8TmmHV.js → index-CmZSnDqc.js} +1 -1
- AutoGLM_GUI/static/assets/index-CssG-3TH.js +11 -0
- AutoGLM_GUI/static/assets/label-BCUzE_nm.js +1 -0
- AutoGLM_GUI/static/assets/logs-eoFxn5of.js +1 -0
- AutoGLM_GUI/static/assets/popover-DLsuV5Sx.js +1 -0
- AutoGLM_GUI/static/assets/scheduled-tasks-MyqGJvy_.js +1 -0
- AutoGLM_GUI/static/assets/square-pen-zGWYrdfj.js +1 -0
- AutoGLM_GUI/static/assets/textarea-BX6y7uM5.js +1 -0
- AutoGLM_GUI/static/assets/workflows-CYFs6ssC.js +1 -0
- AutoGLM_GUI/static/index.html +2 -2
- AutoGLM_GUI/types.py +17 -0
- {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.0.dist-info}/METADATA +137 -130
- autoglm_gui-1.5.0.dist-info/RECORD +157 -0
- AutoGLM_GUI/agents/mai_adapter.py +0 -627
- AutoGLM_GUI/api/dual_model.py +0 -317
- AutoGLM_GUI/dual_model/__init__.py +0 -53
- AutoGLM_GUI/dual_model/decision_model.py +0 -664
- AutoGLM_GUI/dual_model/dual_agent.py +0 -917
- AutoGLM_GUI/dual_model/protocols.py +0 -354
- AutoGLM_GUI/dual_model/vision_model.py +0 -442
- AutoGLM_GUI/mai_ui_adapter/agent_wrapper.py +0 -291
- AutoGLM_GUI/phone_agent_patches.py +0 -147
- AutoGLM_GUI/static/assets/chat-DwJpiAWf.js +0 -126
- AutoGLM_GUI/static/assets/dialog-B3uW4T8V.js +0 -45
- AutoGLM_GUI/static/assets/index-Cpv2gSF1.css +0 -1
- AutoGLM_GUI/static/assets/index-UYYauTly.js +0 -12
- AutoGLM_GUI/static/assets/workflows-Du_de-dt.js +0 -1
- autoglm_gui-1.4.1.dist-info/RECORD +0 -117
- {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.0.dist-info}/WHEEL +0 -0
- {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.0.dist-info}/entry_points.txt +0 -0
- {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -8,6 +8,8 @@ from typing import TYPE_CHECKING
|
|
|
8
8
|
|
|
9
9
|
from AutoGLM_GUI.device_protocol import (
|
|
10
10
|
DeviceInfo,
|
|
11
|
+
DeviceManagerProtocol,
|
|
12
|
+
DeviceProtocol,
|
|
11
13
|
Screenshot,
|
|
12
14
|
)
|
|
13
15
|
|
|
@@ -15,7 +17,7 @@ if TYPE_CHECKING:
|
|
|
15
17
|
from tests.integration.state_machine import StateMachine
|
|
16
18
|
|
|
17
19
|
|
|
18
|
-
class MockDevice:
|
|
20
|
+
class MockDevice(DeviceProtocol):
|
|
19
21
|
"""
|
|
20
22
|
Mock device implementation driven by a state machine.
|
|
21
23
|
|
|
@@ -125,7 +127,7 @@ class MockDevice:
|
|
|
125
127
|
pass
|
|
126
128
|
|
|
127
129
|
|
|
128
|
-
class MockDeviceManager:
|
|
130
|
+
class MockDeviceManager(DeviceManagerProtocol):
|
|
129
131
|
"""
|
|
130
132
|
Mock device manager for testing.
|
|
131
133
|
|
|
@@ -6,10 +6,15 @@ via HTTP, allowing remote control of devices.
|
|
|
6
6
|
|
|
7
7
|
import httpx
|
|
8
8
|
|
|
9
|
-
from AutoGLM_GUI.device_protocol import
|
|
9
|
+
from AutoGLM_GUI.device_protocol import (
|
|
10
|
+
DeviceInfo,
|
|
11
|
+
DeviceManagerProtocol,
|
|
12
|
+
DeviceProtocol,
|
|
13
|
+
Screenshot,
|
|
14
|
+
)
|
|
10
15
|
|
|
11
16
|
|
|
12
|
-
class RemoteDevice:
|
|
17
|
+
class RemoteDevice(DeviceProtocol):
|
|
13
18
|
"""
|
|
14
19
|
Remote device implementation using HTTP.
|
|
15
20
|
|
|
@@ -126,7 +131,7 @@ class RemoteDevice:
|
|
|
126
131
|
self.close()
|
|
127
132
|
|
|
128
133
|
|
|
129
|
-
class RemoteDeviceManager:
|
|
134
|
+
class RemoteDeviceManager(DeviceManagerProtocol):
|
|
130
135
|
"""
|
|
131
136
|
Remote device manager using HTTP.
|
|
132
137
|
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""Conversation history manager with JSON file persistence."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import re
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
from AutoGLM_GUI.logger import logger
|
|
11
|
+
from AutoGLM_GUI.models.history import ConversationRecord, DeviceHistory
|
|
12
|
+
|
|
13
|
+
# Characters accepted in an ADB serialno: letters, digits, underscore,
# dash, colon and dot. Example serials:
# USB: ABC123DEF456
# WiFi: 192.168.1.100:5555
# mDNS: adb-243a09b7._adb-tls-connect._tcp
_SERIALNO_PATTERN = re.compile(r"^[a-zA-Z0-9_\-:\.]+$")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class HistoryManager:
    """Conversation history manager (singleton) with JSON file persistence.

    Each device's history is stored in its own JSON file under
    ``~/.config/autoglm/history``. Parsed files are cached in memory and are
    re-read only when the file's modification time changes.
    """

    _instance: Optional["HistoryManager"] = None

    def __new__(cls):
        # Singleton: every construction returns the same object.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every HistoryManager() call; set up state only once.
        if hasattr(self, "_initialized"):
            return
        self._initialized = True
        self._history_dir = Path.home() / ".config" / "autoglm" / "history"
        self._file_cache: dict[str, DeviceHistory] = {}
        self._file_mtime: dict[str, float] = {}

    def _sanitize_serialno(self, serialno: str) -> str:
        """Convert a serialno into a safe file name stem.

        A serialno made only of allowed characters is used verbatim; anything
        else (including any value containing "..") is replaced by its SHA1
        hex digest, which prevents path traversal.

        Args:
            serialno: Device serial number as reported by the caller.

        Returns:
            The serialno itself or a 40-character SHA1 hex digest.
        """
        if not serialno:
            return hashlib.sha1(b"empty").hexdigest()

        # fullmatch() instead of match(): "$" also matches just before a
        # trailing newline, so match() would accept a serial such as "abc\n".
        # NOTE(review): ":" is accepted and used verbatim in the file name;
        # it is not a valid file-name character on Windows - confirm whether
        # Windows hosts need a different mapping.
        if ".." in serialno or not _SERIALNO_PATTERN.fullmatch(serialno):
            hashed = hashlib.sha1(serialno.encode("utf-8")).hexdigest()
            logger.warning(
                f"Unsafe serialno detected, using hash: {serialno!r} -> {hashed}"
            )
            return hashed

        return serialno

    def _get_history_path(self, serialno: str) -> Path:
        """Return the history file path for a device (with traversal guard).

        Args:
            serialno: Device serial number.

        Returns:
            Resolved path of ``<history_dir>/<safe name>.json``.
        """
        safe_name = self._sanitize_serialno(serialno)
        path = (self._history_dir / f"{safe_name}.json").resolve()

        # Defence in depth: the resolved path must stay inside history_dir.
        history_dir_resolved = self._history_dir.resolve()
        if not path.is_relative_to(history_dir_resolved):
            # Should be unreachable after sanitising; kept as a last resort.
            hashed = hashlib.sha1(serialno.encode("utf-8")).hexdigest()
            logger.error(f"Path escape detected for {serialno!r}, using hash: {hashed}")
            path = history_dir_resolved / f"{hashed}.json"

        return path

    def _load_history(self, serialno: str) -> DeviceHistory:
        """Load a device's history, using the in-memory cache when fresh.

        Args:
            serialno: Device serial number.

        Returns:
            The stored history, or an empty ``DeviceHistory`` when the file is
            missing or cannot be parsed.
        """
        path = self._get_history_path(serialno)

        if not path.exists():
            return DeviceHistory(serialno=serialno)

        try:
            # stat() is inside the try so a file deleted after the exists()
            # check is treated like a missing file instead of raising.
            current_mtime = path.stat().st_mtime
            cached = self._file_cache.get(serialno)
            if cached is not None and self._file_mtime.get(serialno) == current_mtime:
                return cached

            with open(path, encoding="utf-8") as f:
                data = json.load(f)
            history = DeviceHistory.from_dict(data)
        except (FileNotFoundError, ValueError, KeyError, TypeError, AttributeError) as e:
            # ValueError covers json.JSONDecodeError, undecodable bytes and
            # bad ISO timestamps; the others cover JSON that parses but does
            # not have the expected shape.
            logger.warning(f"Failed to load history for {serialno}: {e}")
            return DeviceHistory(serialno=serialno)

        self._file_cache[serialno] = history
        self._file_mtime[serialno] = current_mtime
        logger.debug(f"Loaded {len(history.records)} records for {serialno}")
        return history

    def _save_history(self, history: DeviceHistory) -> bool:
        """Write a device's history to disk via a temp file and rename.

        Args:
            history: History to persist; its ``last_updated`` is refreshed.

        Returns:
            True when the file was written, False on any failure.
        """
        path = self._get_history_path(history.serialno)
        temp_path = path.with_suffix(".tmp")

        try:
            # Directory creation is inside the try so that a failure is
            # reported through the boolean result like any other write error.
            self._history_dir.mkdir(parents=True, exist_ok=True)
            history.last_updated = datetime.now()
            with open(temp_path, "w", encoding="utf-8") as f:
                json.dump(history.to_dict(), f, indent=2, ensure_ascii=False)
            temp_path.replace(path)

            self._file_cache[history.serialno] = history
            self._file_mtime[history.serialno] = path.stat().st_mtime
            logger.debug(f"Saved {len(history.records)} records for {history.serialno}")
            return True
        except Exception as e:
            logger.error(f"Failed to save history for {history.serialno}: {e}")
            try:
                temp_path.unlink(missing_ok=True)
            except OSError as cleanup_error:
                logger.warning(
                    f"Could not remove temp file {temp_path}: {cleanup_error}"
                )
            # Callers mutate the cached object before saving; drop it so the
            # next load reflects what is actually on disk.
            self._file_cache.pop(history.serialno, None)
            self._file_mtime.pop(history.serialno, None)
            return False

    def add_record(self, serialno: str, record: ConversationRecord) -> None:
        """Insert a record at the front of a device's history and persist it.

        Args:
            serialno: Device serial number.
            record: Record to add.
        """
        history = self._load_history(serialno)
        history.records.insert(0, record)
        if self._save_history(history):
            logger.info(f"Added history record for {serialno}: {record.id}")
        else:
            # Do not report success when nothing was written.
            logger.error(
                f"History record {record.id} for {serialno} was not persisted"
            )

    def list_records(
        self, serialno: str, limit: int = 50, offset: int = 0
    ) -> list[ConversationRecord]:
        """Return a slice of a device's records.

        Args:
            serialno: Device serial number.
            limit: Maximum number of records to return.
            offset: Index of the first record to return.

        Returns:
            ``records[offset : offset + limit]`` in stored order.
        """
        history = self._load_history(serialno)
        return history.records[offset : offset + limit]

    def get_record(self, serialno: str, record_id: str) -> Optional[ConversationRecord]:
        """Return the record with the given id, or None when absent."""
        history = self._load_history(serialno)
        return next((r for r in history.records if r.id == record_id), None)

    def delete_record(self, serialno: str, record_id: str) -> bool:
        """Delete one record from a device's history.

        Args:
            serialno: Device serial number.
            record_id: Id of the record to remove.

        Returns:
            True when the record existed and the updated history was saved;
            False when the record was not found or the save failed.
        """
        history = self._load_history(serialno)
        remaining = [r for r in history.records if r.id != record_id]

        if len(remaining) == len(history.records):
            logger.warning(f"Record {record_id} not found for {serialno}")
            return False

        history.records = remaining
        if not self._save_history(history):
            # _save_history already logged the failure.
            return False

        logger.info(f"Deleted history record {record_id} for {serialno}")
        return True

    def clear_device_history(self, serialno: str) -> bool:
        """Remove a device's history file and cached state.

        Args:
            serialno: Device serial number.

        Returns:
            True when a history file existed and was removed, else False.
        """
        path = self._get_history_path(serialno)
        self._file_cache.pop(serialno, None)
        self._file_mtime.pop(serialno, None)

        try:
            path.unlink()
        except FileNotFoundError:
            return False

        logger.info(f"Cleared all history for {serialno}")
        return True

    def get_total_count(self, serialno: str) -> int:
        """Return the number of records stored for a device."""
        history = self._load_history(serialno)
        return len(history.records)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
# Shared module-level instance; HistoryManager.__new__ returns this same
# object for any later HistoryManager() call.
history_manager = HistoryManager()
|
AutoGLM_GUI/i18n.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Internationalization (i18n) module for Phone Agent UI messages."""
|
|
2
|
+
|
|
3
|
+
# Chinese messages.
# get_messages() returns this table for every language code other than "en".
# The keys are the same set as in MESSAGES_EN.
MESSAGES_ZH = {
    "thinking": "思考过程",
    "action": "执行动作",
    "task_completed": "任务完成",
    "done": "完成",
    "starting_task": "开始执行任务",
    "final_result": "最终结果",
    "task_result": "任务结果",
    "confirmation_required": "需要确认",
    "continue_prompt": "是否继续?(y/n)",
    "manual_operation_required": "需要人工操作",
    "manual_operation_hint": "请手动完成操作...",
    "press_enter_when_done": "完成后按回车继续",
    "connection_failed": "连接失败",
    "connection_successful": "连接成功",
    "step": "步骤",
    "task": "任务",
    "result": "结果",
    "performance_metrics": "性能指标",
    "time_to_first_token": "首 Token 延迟 (TTFT)",
    "time_to_thinking_end": "思考完成延迟",
    "total_inference_time": "总推理时间",
}
|
|
27
|
+
|
|
28
|
+
# English messages.
# get_messages() returns this table only when the language code is "en".
# The keys are the same set as in MESSAGES_ZH.
MESSAGES_EN = {
    "thinking": "Thinking",
    "action": "Action",
    "task_completed": "Task Completed",
    "done": "Done",
    "starting_task": "Starting task",
    "final_result": "Final Result",
    "task_result": "Task Result",
    "confirmation_required": "Confirmation Required",
    "continue_prompt": "Continue? (y/n)",
    "manual_operation_required": "Manual Operation Required",
    "manual_operation_hint": "Please complete the operation manually...",
    "press_enter_when_done": "Press Enter when done",
    "connection_failed": "Connection Failed",
    "connection_successful": "Connection Successful",
    "step": "Step",
    "task": "Task",
    "result": "Result",
    "performance_metrics": "Performance Metrics",
    "time_to_first_token": "Time to First Token (TTFT)",
    "time_to_thinking_end": "Time to Thinking End",
    "total_inference_time": "Total Inference Time",
}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def get_messages(lang: str = "cn") -> dict:
    """
    Return the UI message table for a language.

    Args:
        lang: Language code; 'en' selects English, any other value
            (including the default 'cn') selects Chinese.

    Returns:
        The module-level message dictionary for that language.
    """
    use_english = lang == "en"
    return MESSAGES_EN if use_english else MESSAGES_ZH
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def get_message(key: str, lang: str = "cn") -> str:
    """
    Look up one UI message.

    Args:
        key: Message key.
        lang: Language code, 'cn' for Chinese, 'en' for English.

    Returns:
        The message text, or the key itself when the table has no such key.
    """
    table = get_messages(lang)
    if key in table:
        return table[key]
    # Unknown keys fall back to the key text.
    return key
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Builder for constructing multimodal chat messages."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class MessageBuilder:
|
|
7
|
+
@staticmethod
|
|
8
|
+
def create_system_message(content: str) -> dict[str, Any]:
|
|
9
|
+
return {"role": "system", "content": content}
|
|
10
|
+
|
|
11
|
+
@staticmethod
|
|
12
|
+
def create_user_message(
|
|
13
|
+
text: str, image_base64: str | None = None
|
|
14
|
+
) -> dict[str, Any]:
|
|
15
|
+
if image_base64 is None:
|
|
16
|
+
return {"role": "user", "content": text}
|
|
17
|
+
|
|
18
|
+
return {
|
|
19
|
+
"role": "user",
|
|
20
|
+
"content": [
|
|
21
|
+
{"type": "text", "text": text},
|
|
22
|
+
{
|
|
23
|
+
"type": "image_url",
|
|
24
|
+
"image_url": {"url": f"data:image/png;base64,{image_base64}"},
|
|
25
|
+
},
|
|
26
|
+
],
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
@staticmethod
|
|
30
|
+
def create_multi_image_user_message(
|
|
31
|
+
text: str, image_base64_list: list[str]
|
|
32
|
+
) -> dict[str, Any]:
|
|
33
|
+
if not image_base64_list:
|
|
34
|
+
return {"role": "user", "content": text}
|
|
35
|
+
|
|
36
|
+
content_parts: list[dict[str, Any]] = [{"type": "text", "text": text}]
|
|
37
|
+
|
|
38
|
+
for image_base64 in image_base64_list:
|
|
39
|
+
content_parts.append(
|
|
40
|
+
{
|
|
41
|
+
"type": "image_url",
|
|
42
|
+
"image_url": {"url": f"data:image/png;base64,{image_base64}"},
|
|
43
|
+
}
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
return {"role": "user", "content": content_parts}
|
|
47
|
+
|
|
48
|
+
@staticmethod
|
|
49
|
+
def create_assistant_message(content: str) -> dict[str, Any]:
|
|
50
|
+
return {"role": "assistant", "content": content}
|
|
51
|
+
|
|
52
|
+
@staticmethod
|
|
53
|
+
def remove_images_from_message(message: dict[str, Any]) -> dict[str, Any]:
|
|
54
|
+
if message["role"] != "user":
|
|
55
|
+
return message
|
|
56
|
+
|
|
57
|
+
content = message["content"]
|
|
58
|
+
if isinstance(content, str):
|
|
59
|
+
return message
|
|
60
|
+
|
|
61
|
+
text_parts = [part for part in content if part["type"] == "text"]
|
|
62
|
+
if len(text_parts) == 1:
|
|
63
|
+
return {"role": "user", "content": text_parts[0]["text"]}
|
|
64
|
+
|
|
65
|
+
return {"role": "user", "content": text_parts}
|
|
66
|
+
|
|
67
|
+
@staticmethod
|
|
68
|
+
def build_screen_info(current_app: str) -> str:
|
|
69
|
+
return f"** Screen Info **\n\nCurrent App: {current_app}"
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Type definitions for model interactions."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
|
|
7
|
+
class ModelResponse:
|
|
8
|
+
"""Response from the vision-language model.
|
|
9
|
+
|
|
10
|
+
Attributes:
|
|
11
|
+
thinking: The model's reasoning process (from <think> tag)
|
|
12
|
+
action: The action to execute (from <answer> tag)
|
|
13
|
+
raw_content: Full response text from the model
|
|
14
|
+
time_to_first_token: Time until first token received (seconds)
|
|
15
|
+
time_to_thinking_end: Time until thinking phase completed (seconds)
|
|
16
|
+
total_time: Total inference time (seconds)
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
thinking: str
|
|
20
|
+
action: str
|
|
21
|
+
raw_content: str
|
|
22
|
+
time_to_first_token: float | None = None
|
|
23
|
+
time_to_thinking_end: float | None = None
|
|
24
|
+
total_time: float | None = None
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""Conversation history data models."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Literal
|
|
6
|
+
from uuid import uuid4
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
|
|
10
|
+
class ConversationRecord:
|
|
11
|
+
"""单条对话记录."""
|
|
12
|
+
|
|
13
|
+
id: str = field(default_factory=lambda: str(uuid4()))
|
|
14
|
+
|
|
15
|
+
# 任务信息
|
|
16
|
+
task_text: str = "" # 用户输入的任务
|
|
17
|
+
final_message: str = "" # 最终结果消息
|
|
18
|
+
|
|
19
|
+
# 执行信息
|
|
20
|
+
success: bool = False
|
|
21
|
+
steps: int = 0
|
|
22
|
+
start_time: datetime = field(default_factory=datetime.now)
|
|
23
|
+
end_time: datetime | None = None
|
|
24
|
+
duration_ms: int = 0 # 执行时长(毫秒)
|
|
25
|
+
|
|
26
|
+
# 来源标记
|
|
27
|
+
source: Literal["chat", "layered", "scheduled"] = "chat"
|
|
28
|
+
source_detail: str = "" # 定时任务名称 or session_id
|
|
29
|
+
|
|
30
|
+
# 错误信息
|
|
31
|
+
error_message: str | None = None
|
|
32
|
+
|
|
33
|
+
def to_dict(self) -> dict:
|
|
34
|
+
"""转换为可序列化的字典."""
|
|
35
|
+
return {
|
|
36
|
+
"id": self.id,
|
|
37
|
+
"task_text": self.task_text,
|
|
38
|
+
"final_message": self.final_message,
|
|
39
|
+
"success": self.success,
|
|
40
|
+
"steps": self.steps,
|
|
41
|
+
"start_time": self.start_time.isoformat(),
|
|
42
|
+
"end_time": self.end_time.isoformat() if self.end_time else None,
|
|
43
|
+
"duration_ms": self.duration_ms,
|
|
44
|
+
"source": self.source,
|
|
45
|
+
"source_detail": self.source_detail,
|
|
46
|
+
"error_message": self.error_message,
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
@classmethod
|
|
50
|
+
def from_dict(cls, data: dict) -> "ConversationRecord":
|
|
51
|
+
"""从字典创建实例."""
|
|
52
|
+
return cls(
|
|
53
|
+
id=data.get("id", str(uuid4())),
|
|
54
|
+
task_text=data.get("task_text", ""),
|
|
55
|
+
final_message=data.get("final_message", ""),
|
|
56
|
+
success=data.get("success", False),
|
|
57
|
+
steps=data.get("steps", 0),
|
|
58
|
+
start_time=datetime.fromisoformat(data["start_time"])
|
|
59
|
+
if data.get("start_time")
|
|
60
|
+
else datetime.now(),
|
|
61
|
+
end_time=datetime.fromisoformat(data["end_time"])
|
|
62
|
+
if data.get("end_time")
|
|
63
|
+
else None,
|
|
64
|
+
duration_ms=data.get("duration_ms", 0),
|
|
65
|
+
source=data.get("source", "chat"),
|
|
66
|
+
source_detail=data.get("source_detail", ""),
|
|
67
|
+
error_message=data.get("error_message"),
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
class DeviceHistory:
    """Conversation history of one device (one file per device)."""

    serialno: str
    records: list[ConversationRecord] = field(default_factory=list)
    last_updated: datetime = field(default_factory=datetime.now)

    def to_dict(self) -> dict:
        """Return a JSON-serialisable dict of the history and its records."""
        serialized_records = []
        for record in self.records:
            serialized_records.append(record.to_dict())
        return dict(
            serialno=self.serialno,
            records=serialized_records,
            last_updated=self.last_updated.isoformat(),
        )

    @classmethod
    def from_dict(cls, data: dict) -> "DeviceHistory":
        """Build a history from a dict, defaulting any missing key.

        A missing or empty ``last_updated`` becomes the current time.
        """
        parsed_records = [
            ConversationRecord.from_dict(entry) for entry in data.get("records", [])
        ]
        raw_updated = data.get("last_updated")
        if raw_updated:
            updated = datetime.fromisoformat(raw_updated)
        else:
            updated = datetime.now()
        return cls(
            serialno=data.get("serialno", ""),
            records=parsed_records,
            last_updated=updated,
        )
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Scheduled task data models."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from uuid import uuid4
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
|
|
9
|
+
class ScheduledTask:
|
|
10
|
+
"""定时任务定义."""
|
|
11
|
+
|
|
12
|
+
id: str = field(default_factory=lambda: str(uuid4()))
|
|
13
|
+
|
|
14
|
+
# 基础信息
|
|
15
|
+
name: str = "" # 任务名称
|
|
16
|
+
workflow_uuid: str = "" # 关联的 Workflow UUID
|
|
17
|
+
device_serialno: str = "" # 绑定的设备 serialno
|
|
18
|
+
|
|
19
|
+
# 调度配置
|
|
20
|
+
cron_expression: str = "" # Cron 表达式 (如 "0 8 * * *")
|
|
21
|
+
enabled: bool = True # 是否启用
|
|
22
|
+
|
|
23
|
+
# 元数据
|
|
24
|
+
created_at: datetime = field(default_factory=datetime.now)
|
|
25
|
+
updated_at: datetime = field(default_factory=datetime.now)
|
|
26
|
+
|
|
27
|
+
# 最近执行信息(只记录最后一次)
|
|
28
|
+
last_run_time: datetime | None = None
|
|
29
|
+
last_run_success: bool | None = None
|
|
30
|
+
last_run_message: str | None = None
|
|
31
|
+
|
|
32
|
+
def to_dict(self) -> dict:
|
|
33
|
+
"""转换为可序列化的字典."""
|
|
34
|
+
return {
|
|
35
|
+
"id": self.id,
|
|
36
|
+
"name": self.name,
|
|
37
|
+
"workflow_uuid": self.workflow_uuid,
|
|
38
|
+
"device_serialno": self.device_serialno,
|
|
39
|
+
"cron_expression": self.cron_expression,
|
|
40
|
+
"enabled": self.enabled,
|
|
41
|
+
"created_at": self.created_at.isoformat(),
|
|
42
|
+
"updated_at": self.updated_at.isoformat(),
|
|
43
|
+
"last_run_time": self.last_run_time.isoformat()
|
|
44
|
+
if self.last_run_time
|
|
45
|
+
else None,
|
|
46
|
+
"last_run_success": self.last_run_success,
|
|
47
|
+
"last_run_message": self.last_run_message,
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
@classmethod
|
|
51
|
+
def from_dict(cls, data: dict) -> "ScheduledTask":
|
|
52
|
+
"""从字典创建实例."""
|
|
53
|
+
return cls(
|
|
54
|
+
id=data.get("id", str(uuid4())),
|
|
55
|
+
name=data.get("name", ""),
|
|
56
|
+
workflow_uuid=data.get("workflow_uuid", ""),
|
|
57
|
+
device_serialno=data.get("device_serialno", ""),
|
|
58
|
+
cron_expression=data.get("cron_expression", ""),
|
|
59
|
+
enabled=data.get("enabled", True),
|
|
60
|
+
created_at=datetime.fromisoformat(data["created_at"])
|
|
61
|
+
if data.get("created_at")
|
|
62
|
+
else datetime.now(),
|
|
63
|
+
updated_at=datetime.fromisoformat(data["updated_at"])
|
|
64
|
+
if data.get("updated_at")
|
|
65
|
+
else datetime.now(),
|
|
66
|
+
last_run_time=datetime.fromisoformat(data["last_run_time"])
|
|
67
|
+
if data.get("last_run_time")
|
|
68
|
+
else None,
|
|
69
|
+
last_run_success=data.get("last_run_success"),
|
|
70
|
+
last_run_message=data.get("last_run_message"),
|
|
71
|
+
)
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Action parsers for different agent types.
|
|
2
|
+
|
|
3
|
+
This module provides parser implementations for converting model outputs
|
|
4
|
+
into standardized action dictionaries that can be executed by ActionHandler.
|
|
5
|
+
|
|
6
|
+
Each agent type has its own parser implementation:
|
|
7
|
+
- GLMParser: For GLM-based agents (enhanced AST parsing)
|
|
8
|
+
- PhoneAgentParser: For standard PhoneAgent (basic AST parsing)
|
|
9
|
+
- MAIParser: For MAI agent (XML + JSON parsing)
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from .base import ActionParser
|
|
13
|
+
from AutoGLM_GUI.agents.glm.parser import GLMParser
|
|
14
|
+
from AutoGLM_GUI.agents.mai.parser import MAIParser
|
|
15
|
+
from .phone_parser import PhoneAgentParser
|
|
16
|
+
|
|
17
|
+
# Public names re-exported by this package: the ActionParser protocol and
# the three parser classes imported above.
__all__ = [
    "ActionParser",
    "GLMParser",
    "MAIParser",
    "PhoneAgentParser",
]
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Base protocol for action parsers.
|
|
2
|
+
|
|
3
|
+
This module defines the interface that all action parsers must implement.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Any, Protocol
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ActionParser(Protocol):
    """Structural interface for turning model output into action dicts.

    An implementation supplies two members:
    1. ``parse()``: converts raw model output into a standardized action dict.
    2. ``coordinate_scale``: the coordinate normalization range it uses.

    Standardized action dictionary layout:
        {
            "_metadata": "do" | "finish",
            "action": "Tap" | "Swipe" | "Type" | ...,  # only when _metadata="do"
            "coordinate": [x, y],                      # normalized to 0-1000 range
            "text": "...",                             # for the Type action
            ...                                        # other action-specific parameters
        }

    NOTE(review): the layout above states a 0-1000 range while
    ``coordinate_scale`` documents 999 for the MAI parser - confirm which
    range applies to MAI coordinates.
    """

    def parse(self, raw_response: str) -> dict[str, Any]:
        """Convert raw model output into a standardized action dictionary.

        Args:
            raw_response: Raw text output from the model.

        Returns:
            A dict containing:
            - "_metadata": "do" or "finish"
            - "action": the action type (Tap, Swipe, etc.) when _metadata="do"
            - further parameters depending on the action type

        Raises:
            ValueError: If the response cannot be parsed.
        """
        ...

    @property
    def coordinate_scale(self) -> int:
        """Coordinate normalization scale used by this parser.

        Returns:
            999 for MAI parser, 1000 for GLM/PhoneAgent parsers.
        """
        ...
|