autoglm-gui 1.5.0__py3-none-any.whl → 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AutoGLM_GUI/agents/glm/agent.py +6 -1
- AutoGLM_GUI/agents/mai/agent.py +3 -0
- AutoGLM_GUI/agents/stream_runner.py +7 -2
- AutoGLM_GUI/api/agents.py +26 -1
- AutoGLM_GUI/api/history.py +27 -1
- AutoGLM_GUI/models/history.py +45 -1
- AutoGLM_GUI/scheduler_manager.py +52 -6
- AutoGLM_GUI/schemas.py +12 -0
- AutoGLM_GUI/static/assets/{about-BQm96DAl.js → about-CfwX1Cmc.js} +1 -1
- AutoGLM_GUI/static/assets/{alert-dialog-B42XxGPR.js → alert-dialog-CtGlN2IJ.js} +1 -1
- AutoGLM_GUI/static/assets/chat-BYa-foUI.js +129 -0
- AutoGLM_GUI/static/assets/{circle-alert-D4rSJh37.js → circle-alert-t08bEMPO.js} +1 -1
- AutoGLM_GUI/static/assets/{dialog-DZ78cEcj.js → dialog-FNwZJFwk.js} +1 -1
- AutoGLM_GUI/static/assets/eye-D0UPWCWC.js +1 -0
- AutoGLM_GUI/static/assets/history-CRo95B7i.js +1 -0
- AutoGLM_GUI/static/assets/{index-CmZSnDqc.js → index-BaLMSqd3.js} +1 -1
- AutoGLM_GUI/static/assets/{index-CssG-3TH.js → index-CTHbFvKl.js} +5 -5
- AutoGLM_GUI/static/assets/index-CV7jGxGm.css +1 -0
- AutoGLM_GUI/static/assets/{label-BCUzE_nm.js → label-DJFevVmr.js} +1 -1
- AutoGLM_GUI/static/assets/{logs-eoFxn5of.js → logs-RW09DyYY.js} +1 -1
- AutoGLM_GUI/static/assets/{popover-DLsuV5Sx.js → popover--JTJrE5v.js} +1 -1
- AutoGLM_GUI/static/assets/{scheduled-tasks-MyqGJvy_.js → scheduled-tasks-DTRKsQXF.js} +1 -1
- AutoGLM_GUI/static/assets/{square-pen-zGWYrdfj.js → square-pen-CPK_K680.js} +1 -1
- AutoGLM_GUI/static/assets/{textarea-BX6y7uM5.js → textarea-PRmVnWq5.js} +1 -1
- AutoGLM_GUI/static/assets/{workflows-CYFs6ssC.js → workflows-CdcsAoaT.js} +1 -1
- AutoGLM_GUI/static/index.html +2 -2
- {autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/METADATA +49 -7
- {autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/RECORD +31 -70
- AutoGLM_GUI/device_adapter.py +0 -263
- AutoGLM_GUI/static/assets/chat-C0L2gQYG.js +0 -129
- AutoGLM_GUI/static/assets/history-DFBv7TGc.js +0 -1
- AutoGLM_GUI/static/assets/index-Bzyv2yQ2.css +0 -1
- mai_agent/base.py +0 -137
- mai_agent/mai_grounding_agent.py +0 -263
- mai_agent/mai_naivigation_agent.py +0 -526
- mai_agent/prompt.py +0 -148
- mai_agent/unified_memory.py +0 -67
- mai_agent/utils.py +0 -73
- phone_agent/__init__.py +0 -12
- phone_agent/actions/__init__.py +0 -5
- phone_agent/actions/handler.py +0 -400
- phone_agent/actions/handler_ios.py +0 -278
- phone_agent/adb/__init__.py +0 -51
- phone_agent/adb/connection.py +0 -358
- phone_agent/adb/device.py +0 -253
- phone_agent/adb/input.py +0 -108
- phone_agent/adb/screenshot.py +0 -108
- phone_agent/agent.py +0 -253
- phone_agent/agent_ios.py +0 -277
- phone_agent/config/__init__.py +0 -53
- phone_agent/config/apps.py +0 -227
- phone_agent/config/apps_harmonyos.py +0 -256
- phone_agent/config/apps_ios.py +0 -339
- phone_agent/config/i18n.py +0 -81
- phone_agent/config/prompts.py +0 -80
- phone_agent/config/prompts_en.py +0 -79
- phone_agent/config/prompts_zh.py +0 -82
- phone_agent/config/timing.py +0 -167
- phone_agent/device_factory.py +0 -166
- phone_agent/hdc/__init__.py +0 -53
- phone_agent/hdc/connection.py +0 -384
- phone_agent/hdc/device.py +0 -269
- phone_agent/hdc/input.py +0 -145
- phone_agent/hdc/screenshot.py +0 -127
- phone_agent/model/__init__.py +0 -5
- phone_agent/model/client.py +0 -290
- phone_agent/xctest/__init__.py +0 -47
- phone_agent/xctest/connection.py +0 -379
- phone_agent/xctest/device.py +0 -472
- phone_agent/xctest/input.py +0 -311
- phone_agent/xctest/screenshot.py +0 -226
- {autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/WHEEL +0 -0
- {autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/entry_points.txt +0 -0
- {autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/licenses/LICENSE +0 -0
phone_agent/hdc/input.py
DELETED
|
@@ -1,145 +0,0 @@
|
|
|
1
|
-
"""Input utilities for HarmonyOS device text input."""
|
|
2
|
-
|
|
3
|
-
from phone_agent.hdc.connection import _run_hdc_command
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def type_text(text: str, device_id: str | None = None) -> None:
    """
    Type text into the input field that currently has focus.

    Args:
        text: Text to enter. May contain newline characters; each newline is
            delivered as an ENTER key event.
        device_id: Optional HDC device ID for multi-device setups.

    Note:
        The text is sent with ``hdc shell uitest uiInput text <text>``, which
        needs no coordinates as long as an input field is focused, so click
        the field first. ENTER is sent as HarmonyOS key code 2054.
    """
    base_cmd = _get_hdc_prefix(device_id)

    # Single-line input: one uiInput call, issued even when the text is empty.
    if "\n" not in text:
        # Escape double quotes and dollar signs before handing the text over.
        payload = text.replace('"', '\\"').replace("$", "\\$")
        _run_hdc_command(
            base_cmd + ["shell", "uitest", "uiInput", "text", payload],
            capture_output=True,
            text=True,
        )
        return

    # Multi-line input: type each non-empty segment, pressing ENTER between
    # consecutive segments (but not after the final one).
    segments = text.split("\n")
    final_index = len(segments) - 1
    for index, segment in enumerate(segments):
        if segment:
            payload = segment.replace('"', '\\"').replace("$", "\\$")
            _run_hdc_command(
                base_cmd + ["shell", "uitest", "uiInput", "text", payload],
                capture_output=True,
                text=True,
            )

        if index >= final_index:
            continue

        # A failed ENTER is reported but does not abort the remaining lines.
        try:
            _run_hdc_command(
                base_cmd + ["shell", "uitest", "uiInput", "keyEvent", "2054"],
                capture_output=True,
                text=True,
            )
        except Exception as exc:
            print(f"[HDC] ENTER keyEvent failed: {exc}")
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def clear_text(device_id: str | None = None) -> None:
    """
    Clear the text of the currently focused input field.

    Sends two ``uitest uiInput keyEvent`` commands in order: a select-all
    chord followed by a delete key press.

    Args:
        device_id: Optional HDC device ID for multi-device setups.
    """
    key_event_cmd = _get_hdc_prefix(device_id) + [
        "shell",
        "uitest",
        "uiInput",
        "keyEvent",
    ]

    key_sequences = (
        ["2072", "2017"],  # Ctrl (2072) + A (2017): select all
        ["2055"],  # Delete key
    )
    for key_codes in key_sequences:
        _run_hdc_command(
            key_event_cmd + key_codes,
            capture_output=True,
            text=True,
        )
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
def detect_and_set_adb_keyboard(device_id: str | None = None) -> str:
    """
    Report the device's current input method (IME).

    This is a placeholder for HarmonyOS: no keyboard switch is performed, the
    function only queries ``settings get secure default_input_method`` and
    hands the answer back so it can be passed to ``restore_keyboard`` later.

    Args:
        device_id: Optional HDC device ID for multi-device setups.

    Returns:
        The stripped, combined stdout and stderr of the query, or an empty
        string when the query raises.
    """
    query = _get_hdc_prefix(device_id) + [
        "shell",
        "settings",
        "get",
        "secure",
        "default_input_method",
    ]

    try:
        completed = _run_hdc_command(query, capture_output=True, text=True)
        # If an ADB Keyboard equivalent for HarmonyOS ever exists, the switch
        # belongs here; for now the current IME is simply returned.
        return (completed.stdout + completed.stderr).strip()
    except Exception:
        return ""
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
def restore_keyboard(ime: str, device_id: str | None = None) -> None:
|
|
121
|
-
"""
|
|
122
|
-
Restore the original keyboard IME.
|
|
123
|
-
|
|
124
|
-
Args:
|
|
125
|
-
ime: The IME identifier to restore.
|
|
126
|
-
device_id: Optional HDC device ID for multi-device setups.
|
|
127
|
-
"""
|
|
128
|
-
if not ime:
|
|
129
|
-
return
|
|
130
|
-
|
|
131
|
-
hdc_prefix = _get_hdc_prefix(device_id)
|
|
132
|
-
|
|
133
|
-
try:
|
|
134
|
-
_run_hdc_command(
|
|
135
|
-
hdc_prefix + ["shell", "ime", "set", ime], capture_output=True, text=True
|
|
136
|
-
)
|
|
137
|
-
except Exception:
|
|
138
|
-
pass
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
def _get_hdc_prefix(device_id: str | None) -> list:
|
|
142
|
-
"""Get HDC command prefix with optional device specifier."""
|
|
143
|
-
if device_id:
|
|
144
|
-
return ["hdc", "-t", device_id]
|
|
145
|
-
return ["hdc"]
|
phone_agent/hdc/screenshot.py
DELETED
|
@@ -1,127 +0,0 @@
|
|
|
1
|
-
"""Screenshot utilities for capturing HarmonyOS device screen."""
|
|
2
|
-
|
|
3
|
-
import base64
|
|
4
|
-
import os
|
|
5
|
-
import tempfile
|
|
6
|
-
import uuid
|
|
7
|
-
from dataclasses import dataclass
|
|
8
|
-
from io import BytesIO
|
|
9
|
-
|
|
10
|
-
from PIL import Image
|
|
11
|
-
from phone_agent.hdc.connection import _run_hdc_command
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass
class Screenshot:
    """A captured device screen: encoded image data plus its pixel size."""

    # Base64 text of the encoded image bytes.
    base64_data: str
    # Image width in pixels.
    width: int
    # Image height in pixels.
    height: int
    # Marks a placeholder produced because the screen could not be captured
    # (e.g. a sensitive screen); defaults to a normal capture.
    is_sensitive: bool = False
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def get_screenshot(device_id: str | None = None, timeout: int = 10) -> Screenshot:
    """
    Capture a screenshot from the connected HarmonyOS device.

    The capture is written on the device, pulled to a uniquely named local
    temp file, re-encoded as PNG and returned as base64 text. The local temp
    file is removed on every exit path.

    Args:
        device_id: Optional HDC device ID for multi-device setups.
        timeout: Timeout in seconds for each on-device screenshot command.

    Returns:
        Screenshot object containing base64 PNG data and dimensions. If both
        capture commands report a failure, a black fallback image with
        is_sensitive=True is returned; any other failure yields a black
        fallback image with is_sensitive=False.
    """
    temp_path = os.path.join(tempfile.gettempdir(), f"screenshot_{uuid.uuid4()}.png")
    hdc_prefix = _get_hdc_prefix(device_id)

    try:
        # HarmonyOS HDC only supports JPEG format
        remote_path = "/data/local/tmp/tmp_screenshot.jpeg"

        # Method 1: hdc shell screenshot (newer HarmonyOS versions)
        result = _run_hdc_command(
            hdc_prefix + ["shell", "screenshot", remote_path],
            capture_output=True,
            text=True,
            timeout=timeout,
        )

        # The command reports problems through its output text.
        output = (result.stdout + result.stderr).lower()
        if "fail" in output or "error" in output or "not found" in output:
            # Method 2: snapshot_display (older versions or different devices)
            result = _run_hdc_command(
                hdc_prefix + ["shell", "snapshot_display", "-f", remote_path],
                capture_output=True,
                text=True,
                timeout=timeout,
            )
            output = (result.stdout + result.stderr).lower()
            if "fail" in output or "error" in output:
                # Both methods refused: treat as a sensitive screen.
                return _create_fallback_screenshot(is_sensitive=True)

        # Pull the capture to the local temp path. The remote file is JPEG;
        # PIL detects the format from the content, not the local extension.
        _run_hdc_command(
            hdc_prefix + ["file", "recv", remote_path, temp_path],
            capture_output=True,
            text=True,
            timeout=5,
        )

        if not os.path.exists(temp_path):
            return _create_fallback_screenshot(is_sensitive=False)

        # Convert to PNG for model inference. The context manager closes the
        # file handle before the temp file is deleted.
        with Image.open(temp_path) as img:
            width, height = img.size
            buffered = BytesIO()
            img.save(buffered, format="PNG")
        base64_data = base64.b64encode(buffered.getvalue()).decode("utf-8")

        return Screenshot(
            base64_data=base64_data, width=width, height=height, is_sensitive=False
        )

    except Exception as e:
        print(f"Screenshot error: {e}")
        return _create_fallback_screenshot(is_sensitive=False)

    finally:
        # Cleanup runs whether or not decoding succeeded, so a corrupt pull
        # no longer leaves files behind; a failed delete must not discard an
        # otherwise good screenshot, so it is only reported.
        if os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                print(f"Screenshot cleanup error: {cleanup_error}")
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
def _get_hdc_prefix(device_id: str | None) -> list:
|
|
107
|
-
"""Get HDC command prefix with optional device specifier."""
|
|
108
|
-
if device_id:
|
|
109
|
-
return ["hdc", "-t", device_id]
|
|
110
|
-
return ["hdc"]
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
def _create_fallback_screenshot(is_sensitive: bool) -> Screenshot:
    """
    Build an all-black 1080x2400 PNG to stand in for a failed capture.

    Args:
        is_sensitive: Value stored on the returned Screenshot.

    Returns:
        Screenshot carrying the black image as base64 PNG data.
    """
    size = (1080, 2400)

    png_bytes = BytesIO()
    Image.new("RGB", size, color="black").save(png_bytes, format="PNG")
    encoded = base64.b64encode(png_bytes.getvalue()).decode("utf-8")

    fallback_width, fallback_height = size
    return Screenshot(
        base64_data=encoded,
        width=fallback_width,
        height=fallback_height,
        is_sensitive=is_sensitive,
    )
|
phone_agent/model/__init__.py
DELETED
phone_agent/model/client.py
DELETED
|
@@ -1,290 +0,0 @@
|
|
|
1
|
-
"""Model client for AI inference using OpenAI-compatible API."""
|
|
2
|
-
|
|
3
|
-
import json
|
|
4
|
-
import time
|
|
5
|
-
from dataclasses import dataclass, field
|
|
6
|
-
from typing import Any
|
|
7
|
-
|
|
8
|
-
from openai import OpenAI
|
|
9
|
-
|
|
10
|
-
from phone_agent.config.i18n import get_message
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@dataclass
class ModelConfig:
    """Settings for the OpenAI-compatible model endpoint and its sampling."""

    # Endpoint and credentials.
    base_url: str = "http://localhost:8000/v1"
    api_key: str = "EMPTY"

    # Model selection and generation parameters.
    model_name: str = "autoglm-phone-9b"
    max_tokens: int = 3000
    temperature: float = 0.0
    top_p: float = 0.85
    frequency_penalty: float = 0.2

    # Extra request-body fields; a fresh dict is created per instance.
    extra_body: dict[str, Any] = field(default_factory=dict)

    # Language for UI messages: 'cn' or 'en'.
    lang: str = "cn"
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
@dataclass
|
|
29
|
-
class ModelResponse:
|
|
30
|
-
"""Response from the AI model."""
|
|
31
|
-
|
|
32
|
-
thinking: str
|
|
33
|
-
action: str
|
|
34
|
-
raw_content: str
|
|
35
|
-
# Performance metrics
|
|
36
|
-
time_to_first_token: float | None = None # Time to first token (seconds)
|
|
37
|
-
time_to_thinking_end: float | None = None # Time to thinking end (seconds)
|
|
38
|
-
total_time: float | None = None # Total inference time (seconds)
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
class ModelClient:
    """
    Client for interacting with OpenAI-compatible vision-language models.

    Args:
        config: Model configuration. Defaults to ``ModelConfig()``.
    """

    # Substrings that open the action part of a response, in priority order.
    _ACTION_MARKERS = ("finish(message=", "do(action=")

    def __init__(self, config: ModelConfig | None = None):
        self.config = config or ModelConfig()
        self.client = OpenAI(base_url=self.config.base_url, api_key=self.config.api_key)

    def request(self, messages: list[dict[str, Any]]) -> ModelResponse:
        """
        Send a streaming request to the model.

        The thinking part of the response is echoed to stdout while it
        streams; the action part is collected silently. Timing metrics are
        printed once the stream ends.

        Args:
            messages: List of message dictionaries in OpenAI format.

        Returns:
            ModelResponse containing thinking, action, the raw text and the
            timing measurements.
        """
        start_time = time.time()

        stream = self.client.chat.completions.create(
            messages=messages,
            model=self.config.model_name,
            max_tokens=self.config.max_tokens,
            temperature=self.config.temperature,
            top_p=self.config.top_p,
            frequency_penalty=self.config.frequency_penalty,
            extra_body=self.config.extra_body,
            stream=True,
        )

        raw_content, time_to_first_token, time_to_thinking_end = self._consume_stream(
            stream, start_time
        )
        total_time = time.time() - start_time

        thinking, action = self._parse_response(raw_content)
        self._print_metrics(time_to_first_token, time_to_thinking_end, total_time)

        return ModelResponse(
            thinking=thinking,
            action=action,
            raw_content=raw_content,
            time_to_first_token=time_to_first_token,
            time_to_thinking_end=time_to_thinking_end,
            total_time=total_time,
        )

    def _consume_stream(
        self, stream: Any, start_time: float
    ) -> tuple[str, float | None, float | None]:
        """Drain the stream, echoing thinking text; return (raw text, TTFT, thinking-end time)."""
        pieces: list[str] = []
        buffer = ""  # Text held back because it might be the start of a marker
        in_action_phase = False
        time_to_first_token = None
        time_to_thinking_end = None

        for chunk in stream:
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content is None:
                continue
            pieces.append(content)

            if time_to_first_token is None:
                time_to_first_token = time.time() - start_time

            if in_action_phase:
                # Action text is only accumulated, never echoed.
                continue

            buffer += content

            marker = next((m for m in self._ACTION_MARKERS if m in buffer), None)
            if marker is not None:
                # Echo everything before the marker, then end the line.
                print(buffer.split(marker, 1)[0], end="", flush=True)
                print()
                in_action_phase = True
                time_to_thinking_end = time.time() - start_time
                continue

            # Hold the buffer while its tail could still grow into a marker.
            if not self._ends_with_marker_prefix(buffer):
                print(buffer, end="", flush=True)
                buffer = ""

        # The stream may end while text is still held back as a possible
        # marker prefix; echo it so the tail of the thinking is not lost.
        if buffer and not in_action_phase:
            print(buffer, end="", flush=True)

        return "".join(pieces), time_to_first_token, time_to_thinking_end

    @classmethod
    def _ends_with_marker_prefix(cls, text: str) -> bool:
        """Return True if ``text`` ends with a proper, non-empty prefix of any action marker."""
        return any(
            text.endswith(marker[:size])
            for marker in cls._ACTION_MARKERS
            for size in range(1, len(marker))
        )

    def _print_metrics(
        self,
        time_to_first_token: float | None,
        time_to_thinking_end: float | None,
        total_time: float,
    ) -> None:
        """Print the timing summary in the configured UI language."""
        lang = self.config.lang
        print()
        print("=" * 50)
        print(f"⏱️ {get_message('performance_metrics', lang)}:")
        print("-" * 50)
        if time_to_first_token is not None:
            print(
                f"{get_message('time_to_first_token', lang)}: {time_to_first_token:.3f}s"
            )
        if time_to_thinking_end is not None:
            print(
                f"{get_message('time_to_thinking_end', lang)}: {time_to_thinking_end:.3f}s"
            )
        print(
            f"{get_message('total_inference_time', lang)}: {total_time:.3f}s"
        )
        print("=" * 50)

    def _parse_response(self, content: str) -> tuple[str, str]:
        """
        Parse the model response into thinking and action parts.

        Parsing rules:
            1. If content contains 'finish(message=', everything before is thinking,
               everything from 'finish(message=' onwards is action.
            2. If rule 1 doesn't apply but content contains 'do(action=',
               everything before is thinking, everything from 'do(action=' onwards is action.
            3. Fallback: If content contains '<answer>', use legacy parsing with XML tags.
            4. Otherwise, return empty thinking and full content as action.

        Args:
            content: Raw response content.

        Returns:
            Tuple of (thinking, action).
        """
        # Rules 1 and 2: split at the first occurrence of the winning marker.
        for marker in self._ACTION_MARKERS:
            if marker in content:
                thinking, _, remainder = content.partition(marker)
                return thinking.strip(), marker + remainder

        # Rule 3: legacy <think>/<answer> tags.
        if "<answer>" in content:
            head, _, tail = content.partition("<answer>")
            thinking = head.replace("<think>", "").replace("</think>", "").strip()
            action = tail.replace("</answer>", "").strip()
            return thinking, action

        # Rule 4: no markers found, the whole content is the action.
        return "", content
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
class MessageBuilder:
|
|
220
|
-
"""Helper class for building conversation messages."""
|
|
221
|
-
|
|
222
|
-
@staticmethod
|
|
223
|
-
def create_system_message(content: str) -> dict[str, Any]:
|
|
224
|
-
"""Create a system message."""
|
|
225
|
-
return {"role": "system", "content": content}
|
|
226
|
-
|
|
227
|
-
@staticmethod
|
|
228
|
-
def create_user_message(
|
|
229
|
-
text: str, image_base64: str | None = None
|
|
230
|
-
) -> dict[str, Any]:
|
|
231
|
-
"""
|
|
232
|
-
Create a user message with optional image.
|
|
233
|
-
|
|
234
|
-
Args:
|
|
235
|
-
text: Text content.
|
|
236
|
-
image_base64: Optional base64-encoded image.
|
|
237
|
-
|
|
238
|
-
Returns:
|
|
239
|
-
Message dictionary.
|
|
240
|
-
"""
|
|
241
|
-
content = []
|
|
242
|
-
|
|
243
|
-
if image_base64:
|
|
244
|
-
content.append(
|
|
245
|
-
{
|
|
246
|
-
"type": "image_url",
|
|
247
|
-
"image_url": {"url": f"data:image/png;base64,{image_base64}"},
|
|
248
|
-
}
|
|
249
|
-
)
|
|
250
|
-
|
|
251
|
-
content.append({"type": "text", "text": text})
|
|
252
|
-
|
|
253
|
-
return {"role": "user", "content": content}
|
|
254
|
-
|
|
255
|
-
@staticmethod
|
|
256
|
-
def create_assistant_message(content: str) -> dict[str, Any]:
|
|
257
|
-
"""Create an assistant message."""
|
|
258
|
-
return {"role": "assistant", "content": content}
|
|
259
|
-
|
|
260
|
-
@staticmethod
|
|
261
|
-
def remove_images_from_message(message: dict[str, Any]) -> dict[str, Any]:
|
|
262
|
-
"""
|
|
263
|
-
Remove image content from a message to save context space.
|
|
264
|
-
|
|
265
|
-
Args:
|
|
266
|
-
message: Message dictionary.
|
|
267
|
-
|
|
268
|
-
Returns:
|
|
269
|
-
Message with images removed.
|
|
270
|
-
"""
|
|
271
|
-
if isinstance(message.get("content"), list):
|
|
272
|
-
message["content"] = [
|
|
273
|
-
item for item in message["content"] if item.get("type") == "text"
|
|
274
|
-
]
|
|
275
|
-
return message
|
|
276
|
-
|
|
277
|
-
@staticmethod
|
|
278
|
-
def build_screen_info(current_app: str, **extra_info) -> str:
|
|
279
|
-
"""
|
|
280
|
-
Build screen info string for the model.
|
|
281
|
-
|
|
282
|
-
Args:
|
|
283
|
-
current_app: Current app name.
|
|
284
|
-
**extra_info: Additional info to include.
|
|
285
|
-
|
|
286
|
-
Returns:
|
|
287
|
-
JSON string with screen info.
|
|
288
|
-
"""
|
|
289
|
-
info = {"current_app": current_app, **extra_info}
|
|
290
|
-
return json.dumps(info, ensure_ascii=False)
|
phone_agent/xctest/__init__.py
DELETED
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
"""XCTest utilities for iOS device interaction via WebDriverAgent/XCUITest."""
|
|
2
|
-
|
|
3
|
-
from phone_agent.xctest.connection import (
|
|
4
|
-
ConnectionType,
|
|
5
|
-
DeviceInfo,
|
|
6
|
-
XCTestConnection,
|
|
7
|
-
list_devices,
|
|
8
|
-
quick_connect,
|
|
9
|
-
)
|
|
10
|
-
from phone_agent.xctest.device import (
|
|
11
|
-
back,
|
|
12
|
-
double_tap,
|
|
13
|
-
get_current_app,
|
|
14
|
-
home,
|
|
15
|
-
launch_app,
|
|
16
|
-
long_press,
|
|
17
|
-
swipe,
|
|
18
|
-
tap,
|
|
19
|
-
)
|
|
20
|
-
from phone_agent.xctest.input import (
|
|
21
|
-
clear_text,
|
|
22
|
-
type_text,
|
|
23
|
-
)
|
|
24
|
-
from phone_agent.xctest.screenshot import get_screenshot
|
|
25
|
-
|
|
26
|
-
__all__ = [
|
|
27
|
-
# Screenshot
|
|
28
|
-
"get_screenshot",
|
|
29
|
-
# Input
|
|
30
|
-
"type_text",
|
|
31
|
-
"clear_text",
|
|
32
|
-
# Device control
|
|
33
|
-
"get_current_app",
|
|
34
|
-
"tap",
|
|
35
|
-
"swipe",
|
|
36
|
-
"back",
|
|
37
|
-
"home",
|
|
38
|
-
"double_tap",
|
|
39
|
-
"long_press",
|
|
40
|
-
"launch_app",
|
|
41
|
-
# Connection management
|
|
42
|
-
"XCTestConnection",
|
|
43
|
-
"DeviceInfo",
|
|
44
|
-
"ConnectionType",
|
|
45
|
-
"quick_connect",
|
|
46
|
-
"list_devices",
|
|
47
|
-
]
|