autoglm-gui 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. AutoGLM_GUI/api/devices.py +49 -0
  2. AutoGLM_GUI/schemas.py +16 -0
  3. AutoGLM_GUI/static/assets/{about-29B5FDM8.js → about-BOnRPlKQ.js} +1 -1
  4. AutoGLM_GUI/static/assets/chat-CGW6uMKB.js +149 -0
  5. AutoGLM_GUI/static/assets/{index-mVNV0VwM.js → index-CRFVU0eu.js} +1 -1
  6. AutoGLM_GUI/static/assets/{index-wu8Wjf12.js → index-DH-Dl4tK.js} +5 -5
  7. AutoGLM_GUI/static/assets/index-DzUQ89YC.css +1 -0
  8. AutoGLM_GUI/static/index.html +2 -2
  9. {autoglm_gui-1.0.0.dist-info → autoglm_gui-1.0.2.dist-info}/METADATA +9 -4
  10. autoglm_gui-1.0.2.dist-info/RECORD +73 -0
  11. phone_agent/__init__.py +3 -2
  12. phone_agent/actions/handler.py +124 -31
  13. phone_agent/actions/handler_ios.py +278 -0
  14. phone_agent/adb/connection.py +14 -5
  15. phone_agent/adb/device.py +47 -16
  16. phone_agent/agent.py +8 -8
  17. phone_agent/agent_ios.py +277 -0
  18. phone_agent/config/__init__.py +18 -0
  19. phone_agent/config/apps.py +1 -1
  20. phone_agent/config/apps_harmonyos.py +256 -0
  21. phone_agent/config/apps_ios.py +339 -0
  22. phone_agent/config/i18n.py +8 -0
  23. phone_agent/config/timing.py +167 -0
  24. phone_agent/device_factory.py +166 -0
  25. phone_agent/hdc/__init__.py +53 -0
  26. phone_agent/hdc/connection.py +384 -0
  27. phone_agent/hdc/device.py +269 -0
  28. phone_agent/hdc/input.py +145 -0
  29. phone_agent/hdc/screenshot.py +127 -0
  30. phone_agent/model/client.py +104 -4
  31. phone_agent/xctest/__init__.py +47 -0
  32. phone_agent/xctest/connection.py +379 -0
  33. phone_agent/xctest/device.py +472 -0
  34. phone_agent/xctest/input.py +311 -0
  35. phone_agent/xctest/screenshot.py +226 -0
  36. AutoGLM_GUI/static/assets/chat-DTN2oKtA.js +0 -149
  37. AutoGLM_GUI/static/assets/index-Dy550Qqg.css +0 -1
  38. autoglm_gui-1.0.0.dist-info/RECORD +0 -57
  39. {autoglm_gui-1.0.0.dist-info → autoglm_gui-1.0.2.dist-info}/WHEEL +0 -0
  40. {autoglm_gui-1.0.0.dist-info → autoglm_gui-1.0.2.dist-info}/entry_points.txt +0 -0
  41. {autoglm_gui-1.0.0.dist-info → autoglm_gui-1.0.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,145 @@
1
+ """Input utilities for HarmonyOS device text input."""
2
+
3
+ from phone_agent.hdc.connection import _run_hdc_command
4
+
5
+
6
def type_text(text: str, device_id: str | None = None) -> None:
    """
    Send text to the currently focused input field of a HarmonyOS device.

    Args:
        text: The text to type. May contain newline characters.
        device_id: Optional HDC device ID for multi-device setups.

    Note:
        Text is sent with: hdc shell uitest uiInput text <text content>
        Double quotes and dollar signs are backslash-escaped before sending.
        Text containing newlines is split on "\\n"; every non-empty segment is
        sent as text, and a keyEvent 2054 (ENTER) is sent after every segment
        except the last. A failing ENTER keyEvent is printed and otherwise
        ignored; a failing text command propagates to the caller.
        Focus the input field (e.g. by tapping it) before calling this.
    """
    ui_input_cmd = _get_hdc_prefix(device_id) + ["shell", "uitest", "uiInput"]

    def send_text(raw: str) -> None:
        # Escape the characters the device shell would otherwise interpret.
        shell_safe = raw.replace('"', '\\"').replace("$", "\\$")
        _run_hdc_command(
            ui_input_cmd + ["text", shell_safe],
            capture_output=True,
            text=True,
        )

    # Single-line text is sent as-is (even when empty), in one command.
    if "\n" not in text:
        send_text(text)
        return

    segments = text.split("\n")
    last_index = len(segments) - 1
    for index, segment in enumerate(segments):
        # Empty segments produce no text command, only the line break below.
        if segment:
            send_text(segment)

        # Line break between segments; none after the final one.
        if index < last_index:
            try:
                _run_hdc_command(
                    ui_input_cmd + ["keyEvent", "2054"],
                    capture_output=True,
                    text=True,
                )
            except Exception as e:
                print(f"[HDC] ENTER keyEvent failed: {e}")
60
+
61
+
62
def clear_text(device_id: str | None = None) -> None:
    """
    Clear text in the currently focused input field.

    Args:
        device_id: Optional HDC device ID for multi-device setups.

    Note:
        Two uitest keyEvent commands are issued in order: first the key codes
        2072 and 2017 together (Ctrl + A, i.e. select all, per the original
        author's comment), then key code 2055 (Delete key).
    """
    key_event_cmd = _get_hdc_prefix(device_id) + [
        "shell",
        "uitest",
        "uiInput",
        "keyEvent",
    ]

    # Select everything first, then delete the selection.
    key_sequences = (
        ["2072", "2017"],  # Ctrl + A
        ["2055"],  # Delete key
    )
    for key_codes in key_sequences:
        _run_hdc_command(
            key_event_cmd + key_codes,
            capture_output=True,
            text=True,
        )
86
+
87
+
88
def detect_and_set_adb_keyboard(device_id: str | None = None) -> str:
    """
    Query the device's current default input method (IME).

    Args:
        device_id: Optional HDC device ID for multi-device setups.

    Returns:
        The combined, stripped stdout and stderr of the
        `settings get secure default_input_method` shell command, or an empty
        string if running the command raises.

    Note:
        Placeholder: despite the name, no keyboard switch is performed here;
        the current IME value is only read and returned so that it can be
        handed to `restore_keyboard` later.
    """
    query_cmd = _get_hdc_prefix(device_id) + [
        "shell",
        "settings",
        "get",
        "secure",
        "default_input_method",
    ]

    try:
        completed = _run_hdc_command(query_cmd, capture_output=True, text=True)
        # No ADB-Keyboard equivalent is wired in; just report the current IME.
        return (completed.stdout + completed.stderr).strip()
    except Exception:
        return ""
118
+
119
+
120
+ def restore_keyboard(ime: str, device_id: str | None = None) -> None:
121
+ """
122
+ Restore the original keyboard IME.
123
+
124
+ Args:
125
+ ime: The IME identifier to restore.
126
+ device_id: Optional HDC device ID for multi-device setups.
127
+ """
128
+ if not ime:
129
+ return
130
+
131
+ hdc_prefix = _get_hdc_prefix(device_id)
132
+
133
+ try:
134
+ _run_hdc_command(
135
+ hdc_prefix + ["shell", "ime", "set", ime], capture_output=True, text=True
136
+ )
137
+ except Exception:
138
+ pass
139
+
140
+
141
+ def _get_hdc_prefix(device_id: str | None) -> list:
142
+ """Get HDC command prefix with optional device specifier."""
143
+ if device_id:
144
+ return ["hdc", "-t", device_id]
145
+ return ["hdc"]
@@ -0,0 +1,127 @@
1
+ """Screenshot utilities for capturing HarmonyOS device screen."""
2
+
3
+ import base64
4
+ import os
5
+ import tempfile
6
+ import uuid
7
+ from dataclasses import dataclass
8
+ from io import BytesIO
9
+
10
+ from PIL import Image
11
+ from phone_agent.hdc.connection import _run_hdc_command
12
+
13
+
14
@dataclass
class Screenshot:
    """Result of a screen capture: encoded image data plus its dimensions."""

    # Image bytes encoded as a base64 string.
    base64_data: str
    # Image width in pixels.
    width: int
    # Image height in pixels.
    height: int
    # True when the capture was replaced by a fallback image because the
    # device reported a screenshot failure.
    is_sensitive: bool = False
22
+
23
+
24
def get_screenshot(device_id: str | None = None, timeout: int = 10) -> Screenshot:
    """
    Capture a screenshot from the connected HarmonyOS device.

    The capture is written to a fixed path on the device, pulled to a uniquely
    named local temp file, re-encoded as PNG and returned base64-encoded. The
    local temp file is always removed before returning, on success and on
    failure alike.

    Args:
        device_id: Optional HDC device ID for multi-device setups.
        timeout: Timeout in seconds for the on-device screenshot commands.
            The file transfer uses a fixed 5-second timeout.

    Returns:
        Screenshot object containing base64 PNG data and dimensions.

    Note:
        This function does not raise. If both capture commands report a
        failure, a black fallback image is returned with is_sensitive=True.
        If the pulled file is missing or any other error occurs, a black
        fallback image is returned with is_sensitive=False.
    """
    temp_path = os.path.join(tempfile.gettempdir(), f"screenshot_{uuid.uuid4()}.png")
    hdc_prefix = _get_hdc_prefix(device_id)
    # The device-side capture is written as JPEG.
    remote_path = "/data/local/tmp/tmp_screenshot.jpeg"

    try:
        # Method 1: `screenshot` shell command.
        result = _run_hdc_command(
            hdc_prefix + ["shell", "screenshot", remote_path],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        output = (result.stdout + result.stderr).lower()

        if any(marker in output for marker in ("fail", "error", "not found")):
            # Method 2: `snapshot_display`, for devices lacking `screenshot`.
            result = _run_hdc_command(
                hdc_prefix + ["shell", "snapshot_display", "-f", remote_path],
                capture_output=True,
                text=True,
                timeout=timeout,
            )
            output = (result.stdout + result.stderr).lower()
            if "fail" in output or "error" in output:
                # Both methods failed: treated as a sensitive screen.
                return _create_fallback_screenshot(is_sensitive=True)

        # Pull the capture to the local temp path. The local name ends in
        # .png, but the image format is detected from the file content.
        _run_hdc_command(
            hdc_prefix + ["file", "recv", remote_path, temp_path],
            capture_output=True,
            text=True,
            timeout=5,
        )

        if not os.path.exists(temp_path):
            return _create_fallback_screenshot(is_sensitive=False)

        # Re-encode as PNG; the context manager closes the file handle so the
        # temp file can be deleted afterwards.
        with Image.open(temp_path) as img:
            width, height = img.size
            buffered = BytesIO()
            img.save(buffered, format="PNG")

        base64_data = base64.b64encode(buffered.getvalue()).decode("utf-8")

        return Screenshot(
            base64_data=base64_data, width=width, height=height, is_sensitive=False
        )

    except Exception as e:
        print(f"Screenshot error: {e}")
        return _create_fallback_screenshot(is_sensitive=False)

    finally:
        # Always remove the local temp file, including when decoding failed;
        # a cleanup failure must not discard an otherwise valid result.
        if os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError as e:
                print(f"Screenshot cleanup error: {e}")
104
+
105
+
106
+ def _get_hdc_prefix(device_id: str | None) -> list:
107
+ """Get HDC command prefix with optional device specifier."""
108
+ if device_id:
109
+ return ["hdc", "-t", device_id]
110
+ return ["hdc"]
111
+
112
+
113
def _create_fallback_screenshot(is_sensitive: bool) -> Screenshot:
    """
    Build a solid black 1080x2400 PNG placeholder for a failed capture.

    Args:
        is_sensitive: Value stored in the returned Screenshot's is_sensitive field.

    Returns:
        Screenshot holding the base64-encoded placeholder and its dimensions.
    """
    size = (1080, 2400)  # (width, height) of the placeholder

    png_buffer = BytesIO()
    Image.new("RGB", size, color="black").save(png_buffer, format="PNG")
    encoded = base64.b64encode(png_buffer.getvalue()).decode("utf-8")

    placeholder_width, placeholder_height = size
    return Screenshot(
        base64_data=encoded,
        width=placeholder_width,
        height=placeholder_height,
        is_sensitive=is_sensitive,
    )
@@ -1,11 +1,14 @@
1
1
  """Model client for AI inference using OpenAI-compatible API."""
2
2
 
3
3
  import json
4
+ import time
4
5
  from dataclasses import dataclass, field
5
6
  from typing import Any
6
7
 
7
8
  from openai import OpenAI
8
9
 
10
+ from phone_agent.config.i18n import get_message
11
+
9
12
 
10
13
  @dataclass
11
14
  class ModelConfig:
@@ -19,6 +22,7 @@ class ModelConfig:
19
22
  top_p: float = 0.85
20
23
  frequency_penalty: float = 0.2
21
24
  extra_body: dict[str, Any] = field(default_factory=dict)
25
+ lang: str = "cn" # Language for UI messages: 'cn' or 'en'
22
26
 
23
27
 
24
28
  @dataclass
@@ -28,6 +32,10 @@ class ModelResponse:
28
32
  thinking: str
29
33
  action: str
30
34
  raw_content: str
35
+ # Performance metrics
36
+ time_to_first_token: float | None = None # Time to first token (seconds)
37
+ time_to_thinking_end: float | None = None # Time to thinking end (seconds)
38
+ total_time: float | None = None # Total inference time (seconds)
31
39
 
32
40
 
33
41
  class ModelClient:
@@ -55,7 +63,12 @@ class ModelClient:
55
63
  Raises:
56
64
  ValueError: If the response cannot be parsed.
57
65
  """
58
- response = self.client.chat.completions.create(
66
+ # Start timing
67
+ start_time = time.time()
68
+ time_to_first_token = None
69
+ time_to_thinking_end = None
70
+
71
+ stream = self.client.chat.completions.create(
59
72
  messages=messages,
60
73
  model=self.config.model_name,
61
74
  max_tokens=self.config.max_tokens,
@@ -63,15 +76,102 @@ class ModelClient:
63
76
  top_p=self.config.top_p,
64
77
  frequency_penalty=self.config.frequency_penalty,
65
78
  extra_body=self.config.extra_body,
66
- stream=False,
79
+ stream=True,
67
80
  )
68
81
 
69
- raw_content = response.choices[0].message.content
82
+ raw_content = ""
83
+ buffer = "" # Buffer to hold content that might be part of a marker
84
+ action_markers = ["finish(message=", "do(action="]
85
+ in_action_phase = False # Track if we've entered the action phase
86
+ first_token_received = False
87
+
88
+ for chunk in stream:
89
+ if len(chunk.choices) == 0:
90
+ continue
91
+ if chunk.choices[0].delta.content is not None:
92
+ content = chunk.choices[0].delta.content
93
+ raw_content += content
94
+
95
+ # Record time to first token
96
+ if not first_token_received:
97
+ time_to_first_token = time.time() - start_time
98
+ first_token_received = True
99
+
100
+ if in_action_phase:
101
+ # Already in action phase, just accumulate content without printing
102
+ continue
103
+
104
+ buffer += content
105
+
106
+ # Check if any marker is fully present in buffer
107
+ marker_found = False
108
+ for marker in action_markers:
109
+ if marker in buffer:
110
+ # Marker found, print everything before it
111
+ thinking_part = buffer.split(marker, 1)[0]
112
+ print(thinking_part, end="", flush=True)
113
+ print() # Print newline after thinking is complete
114
+ in_action_phase = True
115
+ marker_found = True
116
+
117
+ # Record time to thinking end
118
+ if time_to_thinking_end is None:
119
+ time_to_thinking_end = time.time() - start_time
120
+
121
+ break
122
+
123
+ if marker_found:
124
+ continue # Continue to collect remaining content
125
+
126
+ # Check if buffer ends with a prefix of any marker
127
+ # If so, don't print yet (wait for more content)
128
+ is_potential_marker = False
129
+ for marker in action_markers:
130
+ for i in range(1, len(marker)):
131
+ if buffer.endswith(marker[:i]):
132
+ is_potential_marker = True
133
+ break
134
+ if is_potential_marker:
135
+ break
136
+
137
+ if not is_potential_marker:
138
+ # Safe to print the buffer
139
+ print(buffer, end="", flush=True)
140
+ buffer = ""
141
+
142
+ # Calculate total time
143
+ total_time = time.time() - start_time
70
144
 
71
145
  # Parse thinking and action from response
72
146
  thinking, action = self._parse_response(raw_content)
73
147
 
74
- return ModelResponse(thinking=thinking, action=action, raw_content=raw_content)
148
+ # Print performance metrics
149
+ lang = self.config.lang
150
+ print()
151
+ print("=" * 50)
152
+ print(f"⏱️ {get_message('performance_metrics', lang)}:")
153
+ print("-" * 50)
154
+ if time_to_first_token is not None:
155
+ print(
156
+ f"{get_message('time_to_first_token', lang)}: {time_to_first_token:.3f}s"
157
+ )
158
+ if time_to_thinking_end is not None:
159
+ print(
160
+ f"{get_message('time_to_thinking_end', lang)}: {time_to_thinking_end:.3f}s"
161
+ )
162
+ print(
163
+ f"{get_message('total_inference_time', lang)}: {total_time:.3f}s"
164
+ )
165
+ print("=" * 50)
166
+
167
+ return ModelResponse(
168
+ thinking=thinking,
169
+ action=action,
170
+ raw_content=raw_content,
171
+ time_to_first_token=time_to_first_token,
172
+ time_to_thinking_end=time_to_thinking_end,
173
+ total_time=total_time,
174
+ )
75
175
 
76
176
  def _parse_response(self, content: str) -> tuple[str, str]:
77
177
  """
@@ -0,0 +1,47 @@
1
+ """XCTest utilities for iOS device interaction via WebDriverAgent/XCUITest."""
2
+
3
+ from phone_agent.xctest.connection import (
4
+ ConnectionType,
5
+ DeviceInfo,
6
+ XCTestConnection,
7
+ list_devices,
8
+ quick_connect,
9
+ )
10
+ from phone_agent.xctest.device import (
11
+ back,
12
+ double_tap,
13
+ get_current_app,
14
+ home,
15
+ launch_app,
16
+ long_press,
17
+ swipe,
18
+ tap,
19
+ )
20
+ from phone_agent.xctest.input import (
21
+ clear_text,
22
+ type_text,
23
+ )
24
+ from phone_agent.xctest.screenshot import get_screenshot
25
+
26
+ __all__ = [
27
+ # Screenshot
28
+ "get_screenshot",
29
+ # Input
30
+ "type_text",
31
+ "clear_text",
32
+ # Device control
33
+ "get_current_app",
34
+ "tap",
35
+ "swipe",
36
+ "back",
37
+ "home",
38
+ "double_tap",
39
+ "long_press",
40
+ "launch_app",
41
+ # Connection management
42
+ "XCTestConnection",
43
+ "DeviceInfo",
44
+ "ConnectionType",
45
+ "quick_connect",
46
+ "list_devices",
47
+ ]