autoglm-gui 1.5.0__py3-none-any.whl → 1.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. AutoGLM_GUI/__init__.py +1 -1
  2. AutoGLM_GUI/__main__.py +11 -2
  3. AutoGLM_GUI/adb_plus/qr_pair.py +3 -3
  4. AutoGLM_GUI/agents/__init__.py +7 -2
  5. AutoGLM_GUI/agents/factory.py +46 -6
  6. AutoGLM_GUI/agents/glm/agent.py +8 -3
  7. AutoGLM_GUI/agents/glm/async_agent.py +515 -0
  8. AutoGLM_GUI/agents/glm/parser.py +4 -2
  9. AutoGLM_GUI/agents/mai/agent.py +3 -0
  10. AutoGLM_GUI/agents/protocols.py +111 -1
  11. AutoGLM_GUI/agents/stream_runner.py +11 -7
  12. AutoGLM_GUI/api/__init__.py +3 -1
  13. AutoGLM_GUI/api/agents.py +103 -37
  14. AutoGLM_GUI/api/devices.py +72 -0
  15. AutoGLM_GUI/api/history.py +27 -1
  16. AutoGLM_GUI/api/layered_agent.py +9 -8
  17. AutoGLM_GUI/api/mcp.py +6 -4
  18. AutoGLM_GUI/config_manager.py +38 -1
  19. AutoGLM_GUI/device_manager.py +28 -4
  20. AutoGLM_GUI/device_metadata_manager.py +174 -0
  21. AutoGLM_GUI/devices/mock_device.py +8 -1
  22. AutoGLM_GUI/models/history.py +45 -1
  23. AutoGLM_GUI/phone_agent_manager.py +145 -32
  24. AutoGLM_GUI/scheduler_manager.py +52 -6
  25. AutoGLM_GUI/schemas.py +101 -0
  26. AutoGLM_GUI/scrcpy_stream.py +2 -1
  27. AutoGLM_GUI/static/assets/{about-BQm96DAl.js → about-D7r9gCvG.js} +1 -1
  28. AutoGLM_GUI/static/assets/{alert-dialog-B42XxGPR.js → alert-dialog-BKM-yRiQ.js} +1 -1
  29. AutoGLM_GUI/static/assets/chat-k6TTD7PW.js +129 -0
  30. AutoGLM_GUI/static/assets/{circle-alert-D4rSJh37.js → circle-alert-sohSDLhl.js} +1 -1
  31. AutoGLM_GUI/static/assets/{dialog-DZ78cEcj.js → dialog-BgtPh0d5.js} +1 -1
  32. AutoGLM_GUI/static/assets/eye-DLqKbQmg.js +1 -0
  33. AutoGLM_GUI/static/assets/history-Bv1lfGUU.js +1 -0
  34. AutoGLM_GUI/static/assets/index-CV7jGxGm.css +1 -0
  35. AutoGLM_GUI/static/assets/index-CxWwh1VO.js +1 -0
  36. AutoGLM_GUI/static/assets/{index-CssG-3TH.js → index-SysdKciY.js} +5 -5
  37. AutoGLM_GUI/static/assets/label-DTUnzN4B.js +1 -0
  38. AutoGLM_GUI/static/assets/{logs-eoFxn5of.js → logs-BIhnDizW.js} +1 -1
  39. AutoGLM_GUI/static/assets/{popover-DLsuV5Sx.js → popover-CikYqu2P.js} +1 -1
  40. AutoGLM_GUI/static/assets/scheduled-tasks-B-KBsGbl.js +1 -0
  41. AutoGLM_GUI/static/assets/{textarea-BX6y7uM5.js → textarea-knJZrz77.js} +1 -1
  42. AutoGLM_GUI/static/assets/workflows-DzcSYwLZ.js +1 -0
  43. AutoGLM_GUI/static/index.html +2 -2
  44. {autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.2.dist-info}/METADATA +58 -7
  45. autoglm_gui-1.5.2.dist-info/RECORD +119 -0
  46. AutoGLM_GUI/device_adapter.py +0 -263
  47. AutoGLM_GUI/static/assets/chat-C0L2gQYG.js +0 -129
  48. AutoGLM_GUI/static/assets/history-DFBv7TGc.js +0 -1
  49. AutoGLM_GUI/static/assets/index-Bzyv2yQ2.css +0 -1
  50. AutoGLM_GUI/static/assets/index-CmZSnDqc.js +0 -1
  51. AutoGLM_GUI/static/assets/label-BCUzE_nm.js +0 -1
  52. AutoGLM_GUI/static/assets/scheduled-tasks-MyqGJvy_.js +0 -1
  53. AutoGLM_GUI/static/assets/square-pen-zGWYrdfj.js +0 -1
  54. AutoGLM_GUI/static/assets/workflows-CYFs6ssC.js +0 -1
  55. autoglm_gui-1.5.0.dist-info/RECORD +0 -157
  56. mai_agent/base.py +0 -137
  57. mai_agent/mai_grounding_agent.py +0 -263
  58. mai_agent/mai_naivigation_agent.py +0 -526
  59. mai_agent/prompt.py +0 -148
  60. mai_agent/unified_memory.py +0 -67
  61. mai_agent/utils.py +0 -73
  62. phone_agent/__init__.py +0 -12
  63. phone_agent/actions/__init__.py +0 -5
  64. phone_agent/actions/handler.py +0 -400
  65. phone_agent/actions/handler_ios.py +0 -278
  66. phone_agent/adb/__init__.py +0 -51
  67. phone_agent/adb/connection.py +0 -358
  68. phone_agent/adb/device.py +0 -253
  69. phone_agent/adb/input.py +0 -108
  70. phone_agent/adb/screenshot.py +0 -108
  71. phone_agent/agent.py +0 -253
  72. phone_agent/agent_ios.py +0 -277
  73. phone_agent/config/__init__.py +0 -53
  74. phone_agent/config/apps.py +0 -227
  75. phone_agent/config/apps_harmonyos.py +0 -256
  76. phone_agent/config/apps_ios.py +0 -339
  77. phone_agent/config/i18n.py +0 -81
  78. phone_agent/config/prompts.py +0 -80
  79. phone_agent/config/prompts_en.py +0 -79
  80. phone_agent/config/prompts_zh.py +0 -82
  81. phone_agent/config/timing.py +0 -167
  82. phone_agent/device_factory.py +0 -166
  83. phone_agent/hdc/__init__.py +0 -53
  84. phone_agent/hdc/connection.py +0 -384
  85. phone_agent/hdc/device.py +0 -269
  86. phone_agent/hdc/input.py +0 -145
  87. phone_agent/hdc/screenshot.py +0 -127
  88. phone_agent/model/__init__.py +0 -5
  89. phone_agent/model/client.py +0 -290
  90. phone_agent/xctest/__init__.py +0 -47
  91. phone_agent/xctest/connection.py +0 -379
  92. phone_agent/xctest/device.py +0 -472
  93. phone_agent/xctest/input.py +0 -311
  94. phone_agent/xctest/screenshot.py +0 -226
  95. {autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.2.dist-info}/WHEEL +0 -0
  96. {autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.2.dist-info}/entry_points.txt +0 -0
  97. {autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.2.dist-info}/licenses/LICENSE +0 -0
phone_agent/hdc/input.py DELETED
@@ -1,145 +0,0 @@
1
- """Input utilities for HarmonyOS device text input."""
2
-
3
- from phone_agent.hdc.connection import _run_hdc_command
4
-
5
-
6
- def type_text(text: str, device_id: str | None = None) -> None:
7
- """
8
- Type text into the currently focused input field.
9
-
10
- Args:
11
- text: The text to type. Supports multi-line text with newline characters.
12
- device_id: Optional HDC device ID for multi-device setups.
13
-
14
- Note:
15
- HarmonyOS uses: hdc shell uitest uiInput text "文本内容"
16
- This command works without coordinates when input field is focused.
17
- For multi-line text, the function splits by newlines and sends ENTER keyEvents.
18
- ENTER key code in HarmonyOS: 2054
19
- Recommendation: Click on the input field first to focus it, then use this function.
20
- """
21
- hdc_prefix = _get_hdc_prefix(device_id)
22
-
23
- # Handle multi-line text by splitting on newlines
24
- if "\n" in text:
25
- lines = text.split("\n")
26
- for i, line in enumerate(lines):
27
- if line: # Only process non-empty lines
28
- # Escape special characters for shell
29
- escaped_line = line.replace('"', '\\"').replace("$", "\\$")
30
-
31
- _run_hdc_command(
32
- hdc_prefix + ["shell", "uitest", "uiInput", "text", escaped_line],
33
- capture_output=True,
34
- text=True,
35
- )
36
-
37
- # Send ENTER key event after each line except the last one
38
- if i < len(lines) - 1:
39
- try:
40
- _run_hdc_command(
41
- hdc_prefix + ["shell", "uitest", "uiInput", "keyEvent", "2054"],
42
- capture_output=True,
43
- text=True,
44
- )
45
- except Exception as e:
46
- print(f"[HDC] ENTER keyEvent failed: {e}")
47
- else:
48
- # Single line text - original logic
49
- # Escape special characters for shell (keep quotes for proper text handling)
50
- # The text will be wrapped in quotes in the command
51
- escaped_text = text.replace('"', '\\"').replace("$", "\\$")
52
-
53
- # HarmonyOS uitest uiInput text command
54
- # Format: hdc shell uitest uiInput text "文本内容"
55
- _run_hdc_command(
56
- hdc_prefix + ["shell", "uitest", "uiInput", "text", escaped_text],
57
- capture_output=True,
58
- text=True,
59
- )
60
-
61
-
62
- def clear_text(device_id: str | None = None) -> None:
63
- """
64
- Clear text in the currently focused input field.
65
-
66
- Args:
67
- device_id: Optional HDC device ID for multi-device setups.
68
-
69
- Note:
70
- This method uses repeated delete key events to clear text.
71
- For HarmonyOS, you might also use select all + delete for better efficiency.
72
- """
73
- hdc_prefix = _get_hdc_prefix(device_id)
74
- # Ctrl+A to select all (key code 2072 for Ctrl, 2017 for A)
75
- # Then delete
76
- _run_hdc_command(
77
- hdc_prefix + ["shell", "uitest", "uiInput", "keyEvent", "2072", "2017"],
78
- capture_output=True,
79
- text=True,
80
- )
81
- _run_hdc_command(
82
- hdc_prefix + ["shell", "uitest", "uiInput", "keyEvent", "2055"], # Delete key
83
- capture_output=True,
84
- text=True,
85
- )
86
-
87
-
88
- def detect_and_set_adb_keyboard(device_id: str | None = None) -> str:
89
- """
90
- Detect current keyboard and switch to ADB Keyboard if available.
91
-
92
- Args:
93
- device_id: Optional HDC device ID for multi-device setups.
94
-
95
- Returns:
96
- The original keyboard IME identifier for later restoration.
97
-
98
- Note:
99
- This is a placeholder. HarmonyOS may not support ADB Keyboard.
100
- If there's a similar tool for HarmonyOS, integrate it here.
101
- """
102
- hdc_prefix = _get_hdc_prefix(device_id)
103
-
104
- # Get current IME (if HarmonyOS supports this)
105
- try:
106
- result = _run_hdc_command(
107
- hdc_prefix + ["shell", "settings", "get", "secure", "default_input_method"],
108
- capture_output=True,
109
- text=True,
110
- )
111
- current_ime = (result.stdout + result.stderr).strip()
112
-
113
- # If ADB Keyboard equivalent exists for HarmonyOS, switch to it
114
- # For now, we'll just return the current IME
115
- return current_ime
116
- except Exception:
117
- return ""
118
-
119
-
120
- def restore_keyboard(ime: str, device_id: str | None = None) -> None:
121
- """
122
- Restore the original keyboard IME.
123
-
124
- Args:
125
- ime: The IME identifier to restore.
126
- device_id: Optional HDC device ID for multi-device setups.
127
- """
128
- if not ime:
129
- return
130
-
131
- hdc_prefix = _get_hdc_prefix(device_id)
132
-
133
- try:
134
- _run_hdc_command(
135
- hdc_prefix + ["shell", "ime", "set", ime], capture_output=True, text=True
136
- )
137
- except Exception:
138
- pass
139
-
140
-
141
- def _get_hdc_prefix(device_id: str | None) -> list:
142
- """Get HDC command prefix with optional device specifier."""
143
- if device_id:
144
- return ["hdc", "-t", device_id]
145
- return ["hdc"]
phone_agent/hdc/screenshot.py DELETED
@@ -1,127 +0,0 @@
1
- """Screenshot utilities for capturing HarmonyOS device screen."""
2
-
3
- import base64
4
- import os
5
- import tempfile
6
- import uuid
7
- from dataclasses import dataclass
8
- from io import BytesIO
9
-
10
- from PIL import Image
11
- from phone_agent.hdc.connection import _run_hdc_command
12
-
13
-
14
- @dataclass
15
- class Screenshot:
16
- """Represents a captured screenshot."""
17
-
18
- base64_data: str
19
- width: int
20
- height: int
21
- is_sensitive: bool = False
22
-
23
-
24
- def get_screenshot(device_id: str | None = None, timeout: int = 10) -> Screenshot:
25
- """
26
- Capture a screenshot from the connected HarmonyOS device.
27
-
28
- Args:
29
- device_id: Optional HDC device ID for multi-device setups.
30
- timeout: Timeout in seconds for screenshot operations.
31
-
32
- Returns:
33
- Screenshot object containing base64 data and dimensions.
34
-
35
- Note:
36
- If the screenshot fails (e.g., on sensitive screens like payment pages),
37
- a black fallback image is returned with is_sensitive=True.
38
- """
39
- temp_path = os.path.join(tempfile.gettempdir(), f"screenshot_{uuid.uuid4()}.png")
40
- hdc_prefix = _get_hdc_prefix(device_id)
41
-
42
- try:
43
- # Execute screenshot command
44
- # HarmonyOS HDC only supports JPEG format
45
- remote_path = "/data/local/tmp/tmp_screenshot.jpeg"
46
-
47
- # Try method 1: hdc shell screenshot (newer HarmonyOS versions)
48
- result = _run_hdc_command(
49
- hdc_prefix + ["shell", "screenshot", remote_path],
50
- capture_output=True,
51
- text=True,
52
- timeout=timeout,
53
- )
54
-
55
- # Check for screenshot failure (sensitive screen)
56
- output = result.stdout + result.stderr
57
- if (
58
- "fail" in output.lower()
59
- or "error" in output.lower()
60
- or "not found" in output.lower()
61
- ):
62
- # Try method 2: snapshot_display (older versions or different devices)
63
- result = _run_hdc_command(
64
- hdc_prefix + ["shell", "snapshot_display", "-f", remote_path],
65
- capture_output=True,
66
- text=True,
67
- timeout=timeout,
68
- )
69
- output = result.stdout + result.stderr
70
- if "fail" in output.lower() or "error" in output.lower():
71
- return _create_fallback_screenshot(is_sensitive=True)
72
-
73
- # Pull screenshot to local temp path
74
- # Note: remote file is JPEG, but PIL can open it regardless of local extension
75
- _run_hdc_command(
76
- hdc_prefix + ["file", "recv", remote_path, temp_path],
77
- capture_output=True,
78
- text=True,
79
- timeout=5,
80
- )
81
-
82
- if not os.path.exists(temp_path):
83
- return _create_fallback_screenshot(is_sensitive=False)
84
-
85
- # Read JPEG image and convert to PNG for model inference
86
- # PIL automatically detects the image format from file content
87
- img = Image.open(temp_path)
88
- width, height = img.size
89
-
90
- buffered = BytesIO()
91
- img.save(buffered, format="PNG")
92
- base64_data = base64.b64encode(buffered.getvalue()).decode("utf-8")
93
-
94
- # Cleanup
95
- os.remove(temp_path)
96
-
97
- return Screenshot(
98
- base64_data=base64_data, width=width, height=height, is_sensitive=False
99
- )
100
-
101
- except Exception as e:
102
- print(f"Screenshot error: {e}")
103
- return _create_fallback_screenshot(is_sensitive=False)
104
-
105
-
106
- def _get_hdc_prefix(device_id: str | None) -> list:
107
- """Get HDC command prefix with optional device specifier."""
108
- if device_id:
109
- return ["hdc", "-t", device_id]
110
- return ["hdc"]
111
-
112
-
113
- def _create_fallback_screenshot(is_sensitive: bool) -> Screenshot:
114
- """Create a black fallback image when screenshot fails."""
115
- default_width, default_height = 1080, 2400
116
-
117
- black_img = Image.new("RGB", (default_width, default_height), color="black")
118
- buffered = BytesIO()
119
- black_img.save(buffered, format="PNG")
120
- base64_data = base64.b64encode(buffered.getvalue()).decode("utf-8")
121
-
122
- return Screenshot(
123
- base64_data=base64_data,
124
- width=default_width,
125
- height=default_height,
126
- is_sensitive=is_sensitive,
127
- )
phone_agent/model/__init__.py DELETED
@@ -1,5 +0,0 @@
1
- """Model client module for AI inference."""
2
-
3
- from phone_agent.model.client import ModelClient, ModelConfig
4
-
5
- __all__ = ["ModelClient", "ModelConfig"]
phone_agent/model/client.py DELETED
@@ -1,290 +0,0 @@
1
- """Model client for AI inference using OpenAI-compatible API."""
2
-
3
- import json
4
- import time
5
- from dataclasses import dataclass, field
6
- from typing import Any
7
-
8
- from openai import OpenAI
9
-
10
- from phone_agent.config.i18n import get_message
11
-
12
-
13
- @dataclass
14
- class ModelConfig:
15
- """Configuration for the AI model."""
16
-
17
- base_url: str = "http://localhost:8000/v1"
18
- api_key: str = "EMPTY"
19
- model_name: str = "autoglm-phone-9b"
20
- max_tokens: int = 3000
21
- temperature: float = 0.0
22
- top_p: float = 0.85
23
- frequency_penalty: float = 0.2
24
- extra_body: dict[str, Any] = field(default_factory=dict)
25
- lang: str = "cn" # Language for UI messages: 'cn' or 'en'
26
-
27
-
28
- @dataclass
29
- class ModelResponse:
30
- """Response from the AI model."""
31
-
32
- thinking: str
33
- action: str
34
- raw_content: str
35
- # Performance metrics
36
- time_to_first_token: float | None = None # Time to first token (seconds)
37
- time_to_thinking_end: float | None = None # Time to thinking end (seconds)
38
- total_time: float | None = None # Total inference time (seconds)
39
-
40
-
41
- class ModelClient:
42
- """
43
- Client for interacting with OpenAI-compatible vision-language models.
44
-
45
- Args:
46
- config: Model configuration.
47
- """
48
-
49
- def __init__(self, config: ModelConfig | None = None):
50
- self.config = config or ModelConfig()
51
- self.client = OpenAI(base_url=self.config.base_url, api_key=self.config.api_key)
52
-
53
- def request(self, messages: list[dict[str, Any]]) -> ModelResponse:
54
- """
55
- Send a request to the model.
56
-
57
- Args:
58
- messages: List of message dictionaries in OpenAI format.
59
-
60
- Returns:
61
- ModelResponse containing thinking and action.
62
-
63
- Raises:
64
- ValueError: If the response cannot be parsed.
65
- """
66
- # Start timing
67
- start_time = time.time()
68
- time_to_first_token = None
69
- time_to_thinking_end = None
70
-
71
- stream = self.client.chat.completions.create(
72
- messages=messages,
73
- model=self.config.model_name,
74
- max_tokens=self.config.max_tokens,
75
- temperature=self.config.temperature,
76
- top_p=self.config.top_p,
77
- frequency_penalty=self.config.frequency_penalty,
78
- extra_body=self.config.extra_body,
79
- stream=True,
80
- )
81
-
82
- raw_content = ""
83
- buffer = "" # Buffer to hold content that might be part of a marker
84
- action_markers = ["finish(message=", "do(action="]
85
- in_action_phase = False # Track if we've entered the action phase
86
- first_token_received = False
87
-
88
- for chunk in stream:
89
- if len(chunk.choices) == 0:
90
- continue
91
- if chunk.choices[0].delta.content is not None:
92
- content = chunk.choices[0].delta.content
93
- raw_content += content
94
-
95
- # Record time to first token
96
- if not first_token_received:
97
- time_to_first_token = time.time() - start_time
98
- first_token_received = True
99
-
100
- if in_action_phase:
101
- # Already in action phase, just accumulate content without printing
102
- continue
103
-
104
- buffer += content
105
-
106
- # Check if any marker is fully present in buffer
107
- marker_found = False
108
- for marker in action_markers:
109
- if marker in buffer:
110
- # Marker found, print everything before it
111
- thinking_part = buffer.split(marker, 1)[0]
112
- print(thinking_part, end="", flush=True)
113
- print() # Print newline after thinking is complete
114
- in_action_phase = True
115
- marker_found = True
116
-
117
- # Record time to thinking end
118
- if time_to_thinking_end is None:
119
- time_to_thinking_end = time.time() - start_time
120
-
121
- break
122
-
123
- if marker_found:
124
- continue # Continue to collect remaining content
125
-
126
- # Check if buffer ends with a prefix of any marker
127
- # If so, don't print yet (wait for more content)
128
- is_potential_marker = False
129
- for marker in action_markers:
130
- for i in range(1, len(marker)):
131
- if buffer.endswith(marker[:i]):
132
- is_potential_marker = True
133
- break
134
- if is_potential_marker:
135
- break
136
-
137
- if not is_potential_marker:
138
- # Safe to print the buffer
139
- print(buffer, end="", flush=True)
140
- buffer = ""
141
-
142
- # Calculate total time
143
- total_time = time.time() - start_time
144
-
145
- # Parse thinking and action from response
146
- thinking, action = self._parse_response(raw_content)
147
-
148
- # Print performance metrics
149
- lang = self.config.lang
150
- print()
151
- print("=" * 50)
152
- print(f"⏱️ {get_message('performance_metrics', lang)}:")
153
- print("-" * 50)
154
- if time_to_first_token is not None:
155
- print(
156
- f"{get_message('time_to_first_token', lang)}: {time_to_first_token:.3f}s"
157
- )
158
- if time_to_thinking_end is not None:
159
- print(
160
- f"{get_message('time_to_thinking_end', lang)}: {time_to_thinking_end:.3f}s"
161
- )
162
- print(
163
- f"{get_message('total_inference_time', lang)}: {total_time:.3f}s"
164
- )
165
- print("=" * 50)
166
-
167
- return ModelResponse(
168
- thinking=thinking,
169
- action=action,
170
- raw_content=raw_content,
171
- time_to_first_token=time_to_first_token,
172
- time_to_thinking_end=time_to_thinking_end,
173
- total_time=total_time,
174
- )
175
-
176
- def _parse_response(self, content: str) -> tuple[str, str]:
177
- """
178
- Parse the model response into thinking and action parts.
179
-
180
- Parsing rules:
181
- 1. If content contains 'finish(message=', everything before is thinking,
182
- everything from 'finish(message=' onwards is action.
183
- 2. If rule 1 doesn't apply but content contains 'do(action=',
184
- everything before is thinking, everything from 'do(action=' onwards is action.
185
- 3. Fallback: If content contains '<answer>', use legacy parsing with XML tags.
186
- 4. Otherwise, return empty thinking and full content as action.
187
-
188
- Args:
189
- content: Raw response content.
190
-
191
- Returns:
192
- Tuple of (thinking, action).
193
- """
194
- # Rule 1: Check for finish(message=
195
- if "finish(message=" in content:
196
- parts = content.split("finish(message=", 1)
197
- thinking = parts[0].strip()
198
- action = "finish(message=" + parts[1]
199
- return thinking, action
200
-
201
- # Rule 2: Check for do(action=
202
- if "do(action=" in content:
203
- parts = content.split("do(action=", 1)
204
- thinking = parts[0].strip()
205
- action = "do(action=" + parts[1]
206
- return thinking, action
207
-
208
- # Rule 3: Fallback to legacy XML tag parsing
209
- if "<answer>" in content:
210
- parts = content.split("<answer>", 1)
211
- thinking = parts[0].replace("<think>", "").replace("</think>", "").strip()
212
- action = parts[1].replace("</answer>", "").strip()
213
- return thinking, action
214
-
215
- # Rule 4: No markers found, return content as action
216
- return "", content
217
-
218
-
219
- class MessageBuilder:
220
- """Helper class for building conversation messages."""
221
-
222
- @staticmethod
223
- def create_system_message(content: str) -> dict[str, Any]:
224
- """Create a system message."""
225
- return {"role": "system", "content": content}
226
-
227
- @staticmethod
228
- def create_user_message(
229
- text: str, image_base64: str | None = None
230
- ) -> dict[str, Any]:
231
- """
232
- Create a user message with optional image.
233
-
234
- Args:
235
- text: Text content.
236
- image_base64: Optional base64-encoded image.
237
-
238
- Returns:
239
- Message dictionary.
240
- """
241
- content = []
242
-
243
- if image_base64:
244
- content.append(
245
- {
246
- "type": "image_url",
247
- "image_url": {"url": f"data:image/png;base64,{image_base64}"},
248
- }
249
- )
250
-
251
- content.append({"type": "text", "text": text})
252
-
253
- return {"role": "user", "content": content}
254
-
255
- @staticmethod
256
- def create_assistant_message(content: str) -> dict[str, Any]:
257
- """Create an assistant message."""
258
- return {"role": "assistant", "content": content}
259
-
260
- @staticmethod
261
- def remove_images_from_message(message: dict[str, Any]) -> dict[str, Any]:
262
- """
263
- Remove image content from a message to save context space.
264
-
265
- Args:
266
- message: Message dictionary.
267
-
268
- Returns:
269
- Message with images removed.
270
- """
271
- if isinstance(message.get("content"), list):
272
- message["content"] = [
273
- item for item in message["content"] if item.get("type") == "text"
274
- ]
275
- return message
276
-
277
- @staticmethod
278
- def build_screen_info(current_app: str, **extra_info) -> str:
279
- """
280
- Build screen info string for the model.
281
-
282
- Args:
283
- current_app: Current app name.
284
- **extra_info: Additional info to include.
285
-
286
- Returns:
287
- JSON string with screen info.
288
- """
289
- info = {"current_app": current_app, **extra_info}
290
- return json.dumps(info, ensure_ascii=False)
phone_agent/xctest/__init__.py DELETED
@@ -1,47 +0,0 @@
1
- """XCTest utilities for iOS device interaction via WebDriverAgent/XCUITest."""
2
-
3
- from phone_agent.xctest.connection import (
4
- ConnectionType,
5
- DeviceInfo,
6
- XCTestConnection,
7
- list_devices,
8
- quick_connect,
9
- )
10
- from phone_agent.xctest.device import (
11
- back,
12
- double_tap,
13
- get_current_app,
14
- home,
15
- launch_app,
16
- long_press,
17
- swipe,
18
- tap,
19
- )
20
- from phone_agent.xctest.input import (
21
- clear_text,
22
- type_text,
23
- )
24
- from phone_agent.xctest.screenshot import get_screenshot
25
-
26
- __all__ = [
27
- # Screenshot
28
- "get_screenshot",
29
- # Input
30
- "type_text",
31
- "clear_text",
32
- # Device control
33
- "get_current_app",
34
- "tap",
35
- "swipe",
36
- "back",
37
- "home",
38
- "double_tap",
39
- "long_press",
40
- "launch_app",
41
- # Connection management
42
- "XCTestConnection",
43
- "DeviceInfo",
44
- "ConnectionType",
45
- "quick_connect",
46
- "list_devices",
47
- ]