phone-copilot 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- phone_copilot-0.1.0/PKG-INFO +116 -0
- phone_copilot-0.1.0/README.md +102 -0
- phone_copilot-0.1.0/pyproject.toml +74 -0
- phone_copilot-0.1.0/src/phone_copilot/__init__.py +73 -0
- phone_copilot-0.1.0/src/phone_copilot/action.py +536 -0
- phone_copilot-0.1.0/src/phone_copilot/agent/__init__.py +6 -0
- phone_copilot-0.1.0/src/phone_copilot/agent/agent.py +282 -0
- phone_copilot-0.1.0/src/phone_copilot/agent/context.py +70 -0
- phone_copilot-0.1.0/src/phone_copilot/agent/prompt.py +108 -0
- phone_copilot-0.1.0/src/phone_copilot/api.py +54 -0
- phone_copilot-0.1.0/src/phone_copilot/apps.py +365 -0
- phone_copilot-0.1.0/src/phone_copilot/device.py +78 -0
- phone_copilot-0.1.0/src/phone_copilot/image.py +223 -0
- phone_copilot-0.1.0/src/phone_copilot/model_client.py +165 -0
- phone_copilot-0.1.0/src/phone_copilot/py.typed +0 -0
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: phone-copilot
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: An AI-powered mobile task automation assistant with support for both Android and HarmonyOS.
|
|
5
|
+
Author: lanbaoshen
|
|
6
|
+
Author-email: lanbaoshen <lanbaoshen@icloud.com>
|
|
7
|
+
Requires-Dist: click>=8.3.1
|
|
8
|
+
Requires-Dist: openai>=2.20.0
|
|
9
|
+
Requires-Dist: pillow>=12.1.1
|
|
10
|
+
Requires-Dist: pydantic>=2.12.5
|
|
11
|
+
Requires-Dist: xpdc>=0.1.2
|
|
12
|
+
Requires-Python: >=3.12
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
|
|
15
|
+
# Phone Copilot
|
|
16
|
+
|
|
17
|
+
[English README](README_EN.md)
|
|
18
|
+
|
|
19
|
+
<p align="center">
|
|
20
|
+
<img src="logo.svg" alt="logo.svg" width="200" height="200">
|
|
21
|
+
</p>
|
|
22
|
+
|
|
23
|
+
## 项目介绍
|
|
24
|
+
|
|
25
|
+
Phone Copilot 是一个参考 [Open-AutoGLM](https://github.com/zai-org/Open-AutoGLM) 的基于 AI 构建的手机端智能助理框架,它能够以多模态方式理解手机屏幕内容,并通过自动化操作帮助用户完成任务。
|
|
26
|
+
|
|
27
|
+
系统通过 ADB 或 HDC 来控制 Android 和 HarmonyOS 设备,以视觉语言模型进行屏幕感知,再结合智能规划能力生成并执行操作流程。用户只需用自然语言描述需求,如“打开高德地图,导航至南京南高铁站”,Phone Copilot 即可自动解析意图、理解当前界面、规划下一步动作并完成整个流程。
|
|
28
|
+
|
|
29
|
+
https://github.com/user-attachments/assets/e60e118e-7cec-4bba-9e8a-38054069cc9f
|
|
30
|
+
|
|
31
|
+
## 环境准备
|
|
32
|
+
|
|
33
|
+
1. Python >= 3.12
|
|
34
|
+
2. Android 设备需要安装 adb,并配置环境变量。HarmonyOS NEXT 设备需要安装 hdc,并配置环境变量
|
|
35
|
+
3. 手机需要启动开发者模式,并且打开 USB 调试选项
|
|
36
|
+
4. 如果 Android 设备需要输入中文,需要额外安装 ADB Keyboard
|
|
37
|
+
5. 本地通过 `vllm` 部署模型,或者使用远端模型 API,建议使用 `AutoGLM-Phone-9B`
|
|
38
|
+
|
|
39
|
+
## 快速开始
|
|
40
|
+
|
|
41
|
+
### uv (recommended)
|
|
42
|
+
|
|
43
|
+
```shell
|
|
44
|
+
pip install uv
|
|
45
|
+
|
|
46
|
+
# 使用 ModelScope API, https://modelscope.cn/models/ZhipuAI/AutoGLM-Phone-9B
|
|
47
|
+
uvx phone-copilot "打开高德地图,导航至南京南高铁站" --base-url "https://api-inference.modelscope.cn/v1" --model "ZhipuAI/AutoGLM-Phone-9B" --api-key "替换为你的 Token" --json "demo.json" --html "demo.html"
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### pip
|
|
51
|
+
|
|
52
|
+
```shell
|
|
53
|
+
pip install phone-copilot
|
|
54
|
+
|
|
55
|
+
# 使用 ModelScope API, https://modelscope.cn/models/ZhipuAI/AutoGLM-Phone-9B
|
|
56
|
+
phone-copilot "打开高德地图,导航至南京南高铁站" --base-url "https://api-inference.modelscope.cn/v1" --model "ZhipuAI/AutoGLM-Phone-9B" --api-key "替换为你的 Token" --json "demo.json" --html "demo.html"
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## 命令行参数
|
|
60
|
+
|
|
61
|
+
| 参数 | 必填 | 默认值 | 说明 |
|
|
62
|
+
| --- | --- | --- | --- |
|
|
63
|
+
| `task`(位置参数) | 是 | - | 要执行的任务描述(自然语言)。示例:`"打开高德地图,导航至南京南高铁站"` |
|
|
64
|
+
| `--base-url` | 是 | - | 模型 API 的 Base URL(OpenAI 兼容)。示例:`https://api-inference.modelscope.cn/v1` 或本地 `http://localhost:8000/v1` |
|
|
65
|
+
| `--model` | 是 | - | 使用的模型名称。示例:`ZhipuAI/AutoGLM-Phone-9B` |
|
|
66
|
+
| `--api-key` | 否 | 空字符串 | 调用模型 API 的鉴权 Token / API Key。若使用本地部署或无需鉴权可不传 |
|
|
67
|
+
| `--device, -d` | 否 | 自动选择 | 目标设备 ID。不指定时默认选择第一个已连接设备 |
|
|
68
|
+
| `--lang, -l` | 否 | `zh` | 系统提示词语言:`zh` 或 `en` |
|
|
69
|
+
| `--max-steps` | 否 | `100` | Agent 最大执行步数,超过后停止 |
|
|
70
|
+
| `--ccf` | 否 | `1000` | 坐标/压缩因子(用于适配不同模型的坐标系)。通常:Qwen 系列设为 `0`,AutoGLM 系列设为 `1000` |
|
|
71
|
+
| `--json` | 否 | - | 保存执行过程的 JSON 报告路径;会自动使用 `.json` 后缀(例如传 `demo` 也会保存为 `demo.json`) |
|
|
72
|
+
| `--html` | 否 | - | 保存执行过程的 HTML 报告路径;会自动使用 `.html` 后缀 |
|
|
73
|
+
| `--adb-keyboard` | 否 | `false` | 启用 Android 的 ADB Keyboard(用于中文输入等)。会自动安装 ADB Keyboard 并设置为当前键盘 |
|
|
74
|
+
|
|
75
|
+
## 代码运行
|
|
76
|
+
|
|
77
|
+
### 通过 High-Level API 使用
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from pathlib import Path
|
|
81
|
+
from phone_copilot.api import run_task
|
|
82
|
+
|
|
83
|
+
agent = run_task(
|
|
84
|
+
task='打开高德地图,导航至南京南高铁站',
|
|
85
|
+
base_url='https://api-inference.modelscope.cn/v1',
|
|
86
|
+
model='ZhipuAI/AutoGLM-Phone-9B',
|
|
87
|
+
api_key='替换为你的 Token'
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
agent.export_html(Path('demo.html'))
|
|
91
|
+
agent.export_json(Path('demo.json'))
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### 通过 Agent 和 Model Client 使用
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from pathlib import Path
|
|
98
|
+
|
|
99
|
+
from phone_copilot.agent import Agent
|
|
100
|
+
from phone_copilot.device import detect_device
|
|
101
|
+
from phone_copilot.model_client import ModelClient
|
|
102
|
+
|
|
103
|
+
agent = Agent(
|
|
104
|
+
device=detect_device(),
|
|
105
|
+
model_client=ModelClient(
|
|
106
|
+
base_url='https://api-inference.modelscope.cn/v1',
|
|
107
|
+
model='ZhipuAI/AutoGLM-Phone-9B',
|
|
108
|
+
api_key='替换为你的 Token'
|
|
109
|
+
),
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
agent.run(task='打开高德地图,导航至南京南高铁站')
|
|
113
|
+
|
|
114
|
+
agent.export_html(Path('demo.html'))
|
|
115
|
+
agent.export_json(Path('demo.json'))
|
|
116
|
+
```
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# Phone Copilot
|
|
2
|
+
|
|
3
|
+
[English README](README_EN.md)
|
|
4
|
+
|
|
5
|
+
<p align="center">
|
|
6
|
+
<img src="logo.svg" alt="logo.svg" width="200" height="200">
|
|
7
|
+
</p>
|
|
8
|
+
|
|
9
|
+
## 项目介绍
|
|
10
|
+
|
|
11
|
+
Phone Copilot 是一个参考 [Open-AutoGLM](https://github.com/zai-org/Open-AutoGLM) 的基于 AI 构建的手机端智能助理框架,它能够以多模态方式理解手机屏幕内容,并通过自动化操作帮助用户完成任务。
|
|
12
|
+
|
|
13
|
+
系统通过 ADB 或 HDC 来控制 Android 和 HarmonyOS 设备,以视觉语言模型进行屏幕感知,再结合智能规划能力生成并执行操作流程。用户只需用自然语言描述需求,如“打开高德地图,导航至南京南高铁站”,Phone Copilot 即可自动解析意图、理解当前界面、规划下一步动作并完成整个流程。
|
|
14
|
+
|
|
15
|
+
https://github.com/user-attachments/assets/e60e118e-7cec-4bba-9e8a-38054069cc9f
|
|
16
|
+
|
|
17
|
+
## 环境准备
|
|
18
|
+
|
|
19
|
+
1. Python >= 3.12
|
|
20
|
+
2. Android 设备需要安装 adb,并配置环境变量。HarmonyOS NEXT 设备需要安装 hdc,并配置环境变量
|
|
21
|
+
3. 手机需要启动开发者模式,并且打开 USB 调试选项
|
|
22
|
+
4. 如果 Android 设备需要输入中文,需要额外安装 ADB Keyboard
|
|
23
|
+
5. 本地通过 `vllm` 部署模型,或者使用远端模型 API,建议使用 `AutoGLM-Phone-9B`
|
|
24
|
+
|
|
25
|
+
## 快速开始
|
|
26
|
+
|
|
27
|
+
### uv (recommended)
|
|
28
|
+
|
|
29
|
+
```shell
|
|
30
|
+
pip install uv
|
|
31
|
+
|
|
32
|
+
# 使用 ModelScope API, https://modelscope.cn/models/ZhipuAI/AutoGLM-Phone-9B
|
|
33
|
+
uvx phone-copilot "打开高德地图,导航至南京南高铁站" --base-url "https://api-inference.modelscope.cn/v1" --model "ZhipuAI/AutoGLM-Phone-9B" --api-key "替换为你的 Token" --json "demo.json" --html "demo.html"
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### pip
|
|
37
|
+
|
|
38
|
+
```shell
|
|
39
|
+
pip install phone-copilot
|
|
40
|
+
|
|
41
|
+
# 使用 ModelScope API, https://modelscope.cn/models/ZhipuAI/AutoGLM-Phone-9B
|
|
42
|
+
phone-copilot "打开高德地图,导航至南京南高铁站" --base-url "https://api-inference.modelscope.cn/v1" --model "ZhipuAI/AutoGLM-Phone-9B" --api-key "替换为你的 Token" --json "demo.json" --html "demo.html"
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## 命令行参数
|
|
46
|
+
|
|
47
|
+
| 参数 | 必填 | 默认值 | 说明 |
|
|
48
|
+
| --- | --- | --- | --- |
|
|
49
|
+
| `task`(位置参数) | 是 | - | 要执行的任务描述(自然语言)。示例:`"打开高德地图,导航至南京南高铁站"` |
|
|
50
|
+
| `--base-url` | 是 | - | 模型 API 的 Base URL(OpenAI 兼容)。示例:`https://api-inference.modelscope.cn/v1` 或本地 `http://localhost:8000/v1` |
|
|
51
|
+
| `--model` | 是 | - | 使用的模型名称。示例:`ZhipuAI/AutoGLM-Phone-9B` |
|
|
52
|
+
| `--api-key` | 否 | 空字符串 | 调用模型 API 的鉴权 Token / API Key。若使用本地部署或无需鉴权可不传 |
|
|
53
|
+
| `--device, -d` | 否 | 自动选择 | 目标设备 ID。不指定时默认选择第一个已连接设备 |
|
|
54
|
+
| `--lang, -l` | 否 | `zh` | 系统提示词语言:`zh` 或 `en` |
|
|
55
|
+
| `--max-steps` | 否 | `100` | Agent 最大执行步数,超过后停止 |
|
|
56
|
+
| `--ccf` | 否 | `1000` | 坐标/压缩因子(用于适配不同模型的坐标系)。通常:Qwen 系列设为 `0`,AutoGLM 系列设为 `1000` |
|
|
57
|
+
| `--json` | 否 | - | 保存执行过程的 JSON 报告路径;会自动使用 `.json` 后缀(例如传 `demo` 也会保存为 `demo.json`) |
|
|
58
|
+
| `--html` | 否 | - | 保存执行过程的 HTML 报告路径;会自动使用 `.html` 后缀 |
|
|
59
|
+
| `--adb-keyboard` | 否 | `false` | 启用 Android 的 ADB Keyboard(用于中文输入等)。会自动安装 ADB Keyboard 并设置为当前键盘 |
|
|
60
|
+
|
|
61
|
+
## 代码运行
|
|
62
|
+
|
|
63
|
+
### 通过 High-Level API 使用
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from pathlib import Path
|
|
67
|
+
from phone_copilot.api import run_task
|
|
68
|
+
|
|
69
|
+
agent = run_task(
|
|
70
|
+
task='打开高德地图,导航至南京南高铁站',
|
|
71
|
+
base_url='https://api-inference.modelscope.cn/v1',
|
|
72
|
+
model='ZhipuAI/AutoGLM-Phone-9B',
|
|
73
|
+
api_key='替换为你的 Token'
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
agent.export_html(Path('demo.html'))
|
|
77
|
+
agent.export_json(Path('demo.json'))
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### 通过 Agent 和 Model Client 使用
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
from pathlib import Path
|
|
84
|
+
|
|
85
|
+
from phone_copilot.agent import Agent
|
|
86
|
+
from phone_copilot.device import detect_device
|
|
87
|
+
from phone_copilot.model_client import ModelClient
|
|
88
|
+
|
|
89
|
+
agent = Agent(
|
|
90
|
+
device=detect_device(),
|
|
91
|
+
model_client=ModelClient(
|
|
92
|
+
base_url='https://api-inference.modelscope.cn/v1',
|
|
93
|
+
model='ZhipuAI/AutoGLM-Phone-9B',
|
|
94
|
+
api_key='替换为你的 Token'
|
|
95
|
+
),
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
agent.run(task='打开高德地图,导航至南京南高铁站')
|
|
99
|
+
|
|
100
|
+
agent.export_html(Path('demo.html'))
|
|
101
|
+
agent.export_json(Path('demo.json'))
|
|
102
|
+
```
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "phone-copilot"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "An AI-powered mobile task automation assistant with support for both Android and HarmonyOS."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "lanbaoshen", email = "lanbaoshen@icloud.com" }
|
|
8
|
+
]
|
|
9
|
+
requires-python = ">=3.12"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"click>=8.3.1",
|
|
12
|
+
"openai>=2.20.0",
|
|
13
|
+
"pillow>=12.1.1",
|
|
14
|
+
"pydantic>=2.12.5",
|
|
15
|
+
"xpdc>=0.1.2",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[tool.uv]
|
|
19
|
+
package = true
|
|
20
|
+
|
|
21
|
+
[build-system]
|
|
22
|
+
requires = ["uv_build>=0.9.30,<0.10.0"]
|
|
23
|
+
build-backend = "uv_build"
|
|
24
|
+
|
|
25
|
+
[dependency-groups]
|
|
26
|
+
dev = [
|
|
27
|
+
"pre-commit>=4.5.1",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.scripts]
|
|
31
|
+
phone-copilot = "phone_copilot:main"
|
|
32
|
+
|
|
33
|
+
[tool.ruff]
|
|
34
|
+
line-length = 120
|
|
35
|
+
indent-width = 4
|
|
36
|
+
exclude = [
|
|
37
|
+
".bzr",
|
|
38
|
+
".direnv",
|
|
39
|
+
".eggs",
|
|
40
|
+
".git",
|
|
41
|
+
".git-rewrite",
|
|
42
|
+
".hg",
|
|
43
|
+
".ipynb_checkpoints",
|
|
44
|
+
".mypy_cache",
|
|
45
|
+
".nox",
|
|
46
|
+
".pants.d",
|
|
47
|
+
".pyenv",
|
|
48
|
+
".pytest_cache",
|
|
49
|
+
".pytype",
|
|
50
|
+
".ruff_cache",
|
|
51
|
+
".svn",
|
|
52
|
+
".tox",
|
|
53
|
+
".venv",
|
|
54
|
+
".vscode",
|
|
55
|
+
"__pypackages__",
|
|
56
|
+
"_build",
|
|
57
|
+
"buck-out",
|
|
58
|
+
"build",
|
|
59
|
+
"dist",
|
|
60
|
+
"node_modules",
|
|
61
|
+
"site-packages",
|
|
62
|
+
]
|
|
63
|
+
|
|
64
|
+
lint.select = ["E", "F", "B", "W", "I", "N", "UP", "ANN", "S", "BLE", "FBT", "C4", "T10", "EM", "ISC", "ICN"]
|
|
65
|
+
lint.ignore = ["ANN002", "ANN003"]
|
|
66
|
+
lint.fixable = ["ALL"]
|
|
67
|
+
lint.unfixable = []
|
|
68
|
+
lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
|
|
69
|
+
lint.isort.known-third-party = []
|
|
70
|
+
|
|
71
|
+
format.quote-style = "single"
|
|
72
|
+
format.indent-style = "space"
|
|
73
|
+
format.skip-magic-trailing-comma = false
|
|
74
|
+
format.line-ending = "auto"
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
from loguru import logger
|
|
5
|
+
|
|
6
|
+
# Best-effort file logging: add a loguru sink at ~/.phone-copilot/log.log that
# rotates at 10 MB. Any failure while setting it up is reported through the
# logger itself and otherwise ignored, so importing the package does not crash.
try:
    LOG_DIR = Path.home().joinpath('.phone-copilot')
    logger.add(LOG_DIR.joinpath('log.log'), rotation='10 MB')
except Exception as exc:  # noqa: BLE001
    logger.error(f'Failed to set up logger directory: {exc}')
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@click.command()
@click.argument('task', required=True)
@click.option('--base-url', required=True, help='The base URL of the model API.')
@click.option('--model', required=True, help='The name of the model to use.')
@click.option('--api-key', default='', help='The API key for authentication with the model API.')
@click.option(
    '--device', '-d', help='The ID of the target device. If not specified, the first connected device will be used.'
)
@click.option(
    '--lang', '-l', default='zh', type=click.Choice(['zh', 'en']), help='The language for system prompts (zh or en).'
)
@click.option('--max-steps', default=100, help='The maximum number of steps the agent can take before giving up.')
@click.option(
    '--ccf',
    default=1000,
    help='Context Compression Factor for the agent. Like QWen Model, it should be set to 0. AutoGLM Model should be set to 1000.',  # noqa: E501
)
@click.option('--json', help='The json report file path to save the execution details.')
@click.option('--html', help='The html report file path to save the execution details.')
@click.option(
    '--adb-keyboard',
    is_flag=True,
    default=False,
    # This is a boolean flag, so it takes no keyboard ID; the help text says what it actually enables.
    help='Enable the ADB Keyboard input method on Android (installs it and sets it as the current keyboard).',
)
def main(
    task: str,
    base_url: str,
    model: str,
    api_key: str,
    device: str | None,
    lang: str,
    max_steps: int,
    ccf: int,
    json: str | None,
    html: str | None,
    adb_keyboard: bool,  # noqa: FBT001
) -> None:
    """Run a natural-language task on a connected device and optionally save reports.

    \f
    Everything after the form feed above is hidden from the ``--help`` output.

    Args:
        task: Natural-language description of the task to execute.
        base_url: Base URL of the model API.
        model: Name of the model to use.
        api_key: API key for the model API; defaults to an empty string.
        device: Target device ID, or ``None`` when ``--device`` is not given.
        lang: System prompt language, ``'zh'`` or ``'en'``.
        max_steps: Maximum number of agent steps.
        ccf: Value of ``--ccf``, forwarded unchanged to ``run_task``.
        json: Path for the JSON report, or ``None`` to skip it.
        html: Path for the HTML report, or ``None`` to skip it.
        adb_keyboard: Whether ``--adb-keyboard`` was passed.
    """
    # Imported inside the function, as in the original.
    # NOTE(review): presumably to keep package import light — confirm.
    from phone_copilot.api import run_task

    agent = run_task(
        task=task,
        base_url=base_url,
        model=model,
        api_key=api_key,
        device=device,
        language=lang,
        max_steps=max_steps,
        ccf=ccf,
        adb_keyboard=adb_keyboard,
    )

    # with_suffix() replaces any existing extension, so reports always end in .json / .html.
    if json:
        agent.export_json(Path(json).with_suffix('.json'))

    if html:
        agent.export_html(Path(html).with_suffix('.html'))


if __name__ == '__main__':
    main()
|