qamule-trajectory 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qamule_trajectory-0.1.0/PKG-INFO +73 -0
- qamule_trajectory-0.1.0/README.md +64 -0
- qamule_trajectory-0.1.0/pyproject.toml +22 -0
- qamule_trajectory-0.1.0/src/qamule_trajectory/__init__.py +19 -0
- qamule_trajectory-0.1.0/src/qamule_trajectory/cli.py +146 -0
- qamule_trajectory-0.1.0/src/qamule_trajectory/core.py +187 -0
- qamule_trajectory-0.1.0/src/qamule_trajectory/py.typed +0 -0
- qamule_trajectory-0.1.0/src/qamule_trajectory/schema.py +30 -0
- qamule_trajectory-0.1.0/src/qamule_trajectory/static/logo.jpg +0 -0
- qamule_trajectory-0.1.0/src/qamule_trajectory/validation.py +343 -0
- qamule_trajectory-0.1.0/src/qamule_trajectory/viewer.py +1023 -0
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: qamule-trajectory
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python trajectory toolkit for QAMule VLM and agent training datasets.
|
|
5
|
+
Author: lanbaoshen
|
|
6
|
+
Author-email: lanbaoshen <lanbaoshen@icloud.com>
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
|
|
10
|
+
# qamule-trajectory
|
|
11
|
+
|
|
12
|
+
Python trajectory toolkit for QAMule VLM and agent training datasets.
|
|
13
|
+
|
|
14
|
+
## Install
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
uv sync
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## CLI
|
|
21
|
+
|
|
22
|
+
Start the local viewer for the current directory and open the browser:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
uv run qamule-trajectory
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Start the viewer quietly without opening the browser:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
uv run qamule-trajectory -q
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Quiet mode still prints the local viewer URL.
|
|
35
|
+
|
|
36
|
+
Create a session:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
uv run qamule-trajectory init dataset/open_wifi_20260626_120000 \
|
|
40
|
+
--instruction "Open Wi-Fi in Settings" \
|
|
41
|
+
--app com.android.settings \
|
|
42
|
+
--device-model "Pixel 8" \
|
|
43
|
+
--resolution 1080 2400 \
|
|
44
|
+
--android 14
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Append a step after saving `step_001.jpg` in the session directory:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
uv run qamule-trajectory append dataset/open_wifi_20260626_120000 \
|
|
51
|
+
--screenshot step_001.jpg \
|
|
52
|
+
--current-app com.android.settings/.Settings \
|
|
53
|
+
--thought "I can see the Settings home screen." \
|
|
54
|
+
--action '{"type":"click","x":540,"y":620}'
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Validate a dataset, session, or `trajectory.json`:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
uv run qamule-trajectory validate dataset/
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Browse trajectories locally:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
uv run qamule-trajectory view dataset/ --port 8932
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
`-q` and `--quiet` are accepted for quiet viewer mode.
|
|
70
|
+
|
|
71
|
+
## Trajectory Layout
|
|
72
|
+
|
|
73
|
+
Each session is a directory named `{task_slug}_YYYYMMDD_HHMMSS` containing `trajectory.json` and sequential screenshots named `step_001.jpg`, `step_002.jpg`, and so on.
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# qamule-trajectory
|
|
2
|
+
|
|
3
|
+
Python trajectory toolkit for QAMule VLM and agent training datasets.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
uv sync
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## CLI
|
|
12
|
+
|
|
13
|
+
Start the local viewer for the current directory and open the browser:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
uv run qamule-trajectory
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Start the viewer quietly without opening the browser:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
uv run qamule-trajectory -q
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Quiet mode still prints the local viewer URL.
|
|
26
|
+
|
|
27
|
+
Create a session:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
uv run qamule-trajectory init dataset/open_wifi_20260626_120000 \
|
|
31
|
+
--instruction "Open Wi-Fi in Settings" \
|
|
32
|
+
--app com.android.settings \
|
|
33
|
+
--device-model "Pixel 8" \
|
|
34
|
+
--resolution 1080 2400 \
|
|
35
|
+
--android 14
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Append a step after saving `step_001.jpg` in the session directory:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
uv run qamule-trajectory append dataset/open_wifi_20260626_120000 \
|
|
42
|
+
--screenshot step_001.jpg \
|
|
43
|
+
--current-app com.android.settings/.Settings \
|
|
44
|
+
--thought "I can see the Settings home screen." \
|
|
45
|
+
--action '{"type":"click","x":540,"y":620}'
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Validate a dataset, session, or `trajectory.json`:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
uv run qamule-trajectory validate dataset/
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Browse trajectories locally:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
uv run qamule-trajectory view dataset/ --port 8932
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
`-q` and `--quiet` are accepted for quiet viewer mode.
|
|
61
|
+
|
|
62
|
+
## Trajectory Layout
|
|
63
|
+
|
|
64
|
+
Each session is a directory named `{task_slug}_YYYYMMDD_HHMMSS` containing `trajectory.json` and sequential screenshots named `step_001.jpg`, `step_002.jpg`, and so on.
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "qamule-trajectory"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Python trajectory toolkit for QAMule VLM and agent training datasets."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "lanbaoshen", email = "lanbaoshen@icloud.com" }
|
|
8
|
+
]
|
|
9
|
+
requires-python = ">=3.10"
|
|
10
|
+
dependencies = []
|
|
11
|
+
|
|
12
|
+
[project.scripts]
|
|
13
|
+
qamule-trajectory = "qamule_trajectory.cli:main"
|
|
14
|
+
|
|
15
|
+
[build-system]
|
|
16
|
+
requires = ["uv_build>=0.11.8,<0.12.0"]
|
|
17
|
+
build-backend = "uv_build"
|
|
18
|
+
|
|
19
|
+
[dependency-groups]
|
|
20
|
+
dev = [
|
|
21
|
+
"pytest>=9.1.1",
|
|
22
|
+
]
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Python trajectory toolkit for QAMule datasets."""
|
|
2
|
+
|
|
3
|
+
from .core import TrajectoryError, append_step, expected_screenshot_name, init_session, load_trajectory, parse_action
|
|
4
|
+
from .schema import STEP_JPG_RE, TASK_SLUG_RE, TERMINAL_ACTIONS
|
|
5
|
+
from .validation import ValidationResult, validate_path
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"STEP_JPG_RE",
|
|
9
|
+
"TASK_SLUG_RE",
|
|
10
|
+
"TERMINAL_ACTIONS",
|
|
11
|
+
"TrajectoryError",
|
|
12
|
+
"ValidationResult",
|
|
13
|
+
"append_step",
|
|
14
|
+
"expected_screenshot_name",
|
|
15
|
+
"init_session",
|
|
16
|
+
"load_trajectory",
|
|
17
|
+
"parse_action",
|
|
18
|
+
"validate_path",
|
|
19
|
+
]
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""Command line interface for qamule-trajectory."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from .core import TrajectoryError, append_step, init_session, parse_bool
|
|
9
|
+
from .validation import format_validation_result, validate_path
|
|
10
|
+
from .viewer import serve
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _arg_bool(value: str) -> bool:
    """Adapt ``parse_bool`` for use as an argparse ``type=`` callable.

    A ``TrajectoryError`` from ``parse_bool`` is re-raised as
    ``argparse.ArgumentTypeError`` carrying the same message, which is the
    exception argparse expects from a type converter.
    """
    try:
        parsed = parse_bool(value)
    except TrajectoryError as err:
        raise argparse.ArgumentTypeError(str(err)) from err
    else:
        return parsed
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _cmd_init(args: argparse.Namespace) -> int:
    """Run the ``init`` subcommand.

    Forwards the parsed options to ``init_session``, prints the value it
    returns, and reports success with exit code 0. Exceptions raised by
    ``init_session`` are not handled here.
    """
    options = {
        "instruction": args.instruction,
        "app": args.app,
        "device_model": args.device_model,
        "resolution": args.resolution,
        "android": args.android,
        "task_slug": args.task_slug,
        "dataset_root": args.dataset_root,
        "force": args.force,
    }
    created_dir = init_session(args.session_dir, **options)
    print(created_dir)
    return 0
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _cmd_append(args: argparse.Namespace) -> int:
    """Run the ``append`` subcommand.

    Forwards the parsed options to ``append_step``, prints the value it
    returns, and reports success with exit code 0. Exceptions raised by
    ``append_step`` are not handled here.
    """
    step_fields = {
        "screenshot": args.screenshot,
        "current_app": args.current_app,
        "thought": args.thought,
        "raw_action": args.action,
        "step_success": args.step_success,
    }
    written_path = append_step(args.session_dir, **step_fields)
    print(written_path)
    return 0
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _cmd_validate(args: argparse.Namespace) -> int:
    """Run the ``validate`` subcommand.

    Prints the formatted validation report and returns 0 when the result's
    ``ok`` attribute is truthy, otherwise 1.
    """
    outcome = validate_path(args.input_path)
    report = format_validation_result(outcome)
    print(report)
    if outcome.ok:
        return 0
    return 1
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _cmd_view(args: argparse.Namespace) -> int:
    """Run the ``view`` subcommand (also used when no subcommand is given).

    The browser is opened only when neither ``--no-open`` nor quiet mode is
    requested. A ``ValueError`` raised while serving is reported on stderr
    and mapped to exit code 2; otherwise the function returns 0.
    """
    try:
        # ``quiet`` may be absent from the namespace, hence the getattr default.
        silent = bool(getattr(args, "quiet", False))
        launch_browser = not (args.no_open or silent)
        serve(
            args.dataset_dir,
            port=args.port,
            open_browser=launch_browser,
            quiet=silent,
        )
    except ValueError as err:
        print(f"[FATAL] {err}", file=sys.stderr)
        return 2
    return 0
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the ``qamule-trajectory`` command line parser.

    The top-level parser owns ``-q/--quiet`` and ``--port`` so that they work
    when no subcommand is given. The ``view`` subcommand accepts the same two
    options. argparse copies every attribute of a subparser's namespace,
    defaults included, over the parent namespace, so the ``view`` copies use
    ``argparse.SUPPRESS`` as their default. Without it, a value given before
    the subcommand (``qamule-trajectory --port 9000 -q view``) would be
    silently reset to the subparser default.

    Returns:
        The configured parser. Every subcommand stores its handler in the
        ``func`` attribute of the parsed namespace; when no subcommand is
        given, ``func`` is absent.
    """
    parser = argparse.ArgumentParser(
        prog="qamule-trajectory",
        description="Create, validate, and browse QAMule trajectory datasets.",
    )
    parser.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help="start the default viewer quietly without opening the browser",
    )
    parser.add_argument("--port", type=int, default=8932, help="port for the default viewer")
    subparsers = parser.add_subparsers(dest="command")

    init_parser = subparsers.add_parser("init", help="create a new trajectory.json for a session")
    init_parser.add_argument("session_dir", nargs="?", help="session directory, e.g. dataset/open_wifi_20260514_101530")
    init_parser.add_argument("--task-slug", help="create dataset/{task_slug}_YYYYMMDD_HHMMSS automatically")
    init_parser.add_argument("--dataset-root", default="dataset", help="dataset root used with --task-slug")
    init_parser.add_argument("--instruction", required=True, help="human task instruction")
    init_parser.add_argument("--app", required=True, help="target app package")
    init_parser.add_argument("--device-model", required=True, help="device model")
    init_parser.add_argument(
        "--resolution",
        required=True,
        nargs=2,
        type=int,
        metavar=("WIDTH", "HEIGHT"),
        help="device resolution as width height",
    )
    init_parser.add_argument("--android", required=True, help="android version")
    init_parser.add_argument("--force", action="store_true", help="overwrite an existing trajectory.json")
    init_parser.set_defaults(func=_cmd_init)

    append_parser = subparsers.add_parser("append", help="append one recorded step to trajectory.json")
    append_parser.add_argument("session_dir", help="session directory that already contains trajectory.json")
    append_parser.add_argument("--screenshot", required=True, help="step screenshot file name, e.g. step_001.jpg")
    append_parser.add_argument("--current-app", required=True, help="current package/activity string")
    append_parser.add_argument("--thought", required=True, help="observation and reasoning for this step")
    append_parser.add_argument("--action", required=True, help="JSON object describing the action")
    append_parser.add_argument(
        "--step-success",
        default=True,
        type=_arg_bool,
        help="whether this recorded step executed successfully (default: true)",
    )
    append_parser.set_defaults(func=_cmd_append)

    validate_parser = subparsers.add_parser("validate", help="validate a dataset, session, or trajectory.json")
    validate_parser.add_argument("input_path", nargs="?", default="dataset", help="dataset dir, session dir, or trajectory.json")
    validate_parser.set_defaults(func=_cmd_validate)

    view_parser = subparsers.add_parser("view", help="serve a local trajectory viewer")
    view_parser.add_argument("dataset_dir", nargs="?", default=".", help="dataset directory to browse")
    # SUPPRESS: only set ``port``/``quiet`` when given after ``view``, so the
    # values (or defaults) parsed by the top-level parser are kept otherwise.
    view_parser.add_argument("--port", type=int, default=argparse.SUPPRESS, help="port to serve on")
    view_parser.add_argument("--no-open", action="store_true", help="do not open the browser automatically")
    view_parser.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        default=argparse.SUPPRESS,
        help="serve quietly without opening the browser",
    )
    view_parser.set_defaults(func=_cmd_view)

    return parser
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def main(argv: list[str] | None = None) -> int:
    """Parse ``argv`` and dispatch to the selected subcommand handler.

    When no subcommand is given, the namespace carries no ``func``; the
    viewer handler is then used with the current directory and automatic
    browser opening enabled.

    Returns:
        The handler's exit code, or 2 after printing the message of a
        ``TrajectoryError`` to stderr.
    """
    namespace = build_parser().parse_args(argv)

    if not hasattr(namespace, "func"):
        # Bare invocation: behave like ``view .``.
        namespace.func = _cmd_view
        namespace.dataset_dir = "."
        namespace.no_open = False

    handler = namespace.func
    try:
        exit_code = handler(namespace)
    except TrajectoryError as err:
        print(f"[FATAL] {err}", file=sys.stderr)
        exit_code = 2
    return exit_code
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
if __name__ == "__main__":
|
|
146
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""Core helpers for creating and updating trajectory sessions."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
import json
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from .schema import STEP_JPG_RE, TASK_SLUG_RE, TERMINAL_ACTIONS
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TrajectoryError(ValueError):
    """Signal that a trajectory operation could not be carried out.

    Derives from ``ValueError``, so handlers written for ``ValueError`` also
    catch it.
    """
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def parse_bool(value: str) -> bool:
    """Convert a textual flag into a bool.

    Surrounding whitespace and letter case are ignored. ``true``, ``1``,
    ``yes`` and ``y`` map to True; ``false``, ``0``, ``no`` and ``n`` map to
    False.

    Raises:
        TrajectoryError: if the text is none of the accepted tokens.
    """
    token = value.strip().lower()
    truthy = ("true", "1", "yes", "y")
    falsy = ("false", "0", "no", "n")
    if token not in truthy and token not in falsy:
        raise TrajectoryError("expected true/false")
    return token in truthy
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def expected_screenshot_name(step_number: int) -> str:
    """Return the screenshot file name for a step, e.g. ``step_001.jpg``.

    The number is zero-padded to at least three digits; larger numbers are
    written out in full.
    """
    return "step_{:03d}.jpg".format(step_number)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def parse_action(raw_action: str) -> dict[str, Any]:
    """Decode an action given as JSON text.

    Returns:
        The decoded JSON object, unchanged.

    Raises:
        TrajectoryError: if the text is not valid JSON, does not decode to an
            object, or its ``type`` entry is not a non-blank string.
    """
    try:
        decoded = json.loads(raw_action)
    except json.JSONDecodeError as exc:
        raise TrajectoryError(
            f"invalid action JSON: line {exc.lineno}, column {exc.colno}: {exc.msg}"
        ) from exc

    if not isinstance(decoded, dict):
        raise TrajectoryError("action must decode to a JSON object")

    kind = decoded.get("type")
    if isinstance(kind, str) and kind.strip():
        return decoded
    raise TrajectoryError("action.type must be a non-empty string")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def load_trajectory(session_dir: str | Path) -> tuple[Path, dict[str, Any]]:
    """Read and minimally check ``trajectory.json`` from a session directory.

    Args:
        session_dir: Session directory, as a ``Path`` or a plain string.

    Returns:
        A ``(trajectory_path, data)`` pair, where ``data`` is the decoded
        top-level JSON object.

    Raises:
        TrajectoryError: if the file is missing, cannot be read, is not valid
            UTF-8, is not valid JSON, is not a JSON object, or its ``steps``
            field is not an array.
    """
    # Coerce so string callers work; Path(Path) is a no-op copy.
    trajectory_path = Path(session_dir) / "trajectory.json"
    if not trajectory_path.is_file():
        raise TrajectoryError(f"trajectory.json not found in session directory: {session_dir}")

    try:
        text = trajectory_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError; without this
        # clause a non-UTF-8 file would escape as a raw traceback.
        raise TrajectoryError(f"cannot read {trajectory_path}: {exc}") from exc

    try:
        data = json.loads(text)
    except json.JSONDecodeError as exc:
        raise TrajectoryError(
            f"invalid JSON in {trajectory_path}: line {exc.lineno}, column {exc.colno}: {exc.msg}"
        ) from exc

    if not isinstance(data, dict):
        raise TrajectoryError("trajectory.json must contain a top-level object")

    steps = data.get("steps")
    if not isinstance(steps, list):
        raise TrajectoryError("trajectory.json field 'steps' must be an array")

    return trajectory_path, data
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def write_trajectory(trajectory_path: Path, data: dict[str, Any]) -> None:
    """Serialize ``data`` to ``trajectory_path`` as indented UTF-8 JSON.

    ``data["total_steps"]`` is overwritten in place with the length of
    ``data["steps"]`` (0 when the key is absent) before writing. The output
    keeps non-ASCII characters unescaped and ends with a newline.
    """
    recorded_steps = data.get("steps", [])
    data["total_steps"] = len(recorded_steps)
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    trajectory_path.write_text(f"{serialized}\n", encoding="utf-8")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def resolve_session_dir(
    session_dir: str | Path | None,
    *,
    task_slug: str | None = None,
    dataset_root: str | Path = "dataset",
    now: Callable[[], datetime] = datetime.now,
) -> Path:
    """Work out which directory a session lives in.

    Exactly one of ``session_dir`` and ``task_slug`` must be given. An
    explicit ``session_dir`` is returned as a ``Path``. A ``task_slug`` yields
    ``{dataset_root}/{task_slug}_YYYYMMDD_HHMMSS``, with the timestamp taken
    from ``now()``.

    Raises:
        TrajectoryError: if both or neither are given, or if the slug does
            not match ``TASK_SLUG_RE``.
    """
    if session_dir:
        if task_slug:
            raise TrajectoryError("pass either SESSION_DIR or --task-slug, not both")
        return Path(session_dir)

    if not task_slug:
        raise TrajectoryError("init requires either SESSION_DIR or --task-slug")
    if TASK_SLUG_RE.match(task_slug) is None:
        raise TrajectoryError("task slug must match pattern '[A-Za-z0-9][A-Za-z0-9_-]*'")

    stamp = now().strftime("%Y%m%d_%H%M%S")
    session_name = f"{task_slug}_{stamp}"
    return Path(dataset_root) / session_name
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def init_session(
    session_dir: str | Path | None,
    *,
    instruction: str,
    app: str,
    device_model: str,
    resolution: tuple[int, int] | list[int],
    android: str,
    task_slug: str | None = None,
    dataset_root: str | Path = "dataset",
    force: bool = False,
) -> Path:
    """Create a session directory containing an empty ``trajectory.json``.

    The directory comes from ``resolve_session_dir`` and is created together
    with any missing parents. The new trajectory uses the directory name as
    ``task_id``, has no steps, and starts with ``success`` set to False.

    Returns:
        The resolved session directory.

    Raises:
        TrajectoryError: if ``trajectory.json`` already exists and ``force``
            is false, or if the directory cannot be resolved.
    """
    target_dir = resolve_session_dir(
        session_dir,
        task_slug=task_slug,
        dataset_root=dataset_root,
    )
    target_dir.mkdir(parents=True, exist_ok=True)

    trajectory_path = target_dir / "trajectory.json"
    already_present = trajectory_path.exists()
    if already_present and not force:
        raise TrajectoryError(f"trajectory.json already exists: {trajectory_path}. Use --force to overwrite it.")

    width, height = resolution[0], resolution[1]
    device: dict[str, Any] = {
        "model": device_model,
        "resolution": [int(width), int(height)],
        "android": android,
    }
    skeleton: dict[str, Any] = {
        "task_id": target_dir.name,
        "instruction": instruction,
        "app": app,
        "device": device,
        "steps": [],
        "success": False,
        "total_steps": 0,
    }

    write_trajectory(trajectory_path, skeleton)
    return target_dir
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def append_step(
    session_dir: str | Path,
    *,
    screenshot: str,
    current_app: str,
    thought: str,
    raw_action: str,
    step_success: bool = True,
) -> Path:
    """Append one recorded step to a session's ``trajectory.json``.

    The new step is numbered ``len(steps) + 1`` and its screenshot must carry
    the matching ``step_NNN.jpg`` name and already exist in the session
    directory. A ``finish`` action sets the trajectory's ``success`` to True
    and an ``impossible`` action sets it to False; other actions leave it
    untouched.

    Args:
        session_dir: Session directory that already contains trajectory.json.
        screenshot: Screenshot file name for this step.
        current_app: Current package/activity string stored with the step.
        thought: Free-text reasoning stored with the step.
        raw_action: JSON text describing the action (see ``parse_action``).
        step_success: Stored as the step's ``success`` flag.

    Returns:
        Path of the rewritten ``trajectory.json``.

    Raises:
        TrajectoryError: if the trajectory cannot be loaded, its last step is
            not an object, the last step is a terminal action, the action
            JSON is invalid, or the screenshot name/file is wrong or missing.
    """
    resolved_session_dir = Path(session_dir)
    trajectory_path, data = load_trajectory(resolved_session_dir)
    steps = data["steps"]

    if steps:
        last_step = steps[-1]
        # load_trajectory only guarantees that ``steps`` is a list; a
        # hand-edited file may hold non-object entries, which would otherwise
        # fail with AttributeError on ``.get``.
        if not isinstance(last_step, dict):
            raise TrajectoryError(f"trajectory.json step {len(steps)} must be an object")
        last_action = last_step.get("action", {})
        last_type = last_action.get("type") if isinstance(last_action, dict) else None
        # The str check keeps an unhashable JSON value (list/object) from
        # raising TypeError in the set membership test.
        if isinstance(last_type, str) and last_type in TERMINAL_ACTIONS:
            raise TrajectoryError("cannot append after a terminal action; start a new session or edit the JSON manually")

    action = parse_action(raw_action)
    step_number = len(steps) + 1
    expected_name = expected_screenshot_name(step_number)

    if not STEP_JPG_RE.match(screenshot):
        raise TrajectoryError("screenshot must match pattern 'step_{NNN}.jpg'")
    if screenshot != expected_name:
        raise TrajectoryError(f"screenshot for step {step_number} must be named {expected_name}")
    if not (resolved_session_dir / screenshot).is_file():
        raise TrajectoryError(f"screenshot not found in session directory: {screenshot}")

    steps.append(
        {
            "step": step_number,
            "screenshot": screenshot,
            "current_app": current_app,
            "thought": thought,
            "action": action,
            "success": step_success,
        }
    )

    # Terminal actions decide the overall outcome of the trajectory.
    action_type = action.get("type")
    if action_type == "finish":
        data["success"] = True
    elif action_type == "impossible":
        data["success"] = False

    write_trajectory(trajectory_path, data)
    return trajectory_path
|
|
File without changes
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Shared trajectory schema constants."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
# File and directory name patterns.
#
# ``\Z`` is used instead of ``$`` because ``$`` also matches just before a
# trailing newline, which would let names such as "step_001.jpg\n" through.
# ``[0-9]`` is used instead of ``\d`` because ``\d`` matches any Unicode
# decimal digit in a str pattern, not only ASCII 0-9.

# Step screenshot: "step_" + exactly three digits (captured) + ".jpg".
STEP_JPG_RE = re.compile(r"^step_([0-9]{3})\.jpg\Z")
# Task slug: an ASCII letter or digit, then letters, digits, "_" or "-".
TASK_SLUG_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_-]*\Z")
# Session directory: a task slug followed by "_YYYYMMDD_HHMMSS" (8 + 6 digits).
SESSION_DIR_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_-]*_[0-9]{8}_[0-9]{6}\Z")

# Action types after which no further step may be appended.
TERMINAL_ACTIONS = {"finish", "impossible"}

# Parameter names listed as required for each known action type.
ACTION_REQUIRED_PARAMS = {
    "app_start": {"app"},
    "click": {"x", "y"},
    "long_click": {"x", "y"},
    "swipe": {"x1", "y1", "x2", "y2"},
    "type": {"text"},
    "press": {"key"},
    "wait": {"duration"},
    "finish": {"reason"},
    "impossible": {"reason"},
}

# Parameter names listed as numeric for each action type that has any.
NUMERIC_ACTION_KEYS = {
    "click": {"x", "y"},
    "long_click": {"x", "y"},
    "swipe": {"x1", "y1", "x2", "y2"},
    "wait": {"duration"},
}
|
|
Binary file
|