qamule-trajectory 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,73 @@
1
+ Metadata-Version: 2.3
2
+ Name: qamule-trajectory
3
+ Version: 0.1.0
4
+ Summary: Python trajectory toolkit for QAMule VLM and agent training datasets.
5
+ Author: lanbaoshen
6
+ Author-email: lanbaoshen <lanbaoshen@icloud.com>
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+
10
+ # qamule-trajectory
11
+
12
+ Python trajectory toolkit for QAMule VLM and agent training datasets.
13
+
14
+ ## Install
15
+
16
+ ```bash
17
+ uv sync
18
+ ```
19
+
20
+ ## CLI
21
+
22
+ Start the local viewer for the current directory and open the browser:
23
+
24
+ ```bash
25
+ uv run qamule-trajectory
26
+ ```
27
+
28
+ Start the viewer quietly without opening the browser:
29
+
30
+ ```bash
31
+ uv run qamule-trajectory -q
32
+ ```
33
+
34
+ Quiet mode still prints the local viewer URL.
35
+
36
+ Create a session:
37
+
38
+ ```bash
39
+ uv run qamule-trajectory init dataset/open_wifi_20260626_120000 \
40
+ --instruction "Open Wi-Fi in Settings" \
41
+ --app com.android.settings \
42
+ --device-model "Pixel 8" \
43
+ --resolution 1080 2400 \
44
+ --android 14
45
+ ```
46
+
47
+ Append a step after saving `step_001.jpg` in the session directory:
48
+
49
+ ```bash
50
+ uv run qamule-trajectory append dataset/open_wifi_20260626_120000 \
51
+ --screenshot step_001.jpg \
52
+ --current-app com.android.settings/.Settings \
53
+ --thought "I can see the Settings home screen." \
54
+ --action '{"type":"click","x":540,"y":620}'
55
+ ```
56
+
57
+ Validate a dataset, session, or `trajectory.json`:
58
+
59
+ ```bash
60
+ uv run qamule-trajectory validate dataset/
61
+ ```
62
+
63
+ Browse trajectories locally:
64
+
65
+ ```bash
66
+ uv run qamule-trajectory view dataset/ --port 8932
67
+ ```
68
+
69
+ Pass `-q` (or its long form `--quiet`) to run the viewer in quiet mode; the flag is accepted both with and without the `view` subcommand.
70
+
71
+ ## Trajectory Layout
72
+
73
+ Each session is a directory named `{task_slug}_YYYYMMDD_HHMMSS` containing `trajectory.json` and sequential screenshots named `step_001.jpg`, `step_002.jpg`, and so on.
@@ -0,0 +1,64 @@
1
+ # qamule-trajectory
2
+
3
+ Python trajectory toolkit for QAMule VLM and agent training datasets.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ uv sync
9
+ ```
10
+
11
+ ## CLI
12
+
13
+ Start the local viewer for the current directory and open the browser:
14
+
15
+ ```bash
16
+ uv run qamule-trajectory
17
+ ```
18
+
19
+ Start the viewer quietly without opening the browser:
20
+
21
+ ```bash
22
+ uv run qamule-trajectory -q
23
+ ```
24
+
25
+ Quiet mode still prints the local viewer URL.
26
+
27
+ Create a session:
28
+
29
+ ```bash
30
+ uv run qamule-trajectory init dataset/open_wifi_20260626_120000 \
31
+ --instruction "Open Wi-Fi in Settings" \
32
+ --app com.android.settings \
33
+ --device-model "Pixel 8" \
34
+ --resolution 1080 2400 \
35
+ --android 14
36
+ ```
37
+
38
+ Append a step after saving `step_001.jpg` in the session directory:
39
+
40
+ ```bash
41
+ uv run qamule-trajectory append dataset/open_wifi_20260626_120000 \
42
+ --screenshot step_001.jpg \
43
+ --current-app com.android.settings/.Settings \
44
+ --thought "I can see the Settings home screen." \
45
+ --action '{"type":"click","x":540,"y":620}'
46
+ ```
47
+
48
+ Validate a dataset, session, or `trajectory.json`:
49
+
50
+ ```bash
51
+ uv run qamule-trajectory validate dataset/
52
+ ```
53
+
54
+ Browse trajectories locally:
55
+
56
+ ```bash
57
+ uv run qamule-trajectory view dataset/ --port 8932
58
+ ```
59
+
60
+ Pass `-q` (or its long form `--quiet`) to run the viewer in quiet mode; the flag is accepted both with and without the `view` subcommand.
61
+
62
+ ## Trajectory Layout
63
+
64
+ Each session is a directory named `{task_slug}_YYYYMMDD_HHMMSS` containing `trajectory.json` and sequential screenshots named `step_001.jpg`, `step_002.jpg`, and so on.
@@ -0,0 +1,22 @@
1
+ [project]
2
+ name = "qamule-trajectory"
3
+ version = "0.1.0"
4
+ description = "Python trajectory toolkit for QAMule VLM and agent training datasets."
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "lanbaoshen", email = "lanbaoshen@icloud.com" }
8
+ ]
9
+ requires-python = ">=3.10"
10
+ dependencies = []
11
+
12
+ [project.scripts]
13
+ qamule-trajectory = "qamule_trajectory.cli:main"
14
+
15
+ [build-system]
16
+ requires = ["uv_build>=0.11.8,<0.12.0"]
17
+ build-backend = "uv_build"
18
+
19
+ [dependency-groups]
20
+ dev = [
21
+ "pytest>=9.1.1",
22
+ ]
@@ -0,0 +1,19 @@
1
+ """Python trajectory toolkit for QAMule datasets."""
2
+
3
+ from .core import TrajectoryError, append_step, expected_screenshot_name, init_session, load_trajectory, parse_action
4
+ from .schema import STEP_JPG_RE, TASK_SLUG_RE, TERMINAL_ACTIONS
5
+ from .validation import ValidationResult, validate_path
6
+
7
+ __all__ = [
8
+ "STEP_JPG_RE",
9
+ "TASK_SLUG_RE",
10
+ "TERMINAL_ACTIONS",
11
+ "TrajectoryError",
12
+ "ValidationResult",
13
+ "append_step",
14
+ "expected_screenshot_name",
15
+ "init_session",
16
+ "load_trajectory",
17
+ "parse_action",
18
+ "validate_path",
19
+ ]
@@ -0,0 +1,146 @@
1
+ """Command line interface for qamule-trajectory."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import sys
7
+
8
+ from .core import TrajectoryError, append_step, init_session, parse_bool
9
+ from .validation import format_validation_result, validate_path
10
+ from .viewer import serve
11
+
12
+
13
def _arg_bool(value: str) -> bool:
    """Adapt ``parse_bool`` for use as an argparse ``type=`` callable.

    A ``TrajectoryError`` raised by ``parse_bool`` is re-raised as
    ``argparse.ArgumentTypeError`` carrying the same message.
    """
    try:
        parsed = parse_bool(value)
    except TrajectoryError as err:
        raise argparse.ArgumentTypeError(str(err)) from err
    return parsed
18
+
19
+
20
def _cmd_init(args: argparse.Namespace) -> int:
    """Run the ``init`` subcommand.

    Forwards the parsed options to ``init_session``, prints the session
    directory it returns, and reports exit status 0.
    """
    target = args.session_dir
    options = {
        "instruction": args.instruction,
        "app": args.app,
        "device_model": args.device_model,
        "resolution": args.resolution,
        "android": args.android,
        "task_slug": args.task_slug,
        "dataset_root": args.dataset_root,
        "force": args.force,
    }
    created_dir = init_session(target, **options)
    print(created_dir)
    return 0
34
+
35
+
36
def _cmd_append(args: argparse.Namespace) -> int:
    """Run the ``append`` subcommand.

    Forwards the parsed options to ``append_step``, prints the path it
    returns, and reports exit status 0.
    """
    target = args.session_dir
    step_fields = {
        "screenshot": args.screenshot,
        "current_app": args.current_app,
        "thought": args.thought,
        "raw_action": args.action,
        "step_success": args.step_success,
    }
    written_path = append_step(target, **step_fields)
    print(written_path)
    return 0
47
+
48
+
49
def _cmd_validate(args: argparse.Namespace) -> int:
    """Run the ``validate`` subcommand.

    Prints the formatted validation result and returns 0 when the result's
    ``ok`` attribute is truthy, otherwise 1.
    """
    outcome = validate_path(args.input_path)
    report = format_validation_result(outcome)
    print(report)
    if outcome.ok:
        return 0
    return 1
53
+
54
+
55
def _cmd_view(args: argparse.Namespace) -> int:
    """Run the viewer for ``args.dataset_dir``.

    The browser is opened only when neither ``--no-open`` nor quiet mode is
    in effect. A ``ValueError`` raised while serving is reported on stderr
    with a ``[FATAL]`` prefix and mapped to exit status 2; otherwise 0.
    """
    try:
        # ``quiet`` may be absent from the namespace, so default to False.
        silent = bool(getattr(args, "quiet", False))
        launch_browser = not (args.no_open or silent)
        serve(
            args.dataset_dir,
            port=args.port,
            open_browser=launch_browser,
            quiet=silent,
        )
    except ValueError as err:
        print(f"[FATAL] {err}", file=sys.stderr)
        return 2
    else:
        return 0
63
+
64
+
65
def build_parser() -> argparse.ArgumentParser:
    """Build the ``qamule-trajectory`` argument parser.

    The top-level parser accepts ``-q/--quiet`` and ``--port`` for the
    default viewer (used when no subcommand is given) and defines four
    subcommands: ``init``, ``append``, ``validate`` and ``view``. Each
    subcommand stores its handler in the ``func`` attribute of the namespace.

    Returns:
        The configured parser.
    """
    parser = argparse.ArgumentParser(
        prog="qamule-trajectory",
        description="Create, validate, and browse QAMule trajectory datasets.",
    )
    parser.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help="start the default viewer quietly without opening the browser",
    )
    parser.add_argument("--port", type=int, default=8932, help="port for the default viewer")
    subparsers = parser.add_subparsers(dest="command")

    init_parser = subparsers.add_parser("init", help="create a new trajectory.json for a session")
    init_parser.add_argument("session_dir", nargs="?", help="session directory, e.g. dataset/open_wifi_20260514_101530")
    init_parser.add_argument("--task-slug", help="create dataset/{task_slug}_YYYYMMDD_HHMMSS automatically")
    init_parser.add_argument("--dataset-root", default="dataset", help="dataset root used with --task-slug")
    init_parser.add_argument("--instruction", required=True, help="human task instruction")
    init_parser.add_argument("--app", required=True, help="target app package")
    init_parser.add_argument("--device-model", required=True, help="device model")
    init_parser.add_argument(
        "--resolution",
        required=True,
        nargs=2,
        type=int,
        metavar=("WIDTH", "HEIGHT"),
        help="device resolution as width height",
    )
    init_parser.add_argument("--android", required=True, help="android version")
    init_parser.add_argument("--force", action="store_true", help="overwrite an existing trajectory.json")
    init_parser.set_defaults(func=_cmd_init)

    append_parser = subparsers.add_parser("append", help="append one recorded step to trajectory.json")
    append_parser.add_argument("session_dir", help="session directory that already contains trajectory.json")
    append_parser.add_argument("--screenshot", required=True, help="step screenshot file name, e.g. step_001.jpg")
    append_parser.add_argument("--current-app", required=True, help="current package/activity string")
    append_parser.add_argument("--thought", required=True, help="observation and reasoning for this step")
    append_parser.add_argument("--action", required=True, help="JSON object describing the action")
    append_parser.add_argument(
        "--step-success",
        default=True,
        type=_arg_bool,
        help="whether this recorded step executed successfully (default: true)",
    )
    append_parser.set_defaults(func=_cmd_append)

    validate_parser = subparsers.add_parser("validate", help="validate a dataset, session, or trajectory.json")
    validate_parser.add_argument("input_path", nargs="?", default="dataset", help="dataset dir, session dir, or trajectory.json")
    validate_parser.set_defaults(func=_cmd_validate)

    view_parser = subparsers.add_parser("view", help="serve a local trajectory viewer")
    view_parser.add_argument("dataset_dir", nargs="?", default=".", help="dataset directory to browse")
    # ``--port`` and ``-q/--quiet`` also exist on the top-level parser. argparse
    # copies every attribute of the subparser's namespace onto the parent
    # namespace, so a concrete default here would overwrite a value given
    # before the subcommand (``qamule-trajectory -q view``). SUPPRESS leaves
    # the attribute untouched unless the option is passed after ``view``.
    view_parser.add_argument("--port", type=int, default=argparse.SUPPRESS, help="port to serve on")
    view_parser.add_argument("--no-open", action="store_true", help="do not open the browser automatically")
    view_parser.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        default=argparse.SUPPRESS,
        help="serve quietly without opening the browser",
    )
    view_parser.set_defaults(func=_cmd_view)

    return parser
129
+
130
+
131
def main(argv: list[str] | None = None) -> int:
    """Parse ``argv`` and dispatch to the selected subcommand handler.

    When no subcommand was given (the namespace has no ``func``), the viewer
    is started for the current directory with browser opening allowed.

    Returns:
        The handler's exit status, or 2 after printing a ``[FATAL]`` message
        to stderr when the handler raises ``TrajectoryError``.
    """
    args = build_parser().parse_args(argv)

    if "func" not in vars(args):
        # No subcommand: fall back to the default viewer.
        args.func = _cmd_view
        args.dataset_dir = "."
        args.no_open = False

    try:
        status = args.func(args)
    except TrajectoryError as err:
        print(f"[FATAL] {err}", file=sys.stderr)
        status = 2
    return status
143
+
144
+
145
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    sys.exit(main())
@@ -0,0 +1,187 @@
1
+ """Core helpers for creating and updating trajectory sessions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Callable
6
+ from datetime import datetime
7
+ import json
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ from .schema import STEP_JPG_RE, TASK_SLUG_RE, TERMINAL_ACTIONS
12
+
13
+
14
class TrajectoryError(ValueError):
    """Signal that a trajectory operation could not be completed.

    Subclasses ``ValueError``, so ``except ValueError`` handlers catch it too.
    """
16
+
17
+
18
def parse_bool(value: str) -> bool:
    """Convert a textual flag into a ``bool``.

    Surrounding whitespace and letter case are ignored. ``true``, ``1``,
    ``yes`` and ``y`` map to ``True``; ``false``, ``0``, ``no`` and ``n``
    map to ``False``.

    Raises:
        TrajectoryError: If the text is none of the accepted spellings.
    """
    spellings = {
        "true": True,
        "1": True,
        "yes": True,
        "y": True,
        "false": False,
        "0": False,
        "no": False,
        "n": False,
    }
    verdict = spellings.get(value.strip().lower())
    if verdict is None:
        raise TrajectoryError("expected true/false")
    return verdict
25
+
26
+
27
def expected_screenshot_name(step_number: int) -> str:
    """Return the screenshot file name for ``step_number``.

    The number is zero-padded to at least three digits, e.g. ``step_001.jpg``.
    """
    padded = format(step_number, "03d")
    return f"step_{padded}.jpg"
29
+
30
+
31
def parse_action(raw_action: str) -> dict[str, Any]:
    """Decode a JSON action string and check its basic shape.

    Args:
        raw_action: JSON text expected to decode to an object with a
            non-blank string ``type`` field.

    Returns:
        The decoded action object.

    Raises:
        TrajectoryError: If the text is not valid JSON, does not decode to
            an object, or ``type`` is missing, not a string, or blank.
    """
    try:
        decoded = json.loads(raw_action)
    except json.JSONDecodeError as err:
        raise TrajectoryError(
            f"invalid action JSON: line {err.lineno}, column {err.colno}: {err.msg}"
        ) from err

    if not isinstance(decoded, dict):
        raise TrajectoryError("action must decode to a JSON object")

    kind = decoded.get("type")
    has_usable_type = isinstance(kind, str) and bool(kind.strip())
    if not has_usable_type:
        raise TrajectoryError("action.type must be a non-empty string")

    return decoded
47
+
48
+
49
+ def load_trajectory(session_dir: Path) -> tuple[Path, dict[str, Any]]:
50
+ trajectory_path = session_dir / "trajectory.json"
51
+ if not trajectory_path.is_file():
52
+ raise TrajectoryError(f"trajectory.json not found in session directory: {session_dir}")
53
+
54
+ try:
55
+ data = json.loads(trajectory_path.read_text(encoding="utf-8"))
56
+ except json.JSONDecodeError as exc:
57
+ raise TrajectoryError(
58
+ f"invalid JSON in {trajectory_path}: line {exc.lineno}, column {exc.colno}: {exc.msg}"
59
+ ) from exc
60
+ except OSError as exc:
61
+ raise TrajectoryError(f"cannot read {trajectory_path}: {exc}") from exc
62
+
63
+ if not isinstance(data, dict):
64
+ raise TrajectoryError("trajectory.json must contain a top-level object")
65
+
66
+ steps = data.get("steps")
67
+ if not isinstance(steps, list):
68
+ raise TrajectoryError("trajectory.json field 'steps' must be an array")
69
+
70
+ return trajectory_path, data
71
+
72
+
73
def write_trajectory(trajectory_path: Path, data: dict[str, Any]) -> None:
    """Serialize ``data`` to ``trajectory_path`` as indented UTF-8 JSON.

    ``data["total_steps"]`` is set to the length of ``data["steps"]`` (0 when
    the key is absent) before writing, so the caller's dict is mutated.

    The payload is written to a sibling ``<name>.tmp`` file and then moved
    over the target, so an interrupted write does not leave a truncated
    ``trajectory.json`` behind.

    Raises:
        OSError: If the temporary file cannot be written or moved into place.
    """
    data["total_steps"] = len(data.get("steps", []))
    payload = json.dumps(data, ensure_ascii=False, indent=2) + "\n"

    trajectory_path = Path(trajectory_path)
    # Same directory as the target so the final rename stays on one filesystem.
    staging_path = trajectory_path.with_name(trajectory_path.name + ".tmp")
    try:
        staging_path.write_text(payload, encoding="utf-8")
        staging_path.replace(trajectory_path)
    except OSError:
        # Do not leave a stray temp file behind when the write or rename fails.
        staging_path.unlink(missing_ok=True)
        raise
77
+
78
+
79
+ def resolve_session_dir(
80
+ session_dir: str | Path | None,
81
+ *,
82
+ task_slug: str | None = None,
83
+ dataset_root: str | Path = "dataset",
84
+ now: Callable[[], datetime] = datetime.now,
85
+ ) -> Path:
86
+ if session_dir and task_slug:
87
+ raise TrajectoryError("pass either SESSION_DIR or --task-slug, not both")
88
+ if session_dir:
89
+ return Path(session_dir)
90
+ if not task_slug:
91
+ raise TrajectoryError("init requires either SESSION_DIR or --task-slug")
92
+ if not TASK_SLUG_RE.match(task_slug):
93
+ raise TrajectoryError("task slug must match pattern '[A-Za-z0-9][A-Za-z0-9_-]*'")
94
+
95
+ timestamp = now().strftime("%Y%m%d_%H%M%S")
96
+ return Path(dataset_root) / f"{task_slug}_{timestamp}"
97
+
98
+
99
def init_session(
    session_dir: str | Path | None,
    *,
    instruction: str,
    app: str,
    device_model: str,
    resolution: tuple[int, int] | list[int],
    android: str,
    task_slug: str | None = None,
    dataset_root: str | Path = "dataset",
    force: bool = False,
) -> Path:
    """Create a session directory holding an empty ``trajectory.json``.

    The directory comes from ``resolve_session_dir`` and is created together
    with any missing parents. The new trajectory has no steps, ``success``
    set to ``False``, and uses the directory name as ``task_id``.

    Returns:
        The session directory.

    Raises:
        TrajectoryError: If ``trajectory.json`` already exists and ``force``
            is false, or if ``resolve_session_dir`` rejects the arguments.
    """
    target_dir = resolve_session_dir(
        session_dir,
        task_slug=task_slug,
        dataset_root=dataset_root,
    )
    target_dir.mkdir(parents=True, exist_ok=True)

    trajectory_path = target_dir / "trajectory.json"
    already_present = trajectory_path.exists()
    if already_present and not force:
        raise TrajectoryError(f"trajectory.json already exists: {trajectory_path}. Use --force to overwrite it.")

    task_id = target_dir.name
    width, height = int(resolution[0]), int(resolution[1])
    device: dict[str, Any] = {
        "model": device_model,
        "resolution": [width, height],
        "android": android,
    }
    data: dict[str, Any] = {
        "task_id": task_id,
        "instruction": instruction,
        "app": app,
        "device": device,
        "steps": [],
        "success": False,
        "total_steps": 0,
    }

    write_trajectory(trajectory_path, data)
    return target_dir
138
+
139
+
140
def append_step(
    session_dir: str | Path,
    *,
    screenshot: str,
    current_app: str,
    thought: str,
    raw_action: str,
    step_success: bool = True,
) -> Path:
    """Append one recorded step to a session's ``trajectory.json``.

    The new step is numbered ``len(steps) + 1``. Its screenshot must match
    ``STEP_JPG_RE``, carry the name expected for that number, and already
    exist in the session directory. A ``finish`` action sets the trajectory's
    ``success`` to ``True``; an ``impossible`` action sets it to ``False``.

    Args:
        session_dir: Directory that already contains ``trajectory.json``.
        screenshot: Screenshot file name for this step.
        current_app: Stored in the step's ``current_app`` field.
        thought: Stored in the step's ``thought`` field.
        raw_action: JSON text describing the action; see ``parse_action``.
        step_success: Stored in the step's ``success`` field.

    Returns:
        The path of the updated ``trajectory.json``.

    Raises:
        TrajectoryError: If the trajectory cannot be loaded, the last stored
            step is not an object, the last step's action is terminal, the
            action is invalid, or the screenshot name or file is wrong.
    """
    resolved_session_dir = Path(session_dir)
    trajectory_path, data = load_trajectory(resolved_session_dir)
    steps = data["steps"]

    if steps:
        last_step = steps[-1]
        # Only the type of ``steps`` is checked on load, so a hand-edited file
        # may hold non-object entries; report that instead of crashing with
        # AttributeError on ``.get``.
        if not isinstance(last_step, dict):
            raise TrajectoryError("last entry of trajectory.json field 'steps' must be an object")
        last_action = last_step.get("action", {})
        if isinstance(last_action, dict) and last_action.get("type") in TERMINAL_ACTIONS:
            raise TrajectoryError("cannot append after a terminal action; start a new session or edit the JSON manually")

    action = parse_action(raw_action)
    step_number = len(steps) + 1
    expected_name = expected_screenshot_name(step_number)

    if not STEP_JPG_RE.match(screenshot):
        raise TrajectoryError("screenshot must match pattern 'step_{NNN}.jpg'")
    if screenshot != expected_name:
        raise TrajectoryError(f"screenshot for step {step_number} must be named {expected_name}")
    if not (resolved_session_dir / screenshot).is_file():
        raise TrajectoryError(f"screenshot not found in session directory: {screenshot}")

    steps.append(
        {
            "step": step_number,
            "screenshot": screenshot,
            "current_app": current_app,
            "thought": thought,
            "action": action,
            "success": step_success,
        }
    )

    # Terminal actions decide the overall outcome of the trajectory.
    action_type = action.get("type")
    if action_type == "finish":
        data["success"] = True
    elif action_type == "impossible":
        data["success"] = False

    write_trajectory(trajectory_path, data)
    return trajectory_path
File without changes
@@ -0,0 +1,30 @@
1
+ """Shared trajectory schema constants."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
# Screenshot file name: "step_", exactly three digits (captured), ".jpg".
STEP_JPG_RE = re.compile(r"^step_(\d{3})\.jpg$")
# Task slug: an alphanumeric first character, then alphanumerics, "_" or "-".
TASK_SLUG_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_-]*$")
# Session directory name: a task slug followed by "_YYYYMMDD_HHMMSS" digits.
SESSION_DIR_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_-]*_\d{8}_\d{6}$")

# Action types treated as terminal.
TERMINAL_ACTIONS = {"impossible", "finish"}

# Parameter names listed as required for each action type.
ACTION_REQUIRED_PARAMS = {
    # Launching an app.
    "app_start": {"app"},
    # Pointer gestures.
    "click": {"x", "y"},
    "long_click": {"x", "y"},
    "swipe": {"x1", "y1", "x2", "y2"},
    # Text and key input.
    "type": {"text"},
    "press": {"key"},
    # Timing.
    "wait": {"duration"},
    # Terminal actions.
    "finish": {"reason"},
    "impossible": {"reason"},
}

# Action parameters listed as numeric, keyed by action type.
NUMERIC_ACTION_KEYS = {
    "click": {"x", "y"},
    "long_click": {"x", "y"},
    "swipe": {"x1", "y1", "x2", "y2"},
    "wait": {"duration"},
}