aivane-agent-android 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ __version__ = "0.1.0"
2
+
3
+ from .cli import main
4
+ from .client import AgentAndroidClient
5
+ from .repl import AriaReplSession
6
+
7
+ __all__ = ["__version__", "main", "AgentAndroidClient", "AriaReplSession"]
agent_android/cli.py ADDED
@@ -0,0 +1,323 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import os
6
+ import sys
7
+ from typing import Any, Dict, List
8
+
9
+ from . import __version__
10
+ from .client import AgentAndroidClient
11
+ from .config import TOKEN_ENV_VAR, require_base_url, resolve_api_token
12
+ from .formatting import _format_launcher_app, format_element, print_tree
13
+ from .repl import AriaReplSession
14
+
15
+ EPILOG = """AIVane Android REPL CLI helper for agent-android.
16
+
17
+ The phone hosts the beta HTTP service locally and this client connects
18
+ directly to http://<device-ip>:8080. The public path is local-first and
19
+ does not require a cloud relay for the basic smoke flow.
20
+
21
+ Quick start:
22
+ agent-android --repl --url http://<device-ip>:8080
23
+ agent-android --health --url http://<device-ip>:8080
24
+ agent-android --health --url http://<device-ip>:8080 --token YOUR_TOKEN
25
+ agent-android --apps --url http://<device-ip>:8080
26
+ agent-android --list --url http://<device-ip>:8080
27
+
28
+ One-off examples:
29
+ agent-android --launch com.example.app --url http://<device-ip>:8080
30
+ agent-android --tap 7 --url http://<device-ip>:8080
31
+ agent-android --input 7 "hello world" --url http://<device-ip>:8080
32
+ agent-android --template template.json --url http://<device-ip>:8080
33
+ agent-android --swipe up --url http://<device-ip>:8080
34
+ agent-android --screenshot --url http://<device-ip>:8080
35
+ agent-android --wait-for Search --timeout 30 --url http://<device-ip>:8080
36
+
37
+ REPL quick reference:
38
+ health / hl Check the /health endpoint
39
+ l [n] / list [n] List elements (reuse cache)
40
+ ss / snapshot Force-refresh the UI tree
41
+ apps List launcher apps
42
+ ref <N> Dump one element
43
+ node <N> Print the raw <node .../> XML snippet for refId=N
44
+ x <N> Print XPath candidates for refId=N
45
+ mx <ids> Find shared XPath candidates for multiple refIds
46
+ vx <xpath> [idx] Validate XPath match count and inspect one runtime match
47
+ vn <xpath> Print matched <node .../> snippets using runtime XPath results
48
+ t <N> Tap element with refId=N
49
+ tx <xpath> Tap by XPath locator
50
+ i <N> <text> Enter text into refId=N (--clear or "" clears it)
51
+ ix <xpath> <text> Enter text via XPath locator
52
+ sw <d|u|l|r> Swipe direction (supports --dur/--dist)
53
+ wf <text> Wait for element text (use --t to override timeout)
54
+ g <N> <attr> Inspect an attribute for refId=N
55
+ s [path] Capture screenshot
56
+ ux [path] [--all] Print or save the current UI tree XML
57
+ la <pkg> Launch an app by package name
58
+ p <key> Press a system key (back/home/recents)
59
+ b Navigate back
60
+ vars Show session variables
61
+ set url <u> Switch the server URL
62
+ set token <v> Save or clear the shared token
63
+ set timeout <N> Adjust the default timeout
64
+ h Show REPL help
65
+ q Quit the REPL
66
+
67
+ Token:
68
+ If the phone requires a shared token, use one of:
69
+ - --token YOUR_TOKEN
70
+ - Set environment variable {env_var}
71
+ - In REPL: set token YOUR_TOKEN
72
+
73
+ Troubleshooting:
74
+ If Python calls stop working, first check whether the AIVane app or
75
+ the phone-side API service has exited, then retry /health.
76
+ """.format(env_var=TOKEN_ENV_VAR)
77
+
78
+
79
def build_parser() -> argparse.ArgumentParser:
    """Assemble the ``agent-android`` command-line parser.

    Options are registered in three passes: connection and tree-fetch
    options, a mutually exclusive group holding the one-shot actions and
    tree queries, and finally tuning/output options. The registration order
    matches the original so the generated ``--help`` listing is unchanged.

    Returns:
        The configured parser. No arguments are parsed here.
    """
    cli = argparse.ArgumentParser(
        prog="agent-android",
        description="agent-android v0.1 - local-first Android UI automation over the public AIVane REPL surface",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=EPILOG,
    )

    # Each entry is (flags, keyword arguments for add_argument).
    connection_options = [
        (("--repl", "-i"), {"action": "store_true", "help": "Enter REPL interactive mode (recommended)"}),
        (("--url", "-u"), {"default": None, "help": "AIVane server URL (command-line overrides saved config)"}),
        (
            ("--token",),
            {
                "default": None,
                "help": f"Shared token for protected device access. Overrides {TOKEN_ENV_VAR} and saved config.",
            },
        ),
        (("--wait", "-w"), {"type": int, "default": 0, "help": "Wait N seconds before fetching ARIA tree"}),
        (("--no-cache",), {"action": "store_true", "help": "Force refresh ARIA tree (bypass cache)"}),
        (
            ("--wait-for",),
            {"type": str, "metavar": "TEXT", "help": "Wait for element with text matching to appear"},
        ),
        (
            ("--timeout", "-t"),
            {"type": int, "default": 30, "help": "Max wait time for --wait-for (default: 30s)"},
        ),
        (
            ("--include-offscreen",),
            {"action": "store_true", "help": "Include off-screen elements in the returned tree"},
        ),
    ]

    # At most one of these may be given per invocation.
    exclusive_options = [
        (("--list", "-l"), {"action": "store_true", "help": "List all elements"}),
        (
            ("--screenshot", "-s"),
            {
                "nargs": "?",
                "const": "_auto_",
                "metavar": "OUTPUT_PATH",
                "help": "Capture screenshot. Optional: output file path",
            },
        ),
        (("--swipe",), {"type": str, "metavar": "DIRECTION", "help": "Swipe direction: up/down/left/right"}),
        (("--tap",), {"type": int, "metavar": "REFID", "help": "Tap element by refId"}),
        (
            ("--input",),
            {"nargs": 2, "metavar": ("REFID", "TEXT"), "help": "Input text to element by refId"},
        ),
        (
            ("--template",),
            {"metavar": "TEMPLATE_JSON", "help": "Execute a template JSON file via /execute"},
        ),
        (("--launch", "-a"), {"type": str, "metavar": "PACKAGE", "help": "Launch app"}),
        (("--health",), {"action": "store_true", "help": "Check service health from /health"}),
        (("--back",), {"action": "store_true", "help": "Press back button"}),
        (("--apps",), {"action": "store_true", "help": "List launcher apps from /apps"}),
        (("--press",), {"type": str, "metavar": "KEY", "help": "Press key: back / home / recents"}),
        (
            ("--get-attr",),
            {
                "nargs": 2,
                "metavar": ("REFID", "ATTR"),
                "help": "Get element attribute by refId (text/className/bounds/...)",
            },
        ),
        (("--refId", "-r"), {"type": int, "metavar": "N", "help": "Get element details"}),
        (("--xpath", "-x"), {"type": int, "metavar": "N", "help": "Get element XPath"}),
        (("--id",), {"type": str, "metavar": "RESOURCE_ID", "help": "Query by resourceId"}),
        (("--text",), {"type": str, "metavar": "TEXT", "help": "Query by text"}),
        (("--inputs",), {"action": "store_true", "help": "List all input fields"}),
    ]

    tuning_options = [
        (("--duration",), {"type": int, "default": 300, "help": "Swipe duration in ms (default: 300)"}),
        (
            ("--distance",),
            {"type": float, "default": 0.5, "help": "Swipe distance ratio 0.0-1.0 (default: 0.5)"},
        ),
        (
            ("--quality", "-q"),
            {"type": int, "default": 80, "help": "Screenshot quality 1-100 (default: 80)"},
        ),
        (("--filter", "-f"), {"type": str, "help": "Filter elements by text or content description"}),
        (("--raw",), {"action": "store_true", "help": "Output raw JSON"}),
        (("--output", "-o"), {"type": str, "help": "Save ARIA tree to JSON file"}),
        (("--version",), {"action": "version", "version": f"%(prog)s {__version__}"}),
    ]

    for flags, spec in connection_options:
        cli.add_argument(*flags, **spec)

    one_shot = cli.add_mutually_exclusive_group()
    for flags, spec in exclusive_options:
        one_shot.add_argument(*flags, **spec)

    for flags, spec in tuning_options:
        cli.add_argument(*flags, **spec)

    return cli
123
+
124
+
125
+ def _load_template_payload(path_str: str) -> Dict[str, Any]:
126
+ template_path = os.path.expanduser(path_str)
127
+ try:
128
+ with open(template_path, "r", encoding="utf-8") as handle:
129
+ return json.load(handle)
130
+ except FileNotFoundError:
131
+ print(f"Template file not found: {template_path}", file=sys.stderr)
132
+ raise SystemExit(1)
133
+ except json.JSONDecodeError as exc:
134
+ print(f"Template JSON is invalid: {exc}", file=sys.stderr)
135
+ raise SystemExit(1)
136
+
137
+
138
+ def _run_direct_commands(args: argparse.Namespace, client: AgentAndroidClient) -> None:
139
+ if args.template:
140
+ payload = _load_template_payload(args.template)
141
+ response = client.execute_template_payload(payload)
142
+ if response is None:
143
+ print("Failed to execute template payload. Check the connection hints above.", file=sys.stderr)
144
+ raise SystemExit(1)
145
+ print(json.dumps(response, indent=2, ensure_ascii=False))
146
+ raise SystemExit(0 if response.get("success") is True else 1)
147
+ if args.health:
148
+ health = client.get_health()
149
+ if health is None:
150
+ print("Failed to fetch health. Check the connection hints above.", file=sys.stderr)
151
+ raise SystemExit(1)
152
+ print(json.dumps(health, indent=2, ensure_ascii=False))
153
+ raise SystemExit(0)
154
+ if args.back:
155
+ raise SystemExit(0 if client.press_back() else 1)
156
+ if args.press:
157
+ raise SystemExit(0 if client.press_key(args.press) else 1)
158
+ if args.launch:
159
+ raise SystemExit(0 if client.launch_app(args.launch) else 1)
160
+ if args.apps:
161
+ apps = client.list_launcher_apps()
162
+ if apps is None:
163
+ print(
164
+ "Failed to fetch launcher apps. Check the connection hints above "
165
+ "and confirm the service is healthy.",
166
+ file=sys.stderr,
167
+ )
168
+ raise SystemExit(1)
169
+ if not apps:
170
+ print("No launcher apps returned.")
171
+ raise SystemExit(0)
172
+ print("Launcher apps:")
173
+ for index, app in enumerate(apps, start=1):
174
+ print(f" [{index:02d}] {_format_launcher_app(app)}")
175
+ raise SystemExit(0)
176
+ if args.screenshot is not None:
177
+ output_path = None if args.screenshot == "_auto_" else args.screenshot
178
+ raise SystemExit(0 if client.screenshot(output_path=output_path, quality=args.quality) else 1)
179
+ if args.swipe:
180
+ success = client.swipe(direction=args.swipe, duration=args.duration, distance=args.distance)
181
+ raise SystemExit(0 if success else 1)
182
+ if args.tap is not None:
183
+ raise SystemExit(0 if client.tap_element(args.tap) else 1)
184
+ if args.input:
185
+ ref_id = int(args.input[0])
186
+ raise SystemExit(0 if client.input_to_element(ref_id, args.input[1]) else 1)
187
+
188
+
189
+ def _run_wait_command(args: argparse.Namespace, client: AgentAndroidClient) -> None:
190
+ if not args.wait_for:
191
+ return
192
+ print(f"Waiting for element '{args.wait_for}' (timeout={args.timeout}s)...", file=sys.stderr)
193
+ elem = client.wait_for_element(text=args.wait_for, timeout=args.timeout)
194
+ if elem:
195
+ ref_id = elem.get("refId")
196
+ print(
197
+ f"refId={ref_id} found: text='{elem.get('text', '')}' "
198
+ f"class={elem.get('simpleClassName', '')} "
199
+ f"at ({elem.get('x', '?')}, {elem.get('y', '?')})"
200
+ )
201
+ raise SystemExit(0)
202
+ raise SystemExit(1)
203
+
204
+
205
+ def _dump_input_elements(
206
+ client: AgentAndroidClient,
207
+ elements: List[Dict[str, Any]],
208
+ args: argparse.Namespace,
209
+ ) -> None:
210
+ if not args.inputs:
211
+ return
212
+ input_elements = client.find_input_elements(elements)
213
+ if not input_elements:
214
+ print("No input fields found")
215
+ raise SystemExit(0)
216
+ print("\n" + "=" * 70)
217
+ print(f" Input Fields - {len(input_elements)} elements")
218
+ print("=" * 70)
219
+ for elem in input_elements:
220
+ ref_id = elem.get("refId", "?")
221
+ text = elem.get("text", "") or elem.get("contentDesc", "") or "-"
222
+ cls = elem.get("simpleClassName", "")
223
+ x, y = elem.get("x", "?"), elem.get("y", "?")
224
+ editable = "editable" if elem.get("editable") else ""
225
+ focusable = "focusable" if elem.get("focusable") else ""
226
+ print(
227
+ " [{:2d}] {:<28} {:<18} ({:4s},{:4s}) [{}, {}]".format(
228
+ ref_id,
229
+ str(text)[:28],
230
+ cls,
231
+ str(x),
232
+ str(y),
233
+ editable,
234
+ focusable,
235
+ )
236
+ )
237
+ print("=" * 70)
238
+ raise SystemExit(0)
239
+
240
+
241
+ def _handle_tree_queries(
242
+ client: AgentAndroidClient,
243
+ elements: List[Dict[str, Any]],
244
+ args: argparse.Namespace,
245
+ ) -> None:
246
+ results = elements
247
+
248
+ if args.get_attr:
249
+ ref_id = int(args.get_attr[0])
250
+ value = client.get_attribute(ref_id, args.get_attr[1])
251
+ if value is not None:
252
+ print(value)
253
+ raise SystemExit(0)
254
+ raise SystemExit(1)
255
+
256
+ if args.refId:
257
+ elem = client.find_by_refId(elements, args.refId)
258
+ if elem:
259
+ print(format_element(elem))
260
+ return
261
+ print(f"Element with refId={args.refId} not found")
262
+ raise SystemExit(1)
263
+
264
+ if args.xpath:
265
+ elem = client.find_by_refId(elements, args.xpath)
266
+ if elem:
267
+ print(elem.get("xpath", ""))
268
+ return
269
+ print(f"Element with refId={args.xpath} not found")
270
+ raise SystemExit(1)
271
+
272
+ if args.id:
273
+ results = client.find_by_resourceId(elements, args.id)
274
+ if not results:
275
+ print(f"No elements with resourceId={args.id}")
276
+ elif args.text:
277
+ results = client.find_by_text(elements, args.text)
278
+ if not results:
279
+ print(f"No elements with text containing '{args.text}'")
280
+ else:
281
+ args.list = True
282
+
283
+ if args.list or args.text or args.id:
284
+ if args.raw:
285
+ print(json.dumps(results, indent=2, ensure_ascii=False))
286
+ else:
287
+ print_tree(results, args.filter, client.get_current_package_name())
288
+
289
+
290
def main() -> int:
    """Entry point for the ``agent-android`` command line.

    Parses the arguments, resolves the server URL and token, then dispatches
    in this order: REPL mode, one-shot actions, ``--wait-for``, and finally
    the tree-based queries. The one-shot, wait and query helpers may end the
    process themselves by raising ``SystemExit``.

    Returns:
        0 on success; 1 when the UI tree cannot be fetched or the
        ``--output`` file cannot be written.
    """
    parser = build_parser()
    args = parser.parse_args()
    url = require_base_url(args.url)
    token = resolve_api_token(args.token)

    if args.repl:
        history_path = os.path.expanduser("~/.agent-android-history")
        session = AriaReplSession(url=url, token=token, history_file=history_path)
        session.run()
        return 0

    client = AgentAndroidClient(url, token=token)
    _run_direct_commands(args, client)
    _run_wait_command(args, client)

    print("Fetching ARIA tree...", file=sys.stderr)
    elements = client.get_ui_elements(
        wait=args.wait,
        force_refresh=args.no_cache,
        visible_only=not args.include_offscreen,
    )
    if not elements:
        print("Failed to get ARIA tree. Check the connection hints above.", file=sys.stderr)
        return 1

    if args.output:
        # Report an unwritable path the same way as every other failure in
        # this module (stderr message plus exit code 1), not as a traceback.
        try:
            with open(args.output, "w", encoding="utf-8") as handle:
                json.dump(elements, handle, ensure_ascii=False, indent=2)
        except OSError as exc:
            print(f"Failed to save ARIA tree to {args.output}: {exc}", file=sys.stderr)
            return 1
        print(f"ARIA tree saved to: {args.output}", file=sys.stderr)

    _dump_input_elements(client, elements, args)
    _handle_tree_queries(client, elements, args)
    return 0