droidrun 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. droidrun/__init__.py +16 -11
  2. droidrun/__main__.py +1 -1
  3. droidrun/adb/__init__.py +3 -3
  4. droidrun/adb/device.py +1 -1
  5. droidrun/adb/manager.py +2 -2
  6. droidrun/agent/__init__.py +6 -0
  7. droidrun/agent/codeact/__init__.py +2 -4
  8. droidrun/agent/codeact/codeact_agent.py +321 -235
  9. droidrun/agent/codeact/events.py +12 -20
  10. droidrun/agent/codeact/prompts.py +0 -52
  11. droidrun/agent/common/default.py +5 -0
  12. droidrun/agent/common/events.py +4 -0
  13. droidrun/agent/context/__init__.py +23 -0
  14. droidrun/agent/context/agent_persona.py +15 -0
  15. droidrun/agent/context/context_injection_manager.py +66 -0
  16. droidrun/agent/context/episodic_memory.py +15 -0
  17. droidrun/agent/context/personas/__init__.py +11 -0
  18. droidrun/agent/context/personas/app_starter.py +44 -0
  19. droidrun/agent/context/personas/default.py +95 -0
  20. droidrun/agent/context/personas/extractor.py +52 -0
  21. droidrun/agent/context/personas/ui_expert.py +107 -0
  22. droidrun/agent/context/reflection.py +20 -0
  23. droidrun/agent/context/task_manager.py +124 -0
  24. droidrun/agent/context/todo.txt +4 -0
  25. droidrun/agent/droid/__init__.py +2 -2
  26. droidrun/agent/droid/droid_agent.py +264 -325
  27. droidrun/agent/droid/events.py +28 -0
  28. droidrun/agent/oneflows/reflector.py +265 -0
  29. droidrun/agent/planner/__init__.py +2 -4
  30. droidrun/agent/planner/events.py +9 -13
  31. droidrun/agent/planner/planner_agent.py +268 -0
  32. droidrun/agent/planner/prompts.py +33 -53
  33. droidrun/agent/utils/__init__.py +3 -0
  34. droidrun/agent/utils/async_utils.py +1 -40
  35. droidrun/agent/utils/chat_utils.py +268 -48
  36. droidrun/agent/utils/executer.py +49 -14
  37. droidrun/agent/utils/llm_picker.py +14 -10
  38. droidrun/agent/utils/trajectory.py +184 -0
  39. droidrun/cli/__init__.py +1 -1
  40. droidrun/cli/logs.py +283 -0
  41. droidrun/cli/main.py +333 -439
  42. droidrun/run.py +105 -0
  43. droidrun/tools/__init__.py +5 -10
  44. droidrun/tools/{actions.py → adb.py} +279 -238
  45. droidrun/tools/ios.py +594 -0
  46. droidrun/tools/tools.py +99 -0
  47. droidrun-0.3.0.dist-info/METADATA +149 -0
  48. droidrun-0.3.0.dist-info/RECORD +52 -0
  49. droidrun/agent/planner/task_manager.py +0 -355
  50. droidrun/agent/planner/workflow.py +0 -371
  51. droidrun/tools/device.py +0 -29
  52. droidrun/tools/loader.py +0 -60
  53. droidrun-0.2.0.dist-info/METADATA +0 -373
  54. droidrun-0.2.0.dist-info/RECORD +0 -32
  55. {droidrun-0.2.0.dist-info → droidrun-0.3.0.dist-info}/WHEEL +0 -0
  56. {droidrun-0.2.0.dist-info → droidrun-0.3.0.dist-info}/entry_points.txt +0 -0
  57. {droidrun-0.2.0.dist-info → droidrun-0.3.0.dist-info}/licenses/LICENSE +0 -0
droidrun/tools/ios.py ADDED
@@ -0,0 +1,594 @@
1
+ """
2
+ UI Actions - Core UI interaction tools for iOS device control.
3
+ """
4
+
5
+ import re
6
+ import time
7
+ import asyncio
8
+ from typing import Optional, Dict, Tuple, List, Any
9
+ import logging
10
+ import aiohttp
11
+ from droidrun.tools.tools import Tools
12
+
13
# Module-level logger; the fixed name "IOS" (rather than __name__) is what
# log consumers filter on, so it is kept as-is.
logger = logging.getLogger("IOS")

# Bundle identifiers treated as "system apps" by IOSTools.list_packages:
# the DroidRun portal app itself plus the stock Apple apps.
SYSTEM_BUNDLE_IDENTIFIERS = [
    "ai.droidrun.droidrun-ios-portal",
    "com.apple.Bridge",
    "com.apple.DocumentsApp",
    "com.apple.Fitness",
    "com.apple.Health",
    "com.apple.Maps",
    "com.apple.MobileAddressBook",
    "com.apple.MobileSMS",
    "com.apple.Passbook",
    "com.apple.Passwords",
    "com.apple.Preferences",
    "com.apple.PreviewShell",
    "com.apple.mobilecal",
    "com.apple.mobilesafari",
    "com.apple.mobileslideshow",
    "com.apple.news",
    "com.apple.reminders",
    "com.apple.shortcuts",
    "com.apple.webapp",
]
36
+
37
+
38
class IOSTools(Tools):
    """Core UI interaction tools for iOS device control.

    Every action is performed through an HTTP request against the portal
    reachable at ``url`` (the ``/vision/*``, ``/gestures/*`` and ``/inputs/*``
    endpoints used by the methods below). A new ``aiohttp.ClientSession`` is
    opened for each request.
    """

    # Lines of the accessibility dump that carry no element data.
    _A11Y_HEADER_PREFIXES = (
        "Attributes:",
        "Element subtree:",
        "Path to element:",
        "Query chain:",
    )

    # Patterns are compiled once here instead of once per parsed line.
    # Element frame, formatted as ``{{x, y}, {width, height}}``.
    _FRAME_RE = re.compile(
        r"\{\{([0-9.]+),\s*([0-9.]+)\},\s*\{([0-9.]+),\s*([0-9.]+)\}\}"
    )
    # Element type: the text before the first comma of the line.
    _ELEMENT_TYPE_RE = re.compile(r"\s*(.+?),")
    _LEADING_ARROWS_RE = re.compile(r"^[→\s]+")
    _LABEL_RE = re.compile(r"label:\s*'([^']*)'")
    _IDENTIFIER_RE = re.compile(r"identifier:\s*'([^']*)'")
    _PLACEHOLDER_RE = re.compile(r"placeholderValue:\s*'([^']*)'")
    _VALUE_RE = re.compile(r"value:\s*([^,}]+)")

    # An element is kept only if its type contains one of these substrings.
    _INTERACTIVE_TYPES = (
        "Button",
        "SearchField",
        "TextField",
        "Cell",
        "Switch",
        "Slider",
        "Stepper",
        "Picker",
        "Link",
    )

    def __init__(
        self, url: str, bundle_identifiers: Optional[List[str]] = None
    ) -> None:
        """
        Create the tool set for one iOS device.

        Args:
            url: Base URL of the device portal; endpoint paths are appended to it.
            bundle_identifiers: Extra bundle identifiers reported by
                ``list_packages``. Defaults to an empty list.
        """
        self.clickable_elements_cache: List[Dict[str, Any]] = []
        self.url = url
        self.last_screenshot = None
        self.reason = None
        self.success = None
        self.finished = False
        self.memory: List[str] = []
        self.screenshots: List[Dict[str, Any]] = []
        # Rect ("x,y,width,height") of the last tapped element, reused as the
        # target of the next text input.
        self.last_tapped_rect: Optional[str] = None
        # A fresh list per instance: a shared ``[]`` default would leak
        # identifiers from one IOSTools instance into every other one.
        self.bundle_identifiers = (
            bundle_identifiers if bundle_identifiers is not None else []
        )
        logger.info(f"iOS device URL: {url}")

    async def get_clickables(
        self, serial: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """
        Get all clickable UI elements from the iOS device using accessibility API.

        On success the parsed elements replace ``clickable_elements_cache``.
        Failures are best-effort: they are logged, the previous cache is left
        untouched and an empty list is returned.

        Args:
            serial: Optional device URL (not used for iOS, uses instance URL)

        Returns:
            List of dictionaries containing UI elements extracted from the
            device screen, or an empty list if they could not be retrieved.
        """
        try:
            async with aiohttp.ClientSession() as session:
                a11y_url = f"{self.url}/vision/a11y"
                async with session.get(a11y_url) as response:
                    if response.status != 200:
                        logger.error(
                            f"Failed to get accessibility data: HTTP {response.status}"
                        )
                        raise ValueError(
                            f"Failed to get accessibility data: HTTP {response.status}"
                        )
                    a11y_data = await response.json()

            # Parse the iOS accessibility tree format
            elements = self._parse_ios_accessibility_tree(
                a11y_data["accessibilityTree"]
            )

            # Cache the elements for tap_by_index usage
            self.clickable_elements_cache = elements
            return elements

        except Exception as e:
            logger.error(f"Error getting clickable elements: {e}")
            # Honour the declared return type instead of implicitly
            # returning None.
            return []

    @staticmethod
    def _first_group(pattern, line: str) -> str:
        """Return group 1 of the first ``pattern`` match in ``line``, or ""."""
        match = pattern.search(line)
        return match.group(1) if match else ""

    def _parse_ios_accessibility_tree(self, a11y_data: str) -> List[Dict[str, Any]]:
        """
        Parse iOS accessibility tree format into structured elements.

        Each line of the form
        ``ElementType, ..., {{x, y}, {width, height}}, [optional properties]``
        is turned into a dictionary. Only elements whose type contains one of
        ``_INTERACTIVE_TYPES`` are kept; they are numbered consecutively from 0
        in order of appearance.

        Args:
            a11y_data: Raw accessibility data from iOS device

        Returns:
            List of parsed UI elements with coordinates and properties
        """
        elements: List[Dict[str, Any]] = []

        for line in a11y_data.strip().split("\n"):
            # Skip empty lines and header lines
            if not line.strip() or line.startswith(self._A11Y_HEADER_PREFIXES):
                continue

            # Only lines that carry a frame describe a UI element.
            frame = self._FRAME_RE.search(line)
            if not frame:
                continue
            x, y, width, height = map(float, frame.groups())

            # Element type is the text before the first comma, without the
            # leading arrows/spaces the dump uses for tree indentation.
            type_match = self._ELEMENT_TYPE_RE.match(line)
            element_type = type_match.group(1).strip() if type_match else "Unknown"
            element_type = self._LEADING_ARROWS_RE.sub("", element_type)

            # Only include interactive elements (buttons, text fields, etc.)
            if not any(kind in element_type for kind in self._INTERACTIVE_TYPES):
                continue

            label = self._first_group(self._LABEL_RE, line)
            identifier = self._first_group(self._IDENTIFIER_RE, line)
            placeholder = self._first_group(self._PLACEHOLDER_RE, line)
            value = self._first_group(self._VALUE_RE, line).strip()

            elements.append(
                {
                    "index": len(elements),
                    "type": element_type,
                    "className": element_type,
                    "text": label or identifier or placeholder or "",
                    "label": label,
                    "identifier": identifier,
                    "placeholder": placeholder,
                    "value": value,
                    # left,top,right,bottom format for compatibility
                    "bounds": f"{x},{y},{x+width},{y+height}",
                    # x,y,width,height format for iOS API
                    "rect": f"{x},{y},{width},{height}",
                    "x": x,
                    "y": y,
                    "width": width,
                    "height": height,
                    "center_x": x + width / 2,
                    "center_y": y + height / 2,
                }
            )

        return elements

    async def tap_by_index(self, index: int, serial: Optional[str] = None) -> str:
        """
        Tap on a UI element by its index.

        This function uses the cached clickable elements to find the element
        with the given index and sends its rect to the device's tap endpoint.
        The rect is also remembered as the target of the next ``input_text``.

        Args:
            index: Index of the element to tap
            serial: Unused; accepted for signature compatibility.

        Returns:
            Result message; failures are reported as strings starting with
            "Error:" rather than raised.
        """

        def find_element_by_index(elements, target_index):
            """Find an element with the given index, descending into children."""
            for item in elements:
                if item.get("index") == target_index:
                    return item
                # Check children if present
                result = find_element_by_index(item.get("children", []), target_index)
                if result:
                    return result
            return None

        try:
            # Check if we have cached elements
            if not self.clickable_elements_cache:
                return "Error: No UI elements cached. Call get_clickables first."

            element = find_element_by_index(self.clickable_elements_cache, index)

            if not element:
                # List available indices to help the user
                indices = [
                    elem.get("index")
                    for elem in self.clickable_elements_cache
                    if elem.get("index") is not None
                ]
                indices_str = ", ".join(str(idx) for idx in sorted(indices)[:20])
                if len(indices) > 20:
                    indices_str += f"... and {len(indices) - 20} more"

                return f"Error: No element found with index {index}. Available indices: {indices_str}"

            # Get the element coordinates
            x = element.get("x", 0)
            y = element.get("y", 0)
            width = element.get("width", 0)
            height = element.get("height", 0)

            if any(part is None for part in (x, y, width, height)):
                element_text = element.get("text", "No text")
                element_class = element.get("className", "Unknown class")
                return f"Error: Element with index {index} ('{element_text}', {element_class}) has no coordinates and cannot be tapped"

            # Format rect in iOS format: {{x,y},{w,h}}
            ios_rect = f"{{{{{x},{y}}},{{{width},{height}}}}}"

            # Store the rect for potential text input (keep in simple format for text input)
            self.last_tapped_rect = f"{x},{y},{width},{height}"

            # Make the tap request
            async with aiohttp.ClientSession() as session:
                tap_url = f"{self.url}/gestures/tap"
                payload = {"rect": ios_rect, "count": 1, "longPress": False}

                logger.info(f"payload {payload}")

                async with session.post(tap_url, json=payload) as response:
                    if response.status != 200:
                        return f"Error: Failed to tap element. HTTP {response.status}"

                    # Add a small delay to allow UI to update
                    await asyncio.sleep(0.5)

                    # Create a descriptive response
                    return " | ".join(
                        [
                            f"Tapped element with index {index}",
                            f"Text: '{element.get('text', 'No text')}'",
                            f"Class: {element.get('className', 'Unknown class')}",
                            f"Rect: {ios_rect}",
                        ]
                    )

        except Exception as e:
            return f"Error: {str(e)}"

    # NOTE: tap_by_coordinates is currently disabled; the previous draft is
    # kept here for reference.
    #
    # async def tap_by_coordinates(self, x: int, y: int) -> bool:
    #     # Format rect in iOS format: {{x,y},{w,h}}
    #     width = 1
    #     height = 1
    #     ios_rect = f"{{{{{x},{y}}},{{{width},{height}}}}}"
    #
    #     # Make the tap request
    #     async with aiohttp.ClientSession() as session:
    #         tap_url = f"{self.url}/gestures/tap"
    #         payload = {"rect": ios_rect, "count": 1, "longPress": False}
    #
    #         logger.info(f"payload {payload}")
    #
    #         async with session.post(tap_url, json=payload) as response:
    #             if response.status == 200:
    #                 return True
    #             else:
    #                 return False

    async def tap(self, index: int) -> str:
        """
        Tap on a UI element by its index.

        Thin alias for ``tap_by_index``; it relies on the elements cached by
        the last ``get_clickables`` call.

        Args:
            index: Index of the element to tap

        Returns:
            Result message
        """
        return await self.tap_by_index(index)

    async def swipe(
        self, start_x: int, start_y: int, end_x: int, end_y: int, duration_ms: int = 300
    ) -> bool:
        """
        Performs a swipe gesture on the device screen.

        The request sent to the device carries only the start point and one of
        four directions, derived from the dominant axis of the movement; the
        end point is used solely to pick that direction.

        Args:
            start_x: Starting X coordinate
            start_y: Starting Y coordinate
            end_x: Ending X coordinate
            end_y: Ending Y coordinate
            duration_ms: Duration of swipe in milliseconds (not used in iOS API)
        Returns:
            Bool indicating success or failure
        """
        try:
            # Calculate swipe direction based on coordinates
            dx = end_x - start_x
            dy = end_y - start_y

            # Determine primary direction (ties go to the vertical axis)
            if abs(dx) > abs(dy):
                direction = "right" if dx > 0 else "left"
            else:
                direction = "down" if dy > 0 else "up"

            async with aiohttp.ClientSession() as session:
                swipe_url = f"{self.url}/gestures/swipe"
                payload = {"x": float(start_x), "y": float(start_y), "dir": direction}

                async with session.post(swipe_url, json=payload) as response:
                    if response.status == 200:
                        logger.info(
                            f"Swiped from ({start_x}, {start_y}) to ({end_x}, {end_y}) direction: {direction}"
                        )
                        return True

                    logger.error(f"Failed to swipe: HTTP {response.status}")
                    return False

        except Exception as e:
            logger.error(f"Error performing swipe: {e}")
            return False

    async def input_text(self, text: str, serial: Optional[str] = None) -> str:
        """
        Input text on the iOS device.

        The text is sent together with the rect of the last tapped element, or
        a default rect when nothing has been tapped yet.

        Args:
            text: Text to input. Can contain spaces, newlines, and special characters including non-ASCII.
            serial: Optional device serial (not used for iOS, uses instance URL)

        Returns:
            Result message
        """
        try:
            # Use the last tapped element's rect if available, otherwise use a default
            rect = self.last_tapped_rect if self.last_tapped_rect else "0,0,100,100"

            async with aiohttp.ClientSession() as session:
                type_url = f"{self.url}/inputs/type"
                payload = {"rect": rect, "text": text}

                async with session.post(type_url, json=payload) as response:
                    if response.status != 200:
                        return f"Error: Failed to input text. HTTP {response.status}"

                    await asyncio.sleep(0.5)  # Wait for text input to complete
                    return f"Text input completed: {text[:50]}{'...' if len(text) > 50 else ''}"

        except Exception as e:
            return f"Error sending text input: {str(e)}"

    async def back(self) -> str:
        """Not supported on iOS yet; always raises ``NotImplementedError``."""
        raise NotImplementedError("Back is not yet implemented for iOS")

    async def press_key(self, keycode: int) -> str:
        # TODO: refactor this. its not about physical keys but BACK, ENTER, DELETE, etc.
        """
        Press a key on the iOS device.

        iOS Key codes:
        - 0: HOME
        - 4: ACTION
        - 5: CAMERA

        Args:
            keycode: iOS keycode to press

        Returns:
            Result message; unknown key codes are reported by their number.
        """
        try:
            key_names = {0: "HOME", 4: "ACTION", 5: "CAMERA"}
            key_name = key_names.get(keycode, str(keycode))

            async with aiohttp.ClientSession() as session:
                key_url = f"{self.url}/inputs/key"
                payload = {"key": keycode}

                async with session.post(key_url, json=payload) as response:
                    if response.status == 200:
                        return f"Pressed key {key_name}"
                    return f"Error: Failed to press key. HTTP {response.status}"

        except Exception as e:
            return f"Error pressing key: {str(e)}"

    async def start_app(self, package: str, activity: str = "") -> str:
        """
        Start an app on the iOS device.

        Args:
            package: Bundle identifier (e.g., "com.apple.MobileSMS")
            activity: Optional activity name (not used on iOS)

        Returns:
            Result message
        """
        try:
            async with aiohttp.ClientSession() as session:
                launch_url = f"{self.url}/inputs/launch"
                payload = {"bundleIdentifier": package}

                async with session.post(launch_url, json=payload) as response:
                    if response.status != 200:
                        return f"Error: Failed to launch app {package}. HTTP {response.status}"

                    await asyncio.sleep(1.0)  # Wait for app to launch
                    return f"Successfully launched app: {package}"

        except Exception as e:
            return f"Error launching app: {str(e)}"

    async def take_screenshot(self) -> Tuple[str, bytes]:
        """
        Take a screenshot of the iOS device.

        The image bytes are stored as ``last_screenshot`` and appended, with a
        timestamp, to ``screenshots``.

        Returns:
            Tuple of the format name ("PNG") and the raw image bytes.

        Raises:
            ValueError: If the request fails or returns a non-200 status.
        """
        try:
            async with aiohttp.ClientSession() as session:
                screenshot_url = f"{self.url}/vision/screenshot"
                async with session.get(screenshot_url) as response:
                    if response.status != 200:
                        logger.error(
                            f"Failed to capture screenshot: HTTP {response.status}"
                        )
                        raise ValueError(
                            f"Failed to capture screenshot: HTTP {response.status}"
                        )

                    screenshot_data = await response.read()

                    # Store screenshot with timestamp
                    self.screenshots.append(
                        {"timestamp": time.time(), "data": screenshot_data}
                    )
                    self.last_screenshot = screenshot_data

                    logger.info(
                        f"Screenshot captured successfully, size: {len(screenshot_data)} bytes"
                    )
                    return ("PNG", screenshot_data)

        except Exception as e:
            logger.error(f"Error capturing screenshot: {e}")
            raise ValueError(f"Error taking screenshot: {str(e)}") from e

    async def get_phone_state(self, serial: Optional[str] = None) -> Dict[str, Any]:
        """
        Get the current phone state including current activity and keyboard visibility.

        Args:
            serial: Optional device serial number (not used for iOS)

        Returns:
            Dictionary with ``current_activity`` and ``keyboard_shown``. On any
            failure both keys are still present (with "Unknown" / False) and an
            ``error`` entry describes the problem.
        """
        try:
            async with aiohttp.ClientSession() as session:
                state_url = f"{self.url}/vision/state"
                async with session.get(state_url) as response:
                    if response.status != 200:
                        return {
                            "error": f"Failed to get device state: HTTP {response.status}",
                            "current_activity": "Unknown",
                            "keyboard_shown": False,
                        }

                    state_data = await response.json()
                    return {
                        "current_activity": state_data["activity"],
                        "keyboard_shown": state_data["keyboardShown"],
                    }

        except Exception as e:
            # Same fallback keys as the HTTP-error branch, so callers can read
            # the state fields without special-casing this path.
            return {
                "error": str(e),
                "message": f"Error getting phone state: {str(e)}",
                "current_activity": "Unknown",
                "keyboard_shown": False,
            }

    async def list_packages(self, include_system_apps: bool = True) -> List[str]:
        """
        List the known bundle identifiers.

        Args:
            include_system_apps: Also include ``SYSTEM_BUNDLE_IDENTIFIERS``.

        Returns:
            Sorted list of unique bundle identifiers.
        """
        all_packages = set(self.bundle_identifiers)
        if include_system_apps:
            all_packages.update(SYSTEM_BUNDLE_IDENTIFIERS)
        return sorted(all_packages)

    async def extract(self, filename: Optional[str] = None) -> str:
        """Placeholder: extraction is not implemented for iOS yet."""
        # TODO
        return "not implemented"

    async def remember(self, information: str) -> str:
        """
        Store important information to remember for future context.

        The stripped text is appended to ``memory``; only the 10 most recent
        items are kept.

        Args:
            information: The information to remember

        Returns:
            Confirmation message, or an error message for empty/non-string input
        """
        if not information or not isinstance(information, str):
            return "Error: Please provide valid information to remember."

        # Add the information to memory
        self.memory.append(information.strip())

        # Limit memory size to prevent context overflow (keep most recent items)
        max_memory_items = 10
        if len(self.memory) > max_memory_items:
            self.memory = self.memory[-max_memory_items:]

        return f"Remembered: {information}"

    def get_memory(self) -> List[str]:
        """
        Retrieve all stored memory items.

        Returns:
            A copy of the list of stored memory items
        """
        return self.memory.copy()

    def complete(self, success: bool, reason: str = ""):
        """
        Mark the task as finished.

        Args:
            success: Indicates if the task was successful.
            reason: Reason for failure/success

        Raises:
            ValueError: If ``success`` is False and no reason is given. The
                instance state is left untouched in that case.
        """
        # Validate before mutating so a rejected call cannot leave the
        # instance half-updated (success set, finished not).
        if not success and not reason:
            raise ValueError("Reason for failure is required if success is False.")

        if success:
            self.success = True
            self.reason = reason or "Task completed successfully."
        else:
            self.success = False
            self.reason = reason
        self.finished = True
droidrun/tools/tools.py ADDED
@@ -0,0 +1,99 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import List, Optional, Dict, Any
3
+ import logging
4
+ from typing import Tuple, Dict, Callable, Any, Optional
5
+
6
+ # Get a logger for this module
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
class Tools(ABC):
    """Abstract interface every device tool set has to implement.

    All members are abstract; a subclass can only be instantiated once it
    overrides every one of them. Return annotations are kept exactly as
    declared here.

    NOTE(review): the iOS implementation in this package returns a list of
    dicts from ``get_clickables``, result strings from several actions
    annotated ``bool`` here, and defines ``get_memory`` as a plain (non-async)
    method - confirm which side reflects the intended contract.
    """

    @abstractmethod
    async def get_clickables(self) -> str:
        """Return the clickable UI elements currently on screen."""

    @abstractmethod
    async def tap_by_index(self, index: int) -> bool:
        """Tap the UI element identified by ``index``."""

    # tap_by_coordinates is currently disabled:
    # @abstractmethod
    # async def tap_by_coordinates(self, x: int, y: int) -> bool:
    #     pass

    @abstractmethod
    async def swipe(
        self, start_x: int, start_y: int, end_x: int, end_y: int, duration_ms: int = 300
    ) -> bool:
        """Swipe from the start point to the end point."""

    @abstractmethod
    async def input_text(self, text: str) -> bool:
        """Type ``text`` on the device."""

    @abstractmethod
    async def back(self) -> bool:
        """Perform the "back" action."""

    @abstractmethod
    async def press_key(self, keycode: int) -> bool:
        """Press the key identified by ``keycode``."""

    @abstractmethod
    async def start_app(self, package: str, activity: str = "") -> bool:
        """Launch the app ``package`` (optionally a specific ``activity``)."""

    @abstractmethod
    async def take_screenshot(self) -> Tuple[str, bytes]:
        """Capture the screen and return a ``(format, image bytes)`` pair."""

    @abstractmethod
    async def get_phone_state(self) -> Dict[str, Any]:
        """Return a dictionary describing the current device state."""

    @abstractmethod
    async def list_packages(self, include_system_apps: bool = False) -> List[str]:
        """List package names, optionally including system apps."""

    @abstractmethod
    async def remember(self, information: str) -> str:
        """Store ``information`` for later retrieval via ``get_memory``."""

    @abstractmethod
    async def get_memory(self) -> List[str]:
        """Return the stored memory items."""

    @abstractmethod
    async def extract(self, filename: Optional[str] = None) -> str:
        """Extract data from the device, optionally into ``filename``."""

    @abstractmethod
    def complete(self, success: bool, reason: str = "") -> bool:
        """Mark the task as finished with the given outcome and reason."""
72
+
73
+
74
def describe_tools(tools: Tools) -> Dict[str, Callable[..., Any]]:
    """
    Build the name-to-callable table of tools exposed by a Tools instance.

    Args:
        tools: The Tools instance whose methods are exposed.

    Returns:
        A dictionary mapping each tool name to the corresponding bound
        method of ``tools``, in the order listed below.
    """
    exposed_names = (
        # UI interaction
        "swipe",
        "input_text",
        "press_key",
        "tap_by_index",
        # "tap_by_coordinates" is currently disabled
        # App management
        "start_app",
        "list_packages",
        # state management
        "extract",
        "remember",
        "complete",
    )
    return {tool_name: getattr(tools, tool_name) for tool_name in exposed_names}