droidrun 0.2.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. droidrun/__init__.py +16 -11
  2. droidrun/__main__.py +1 -1
  3. droidrun/adb/__init__.py +3 -3
  4. droidrun/adb/device.py +1 -1
  5. droidrun/adb/manager.py +2 -2
  6. droidrun/agent/__init__.py +6 -0
  7. droidrun/agent/codeact/__init__.py +2 -4
  8. droidrun/agent/codeact/codeact_agent.py +330 -235
  9. droidrun/agent/codeact/events.py +12 -20
  10. droidrun/agent/codeact/prompts.py +0 -52
  11. droidrun/agent/common/default.py +5 -0
  12. droidrun/agent/common/events.py +4 -0
  13. droidrun/agent/context/__init__.py +23 -0
  14. droidrun/agent/context/agent_persona.py +15 -0
  15. droidrun/agent/context/context_injection_manager.py +66 -0
  16. droidrun/agent/context/episodic_memory.py +15 -0
  17. droidrun/agent/context/personas/__init__.py +11 -0
  18. droidrun/agent/context/personas/app_starter.py +44 -0
  19. droidrun/agent/context/personas/default.py +95 -0
  20. droidrun/agent/context/personas/extractor.py +52 -0
  21. droidrun/agent/context/personas/ui_expert.py +107 -0
  22. droidrun/agent/context/reflection.py +20 -0
  23. droidrun/agent/context/task_manager.py +124 -0
  24. droidrun/agent/droid/__init__.py +2 -2
  25. droidrun/agent/droid/droid_agent.py +269 -325
  26. droidrun/agent/droid/events.py +28 -0
  27. droidrun/agent/oneflows/reflector.py +265 -0
  28. droidrun/agent/planner/__init__.py +2 -4
  29. droidrun/agent/planner/events.py +9 -13
  30. droidrun/agent/planner/planner_agent.py +288 -0
  31. droidrun/agent/planner/prompts.py +33 -53
  32. droidrun/agent/utils/__init__.py +3 -0
  33. droidrun/agent/utils/async_utils.py +1 -40
  34. droidrun/agent/utils/chat_utils.py +265 -48
  35. droidrun/agent/utils/executer.py +49 -14
  36. droidrun/agent/utils/llm_picker.py +14 -10
  37. droidrun/agent/utils/trajectory.py +184 -0
  38. droidrun/cli/__init__.py +1 -1
  39. droidrun/cli/logs.py +283 -0
  40. droidrun/cli/main.py +364 -441
  41. droidrun/tools/__init__.py +5 -10
  42. droidrun/tools/{actions.py → adb.py} +381 -412
  43. droidrun/tools/ios.py +596 -0
  44. droidrun/tools/tools.py +95 -0
  45. droidrun-0.3.1.dist-info/METADATA +150 -0
  46. droidrun-0.3.1.dist-info/RECORD +50 -0
  47. droidrun/agent/planner/task_manager.py +0 -355
  48. droidrun/agent/planner/workflow.py +0 -371
  49. droidrun/tools/device.py +0 -29
  50. droidrun/tools/loader.py +0 -60
  51. droidrun-0.2.0.dist-info/METADATA +0 -373
  52. droidrun-0.2.0.dist-info/RECORD +0 -32
  53. {droidrun-0.2.0.dist-info → droidrun-0.3.1.dist-info}/WHEEL +0 -0
  54. {droidrun-0.2.0.dist-info → droidrun-0.3.1.dist-info}/entry_points.txt +0 -0
  55. {droidrun-0.2.0.dist-info → droidrun-0.3.1.dist-info}/licenses/LICENSE +0 -0
droidrun/tools/ios.py ADDED
@@ -0,0 +1,596 @@
1
+ """
2
+ UI Actions - Core UI interaction tools for iOS device control.
3
+ """
4
+
5
+ import re
6
+ import time
7
+ import asyncio
8
+ from typing import Optional, Dict, Tuple, List, Any
9
+ import logging
10
+ import aiohttp
11
+ from droidrun.tools.tools import Tools
12
+
13
# Module logger; all iOS tool messages are emitted under the fixed name "IOS".
logger = logging.getLogger("IOS")

# Bundle identifiers that IOSTools.list_packages adds when system apps are
# requested. The first entry is the droidrun-ios-portal bundle; every other
# entry shares the "com.apple." prefix, so only the suffixes are spelled out.
SYSTEM_BUNDLE_IDENTIFIERS = [
    "ai.droidrun.droidrun-ios-portal",
    *(
        f"com.apple.{suffix}"
        for suffix in (
            "Bridge",
            "DocumentsApp",
            "Fitness",
            "Health",
            "Maps",
            "MobileAddressBook",
            "MobileSMS",
            "Passbook",
            "Passwords",
            "Preferences",
            "PreviewShell",
            "mobilecal",
            "mobilesafari",
            "mobileslideshow",
            "news",
            "reminders",
            "shortcuts",
            "webapp",
        )
    ),
]
36
+
37
+
38
+ class IOSTools(Tools):
39
+ """Core UI interaction tools for iOS device control."""
40
+
41
def __init__(self, url: str, bundle_identifiers: Optional[List[str]] = None) -> None:
    """
    Initialise per-device state for an iOS endpoint reachable at ``url``.

    Args:
        url: Base URL of the device endpoint; request paths such as
            ``/vision/a11y`` are appended to it by the other methods.
        bundle_identifiers: Bundle identifiers reported by ``list_packages``.
            When omitted, each instance gets its own empty list.
    """
    self.clickable_elements_cache: List[Dict[str, Any]] = []
    self.url = url
    self.last_screenshot = None
    self.reason = None
    self.success = None
    self.finished = False
    self.memory: List[str] = []
    self.screenshots: List[Dict[str, Any]] = []
    # Rect ("x,y,width,height") of the last tapped element, reused by input_text
    self.last_tapped_rect: Optional[str] = None
    # A literal ``[]`` default is created once at definition time and would be
    # shared (and mutated) across every instance, so build a fresh list here.
    self.bundle_identifiers = [] if bundle_identifiers is None else bundle_identifiers
    logger.info(f"iOS device URL: {url}")
55
+
56
async def get_state(
    self, serial: Optional[str] = None
) -> Optional[Dict[str, Any]]:
    """
    Fetch the accessibility tree from the device and cache its interactive elements.

    Issues ``GET {self.url}/vision/a11y``, parses the ``accessibilityTree``
    field of the JSON reply with ``_parse_ios_accessibility_tree`` and stores
    the result in ``self.clickable_elements_cache`` for ``tap_by_index``.

    Args:
        serial: Unused; the instance URL identifies the device.

    Returns:
        ``{"a11y_tree": elements}`` on success. ``None`` when the request,
        the JSON decoding or the parsing fails; this is best-effort by design,
        so the failure is logged rather than raised.
    """
    try:
        async with aiohttp.ClientSession() as session:
            a11y_url = f"{self.url}/vision/a11y"
            async with session.get(a11y_url) as response:
                if response.status != 200:
                    # Report the HTTP failure directly instead of raising an
                    # exception that the handler below would only log again.
                    logger.error(
                        f"Failed to get accessibility data: HTTP {response.status}"
                    )
                    return None
                a11y_data = await response.json()

        elements = self._parse_ios_accessibility_tree(
            a11y_data["accessibilityTree"]
        )
    except Exception as e:
        logger.error(f"Error getting clickable elements: {e}")
        return None

    # Cache the elements for tap_by_index usage
    self.clickable_elements_cache = elements
    return {"a11y_tree": self.clickable_elements_cache}
97
+
98
+ def _parse_ios_accessibility_tree(self, a11y_data: str) -> List[Dict[str, Any]]:
99
+ """
100
+ Parse iOS accessibility tree format into structured elements.
101
+
102
+ Args:
103
+ a11y_data: Raw accessibility data from iOS device
104
+
105
+ Returns:
106
+ List of parsed UI elements with coordinates and properties
107
+ """
108
+ elements = []
109
+ lines = a11y_data.strip().split("\n")
110
+
111
+ # Track current element index
112
+ element_index = 0
113
+
114
+ for line in lines:
115
+ # Skip empty lines and header lines
116
+ if (
117
+ not line.strip()
118
+ or line.startswith("Attributes:")
119
+ or line.startswith("Element subtree:")
120
+ or line.startswith("Path to element:")
121
+ or line.startswith("Query chain:")
122
+ ):
123
+ continue
124
+
125
+ # Parse UI elements - look for lines with coordinates
126
+ # Format: ElementType, {{x, y}, {width, height}}, [optional properties]
127
+ coord_match = re.search(
128
+ r"\{\{([0-9.]+),\s*([0-9.]+)\},\s*\{([0-9.]+),\s*([0-9.]+)\}\}", line
129
+ )
130
+
131
+ if coord_match:
132
+ x, y, width, height = map(float, coord_match.groups())
133
+
134
+ # Extract element type (the text before the first comma)
135
+ element_type_match = re.match(r"\s*(.+?),", line)
136
+ element_type = (
137
+ element_type_match.group(1).strip()
138
+ if element_type_match
139
+ else "Unknown"
140
+ )
141
+
142
+ # Remove leading arrows and spaces
143
+ element_type = re.sub(r"^[→\s]+", "", element_type)
144
+
145
+ # Extract label if present
146
+ label_match = re.search(r"label:\s*'([^']*)'", line)
147
+ label = label_match.group(1) if label_match else ""
148
+
149
+ # Extract identifier if present
150
+ identifier_match = re.search(r"identifier:\s*'([^']*)'", line)
151
+ identifier = identifier_match.group(1) if identifier_match else ""
152
+
153
+ # Extract placeholder value if present
154
+ placeholder_match = re.search(r"placeholderValue:\s*'([^']*)'", line)
155
+ placeholder = placeholder_match.group(1) if placeholder_match else ""
156
+
157
+ # Extract value if present
158
+ value_match = re.search(r"value:\s*([^,}]+)", line)
159
+ value = value_match.group(1).strip() if value_match else ""
160
+
161
+ # Calculate rect string for iOS tap API (x,y,width,height format)
162
+ rect_str = f"{x},{y},{width},{height}"
163
+
164
+ # Create element dictionary
165
+ element = {
166
+ "index": element_index,
167
+ "type": element_type,
168
+ "className": element_type,
169
+ "text": label or identifier or placeholder or "",
170
+ "label": label,
171
+ "identifier": identifier,
172
+ "placeholder": placeholder,
173
+ "value": value,
174
+ "bounds": f"{x},{y},{x+width},{y+height}", # left,top,right,bottom format for compatibility
175
+ "rect": rect_str, # x,y,width,height format for iOS API
176
+ "x": x,
177
+ "y": y,
178
+ "width": width,
179
+ "height": height,
180
+ "center_x": x + width / 2,
181
+ "center_y": y + height / 2,
182
+ }
183
+
184
+ # Only include interactive elements (buttons, text fields, etc.)
185
+ interactive_types = [
186
+ "Button",
187
+ "SearchField",
188
+ "TextField",
189
+ "Cell",
190
+ "Switch",
191
+ "Slider",
192
+ "Stepper",
193
+ "Picker",
194
+ "Link",
195
+ ]
196
+ if any(
197
+ interactive_type in element_type
198
+ for interactive_type in interactive_types
199
+ ):
200
+ elements.append(element)
201
+ element_index += 1
202
+
203
+ return elements
204
+
205
async def tap_by_index(self, index: int, serial: Optional[str] = None) -> str:
    """
    Tap the cached UI element that carries the given index.

    Looks the element up in ``self.clickable_elements_cache`` (filled by
    ``get_state``), posts its frame to ``{self.url}/gestures/tap`` and
    remembers the frame in ``self.last_tapped_rect`` for ``input_text``.

    Args:
        index: Index of the element to tap
        serial: Unused; the instance URL identifies the device.

    Returns:
        A human-readable result message. Failures are reported as strings
        starting with "Error:" rather than raised.
    """

    def find_element_by_index(elements, target_index):
        """Depth-first search for the element whose "index" equals target_index."""
        for item in elements:
            if item.get("index") == target_index:
                return item
            # Check children if present
            found = find_element_by_index(item.get("children", []), target_index)
            if found:
                return found
        return None

    try:
        if not self.clickable_elements_cache:
            # get_state is the method that fills the cache
            return "Error: No UI elements cached. Call get_state first."

        element = find_element_by_index(self.clickable_elements_cache, index)

        if not element:
            # List (at most 20) available indices to help the caller
            indices = sorted(
                elem.get("index")
                for elem in self.clickable_elements_cache
                if elem.get("index") is not None
            )
            indices_str = ", ".join(str(idx) for idx in indices[:20])
            if len(indices) > 20:
                indices_str += f"... and {len(indices) - 20} more"

            return f"Error: No element found with index {index}. Available indices: {indices_str}"

        # No fallback values here: a default of 0 would hide missing geometry
        # and send a tap for a zero-sized frame at the screen origin.
        x = element.get("x")
        y = element.get("y")
        width = element.get("width")
        height = element.get("height")

        if any(part is None for part in (x, y, width, height)):
            element_text = element.get("text", "No text")
            element_class = element.get("className", "Unknown class")
            return f"Error: Element with index {index} ('{element_text}', {element_class}) has no coordinates and cannot be tapped"

        # Format rect in iOS format: {{x,y},{w,h}}
        ios_rect = f"{{{{{x},{y}}},{{{width},{height}}}}}"

        # Store the rect for potential text input (simple x,y,w,h format)
        self.last_tapped_rect = f"{x},{y},{width},{height}"

        async with aiohttp.ClientSession() as session:
            tap_url = f"{self.url}/gestures/tap"
            payload = {"rect": ios_rect, "count": 1, "longPress": False}

            logger.info(f"payload {payload}")

            async with session.post(tap_url, json=payload) as response:
                if response.status != 200:
                    return f"Error: Failed to tap element. HTTP {response.status}"

                # Add a small delay to allow UI to update
                await asyncio.sleep(0.5)

                return " | ".join(
                    [
                        f"Tapped element with index {index}",
                        f"Text: '{element.get('text', 'No text')}'",
                        f"Class: {element.get('className', 'Unknown class')}",
                        f"Rect: {ios_rect}",
                    ]
                )

    except Exception as e:
        return f"Error: {str(e)}"
300
+
301
+ """async def tap_by_coordinates(self, x: int, y: int) -> bool:
302
+ # Format rect in iOS format: {{x,y},{w,h}}
303
+ width = 1
304
+ height = 1
305
+ ios_rect = f"{{{{{x},{y}}},{{{width},{height}}}}}"
306
+
307
+ # Make the tap request
308
+ async with aiohttp.ClientSession() as session:
309
+ tap_url = f"{self.url}/gestures/tap"
310
+ payload = {"rect": ios_rect, "count": 1, "longPress": False}
311
+
312
+ logger.info(f"payload {payload}")
313
+
314
+ async with session.post(tap_url, json=payload) as response:
315
+ if response.status == 200:
316
+ return True
317
+ else:
318
+ return False"""
319
+
320
async def tap(self, index: int) -> str:
    """
    Tap a UI element by index; shorthand for ``tap_by_index``.

    Args:
        index: Index of the element to tap

    Returns:
        The result message produced by ``tap_by_index``
    """
    outcome = await self.tap_by_index(index)
    return outcome
334
+
335
async def swipe(
    self, start_x: int, start_y: int, end_x: int, end_y: int, duration_ms: int = 300
) -> bool:
    """
    Perform a directional swipe starting at (start_x, start_y).

    Only the start point and a direction are sent to the device. The
    direction is derived from the dominant axis of the movement; the end
    point is otherwise unused. When start and end are identical the computed
    direction is "up".

    Args:
        start_x: Starting X coordinate
        start_y: Starting Y coordinate
        end_x: Ending X coordinate (used only to derive the direction)
        end_y: Ending Y coordinate (used only to derive the direction)
        duration_ms: Duration of swipe in milliseconds (not used in iOS API)

    Returns:
        True when the device answers HTTP 200, False on any other status or error
    """
    try:
        delta_x = end_x - start_x
        delta_y = end_y - start_y

        # The axis with the larger absolute movement wins; ties go vertical
        if abs(delta_x) > abs(delta_y):
            direction = "left" if delta_x <= 0 else "right"
        else:
            direction = "up" if delta_y <= 0 else "down"

        swipe_url = f"{self.url}/gestures/swipe"
        payload = {"x": float(start_x), "y": float(start_y), "dir": direction}

        async with aiohttp.ClientSession() as session:
            async with session.post(swipe_url, json=payload) as response:
                if response.status != 200:
                    logger.error(f"Failed to swipe: HTTP {response.status}")
                    return False

                logger.info(
                    f"Swiped from ({start_x}, {start_y}) to ({end_x}, {end_y}) direction: {direction}"
                )
                return True

    except Exception as e:
        logger.error(f"Error performing swipe: {e}")
        return False
378
+
379
async def input_text(self, text: str, serial: Optional[str] = None) -> str:
    """
    Send text to the device via ``{self.url}/inputs/type``.

    The request targets the rect stored by the last successful
    ``tap_by_index`` call, or "0,0,100,100" when nothing was tapped yet.

    Args:
        text: Text to input
        serial: Unused; the instance URL identifies the device.

    Returns:
        Result message; the echoed text is truncated to 50 characters
    """
    try:
        target_rect = self.last_tapped_rect or "0,0,100,100"
        type_url = f"{self.url}/inputs/type"
        payload = {"rect": target_rect, "text": text}

        async with aiohttp.ClientSession() as session:
            async with session.post(type_url, json=payload) as response:
                if response.status != 200:
                    return f"Error: Failed to input text. HTTP {response.status}"

                # Wait for text input to complete
                await asyncio.sleep(0.5)
                ellipsis = "..." if len(text) > 50 else ""
                return f"Text input completed: {text[:50]}{ellipsis}"

    except Exception as e:
        return f"Error sending text input: {str(e)}"
407
+
408
async def back(self) -> str:
    """
    Placeholder for a "back" action.

    Raises:
        NotImplementedError: Always.
    """
    message = "Back is not yet implemented for iOS"
    raise NotImplementedError(message)
410
+
411
async def press_key(self, keycode: int) -> str:
    # TODO: refactor this. its not about physical keys but BACK, ENTER, DELETE, etc.
    """
    Press a key on the iOS device via ``{self.url}/inputs/key``.

    Named key codes:
    - 0: HOME
    - 4: ACTION
    - 5: CAMERA

    Any other code is sent unchanged and reported by its number.

    Args:
        keycode: iOS keycode to press

    Returns:
        Result message; errors are returned as strings, not raised
    """
    try:
        known_keys = {0: "HOME", 4: "ACTION", 5: "CAMERA"}
        display_name = known_keys.get(keycode, str(keycode))

        key_url = f"{self.url}/inputs/key"
        payload = {"key": keycode}

        async with aiohttp.ClientSession() as session:
            async with session.post(key_url, json=payload) as response:
                if response.status != 200:
                    return f"Error: Failed to press key. HTTP {response.status}"
                return f"Pressed key {display_name}"

    except Exception as e:
        return f"Error pressing key: {str(e)}"
440
+
441
async def start_app(self, package: str, activity: str = "") -> str:
    """
    Launch an app by posting its bundle identifier to ``{self.url}/inputs/launch``.

    Args:
        package: Bundle identifier (e.g., "com.apple.MobileSMS")
        activity: Optional activity name (not used on iOS)

    Returns:
        Result message; errors are returned as strings, not raised
    """
    try:
        launch_url = f"{self.url}/inputs/launch"
        payload = {"bundleIdentifier": package}

        async with aiohttp.ClientSession() as session:
            async with session.post(launch_url, json=payload) as response:
                if response.status != 200:
                    return f"Error: Failed to launch app {package}. HTTP {response.status}"

                # Wait for app to launch
                await asyncio.sleep(1.0)
                return f"Successfully launched app: {package}"

    except Exception as e:
        return f"Error launching app: {str(e)}"
463
+
464
async def take_screenshot(self) -> Tuple[str, bytes]:
    """
    Capture the device screen via ``GET {self.url}/vision/screenshot``.

    The raw bytes are appended, together with a timestamp, to
    ``self.screenshots`` and also kept in ``self.last_screenshot``.

    Returns:
        The tuple ``("PNG", image_bytes)``

    Raises:
        ValueError: On a non-200 response or any other failure; the message
            is prefixed with "Error taking screenshot: ".
    """
    try:
        async with aiohttp.ClientSession() as session:
            screenshot_url = f"{self.url}/vision/screenshot"
            async with session.get(screenshot_url) as response:
                if response.status != 200:
                    logger.error(
                        f"Failed to capture screenshot: HTTP {response.status}"
                    )
                    raise ValueError(
                        f"Failed to capture screenshot: HTTP {response.status}"
                    )

                image_bytes = await response.read()

                # Store screenshot with timestamp
                self.screenshots.append(
                    {"timestamp": time.time(), "data": image_bytes}
                )
                self.last_screenshot = image_bytes

                logger.info(
                    f"Screenshot captured successfully, size: {len(image_bytes)} bytes"
                )
                return ("PNG", image_bytes)

    except Exception as e:
        # Also reached by the ValueError raised above for non-200 replies
        logger.error(f"Error capturing screenshot: {e}")
        raise ValueError(f"Error taking screenshot: {str(e)}")
500
+
501
async def get_phone_state(self, serial: Optional[str] = None) -> Dict[str, Any]:
    """
    Query ``GET {self.url}/vision/state`` for the current activity and keyboard visibility.

    Args:
        serial: Optional device serial number (not used for iOS)

    Returns:
        A dictionary that always contains ``current_activity`` and
        ``keyboard_shown``. On a non-200 reply or an exception these fall
        back to ``"Unknown"`` / ``False`` and an ``error`` entry is added
        (plus ``message`` for exceptions).
    """
    try:
        async with aiohttp.ClientSession() as session:
            state_url = f"{self.url}/vision/state"
            async with session.get(state_url) as response:
                if response.status == 200:
                    state_data = await response.json()
                    return {
                        "current_activity": state_data["activity"],
                        "keyboard_shown": state_data["keyboardShown"],
                    }

                return {
                    "error": f"Failed to get device state: HTTP {response.status}",
                    "current_activity": "Unknown",
                    "keyboard_shown": False,
                }

    except Exception as e:
        # Same fallback keys as the non-200 branch, so callers can read the
        # state fields without checking which failure path was taken.
        return {
            "error": str(e),
            "message": f"Error getting phone state: {str(e)}",
            "current_activity": "Unknown",
            "keyboard_shown": False,
        }
532
+
533
async def list_packages(self, include_system_apps: bool = True) -> List[str]:
    """
    Return the known bundle identifiers, de-duplicated and sorted.

    Args:
        include_system_apps: When true, the entries of
            ``SYSTEM_BUNDLE_IDENTIFIERS`` are merged into the result.

    Returns:
        Sorted list of unique bundle identifiers
    """
    packages = set()
    packages.update(self.bundle_identifiers)
    if include_system_apps:
        packages |= set(SYSTEM_BUNDLE_IDENTIFIERS)
    return sorted(packages)
538
+
539
+ async def extract(self, filename: str | None = None) -> str:
540
+ # TODO
541
+ return "not implemented"
542
+
543
async def remember(self, information: str) -> str:
    """
    Store a piece of information in ``self.memory``.

    The text is stored with surrounding whitespace removed. Only the 10 most
    recent entries are kept.

    Args:
        information: The information to remember

    Returns:
        Confirmation message, or an error message when ``information`` is
        not a string or contains nothing but whitespace
    """
    # Reject blank strings too; otherwise "" would be stored after stripping
    if not isinstance(information, str) or not information.strip():
        return "Error: Please provide valid information to remember."

    self.memory.append(information.strip())

    # Limit memory size to prevent context overflow (keep most recent items)
    max_memory_items = 10
    if len(self.memory) > max_memory_items:
        self.memory = self.memory[-max_memory_items:]

    return f"Remembered: {information}"
569
+
570
def get_memory(self) -> List[str]:
    """
    Return a shallow copy of the stored memory items.

    Returns:
        New list with the stored items; mutating it does not affect ``self.memory``
    """
    snapshot = list(self.memory)
    return snapshot
578
+
579
def complete(self, success: bool, reason: str = "") -> None:
    """
    Mark the task as finished and record its outcome.

    Args:
        success: Indicates if the task was successful.
        reason: Reason for failure/success. Required when ``success`` is
            false; defaults to "Task completed successfully." otherwise.

    Raises:
        ValueError: If ``success`` is false and no reason is given. No
            attribute is modified in that case.
    """
    # Validate before touching any state so a rejected call leaves
    # success/reason/finished exactly as they were.
    if not success and not reason:
        raise ValueError("Reason for failure is required if success is False.")

    if success:
        self.success = True
        self.reason = reason or "Task completed successfully."
    else:
        self.success = False
        self.reason = reason
    self.finished = True
droidrun/tools/tools.py ADDED
@@ -0,0 +1,95 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import List, Optional, Dict, Any
3
+ import logging
4
+ from typing import Tuple, Dict, Callable, Any, Optional
5
+
6
+ # Get a logger for this module
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
class Tools(ABC):
    """
    Abstract interface for device-control tool backends.

    Every method is abstract; a concrete backend must override all of them
    before it can be instantiated.
    """

    @abstractmethod
    async def get_state(self) -> Dict[str, Any]:
        """Return the current UI state of the device as a dictionary."""

    @abstractmethod
    async def tap_by_index(self, index: int) -> bool:
        """Tap the UI element identified by ``index``."""

    # @abstractmethod
    # async def tap_by_coordinates(self, x: int, y: int) -> bool:
    #     pass

    @abstractmethod
    async def swipe(
        self, start_x: int, start_y: int, end_x: int, end_y: int, duration_ms: int = 300
    ) -> bool:
        """Swipe from (start_x, start_y) towards (end_x, end_y)."""

    @abstractmethod
    async def input_text(self, text: str) -> bool:
        """Type ``text`` on the device."""

    @abstractmethod
    async def back(self) -> bool:
        """Perform a "back" action."""

    @abstractmethod
    async def press_key(self, keycode: int) -> bool:
        """Press the key identified by ``keycode``."""

    @abstractmethod
    async def start_app(self, package: str, activity: str = "") -> bool:
        """Launch the app identified by ``package`` (and optionally ``activity``)."""

    @abstractmethod
    async def take_screenshot(self) -> Tuple[str, bytes]:
        """Capture the screen and return a (format, image bytes) tuple."""

    @abstractmethod
    async def list_packages(self, include_system_apps: bool = False) -> List[str]:
        """List app identifiers, optionally including system apps."""

    @abstractmethod
    async def remember(self, information: str) -> str:
        """Store ``information`` for later retrieval via ``get_memory``."""

    @abstractmethod
    async def get_memory(self) -> List[str]:
        """Return the stored memory items."""

    @abstractmethod
    async def extract(self, filename: Optional[str] = None) -> str:
        """Extract data, optionally to ``filename``."""

    @abstractmethod
    def complete(self, success: bool, reason: str = "") -> bool:
        """Mark the task as finished with the given outcome and reason."""
68
+
69
+
70
def describe_tools(tools: Tools) -> Dict[str, Callable[..., Any]]:
    """
    Build the name-to-callable table of tools exposed by a Tools instance.

    Args:
        tools: The Tools instance whose bound methods are exposed.

    Returns:
        A dictionary mapping each tool name to the bound method of the same
        name on ``tools``.
    """
    exposed_names = (
        # UI interaction
        "swipe",
        "input_text",
        "press_key",
        "tap_by_index",
        # "tap_by_coordinates" is not exposed
        # App management
        "start_app",
        "list_packages",
        # state management
        "extract",
        "remember",
        "complete",
    )
    return {name: getattr(tools, name) for name in exposed_names}