droidrun 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
droidrun/cli/main.py CHANGED
@@ -1,123 +1,446 @@
1
1
  """
2
2
  DroidRun CLI - Command line interface for controlling Android devices through LLM agents.
3
3
  """
4
+ if __name__ == "__main__":
5
+ import sys
6
+ import os
7
+ _project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
8
+ sys.path.insert(0, _project_root)
9
+ __package__ = "droidrun.cli"
10
+
4
11
 
5
12
  import asyncio
6
13
  import click
7
14
  import os
15
+ import logging
16
+ import time
17
+ import queue
8
18
  from rich.console import Console
9
- from rich import print as rprint
10
- from droidrun.tools import DeviceManager
11
- from droidrun.agent import run_agent
19
+ from rich.live import Live
20
+ from rich.panel import Panel
21
+ from rich.layout import Layout
22
+ from rich.text import Text
23
+ from rich.spinner import Spinner
24
+ from rich.align import Align
25
+ from ..tools import DeviceManager, Tools, load_tools
26
+ from ..agent.droid import DroidAgent
27
+ from ..agent.utils.llm_picker import load_llm
12
28
  from functools import wraps
13
-
14
- # Import the install_app function directly for the setup command
15
- from droidrun.tools.actions import install_app
16
-
17
29
  console = Console()
18
30
  device_manager = DeviceManager()
19
31
 
32
+ log_queue = queue.Queue()
33
+ current_step = "Initializing..."
34
+ spinner = Spinner("dots")
35
+
36
+ class RichHandler(logging.Handler):
37
+ def emit(self, record):
38
+ log_record = self.format(record)
39
+ log_queue.put(log_record)
40
+
20
41
  def coro(f):
21
42
  @wraps(f)
22
43
  def wrapper(*args, **kwargs):
23
44
  return asyncio.run(f(*args, **kwargs))
24
45
  return wrapper
25
46
 
26
- # Define the run command as a standalone function to be used as both a command and default
27
- @coro
28
- async def run_command(command: str, device: str | None, provider: str, model: str, steps: int, vision: bool):
29
- """Run a command on your Android device using natural language."""
30
- console.print(f"[bold blue]Executing command:[/] {command}")
47
+ def create_layout():
48
+ """Create a layout with logs at top and status at bottom"""
49
+ layout = Layout()
50
+ layout.split(
51
+ Layout(name="logs"),
52
+ Layout(name="goal", size=3),
53
+ Layout(name="status", size=3)
54
+ )
55
+ return layout
56
+
57
+ def update_layout(layout, log_list, step_message, current_time, goal=None, completed=False, success=None):
58
+ """Update the layout with current logs and step information"""
59
+ from rich.text import Text
60
+ import shutil
31
61
 
32
- # Auto-detect Gemini if model starts with "gemini-"
33
- if model and model.startswith("gemini-"):
34
- provider = "gemini"
62
+ terminal_height = shutil.get_terminal_size().lines
63
+ other_components_height = 3 + 3 + 4 + 1 + 4
64
+ available_log_lines = max(5, terminal_height - other_components_height)
35
65
 
36
- # Print vision status
37
- if vision:
38
- console.print("[blue]Vision capabilities are enabled.[/]")
39
- else:
40
- console.print("[blue]Vision capabilities are disabled.[/]")
66
+ visible_logs = log_list[-available_log_lines:] if len(log_list) > available_log_lines else log_list
41
67
 
42
- # Get API keys from environment variables
43
- api_key = None
44
- if provider.lower() == 'openai':
45
- api_key = os.environ.get('OPENAI_API_KEY')
46
- if not api_key:
47
- console.print("[bold red]Error:[/] OPENAI_API_KEY environment variable not set")
48
- return
49
- if not model:
50
- model = "gpt-4o-mini"
51
- elif provider.lower() == 'anthropic':
52
- api_key = os.environ.get('ANTHROPIC_API_KEY')
53
- if not api_key:
54
- console.print("[bold red]Error:[/] ANTHROPIC_API_KEY environment variable not set")
55
- return
56
- if not model:
57
- model = "claude-3-sonnet-20240229"
58
- elif provider.lower() == 'gemini':
59
- api_key = os.environ.get('GEMINI_API_KEY')
60
- if not api_key:
61
- console.print("[bold red]Error:[/] GEMINI_API_KEY environment variable not set")
62
- return
63
- if not model:
64
- model = "gemini-2.0-flash"
68
+ log_content = "\n".join(visible_logs)
69
+
70
+ layout["logs"].update(Panel(
71
+ log_content,
72
+ title=f"Logs (showing {len(visible_logs)} most recent of {len(log_list)} total)",
73
+ border_style="blue",
74
+ title_align="left",
75
+ padding=(0, 1),
76
+ ))
77
+
78
+ if goal:
79
+ goal_text = Text(goal, style="bold")
80
+ layout["goal"].update(Panel(
81
+ goal_text,
82
+ title="Goal",
83
+ border_style="magenta",
84
+ title_align="left",
85
+ padding=(0, 1)
86
+ ))
87
+
88
+ step_display = Text()
89
+
90
+ if completed:
91
+ if success:
92
+ step_display.append("✓ ", style="bold green")
93
+ panel_title = "Completed Successfully"
94
+ panel_style = "green"
95
+ else:
96
+ step_display.append("✗ ", style="bold red")
97
+ panel_title = "Failed"
98
+ panel_style = "red"
65
99
  else:
66
- console.print(f"[bold red]Error:[/] Unsupported provider: {provider}")
67
- return
100
+ step_display.append(spinner.render(current_time))
101
+ step_display.append(" ")
102
+ panel_title = "Current Action"
103
+ panel_style = "green"
68
104
 
69
- try:
70
- # Try to find a device if none specified
71
- if not device:
72
- devices = await device_manager.list_devices()
73
- if not devices:
74
- console.print("[yellow]No devices connected.[/]")
75
- return
76
-
77
- device = devices[0].serial
78
- console.print(f"[blue]Using device:[/] {device}")
105
+ step_display.append(step_message)
106
+
107
+ layout["status"].update(Panel(
108
+ step_display,
109
+ title=panel_title,
110
+ border_style=panel_style,
111
+ title_align="left",
112
+ padding=(0, 1)
113
+ ))
114
+
115
+ @coro
116
+ async def run_command(command: str, device: str | None, provider: str, model: str, steps: int, vision: bool, base_url: str, reasoning: bool, tracing: bool, debug: bool, **kwargs):
117
+ """Run a command on your Android device using natural language."""
118
+ configure_logging(debug)
119
+
120
+ global current_step
121
+ current_step = "Initializing..."
122
+ logs = []
123
+ max_log_history = 1000
124
+ is_completed = False
125
+ is_success = None
126
+
127
+ layout = create_layout()
128
+
129
+ with Live(layout, refresh_per_second=20, console=console) as live:
130
+ def update_display():
131
+ current_time = time.time()
132
+ update_layout(
133
+ layout,
134
+ logs,
135
+ current_step,
136
+ current_time,
137
+ goal=command,
138
+ completed=is_completed,
139
+ success=is_success
140
+ )
141
+ live.refresh()
79
142
 
80
- # Set the device serial in the environment variable
81
- os.environ["DROIDRUN_DEVICE_SERIAL"] = device
82
- console.print(f"[blue]Set DROIDRUN_DEVICE_SERIAL to:[/] {device}")
143
+ def process_new_logs():
144
+ log_count = 0
145
+ while not log_queue.empty():
146
+ try:
147
+ log = log_queue.get_nowait()
148
+ logs.append(log)
149
+ log_count += 1
150
+ if len(logs) > max_log_history:
151
+ logs.pop(0)
152
+ except queue.Empty:
153
+ break
154
+ return log_count > 0
83
155
 
84
- # Run the agent
85
- console.print("[bold blue]Running ReAct agent...[/]")
86
- console.print("[yellow]Press Ctrl+C to stop execution[/]")
156
+ async def process_logs():
157
+ global current_step
158
+ iteration = 0
159
+ while True:
160
+ if is_completed:
161
+ process_new_logs()
162
+ if iteration % 10 == 0:
163
+ update_display()
164
+ iteration += 1
165
+ await asyncio.sleep(0.1)
166
+ continue
167
+
168
+ new_logs_added = process_new_logs()
169
+
170
+ # Improve detection of the latest action from logs
171
+ latest_task = None
172
+ for log in reversed(logs[-50:]): # Search from most recent logs first
173
+ if "🔧 Executing task:" in log:
174
+ task_desc = log.split("🔧 Executing task:", 1)[1].strip()
175
+
176
+ if "Goal:" in task_desc:
177
+ goal_part = task_desc.split("Goal:", 1)[1].strip()
178
+ latest_task = goal_part
179
+ else:
180
+ latest_task = task_desc
181
+ break # Stop at the most recent task
182
+
183
+ if latest_task:
184
+ current_step = f"Executing: {latest_task}"
185
+
186
+ if new_logs_added or iteration % 5 == 0:
187
+ update_layout(
188
+ layout,
189
+ logs,
190
+ current_step,
191
+ time.time(),
192
+ goal=command,
193
+ completed=is_completed,
194
+ success=is_success
195
+ )
196
+
197
+ iteration += 1
198
+ await asyncio.sleep(0.05)
87
199
 
88
200
  try:
89
- steps = await run_agent(
90
- task=command,
91
- device_serial=device, # Still pass for backward compatibility
92
- llm_provider=provider,
93
- model_name=model,
94
- api_key=api_key,
95
- vision=vision
201
+ update_display()
202
+ logs.append(f"Executing command: {command}")
203
+
204
+ if not kwargs.get("temperature"):
205
+ kwargs["temperature"] = 0
206
+
207
+ current_step = "Setting up tools..."
208
+ update_display()
209
+
210
+ tool_list, tools_instance = await load_tools(serial=device)
211
+
212
+ if debug:
213
+ logs.append(f"Tools: {list(tool_list.keys())}")
214
+ update_display()
215
+
216
+ device_serial = tools_instance.serial
217
+ logs.append(f"Using device: {device_serial}")
218
+ update_display()
219
+
220
+ os.environ["DROIDRUN_DEVICE_SERIAL"] = device_serial
221
+
222
+ current_step = "Initializing LLM..."
223
+ update_display()
224
+
225
+ llm = load_llm(provider_name=provider, model=model, base_url=base_url, **kwargs)
226
+
227
+ current_step = "Initializing DroidAgent..."
228
+ update_display()
229
+
230
+ if reasoning:
231
+ logs.append("Using planning mode with reasoning")
232
+ else:
233
+ logs.append("Using direct execution mode without planning")
234
+
235
+ if tracing:
236
+ logs.append("Arize Phoenix tracing enabled")
237
+
238
+ update_display()
239
+
240
+ droid_agent = DroidAgent(
241
+ goal=command,
242
+ llm=llm,
243
+ tools_instance=tools_instance,
244
+ tool_list=tool_list,
245
+ max_steps=steps,
246
+ vision=vision,
247
+ timeout=1000,
248
+ max_retries=3,
249
+ reasoning=reasoning,
250
+ enable_tracing=tracing,
251
+ debug=debug
96
252
  )
97
253
 
98
- # Final message
99
- console.print(f"[bold green]Execution completed with {len(steps)} steps[/]")
254
+ logs.append("Press Ctrl+C to stop execution")
255
+ current_step = "Running agent..."
256
+ update_display()
257
+
258
+ try:
259
+ log_task = asyncio.create_task(process_logs())
260
+ result = None
261
+ try:
262
+ result = await droid_agent.run()
263
+
264
+ if result.get("success", False):
265
+ is_completed = True
266
+ is_success = True
267
+
268
+ if result.get("output"):
269
+ success_output = f"🎯 FINAL ANSWER: {result.get('output')}"
270
+ logs.append(success_output)
271
+ current_step = f"{result.get('output')}"
272
+ else:
273
+ current_step = result.get("reason", "Success")
274
+ else:
275
+ is_completed = True
276
+ is_success = False
277
+
278
+ current_step = result.get("reason", "Failed") if result else "Failed"
279
+
280
+ update_layout(
281
+ layout,
282
+ logs,
283
+ current_step,
284
+ time.time(),
285
+ goal=command,
286
+ completed=is_completed,
287
+ success=is_success
288
+ )
289
+
290
+ await asyncio.sleep(2)
291
+ finally:
292
+ log_task.cancel()
293
+ try:
294
+ await log_task
295
+ except asyncio.CancelledError:
296
+ pass
297
+
298
+ for _ in range(20):
299
+ process_new_logs()
300
+ await asyncio.sleep(0.05)
301
+
302
+ update_layout(
303
+ layout,
304
+ logs,
305
+ current_step,
306
+ time.time(),
307
+ goal=command,
308
+ completed=is_completed,
309
+ success=is_success
310
+ )
311
+
312
+ live.refresh()
313
+
314
+ await asyncio.sleep(3)
315
+
316
+ except KeyboardInterrupt:
317
+ logs.append("Execution stopped by user.")
318
+ current_step = "Stopped by user"
319
+
320
+ is_completed = True
321
+ is_success = False
322
+
323
+ update_layout(
324
+ layout,
325
+ logs,
326
+ current_step,
327
+ time.time(),
328
+ goal=command,
329
+ completed=is_completed,
330
+ success=is_success
331
+ )
332
+
333
+ except ValueError as e:
334
+ logs.append(f"Configuration Error: {e}")
335
+ current_step = f"Error: {e}"
336
+
337
+ is_completed = True
338
+ is_success = False
339
+
340
+ update_layout(
341
+ layout,
342
+ logs,
343
+ current_step,
344
+ time.time(),
345
+ goal=command,
346
+ completed=is_completed,
347
+ success=is_success
348
+ )
349
+
350
+ except Exception as e:
351
+ logs.append(f"An unexpected error occurred during agent execution: {e}")
352
+ current_step = f"Error: {e}"
353
+ if debug:
354
+ import traceback
355
+ logs.append(traceback.format_exc())
356
+
357
+ is_completed = True
358
+ is_success = False
359
+
360
+ update_layout(
361
+ layout,
362
+ logs,
363
+ current_step,
364
+ time.time(),
365
+ goal=command,
366
+ completed=is_completed,
367
+ success=is_success
368
+ )
369
+
370
+ update_display()
371
+ await asyncio.sleep(1)
372
+
100
373
  except ValueError as e:
101
- if "does not support vision" in str(e):
102
- console.print(f"[bold red]Vision Error:[/] {e}")
103
- console.print("[yellow]Please specify a vision-capable model with the --model flag.[/]")
104
- console.print("[blue]Recommended models:[/]")
105
- console.print(" - OpenAI: gpt-4o or gpt-4-vision")
106
- console.print(" - Anthropic: claude-3-opus-20240229 or claude-3-sonnet-20240229")
107
- console.print(" - Gemini: gemini-pro-vision")
108
- return
109
- else:
110
- raise # Re-raise other ValueError exceptions
111
-
112
- except Exception as e:
113
- console.print(f"[bold red]Error:[/] {e}")
374
+ logs.append(f"Error: {e}")
375
+ current_step = f"Error: {e}"
376
+
377
+ step_display = Text()
378
+ step_display.append(" ", style="bold red")
379
+ step_display.append(current_step)
380
+
381
+ layout["status"].update(Panel(
382
+ step_display,
383
+ title="Error",
384
+ border_style="red",
385
+ title_align="left",
386
+ padding=(0, 1)
387
+ ))
388
+ update_display()
389
+
390
+ except Exception as e:
391
+ logs.append(f"An unexpected error occurred during setup: {e}")
392
+ current_step = f"Error: {e}"
393
+ if debug:
394
+ import traceback
395
+ logs.append(traceback.format_exc())
396
+
397
+ step_display = Text()
398
+ step_display.append("⚠ ", style="bold red")
399
+ step_display.append(current_step)
400
+
401
+ layout["status"].update(Panel(
402
+ step_display,
403
+ title="Error",
404
+ border_style="red",
405
+ title_align="left",
406
+ padding=(0, 1)
407
+ ))
408
+ update_display()
409
+ await asyncio.sleep(1)
410
+
411
+ def configure_logging(debug: bool):
412
+ """Configure logging verbosity based on debug flag."""
413
+ root_logger = logging.getLogger()
414
+ droidrun_logger = logging.getLogger("droidrun")
415
+
416
+ # Clear existing handlers
417
+ for handler in root_logger.handlers[:]:
418
+ root_logger.removeHandler(handler)
419
+ for handler in droidrun_logger.handlers[:]:
420
+ droidrun_logger.removeHandler(handler)
421
+
422
+ rich_handler = RichHandler()
423
+
424
+ formatter = logging.Formatter('%(message)s')
425
+ rich_handler.setFormatter(formatter)
426
+
427
+ if debug:
428
+ rich_handler.setLevel(logging.DEBUG)
429
+ droidrun_logger.setLevel(logging.DEBUG)
430
+ root_logger.setLevel(logging.INFO)
431
+ else:
432
+ rich_handler.setLevel(logging.INFO)
433
+ droidrun_logger.setLevel(logging.INFO)
434
+ root_logger.setLevel(logging.WARNING)
435
+
436
+ droidrun_logger.addHandler(rich_handler)
437
+
438
+ log_queue.put(f"Logging level set to: {logging.getLevelName(droidrun_logger.level)}")
439
+
114
440
 
115
- # Custom Click multi-command class to handle both subcommands and default behavior
116
441
  class DroidRunCLI(click.Group):
117
442
  def parse_args(self, ctx, args):
118
- # Check if the first argument might be a task rather than a command
119
443
  if args and not args[0].startswith('-') and args[0] not in self.commands:
120
- # Insert the 'run' command before the first argument if it's not a known command
121
444
  args.insert(0, 'run')
122
445
  return super().parse_args(ctx, args)
123
446
 
@@ -129,14 +452,19 @@ def cli():
129
452
  @cli.command()
130
453
  @click.argument('command', type=str)
131
454
  @click.option('--device', '-d', help='Device serial number or IP address', default=None)
132
- @click.option('--provider', '-p', help='LLM provider (openai, anthropic, or gemini)', default='openai')
133
- @click.option('--model', '-m', help='LLM model name', default=None)
455
+ @click.option('--provider', '-p', help='LLM provider (openai, ollama, anthropic, gemini, deepseek)', default='Gemini')
456
+ @click.option('--model', '-m', help='LLM model name', default="models/gemini-2.5-pro-preview-05-06")
457
+ @click.option('--temperature', type=float, help='Temperature for LLM', default=0.2)
134
458
  @click.option('--steps', type=int, help='Maximum number of steps', default=15)
135
- @click.option('--vision', is_flag=True, help='Enable vision capabilities')
136
- def run(command: str, device: str | None, provider: str, model: str, steps: int, vision: bool):
459
+ @click.option('--vision', is_flag=True, help='Enable vision capabilities', default=True)
460
+ @click.option('--base_url', '-u', help='Base URL for API (e.g., OpenRouter or Ollama)', default=None)
461
+ @click.option('--reasoning/--no-reasoning', is_flag=True, help='Enable/disable planning with reasoning', default=False)
462
+ @click.option('--tracing', is_flag=True, help='Enable Arize Phoenix tracing', default=False)
463
+ @click.option('--debug', is_flag=True, help='Enable verbose debug logging', default=False)
464
+ def run(command: str, device: str | None, provider: str, model: str, steps: int, vision: bool, base_url: str, temperature: float, reasoning: bool, tracing: bool, debug: bool):
137
465
  """Run a command on your Android device using natural language."""
138
466
  # Call our standalone function
139
- return run_command(command, device, provider, model, steps, vision)
467
+ return run_command(command, device, provider, model, steps, vision, base_url, reasoning, tracing, debug, temperature=temperature)
140
468
 
141
469
  @cli.command()
142
470
  @coro
@@ -190,12 +518,10 @@ async def disconnect(serial: str):
190
518
  async def setup(path: str, device: str | None):
191
519
  """Install an APK file and enable it as an accessibility service."""
192
520
  try:
193
- # Check if APK file exists
194
521
  if not os.path.exists(path):
195
522
  console.print(f"[bold red]Error:[/] APK file not found at {path}")
196
523
  return
197
524
 
198
- # Try to find a device if none specified
199
525
  if not device:
200
526
  devices = await device_manager.list_devices()
201
527
  if not devices:
@@ -205,19 +531,16 @@ async def setup(path: str, device: str | None):
205
531
  device = devices[0].serial
206
532
  console.print(f"[blue]Using device:[/] {device}")
207
533
 
208
- # Set the device serial in the environment variable
209
534
  os.environ["DROIDRUN_DEVICE_SERIAL"] = device
210
535
  console.print(f"[blue]Set DROIDRUN_DEVICE_SERIAL to:[/] {device}")
211
536
 
212
- # Get a device object for ADB commands
213
537
  device_obj = await device_manager.get_device(device)
214
538
  if not device_obj:
215
539
  console.print(f"[bold red]Error:[/] Could not get device object for {device}")
216
540
  return
217
-
218
- # Step 1: Install the APK file
541
+ tools = Tools(serial=device)
219
542
  console.print(f"[bold blue]Step 1/2: Installing APK:[/] {path}")
220
- result = await install_app(path, False, True, device)
543
+ result = await tools.install_app(path, False, True)
221
544
 
222
545
  if "Error" in result:
223
546
  console.print(f"[bold red]Installation failed:[/] {result}")
@@ -225,17 +548,13 @@ async def setup(path: str, device: str | None):
225
548
  else:
226
549
  console.print(f"[bold green]Installation successful![/]")
227
550
 
228
- # Step 2: Enable the accessibility service with the specific command
229
551
  console.print(f"[bold blue]Step 2/2: Enabling accessibility service[/]")
230
552
 
231
- # Package name for reference in error message
232
553
  package = "com.droidrun.portal"
233
554
 
234
555
  try:
235
- # Use the exact command provided
236
556
  await device_obj._adb.shell(device, "settings put secure enabled_accessibility_services com.droidrun.portal/com.droidrun.portal.DroidrunPortalService")
237
557
 
238
- # Also enable accessibility services globally
239
558
  await device_obj._adb.shell(device, "settings put secure accessibility_enabled 1")
240
559
 
241
560
  console.print("[green]Accessibility service enabled successfully![/]")
@@ -245,7 +564,6 @@ async def setup(path: str, device: str | None):
245
564
  console.print(f"[yellow]Could not automatically enable accessibility service: {e}[/]")
246
565
  console.print("[yellow]Opening accessibility settings for manual configuration...[/]")
247
566
 
248
- # Fallback: Open the accessibility settings page
249
567
  await device_obj._adb.shell(device, "am start -a android.settings.ACCESSIBILITY_SETTINGS")
250
568
 
251
569
  console.print("\n[yellow]Please complete the following steps on your device:[/]")
@@ -259,7 +577,4 @@ async def setup(path: str, device: str | None):
259
577
  except Exception as e:
260
578
  console.print(f"[bold red]Error:[/] {e}")
261
579
  import traceback
262
- traceback.print_exc()
263
-
264
- if __name__ == '__main__':
265
- cli()
580
+ traceback.print_exc()
@@ -4,32 +4,11 @@ DroidRun Tools - Core functionality for Android device control.
4
4
 
5
5
  from .device import DeviceManager
6
6
  from .actions import (
7
- tap,
8
- swipe,
9
- input_text,
10
- press_key,
11
- start_app,
12
- install_app,
13
- uninstall_app,
14
- take_screenshot,
15
- list_packages,
16
- get_clickables,
17
- complete,
18
- extract,
7
+ Tools
19
8
  )
20
-
9
+ from .loader import load_tools
21
10
  __all__ = [
22
11
  'DeviceManager',
23
- 'tap',
24
- 'swipe',
25
- 'input_text',
26
- 'press_key',
27
- 'start_app',
28
- 'install_app',
29
- 'uninstall_app',
30
- 'take_screenshot',
31
- 'list_packages',
32
- 'get_clickables',
33
- 'complete',
34
- 'extract',
12
+ 'Tools',
13
+ 'load_tools'
35
14
  ]