oagi-core 0.10.1__py3-none-any.whl → 0.10.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,455 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Agent Execution Report</title>
7
+ <style>
8
+ body {
9
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
10
+ max-width: 1200px;
11
+ margin: 0 auto;
12
+ padding: 20px;
13
+ background: #f5f5f5;
14
+ }
15
+
16
+ h1 {
17
+ color: #333;
18
+ border-bottom: 2px solid #007bff;
19
+ padding-bottom: 10px;
20
+ }
21
+
22
+ .step, .plan {
23
+ background: white;
24
+ border-radius: 8px;
25
+ padding: 20px;
26
+ margin: 20px 0;
27
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
28
+ }
29
+
30
+ .step h2 {
31
+ margin-top: 0;
32
+ color: #007bff;
33
+ }
34
+
35
+ .plan {
36
+ background: #e7f3ff;
37
+ }
38
+
39
+ .plan h3 {
40
+ margin-top: 0;
41
+ color: #0056b3;
42
+ }
43
+
44
+ .timestamp {
45
+ color: #666;
46
+ font-size: 0.9em;
47
+ }
48
+
49
+ .screenshot-container {
50
+ position: relative;
51
+ display: inline-block;
52
+ margin: 10px 0;
53
+ }
54
+
55
+ .screenshot {
56
+ max-width: 100%;
57
+ border: 1px solid #ddd;
58
+ border-radius: 4px;
59
+ display: block;
60
+ }
61
+
62
+ .reasoning {
63
+ background: #f8f9fa;
64
+ padding: 10px;
65
+ border-left: 3px solid #007bff;
66
+ margin: 10px 0;
67
+ white-space: pre-wrap;
68
+ }
69
+
70
+ .actions {
71
+ margin: 10px 0;
72
+ }
73
+
74
+ .actions ul {
75
+ margin: 5px 0;
76
+ padding-left: 20px;
77
+ }
78
+
79
+ .actions code {
80
+ background: #e9ecef;
81
+ padding: 2px 6px;
82
+ border-radius: 3px;
83
+ }
84
+
85
+ .complete {
86
+ background: #d4edda;
87
+ color: #155724;
88
+ padding: 10px;
89
+ border-radius: 4px;
90
+ margin-top: 10px;
91
+ }
92
+
93
+ .action-result {
94
+ padding: 10px;
95
+ margin: 5px 0;
96
+ }
97
+
98
+ .success {
99
+ color: #155724;
100
+ }
101
+
102
+ .error {
103
+ color: #721c24;
104
+ background: #f8d7da;
105
+ padding: 10px;
106
+ border-radius: 4px;
107
+ }
108
+
109
+ .log {
110
+ background: #fff3cd;
111
+ padding: 10px;
112
+ margin: 10px 0;
113
+ border-radius: 4px;
114
+ }
115
+
116
+ .split {
117
+ text-align: center;
118
+ margin: 30px 0;
119
+ }
120
+
121
+ .split h3 {
122
+ color: #666;
123
+ }
124
+
125
+ .split-line {
126
+ border: none;
127
+ border-top: 2px dashed #ccc;
128
+ margin: 30px 0;
129
+ }
130
+
131
+ .url {
132
+ word-break: break-all;
133
+ }
134
+
135
+ .plan-result {
136
+ background: #d1ecf1;
137
+ color: #0c5460;
138
+ padding: 10px;
139
+ border-radius: 4px;
140
+ margin-top: 10px;
141
+ }
142
+
143
+ /* Cursor indicators */
144
+ .click-indicator {
145
+ position: absolute;
146
+ width: 20px;
147
+ height: 20px;
148
+ border-radius: 50%;
149
+ background: rgba(255, 0, 0, 0.8);
150
+ border: 2px solid #fff;
151
+ box-shadow: 0 0 10px rgba(255, 0, 0, 0.6);
152
+ transform: translate(-50%, -50%);
153
+ pointer-events: none;
154
+ z-index: 10;
155
+ }
156
+
157
+ .drag-indicator {
158
+ position: absolute;
159
+ width: 20px;
160
+ height: 20px;
161
+ border-radius: 50%;
162
+ border: 2px solid #fff;
163
+ box-shadow: 0 0 10px rgba(0, 0, 0, 0.3);
164
+ transform: translate(-50%, -50%);
165
+ pointer-events: none;
166
+ z-index: 10;
167
+ }
168
+
169
+ .drag-start {
170
+ background: rgba(0, 255, 0, 0.8);
171
+ box-shadow: 0 0 10px rgba(0, 255, 0, 0.6);
172
+ }
173
+
174
+ .drag-end {
175
+ background: rgba(255, 0, 0, 0.8);
176
+ box-shadow: 0 0 10px rgba(255, 0, 0, 0.6);
177
+ }
178
+
179
+ .drag-line {
180
+ position: absolute;
181
+ background: rgba(255, 255, 0, 0.8);
182
+ height: 3px;
183
+ border-radius: 2px;
184
+ box-shadow: 0 0 5px rgba(255, 255, 0, 0.4);
185
+ pointer-events: none;
186
+ z-index: 9;
187
+ transform-origin: left center;
188
+ }
189
+
190
+ .scroll-indicator {
191
+ position: absolute;
192
+ width: 40px;
193
+ height: 40px;
194
+ border-radius: 50%;
195
+ background: rgba(138, 43, 226, 0.9);
196
+ border: 2px solid #fff;
197
+ box-shadow: 0 0 15px rgba(138, 43, 226, 0.6);
198
+ transform: translate(-50%, -50%);
199
+ pointer-events: none;
200
+ z-index: 10;
201
+ display: flex;
202
+ align-items: center;
203
+ justify-content: center;
204
+ font-size: 20px;
205
+ color: white;
206
+ font-weight: bold;
207
+ }
208
+ </style>
209
+ </head>
210
+ <body>
211
+ <h1>Agent Execution Report</h1>
212
+ <div id="content"></div>
213
+
214
+ <script>
215
+ const eventsData = {EVENTS_DATA};
216
+
217
/**
 * Convert arbitrary text into markup-safe HTML.
 *
 * The value is assigned as the text content of a detached <div>, and the
 * browser's own serialization of that element is returned.
 *
 * @param {*} text - Value to escape.
 * @returns {string} The serialized (escaped) form of `text`.
 */
function escapeHtml(text) {
    const scratch = document.createElement('div');
    scratch.textContent = text;
    const { innerHTML: escaped } = scratch;
    return escaped;
}
222
+
223
/**
 * Detach every click, drag, drag-line and scroll overlay found inside a
 * screenshot container.
 *
 * @param {Element} container - Element whose overlay children are removed.
 */
function removeIndicators(container) {
    const overlaySelector = [
        '.click-indicator',
        '.drag-indicator',
        '.drag-line',
        '.scroll-indicator'
    ].join(', ');

    for (const overlay of container.querySelectorAll(overlaySelector)) {
        overlay.remove();
    }
}
227
+
228
/**
 * (Re)draw the action overlays for one screenshot container.
 *
 * The container's `data-actions` attribute is parsed as a JSON list of
 * actions; each action's `type` selects the overlay renderer. Actions with
 * any other type are ignored. Nothing happens when the attribute or the
 * `.screenshot` image is missing.
 *
 * @param {Element} container - A `.screenshot-container` element.
 */
function addIndicators(container) {
    const screenshot = container.querySelector('.screenshot');
    const rawActions = container.getAttribute('data-actions');
    if (!(rawActions && screenshot)) {
        return;
    }

    // Start from a clean slate so repeated calls do not stack overlays.
    removeIndicators(container);

    const parsedActions = JSON.parse(rawActions);
    const renderedWidth = screenshot.offsetWidth;
    const renderedHeight = screenshot.offsetHeight;

    // A Map keeps the lookup limited to the keys listed here.
    const renderers = new Map([
        ['click', addClickIndicator],
        ['drag', addDragIndicator],
        ['scroll', addScrollIndicator]
    ]);

    parsedActions.forEach(action => {
        const render = renderers.get(action.type);
        if (render) {
            render(container, action, renderedWidth, renderedHeight);
        }
    });
}
254
+
255
// Overlay positions are computed from the rendered image size, so they are
// redrawn after the window is resized. The work is debounced: it runs 100 ms
// after the most recent resize event.
let resizeTimeout;
window.addEventListener('resize', () => {
    clearTimeout(resizeTimeout);
    resizeTimeout = setTimeout(() => {
        const containers = document.querySelectorAll('.screenshot-container');
        for (const container of containers) {
            addIndicators(container);
        }
    }, 100);
});
265
+
266
/**
 * Append a click marker to a screenshot container.
 *
 * `action.x` and `action.y` are divided by 1000 and scaled by the rendered
 * image size to obtain the pixel position.
 *
 * @param {Element} container - Element that receives the marker.
 * @param {{x: number, y: number}} action - Click action to visualize.
 * @param {number} imgWidth - Rendered image width in pixels.
 * @param {number} imgHeight - Rendered image height in pixels.
 */
function addClickIndicator(container, action, imgWidth, imgHeight) {
    const leftPx = (action.x / 1000) * imgWidth;
    const topPx = (action.y / 1000) * imgHeight;

    const marker = document.createElement('div');
    marker.className = 'click-indicator';
    marker.style.left = `${leftPx}px`;
    marker.style.top = `${topPx}px`;
    container.appendChild(marker);
}
276
+
277
/**
 * Append the overlays for a drag action: a start marker, an end marker and
 * a connecting line.
 *
 * `action.x1/y1` (start) and `action.x2/y2` (end) are divided by 1000 and
 * scaled by the rendered image size to obtain pixel positions.
 *
 * @param {Element} container - Element that receives the overlays.
 * @param {{x1: number, y1: number, x2: number, y2: number}} action - Drag
 *     action to visualize.
 * @param {number} imgWidth - Rendered image width in pixels.
 * @param {number} imgHeight - Rendered image height in pixels.
 */
function addDragIndicator(container, action, imgWidth, imgHeight) {
    const x1 = (action.x1 / 1000) * imgWidth;
    const y1 = (action.y1 / 1000) * imgHeight;
    const x2 = (action.x2 / 1000) * imgWidth;
    const y2 = (action.y2 / 1000) * imgHeight;

    // Create one endpoint marker and attach it to the container.
    const appendEndpoint = (className, left, top) => {
        const marker = document.createElement('div');
        marker.className = className;
        marker.style.left = left + 'px';
        marker.style.top = top + 'px';
        container.appendChild(marker);
    };

    appendEndpoint('drag-indicator drag-start', x1, y1);
    appendEndpoint('drag-indicator drag-end', x2, y2);

    const deltaX = x2 - x1;
    const deltaY = y2 - y1;
    const length = Math.sqrt(deltaX * deltaX + deltaY * deltaY);
    const angle = Math.atan2(deltaY, deltaX) * (180 / Math.PI);

    const line = document.createElement('div');
    line.className = 'drag-line';
    line.style.left = x1 + 'px';
    line.style.top = y1 + 'px';
    line.style.width = length + 'px';
    // `top` positions the upper edge of the line box, so without the
    // translateY the line's axis would sit half its thickness below the
    // endpoint markers (which are centered on their coordinates). Shifting
    // by -50% of the line's own height puts the rotation pivot, and hence
    // the whole axis, exactly on (x1, y1) -> (x2, y2).
    line.style.transform = 'translateY(-50%) rotate(' + angle + 'deg)';
    container.appendChild(line);
}
307
+
308
/**
 * Append a scroll marker to a screenshot container.
 *
 * The marker shows an up arrow for direction `'up'`, a down arrow for
 * `'down'`, and a two-headed arrow for any other direction. `action.x` and
 * `action.y` are divided by 1000 and scaled by the rendered image size.
 *
 * @param {Element} container - Element that receives the marker.
 * @param {{x: number, y: number, direction: string}} action - Scroll action
 *     to visualize.
 * @param {number} imgWidth - Rendered image width in pixels.
 * @param {number} imgHeight - Rendered image height in pixels.
 */
function addScrollIndicator(container, action, imgWidth, imgHeight) {
    // direction -> [arrow entity, tooltip]
    const knownDirections = new Map([
        ['up', ['&#8593;', 'Scroll Up']],
        ['down', ['&#8595;', 'Scroll Down']]
    ]);
    const fallback = ['&#8597;', 'Scroll'];
    const [arrow, tooltip] = knownDirections.get(action.direction) || fallback;

    const marker = document.createElement('div');
    marker.className = 'scroll-indicator';
    marker.style.left = `${(action.x / 1000) * imgWidth}px`;
    marker.style.top = `${(action.y / 1000) * imgHeight}px`;
    marker.innerHTML = arrow;
    marker.title = tooltip;
    container.appendChild(marker);
}
329
+
330
/**
 * Build the report markup from `eventsData`, inject it into `#content`, and
 * draw the action overlays on each screenshot once its image has loaded.
 *
 * Handled `event_type` values: 'step', 'action', 'log', 'split' and 'plan';
 * events of any other type produce no output.
 *
 * Every event field that ends up in the markup is escaped first, because
 * the assembled string is assigned to `innerHTML`: text goes through
 * `escapeHtml`, attribute values through a local attribute escaper.
 */
function renderEvents() {
    const attrEntities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    };

    // Escape a value for use inside a quoted HTML attribute.
    const escapeAttr = value =>
        String(value).replace(/[&<>"']/g, ch => attrEntities[ch]);

    // Escape a value for use as element text; String() keeps the rendering
    // of non-string values (numbers, undefined) the same as interpolation.
    const escapeText = value => escapeHtml(String(value));

    // Build the <img> tag for a screenshot. Values that are not already a
    // data: or http(s) URL are treated as bare base64 PNG data.
    const screenshotTag = (image, altText) => {
        const isUrl = image.startsWith('data:') || image.startsWith('http');
        const src = isUrl ? image : `data:image/png;base64,${image}`;
        return `<img src="${escapeAttr(src)}" alt="${escapeAttr(altText)}" class="screenshot"/>`;
    };

    const phaseTitles = {
        'initial': 'Initial Planning',
        'reflection': 'Reflection',
        'summary': 'Summary'
    };

    const content = document.getElementById('content');
    let html = '';

    eventsData.forEach(event => {
        const timestampHtml =
            `<span class="timestamp">${escapeText(event.timestamp)}</span>`;

        switch (event.event_type) {
            case 'step': {
                html += '<div class="step">';
                html += `<h2>Step ${escapeText(event.step_num)}</h2>`;
                html += timestampHtml;

                if (event.image) {
                    // Full attribute escaping (not just quotes) so the JSON
                    // read back via getAttribute() is exactly what was written.
                    const actionsJson = escapeAttr(
                        JSON.stringify(event.action_coords || [])
                    );
                    html += `<div class="screenshot-container" data-actions="${actionsJson}">`;
                    html += screenshotTag(event.image, `Step ${event.step_num}`);
                    html += '</div>';
                }

                if (event.reason) {
                    html += '<div class="reasoning">';
                    html += `<strong>Reasoning:</strong><p>${escapeHtml(event.reason)}</p>`;
                    html += '</div>';
                }

                if (event.actions && event.actions.length > 0) {
                    html += '<div class="actions"><strong>Planned Actions:</strong><ul>';
                    event.actions.forEach(action => {
                        const countStr = action.count > 1 ? ` (x${action.count})` : '';
                        html += `<li><code>${escapeText(action.type)}</code>: ${escapeHtml(action.argument)}${countStr}</li>`;
                    });
                    html += '</ul></div>';
                }

                if (event.stop) {
                    html += '<div class="complete">Task Complete</div>';
                }
                html += '</div>';
                break;
            }

            case 'action': {
                html += '<div class="action-result">';
                html += timestampHtml;
                if (event.error) {
                    html += `<div class="error">Error: ${escapeHtml(event.error)}</div>`;
                } else {
                    html += '<div class="success">Actions executed successfully</div>';
                }
                html += '</div>';
                break;
            }

            case 'log': {
                html += '<div class="log">';
                html += timestampHtml;
                html += `<p>${escapeHtml(event.message)}</p>`;
                html += '</div>';
                break;
            }

            case 'split': {
                if (event.label) {
                    html += `<div class="split"><h3>${escapeHtml(event.label)}</h3></div>`;
                } else {
                    html += '<hr class="split-line"/>';
                }
                break;
            }

            case 'plan': {
                // Own-property check so a phase such as "constructor" falls
                // back to the raw phase name instead of an inherited member.
                const isKnownPhase = Object.prototype.hasOwnProperty.call(
                    phaseTitles, event.phase
                );
                const phaseTitle = isKnownPhase ? phaseTitles[event.phase] : event.phase;

                html += '<div class="plan">';
                html += `<h3>${escapeText(phaseTitle)}</h3>`;
                html += timestampHtml;

                if (event.image) {
                    html += '<div class="screenshot-container">';
                    html += screenshotTag(event.image, phaseTitle);
                    html += '</div>';
                }

                if (event.reasoning) {
                    html += '<div class="reasoning">';
                    html += `<strong>Reasoning:</strong><p>${escapeHtml(event.reasoning)}</p>`;
                    html += '</div>';
                }

                if (event.result) {
                    html += `<div class="plan-result"><strong>Result:</strong> ${escapeHtml(event.result)}</div>`;
                }
                html += '</div>';
                break;
            }
        }
    });

    content.innerHTML = html;

    // Overlay positions depend on the rendered image size, so wait for each
    // image to finish loading before drawing its overlays.
    document.querySelectorAll('.screenshot-container').forEach(container => {
        const img = container.querySelector('.screenshot');
        if (img) {
            if (img.complete) {
                addIndicators(container);
            } else {
                img.onload = () => addIndicators(container);
            }
        }
    });
}

document.addEventListener('DOMContentLoaded', renderEvents);
453
+ </script>
454
+ </body>
455
+ </html>
@@ -6,6 +6,7 @@
6
6
  # Licensed under the MIT License.
7
7
  # -----------------------------------------------------------------------------
8
8
 
9
+ import asyncio
9
10
  import logging
10
11
  from datetime import datetime
11
12
  from typing import Any
@@ -59,6 +60,7 @@ class TaskeeAgent(AsyncAgent):
59
60
  external_memory: PlannerMemory | None = None,
60
61
  todo_index: int | None = None,
61
62
  step_observer: AsyncObserver | None = None,
63
+ step_delay: float = 0.3,
62
64
  ):
63
65
  """Initialize the taskee agent.
64
66
 
@@ -73,6 +75,7 @@ class TaskeeAgent(AsyncAgent):
73
75
  external_memory: External memory from parent agent
74
76
  todo_index: Index of the todo being executed
75
77
  step_observer: Optional observer for step tracking
78
+ step_delay: Delay in seconds after actions before next screenshot
76
79
  """
77
80
  self.api_key = api_key
78
81
  self.base_url = base_url
@@ -84,6 +87,7 @@ class TaskeeAgent(AsyncAgent):
84
87
  self.external_memory = external_memory
85
88
  self.todo_index = todo_index
86
89
  self.step_observer = step_observer
90
+ self.step_delay = step_delay
87
91
 
88
92
  # Internal state
89
93
  self.actor: AsyncActor | None = None
@@ -327,6 +331,10 @@ class TaskeeAgent(AsyncAgent):
327
331
  self.total_actions += len(step.actions)
328
332
  self.since_reflection += len(step.actions)
329
333
 
334
+ # Wait after actions before next screenshot
335
+ if self.step_delay > 0:
336
+ await asyncio.sleep(self.step_delay)
337
+
330
338
  steps_taken += 1
331
339
 
332
340
  # Check if task is complete
@@ -40,6 +40,7 @@ class TaskerAgent(AsyncAgent):
40
40
  reflection_interval: int = 4,
41
41
  planner: Planner | None = None,
42
42
  step_observer: AsyncObserver | None = None,
43
+ step_delay: float = 0.3,
43
44
  ):
44
45
  """Initialize the tasker agent.
45
46
 
@@ -52,6 +53,7 @@ class TaskerAgent(AsyncAgent):
52
53
  reflection_interval: Actions before reflection
53
54
  planner: Planner for planning and reflection
54
55
  step_observer: Optional observer for step tracking
56
+ step_delay: Delay in seconds after actions before next screenshot
55
57
  """
56
58
  self.api_key = api_key
57
59
  self.base_url = base_url
@@ -61,6 +63,7 @@ class TaskerAgent(AsyncAgent):
61
63
  self.reflection_interval = reflection_interval
62
64
  self.planner = planner or Planner(api_key=api_key, base_url=base_url)
63
65
  self.step_observer = step_observer
66
+ self.step_delay = step_delay
64
67
 
65
68
  # Memory for tracking workflow
66
69
  self.memory = PlannerMemory()
@@ -184,6 +187,7 @@ class TaskerAgent(AsyncAgent):
184
187
  external_memory=self.memory, # Share memory with child
185
188
  todo_index=todo_index, # Pass the todo index
186
189
  step_observer=self.step_observer, # Pass step observer
190
+ step_delay=self.step_delay,
187
191
  )
188
192
 
189
193
  self.current_todo_index = todo_index
oagi/cli/agent.py CHANGED
@@ -65,6 +65,11 @@ def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
65
65
  type=str,
66
66
  help="Output file path for export (default: execution_report.[md|html|json])",
67
67
  )
68
+ run_parser.add_argument(
69
+ "--step-delay",
70
+ type=float,
71
+ help="Delay in seconds after each step before next screenshot (default: 0.3)",
72
+ )
68
73
 
69
74
  # agent permission command
70
75
  agent_subparsers.add_parser(
@@ -196,6 +201,7 @@ def run_agent(args: argparse.Namespace) -> None:
196
201
  max_steps = args.max_steps or 20
197
202
  temperature = args.temperature if args.temperature is not None else 0.5
198
203
  mode = args.mode or "actor"
204
+ step_delay = args.step_delay if args.step_delay is not None else 0.3
199
205
  export_format = args.export
200
206
  export_file = args.export_file
201
207
 
@@ -221,6 +227,7 @@ def run_agent(args: argparse.Namespace) -> None:
221
227
  max_steps=max_steps,
222
228
  temperature=temperature,
223
229
  step_observer=observer,
230
+ step_delay=step_delay,
224
231
  )
225
232
 
226
233
  # Create handlers
@@ -229,7 +236,8 @@ def run_agent(args: argparse.Namespace) -> None:
229
236
 
230
237
  print(f"Starting agent with instruction: {args.instruction}")
231
238
  print(
232
- f"Mode: {mode}, Model: {model}, Max steps: {max_steps}, Temperature: {temperature}"
239
+ f"Mode: {mode}, Model: {model}, Max steps: {max_steps}, "
240
+ f"Temperature: {temperature}, Step delay: {step_delay}s"
233
241
  )
234
242
  print("-" * 60)
235
243
 
@@ -6,14 +6,13 @@
6
6
  # Licensed under the MIT License.
7
7
  # -----------------------------------------------------------------------------
8
8
 
9
- import re
10
9
  import sys
11
10
  import time
12
11
 
13
12
  from pydantic import BaseModel, Field
14
13
 
15
14
  from ..exceptions import check_optional_dependency
16
- from ..types import Action, ActionType
15
+ from ..types import Action, ActionType, parse_coords, parse_drag_coords, parse_scroll
17
16
 
18
17
  check_optional_dependency("pyautogui", "PyautoguiActionHandler", "desktop")
19
18
  import pyautogui # noqa: E402
@@ -136,36 +135,27 @@ class PyautoguiActionHandler:
136
135
 
137
136
  def _parse_coords(self, args_str: str) -> tuple[int, int]:
138
137
  """Extract x, y coordinates from argument string."""
139
- match = re.match(r"(\d+),\s*(\d+)", args_str)
140
- if not match:
138
+ coords = parse_coords(args_str)
139
+ if not coords:
141
140
  raise ValueError(f"Invalid coordinates format: {args_str}")
142
- x, y = int(match.group(1)), int(match.group(2))
143
- return self._denormalize_coords(x, y)
141
+ return self._denormalize_coords(coords[0], coords[1])
144
142
 
145
143
  def _parse_drag_coords(self, args_str: str) -> tuple[int, int, int, int]:
146
144
  """Extract x1, y1, x2, y2 coordinates from drag argument string."""
147
- match = re.match(r"(\d+),\s*(\d+),\s*(\d+),\s*(\d+)", args_str)
148
- if not match:
145
+ coords = parse_drag_coords(args_str)
146
+ if not coords:
149
147
  raise ValueError(f"Invalid drag coordinates format: {args_str}")
150
- x1, y1, x2, y2 = (
151
- int(match.group(1)),
152
- int(match.group(2)),
153
- int(match.group(3)),
154
- int(match.group(4)),
155
- )
156
- x1, y1 = self._denormalize_coords(x1, y1)
157
- x2, y2 = self._denormalize_coords(x2, y2)
148
+ x1, y1 = self._denormalize_coords(coords[0], coords[1])
149
+ x2, y2 = self._denormalize_coords(coords[2], coords[3])
158
150
  return x1, y1, x2, y2
159
151
 
160
152
  def _parse_scroll(self, args_str: str) -> tuple[int, int, str]:
161
153
  """Extract x, y, direction from scroll argument string."""
162
- match = re.match(r"(\d+),\s*(\d+),\s*(\w+)", args_str)
163
- if not match:
154
+ result = parse_scroll(args_str)
155
+ if not result:
164
156
  raise ValueError(f"Invalid scroll format: {args_str}")
165
- x, y = int(match.group(1)), int(match.group(2))
166
- x, y = self._denormalize_coords(x, y)
167
- direction = match.group(3).lower()
168
- return x, y, direction
157
+ x, y = self._denormalize_coords(result[0], result[1])
158
+ return x, y, result[2]
169
159
 
170
160
  def _normalize_key(self, key: str) -> str:
171
161
  """Normalize key names for consistency."""