oagi-core: 0.10.1 (py3-none-any.whl) → 0.10.3 (py3-none-any.whl)

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
oagi/agent/default.py CHANGED
@@ -6,9 +6,16 @@
6
6
  # Licensed under the MIT License.
7
7
  # -----------------------------------------------------------------------------
8
8
 
9
+ import asyncio
9
10
  import logging
10
11
 
11
12
  from .. import AsyncActor
13
+ from ..constants import (
14
+ DEFAULT_MAX_STEPS,
15
+ DEFAULT_STEP_DELAY,
16
+ DEFAULT_TEMPERATURE,
17
+ MODEL_ACTOR,
18
+ )
12
19
  from ..types import (
13
20
  ActionEvent,
14
21
  AsyncActionHandler,
@@ -35,10 +42,11 @@ class AsyncDefaultAgent:
35
42
  self,
36
43
  api_key: str | None = None,
37
44
  base_url: str | None = None,
38
- model: str = "lux-actor-1",
39
- max_steps: int = 20,
40
- temperature: float | None = 0.5,
45
+ model: str = MODEL_ACTOR,
46
+ max_steps: int = DEFAULT_MAX_STEPS,
47
+ temperature: float | None = DEFAULT_TEMPERATURE,
41
48
  step_observer: AsyncObserver | None = None,
49
+ step_delay: float = DEFAULT_STEP_DELAY,
42
50
  ):
43
51
  self.api_key = api_key
44
52
  self.base_url = base_url
@@ -46,6 +54,7 @@ class AsyncDefaultAgent:
46
54
  self.max_steps = max_steps
47
55
  self.temperature = temperature
48
56
  self.step_observer = step_observer
57
+ self.step_delay = step_delay
49
58
 
50
59
  async def execute(
51
60
  self,
@@ -113,6 +122,10 @@ class AsyncDefaultAgent:
113
122
  )
114
123
  )
115
124
 
125
+ # Wait after actions before next screenshot
126
+ if self.step_delay > 0:
127
+ await asyncio.sleep(self.step_delay)
128
+
116
129
  # Check if task is complete
117
130
  if step.stop:
118
131
  logger.info(f"Task completed successfully after {step_num} steps")
oagi/agent/factories.py CHANGED
@@ -6,6 +6,16 @@
6
6
  # Licensed under the MIT License.
7
7
  # -----------------------------------------------------------------------------
8
8
  from oagi.agent.tasker import TaskerAgent
9
+ from oagi.constants import (
10
+ DEFAULT_MAX_STEPS,
11
+ DEFAULT_MAX_STEPS_TASKER,
12
+ DEFAULT_MAX_STEPS_THINKER,
13
+ DEFAULT_REFLECTION_INTERVAL_TASKER,
14
+ DEFAULT_STEP_DELAY,
15
+ DEFAULT_TEMPERATURE_LOW,
16
+ MODEL_ACTOR,
17
+ MODEL_THINKER,
18
+ )
9
19
  from oagi.types import AsyncStepObserver
10
20
 
11
21
  from .default import AsyncDefaultAgent
@@ -17,10 +27,11 @@ from .registry import async_agent_register
17
27
  def create_default_agent(
18
28
  api_key: str | None = None,
19
29
  base_url: str | None = None,
20
- model: str = "lux-actor-1",
21
- max_steps: int = 20,
22
- temperature: float = 0.1,
30
+ model: str = MODEL_ACTOR,
31
+ max_steps: int = DEFAULT_MAX_STEPS,
32
+ temperature: float = DEFAULT_TEMPERATURE_LOW,
23
33
  step_observer: AsyncStepObserver | None = None,
34
+ step_delay: float = DEFAULT_STEP_DELAY,
24
35
  ) -> AsyncAgent:
25
36
  return AsyncDefaultAgent(
26
37
  api_key=api_key,
@@ -29,6 +40,7 @@ def create_default_agent(
29
40
  max_steps=max_steps,
30
41
  temperature=temperature,
31
42
  step_observer=step_observer,
43
+ step_delay=step_delay,
32
44
  )
33
45
 
34
46
 
@@ -36,10 +48,11 @@ def create_default_agent(
36
48
  def create_thinker_agent(
37
49
  api_key: str | None = None,
38
50
  base_url: str | None = None,
39
- model: str = "lux-thinker-1",
40
- max_steps: int = 100,
41
- temperature: float = 0.1,
51
+ model: str = MODEL_THINKER,
52
+ max_steps: int = DEFAULT_MAX_STEPS_THINKER,
53
+ temperature: float = DEFAULT_TEMPERATURE_LOW,
42
54
  step_observer: AsyncStepObserver | None = None,
55
+ step_delay: float = DEFAULT_STEP_DELAY,
43
56
  ) -> AsyncAgent:
44
57
  return AsyncDefaultAgent(
45
58
  api_key=api_key,
@@ -48,6 +61,7 @@ def create_thinker_agent(
48
61
  max_steps=max_steps,
49
62
  temperature=temperature,
50
63
  step_observer=step_observer,
64
+ step_delay=step_delay,
51
65
  )
52
66
 
53
67
 
@@ -55,11 +69,12 @@ def create_thinker_agent(
55
69
  def create_planner_agent(
56
70
  api_key: str | None = None,
57
71
  base_url: str | None = None,
58
- model: str = "lux-actor-1",
59
- max_steps: int = 30,
60
- temperature: float = 0.1,
61
- reflection_interval: int = 20,
72
+ model: str = MODEL_ACTOR,
73
+ max_steps: int = DEFAULT_MAX_STEPS_TASKER,
74
+ temperature: float = DEFAULT_TEMPERATURE_LOW,
75
+ reflection_interval: int = DEFAULT_REFLECTION_INTERVAL_TASKER,
62
76
  step_observer: AsyncStepObserver | None = None,
77
+ step_delay: float = DEFAULT_STEP_DELAY,
63
78
  ) -> AsyncAgent:
64
79
  tasker = TaskerAgent(
65
80
  api_key=api_key,
@@ -69,6 +84,7 @@ def create_planner_agent(
69
84
  temperature=temperature,
70
85
  reflection_interval=reflection_interval,
71
86
  step_observer=step_observer,
87
+ step_delay=step_delay,
72
88
  )
73
89
  # tasker.set_task()
74
90
  return tasker
@@ -11,16 +11,64 @@ import json
11
11
  from pathlib import Path
12
12
 
13
13
  from ...types import (
14
+ Action,
14
15
  ActionEvent,
16
+ ActionType,
15
17
  ImageEvent,
16
18
  LogEvent,
17
19
  ObserverEvent,
18
20
  PlanEvent,
19
21
  SplitEvent,
20
22
  StepEvent,
23
+ parse_coords,
24
+ parse_drag_coords,
25
+ parse_scroll,
21
26
  )
22
27
 
23
28
 
29
+ def _parse_action_coords(action: Action) -> dict | None:
30
+ """Parse coordinates from action argument for cursor indicators.
31
+
32
+ Returns:
33
+ Dict with coordinates based on action type, or None if not applicable.
34
+ - Click types: {"type": "click", "x": int, "y": int}
35
+ - Drag: {"type": "drag", "x1": int, "y1": int, "x2": int, "y2": int}
36
+ - Scroll: {"type": "scroll", "x": int, "y": int, "direction": str}
37
+ """
38
+ arg = action.argument.strip("()")
39
+
40
+ match action.type:
41
+ case (
42
+ ActionType.CLICK
43
+ | ActionType.LEFT_DOUBLE
44
+ | ActionType.LEFT_TRIPLE
45
+ | ActionType.RIGHT_SINGLE
46
+ ):
47
+ coords = parse_coords(arg)
48
+ if coords:
49
+ return {"type": "click", "x": coords[0], "y": coords[1]}
50
+ case ActionType.DRAG:
51
+ coords = parse_drag_coords(arg)
52
+ if coords:
53
+ return {
54
+ "type": "drag",
55
+ "x1": coords[0],
56
+ "y1": coords[1],
57
+ "x2": coords[2],
58
+ "y2": coords[3],
59
+ }
60
+ case ActionType.SCROLL:
61
+ result = parse_scroll(arg)
62
+ if result:
63
+ return {
64
+ "type": "scroll",
65
+ "x": result[0],
66
+ "y": result[1],
67
+ "direction": result[2],
68
+ }
69
+ return None
70
+
71
+
24
72
  def export_to_markdown(
25
73
  events: list[ObserverEvent],
26
74
  path: str,
@@ -137,281 +185,124 @@ def export_to_markdown(
137
185
  output_path.write_text("".join(lines))
138
186
 
139
187
 
140
- def export_to_html(events: list[ObserverEvent], path: str) -> None:
141
- """Export events to a self-contained HTML file.
142
-
143
- Args:
144
- events: List of events to export.
145
- path: Path to the output HTML file.
146
- """
147
- output_path = Path(path)
148
- output_path.parent.mkdir(parents=True, exist_ok=True)
149
-
150
- html_parts: list[str] = [_get_html_header()]
188
+ def _convert_events_for_html(events: list[ObserverEvent]) -> list[dict]:
189
+ """Convert events to JSON-serializable format for HTML template."""
190
+ result = []
151
191
 
152
192
  for event in events:
153
193
  timestamp = event.timestamp.strftime("%H:%M:%S")
154
194
 
155
195
  match event:
156
196
  case StepEvent():
157
- html_parts.append('<div class="step">')
158
- html_parts.append(f"<h2>Step {event.step_num}</h2>")
159
- html_parts.append(f'<span class="timestamp">{timestamp}</span>')
160
-
161
- if isinstance(event.image, bytes):
162
- b64_image = base64.b64encode(event.image).decode("utf-8")
163
- html_parts.append(
164
- f'<img src="data:image/png;base64,{b64_image}" '
165
- f'alt="Step {event.step_num}" class="screenshot"/>'
166
- )
167
- elif isinstance(event.image, str):
168
- html_parts.append(
169
- f'<p class="url">Screenshot URL: <a href="{event.image}">{event.image}</a></p>'
170
- )
171
-
172
- if event.step.reason:
173
- html_parts.append('<div class="reasoning">')
174
- html_parts.append(
175
- f"<strong>Reasoning:</strong><p>{_escape_html(event.step.reason)}</p>"
176
- )
177
- html_parts.append("</div>")
178
-
197
+ # Collect action coordinates for cursor indicators
198
+ action_coords = []
199
+ actions_list = []
179
200
  if event.step.actions:
180
- html_parts.append('<div class="actions">')
181
- html_parts.append("<strong>Planned Actions:</strong><ul>")
182
201
  for action in event.step.actions:
183
- count_str = (
184
- f" (x{action.count})"
185
- if action.count and action.count > 1
186
- else ""
202
+ coords = _parse_action_coords(action)
203
+ if coords:
204
+ action_coords.append(coords)
205
+ actions_list.append(
206
+ {
207
+ "type": action.type.value,
208
+ "argument": action.argument,
209
+ "count": action.count or 1,
210
+ }
187
211
  )
188
- html_parts.append(
189
- f"<li><code>{action.type.value}</code>: "
190
- f"{_escape_html(action.argument)}{count_str}</li>"
191
- )
192
- html_parts.append("</ul></div>")
193
-
194
- if event.step.stop:
195
- html_parts.append('<div class="complete">Task Complete</div>')
196
212
 
197
- html_parts.append("</div>")
213
+ # Handle image
214
+ image_data = None
215
+ if isinstance(event.image, bytes):
216
+ image_data = base64.b64encode(event.image).decode("utf-8")
217
+ elif isinstance(event.image, str):
218
+ image_data = event.image
219
+
220
+ result.append(
221
+ {
222
+ "event_type": "step",
223
+ "timestamp": timestamp,
224
+ "step_num": event.step_num,
225
+ "image": image_data,
226
+ "action_coords": action_coords,
227
+ "reason": event.step.reason,
228
+ "actions": actions_list,
229
+ "stop": event.step.stop,
230
+ }
231
+ )
198
232
 
199
233
  case ActionEvent():
200
- html_parts.append('<div class="action-result">')
201
- html_parts.append(f'<span class="timestamp">{timestamp}</span>')
202
- if event.error:
203
- html_parts.append(
204
- f'<div class="error">Error: {_escape_html(event.error)}</div>'
205
- )
206
- else:
207
- html_parts.append(
208
- '<div class="success">Actions executed successfully</div>'
209
- )
210
- html_parts.append("</div>")
234
+ result.append(
235
+ {
236
+ "event_type": "action",
237
+ "timestamp": timestamp,
238
+ "error": event.error,
239
+ }
240
+ )
211
241
 
212
242
  case LogEvent():
213
- html_parts.append('<div class="log">')
214
- html_parts.append(f'<span class="timestamp">{timestamp}</span>')
215
- html_parts.append(f"<p>{_escape_html(event.message)}</p>")
216
- html_parts.append("</div>")
243
+ result.append(
244
+ {
245
+ "event_type": "log",
246
+ "timestamp": timestamp,
247
+ "message": event.message,
248
+ }
249
+ )
217
250
 
218
251
  case SplitEvent():
219
- if event.label:
220
- html_parts.append(
221
- f'<div class="split"><h3>{_escape_html(event.label)}</h3></div>'
222
- )
223
- else:
224
- html_parts.append('<hr class="split-line"/>')
252
+ result.append(
253
+ {
254
+ "event_type": "split",
255
+ "timestamp": timestamp,
256
+ "label": event.label,
257
+ }
258
+ )
225
259
 
226
260
  case ImageEvent():
227
261
  pass
228
262
 
229
263
  case PlanEvent():
230
- phase_titles = {
231
- "initial": "Initial Planning",
232
- "reflection": "Reflection",
233
- "summary": "Summary",
234
- }
235
- phase_title = phase_titles.get(event.phase, event.phase.capitalize())
236
- html_parts.append('<div class="plan">')
237
- html_parts.append(f"<h3>{phase_title}</h3>")
238
- html_parts.append(f'<span class="timestamp">{timestamp}</span>')
264
+ image_data = None
265
+ if isinstance(event.image, bytes):
266
+ image_data = base64.b64encode(event.image).decode("utf-8")
267
+ elif isinstance(event.image, str):
268
+ image_data = event.image
239
269
 
240
- if event.image:
241
- if isinstance(event.image, bytes):
242
- b64_image = base64.b64encode(event.image).decode("utf-8")
243
- html_parts.append(
244
- f'<img src="data:image/png;base64,{b64_image}" '
245
- f'alt="{phase_title}" class="screenshot"/>'
246
- )
247
- elif isinstance(event.image, str):
248
- html_parts.append(
249
- f'<p class="url">Screenshot URL: '
250
- f'<a href="{event.image}">{event.image}</a></p>'
251
- )
270
+ result.append(
271
+ {
272
+ "event_type": "plan",
273
+ "timestamp": timestamp,
274
+ "phase": event.phase,
275
+ "image": image_data,
276
+ "reasoning": event.reasoning,
277
+ "result": event.result,
278
+ }
279
+ )
252
280
 
253
- if event.reasoning:
254
- html_parts.append('<div class="reasoning">')
255
- html_parts.append(
256
- f"<strong>Reasoning:</strong><p>{_escape_html(event.reasoning)}</p>"
257
- )
258
- html_parts.append("</div>")
281
+ return result
259
282
 
260
- if event.result:
261
- html_parts.append(
262
- f'<div class="plan-result"><strong>Result:</strong> '
263
- f"{_escape_html(event.result)}</div>"
264
- )
265
-
266
- html_parts.append("</div>")
267
-
268
- html_parts.append(_get_html_footer())
269
- output_path.write_text("".join(html_parts))
270
-
271
-
272
- def _escape_html(text: str) -> str:
273
- """Escape HTML special characters."""
274
- return (
275
- text.replace("&", "&amp;")
276
- .replace("<", "&lt;")
277
- .replace(">", "&gt;")
278
- .replace('"', "&quot;")
279
- .replace("'", "&#39;")
280
- )
281
-
282
-
283
- def _get_html_header() -> str:
284
- """Get HTML document header with CSS styles."""
285
- return """<!DOCTYPE html>
286
- <html lang="en">
287
- <head>
288
- <meta charset="UTF-8">
289
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
290
- <title>Agent Execution Report</title>
291
- <style>
292
- body {
293
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
294
- max-width: 1200px;
295
- margin: 0 auto;
296
- padding: 20px;
297
- background: #f5f5f5;
298
- }
299
- h1 {
300
- color: #333;
301
- border-bottom: 2px solid #007bff;
302
- padding-bottom: 10px;
303
- }
304
- .step {
305
- background: white;
306
- border-radius: 8px;
307
- padding: 20px;
308
- margin: 20px 0;
309
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
310
- }
311
- .step h2 {
312
- margin-top: 0;
313
- color: #007bff;
314
- }
315
- .timestamp {
316
- color: #666;
317
- font-size: 0.9em;
318
- }
319
- .screenshot {
320
- max-width: 100%;
321
- border: 1px solid #ddd;
322
- border-radius: 4px;
323
- margin: 10px 0;
324
- }
325
- .reasoning {
326
- background: #f8f9fa;
327
- padding: 10px;
328
- border-left: 3px solid #007bff;
329
- margin: 10px 0;
330
- }
331
- .actions {
332
- margin: 10px 0;
333
- }
334
- .actions ul {
335
- margin: 5px 0;
336
- padding-left: 20px;
337
- }
338
- .actions code {
339
- background: #e9ecef;
340
- padding: 2px 6px;
341
- border-radius: 3px;
342
- }
343
- .complete {
344
- background: #d4edda;
345
- color: #155724;
346
- padding: 10px;
347
- border-radius: 4px;
348
- margin-top: 10px;
349
- }
350
- .action-result {
351
- padding: 10px;
352
- margin: 5px 0;
353
- }
354
- .success {
355
- color: #155724;
356
- }
357
- .error {
358
- color: #721c24;
359
- background: #f8d7da;
360
- padding: 10px;
361
- border-radius: 4px;
362
- }
363
- .log {
364
- background: #fff3cd;
365
- padding: 10px;
366
- margin: 10px 0;
367
- border-radius: 4px;
368
- }
369
- .split {
370
- text-align: center;
371
- margin: 30px 0;
372
- }
373
- .split h3 {
374
- color: #666;
375
- }
376
- .split-line {
377
- border: none;
378
- border-top: 2px dashed #ccc;
379
- margin: 30px 0;
380
- }
381
- .url {
382
- word-break: break-all;
383
- }
384
- .plan {
385
- background: #e7f3ff;
386
- border-radius: 8px;
387
- padding: 20px;
388
- margin: 20px 0;
389
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
390
- }
391
- .plan h3 {
392
- margin-top: 0;
393
- color: #0056b3;
394
- }
395
- .plan-result {
396
- background: #d1ecf1;
397
- color: #0c5460;
398
- padding: 10px;
399
- border-radius: 4px;
400
- margin-top: 10px;
401
- }
402
- </style>
403
- </head>
404
- <body>
405
- <h1>Agent Execution Report</h1>
406
- """
407
-
408
-
409
- def _get_html_footer() -> str:
410
- """Get HTML document footer."""
411
- return """
412
- </body>
413
- </html>
414
- """
283
+
284
+ def export_to_html(events: list[ObserverEvent], path: str) -> None:
285
+ """Export events to a self-contained HTML file.
286
+
287
+ Args:
288
+ events: List of events to export.
289
+ path: Path to the output HTML file.
290
+ """
291
+ output_path = Path(path)
292
+ output_path.parent.mkdir(parents=True, exist_ok=True)
293
+
294
+ # Load template
295
+ template_path = Path(__file__).parent / "report_template.html"
296
+ template = template_path.read_text()
297
+
298
+ # Convert events to JSON
299
+ events_data = _convert_events_for_html(events)
300
+ events_json = json.dumps(events_data)
301
+
302
+ # Replace placeholder
303
+ html_content = template.replace("{EVENTS_DATA}", events_json)
304
+
305
+ output_path.write_text(html_content)
415
306
 
416
307
 
417
308
  def export_to_json(events: list[ObserverEvent], path: str) -> None: