oagi-core 0.9.2__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,445 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ import base64
10
+ import json
11
+ from pathlib import Path
12
+
13
+ from ...types import (
14
+ ActionEvent,
15
+ ImageEvent,
16
+ LogEvent,
17
+ ObserverEvent,
18
+ PlanEvent,
19
+ SplitEvent,
20
+ StepEvent,
21
+ )
22
+
23
+
24
+ def export_to_markdown(
25
+ events: list[ObserverEvent],
26
+ path: str,
27
+ images_dir: str | None = None,
28
+ ) -> None:
29
+ """Export events to a Markdown file.
30
+
31
+ Args:
32
+ events: List of events to export.
33
+ path: Path to the output Markdown file.
34
+ images_dir: Directory to save images. If None, images are not saved.
35
+ """
36
+ output_path = Path(path)
37
+ output_path.parent.mkdir(parents=True, exist_ok=True)
38
+
39
+ if images_dir:
40
+ images_path = Path(images_dir)
41
+ images_path.mkdir(parents=True, exist_ok=True)
42
+
43
+ lines: list[str] = ["# Agent Execution Report\n"]
44
+ image_counter = 0
45
+
46
+ for event in events:
47
+ timestamp = event.timestamp.strftime("%H:%M:%S")
48
+
49
+ match event:
50
+ case StepEvent():
51
+ lines.append(f"\n## Step {event.step_num}\n")
52
+ lines.append(f"**Time:** {timestamp}\n")
53
+
54
+ if isinstance(event.image, bytes):
55
+ if images_dir:
56
+ image_counter += 1
57
+ image_filename = f"step_{event.step_num}.png"
58
+ image_path = Path(images_dir) / image_filename
59
+ image_path.write_bytes(event.image)
60
+ rel_path = Path(images_dir).name / Path(image_filename)
61
+ lines.append(f"\n![Step {event.step_num}]({rel_path})\n")
62
+ else:
63
+ lines.append(
64
+ f"\n*[Screenshot captured - {len(event.image)} bytes]*\n"
65
+ )
66
+ elif isinstance(event.image, str):
67
+ lines.append(f"\n**Screenshot URL:** {event.image}\n")
68
+
69
+ if event.step.reason:
70
+ lines.append(f"\n**Reasoning:**\n> {event.step.reason}\n")
71
+
72
+ if event.step.actions:
73
+ lines.append("\n**Planned Actions:**\n")
74
+ for action in event.step.actions:
75
+ count_str = (
76
+ f" (x{action.count})"
77
+ if action.count and action.count > 1
78
+ else ""
79
+ )
80
+ lines.append(
81
+ f"- `{action.type.value}`: {action.argument}{count_str}\n"
82
+ )
83
+
84
+ if event.step.stop:
85
+ lines.append("\n**Status:** Task Complete\n")
86
+
87
+ case ActionEvent():
88
+ lines.append(f"\n### Actions Executed ({timestamp})\n")
89
+ if event.error:
90
+ lines.append(f"\n**Error:** {event.error}\n")
91
+ else:
92
+ lines.append("\n**Result:** Success\n")
93
+
94
+ case LogEvent():
95
+ lines.append(f"\n> **Log ({timestamp}):** {event.message}\n")
96
+
97
+ case SplitEvent():
98
+ if event.label:
99
+ lines.append(f"\n---\n\n### {event.label}\n")
100
+ else:
101
+ lines.append("\n---\n")
102
+
103
+ case ImageEvent():
104
+ pass
105
+
106
+ case PlanEvent():
107
+ phase_titles = {
108
+ "initial": "Initial Planning",
109
+ "reflection": "Reflection",
110
+ "summary": "Summary",
111
+ }
112
+ phase_title = phase_titles.get(event.phase, event.phase.capitalize())
113
+ lines.append(f"\n### {phase_title} ({timestamp})\n")
114
+
115
+ if event.image:
116
+ if isinstance(event.image, bytes):
117
+ if images_dir:
118
+ image_counter += 1
119
+ image_filename = f"plan_{event.phase}_{image_counter}.png"
120
+ image_path = Path(images_dir) / image_filename
121
+ image_path.write_bytes(event.image)
122
+ rel_path = Path(images_dir).name / Path(image_filename)
123
+ lines.append(f"\n![{phase_title}]({rel_path})\n")
124
+ else:
125
+ lines.append(
126
+ f"\n*[Screenshot captured - {len(event.image)} bytes]*\n"
127
+ )
128
+ elif isinstance(event.image, str):
129
+ lines.append(f"\n**Screenshot URL:** {event.image}\n")
130
+
131
+ if event.reasoning:
132
+ lines.append(f"\n**Reasoning:**\n> {event.reasoning}\n")
133
+
134
+ if event.result:
135
+ lines.append(f"\n**Result:** {event.result}\n")
136
+
137
+ output_path.write_text("".join(lines))
138
+
139
+
140
+ def export_to_html(events: list[ObserverEvent], path: str) -> None:
141
+ """Export events to a self-contained HTML file.
142
+
143
+ Args:
144
+ events: List of events to export.
145
+ path: Path to the output HTML file.
146
+ """
147
+ output_path = Path(path)
148
+ output_path.parent.mkdir(parents=True, exist_ok=True)
149
+
150
+ html_parts: list[str] = [_get_html_header()]
151
+
152
+ for event in events:
153
+ timestamp = event.timestamp.strftime("%H:%M:%S")
154
+
155
+ match event:
156
+ case StepEvent():
157
+ html_parts.append('<div class="step">')
158
+ html_parts.append(f"<h2>Step {event.step_num}</h2>")
159
+ html_parts.append(f'<span class="timestamp">{timestamp}</span>')
160
+
161
+ if isinstance(event.image, bytes):
162
+ b64_image = base64.b64encode(event.image).decode("utf-8")
163
+ html_parts.append(
164
+ f'<img src="data:image/png;base64,{b64_image}" '
165
+ f'alt="Step {event.step_num}" class="screenshot"/>'
166
+ )
167
+ elif isinstance(event.image, str):
168
+ html_parts.append(
169
+ f'<p class="url">Screenshot URL: <a href="{event.image}">{event.image}</a></p>'
170
+ )
171
+
172
+ if event.step.reason:
173
+ html_parts.append('<div class="reasoning">')
174
+ html_parts.append(
175
+ f"<strong>Reasoning:</strong><p>{_escape_html(event.step.reason)}</p>"
176
+ )
177
+ html_parts.append("</div>")
178
+
179
+ if event.step.actions:
180
+ html_parts.append('<div class="actions">')
181
+ html_parts.append("<strong>Planned Actions:</strong><ul>")
182
+ for action in event.step.actions:
183
+ count_str = (
184
+ f" (x{action.count})"
185
+ if action.count and action.count > 1
186
+ else ""
187
+ )
188
+ html_parts.append(
189
+ f"<li><code>{action.type.value}</code>: "
190
+ f"{_escape_html(action.argument)}{count_str}</li>"
191
+ )
192
+ html_parts.append("</ul></div>")
193
+
194
+ if event.step.stop:
195
+ html_parts.append('<div class="complete">Task Complete</div>')
196
+
197
+ html_parts.append("</div>")
198
+
199
+ case ActionEvent():
200
+ html_parts.append('<div class="action-result">')
201
+ html_parts.append(f'<span class="timestamp">{timestamp}</span>')
202
+ if event.error:
203
+ html_parts.append(
204
+ f'<div class="error">Error: {_escape_html(event.error)}</div>'
205
+ )
206
+ else:
207
+ html_parts.append(
208
+ '<div class="success">Actions executed successfully</div>'
209
+ )
210
+ html_parts.append("</div>")
211
+
212
+ case LogEvent():
213
+ html_parts.append('<div class="log">')
214
+ html_parts.append(f'<span class="timestamp">{timestamp}</span>')
215
+ html_parts.append(f"<p>{_escape_html(event.message)}</p>")
216
+ html_parts.append("</div>")
217
+
218
+ case SplitEvent():
219
+ if event.label:
220
+ html_parts.append(
221
+ f'<div class="split"><h3>{_escape_html(event.label)}</h3></div>'
222
+ )
223
+ else:
224
+ html_parts.append('<hr class="split-line"/>')
225
+
226
+ case ImageEvent():
227
+ pass
228
+
229
+ case PlanEvent():
230
+ phase_titles = {
231
+ "initial": "Initial Planning",
232
+ "reflection": "Reflection",
233
+ "summary": "Summary",
234
+ }
235
+ phase_title = phase_titles.get(event.phase, event.phase.capitalize())
236
+ html_parts.append('<div class="plan">')
237
+ html_parts.append(f"<h3>{phase_title}</h3>")
238
+ html_parts.append(f'<span class="timestamp">{timestamp}</span>')
239
+
240
+ if event.image:
241
+ if isinstance(event.image, bytes):
242
+ b64_image = base64.b64encode(event.image).decode("utf-8")
243
+ html_parts.append(
244
+ f'<img src="data:image/png;base64,{b64_image}" '
245
+ f'alt="{phase_title}" class="screenshot"/>'
246
+ )
247
+ elif isinstance(event.image, str):
248
+ html_parts.append(
249
+ f'<p class="url">Screenshot URL: '
250
+ f'<a href="{event.image}">{event.image}</a></p>'
251
+ )
252
+
253
+ if event.reasoning:
254
+ html_parts.append('<div class="reasoning">')
255
+ html_parts.append(
256
+ f"<strong>Reasoning:</strong><p>{_escape_html(event.reasoning)}</p>"
257
+ )
258
+ html_parts.append("</div>")
259
+
260
+ if event.result:
261
+ html_parts.append(
262
+ f'<div class="plan-result"><strong>Result:</strong> '
263
+ f"{_escape_html(event.result)}</div>"
264
+ )
265
+
266
+ html_parts.append("</div>")
267
+
268
+ html_parts.append(_get_html_footer())
269
+ output_path.write_text("".join(html_parts))
270
+
271
+
272
+ def _escape_html(text: str) -> str:
273
+ """Escape HTML special characters."""
274
+ return (
275
+ text.replace("&", "&amp;")
276
+ .replace("<", "&lt;")
277
+ .replace(">", "&gt;")
278
+ .replace('"', "&quot;")
279
+ .replace("'", "&#39;")
280
+ )
281
+
282
+
283
+ def _get_html_header() -> str:
284
+ """Get HTML document header with CSS styles.
+
+ Returns:
+ A constant string holding the doctype, the <head> section (charset and
+ viewport meta tags, the page title and one inline <style> block) and
+ the opening <body> tag followed by the <h1> report title. The <body>
+ and <html> elements are left open by this string.
+ """
285
+ return """<!DOCTYPE html>
286
+ <html lang="en">
287
+ <head>
288
+ <meta charset="UTF-8">
289
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
290
+ <title>Agent Execution Report</title>
291
+ <style>
292
+ body {
293
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
294
+ max-width: 1200px;
295
+ margin: 0 auto;
296
+ padding: 20px;
297
+ background: #f5f5f5;
298
+ }
299
+ h1 {
300
+ color: #333;
301
+ border-bottom: 2px solid #007bff;
302
+ padding-bottom: 10px;
303
+ }
304
+ .step {
305
+ background: white;
306
+ border-radius: 8px;
307
+ padding: 20px;
308
+ margin: 20px 0;
309
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
310
+ }
311
+ .step h2 {
312
+ margin-top: 0;
313
+ color: #007bff;
314
+ }
315
+ .timestamp {
316
+ color: #666;
317
+ font-size: 0.9em;
318
+ }
319
+ .screenshot {
320
+ max-width: 100%;
321
+ border: 1px solid #ddd;
322
+ border-radius: 4px;
323
+ margin: 10px 0;
324
+ }
325
+ .reasoning {
326
+ background: #f8f9fa;
327
+ padding: 10px;
328
+ border-left: 3px solid #007bff;
329
+ margin: 10px 0;
330
+ }
331
+ .actions {
332
+ margin: 10px 0;
333
+ }
334
+ .actions ul {
335
+ margin: 5px 0;
336
+ padding-left: 20px;
337
+ }
338
+ .actions code {
339
+ background: #e9ecef;
340
+ padding: 2px 6px;
341
+ border-radius: 3px;
342
+ }
343
+ .complete {
344
+ background: #d4edda;
345
+ color: #155724;
346
+ padding: 10px;
347
+ border-radius: 4px;
348
+ margin-top: 10px;
349
+ }
350
+ .action-result {
351
+ padding: 10px;
352
+ margin: 5px 0;
353
+ }
354
+ .success {
355
+ color: #155724;
356
+ }
357
+ .error {
358
+ color: #721c24;
359
+ background: #f8d7da;
360
+ padding: 10px;
361
+ border-radius: 4px;
362
+ }
363
+ .log {
364
+ background: #fff3cd;
365
+ padding: 10px;
366
+ margin: 10px 0;
367
+ border-radius: 4px;
368
+ }
369
+ .split {
370
+ text-align: center;
371
+ margin: 30px 0;
372
+ }
373
+ .split h3 {
374
+ color: #666;
375
+ }
376
+ .split-line {
377
+ border: none;
378
+ border-top: 2px dashed #ccc;
379
+ margin: 30px 0;
380
+ }
381
+ .url {
382
+ word-break: break-all;
383
+ }
384
+ .plan {
385
+ background: #e7f3ff;
386
+ border-radius: 8px;
387
+ padding: 20px;
388
+ margin: 20px 0;
389
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
390
+ }
391
+ .plan h3 {
392
+ margin-top: 0;
393
+ color: #0056b3;
394
+ }
395
+ .plan-result {
396
+ background: #d1ecf1;
397
+ color: #0c5460;
398
+ padding: 10px;
399
+ border-radius: 4px;
400
+ margin-top: 10px;
401
+ }
402
+ </style>
403
+ </head>
404
+ <body>
405
+ <h1>Agent Execution Report</h1>
406
+ """
407
+
408
+
409
+ def _get_html_footer() -> str:
410
+ """Get HTML document footer."""
411
+ return """
412
+ </body>
413
+ </html>
414
+ """
415
+
416
+
417
def export_to_json(events: list[ObserverEvent], path: str) -> None:
    """Export events to a JSON file.

    Every event is dumped in JSON mode so timestamps and nested values are
    serialized the same way whether or not the event carries an in-memory
    screenshot. Bytes images are written as base64 text and flagged with an
    extra ``"image_encoding": "base64"`` key.

    Args:
        events: List of events to export.
        path: Path to the output JSON file. Missing parent directories are
            created.
    """
    output_path = Path(path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Convert events to JSON-serializable format
    json_events = []
    for event in events:
        image = getattr(event, "image", None)
        if isinstance(event, (StepEvent, ImageEvent, PlanEvent)) and isinstance(
            image, bytes
        ):
            # Raw image bytes cannot go through the JSON-mode dump (UTF-8
            # decode error), so exclude the field and add it back as base64.
            # The rest of the event is serialized like any other event.
            event_dict = event.model_dump(mode="json", exclude={"image"})
            event_dict["image"] = base64.b64encode(image).decode("utf-8")
            event_dict["image_encoding"] = "base64"
        else:
            event_dict = event.model_dump(mode="json")
        json_events.append(event_dict)

    output_path.write_text(
        json.dumps(json_events, indent=2, default=str), encoding="utf-8"
    )
@@ -0,0 +1,12 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+
9
+ # Re-export from types for convenience
10
+ from ...types import AsyncObserver
11
+
12
+ __all__ = ["AsyncObserver"]
oagi/agent/registry.py CHANGED
@@ -91,7 +91,7 @@ def create_agent(mode: str, **kwargs: Any) -> AsyncAgent:
91
91
  Standard parameters typically include:
92
92
  - api_key: OAGI API key
93
93
  - base_url: OAGI API base URL
94
- - model: Model identifier (e.g., "lux-v1")
94
+ - model: Model identifier (e.g., "lux-actor-1")
95
95
  - max_steps: Maximum number of steps to execute
96
96
  - temperature: Sampling temperature
97
97
 
@@ -111,7 +111,7 @@ def create_agent(mode: str, **kwargs: Any) -> AsyncAgent:
111
111
  mode="actor",
112
112
  api_key="...",
113
113
  base_url="...",
114
- model="lux-v1",
114
+ model="lux-actor-1",
115
115
  max_steps=30,
116
116
  temperature=0.0,
117
117
  )
@@ -44,6 +44,7 @@ class Action(BaseModel):
44
44
  details: dict[str, Any] = Field(default_factory=dict)
45
45
  reasoning: str | None = None
46
46
  result: str | None = None
47
+ screenshot_uuid: str | None = None # UUID of uploaded screenshot for this action
47
48
 
48
49
 
49
50
  class TodoHistory(BaseModel):
@@ -10,6 +10,7 @@ import json
10
10
  from typing import Any
11
11
 
12
12
  from ...client import AsyncClient
13
+ from ...types import URL, Image
13
14
  from .memory import PlannerMemory
14
15
  from .models import Action, PlannerOutput, ReflectionOutput
15
16
 
@@ -120,7 +121,7 @@ class Planner:
120
121
  self,
121
122
  todo: str,
122
123
  context: dict[str, Any],
123
- screenshot: bytes | None = None,
124
+ screenshot: Image | URL | None = None,
124
125
  memory: PlannerMemory | None = None,
125
126
  todo_index: int | None = None,
126
127
  ) -> PlannerOutput:
@@ -175,10 +176,11 @@ class Planner:
175
176
  self,
176
177
  actions: list[Action],
177
178
  context: dict[str, Any],
178
- screenshot: bytes | None = None,
179
+ screenshot: Image | URL | None = None,
179
180
  memory: PlannerMemory | None = None,
180
181
  todo_index: int | None = None,
181
182
  current_instruction: str | None = None,
183
+ reflection_interval: int = 4,
182
184
  ) -> ReflectionOutput:
183
185
  """Reflect on recent actions and progress.
184
186
 
@@ -189,6 +191,7 @@ class Planner:
189
191
  memory: Optional PlannerMemory for formatting contexts
190
192
  todo_index: Optional todo index for formatting internal context
191
193
  current_instruction: Current subtask instruction being executed
194
+ reflection_interval: Window size for recent actions/screenshots
192
195
 
193
196
  Returns:
194
197
  ReflectionOutput with continuation decision and reasoning
@@ -212,6 +215,9 @@ class Planner:
212
215
  overall_todo,
213
216
  ) = self._extract_memory_data(memory, context, todo_index)
214
217
 
218
+ # Get window of recent actions based on reflection_interval
219
+ window_actions = actions[-reflection_interval:]
220
+
215
221
  # Convert actions to window_steps format
216
222
  window_steps = [
217
223
  {
@@ -220,7 +226,14 @@ class Planner:
220
226
  "target": action.target or "",
221
227
  "reasoning": action.reasoning or "",
222
228
  }
223
- for i, action in enumerate(actions[-10:]) # Last 10 actions
229
+ for i, action in enumerate(window_actions)
230
+ ]
231
+
232
+ # Extract screenshot UUIDs from window actions
233
+ window_screenshots = [
234
+ action.screenshot_uuid
235
+ for action in window_actions
236
+ if action.screenshot_uuid
224
237
  ]
225
238
 
226
239
  # Format prior notes from context (still needed as a simple string summary)
@@ -238,7 +251,7 @@ class Planner:
238
251
  task_execution_summary=task_execution_summary,
239
252
  current_subtask_instruction=current_instruction or "",
240
253
  window_steps=window_steps,
241
- window_screenshots=[], # Could be populated if we track screenshot history
254
+ window_screenshots=window_screenshots,
242
255
  result_screenshot=result_screenshot_uuid,
243
256
  prior_notes=prior_notes,
244
257
  )
@@ -337,7 +350,9 @@ class Planner:
337
350
  """
338
351
  try:
339
352
  # Try to parse as JSON (oagi_first format)
340
- data = json.loads(response)
353
+ # Extract JSON string to handle Markdown code blocks
354
+ json_response = self._extract_json_str(response)
355
+ data = json.loads(json_response)
341
356
  # oagi_first returns: {"reasoning": "...", "subtask": "..."}
342
357
  return PlannerOutput(
343
358
  instruction=data.get("subtask", data.get("instruction", "")),
@@ -349,7 +364,7 @@ class Planner:
349
364
  except (json.JSONDecodeError, KeyError):
350
365
  # Fallback: use the entire response as instruction
351
366
  return PlannerOutput(
352
- instruction=response,
367
+ instruction="",
353
368
  reasoning="Failed to parse structured response",
354
369
  subtodos=[],
355
370
  )
@@ -365,7 +380,8 @@ class Planner:
365
380
  """
366
381
  try:
367
382
  # Try to parse as JSON (oagi_follow format)
368
- data = json.loads(response)
383
+ json_response = self._extract_json_str(response)
384
+ data = json.loads(json_response)
369
385
  # oagi_follow returns:
370
386
  # {"assessment": "...", "summary": "...", "reflection": "...",
371
387
  # "success": "yes" | "no", "subtask_instruction": "..."}
@@ -392,3 +408,10 @@ class Planner:
392
408
  reasoning="Failed to parse reflection response, continuing current approach",
393
409
  success_assessment=False,
394
410
  )
411
+
412
+ def _extract_json_str(self, text: str) -> str:
413
+ start = text.find("{")
414
+ end = text.rfind("}") + 1
415
+ if start < 0 or end <= start:
416
+ return ""
417
+ return text[start:end]