oagi-core 0.9.2__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oagi/__init__.py +76 -33
- oagi/agent/__init__.py +2 -0
- oagi/agent/default.py +41 -8
- oagi/agent/factories.py +22 -3
- oagi/agent/observer/__init__.py +38 -0
- oagi/agent/observer/agent_observer.py +99 -0
- oagi/agent/observer/events.py +28 -0
- oagi/agent/observer/exporters.py +445 -0
- oagi/agent/observer/protocol.py +12 -0
- oagi/agent/registry.py +2 -2
- oagi/agent/tasker/models.py +1 -0
- oagi/agent/tasker/planner.py +30 -7
- oagi/agent/tasker/taskee_agent.py +171 -79
- oagi/agent/tasker/tasker_agent.py +20 -9
- oagi/cli/agent.py +42 -3
- oagi/cli/tracking.py +27 -17
- oagi/handler/pyautogui_action_handler.py +7 -0
- oagi/server/agent_wrappers.py +5 -5
- oagi/server/models.py +1 -1
- oagi/server/session_store.py +2 -2
- oagi/task/async_.py +11 -32
- oagi/task/async_short.py +1 -1
- oagi/task/base.py +41 -7
- oagi/task/short.py +1 -1
- oagi/task/sync.py +9 -32
- oagi/types/__init__.py +24 -4
- oagi/types/async_image_provider.py +3 -2
- oagi/types/image_provider.py +3 -2
- oagi/types/step_observer.py +75 -16
- oagi/types/url.py +3 -0
- {oagi_core-0.9.2.dist-info → oagi_core-0.10.0.dist-info}/METADATA +37 -25
- oagi_core-0.10.0.dist-info/RECORD +68 -0
- oagi/types/url_image.py +0 -47
- oagi_core-0.9.2.dist-info/RECORD +0 -63
- {oagi_core-0.9.2.dist-info → oagi_core-0.10.0.dist-info}/WHEEL +0 -0
- {oagi_core-0.9.2.dist-info → oagi_core-0.10.0.dist-info}/entry_points.txt +0 -0
- {oagi_core-0.9.2.dist-info → oagi_core-0.10.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,445 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
import base64
|
|
10
|
+
import json
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from ...types import (
|
|
14
|
+
ActionEvent,
|
|
15
|
+
ImageEvent,
|
|
16
|
+
LogEvent,
|
|
17
|
+
ObserverEvent,
|
|
18
|
+
PlanEvent,
|
|
19
|
+
SplitEvent,
|
|
20
|
+
StepEvent,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def export_to_markdown(
|
|
25
|
+
events: list[ObserverEvent],
|
|
26
|
+
path: str,
|
|
27
|
+
images_dir: str | None = None,
|
|
28
|
+
) -> None:
|
|
29
|
+
"""Export events to a Markdown file.
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
events: List of events to export.
|
|
33
|
+
path: Path to the output Markdown file.
|
|
34
|
+
images_dir: Directory to save images. If None, images are not saved.
|
|
35
|
+
"""
|
|
36
|
+
output_path = Path(path)
|
|
37
|
+
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
38
|
+
|
|
39
|
+
if images_dir:
|
|
40
|
+
images_path = Path(images_dir)
|
|
41
|
+
images_path.mkdir(parents=True, exist_ok=True)
|
|
42
|
+
|
|
43
|
+
lines: list[str] = ["# Agent Execution Report\n"]
|
|
44
|
+
image_counter = 0
|
|
45
|
+
|
|
46
|
+
for event in events:
|
|
47
|
+
timestamp = event.timestamp.strftime("%H:%M:%S")
|
|
48
|
+
|
|
49
|
+
match event:
|
|
50
|
+
case StepEvent():
|
|
51
|
+
lines.append(f"\n## Step {event.step_num}\n")
|
|
52
|
+
lines.append(f"**Time:** {timestamp}\n")
|
|
53
|
+
|
|
54
|
+
if isinstance(event.image, bytes):
|
|
55
|
+
if images_dir:
|
|
56
|
+
image_counter += 1
|
|
57
|
+
image_filename = f"step_{event.step_num}.png"
|
|
58
|
+
image_path = Path(images_dir) / image_filename
|
|
59
|
+
image_path.write_bytes(event.image)
|
|
60
|
+
rel_path = Path(images_dir).name / Path(image_filename)
|
|
61
|
+
lines.append(f"\n\n")
|
|
62
|
+
else:
|
|
63
|
+
lines.append(
|
|
64
|
+
f"\n*[Screenshot captured - {len(event.image)} bytes]*\n"
|
|
65
|
+
)
|
|
66
|
+
elif isinstance(event.image, str):
|
|
67
|
+
lines.append(f"\n**Screenshot URL:** {event.image}\n")
|
|
68
|
+
|
|
69
|
+
if event.step.reason:
|
|
70
|
+
lines.append(f"\n**Reasoning:**\n> {event.step.reason}\n")
|
|
71
|
+
|
|
72
|
+
if event.step.actions:
|
|
73
|
+
lines.append("\n**Planned Actions:**\n")
|
|
74
|
+
for action in event.step.actions:
|
|
75
|
+
count_str = (
|
|
76
|
+
f" (x{action.count})"
|
|
77
|
+
if action.count and action.count > 1
|
|
78
|
+
else ""
|
|
79
|
+
)
|
|
80
|
+
lines.append(
|
|
81
|
+
f"- `{action.type.value}`: {action.argument}{count_str}\n"
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
if event.step.stop:
|
|
85
|
+
lines.append("\n**Status:** Task Complete\n")
|
|
86
|
+
|
|
87
|
+
case ActionEvent():
|
|
88
|
+
lines.append(f"\n### Actions Executed ({timestamp})\n")
|
|
89
|
+
if event.error:
|
|
90
|
+
lines.append(f"\n**Error:** {event.error}\n")
|
|
91
|
+
else:
|
|
92
|
+
lines.append("\n**Result:** Success\n")
|
|
93
|
+
|
|
94
|
+
case LogEvent():
|
|
95
|
+
lines.append(f"\n> **Log ({timestamp}):** {event.message}\n")
|
|
96
|
+
|
|
97
|
+
case SplitEvent():
|
|
98
|
+
if event.label:
|
|
99
|
+
lines.append(f"\n---\n\n### {event.label}\n")
|
|
100
|
+
else:
|
|
101
|
+
lines.append("\n---\n")
|
|
102
|
+
|
|
103
|
+
case ImageEvent():
|
|
104
|
+
pass
|
|
105
|
+
|
|
106
|
+
case PlanEvent():
|
|
107
|
+
phase_titles = {
|
|
108
|
+
"initial": "Initial Planning",
|
|
109
|
+
"reflection": "Reflection",
|
|
110
|
+
"summary": "Summary",
|
|
111
|
+
}
|
|
112
|
+
phase_title = phase_titles.get(event.phase, event.phase.capitalize())
|
|
113
|
+
lines.append(f"\n### {phase_title} ({timestamp})\n")
|
|
114
|
+
|
|
115
|
+
if event.image:
|
|
116
|
+
if isinstance(event.image, bytes):
|
|
117
|
+
if images_dir:
|
|
118
|
+
image_counter += 1
|
|
119
|
+
image_filename = f"plan_{event.phase}_{image_counter}.png"
|
|
120
|
+
image_path = Path(images_dir) / image_filename
|
|
121
|
+
image_path.write_bytes(event.image)
|
|
122
|
+
rel_path = Path(images_dir).name / Path(image_filename)
|
|
123
|
+
lines.append(f"\n\n")
|
|
124
|
+
else:
|
|
125
|
+
lines.append(
|
|
126
|
+
f"\n*[Screenshot captured - {len(event.image)} bytes]*\n"
|
|
127
|
+
)
|
|
128
|
+
elif isinstance(event.image, str):
|
|
129
|
+
lines.append(f"\n**Screenshot URL:** {event.image}\n")
|
|
130
|
+
|
|
131
|
+
if event.reasoning:
|
|
132
|
+
lines.append(f"\n**Reasoning:**\n> {event.reasoning}\n")
|
|
133
|
+
|
|
134
|
+
if event.result:
|
|
135
|
+
lines.append(f"\n**Result:** {event.result}\n")
|
|
136
|
+
|
|
137
|
+
output_path.write_text("".join(lines))
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def export_to_html(events: list[ObserverEvent], path: str) -> None:
|
|
141
|
+
"""Export events to a self-contained HTML file.
|
|
142
|
+
|
|
143
|
+
Args:
|
|
144
|
+
events: List of events to export.
|
|
145
|
+
path: Path to the output HTML file.
|
|
146
|
+
"""
|
|
147
|
+
output_path = Path(path)
|
|
148
|
+
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
149
|
+
|
|
150
|
+
html_parts: list[str] = [_get_html_header()]
|
|
151
|
+
|
|
152
|
+
for event in events:
|
|
153
|
+
timestamp = event.timestamp.strftime("%H:%M:%S")
|
|
154
|
+
|
|
155
|
+
match event:
|
|
156
|
+
case StepEvent():
|
|
157
|
+
html_parts.append('<div class="step">')
|
|
158
|
+
html_parts.append(f"<h2>Step {event.step_num}</h2>")
|
|
159
|
+
html_parts.append(f'<span class="timestamp">{timestamp}</span>')
|
|
160
|
+
|
|
161
|
+
if isinstance(event.image, bytes):
|
|
162
|
+
b64_image = base64.b64encode(event.image).decode("utf-8")
|
|
163
|
+
html_parts.append(
|
|
164
|
+
f'<img src="data:image/png;base64,{b64_image}" '
|
|
165
|
+
f'alt="Step {event.step_num}" class="screenshot"/>'
|
|
166
|
+
)
|
|
167
|
+
elif isinstance(event.image, str):
|
|
168
|
+
html_parts.append(
|
|
169
|
+
f'<p class="url">Screenshot URL: <a href="{event.image}">{event.image}</a></p>'
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
if event.step.reason:
|
|
173
|
+
html_parts.append('<div class="reasoning">')
|
|
174
|
+
html_parts.append(
|
|
175
|
+
f"<strong>Reasoning:</strong><p>{_escape_html(event.step.reason)}</p>"
|
|
176
|
+
)
|
|
177
|
+
html_parts.append("</div>")
|
|
178
|
+
|
|
179
|
+
if event.step.actions:
|
|
180
|
+
html_parts.append('<div class="actions">')
|
|
181
|
+
html_parts.append("<strong>Planned Actions:</strong><ul>")
|
|
182
|
+
for action in event.step.actions:
|
|
183
|
+
count_str = (
|
|
184
|
+
f" (x{action.count})"
|
|
185
|
+
if action.count and action.count > 1
|
|
186
|
+
else ""
|
|
187
|
+
)
|
|
188
|
+
html_parts.append(
|
|
189
|
+
f"<li><code>{action.type.value}</code>: "
|
|
190
|
+
f"{_escape_html(action.argument)}{count_str}</li>"
|
|
191
|
+
)
|
|
192
|
+
html_parts.append("</ul></div>")
|
|
193
|
+
|
|
194
|
+
if event.step.stop:
|
|
195
|
+
html_parts.append('<div class="complete">Task Complete</div>')
|
|
196
|
+
|
|
197
|
+
html_parts.append("</div>")
|
|
198
|
+
|
|
199
|
+
case ActionEvent():
|
|
200
|
+
html_parts.append('<div class="action-result">')
|
|
201
|
+
html_parts.append(f'<span class="timestamp">{timestamp}</span>')
|
|
202
|
+
if event.error:
|
|
203
|
+
html_parts.append(
|
|
204
|
+
f'<div class="error">Error: {_escape_html(event.error)}</div>'
|
|
205
|
+
)
|
|
206
|
+
else:
|
|
207
|
+
html_parts.append(
|
|
208
|
+
'<div class="success">Actions executed successfully</div>'
|
|
209
|
+
)
|
|
210
|
+
html_parts.append("</div>")
|
|
211
|
+
|
|
212
|
+
case LogEvent():
|
|
213
|
+
html_parts.append('<div class="log">')
|
|
214
|
+
html_parts.append(f'<span class="timestamp">{timestamp}</span>')
|
|
215
|
+
html_parts.append(f"<p>{_escape_html(event.message)}</p>")
|
|
216
|
+
html_parts.append("</div>")
|
|
217
|
+
|
|
218
|
+
case SplitEvent():
|
|
219
|
+
if event.label:
|
|
220
|
+
html_parts.append(
|
|
221
|
+
f'<div class="split"><h3>{_escape_html(event.label)}</h3></div>'
|
|
222
|
+
)
|
|
223
|
+
else:
|
|
224
|
+
html_parts.append('<hr class="split-line"/>')
|
|
225
|
+
|
|
226
|
+
case ImageEvent():
|
|
227
|
+
pass
|
|
228
|
+
|
|
229
|
+
case PlanEvent():
|
|
230
|
+
phase_titles = {
|
|
231
|
+
"initial": "Initial Planning",
|
|
232
|
+
"reflection": "Reflection",
|
|
233
|
+
"summary": "Summary",
|
|
234
|
+
}
|
|
235
|
+
phase_title = phase_titles.get(event.phase, event.phase.capitalize())
|
|
236
|
+
html_parts.append('<div class="plan">')
|
|
237
|
+
html_parts.append(f"<h3>{phase_title}</h3>")
|
|
238
|
+
html_parts.append(f'<span class="timestamp">{timestamp}</span>')
|
|
239
|
+
|
|
240
|
+
if event.image:
|
|
241
|
+
if isinstance(event.image, bytes):
|
|
242
|
+
b64_image = base64.b64encode(event.image).decode("utf-8")
|
|
243
|
+
html_parts.append(
|
|
244
|
+
f'<img src="data:image/png;base64,{b64_image}" '
|
|
245
|
+
f'alt="{phase_title}" class="screenshot"/>'
|
|
246
|
+
)
|
|
247
|
+
elif isinstance(event.image, str):
|
|
248
|
+
html_parts.append(
|
|
249
|
+
f'<p class="url">Screenshot URL: '
|
|
250
|
+
f'<a href="{event.image}">{event.image}</a></p>'
|
|
251
|
+
)
|
|
252
|
+
|
|
253
|
+
if event.reasoning:
|
|
254
|
+
html_parts.append('<div class="reasoning">')
|
|
255
|
+
html_parts.append(
|
|
256
|
+
f"<strong>Reasoning:</strong><p>{_escape_html(event.reasoning)}</p>"
|
|
257
|
+
)
|
|
258
|
+
html_parts.append("</div>")
|
|
259
|
+
|
|
260
|
+
if event.result:
|
|
261
|
+
html_parts.append(
|
|
262
|
+
f'<div class="plan-result"><strong>Result:</strong> '
|
|
263
|
+
f"{_escape_html(event.result)}</div>"
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
html_parts.append("</div>")
|
|
267
|
+
|
|
268
|
+
html_parts.append(_get_html_footer())
|
|
269
|
+
output_path.write_text("".join(html_parts))
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _escape_html(text: str) -> str:
|
|
273
|
+
"""Escape HTML special characters."""
|
|
274
|
+
return (
|
|
275
|
+
text.replace("&", "&")
|
|
276
|
+
.replace("<", "<")
|
|
277
|
+
.replace(">", ">")
|
|
278
|
+
.replace('"', """)
|
|
279
|
+
.replace("'", "'")
|
|
280
|
+
)
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _get_html_header() -> str:
|
|
284
|
+
"""Get HTML document header with CSS styles."""
|
|
285
|
+
return """<!DOCTYPE html>
|
|
286
|
+
<html lang="en">
|
|
287
|
+
<head>
|
|
288
|
+
<meta charset="UTF-8">
|
|
289
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
290
|
+
<title>Agent Execution Report</title>
|
|
291
|
+
<style>
|
|
292
|
+
body {
|
|
293
|
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
|
294
|
+
max-width: 1200px;
|
|
295
|
+
margin: 0 auto;
|
|
296
|
+
padding: 20px;
|
|
297
|
+
background: #f5f5f5;
|
|
298
|
+
}
|
|
299
|
+
h1 {
|
|
300
|
+
color: #333;
|
|
301
|
+
border-bottom: 2px solid #007bff;
|
|
302
|
+
padding-bottom: 10px;
|
|
303
|
+
}
|
|
304
|
+
.step {
|
|
305
|
+
background: white;
|
|
306
|
+
border-radius: 8px;
|
|
307
|
+
padding: 20px;
|
|
308
|
+
margin: 20px 0;
|
|
309
|
+
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
|
310
|
+
}
|
|
311
|
+
.step h2 {
|
|
312
|
+
margin-top: 0;
|
|
313
|
+
color: #007bff;
|
|
314
|
+
}
|
|
315
|
+
.timestamp {
|
|
316
|
+
color: #666;
|
|
317
|
+
font-size: 0.9em;
|
|
318
|
+
}
|
|
319
|
+
.screenshot {
|
|
320
|
+
max-width: 100%;
|
|
321
|
+
border: 1px solid #ddd;
|
|
322
|
+
border-radius: 4px;
|
|
323
|
+
margin: 10px 0;
|
|
324
|
+
}
|
|
325
|
+
.reasoning {
|
|
326
|
+
background: #f8f9fa;
|
|
327
|
+
padding: 10px;
|
|
328
|
+
border-left: 3px solid #007bff;
|
|
329
|
+
margin: 10px 0;
|
|
330
|
+
}
|
|
331
|
+
.actions {
|
|
332
|
+
margin: 10px 0;
|
|
333
|
+
}
|
|
334
|
+
.actions ul {
|
|
335
|
+
margin: 5px 0;
|
|
336
|
+
padding-left: 20px;
|
|
337
|
+
}
|
|
338
|
+
.actions code {
|
|
339
|
+
background: #e9ecef;
|
|
340
|
+
padding: 2px 6px;
|
|
341
|
+
border-radius: 3px;
|
|
342
|
+
}
|
|
343
|
+
.complete {
|
|
344
|
+
background: #d4edda;
|
|
345
|
+
color: #155724;
|
|
346
|
+
padding: 10px;
|
|
347
|
+
border-radius: 4px;
|
|
348
|
+
margin-top: 10px;
|
|
349
|
+
}
|
|
350
|
+
.action-result {
|
|
351
|
+
padding: 10px;
|
|
352
|
+
margin: 5px 0;
|
|
353
|
+
}
|
|
354
|
+
.success {
|
|
355
|
+
color: #155724;
|
|
356
|
+
}
|
|
357
|
+
.error {
|
|
358
|
+
color: #721c24;
|
|
359
|
+
background: #f8d7da;
|
|
360
|
+
padding: 10px;
|
|
361
|
+
border-radius: 4px;
|
|
362
|
+
}
|
|
363
|
+
.log {
|
|
364
|
+
background: #fff3cd;
|
|
365
|
+
padding: 10px;
|
|
366
|
+
margin: 10px 0;
|
|
367
|
+
border-radius: 4px;
|
|
368
|
+
}
|
|
369
|
+
.split {
|
|
370
|
+
text-align: center;
|
|
371
|
+
margin: 30px 0;
|
|
372
|
+
}
|
|
373
|
+
.split h3 {
|
|
374
|
+
color: #666;
|
|
375
|
+
}
|
|
376
|
+
.split-line {
|
|
377
|
+
border: none;
|
|
378
|
+
border-top: 2px dashed #ccc;
|
|
379
|
+
margin: 30px 0;
|
|
380
|
+
}
|
|
381
|
+
.url {
|
|
382
|
+
word-break: break-all;
|
|
383
|
+
}
|
|
384
|
+
.plan {
|
|
385
|
+
background: #e7f3ff;
|
|
386
|
+
border-radius: 8px;
|
|
387
|
+
padding: 20px;
|
|
388
|
+
margin: 20px 0;
|
|
389
|
+
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
|
390
|
+
}
|
|
391
|
+
.plan h3 {
|
|
392
|
+
margin-top: 0;
|
|
393
|
+
color: #0056b3;
|
|
394
|
+
}
|
|
395
|
+
.plan-result {
|
|
396
|
+
background: #d1ecf1;
|
|
397
|
+
color: #0c5460;
|
|
398
|
+
padding: 10px;
|
|
399
|
+
border-radius: 4px;
|
|
400
|
+
margin-top: 10px;
|
|
401
|
+
}
|
|
402
|
+
</style>
|
|
403
|
+
</head>
|
|
404
|
+
<body>
|
|
405
|
+
<h1>Agent Execution Report</h1>
|
|
406
|
+
"""
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
def _get_html_footer() -> str:
|
|
410
|
+
"""Get HTML document footer."""
|
|
411
|
+
return """
|
|
412
|
+
</body>
|
|
413
|
+
</html>
|
|
414
|
+
"""
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
def export_to_json(events: list[ObserverEvent], path: str) -> None:
|
|
418
|
+
"""Export events to a JSON file.
|
|
419
|
+
|
|
420
|
+
Args:
|
|
421
|
+
events: List of events to export.
|
|
422
|
+
path: Path to the output JSON file.
|
|
423
|
+
"""
|
|
424
|
+
output_path = Path(path)
|
|
425
|
+
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
426
|
+
|
|
427
|
+
# Convert events to JSON-serializable format
|
|
428
|
+
json_events = []
|
|
429
|
+
for event in events:
|
|
430
|
+
# Handle bytes images before model_dump to avoid UTF-8 decode error
|
|
431
|
+
if isinstance(event, (StepEvent, ImageEvent, PlanEvent)) and isinstance(
|
|
432
|
+
getattr(event, "image", None), bytes
|
|
433
|
+
):
|
|
434
|
+
# Dump without json mode first, then handle bytes manually
|
|
435
|
+
event_dict = event.model_dump()
|
|
436
|
+
event_dict["image"] = base64.b64encode(event.image).decode("utf-8")
|
|
437
|
+
event_dict["image_encoding"] = "base64"
|
|
438
|
+
# Convert datetime to string
|
|
439
|
+
if "timestamp" in event_dict:
|
|
440
|
+
event_dict["timestamp"] = event_dict["timestamp"].isoformat()
|
|
441
|
+
else:
|
|
442
|
+
event_dict = event.model_dump(mode="json")
|
|
443
|
+
json_events.append(event_dict)
|
|
444
|
+
|
|
445
|
+
output_path.write_text(json.dumps(json_events, indent=2, default=str))
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
# Re-export from types for convenience
|
|
10
|
+
from ...types import AsyncObserver
|
|
11
|
+
|
|
12
|
+
__all__ = ["AsyncObserver"]
|
oagi/agent/registry.py
CHANGED
|
@@ -91,7 +91,7 @@ def create_agent(mode: str, **kwargs: Any) -> AsyncAgent:
|
|
|
91
91
|
Standard parameters typically include:
|
|
92
92
|
- api_key: OAGI API key
|
|
93
93
|
- base_url: OAGI API base URL
|
|
94
|
-
- model: Model identifier (e.g., "lux-
|
|
94
|
+
- model: Model identifier (e.g., "lux-actor-1")
|
|
95
95
|
- max_steps: Maximum number of steps to execute
|
|
96
96
|
- temperature: Sampling temperature
|
|
97
97
|
|
|
@@ -111,7 +111,7 @@ def create_agent(mode: str, **kwargs: Any) -> AsyncAgent:
|
|
|
111
111
|
mode="actor",
|
|
112
112
|
api_key="...",
|
|
113
113
|
base_url="...",
|
|
114
|
-
model="lux-
|
|
114
|
+
model="lux-actor-1",
|
|
115
115
|
max_steps=30,
|
|
116
116
|
temperature=0.0,
|
|
117
117
|
)
|
oagi/agent/tasker/models.py
CHANGED
|
@@ -44,6 +44,7 @@ class Action(BaseModel):
|
|
|
44
44
|
details: dict[str, Any] = Field(default_factory=dict)
|
|
45
45
|
reasoning: str | None = None
|
|
46
46
|
result: str | None = None
|
|
47
|
+
screenshot_uuid: str | None = None # UUID of uploaded screenshot for this action
|
|
47
48
|
|
|
48
49
|
|
|
49
50
|
class TodoHistory(BaseModel):
|
oagi/agent/tasker/planner.py
CHANGED
|
@@ -10,6 +10,7 @@ import json
|
|
|
10
10
|
from typing import Any
|
|
11
11
|
|
|
12
12
|
from ...client import AsyncClient
|
|
13
|
+
from ...types import URL, Image
|
|
13
14
|
from .memory import PlannerMemory
|
|
14
15
|
from .models import Action, PlannerOutput, ReflectionOutput
|
|
15
16
|
|
|
@@ -120,7 +121,7 @@ class Planner:
|
|
|
120
121
|
self,
|
|
121
122
|
todo: str,
|
|
122
123
|
context: dict[str, Any],
|
|
123
|
-
screenshot:
|
|
124
|
+
screenshot: Image | URL | None = None,
|
|
124
125
|
memory: PlannerMemory | None = None,
|
|
125
126
|
todo_index: int | None = None,
|
|
126
127
|
) -> PlannerOutput:
|
|
@@ -175,10 +176,11 @@ class Planner:
|
|
|
175
176
|
self,
|
|
176
177
|
actions: list[Action],
|
|
177
178
|
context: dict[str, Any],
|
|
178
|
-
screenshot:
|
|
179
|
+
screenshot: Image | URL | None = None,
|
|
179
180
|
memory: PlannerMemory | None = None,
|
|
180
181
|
todo_index: int | None = None,
|
|
181
182
|
current_instruction: str | None = None,
|
|
183
|
+
reflection_interval: int = 4,
|
|
182
184
|
) -> ReflectionOutput:
|
|
183
185
|
"""Reflect on recent actions and progress.
|
|
184
186
|
|
|
@@ -189,6 +191,7 @@ class Planner:
|
|
|
189
191
|
memory: Optional PlannerMemory for formatting contexts
|
|
190
192
|
todo_index: Optional todo index for formatting internal context
|
|
191
193
|
current_instruction: Current subtask instruction being executed
|
|
194
|
+
reflection_interval: Window size for recent actions/screenshots
|
|
192
195
|
|
|
193
196
|
Returns:
|
|
194
197
|
ReflectionOutput with continuation decision and reasoning
|
|
@@ -212,6 +215,9 @@ class Planner:
|
|
|
212
215
|
overall_todo,
|
|
213
216
|
) = self._extract_memory_data(memory, context, todo_index)
|
|
214
217
|
|
|
218
|
+
# Get window of recent actions based on reflection_interval
|
|
219
|
+
window_actions = actions[-reflection_interval:]
|
|
220
|
+
|
|
215
221
|
# Convert actions to window_steps format
|
|
216
222
|
window_steps = [
|
|
217
223
|
{
|
|
@@ -220,7 +226,14 @@ class Planner:
|
|
|
220
226
|
"target": action.target or "",
|
|
221
227
|
"reasoning": action.reasoning or "",
|
|
222
228
|
}
|
|
223
|
-
for i, action in enumerate(
|
|
229
|
+
for i, action in enumerate(window_actions)
|
|
230
|
+
]
|
|
231
|
+
|
|
232
|
+
# Extract screenshot UUIDs from window actions
|
|
233
|
+
window_screenshots = [
|
|
234
|
+
action.screenshot_uuid
|
|
235
|
+
for action in window_actions
|
|
236
|
+
if action.screenshot_uuid
|
|
224
237
|
]
|
|
225
238
|
|
|
226
239
|
# Format prior notes from context (still needed as a simple string summary)
|
|
@@ -238,7 +251,7 @@ class Planner:
|
|
|
238
251
|
task_execution_summary=task_execution_summary,
|
|
239
252
|
current_subtask_instruction=current_instruction or "",
|
|
240
253
|
window_steps=window_steps,
|
|
241
|
-
window_screenshots=
|
|
254
|
+
window_screenshots=window_screenshots,
|
|
242
255
|
result_screenshot=result_screenshot_uuid,
|
|
243
256
|
prior_notes=prior_notes,
|
|
244
257
|
)
|
|
@@ -337,7 +350,9 @@ class Planner:
|
|
|
337
350
|
"""
|
|
338
351
|
try:
|
|
339
352
|
# Try to parse as JSON (oagi_first format)
|
|
340
|
-
|
|
353
|
+
# Extract JSON string to handle Markdown code blocks
|
|
354
|
+
json_response = self._extract_json_str(response)
|
|
355
|
+
data = json.loads(json_response)
|
|
341
356
|
# oagi_first returns: {"reasoning": "...", "subtask": "..."}
|
|
342
357
|
return PlannerOutput(
|
|
343
358
|
instruction=data.get("subtask", data.get("instruction", "")),
|
|
@@ -349,7 +364,7 @@ class Planner:
|
|
|
349
364
|
except (json.JSONDecodeError, KeyError):
|
|
350
365
|
# Fallback: use the entire response as instruction
|
|
351
366
|
return PlannerOutput(
|
|
352
|
-
instruction=
|
|
367
|
+
instruction="",
|
|
353
368
|
reasoning="Failed to parse structured response",
|
|
354
369
|
subtodos=[],
|
|
355
370
|
)
|
|
@@ -365,7 +380,8 @@ class Planner:
|
|
|
365
380
|
"""
|
|
366
381
|
try:
|
|
367
382
|
# Try to parse as JSON (oagi_follow format)
|
|
368
|
-
|
|
383
|
+
json_response = self._extract_json_str(response)
|
|
384
|
+
data = json.loads(json_response)
|
|
369
385
|
# oagi_follow returns:
|
|
370
386
|
# {"assessment": "...", "summary": "...", "reflection": "...",
|
|
371
387
|
# "success": "yes" | "no", "subtask_instruction": "..."}
|
|
@@ -392,3 +408,10 @@ class Planner:
|
|
|
392
408
|
reasoning="Failed to parse reflection response, continuing current approach",
|
|
393
409
|
success_assessment=False,
|
|
394
410
|
)
|
|
411
|
+
|
|
412
|
+
def _extract_json_str(self, text: str) -> str:
|
|
413
|
+
start = text.find("{")
|
|
414
|
+
end = text.rfind("}") + 1
|
|
415
|
+
if start < 0 or end <= start:
|
|
416
|
+
return ""
|
|
417
|
+
return text[start:end]
|