oagi-core 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oagi/__init__.py +76 -33
- oagi/agent/__init__.py +2 -0
- oagi/agent/default.py +45 -12
- oagi/agent/factories.py +22 -3
- oagi/agent/observer/__init__.py +38 -0
- oagi/agent/observer/agent_observer.py +99 -0
- oagi/agent/observer/events.py +28 -0
- oagi/agent/observer/exporters.py +445 -0
- oagi/agent/observer/protocol.py +12 -0
- oagi/agent/registry.py +2 -2
- oagi/agent/tasker/models.py +1 -0
- oagi/agent/tasker/planner.py +41 -9
- oagi/agent/tasker/taskee_agent.py +178 -86
- oagi/agent/tasker/tasker_agent.py +25 -14
- oagi/cli/agent.py +50 -9
- oagi/cli/tracking.py +27 -17
- oagi/cli/utils.py +11 -4
- oagi/client/base.py +3 -7
- oagi/handler/_macos.py +55 -0
- oagi/handler/pyautogui_action_handler.py +19 -2
- oagi/server/agent_wrappers.py +5 -5
- oagi/server/config.py +3 -3
- oagi/server/models.py +2 -2
- oagi/server/session_store.py +2 -2
- oagi/server/socketio_server.py +1 -1
- oagi/task/async_.py +13 -34
- oagi/task/async_short.py +2 -2
- oagi/task/base.py +41 -7
- oagi/task/short.py +2 -2
- oagi/task/sync.py +11 -34
- oagi/types/__init__.py +24 -4
- oagi/types/async_image_provider.py +3 -2
- oagi/types/image_provider.py +3 -2
- oagi/types/step_observer.py +75 -16
- oagi/types/url.py +3 -0
- {oagi_core-0.9.1.dist-info → oagi_core-0.10.0.dist-info}/METADATA +38 -25
- oagi_core-0.10.0.dist-info/RECORD +68 -0
- oagi/types/url_image.py +0 -47
- oagi_core-0.9.1.dist-info/RECORD +0 -62
- {oagi_core-0.9.1.dist-info → oagi_core-0.10.0.dist-info}/WHEEL +0 -0
- {oagi_core-0.9.1.dist-info → oagi_core-0.10.0.dist-info}/entry_points.txt +0 -0
- {oagi_core-0.9.1.dist-info → oagi_core-0.10.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,445 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
import base64
|
|
10
|
+
import json
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from ...types import (
|
|
14
|
+
ActionEvent,
|
|
15
|
+
ImageEvent,
|
|
16
|
+
LogEvent,
|
|
17
|
+
ObserverEvent,
|
|
18
|
+
PlanEvent,
|
|
19
|
+
SplitEvent,
|
|
20
|
+
StepEvent,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def export_to_markdown(
|
|
25
|
+
events: list[ObserverEvent],
|
|
26
|
+
path: str,
|
|
27
|
+
images_dir: str | None = None,
|
|
28
|
+
) -> None:
|
|
29
|
+
"""Export events to a Markdown file.
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
events: List of events to export.
|
|
33
|
+
path: Path to the output Markdown file.
|
|
34
|
+
images_dir: Directory to save images. If None, images are not saved.
|
|
35
|
+
"""
|
|
36
|
+
output_path = Path(path)
|
|
37
|
+
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
38
|
+
|
|
39
|
+
if images_dir:
|
|
40
|
+
images_path = Path(images_dir)
|
|
41
|
+
images_path.mkdir(parents=True, exist_ok=True)
|
|
42
|
+
|
|
43
|
+
lines: list[str] = ["# Agent Execution Report\n"]
|
|
44
|
+
image_counter = 0
|
|
45
|
+
|
|
46
|
+
for event in events:
|
|
47
|
+
timestamp = event.timestamp.strftime("%H:%M:%S")
|
|
48
|
+
|
|
49
|
+
match event:
|
|
50
|
+
case StepEvent():
|
|
51
|
+
lines.append(f"\n## Step {event.step_num}\n")
|
|
52
|
+
lines.append(f"**Time:** {timestamp}\n")
|
|
53
|
+
|
|
54
|
+
if isinstance(event.image, bytes):
|
|
55
|
+
if images_dir:
|
|
56
|
+
image_counter += 1
|
|
57
|
+
image_filename = f"step_{event.step_num}.png"
|
|
58
|
+
image_path = Path(images_dir) / image_filename
|
|
59
|
+
image_path.write_bytes(event.image)
|
|
60
|
+
rel_path = Path(images_dir).name / Path(image_filename)
|
|
61
|
+
lines.append(f"\n\n")
|
|
62
|
+
else:
|
|
63
|
+
lines.append(
|
|
64
|
+
f"\n*[Screenshot captured - {len(event.image)} bytes]*\n"
|
|
65
|
+
)
|
|
66
|
+
elif isinstance(event.image, str):
|
|
67
|
+
lines.append(f"\n**Screenshot URL:** {event.image}\n")
|
|
68
|
+
|
|
69
|
+
if event.step.reason:
|
|
70
|
+
lines.append(f"\n**Reasoning:**\n> {event.step.reason}\n")
|
|
71
|
+
|
|
72
|
+
if event.step.actions:
|
|
73
|
+
lines.append("\n**Planned Actions:**\n")
|
|
74
|
+
for action in event.step.actions:
|
|
75
|
+
count_str = (
|
|
76
|
+
f" (x{action.count})"
|
|
77
|
+
if action.count and action.count > 1
|
|
78
|
+
else ""
|
|
79
|
+
)
|
|
80
|
+
lines.append(
|
|
81
|
+
f"- `{action.type.value}`: {action.argument}{count_str}\n"
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
if event.step.stop:
|
|
85
|
+
lines.append("\n**Status:** Task Complete\n")
|
|
86
|
+
|
|
87
|
+
case ActionEvent():
|
|
88
|
+
lines.append(f"\n### Actions Executed ({timestamp})\n")
|
|
89
|
+
if event.error:
|
|
90
|
+
lines.append(f"\n**Error:** {event.error}\n")
|
|
91
|
+
else:
|
|
92
|
+
lines.append("\n**Result:** Success\n")
|
|
93
|
+
|
|
94
|
+
case LogEvent():
|
|
95
|
+
lines.append(f"\n> **Log ({timestamp}):** {event.message}\n")
|
|
96
|
+
|
|
97
|
+
case SplitEvent():
|
|
98
|
+
if event.label:
|
|
99
|
+
lines.append(f"\n---\n\n### {event.label}\n")
|
|
100
|
+
else:
|
|
101
|
+
lines.append("\n---\n")
|
|
102
|
+
|
|
103
|
+
case ImageEvent():
|
|
104
|
+
pass
|
|
105
|
+
|
|
106
|
+
case PlanEvent():
|
|
107
|
+
phase_titles = {
|
|
108
|
+
"initial": "Initial Planning",
|
|
109
|
+
"reflection": "Reflection",
|
|
110
|
+
"summary": "Summary",
|
|
111
|
+
}
|
|
112
|
+
phase_title = phase_titles.get(event.phase, event.phase.capitalize())
|
|
113
|
+
lines.append(f"\n### {phase_title} ({timestamp})\n")
|
|
114
|
+
|
|
115
|
+
if event.image:
|
|
116
|
+
if isinstance(event.image, bytes):
|
|
117
|
+
if images_dir:
|
|
118
|
+
image_counter += 1
|
|
119
|
+
image_filename = f"plan_{event.phase}_{image_counter}.png"
|
|
120
|
+
image_path = Path(images_dir) / image_filename
|
|
121
|
+
image_path.write_bytes(event.image)
|
|
122
|
+
rel_path = Path(images_dir).name / Path(image_filename)
|
|
123
|
+
lines.append(f"\n\n")
|
|
124
|
+
else:
|
|
125
|
+
lines.append(
|
|
126
|
+
f"\n*[Screenshot captured - {len(event.image)} bytes]*\n"
|
|
127
|
+
)
|
|
128
|
+
elif isinstance(event.image, str):
|
|
129
|
+
lines.append(f"\n**Screenshot URL:** {event.image}\n")
|
|
130
|
+
|
|
131
|
+
if event.reasoning:
|
|
132
|
+
lines.append(f"\n**Reasoning:**\n> {event.reasoning}\n")
|
|
133
|
+
|
|
134
|
+
if event.result:
|
|
135
|
+
lines.append(f"\n**Result:** {event.result}\n")
|
|
136
|
+
|
|
137
|
+
output_path.write_text("".join(lines))
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def export_to_html(events: list[ObserverEvent], path: str) -> None:
|
|
141
|
+
"""Export events to a self-contained HTML file.
|
|
142
|
+
|
|
143
|
+
Args:
|
|
144
|
+
events: List of events to export.
|
|
145
|
+
path: Path to the output HTML file.
|
|
146
|
+
"""
|
|
147
|
+
output_path = Path(path)
|
|
148
|
+
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
149
|
+
|
|
150
|
+
html_parts: list[str] = [_get_html_header()]
|
|
151
|
+
|
|
152
|
+
for event in events:
|
|
153
|
+
timestamp = event.timestamp.strftime("%H:%M:%S")
|
|
154
|
+
|
|
155
|
+
match event:
|
|
156
|
+
case StepEvent():
|
|
157
|
+
html_parts.append('<div class="step">')
|
|
158
|
+
html_parts.append(f"<h2>Step {event.step_num}</h2>")
|
|
159
|
+
html_parts.append(f'<span class="timestamp">{timestamp}</span>')
|
|
160
|
+
|
|
161
|
+
if isinstance(event.image, bytes):
|
|
162
|
+
b64_image = base64.b64encode(event.image).decode("utf-8")
|
|
163
|
+
html_parts.append(
|
|
164
|
+
f'<img src="data:image/png;base64,{b64_image}" '
|
|
165
|
+
f'alt="Step {event.step_num}" class="screenshot"/>'
|
|
166
|
+
)
|
|
167
|
+
elif isinstance(event.image, str):
|
|
168
|
+
html_parts.append(
|
|
169
|
+
f'<p class="url">Screenshot URL: <a href="{event.image}">{event.image}</a></p>'
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
if event.step.reason:
|
|
173
|
+
html_parts.append('<div class="reasoning">')
|
|
174
|
+
html_parts.append(
|
|
175
|
+
f"<strong>Reasoning:</strong><p>{_escape_html(event.step.reason)}</p>"
|
|
176
|
+
)
|
|
177
|
+
html_parts.append("</div>")
|
|
178
|
+
|
|
179
|
+
if event.step.actions:
|
|
180
|
+
html_parts.append('<div class="actions">')
|
|
181
|
+
html_parts.append("<strong>Planned Actions:</strong><ul>")
|
|
182
|
+
for action in event.step.actions:
|
|
183
|
+
count_str = (
|
|
184
|
+
f" (x{action.count})"
|
|
185
|
+
if action.count and action.count > 1
|
|
186
|
+
else ""
|
|
187
|
+
)
|
|
188
|
+
html_parts.append(
|
|
189
|
+
f"<li><code>{action.type.value}</code>: "
|
|
190
|
+
f"{_escape_html(action.argument)}{count_str}</li>"
|
|
191
|
+
)
|
|
192
|
+
html_parts.append("</ul></div>")
|
|
193
|
+
|
|
194
|
+
if event.step.stop:
|
|
195
|
+
html_parts.append('<div class="complete">Task Complete</div>')
|
|
196
|
+
|
|
197
|
+
html_parts.append("</div>")
|
|
198
|
+
|
|
199
|
+
case ActionEvent():
|
|
200
|
+
html_parts.append('<div class="action-result">')
|
|
201
|
+
html_parts.append(f'<span class="timestamp">{timestamp}</span>')
|
|
202
|
+
if event.error:
|
|
203
|
+
html_parts.append(
|
|
204
|
+
f'<div class="error">Error: {_escape_html(event.error)}</div>'
|
|
205
|
+
)
|
|
206
|
+
else:
|
|
207
|
+
html_parts.append(
|
|
208
|
+
'<div class="success">Actions executed successfully</div>'
|
|
209
|
+
)
|
|
210
|
+
html_parts.append("</div>")
|
|
211
|
+
|
|
212
|
+
case LogEvent():
|
|
213
|
+
html_parts.append('<div class="log">')
|
|
214
|
+
html_parts.append(f'<span class="timestamp">{timestamp}</span>')
|
|
215
|
+
html_parts.append(f"<p>{_escape_html(event.message)}</p>")
|
|
216
|
+
html_parts.append("</div>")
|
|
217
|
+
|
|
218
|
+
case SplitEvent():
|
|
219
|
+
if event.label:
|
|
220
|
+
html_parts.append(
|
|
221
|
+
f'<div class="split"><h3>{_escape_html(event.label)}</h3></div>'
|
|
222
|
+
)
|
|
223
|
+
else:
|
|
224
|
+
html_parts.append('<hr class="split-line"/>')
|
|
225
|
+
|
|
226
|
+
case ImageEvent():
|
|
227
|
+
pass
|
|
228
|
+
|
|
229
|
+
case PlanEvent():
|
|
230
|
+
phase_titles = {
|
|
231
|
+
"initial": "Initial Planning",
|
|
232
|
+
"reflection": "Reflection",
|
|
233
|
+
"summary": "Summary",
|
|
234
|
+
}
|
|
235
|
+
phase_title = phase_titles.get(event.phase, event.phase.capitalize())
|
|
236
|
+
html_parts.append('<div class="plan">')
|
|
237
|
+
html_parts.append(f"<h3>{phase_title}</h3>")
|
|
238
|
+
html_parts.append(f'<span class="timestamp">{timestamp}</span>')
|
|
239
|
+
|
|
240
|
+
if event.image:
|
|
241
|
+
if isinstance(event.image, bytes):
|
|
242
|
+
b64_image = base64.b64encode(event.image).decode("utf-8")
|
|
243
|
+
html_parts.append(
|
|
244
|
+
f'<img src="data:image/png;base64,{b64_image}" '
|
|
245
|
+
f'alt="{phase_title}" class="screenshot"/>'
|
|
246
|
+
)
|
|
247
|
+
elif isinstance(event.image, str):
|
|
248
|
+
html_parts.append(
|
|
249
|
+
f'<p class="url">Screenshot URL: '
|
|
250
|
+
f'<a href="{event.image}">{event.image}</a></p>'
|
|
251
|
+
)
|
|
252
|
+
|
|
253
|
+
if event.reasoning:
|
|
254
|
+
html_parts.append('<div class="reasoning">')
|
|
255
|
+
html_parts.append(
|
|
256
|
+
f"<strong>Reasoning:</strong><p>{_escape_html(event.reasoning)}</p>"
|
|
257
|
+
)
|
|
258
|
+
html_parts.append("</div>")
|
|
259
|
+
|
|
260
|
+
if event.result:
|
|
261
|
+
html_parts.append(
|
|
262
|
+
f'<div class="plan-result"><strong>Result:</strong> '
|
|
263
|
+
f"{_escape_html(event.result)}</div>"
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
html_parts.append("</div>")
|
|
267
|
+
|
|
268
|
+
html_parts.append(_get_html_footer())
|
|
269
|
+
output_path.write_text("".join(html_parts))
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _escape_html(text: str) -> str:
    """Escape HTML special characters.

    Replaces ``&``, ``<``, ``>``, ``"`` and ``'`` with their entity forms so
    the result is safe in element text and in quoted attribute values.

    Args:
        text: Raw text to embed in HTML.

    Returns:
        The escaped text.
    """
    return (
        # "&" must go first, otherwise the ampersands introduced by the
        # later replacements would be escaped a second time.
        text.replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace('"', "&quot;")
        .replace("'", "&#x27;")
    )
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _get_html_header() -> str:
    """Get HTML document header with CSS styles.

    Returns the opening of the report page: doctype, ``<head>`` with an
    inline stylesheet for the report's CSS classes, the opening ``<body>``
    tag and the page heading. The document is left open.
    """
    return """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Agent Execution Report</title>
<style>
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
max-width: 1200px;
margin: 0 auto;
padding: 20px;
background: #f5f5f5;
}
h1 {
color: #333;
border-bottom: 2px solid #007bff;
padding-bottom: 10px;
}
.step {
background: white;
border-radius: 8px;
padding: 20px;
margin: 20px 0;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.step h2 {
margin-top: 0;
color: #007bff;
}
.timestamp {
color: #666;
font-size: 0.9em;
}
.screenshot {
max-width: 100%;
border: 1px solid #ddd;
border-radius: 4px;
margin: 10px 0;
}
.reasoning {
background: #f8f9fa;
padding: 10px;
border-left: 3px solid #007bff;
margin: 10px 0;
}
.actions {
margin: 10px 0;
}
.actions ul {
margin: 5px 0;
padding-left: 20px;
}
.actions code {
background: #e9ecef;
padding: 2px 6px;
border-radius: 3px;
}
.complete {
background: #d4edda;
color: #155724;
padding: 10px;
border-radius: 4px;
margin-top: 10px;
}
.action-result {
padding: 10px;
margin: 5px 0;
}
.success {
color: #155724;
}
.error {
color: #721c24;
background: #f8d7da;
padding: 10px;
border-radius: 4px;
}
.log {
background: #fff3cd;
padding: 10px;
margin: 10px 0;
border-radius: 4px;
}
.split {
text-align: center;
margin: 30px 0;
}
.split h3 {
color: #666;
}
.split-line {
border: none;
border-top: 2px dashed #ccc;
margin: 30px 0;
}
.url {
word-break: break-all;
}
.plan {
background: #e7f3ff;
border-radius: 8px;
padding: 20px;
margin: 20px 0;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.plan h3 {
margin-top: 0;
color: #0056b3;
}
.plan-result {
background: #d1ecf1;
color: #0c5460;
padding: 10px;
border-radius: 4px;
margin-top: 10px;
}
</style>
</head>
<body>
<h1>Agent Execution Report</h1>
"""
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
def _get_html_footer() -> str:
    """Return the markup that closes the ``<body>`` and ``<html>`` elements."""
    # The empty first and last items yield the leading and trailing newlines.
    closing_tags = ("", "</body>", "</html>", "")
    return "\n".join(closing_tags)
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
def export_to_json(events: list[ObserverEvent], path: str) -> None:
    """Export events to a JSON file.

    Every event is dumped with ``model_dump(mode="json")``. For step, image
    and plan events whose ``image`` is ``bytes``, the image is left out of
    the dump, base64-encoded separately and stored under ``"image"``, with an
    extra ``"image_encoding": "base64"`` marker.

    Args:
        events: List of events to export.
        path: Path to the output JSON file. Missing parent directories are
            created.
    """
    output_path = Path(path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Convert events to JSON-serializable format
    json_events = []
    for event in events:
        image = getattr(event, "image", None)
        if isinstance(event, (StepEvent, ImageEvent, PlanEvent)) and isinstance(
            image, bytes
        ):
            # Exclude the raw bytes so JSON mode never tries to UTF-8 decode
            # them. Every other field (timestamp, nested models) is then
            # serialized the same way as in the no-image branch.
            event_dict = event.model_dump(mode="json", exclude={"image"})
            event_dict["image"] = base64.b64encode(image).decode("utf-8")
            event_dict["image_encoding"] = "base64"
        else:
            event_dict = event.model_dump(mode="json")
        json_events.append(event_dict)

    # default=str is kept as a last-resort fallback for any value that is
    # still not JSON-native after the dump.
    output_path.write_text(
        json.dumps(json_events, indent=2, default=str), encoding="utf-8"
    )
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
# Re-export from types for convenience
|
|
10
|
+
from ...types import AsyncObserver
|
|
11
|
+
|
|
12
|
+
__all__ = ["AsyncObserver"]
|
oagi/agent/registry.py
CHANGED
|
@@ -91,7 +91,7 @@ def create_agent(mode: str, **kwargs: Any) -> AsyncAgent:
|
|
|
91
91
|
Standard parameters typically include:
|
|
92
92
|
- api_key: OAGI API key
|
|
93
93
|
- base_url: OAGI API base URL
|
|
94
|
-
- model: Model identifier (e.g., "lux-
|
|
94
|
+
- model: Model identifier (e.g., "lux-actor-1")
|
|
95
95
|
- max_steps: Maximum number of steps to execute
|
|
96
96
|
- temperature: Sampling temperature
|
|
97
97
|
|
|
@@ -111,7 +111,7 @@ def create_agent(mode: str, **kwargs: Any) -> AsyncAgent:
|
|
|
111
111
|
mode="actor",
|
|
112
112
|
api_key="...",
|
|
113
113
|
base_url="...",
|
|
114
|
-
model="lux-
|
|
114
|
+
model="lux-actor-1",
|
|
115
115
|
max_steps=30,
|
|
116
116
|
temperature=0.0,
|
|
117
117
|
)
|
oagi/agent/tasker/models.py
CHANGED
|
@@ -44,6 +44,7 @@ class Action(BaseModel):
|
|
|
44
44
|
details: dict[str, Any] = Field(default_factory=dict)
|
|
45
45
|
reasoning: str | None = None
|
|
46
46
|
result: str | None = None
|
|
47
|
+
screenshot_uuid: str | None = None # UUID of uploaded screenshot for this action
|
|
47
48
|
|
|
48
49
|
|
|
49
50
|
class TodoHistory(BaseModel):
|
oagi/agent/tasker/planner.py
CHANGED
|
@@ -10,6 +10,7 @@ import json
|
|
|
10
10
|
from typing import Any
|
|
11
11
|
|
|
12
12
|
from ...client import AsyncClient
|
|
13
|
+
from ...types import URL, Image
|
|
13
14
|
from .memory import PlannerMemory
|
|
14
15
|
from .models import Action, PlannerOutput, ReflectionOutput
|
|
15
16
|
|
|
@@ -20,19 +21,28 @@ class Planner:
|
|
|
20
21
|
This class provides planning and reflection capabilities using OAGI workers.
|
|
21
22
|
"""
|
|
22
23
|
|
|
23
|
-
def __init__(
    self,
    client: AsyncClient | None = None,
    api_key: str | None = None,
    base_url: str | None = None,
):
    """Initialize the planner.

    Args:
        client: AsyncClient for OAGI API calls. If None, one will be created when needed.
        api_key: API key used when the planner creates its own client.
        base_url: Base URL used when the planner creates its own client.
    """
    # Connection settings are only stored here, not used.
    self.api_key, self.base_url = api_key, base_url
    self.client = client
    self._owns_client = False  # Track if we created the client
|
|
31
41
|
|
|
32
42
|
def _ensure_client(self) -> AsyncClient:
    """Return the planner's client, building one on first use.

    A client created here uses the stored ``api_key`` and ``base_url`` and
    is flagged as owned by this planner via ``_owns_client``.
    """
    if self.client:
        return self.client

    self.client = AsyncClient(api_key=self.api_key, base_url=self.base_url)
    self._owns_client = True
    return self.client
|
|
38
48
|
|
|
@@ -111,7 +121,7 @@ class Planner:
|
|
|
111
121
|
self,
|
|
112
122
|
todo: str,
|
|
113
123
|
context: dict[str, Any],
|
|
114
|
-
screenshot:
|
|
124
|
+
screenshot: Image | URL | None = None,
|
|
115
125
|
memory: PlannerMemory | None = None,
|
|
116
126
|
todo_index: int | None = None,
|
|
117
127
|
) -> PlannerOutput:
|
|
@@ -166,10 +176,11 @@ class Planner:
|
|
|
166
176
|
self,
|
|
167
177
|
actions: list[Action],
|
|
168
178
|
context: dict[str, Any],
|
|
169
|
-
screenshot:
|
|
179
|
+
screenshot: Image | URL | None = None,
|
|
170
180
|
memory: PlannerMemory | None = None,
|
|
171
181
|
todo_index: int | None = None,
|
|
172
182
|
current_instruction: str | None = None,
|
|
183
|
+
reflection_interval: int = 4,
|
|
173
184
|
) -> ReflectionOutput:
|
|
174
185
|
"""Reflect on recent actions and progress.
|
|
175
186
|
|
|
@@ -180,6 +191,7 @@ class Planner:
|
|
|
180
191
|
memory: Optional PlannerMemory for formatting contexts
|
|
181
192
|
todo_index: Optional todo index for formatting internal context
|
|
182
193
|
current_instruction: Current subtask instruction being executed
|
|
194
|
+
reflection_interval: Window size for recent actions/screenshots
|
|
183
195
|
|
|
184
196
|
Returns:
|
|
185
197
|
ReflectionOutput with continuation decision and reasoning
|
|
@@ -203,6 +215,9 @@ class Planner:
|
|
|
203
215
|
overall_todo,
|
|
204
216
|
) = self._extract_memory_data(memory, context, todo_index)
|
|
205
217
|
|
|
218
|
+
# Get window of recent actions based on reflection_interval
|
|
219
|
+
window_actions = actions[-reflection_interval:]
|
|
220
|
+
|
|
206
221
|
# Convert actions to window_steps format
|
|
207
222
|
window_steps = [
|
|
208
223
|
{
|
|
@@ -211,7 +226,14 @@ class Planner:
|
|
|
211
226
|
"target": action.target or "",
|
|
212
227
|
"reasoning": action.reasoning or "",
|
|
213
228
|
}
|
|
214
|
-
for i, action in enumerate(
|
|
229
|
+
for i, action in enumerate(window_actions)
|
|
230
|
+
]
|
|
231
|
+
|
|
232
|
+
# Extract screenshot UUIDs from window actions
|
|
233
|
+
window_screenshots = [
|
|
234
|
+
action.screenshot_uuid
|
|
235
|
+
for action in window_actions
|
|
236
|
+
if action.screenshot_uuid
|
|
215
237
|
]
|
|
216
238
|
|
|
217
239
|
# Format prior notes from context (still needed as a simple string summary)
|
|
@@ -229,7 +251,7 @@ class Planner:
|
|
|
229
251
|
task_execution_summary=task_execution_summary,
|
|
230
252
|
current_subtask_instruction=current_instruction or "",
|
|
231
253
|
window_steps=window_steps,
|
|
232
|
-
window_screenshots=
|
|
254
|
+
window_screenshots=window_screenshots,
|
|
233
255
|
result_screenshot=result_screenshot_uuid,
|
|
234
256
|
prior_notes=prior_notes,
|
|
235
257
|
)
|
|
@@ -328,7 +350,9 @@ class Planner:
|
|
|
328
350
|
"""
|
|
329
351
|
try:
|
|
330
352
|
# Try to parse as JSON (oagi_first format)
|
|
331
|
-
|
|
353
|
+
# Extract JSON string to handle Markdown code blocks
|
|
354
|
+
json_response = self._extract_json_str(response)
|
|
355
|
+
data = json.loads(json_response)
|
|
332
356
|
# oagi_first returns: {"reasoning": "...", "subtask": "..."}
|
|
333
357
|
return PlannerOutput(
|
|
334
358
|
instruction=data.get("subtask", data.get("instruction", "")),
|
|
@@ -340,7 +364,7 @@ class Planner:
|
|
|
340
364
|
except (json.JSONDecodeError, KeyError):
|
|
341
365
|
# Fallback: use the entire response as instruction
|
|
342
366
|
return PlannerOutput(
|
|
343
|
-
instruction=
|
|
367
|
+
instruction="",
|
|
344
368
|
reasoning="Failed to parse structured response",
|
|
345
369
|
subtodos=[],
|
|
346
370
|
)
|
|
@@ -356,7 +380,8 @@ class Planner:
|
|
|
356
380
|
"""
|
|
357
381
|
try:
|
|
358
382
|
# Try to parse as JSON (oagi_follow format)
|
|
359
|
-
|
|
383
|
+
json_response = self._extract_json_str(response)
|
|
384
|
+
data = json.loads(json_response)
|
|
360
385
|
# oagi_follow returns:
|
|
361
386
|
# {"assessment": "...", "summary": "...", "reflection": "...",
|
|
362
387
|
# "success": "yes" | "no", "subtask_instruction": "..."}
|
|
@@ -383,3 +408,10 @@ class Planner:
|
|
|
383
408
|
reasoning="Failed to parse reflection response, continuing current approach",
|
|
384
409
|
success_assessment=False,
|
|
385
410
|
)
|
|
411
|
+
|
|
412
|
+
def _extract_json_str(self, text: str) -> str:
    """Return the span from the first ``{`` to the last ``}`` in *text*.

    This strips any wrapper around a JSON object, such as a Markdown code
    fence. The span itself is not validated as JSON.

    Args:
        text: Raw response text.

    Returns:
        The brace-delimited substring, or "" when there is no ``{`` or no
        ``}`` after it.
    """
    first_brace = text.find("{")
    last_brace = text.rfind("}")
    # rfind gives -1 when "}" is absent, which this comparison also rejects.
    if first_brace == -1 or last_brace < first_brace:
        return ""
    return text[first_brace : last_brace + 1]
|