oagi-core 0.10.1__py3-none-any.whl → 0.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oagi/agent/default.py +7 -0
- oagi/agent/factories.py +6 -0
- oagi/agent/observer/exporters.py +142 -251
- oagi/agent/observer/report_template.html +455 -0
- oagi/agent/tasker/taskee_agent.py +8 -0
- oagi/agent/tasker/tasker_agent.py +4 -0
- oagi/cli/agent.py +9 -1
- oagi/handler/pyautogui_action_handler.py +12 -22
- oagi/server/socketio_server.py +20 -19
- oagi/types/__init__.py +12 -1
- oagi/types/models/__init__.py +10 -1
- oagi/types/models/action.py +51 -0
- {oagi_core-0.10.1.dist-info → oagi_core-0.10.2.dist-info}/METADATA +1 -1
- {oagi_core-0.10.1.dist-info → oagi_core-0.10.2.dist-info}/RECORD +17 -16
- {oagi_core-0.10.1.dist-info → oagi_core-0.10.2.dist-info}/WHEEL +0 -0
- {oagi_core-0.10.1.dist-info → oagi_core-0.10.2.dist-info}/entry_points.txt +0 -0
- {oagi_core-0.10.1.dist-info → oagi_core-0.10.2.dist-info}/licenses/LICENSE +0 -0
oagi/agent/observer/report_template.html
ADDED
|
@@ -0,0 +1,455 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="UTF-8">
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
|
+
<title>Agent Execution Report</title>
|
|
7
|
+
<style>
|
|
8
|
+
body {
|
|
9
|
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
|
10
|
+
max-width: 1200px;
|
|
11
|
+
margin: 0 auto;
|
|
12
|
+
padding: 20px;
|
|
13
|
+
background: #f5f5f5;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
h1 {
|
|
17
|
+
color: #333;
|
|
18
|
+
border-bottom: 2px solid #007bff;
|
|
19
|
+
padding-bottom: 10px;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
.step, .plan {
|
|
23
|
+
background: white;
|
|
24
|
+
border-radius: 8px;
|
|
25
|
+
padding: 20px;
|
|
26
|
+
margin: 20px 0;
|
|
27
|
+
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
.step h2 {
|
|
31
|
+
margin-top: 0;
|
|
32
|
+
color: #007bff;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
.plan {
|
|
36
|
+
background: #e7f3ff;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
.plan h3 {
|
|
40
|
+
margin-top: 0;
|
|
41
|
+
color: #0056b3;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
.timestamp {
|
|
45
|
+
color: #666;
|
|
46
|
+
font-size: 0.9em;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
.screenshot-container {
|
|
50
|
+
position: relative;
|
|
51
|
+
display: inline-block;
|
|
52
|
+
margin: 10px 0;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
.screenshot {
|
|
56
|
+
max-width: 100%;
|
|
57
|
+
border: 1px solid #ddd;
|
|
58
|
+
border-radius: 4px;
|
|
59
|
+
display: block;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
.reasoning {
|
|
63
|
+
background: #f8f9fa;
|
|
64
|
+
padding: 10px;
|
|
65
|
+
border-left: 3px solid #007bff;
|
|
66
|
+
margin: 10px 0;
|
|
67
|
+
white-space: pre-wrap;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
.actions {
|
|
71
|
+
margin: 10px 0;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
.actions ul {
|
|
75
|
+
margin: 5px 0;
|
|
76
|
+
padding-left: 20px;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
.actions code {
|
|
80
|
+
background: #e9ecef;
|
|
81
|
+
padding: 2px 6px;
|
|
82
|
+
border-radius: 3px;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
.complete {
|
|
86
|
+
background: #d4edda;
|
|
87
|
+
color: #155724;
|
|
88
|
+
padding: 10px;
|
|
89
|
+
border-radius: 4px;
|
|
90
|
+
margin-top: 10px;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
.action-result {
|
|
94
|
+
padding: 10px;
|
|
95
|
+
margin: 5px 0;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
.success {
|
|
99
|
+
color: #155724;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
.error {
|
|
103
|
+
color: #721c24;
|
|
104
|
+
background: #f8d7da;
|
|
105
|
+
padding: 10px;
|
|
106
|
+
border-radius: 4px;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
.log {
|
|
110
|
+
background: #fff3cd;
|
|
111
|
+
padding: 10px;
|
|
112
|
+
margin: 10px 0;
|
|
113
|
+
border-radius: 4px;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
.split {
|
|
117
|
+
text-align: center;
|
|
118
|
+
margin: 30px 0;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
.split h3 {
|
|
122
|
+
color: #666;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
.split-line {
|
|
126
|
+
border: none;
|
|
127
|
+
border-top: 2px dashed #ccc;
|
|
128
|
+
margin: 30px 0;
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
.url {
|
|
132
|
+
word-break: break-all;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
.plan-result {
|
|
136
|
+
background: #d1ecf1;
|
|
137
|
+
color: #0c5460;
|
|
138
|
+
padding: 10px;
|
|
139
|
+
border-radius: 4px;
|
|
140
|
+
margin-top: 10px;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
/* Cursor indicators */
|
|
144
|
+
.click-indicator {
|
|
145
|
+
position: absolute;
|
|
146
|
+
width: 20px;
|
|
147
|
+
height: 20px;
|
|
148
|
+
border-radius: 50%;
|
|
149
|
+
background: rgba(255, 0, 0, 0.8);
|
|
150
|
+
border: 2px solid #fff;
|
|
151
|
+
box-shadow: 0 0 10px rgba(255, 0, 0, 0.6);
|
|
152
|
+
transform: translate(-50%, -50%);
|
|
153
|
+
pointer-events: none;
|
|
154
|
+
z-index: 10;
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
.drag-indicator {
|
|
158
|
+
position: absolute;
|
|
159
|
+
width: 20px;
|
|
160
|
+
height: 20px;
|
|
161
|
+
border-radius: 50%;
|
|
162
|
+
border: 2px solid #fff;
|
|
163
|
+
box-shadow: 0 0 10px rgba(0, 0, 0, 0.3);
|
|
164
|
+
transform: translate(-50%, -50%);
|
|
165
|
+
pointer-events: none;
|
|
166
|
+
z-index: 10;
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
.drag-start {
|
|
170
|
+
background: rgba(0, 255, 0, 0.8);
|
|
171
|
+
box-shadow: 0 0 10px rgba(0, 255, 0, 0.6);
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
.drag-end {
|
|
175
|
+
background: rgba(255, 0, 0, 0.8);
|
|
176
|
+
box-shadow: 0 0 10px rgba(255, 0, 0, 0.6);
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
.drag-line {
|
|
180
|
+
position: absolute;
|
|
181
|
+
background: rgba(255, 255, 0, 0.8);
|
|
182
|
+
height: 3px;
|
|
183
|
+
border-radius: 2px;
|
|
184
|
+
box-shadow: 0 0 5px rgba(255, 255, 0, 0.4);
|
|
185
|
+
pointer-events: none;
|
|
186
|
+
z-index: 9;
|
|
187
|
+
transform-origin: left center;
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
.scroll-indicator {
|
|
191
|
+
position: absolute;
|
|
192
|
+
width: 40px;
|
|
193
|
+
height: 40px;
|
|
194
|
+
border-radius: 50%;
|
|
195
|
+
background: rgba(138, 43, 226, 0.9);
|
|
196
|
+
border: 2px solid #fff;
|
|
197
|
+
box-shadow: 0 0 15px rgba(138, 43, 226, 0.6);
|
|
198
|
+
transform: translate(-50%, -50%);
|
|
199
|
+
pointer-events: none;
|
|
200
|
+
z-index: 10;
|
|
201
|
+
display: flex;
|
|
202
|
+
align-items: center;
|
|
203
|
+
justify-content: center;
|
|
204
|
+
font-size: 20px;
|
|
205
|
+
color: white;
|
|
206
|
+
font-weight: bold;
|
|
207
|
+
}
|
|
208
|
+
</style>
|
|
209
|
+
</head>
|
|
210
|
+
<body>
|
|
211
|
+
<h1>Agent Execution Report</h1>
|
|
212
|
+
<div id="content"></div>
|
|
213
|
+
|
|
214
|
+
<script>
|
|
215
|
+
const eventsData = {EVENTS_DATA};
|
|
216
|
+
|
|
217
|
+
function escapeHtml(text) {
|
|
218
|
+
const div = document.createElement('div');
|
|
219
|
+
div.textContent = text;
|
|
220
|
+
return div.innerHTML;
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
function removeIndicators(container) {
|
|
224
|
+
container.querySelectorAll('.click-indicator, .drag-indicator, .drag-line, .scroll-indicator')
|
|
225
|
+
.forEach(el => el.remove());
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
function addIndicators(container) {
|
|
229
|
+
const img = container.querySelector('.screenshot');
|
|
230
|
+
const actionsAttr = container.getAttribute('data-actions');
|
|
231
|
+
if (!actionsAttr || !img) return;
|
|
232
|
+
|
|
233
|
+
// Remove existing indicators before adding new ones
|
|
234
|
+
removeIndicators(container);
|
|
235
|
+
|
|
236
|
+
const actions = JSON.parse(actionsAttr);
|
|
237
|
+
const imgWidth = img.offsetWidth;
|
|
238
|
+
const imgHeight = img.offsetHeight;
|
|
239
|
+
|
|
240
|
+
actions.forEach(action => {
|
|
241
|
+
switch (action.type) {
|
|
242
|
+
case 'click':
|
|
243
|
+
addClickIndicator(container, action, imgWidth, imgHeight);
|
|
244
|
+
break;
|
|
245
|
+
case 'drag':
|
|
246
|
+
addDragIndicator(container, action, imgWidth, imgHeight);
|
|
247
|
+
break;
|
|
248
|
+
case 'scroll':
|
|
249
|
+
addScrollIndicator(container, action, imgWidth, imgHeight);
|
|
250
|
+
break;
|
|
251
|
+
}
|
|
252
|
+
});
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
// Recalculate indicator positions on window resize
|
|
256
|
+
let resizeTimeout;
|
|
257
|
+
window.addEventListener('resize', function () {
|
|
258
|
+
clearTimeout(resizeTimeout);
|
|
259
|
+
resizeTimeout = setTimeout(function () {
|
|
260
|
+
document.querySelectorAll('.screenshot-container').forEach(container => {
|
|
261
|
+
addIndicators(container);
|
|
262
|
+
});
|
|
263
|
+
}, 100);
|
|
264
|
+
});
|
|
265
|
+
|
|
266
|
+
function addClickIndicator(container, action, imgWidth, imgHeight) {
|
|
267
|
+
const x = (action.x / 1000) * imgWidth;
|
|
268
|
+
const y = (action.y / 1000) * imgHeight;
|
|
269
|
+
|
|
270
|
+
const indicator = document.createElement('div');
|
|
271
|
+
indicator.className = 'click-indicator';
|
|
272
|
+
indicator.style.left = x + 'px';
|
|
273
|
+
indicator.style.top = y + 'px';
|
|
274
|
+
container.appendChild(indicator);
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
function addDragIndicator(container, action, imgWidth, imgHeight) {
|
|
278
|
+
const x1 = (action.x1 / 1000) * imgWidth;
|
|
279
|
+
const y1 = (action.y1 / 1000) * imgHeight;
|
|
280
|
+
const x2 = (action.x2 / 1000) * imgWidth;
|
|
281
|
+
const y2 = (action.y2 / 1000) * imgHeight;
|
|
282
|
+
|
|
283
|
+
const startIndicator = document.createElement('div');
|
|
284
|
+
startIndicator.className = 'drag-indicator drag-start';
|
|
285
|
+
startIndicator.style.left = x1 + 'px';
|
|
286
|
+
startIndicator.style.top = y1 + 'px';
|
|
287
|
+
container.appendChild(startIndicator);
|
|
288
|
+
|
|
289
|
+
const endIndicator = document.createElement('div');
|
|
290
|
+
endIndicator.className = 'drag-indicator drag-end';
|
|
291
|
+
endIndicator.style.left = x2 + 'px';
|
|
292
|
+
endIndicator.style.top = y2 + 'px';
|
|
293
|
+
container.appendChild(endIndicator);
|
|
294
|
+
|
|
295
|
+
const line = document.createElement('div');
|
|
296
|
+
line.className = 'drag-line';
|
|
297
|
+
const deltaX = x2 - x1;
|
|
298
|
+
const deltaY = y2 - y1;
|
|
299
|
+
const length = Math.sqrt(deltaX * deltaX + deltaY * deltaY);
|
|
300
|
+
const angle = Math.atan2(deltaY, deltaX) * (180 / Math.PI);
|
|
301
|
+
line.style.left = x1 + 'px';
|
|
302
|
+
line.style.top = y1 + 'px';
|
|
303
|
+
line.style.width = length + 'px';
|
|
304
|
+
line.style.transform = 'rotate(' + angle + 'deg)';
|
|
305
|
+
container.appendChild(line);
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
function addScrollIndicator(container, action, imgWidth, imgHeight) {
|
|
309
|
+
const x = (action.x / 1000) * imgWidth;
|
|
310
|
+
const y = (action.y / 1000) * imgHeight;
|
|
311
|
+
|
|
312
|
+
const indicator = document.createElement('div');
|
|
313
|
+
indicator.className = 'scroll-indicator';
|
|
314
|
+
indicator.style.left = x + 'px';
|
|
315
|
+
indicator.style.top = y + 'px';
|
|
316
|
+
|
|
317
|
+
if (action.direction === 'up') {
|
|
318
|
+
indicator.innerHTML = '↑';
|
|
319
|
+
indicator.title = 'Scroll Up';
|
|
320
|
+
} else if (action.direction === 'down') {
|
|
321
|
+
indicator.innerHTML = '↓';
|
|
322
|
+
indicator.title = 'Scroll Down';
|
|
323
|
+
} else {
|
|
324
|
+
indicator.innerHTML = '↕';
|
|
325
|
+
indicator.title = 'Scroll';
|
|
326
|
+
}
|
|
327
|
+
container.appendChild(indicator);
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
function renderEvents() {
|
|
331
|
+
const content = document.getElementById('content');
|
|
332
|
+
let html = '';
|
|
333
|
+
|
|
334
|
+
eventsData.forEach(event => {
|
|
335
|
+
const timestamp = event.timestamp;
|
|
336
|
+
|
|
337
|
+
switch (event.event_type) {
|
|
338
|
+
case 'step':
|
|
339
|
+
html += '<div class="step">';
|
|
340
|
+
html += `<h2>Step ${event.step_num}</h2>`;
|
|
341
|
+
html += `<span class="timestamp">${timestamp}</span>`;
|
|
342
|
+
|
|
343
|
+
if (event.image) {
|
|
344
|
+
const actionsJson = JSON.stringify(event.action_coords || []).replace(/"/g, '&quot;');
|
|
345
|
+
html += `<div class="screenshot-container" data-actions="${actionsJson}">`;
|
|
346
|
+
if (event.image.startsWith('data:') || event.image.startsWith('http')) {
|
|
347
|
+
html += `<img src="${event.image}" alt="Step ${event.step_num}" class="screenshot"/>`;
|
|
348
|
+
} else {
|
|
349
|
+
html += `<img src="data:image/png;base64,${event.image}" alt="Step ${event.step_num}" class="screenshot"/>`;
|
|
350
|
+
}
|
|
351
|
+
html += '</div>';
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
if (event.reason) {
|
|
355
|
+
html += '<div class="reasoning">';
|
|
356
|
+
html += `<strong>Reasoning:</strong><p>${escapeHtml(event.reason)}</p>`;
|
|
357
|
+
html += '</div>';
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
if (event.actions && event.actions.length > 0) {
|
|
361
|
+
html += '<div class="actions"><strong>Planned Actions:</strong><ul>';
|
|
362
|
+
event.actions.forEach(action => {
|
|
363
|
+
const countStr = action.count > 1 ? ` (x${action.count})` : '';
|
|
364
|
+
html += `<li><code>${action.type}</code>: ${escapeHtml(action.argument)}${countStr}</li>`;
|
|
365
|
+
});
|
|
366
|
+
html += '</ul></div>';
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
if (event.stop) {
|
|
370
|
+
html += '<div class="complete">Task Complete</div>';
|
|
371
|
+
}
|
|
372
|
+
html += '</div>';
|
|
373
|
+
break;
|
|
374
|
+
|
|
375
|
+
case 'action':
|
|
376
|
+
html += '<div class="action-result">';
|
|
377
|
+
html += `<span class="timestamp">${timestamp}</span>`;
|
|
378
|
+
if (event.error) {
|
|
379
|
+
html += `<div class="error">Error: ${escapeHtml(event.error)}</div>`;
|
|
380
|
+
} else {
|
|
381
|
+
html += '<div class="success">Actions executed successfully</div>';
|
|
382
|
+
}
|
|
383
|
+
html += '</div>';
|
|
384
|
+
break;
|
|
385
|
+
|
|
386
|
+
case 'log':
|
|
387
|
+
html += '<div class="log">';
|
|
388
|
+
html += `<span class="timestamp">${timestamp}</span>`;
|
|
389
|
+
html += `<p>${escapeHtml(event.message)}</p>`;
|
|
390
|
+
html += '</div>';
|
|
391
|
+
break;
|
|
392
|
+
|
|
393
|
+
case 'split':
|
|
394
|
+
if (event.label) {
|
|
395
|
+
html += `<div class="split"><h3>${escapeHtml(event.label)}</h3></div>`;
|
|
396
|
+
} else {
|
|
397
|
+
html += '<hr class="split-line"/>';
|
|
398
|
+
}
|
|
399
|
+
break;
|
|
400
|
+
|
|
401
|
+
case 'plan':
|
|
402
|
+
const phaseTitles = {
|
|
403
|
+
'initial': 'Initial Planning',
|
|
404
|
+
'reflection': 'Reflection',
|
|
405
|
+
'summary': 'Summary'
|
|
406
|
+
};
|
|
407
|
+
const phaseTitle = phaseTitles[event.phase] || event.phase;
|
|
408
|
+
|
|
409
|
+
html += '<div class="plan">';
|
|
410
|
+
html += `<h3>${phaseTitle}</h3>`;
|
|
411
|
+
html += `<span class="timestamp">${timestamp}</span>`;
|
|
412
|
+
|
|
413
|
+
if (event.image) {
|
|
414
|
+
html += '<div class="screenshot-container">';
|
|
415
|
+
if (event.image.startsWith('data:') || event.image.startsWith('http')) {
|
|
416
|
+
html += `<img src="${event.image}" alt="${phaseTitle}" class="screenshot"/>`;
|
|
417
|
+
} else {
|
|
418
|
+
html += `<img src="data:image/png;base64,${event.image}" alt="${phaseTitle}" class="screenshot"/>`;
|
|
419
|
+
}
|
|
420
|
+
html += '</div>';
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
if (event.reasoning) {
|
|
424
|
+
html += '<div class="reasoning">';
|
|
425
|
+
html += `<strong>Reasoning:</strong><p>${escapeHtml(event.reasoning)}</p>`;
|
|
426
|
+
html += '</div>';
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
if (event.result) {
|
|
430
|
+
html += `<div class="plan-result"><strong>Result:</strong> ${escapeHtml(event.result)}</div>`;
|
|
431
|
+
}
|
|
432
|
+
html += '</div>';
|
|
433
|
+
break;
|
|
434
|
+
}
|
|
435
|
+
});
|
|
436
|
+
|
|
437
|
+
content.innerHTML = html;
|
|
438
|
+
|
|
439
|
+
// Add cursor indicators after images load
|
|
440
|
+
document.querySelectorAll('.screenshot-container').forEach(container => {
|
|
441
|
+
const img = container.querySelector('.screenshot');
|
|
442
|
+
if (img) {
|
|
443
|
+
if (img.complete) {
|
|
444
|
+
addIndicators(container);
|
|
445
|
+
} else {
|
|
446
|
+
img.onload = () => addIndicators(container);
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
});
|
|
450
|
+
}
|
|
451
|
+
|
|
452
|
+
document.addEventListener('DOMContentLoaded', renderEvents);
|
|
453
|
+
</script>
|
|
454
|
+
</body>
|
|
455
|
+
</html>
|
oagi/agent/tasker/taskee_agent.py
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
# Licensed under the MIT License.
|
|
7
7
|
# -----------------------------------------------------------------------------
|
|
8
8
|
|
|
9
|
+
import asyncio
|
|
9
10
|
import logging
|
|
10
11
|
from datetime import datetime
|
|
11
12
|
from typing import Any
|
|
@@ -59,6 +60,7 @@ class TaskeeAgent(AsyncAgent):
|
|
|
59
60
|
external_memory: PlannerMemory | None = None,
|
|
60
61
|
todo_index: int | None = None,
|
|
61
62
|
step_observer: AsyncObserver | None = None,
|
|
63
|
+
step_delay: float = 0.3,
|
|
62
64
|
):
|
|
63
65
|
"""Initialize the taskee agent.
|
|
64
66
|
|
|
@@ -73,6 +75,7 @@ class TaskeeAgent(AsyncAgent):
|
|
|
73
75
|
external_memory: External memory from parent agent
|
|
74
76
|
todo_index: Index of the todo being executed
|
|
75
77
|
step_observer: Optional observer for step tracking
|
|
78
|
+
step_delay: Delay in seconds after actions before next screenshot
|
|
76
79
|
"""
|
|
77
80
|
self.api_key = api_key
|
|
78
81
|
self.base_url = base_url
|
|
@@ -84,6 +87,7 @@ class TaskeeAgent(AsyncAgent):
|
|
|
84
87
|
self.external_memory = external_memory
|
|
85
88
|
self.todo_index = todo_index
|
|
86
89
|
self.step_observer = step_observer
|
|
90
|
+
self.step_delay = step_delay
|
|
87
91
|
|
|
88
92
|
# Internal state
|
|
89
93
|
self.actor: AsyncActor | None = None
|
|
@@ -327,6 +331,10 @@ class TaskeeAgent(AsyncAgent):
|
|
|
327
331
|
self.total_actions += len(step.actions)
|
|
328
332
|
self.since_reflection += len(step.actions)
|
|
329
333
|
|
|
334
|
+
# Wait after actions before next screenshot
|
|
335
|
+
if self.step_delay > 0:
|
|
336
|
+
await asyncio.sleep(self.step_delay)
|
|
337
|
+
|
|
330
338
|
steps_taken += 1
|
|
331
339
|
|
|
332
340
|
# Check if task is complete
|
oagi/agent/tasker/tasker_agent.py
CHANGED
|
@@ -40,6 +40,7 @@ class TaskerAgent(AsyncAgent):
|
|
|
40
40
|
reflection_interval: int = 4,
|
|
41
41
|
planner: Planner | None = None,
|
|
42
42
|
step_observer: AsyncObserver | None = None,
|
|
43
|
+
step_delay: float = 0.3,
|
|
43
44
|
):
|
|
44
45
|
"""Initialize the tasker agent.
|
|
45
46
|
|
|
@@ -52,6 +53,7 @@ class TaskerAgent(AsyncAgent):
|
|
|
52
53
|
reflection_interval: Actions before reflection
|
|
53
54
|
planner: Planner for planning and reflection
|
|
54
55
|
step_observer: Optional observer for step tracking
|
|
56
|
+
step_delay: Delay in seconds after actions before next screenshot
|
|
55
57
|
"""
|
|
56
58
|
self.api_key = api_key
|
|
57
59
|
self.base_url = base_url
|
|
@@ -61,6 +63,7 @@ class TaskerAgent(AsyncAgent):
|
|
|
61
63
|
self.reflection_interval = reflection_interval
|
|
62
64
|
self.planner = planner or Planner(api_key=api_key, base_url=base_url)
|
|
63
65
|
self.step_observer = step_observer
|
|
66
|
+
self.step_delay = step_delay
|
|
64
67
|
|
|
65
68
|
# Memory for tracking workflow
|
|
66
69
|
self.memory = PlannerMemory()
|
|
@@ -184,6 +187,7 @@ class TaskerAgent(AsyncAgent):
|
|
|
184
187
|
external_memory=self.memory, # Share memory with child
|
|
185
188
|
todo_index=todo_index, # Pass the todo index
|
|
186
189
|
step_observer=self.step_observer, # Pass step observer
|
|
190
|
+
step_delay=self.step_delay,
|
|
187
191
|
)
|
|
188
192
|
|
|
189
193
|
self.current_todo_index = todo_index
|
oagi/cli/agent.py
CHANGED
|
@@ -65,6 +65,11 @@ def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
|
|
|
65
65
|
type=str,
|
|
66
66
|
help="Output file path for export (default: execution_report.[md|html|json])",
|
|
67
67
|
)
|
|
68
|
+
run_parser.add_argument(
|
|
69
|
+
"--step-delay",
|
|
70
|
+
type=float,
|
|
71
|
+
help="Delay in seconds after each step before next screenshot (default: 0.3)",
|
|
72
|
+
)
|
|
68
73
|
|
|
69
74
|
# agent permission command
|
|
70
75
|
agent_subparsers.add_parser(
|
|
@@ -196,6 +201,7 @@ def run_agent(args: argparse.Namespace) -> None:
|
|
|
196
201
|
max_steps = args.max_steps or 20
|
|
197
202
|
temperature = args.temperature if args.temperature is not None else 0.5
|
|
198
203
|
mode = args.mode or "actor"
|
|
204
|
+
step_delay = args.step_delay if args.step_delay is not None else 0.3
|
|
199
205
|
export_format = args.export
|
|
200
206
|
export_file = args.export_file
|
|
201
207
|
|
|
@@ -221,6 +227,7 @@ def run_agent(args: argparse.Namespace) -> None:
|
|
|
221
227
|
max_steps=max_steps,
|
|
222
228
|
temperature=temperature,
|
|
223
229
|
step_observer=observer,
|
|
230
|
+
step_delay=step_delay,
|
|
224
231
|
)
|
|
225
232
|
|
|
226
233
|
# Create handlers
|
|
@@ -229,7 +236,8 @@ def run_agent(args: argparse.Namespace) -> None:
|
|
|
229
236
|
|
|
230
237
|
print(f"Starting agent with instruction: {args.instruction}")
|
|
231
238
|
print(
|
|
232
|
-
f"Mode: {mode}, Model: {model}, Max steps: {max_steps},
|
|
239
|
+
f"Mode: {mode}, Model: {model}, Max steps: {max_steps}, "
|
|
240
|
+
f"Temperature: {temperature}, Step delay: {step_delay}s"
|
|
233
241
|
)
|
|
234
242
|
print("-" * 60)
|
|
235
243
|
|
oagi/handler/pyautogui_action_handler.py
CHANGED
|
@@ -6,14 +6,13 @@
|
|
|
6
6
|
# Licensed under the MIT License.
|
|
7
7
|
# -----------------------------------------------------------------------------
|
|
8
8
|
|
|
9
|
-
import re
|
|
10
9
|
import sys
|
|
11
10
|
import time
|
|
12
11
|
|
|
13
12
|
from pydantic import BaseModel, Field
|
|
14
13
|
|
|
15
14
|
from ..exceptions import check_optional_dependency
|
|
16
|
-
from ..types import Action, ActionType
|
|
15
|
+
from ..types import Action, ActionType, parse_coords, parse_drag_coords, parse_scroll
|
|
17
16
|
|
|
18
17
|
check_optional_dependency("pyautogui", "PyautoguiActionHandler", "desktop")
|
|
19
18
|
import pyautogui # noqa: E402
|
|
@@ -136,36 +135,27 @@ class PyautoguiActionHandler:
|
|
|
136
135
|
|
|
137
136
|
def _parse_coords(self, args_str: str) -> tuple[int, int]:
|
|
138
137
|
"""Extract x, y coordinates from argument string."""
|
|
139
|
-
|
|
140
|
-
if not
|
|
138
|
+
coords = parse_coords(args_str)
|
|
139
|
+
if not coords:
|
|
141
140
|
raise ValueError(f"Invalid coordinates format: {args_str}")
|
|
142
|
-
|
|
143
|
-
return self._denormalize_coords(x, y)
|
|
141
|
+
return self._denormalize_coords(coords[0], coords[1])
|
|
144
142
|
|
|
145
143
|
def _parse_drag_coords(self, args_str: str) -> tuple[int, int, int, int]:
|
|
146
144
|
"""Extract x1, y1, x2, y2 coordinates from drag argument string."""
|
|
147
|
-
|
|
148
|
-
if not
|
|
145
|
+
coords = parse_drag_coords(args_str)
|
|
146
|
+
if not coords:
|
|
149
147
|
raise ValueError(f"Invalid drag coordinates format: {args_str}")
|
|
150
|
-
x1, y1
|
|
151
|
-
|
|
152
|
-
int(match.group(2)),
|
|
153
|
-
int(match.group(3)),
|
|
154
|
-
int(match.group(4)),
|
|
155
|
-
)
|
|
156
|
-
x1, y1 = self._denormalize_coords(x1, y1)
|
|
157
|
-
x2, y2 = self._denormalize_coords(x2, y2)
|
|
148
|
+
x1, y1 = self._denormalize_coords(coords[0], coords[1])
|
|
149
|
+
x2, y2 = self._denormalize_coords(coords[2], coords[3])
|
|
158
150
|
return x1, y1, x2, y2
|
|
159
151
|
|
|
160
152
|
def _parse_scroll(self, args_str: str) -> tuple[int, int, str]:
|
|
161
153
|
"""Extract x, y, direction from scroll argument string."""
|
|
162
|
-
|
|
163
|
-
if not
|
|
154
|
+
result = parse_scroll(args_str)
|
|
155
|
+
if not result:
|
|
164
156
|
raise ValueError(f"Invalid scroll format: {args_str}")
|
|
165
|
-
x, y =
|
|
166
|
-
x, y
|
|
167
|
-
direction = match.group(3).lower()
|
|
168
|
-
return x, y, direction
|
|
157
|
+
x, y = self._denormalize_coords(result[0], result[1])
|
|
158
|
+
return x, y, result[2]
|
|
169
159
|
|
|
170
160
|
def _normalize_key(self, key: str) -> str:
|
|
171
161
|
"""Normalize key names for consistency."""
|