oagi-core 0.10.0__py3-none-any.whl → 0.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oagi/agent/default.py +7 -0
- oagi/agent/factories.py +6 -0
- oagi/agent/observer/exporters.py +142 -251
- oagi/agent/observer/report_template.html +455 -0
- oagi/agent/tasker/__init__.py +0 -2
- oagi/agent/tasker/memory.py +3 -27
- oagi/agent/tasker/models.py +0 -7
- oagi/agent/tasker/planner.py +2 -11
- oagi/agent/tasker/taskee_agent.py +8 -0
- oagi/agent/tasker/tasker_agent.py +7 -17
- oagi/cli/agent.py +108 -1
- oagi/client/async_.py +0 -3
- oagi/client/base.py +0 -4
- oagi/client/sync.py +0 -3
- oagi/handler/pyautogui_action_handler.py +20 -24
- oagi/server/socketio_server.py +20 -19
- oagi/types/__init__.py +12 -1
- oagi/types/models/__init__.py +10 -1
- oagi/types/models/action.py +51 -0
- oagi/types/models/client.py +7 -3
- {oagi_core-0.10.0.dist-info → oagi_core-0.10.2.dist-info}/METADATA +2 -1
- {oagi_core-0.10.0.dist-info → oagi_core-0.10.2.dist-info}/RECORD +25 -24
- {oagi_core-0.10.0.dist-info → oagi_core-0.10.2.dist-info}/WHEEL +0 -0
- {oagi_core-0.10.0.dist-info → oagi_core-0.10.2.dist-info}/entry_points.txt +0 -0
- {oagi_core-0.10.0.dist-info → oagi_core-0.10.2.dist-info}/licenses/LICENSE +0 -0
oagi/agent/tasker/tasker_agent.py
CHANGED
|
@@ -40,6 +40,7 @@ class TaskerAgent(AsyncAgent):
|
|
|
40
40
|
reflection_interval: int = 4,
|
|
41
41
|
planner: Planner | None = None,
|
|
42
42
|
step_observer: AsyncObserver | None = None,
|
|
43
|
+
step_delay: float = 0.3,
|
|
43
44
|
):
|
|
44
45
|
"""Initialize the tasker agent.
|
|
45
46
|
|
|
@@ -52,6 +53,7 @@ class TaskerAgent(AsyncAgent):
|
|
|
52
53
|
reflection_interval: Actions before reflection
|
|
53
54
|
planner: Planner for planning and reflection
|
|
54
55
|
step_observer: Optional observer for step tracking
|
|
56
|
+
step_delay: Delay in seconds after actions before next screenshot
|
|
55
57
|
"""
|
|
56
58
|
self.api_key = api_key
|
|
57
59
|
self.base_url = base_url
|
|
@@ -61,6 +63,7 @@ class TaskerAgent(AsyncAgent):
|
|
|
61
63
|
self.reflection_interval = reflection_interval
|
|
62
64
|
self.planner = planner or Planner(api_key=api_key, base_url=base_url)
|
|
63
65
|
self.step_observer = step_observer
|
|
66
|
+
self.step_delay = step_delay
|
|
64
67
|
|
|
65
68
|
# Memory for tracking workflow
|
|
66
69
|
self.memory = PlannerMemory()
|
|
@@ -73,20 +76,15 @@ class TaskerAgent(AsyncAgent):
|
|
|
73
76
|
self,
|
|
74
77
|
task: str,
|
|
75
78
|
todos: list[str],
|
|
76
|
-
deliverables: list[str] | None = None,
|
|
77
79
|
) -> None:
|
|
78
|
-
"""Set the task
|
|
80
|
+
"""Set the task and todos for the workflow.
|
|
79
81
|
|
|
80
82
|
Args:
|
|
81
83
|
task: Overall task description
|
|
82
84
|
todos: List of todo descriptions
|
|
83
|
-
deliverables: Optional list of deliverable descriptions
|
|
84
85
|
"""
|
|
85
|
-
self.memory.set_task(task, todos, deliverables)
|
|
86
|
-
logger.info(
|
|
87
|
-
f"Task set with {len(todos)} todos and "
|
|
88
|
-
f"{len(deliverables) if deliverables else 0} deliverables"
|
|
89
|
-
)
|
|
86
|
+
self.memory.set_task(task, todos)
|
|
87
|
+
logger.info(f"Task set with {len(todos)} todos")
|
|
90
88
|
|
|
91
89
|
async def execute(
|
|
92
90
|
self,
|
|
@@ -189,6 +187,7 @@ class TaskerAgent(AsyncAgent):
|
|
|
189
187
|
external_memory=self.memory, # Share memory with child
|
|
190
188
|
todo_index=todo_index, # Pass the todo index
|
|
191
189
|
step_observer=self.step_observer, # Pass step observer
|
|
190
|
+
step_delay=self.step_delay,
|
|
192
191
|
)
|
|
193
192
|
|
|
194
193
|
self.current_todo_index = todo_index
|
|
@@ -327,12 +326,3 @@ class TaskerAgent(AsyncAgent):
|
|
|
327
326
|
"""
|
|
328
327
|
self.memory.append_todo(description)
|
|
329
328
|
logger.info(f"Appended new todo: {description}")
|
|
330
|
-
|
|
331
|
-
def append_deliverable(self, description: str) -> None:
|
|
332
|
-
"""Dynamically append a new deliverable to the workflow.
|
|
333
|
-
|
|
334
|
-
Args:
|
|
335
|
-
description: Description of the new deliverable
|
|
336
|
-
"""
|
|
337
|
-
self.memory.append_deliverable(description)
|
|
338
|
-
logger.info(f"Appended new deliverable: {description}")
|
oagi/cli/agent.py
CHANGED
|
@@ -65,11 +65,112 @@ def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
|
|
|
65
65
|
type=str,
|
|
66
66
|
help="Output file path for export (default: execution_report.[md|html|json])",
|
|
67
67
|
)
|
|
68
|
+
run_parser.add_argument(
|
|
69
|
+
"--step-delay",
|
|
70
|
+
type=float,
|
|
71
|
+
help="Delay in seconds after each step before next screenshot (default: 0.3)",
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
# agent permission command
|
|
75
|
+
agent_subparsers.add_parser(
|
|
76
|
+
"permission",
|
|
77
|
+
help="Check macOS permissions for screen recording and accessibility",
|
|
78
|
+
)
|
|
68
79
|
|
|
69
80
|
|
|
70
81
|
def handle_agent_command(args: argparse.Namespace) -> None:
|
|
71
82
|
if args.agent_command == "run":
|
|
72
83
|
run_agent(args)
|
|
84
|
+
elif args.agent_command == "permission":
|
|
85
|
+
check_permissions()
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def check_permissions() -> None:
|
|
89
|
+
"""Check and request macOS permissions for screen recording and accessibility.
|
|
90
|
+
|
|
91
|
+
Guides the user through granting permissions one at a time.
|
|
92
|
+
"""
|
|
93
|
+
if sys.platform != "darwin":
|
|
94
|
+
print("Warning: Permission check is only applicable on macOS.")
|
|
95
|
+
print("On other platforms, no special permissions are required.")
|
|
96
|
+
return
|
|
97
|
+
|
|
98
|
+
check_optional_dependency("Quartz", "Permission check", "desktop")
|
|
99
|
+
check_optional_dependency("ApplicationServices", "Permission check", "desktop")
|
|
100
|
+
|
|
101
|
+
import subprocess # noqa: PLC0415
|
|
102
|
+
|
|
103
|
+
from ApplicationServices import AXIsProcessTrusted # noqa: PLC0415
|
|
104
|
+
from Quartz import ( # noqa: PLC0415
|
|
105
|
+
CGPreflightScreenCaptureAccess,
|
|
106
|
+
CGRequestScreenCaptureAccess,
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
# Check all permissions first to show status
|
|
110
|
+
screen_recording_granted = CGPreflightScreenCaptureAccess()
|
|
111
|
+
accessibility_granted = AXIsProcessTrusted()
|
|
112
|
+
|
|
113
|
+
print("Checking permissions...")
|
|
114
|
+
print(f" {'[OK]' if screen_recording_granted else '[MISSING]'} Screen Recording")
|
|
115
|
+
print(f" {'[OK]' if accessibility_granted else '[MISSING]'} Accessibility")
|
|
116
|
+
|
|
117
|
+
# Guide user through missing permissions one at a time
|
|
118
|
+
if not screen_recording_granted:
|
|
119
|
+
CGRequestScreenCaptureAccess()
|
|
120
|
+
subprocess.run(
|
|
121
|
+
[
|
|
122
|
+
"open",
|
|
123
|
+
"x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture",
|
|
124
|
+
],
|
|
125
|
+
check=False,
|
|
126
|
+
)
|
|
127
|
+
print("\nPlease grant Screen Recording permission in System Preferences.")
|
|
128
|
+
print("After granting, run this command again to continue.")
|
|
129
|
+
print("Note: You may need to restart your terminal after granting permissions.")
|
|
130
|
+
sys.exit(1)
|
|
131
|
+
|
|
132
|
+
if not accessibility_granted:
|
|
133
|
+
subprocess.run(
|
|
134
|
+
[
|
|
135
|
+
"open",
|
|
136
|
+
"x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility",
|
|
137
|
+
],
|
|
138
|
+
check=False,
|
|
139
|
+
)
|
|
140
|
+
print("\nPlease grant Accessibility permission in System Preferences.")
|
|
141
|
+
print("After granting, run this command again to continue.")
|
|
142
|
+
print("Note: You may need to restart your terminal after granting permissions.")
|
|
143
|
+
sys.exit(1)
|
|
144
|
+
|
|
145
|
+
print()
|
|
146
|
+
print("All permissions granted. You can run the agent.")
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _warn_missing_permissions() -> None:
|
|
150
|
+
if sys.platform != "darwin":
|
|
151
|
+
return
|
|
152
|
+
|
|
153
|
+
if not check_optional_dependency(
|
|
154
|
+
"Quartz", "Permission check", "desktop", raise_error=False
|
|
155
|
+
):
|
|
156
|
+
return
|
|
157
|
+
if not check_optional_dependency(
|
|
158
|
+
"ApplicationServices", "Permission check", "desktop", raise_error=False
|
|
159
|
+
):
|
|
160
|
+
return
|
|
161
|
+
|
|
162
|
+
from ApplicationServices import AXIsProcessTrusted # noqa: PLC0415
|
|
163
|
+
from Quartz import CGPreflightScreenCaptureAccess # noqa: PLC0415
|
|
164
|
+
|
|
165
|
+
missing = []
|
|
166
|
+
if not CGPreflightScreenCaptureAccess():
|
|
167
|
+
missing.append("Screen Recording")
|
|
168
|
+
if not AXIsProcessTrusted():
|
|
169
|
+
missing.append("Accessibility")
|
|
170
|
+
|
|
171
|
+
if missing:
|
|
172
|
+
print(f"Warning: Missing macOS permissions: {', '.join(missing)}")
|
|
173
|
+
print("Run 'oagi agent permission' to configure permissions.\n")
|
|
73
174
|
|
|
74
175
|
|
|
75
176
|
def run_agent(args: argparse.Namespace) -> None:
|
|
@@ -77,6 +178,9 @@ def run_agent(args: argparse.Namespace) -> None:
|
|
|
77
178
|
check_optional_dependency("pyautogui", "Agent execution", "desktop")
|
|
78
179
|
check_optional_dependency("PIL", "Agent execution", "desktop")
|
|
79
180
|
|
|
181
|
+
# Warn about missing macOS permissions (non-blocking)
|
|
182
|
+
_warn_missing_permissions()
|
|
183
|
+
|
|
80
184
|
from oagi import AsyncPyautoguiActionHandler, AsyncScreenshotMaker # noqa: PLC0415
|
|
81
185
|
from oagi.agent import create_agent # noqa: PLC0415
|
|
82
186
|
|
|
@@ -97,6 +201,7 @@ def run_agent(args: argparse.Namespace) -> None:
|
|
|
97
201
|
max_steps = args.max_steps or 20
|
|
98
202
|
temperature = args.temperature if args.temperature is not None else 0.5
|
|
99
203
|
mode = args.mode or "actor"
|
|
204
|
+
step_delay = args.step_delay if args.step_delay is not None else 0.3
|
|
100
205
|
export_format = args.export
|
|
101
206
|
export_file = args.export_file
|
|
102
207
|
|
|
@@ -122,6 +227,7 @@ def run_agent(args: argparse.Namespace) -> None:
|
|
|
122
227
|
max_steps=max_steps,
|
|
123
228
|
temperature=temperature,
|
|
124
229
|
step_observer=observer,
|
|
230
|
+
step_delay=step_delay,
|
|
125
231
|
)
|
|
126
232
|
|
|
127
233
|
# Create handlers
|
|
@@ -130,7 +236,8 @@ def run_agent(args: argparse.Namespace) -> None:
|
|
|
130
236
|
|
|
131
237
|
print(f"Starting agent with instruction: {args.instruction}")
|
|
132
238
|
print(
|
|
133
|
-
f"Mode: {mode}, Model: {model}, Max steps: {max_steps}, Temperature: {temperature}"
|
|
239
|
+
f"Mode: {mode}, Model: {model}, Max steps: {max_steps}, "
|
|
240
|
+
f"Temperature: {temperature}, Step delay: {step_delay}s"
|
|
134
241
|
)
|
|
135
242
|
print("-" * 60)
|
|
136
243
|
|
oagi/client/async_.py
CHANGED
|
@@ -223,7 +223,6 @@ class AsyncClient(BaseClient[httpx.AsyncClient]):
|
|
|
223
223
|
overall_todo: str,
|
|
224
224
|
task_description: str,
|
|
225
225
|
todos: list[dict],
|
|
226
|
-
deliverables: list[dict],
|
|
227
226
|
history: list[dict] | None = None,
|
|
228
227
|
current_todo_index: int | None = None,
|
|
229
228
|
task_execution_summary: str | None = None,
|
|
@@ -243,7 +242,6 @@ class AsyncClient(BaseClient[httpx.AsyncClient]):
|
|
|
243
242
|
overall_todo: Current todo description
|
|
244
243
|
task_description: Overall task description
|
|
245
244
|
todos: List of todo dicts with index, description, status, execution_summary
|
|
246
|
-
deliverables: List of deliverable dicts with description, achieved
|
|
247
245
|
history: List of history dicts with todo_index, todo_description, action_count, summary, completed
|
|
248
246
|
current_todo_index: Index of current todo being executed
|
|
249
247
|
task_execution_summary: Summary of overall task execution
|
|
@@ -269,7 +267,6 @@ class AsyncClient(BaseClient[httpx.AsyncClient]):
|
|
|
269
267
|
overall_todo=overall_todo,
|
|
270
268
|
task_description=task_description,
|
|
271
269
|
todos=todos,
|
|
272
|
-
deliverables=deliverables,
|
|
273
270
|
history=history,
|
|
274
271
|
current_todo_index=current_todo_index,
|
|
275
272
|
task_execution_summary=task_execution_summary,
|
oagi/client/base.py
CHANGED
|
@@ -348,7 +348,6 @@ class BaseClient(Generic[HttpClientT]):
|
|
|
348
348
|
overall_todo: str,
|
|
349
349
|
task_description: str,
|
|
350
350
|
todos: list[dict],
|
|
351
|
-
deliverables: list[dict],
|
|
352
351
|
history: list[dict] | None = None,
|
|
353
352
|
current_todo_index: int | None = None,
|
|
354
353
|
task_execution_summary: str | None = None,
|
|
@@ -368,7 +367,6 @@ class BaseClient(Generic[HttpClientT]):
|
|
|
368
367
|
overall_todo: Current todo description
|
|
369
368
|
task_description: Overall task description
|
|
370
369
|
todos: List of todo dicts with index, description, status, execution_summary
|
|
371
|
-
deliverables: List of deliverable dicts with description, achieved
|
|
372
370
|
history: List of history dicts with todo_index, todo_description, action_count, summary, completed
|
|
373
371
|
current_todo_index: Index of current todo being executed
|
|
374
372
|
task_execution_summary: Summary of overall task execution
|
|
@@ -402,7 +400,6 @@ class BaseClient(Generic[HttpClientT]):
|
|
|
402
400
|
"overall_todo": overall_todo,
|
|
403
401
|
"task_description": task_description,
|
|
404
402
|
"todos": todos,
|
|
405
|
-
"deliverables": deliverables,
|
|
406
403
|
"history": history or [],
|
|
407
404
|
}
|
|
408
405
|
|
|
@@ -456,6 +453,5 @@ class BaseClient(Generic[HttpClientT]):
|
|
|
456
453
|
|
|
457
454
|
logger.info(
|
|
458
455
|
f"Generate request successful - tokens: {result.prompt_tokens}+{result.completion_tokens}, "
|
|
459
|
-
f"cost: ${result.cost:.6f}"
|
|
460
456
|
)
|
|
461
457
|
return result
|
oagi/client/sync.py
CHANGED
|
@@ -226,7 +226,6 @@ class SyncClient(BaseClient[httpx.Client]):
|
|
|
226
226
|
overall_todo: str,
|
|
227
227
|
task_description: str,
|
|
228
228
|
todos: list[dict],
|
|
229
|
-
deliverables: list[dict],
|
|
230
229
|
history: list[dict] | None = None,
|
|
231
230
|
current_todo_index: int | None = None,
|
|
232
231
|
task_execution_summary: str | None = None,
|
|
@@ -246,7 +245,6 @@ class SyncClient(BaseClient[httpx.Client]):
|
|
|
246
245
|
overall_todo: Current todo description
|
|
247
246
|
task_description: Overall task description
|
|
248
247
|
todos: List of todo dicts with index, description, status, execution_summary
|
|
249
|
-
deliverables: List of deliverable dicts with description, achieved
|
|
250
248
|
history: List of history dicts with todo_index, todo_description, action_count, summary, completed
|
|
251
249
|
current_todo_index: Index of current todo being executed
|
|
252
250
|
task_execution_summary: Summary of overall task execution
|
|
@@ -272,7 +270,6 @@ class SyncClient(BaseClient[httpx.Client]):
|
|
|
272
270
|
overall_todo=overall_todo,
|
|
273
271
|
task_description=task_description,
|
|
274
272
|
todos=todos,
|
|
275
|
-
deliverables=deliverables,
|
|
276
273
|
history=history,
|
|
277
274
|
current_todo_index=current_todo_index,
|
|
278
275
|
task_execution_summary=task_execution_summary,
|
oagi/handler/pyautogui_action_handler.py
CHANGED
|
@@ -6,14 +6,13 @@
|
|
|
6
6
|
# Licensed under the MIT License.
|
|
7
7
|
# -----------------------------------------------------------------------------
|
|
8
8
|
|
|
9
|
-
import re
|
|
10
9
|
import sys
|
|
11
10
|
import time
|
|
12
11
|
|
|
13
12
|
from pydantic import BaseModel, Field
|
|
14
13
|
|
|
15
14
|
from ..exceptions import check_optional_dependency
|
|
16
|
-
from ..types import Action, ActionType
|
|
15
|
+
from ..types import Action, ActionType, parse_coords, parse_drag_coords, parse_scroll
|
|
17
16
|
|
|
18
17
|
check_optional_dependency("pyautogui", "PyautoguiActionHandler", "desktop")
|
|
19
18
|
import pyautogui # noqa: E402
|
|
@@ -136,43 +135,40 @@ class PyautoguiActionHandler:
|
|
|
136
135
|
|
|
137
136
|
def _parse_coords(self, args_str: str) -> tuple[int, int]:
|
|
138
137
|
"""Extract x, y coordinates from argument string."""
|
|
139
|
-
|
|
140
|
-
if not
|
|
138
|
+
coords = parse_coords(args_str)
|
|
139
|
+
if not coords:
|
|
141
140
|
raise ValueError(f"Invalid coordinates format: {args_str}")
|
|
142
|
-
|
|
143
|
-
return self._denormalize_coords(x, y)
|
|
141
|
+
return self._denormalize_coords(coords[0], coords[1])
|
|
144
142
|
|
|
145
143
|
def _parse_drag_coords(self, args_str: str) -> tuple[int, int, int, int]:
|
|
146
144
|
"""Extract x1, y1, x2, y2 coordinates from drag argument string."""
|
|
147
|
-
|
|
148
|
-
if not
|
|
145
|
+
coords = parse_drag_coords(args_str)
|
|
146
|
+
if not coords:
|
|
149
147
|
raise ValueError(f"Invalid drag coordinates format: {args_str}")
|
|
150
|
-
x1, y1
|
|
151
|
-
|
|
152
|
-
int(match.group(2)),
|
|
153
|
-
int(match.group(3)),
|
|
154
|
-
int(match.group(4)),
|
|
155
|
-
)
|
|
156
|
-
x1, y1 = self._denormalize_coords(x1, y1)
|
|
157
|
-
x2, y2 = self._denormalize_coords(x2, y2)
|
|
148
|
+
x1, y1 = self._denormalize_coords(coords[0], coords[1])
|
|
149
|
+
x2, y2 = self._denormalize_coords(coords[2], coords[3])
|
|
158
150
|
return x1, y1, x2, y2
|
|
159
151
|
|
|
160
152
|
def _parse_scroll(self, args_str: str) -> tuple[int, int, str]:
|
|
161
153
|
"""Extract x, y, direction from scroll argument string."""
|
|
162
|
-
|
|
163
|
-
if not
|
|
154
|
+
result = parse_scroll(args_str)
|
|
155
|
+
if not result:
|
|
164
156
|
raise ValueError(f"Invalid scroll format: {args_str}")
|
|
165
|
-
x, y =
|
|
166
|
-
x, y
|
|
167
|
-
direction = match.group(3).lower()
|
|
168
|
-
return x, y, direction
|
|
157
|
+
x, y = self._denormalize_coords(result[0], result[1])
|
|
158
|
+
return x, y, result[2]
|
|
169
159
|
|
|
170
160
|
def _normalize_key(self, key: str) -> str:
|
|
171
161
|
"""Normalize key names for consistency."""
|
|
172
162
|
key = key.strip().lower()
|
|
173
163
|
# Normalize caps lock variations
|
|
174
|
-
|
|
175
|
-
|
|
164
|
+
hotkey_variations_mapping = {
|
|
165
|
+
"capslock": ["caps_lock", "caps", "capslock"],
|
|
166
|
+
"pgup": ["page_up", "pageup"],
|
|
167
|
+
"pgdn": ["page_down", "pagedown"],
|
|
168
|
+
}
|
|
169
|
+
for normalized, variations in hotkey_variations_mapping.items():
|
|
170
|
+
if key in variations:
|
|
171
|
+
return normalized
|
|
176
172
|
# Remap ctrl to command on macOS if enabled
|
|
177
173
|
if self.config.macos_ctrl_to_cmd and sys.platform == "darwin" and key == "ctrl":
|
|
178
174
|
return "command"
|
oagi/server/socketio_server.py
CHANGED
|
@@ -16,7 +16,13 @@ from pydantic import ValidationError
|
|
|
16
16
|
from ..agent import AsyncDefaultAgent, create_agent
|
|
17
17
|
from ..client import AsyncClient
|
|
18
18
|
from ..exceptions import check_optional_dependency
|
|
19
|
-
from ..types.models.action import Action, ActionType
|
|
19
|
+
from ..types.models.action import (
|
|
20
|
+
Action,
|
|
21
|
+
ActionType,
|
|
22
|
+
parse_coords,
|
|
23
|
+
parse_drag_coords,
|
|
24
|
+
parse_scroll,
|
|
25
|
+
)
|
|
20
26
|
from .agent_wrappers import SocketIOActionHandler, SocketIOImageProvider
|
|
21
27
|
from .config import ServerConfig
|
|
22
28
|
from .models import (
|
|
@@ -275,31 +281,29 @@ class SessionNamespace(socketio.AsyncNamespace):
|
|
|
275
281
|
| ActionType.LEFT_TRIPLE
|
|
276
282
|
| ActionType.RIGHT_SINGLE
|
|
277
283
|
):
|
|
278
|
-
coords = arg
|
|
279
|
-
if
|
|
280
|
-
x, y = int(coords[0]), int(coords[1])
|
|
281
|
-
else:
|
|
284
|
+
coords = parse_coords(arg)
|
|
285
|
+
if not coords:
|
|
282
286
|
logger.warning(f"Invalid action coordinates: {arg}")
|
|
283
287
|
return None
|
|
284
288
|
|
|
285
289
|
return await self.call(
|
|
286
290
|
action.type.value,
|
|
287
|
-
ClickEventData(**common, x=
|
|
291
|
+
ClickEventData(**common, x=coords[0], y=coords[1]).model_dump(),
|
|
288
292
|
to=session.socket_id,
|
|
289
293
|
timeout=self.config.socketio_timeout,
|
|
290
294
|
)
|
|
291
295
|
|
|
292
296
|
case ActionType.DRAG:
|
|
293
|
-
coords = arg
|
|
294
|
-
if
|
|
295
|
-
x1, y1, x2, y2 = (int(coords[i]) for i in range(4))
|
|
296
|
-
else:
|
|
297
|
+
coords = parse_drag_coords(arg)
|
|
298
|
+
if not coords:
|
|
297
299
|
logger.warning(f"Invalid drag coordinates: {arg}")
|
|
298
300
|
return None
|
|
299
301
|
|
|
300
302
|
return await self.call(
|
|
301
303
|
"drag",
|
|
302
|
-
DragEventData(
|
|
304
|
+
DragEventData(
|
|
305
|
+
**common, x1=coords[0], y1=coords[1], x2=coords[2], y2=coords[3]
|
|
306
|
+
).model_dump(),
|
|
303
307
|
to=session.socket_id,
|
|
304
308
|
timeout=self.config.socketio_timeout,
|
|
305
309
|
)
|
|
@@ -326,11 +330,8 @@ class SessionNamespace(socketio.AsyncNamespace):
|
|
|
326
330
|
)
|
|
327
331
|
|
|
328
332
|
case ActionType.SCROLL:
|
|
329
|
-
|
|
330
|
-
if
|
|
331
|
-
x, y = int(parts[0]), int(parts[1])
|
|
332
|
-
direction = parts[2].strip().lower()
|
|
333
|
-
else:
|
|
333
|
+
result = parse_scroll(arg)
|
|
334
|
+
if not result:
|
|
334
335
|
logger.warning(f"Invalid scroll coordinates: {arg}")
|
|
335
336
|
return None
|
|
336
337
|
|
|
@@ -340,9 +341,9 @@ class SessionNamespace(socketio.AsyncNamespace):
|
|
|
340
341
|
"scroll",
|
|
341
342
|
ScrollEventData(
|
|
342
343
|
**common,
|
|
343
|
-
x=
|
|
344
|
-
y=
|
|
345
|
-
direction=
|
|
344
|
+
x=result[0],
|
|
345
|
+
y=result[1],
|
|
346
|
+
direction=result[2],
|
|
346
347
|
count=count, # type: ignore
|
|
347
348
|
).model_dump(),
|
|
348
349
|
to=session.socket_id,
|
oagi/types/__init__.py
CHANGED
|
@@ -11,7 +11,15 @@ from .async_action_handler import AsyncActionHandler
|
|
|
11
11
|
from .async_image_provider import AsyncImageProvider
|
|
12
12
|
from .image import Image
|
|
13
13
|
from .image_provider import ImageProvider
|
|
14
|
-
from .models import Action, ActionType, ImageConfig, Step
|
|
14
|
+
from .models import (
|
|
15
|
+
Action,
|
|
16
|
+
ActionType,
|
|
17
|
+
ImageConfig,
|
|
18
|
+
Step,
|
|
19
|
+
parse_coords,
|
|
20
|
+
parse_drag_coords,
|
|
21
|
+
parse_scroll,
|
|
22
|
+
)
|
|
15
23
|
from .step_observer import (
|
|
16
24
|
ActionEvent,
|
|
17
25
|
AsyncObserver,
|
|
@@ -47,4 +55,7 @@ __all__ = [
|
|
|
47
55
|
"ImageProvider",
|
|
48
56
|
"AsyncImageProvider",
|
|
49
57
|
"URL",
|
|
58
|
+
"parse_coords",
|
|
59
|
+
"parse_drag_coords",
|
|
60
|
+
"parse_scroll",
|
|
50
61
|
]
|
oagi/types/models/__init__.py
CHANGED
|
@@ -6,7 +6,13 @@
|
|
|
6
6
|
# Licensed under the MIT License.
|
|
7
7
|
# -----------------------------------------------------------------------------
|
|
8
8
|
|
|
9
|
-
from .action import Action, ActionType
|
|
9
|
+
from .action import (
|
|
10
|
+
Action,
|
|
11
|
+
ActionType,
|
|
12
|
+
parse_coords,
|
|
13
|
+
parse_drag_coords,
|
|
14
|
+
parse_scroll,
|
|
15
|
+
)
|
|
10
16
|
from .client import (
|
|
11
17
|
ErrorDetail,
|
|
12
18
|
ErrorResponse,
|
|
@@ -29,4 +35,7 @@ __all__ = [
|
|
|
29
35
|
"Step",
|
|
30
36
|
"UploadFileResponse",
|
|
31
37
|
"Usage",
|
|
38
|
+
"parse_coords",
|
|
39
|
+
"parse_drag_coords",
|
|
40
|
+
"parse_scroll",
|
|
32
41
|
]
|
oagi/types/models/action.py
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
# Licensed under the MIT License.
|
|
7
7
|
# -----------------------------------------------------------------------------
|
|
8
8
|
|
|
9
|
+
import re
|
|
9
10
|
from enum import Enum
|
|
10
11
|
|
|
11
12
|
from pydantic import BaseModel, Field
|
|
@@ -31,3 +32,53 @@ class Action(BaseModel):
|
|
|
31
32
|
count: int | None = Field(
|
|
32
33
|
default=1, ge=1, description="Number of times to repeat the action"
|
|
33
34
|
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def parse_coords(args_str: str) -> tuple[int, int] | None:
|
|
38
|
+
"""Extract x, y coordinates from argument string.
|
|
39
|
+
|
|
40
|
+
Args:
|
|
41
|
+
args_str: Argument string in format "x, y" (normalized 0-1000 range)
|
|
42
|
+
|
|
43
|
+
Returns:
|
|
44
|
+
Tuple of (x, y) coordinates, or None if parsing fails
|
|
45
|
+
"""
|
|
46
|
+
match = re.match(r"(\d+),\s*(\d+)", args_str)
|
|
47
|
+
if not match:
|
|
48
|
+
return None
|
|
49
|
+
return int(match.group(1)), int(match.group(2))
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def parse_drag_coords(args_str: str) -> tuple[int, int, int, int] | None:
|
|
53
|
+
"""Extract x1, y1, x2, y2 coordinates from drag argument string.
|
|
54
|
+
|
|
55
|
+
Args:
|
|
56
|
+
args_str: Argument string in format "x1, y1, x2, y2" (normalized 0-1000 range)
|
|
57
|
+
|
|
58
|
+
Returns:
|
|
59
|
+
Tuple of (x1, y1, x2, y2) coordinates, or None if parsing fails
|
|
60
|
+
"""
|
|
61
|
+
match = re.match(r"(\d+),\s*(\d+),\s*(\d+),\s*(\d+)", args_str)
|
|
62
|
+
if not match:
|
|
63
|
+
return None
|
|
64
|
+
return (
|
|
65
|
+
int(match.group(1)),
|
|
66
|
+
int(match.group(2)),
|
|
67
|
+
int(match.group(3)),
|
|
68
|
+
int(match.group(4)),
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def parse_scroll(args_str: str) -> tuple[int, int, str] | None:
|
|
73
|
+
"""Extract x, y, direction from scroll argument string.
|
|
74
|
+
|
|
75
|
+
Args:
|
|
76
|
+
args_str: Argument string in format "x, y, direction" (normalized 0-1000 range)
|
|
77
|
+
|
|
78
|
+
Returns:
|
|
79
|
+
Tuple of (x, y, direction) where direction is "up" or "down", or None if parsing fails
|
|
80
|
+
"""
|
|
81
|
+
match = re.match(r"(\d+),\s*(\d+),\s*(\w+)", args_str)
|
|
82
|
+
if not match:
|
|
83
|
+
return None
|
|
84
|
+
return int(match.group(1)), int(match.group(2)), match.group(3).lower()
|
oagi/types/models/client.py
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# Licensed under the MIT License.
|
|
7
7
|
# -----------------------------------------------------------------------------
|
|
8
8
|
|
|
9
|
-
from pydantic import BaseModel
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
10
|
|
|
11
11
|
from .action import Action
|
|
12
12
|
|
|
@@ -56,9 +56,13 @@ class UploadFileResponse(BaseModel):
|
|
|
56
56
|
|
|
57
57
|
|
|
58
58
|
class GenerateResponse(BaseModel):
|
|
59
|
-
"""Response from /
|
|
59
|
+
"""Response from /v1/generate endpoint."""
|
|
60
60
|
|
|
61
61
|
response: str
|
|
62
62
|
prompt_tokens: int
|
|
63
63
|
completion_tokens: int
|
|
64
|
-
cost: float
|
|
64
|
+
cost: float | None = Field(
|
|
65
|
+
default=None,
|
|
66
|
+
deprecated=True,
|
|
67
|
+
description="This field is deprecated",
|
|
68
|
+
)
|
{oagi_core-0.10.0.dist-info → oagi_core-0.10.2.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: oagi-core
|
|
3
|
-
Version: 0.10.0
|
|
3
|
+
Version: 0.10.2
|
|
4
4
|
Summary: Official API of OpenAGI Foundation
|
|
5
5
|
Project-URL: Homepage, https://github.com/agiopen-org/oagi
|
|
6
6
|
Author-email: OpenAGI Foundation <contact@agiopen.org>
|
|
@@ -32,6 +32,7 @@ Requires-Dist: rich>=13.0.0
|
|
|
32
32
|
Provides-Extra: desktop
|
|
33
33
|
Requires-Dist: pillow>=11.3.0; extra == 'desktop'
|
|
34
34
|
Requires-Dist: pyautogui>=0.9.54; extra == 'desktop'
|
|
35
|
+
Requires-Dist: pyobjc-framework-applicationservices>=9.0; (sys_platform == 'darwin') and extra == 'desktop'
|
|
35
36
|
Requires-Dist: pyobjc-framework-quartz>=9.0; (sys_platform == 'darwin') and extra == 'desktop'
|
|
36
37
|
Provides-Extra: server
|
|
37
38
|
Requires-Dist: fastapi[standard]>=0.115.0; extra == 'server'
|