oagi-core 0.10.3__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
- oagi/__init__.py +1 -3
- oagi/actor/__init__.py +21 -0
- oagi/{task → actor}/async_.py +23 -7
- oagi/{task → actor}/async_short.py +1 -1
- oagi/actor/base.py +222 -0
- oagi/{task → actor}/short.py +1 -1
- oagi/{task → actor}/sync.py +21 -5
- oagi/agent/default.py +5 -0
- oagi/agent/factories.py +75 -3
- oagi/agent/observer/exporters.py +6 -0
- oagi/agent/observer/report_template.html +19 -0
- oagi/agent/tasker/planner.py +31 -19
- oagi/agent/tasker/taskee_agent.py +26 -7
- oagi/agent/tasker/tasker_agent.py +4 -0
- oagi/cli/agent.py +54 -30
- oagi/client/async_.py +54 -96
- oagi/client/base.py +81 -133
- oagi/client/sync.py +52 -99
- oagi/constants.py +7 -2
- oagi/handler/__init__.py +16 -0
- oagi/handler/_macos.py +137 -0
- oagi/handler/_windows.py +101 -0
- oagi/handler/async_pyautogui_action_handler.py +8 -0
- oagi/handler/capslock_manager.py +55 -0
- oagi/handler/pyautogui_action_handler.py +21 -39
- oagi/server/session_store.py +3 -3
- oagi/server/socketio_server.py +4 -4
- oagi/task/__init__.py +22 -8
- oagi/types/__init__.py +2 -1
- oagi/types/models/__init__.py +0 -2
- oagi/types/models/action.py +4 -1
- oagi/types/models/client.py +1 -17
- oagi/types/step_observer.py +2 -0
- oagi/types/url.py +25 -0
- oagi/utils/__init__.py +12 -0
- oagi/utils/output_parser.py +166 -0
- oagi/utils/prompt_builder.py +44 -0
- {oagi_core-0.10.3.dist-info → oagi_core-0.12.0.dist-info}/METADATA +90 -10
- oagi_core-0.12.0.dist-info/RECORD +76 -0
- oagi/task/base.py +0 -158
- oagi_core-0.10.3.dist-info/RECORD +0 -70
- {oagi_core-0.10.3.dist-info → oagi_core-0.12.0.dist-info}/WHEEL +0 -0
- {oagi_core-0.10.3.dist-info → oagi_core-0.12.0.dist-info}/entry_points.txt +0 -0
- {oagi_core-0.10.3.dist-info → oagi_core-0.12.0.dist-info}/licenses/LICENSE +0 -0
oagi/agent/tasker/planner.py
CHANGED
@@ -11,7 +11,7 @@ from typing import Any

 from ...client import AsyncClient
 from ...constants import DEFAULT_REFLECTION_INTERVAL
-from ...types import URL, Image
+from ...types import URL, Image, extract_uuid_from_url
 from .memory import PlannerMemory
 from .models import Action, PlannerOutput, ReflectionOutput

@@ -122,7 +122,7 @@ class Planner:
         screenshot: Image | URL | None = None,
         memory: PlannerMemory | None = None,
         todo_index: int | None = None,
-    ) -> PlannerOutput:
+    ) -> tuple[PlannerOutput, str | None]:
         """Generate initial plan for a todo.

         Args:
@@ -133,16 +133,21 @@
             todo_index: Optional todo index for formatting internal context

         Returns:
-
+            Tuple of (PlannerOutput, request_id) where request_id is from API response
         """
         # Ensure we have a client
         client = self._ensure_client()

-        #
+        # Get screenshot UUID - either extract from URL or upload
         screenshot_uuid = None
         if screenshot:
-
-
+            # Check if screenshot is already a URL (already uploaded to S3)
+            if isinstance(screenshot, str):
+                screenshot_uuid = extract_uuid_from_url(screenshot)
+            # If not a URL or UUID extraction failed, upload the image
+            if not screenshot_uuid:
+                upload_response = await client.put_s3_presigned_url(screenshot)
+                screenshot_uuid = upload_response.uuid

         # Extract memory data if provided
         (
@@ -165,8 +170,8 @@
             current_screenshot=screenshot_uuid,
         )

-        # Parse response
-        return self._parse_planner_output(response.response)
+        # Parse response and return with request_id
+        return self._parse_planner_output(response.response), response.request_id

     async def reflect(
         self,
@@ -177,7 +182,7 @@
         todo_index: int | None = None,
         current_instruction: str | None = None,
         reflection_interval: int = DEFAULT_REFLECTION_INTERVAL,
-    ) -> ReflectionOutput:
+    ) -> tuple[ReflectionOutput, str | None]:
         """Reflect on recent actions and progress.

         Args:
@@ -190,16 +195,21 @@
             reflection_interval: Window size for recent actions/screenshots

         Returns:
-            ReflectionOutput
+            Tuple of (ReflectionOutput, request_id) where request_id is from API response
         """
         # Ensure we have a client
         client = self._ensure_client()

-        #
+        # Get screenshot UUID - either extract from URL or upload
         result_screenshot_uuid = None
         if screenshot:
-
-
+            # Check if screenshot is already a URL (already uploaded to S3)
+            if isinstance(screenshot, str):
+                result_screenshot_uuid = extract_uuid_from_url(screenshot)
+            # If not a URL or UUID extraction failed, upload the image
+            if not result_screenshot_uuid:
+                upload_response = await client.put_s3_presigned_url(screenshot)
+                result_screenshot_uuid = upload_response.uuid

         # Extract memory data if provided
         (
@@ -250,8 +260,8 @@
             prior_notes=prior_notes,
         )

-        # Parse response
-        return self._parse_reflection_output(response.response)
+        # Parse response and return with request_id
+        return self._parse_reflection_output(response.response), response.request_id

     async def summarize(
         self,
@@ -259,7 +269,7 @@
         context: dict[str, Any],
         memory: PlannerMemory | None = None,
         todo_index: int | None = None,
-    ) -> str:
+    ) -> tuple[str, str | None]:
         """Generate execution summary.

         Args:
@@ -269,7 +279,7 @@
             todo_index: Optional todo index for formatting internal context

         Returns:
-
+            Tuple of (summary string, request_id) where request_id is from API response
         """
         # Ensure we have a client
         client = self._ensure_client()
@@ -304,9 +314,11 @@
         # Parse response and extract summary
         try:
             result = json.loads(response.response)
-
+            summary = result.get("task_summary", response.response)
         except json.JSONDecodeError:
-
+            summary = response.response
+
+        return summary, response.request_id

     def _format_execution_notes(self, context: dict[str, Any]) -> str:
         """Format execution history notes.

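The planner's new screenshot branch relies on the `extract_uuid_from_url` helper added in `oagi/types/url.py` (+25 lines), whose implementation is not shown in this diff. The sketch below only illustrates the contract the call sites above depend on (return a UUID string on success, a falsy value otherwise); it is an assumption, not the package's actual code.

```python
# Illustrative sketch only - the real helper lives in oagi/types/url.py and may differ.
# Assumed contract, inferred from the call sites: given an S3 download URL, return the
# embedded object UUID; return None when no UUID can be found.
import re
from urllib.parse import urlparse

_UUID_RE = re.compile(
    r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
)


def extract_uuid_from_url(url: str) -> str | None:
    path = urlparse(url).path
    match = _UUID_RE.search(path)
    return match.group(0) if match else None
```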
oagi/agent/tasker/taskee_agent.py
CHANGED

@@ -19,6 +19,7 @@ from oagi.constants import (
     DEFAULT_TEMPERATURE,
     MODEL_ACTOR,
 )
+from oagi.handler import reset_handler
 from oagi.types import (
     URL,
     ActionEvent,
@@ -28,6 +29,7 @@ from oagi.types import (
     Image,
     PlanEvent,
     StepEvent,
+    extract_uuid_from_url,
 )

 from ..protocol import AsyncAgent
@@ -121,6 +123,9 @@ class TaskeeAgent(AsyncAgent):
         Returns:
             True if successful, False otherwise
         """
+        # Reset handler state at todo execution start
+        reset_handler(action_handler)
+
         self.current_todo = instruction
         self.actions = []
         self.total_actions = 0
@@ -195,7 +200,7 @@
         context = self._get_context()

         # Generate plan using LLM planner
-        plan_output = await self.planner.initial_plan(
+        plan_output, request_id = await self.planner.initial_plan(
             self.current_todo,
             context,
             screenshot,
@@ -219,6 +224,7 @@
                 image=_serialize_image(screenshot),
                 reasoning=plan_output.reasoning,
                 result=plan_output.instruction,
+                request_id=request_id,
             )
         )

@@ -256,11 +262,21 @@
         # Capture screenshot
         screenshot = await image_provider()

-        #
+        # Get screenshot UUID - either extract from URL or upload
         try:
-
-
-
+            screenshot_uuid = None
+            screenshot_url = None
+
+            # Check if screenshot is already a URL (from SocketIOImageProvider)
+            if isinstance(screenshot, str):
+                screenshot_uuid = extract_uuid_from_url(screenshot)
+                screenshot_url = screenshot
+
+            # If not a URL or UUID extraction failed, upload the image
+            if not screenshot_uuid:
+                upload_response = await client.put_s3_presigned_url(screenshot)
+                screenshot_uuid = upload_response.uuid
+                screenshot_url = upload_response.download_url
         except Exception as e:
             logger.error(f"Error uploading screenshot: {e}")
             self._record_action(
@@ -294,6 +310,7 @@
                 step_num=self.total_actions + 1,
                 image=_serialize_image(screenshot),
                 step=step,
+                task_id=self.actor.task_id,
             )
         )

@@ -378,7 +395,7 @@
         recent_actions = self.actions[-self.since_reflection :]

         # Reflect using planner
-        reflection = await self.planner.reflect(
+        reflection, request_id = await self.planner.reflect(
             recent_actions,
             context,
             screenshot,
@@ -409,6 +426,7 @@
                 image=_serialize_image(screenshot),
                 reasoning=reflection.reasoning,
                 result=decision,
+                request_id=request_id,
             )
         )

@@ -441,7 +459,7 @@
         context = self._get_context()
         context["current_todo"] = self.current_todo

-        summary = await self.planner.summarize(
+        summary, request_id = await self.planner.summarize(
             self.actions,
             context,
             memory=self.external_memory,
@@ -463,6 +481,7 @@
                 image=None,
                 reasoning=summary,
                 result=None,
+                request_id=request_id,
             )
         )

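The same extract-or-upload branch now appears in three places (Planner.initial_plan, Planner.reflect, and the TaskeeAgent step loop above). Purely as a sketch of a possible consolidation, and not code that ships in the package, the duplicated logic has this shape:

```python
# Hypothetical helper (not in oagi-core); mirrors the duplicated branch in the diffs above.
from oagi.types import extract_uuid_from_url


async def resolve_screenshot(client, screenshot) -> tuple[str | None, str | None]:
    """Return (uuid, download_url), uploading only when the screenshot is raw image data."""
    if isinstance(screenshot, str):
        # Screenshot is already an S3 URL - try to reuse its UUID.
        uuid = extract_uuid_from_url(screenshot)
        if uuid:
            return uuid, screenshot
    # Not a URL (or UUID extraction failed): upload and take the UUID from the response.
    upload_response = await client.put_s3_presigned_url(screenshot)
    return upload_response.uuid, upload_response.download_url
```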
oagi/agent/tasker/tasker_agent.py
CHANGED

@@ -16,6 +16,7 @@ from oagi.constants import (
     DEFAULT_TEMPERATURE,
     MODEL_ACTOR,
 )
+from oagi.handler import reset_handler
 from oagi.types import AsyncActionHandler, AsyncImageProvider, AsyncObserver, SplitEvent

 from ..protocol import AsyncAgent
@@ -112,6 +113,9 @@ class TaskerAgent(AsyncAgent):
         Returns:
             True if all todos completed successfully, False otherwise
         """
+        # Reset handler state at automation start
+        reset_handler(action_handler)
+
         overall_success = True

         # Execute todos until none remain

oagi/cli/agent.py
CHANGED
@@ -17,12 +17,9 @@ from oagi.agent.observer import AsyncAgentObserver
 from oagi.constants import (
     API_KEY_HELP_URL,
     DEFAULT_BASE_URL,
-    DEFAULT_MAX_STEPS,
     DEFAULT_MAX_STEPS_THINKER,
     DEFAULT_STEP_DELAY,
-    DEFAULT_TEMPERATURE,
     MODE_ACTOR,
-    MODEL_ACTOR,
     MODEL_THINKER,
 )
 from oagi.exceptions import check_optional_dependency
@@ -40,22 +37,30 @@ def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
         "run", help="Run an agent with the given instruction"
     )
     run_parser.add_argument(
-        "instruction",
+        "instruction",
+        type=str,
+        nargs="?",
+        default="",
+        help="Task instruction for the agent to execute (optional for pre-configured modes)",
     )
     run_parser.add_argument(
-        "--model", type=str, help=
+        "--model", type=str, help="Model to use (default: determined by mode)"
     )
     run_parser.add_argument(
-        "--max-steps",
+        "--max-steps",
+        type=int,
+        help="Maximum number of steps (default: determined by mode)",
     )
     run_parser.add_argument(
-        "--temperature",
+        "--temperature",
+        type=float,
+        help="Sampling temperature (default: determined by mode)",
     )
     run_parser.add_argument(
         "--mode",
         type=str,
         default=MODE_ACTOR,
-        help=f"Agent mode to use (default: {MODE_ACTOR}).
+        help=f"Agent mode to use (default: {MODE_ACTOR}). Use 'oagi agent modes' to list available modes",
     )
     run_parser.add_argument(
         "--oagi-api-key", type=str, help="OAGI API key (default: OAGI_API_KEY env var)"
@@ -82,6 +87,9 @@ def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
         help=f"Delay in seconds after each step before next screenshot (default: {DEFAULT_STEP_DELAY})",
     )

+    # agent modes command
+    agent_subparsers.add_parser("modes", help="List available agent modes")
+
     # agent permission command
     agent_subparsers.add_parser(
         "permission",
@@ -92,10 +100,22 @@ def add_agent_parser(subparsers: argparse._SubParsersAction) -> None:
 def handle_agent_command(args: argparse.Namespace) -> None:
     if args.agent_command == "run":
         run_agent(args)
+    elif args.agent_command == "modes":
+        list_modes()
     elif args.agent_command == "permission":
         check_permissions()


+def list_modes() -> None:
+    """List all available agent modes."""
+    from oagi.agent import list_agent_modes  # noqa: PLC0415
+
+    modes = list_agent_modes()
+    print("Available agent modes:")
+    for mode in modes:
+        print(f"  - {mode}")
+
+
 def check_permissions() -> None:
     """Check and request macOS permissions for screen recording and accessibility.

@@ -207,14 +227,6 @@ def run_agent(args: argparse.Namespace) -> None:
         sys.exit(1)

     base_url = args.oagi_base_url or os.getenv("OAGI_BASE_URL", DEFAULT_BASE_URL)
-    model = args.model or MODEL_ACTOR
-    default_max_steps = (
-        DEFAULT_MAX_STEPS_THINKER if model == MODEL_THINKER else DEFAULT_MAX_STEPS
-    )
-    max_steps = args.max_steps or default_max_steps
-    temperature = (
-        args.temperature if args.temperature is not None else DEFAULT_TEMPERATURE
-    )
     mode = args.mode or MODE_ACTOR
     step_delay = args.step_delay if args.step_delay is not None else DEFAULT_STEP_DELAY
     export_format = args.export
@@ -233,26 +245,38 @@ def run_agent(args: argparse.Namespace) -> None:

     observer = CombinedObserver()

-    #
-
-        mode
-        api_key
-        base_url
-
-
-
-
-
-
+    # Build agent kwargs - only pass explicitly provided values, let factory use defaults
+    agent_kwargs = {
+        "mode": mode,
+        "api_key": api_key,
+        "base_url": base_url,
+        "step_observer": observer,
+        "step_delay": step_delay,
+    }
+    if args.model:
+        agent_kwargs["model"] = args.model
+        # If thinker model specified without max_steps, use thinker's default
+        if args.model == MODEL_THINKER and not args.max_steps:
+            agent_kwargs["max_steps"] = DEFAULT_MAX_STEPS_THINKER
+    if args.max_steps:
+        agent_kwargs["max_steps"] = args.max_steps
+    if args.temperature is not None:
+        agent_kwargs["temperature"] = args.temperature
+
+    # Create agent
+    agent = create_agent(**agent_kwargs)

     # Create handlers
     action_handler = AsyncPyautoguiActionHandler()
     image_provider = AsyncScreenshotMaker()

-
+    if args.instruction:
+        print(f"Starting agent with instruction: {args.instruction}")
+    else:
+        print(f"Starting agent with mode: {mode} (using pre-configured instruction)")
     print(
-        f"Mode: {mode}, Model: {model}, Max steps: {max_steps}, "
-        f"Temperature: {temperature}, Step delay: {step_delay}s"
+        f"Mode: {mode}, Model: {agent.model}, Max steps: {agent.max_steps}, "
+        f"Temperature: {agent.temperature}, Step delay: {step_delay}s"
     )
     print("-" * 60)

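The new `modes` subcommand is a thin wrapper around `list_agent_modes()` from `oagi.agent` (the factory module gains +75 lines in this release). A minimal programmatic equivalent, assuming only the import used by `list_modes()` above and that the helper returns an iterable of mode names:

```python
# Programmatic equivalent of "oagi agent modes"; assumes list_agent_modes() returns an
# iterable of mode names, as implied by the loop in list_modes() above.
from oagi.agent import list_agent_modes

for mode in list_agent_modes():
    print(mode)
```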
oagi/client/async_.py
CHANGED
@@ -9,17 +9,19 @@
 from functools import wraps

 import httpx
+from httpx import AsyncHTTPTransport
+from openai import AsyncOpenAI

 from ..constants import (
-    API_HEALTH_ENDPOINT,
     API_V1_FILE_UPLOAD_ENDPOINT,
     API_V1_GENERATE_ENDPOINT,
-
+    DEFAULT_MAX_RETRIES,
     HTTP_CLIENT_TIMEOUT,
 )
 from ..logging import get_logger
 from ..types import Image
-from ..types.models import GenerateResponse,
+from ..types.models import GenerateResponse, UploadFileResponse, Usage
+from ..types.models.step import Step
 from .base import BaseClient

 logger = get_logger("async_client")
@@ -35,8 +37,7 @@ def async_log_trace_on_failure(func):
         except Exception as e:
             # Try to get response from the exception if it has one
             if (response := getattr(e, "response", None)) is not None:
-
-                logger.error(f"Trace Id: {response.headers.get('x-trace-id', '')}")
+                BaseClient._log_trace_id(response)
             raise

     return wrapper
@@ -45,115 +46,72 @@ def async_log_trace_on_failure(func):
 class AsyncClient(BaseClient[httpx.AsyncClient]):
     """Asynchronous HTTP client for the OAGI API."""

-    def __init__(
-
-
-
+    def __init__(
+        self,
+        base_url: str | None = None,
+        api_key: str | None = None,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+    ):
+        super().__init__(base_url, api_key, max_retries)
+
+        # OpenAI client for chat completions (with retries)
+        self.openai_client = AsyncOpenAI(
+            api_key=self.api_key,
+            base_url=f"{self.base_url}/v1",
+            max_retries=self.max_retries,
+        )
+
+        # httpx clients for S3 uploads and other endpoints (with retries)
+        transport = AsyncHTTPTransport(retries=self.max_retries)
+        self.http_client = httpx.AsyncClient(
+            transport=transport, base_url=self.base_url
+        )
+        self.upload_client = httpx.AsyncClient(
+            transport=transport, timeout=HTTP_CLIENT_TIMEOUT
+        )
+
         logger.info(f"AsyncClient initialized with base_url: {self.base_url}")

     async def __aenter__(self):
         return self

     async def __aexit__(self, exc_type, exc_val, exc_tb):
-        await self.
-        await self.upload_client.aclose()
+        await self.close()

     async def close(self):
-        """Close the underlying
-        await self.
+        """Close the underlying async clients."""
+        await self.openai_client.close()
+        await self.http_client.aclose()
         await self.upload_client.aclose()

-
-    async def create_message(
+    async def chat_completion(
         self,
         model: str,
-
-        screenshot_url: str | None = None,
-        task_description: str | None = None,
-        task_id: str | None = None,
-        instruction: str | None = None,
-        messages_history: list | None = None,
+        messages: list,
         temperature: float | None = None,
-
-    ) ->
+        task_id: str | None = None,
+    ) -> tuple[Step, str, Usage | None]:
         """
-        Call
+        Call OpenAI-compatible /v1/chat/completions endpoint.

         Args:
-            model:
-
-
-
-            task_id: Task ID for continuing existing task
-            instruction: Additional instruction when continuing a session
-            messages_history: OpenAI-compatible chat message history
-            temperature: Sampling temperature (0.0-2.0) for LLM inference
-            api_version: API version header
+            model: Model to use for inference
+            messages: Full message history (OpenAI-compatible format)
+            temperature: Sampling temperature (0.0-2.0)
+            task_id: Optional task ID for multi-turn conversations

         Returns:
-
-
-
-
-            httpx.HTTPStatusError: For HTTP error responses
+            Tuple of (Step, raw_output, Usage)
+            - Step: Parsed actions and reasoning
+            - raw_output: Raw model output string (for message history)
+            - Usage: Token usage statistics (or None if not available)
         """
-
-
-
-                "Exactly one of 'screenshot' or 'screenshot_url' must be provided"
-            )
-
-        self._log_request_info(model, task_description, task_id)
-
-        # Upload screenshot to S3 if bytes provided, otherwise use URL directly
-        upload_file_response = None
-        if screenshot is not None:
-            upload_file_response = await self.put_s3_presigned_url(
-                screenshot, api_version
-            )
-
-        # Prepare message payload
-        headers, payload = self._prepare_message_payload(
-            model=model,
-            upload_file_response=upload_file_response,
-            task_description=task_description,
-            task_id=task_id,
-            instruction=instruction,
-            messages_history=messages_history,
-            temperature=temperature,
-            api_version=api_version,
-            screenshot_url=screenshot_url,
+        logger.info(f"Making async chat completion request with model: {model}")
+        kwargs = self._build_chat_completion_kwargs(
+            model, messages, temperature, task_id
         )
-
-
-        try:
-            response = await self.client.post(
-                API_V2_MESSAGE_ENDPOINT,
-                json=payload,
-                headers=headers,
-                timeout=self.timeout,
-            )
-            return self._process_response(response)
-        except (httpx.TimeoutException, httpx.NetworkError) as e:
-            self._handle_upload_http_errors(e)
-
-    async def health_check(self) -> dict:
-        """
-        Call the /health endpoint for health check
-
-        Returns:
-            dict: Health check response
-        """
-        logger.debug("Making async health check request")
-        try:
-            response = await self.client.get(API_HEALTH_ENDPOINT)
-            response.raise_for_status()
-            result = response.json()
-            logger.debug("Async health check successful")
-            return result
-        except httpx.HTTPStatusError as e:
-            logger.warning(f"Async health check failed: {e}")
-            raise
+        response = await self.openai_client.chat.completions.create(**kwargs)
+        return self._parse_chat_completion_response(response)

     async def get_s3_presigned_url(
         self,
@@ -172,7 +130,7 @@ class AsyncClient(BaseClient[httpx.AsyncClient]):

         try:
             headers = self._build_headers(api_version)
-            response = await self.
+            response = await self.http_client.get(
                 API_V1_FILE_UPLOAD_ENDPOINT, headers=headers, timeout=self.timeout
             )
             return self._process_upload_response(response)
@@ -292,7 +250,7 @@ class AsyncClient(BaseClient[httpx.AsyncClient]):

         # Make request
         try:
-            response = await self.
+            response = await self.http_client.post(
                 API_V1_GENERATE_ENDPOINT,
                 json=payload,
                 headers=headers,