@oagi/oagi 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,567 @@
1
+ import * as z from 'zod';
2
+ import { ChatCompletionMessageParam, CompletionUsage } from 'openai/resources.js';
3
+
4
+ /**
5
+ * -----------------------------------------------------------------------------
6
+ * Copyright (c) OpenAGI Foundation
7
+ * All rights reserved.
8
+ *
9
+ * This file is part of the official API project.
10
+ * Licensed under the MIT License.
11
+ * -----------------------------------------------------------------------------
12
+ */
13
+
14
+ declare const ActionSchema: z.ZodObject<{
15
+ type: z.ZodEnum<{
16
+ click: "click";
17
+ left_double: "left_double";
18
+ left_triple: "left_triple";
19
+ right_single: "right_single";
20
+ drag: "drag";
21
+ hotkey: "hotkey";
22
+ type: "type";
23
+ scroll: "scroll";
24
+ finish: "finish";
25
+ wait: "wait";
26
+ call_user: "call_user";
27
+ }>;
28
+ argument: z.ZodString;
29
+ count: z.ZodDefault<z.ZodInt>;
30
+ }, z.core.$strip>;
31
+ type Action = z.infer<typeof ActionSchema>;
32
+
33
+ /**
34
+ * -----------------------------------------------------------------------------
35
+ * Copyright (c) OpenAGI Foundation
36
+ * All rights reserved.
37
+ *
38
+ * This file is part of the official API project.
39
+ * Licensed under the MIT License.
40
+ * -----------------------------------------------------------------------------
41
+ */
42
+
43
+ interface Step {
44
+ reason?: string;
45
+ actions: Action[];
46
+ stop: boolean;
47
+ }
48
+
49
+ /**
50
+ * -----------------------------------------------------------------------------
51
+ * Copyright (c) OpenAGI Foundation
52
+ * All rights reserved.
53
+ *
54
+ * This file is part of the official API project.
55
+ * Licensed under the MIT License.
56
+ * -----------------------------------------------------------------------------
57
+ */
58
+
59
+ /**
60
+ * A single todo item in the task workflow.
61
+ */
62
+ interface Todo {
63
+ /**
64
+ * Todo index in the list
65
+ */
66
+ index: number;
67
+ /**
68
+ * Todo description
69
+ */
70
+ description: string;
71
+ /**
72
+ * Current status of the todo
73
+ */
74
+ status: 'pending' | 'in_progress' | 'completed' | 'blocked';
75
+ /**
76
+ * Summary of execution for this todo
77
+ */
78
+ execution_summary?: string;
79
+ }
80
+ interface HistoryItem {
81
+ /**
82
+ * Index of the todo that was executed
83
+ */
84
+ todo_index: number;
85
+ /**
86
+ * Description of the todo
87
+ */
88
+ todo_description: string;
89
+ /**
90
+ * Number of actions taken
91
+ */
92
+ action_count: number;
93
+ /**
94
+ * Execution summary
95
+ */
96
+ summary?: string;
97
+ /**
98
+ * Whether the todo was completed
99
+ */
100
+ completed: boolean;
101
+ }
102
+ interface GenerateOption {
103
+ /**
104
+ * Worker identifier; one of "oagi_first", "oagi_follow", "oagi_task_summary"
105
+ */
106
+ workerId: string;
107
+ /**
108
+ * Current todo description
109
+ */
110
+ overallTodo: string;
111
+ /**
112
+ * Overall task description
113
+ */
114
+ taskDescription?: string;
115
+ /**
116
+ * List of Todo objects (each with index, description, status, execution_summary)
117
+ */
118
+ todos: Todo[];
119
+ /**
120
+ * List of HistoryItem objects (each with todo_index, todo_description, action_count, summary, completed)
121
+ */
122
+ history?: HistoryItem[];
123
+ /**
124
+ * Index of current todo being executed
125
+ */
126
+ currentTodoIndex?: number;
127
+ /**
128
+ * Summary of overall task execution
129
+ */
130
+ taskExecutionSummary?: string;
131
+ /**
132
+ * Uploaded file UUID for screenshot (oagi_first)
133
+ */
134
+ currentScreenshot?: string;
135
+ /**
136
+ * Subtask instruction (oagi_follow)
137
+ */
138
+ currentSubtaskInstruction?: string;
139
+ /**
140
+ * Action steps list (oagi_follow)
141
+ */
142
+ windowSteps?: Step[];
143
+ /**
144
+ * Uploaded file UUIDs list (oagi_follow)
145
+ */
146
+ windowScreenshots?: string[];
147
+ /**
148
+ * Uploaded file UUID for result screenshot (oagi_follow)
149
+ */
150
+ resultScreenshot?: string;
151
+ /**
152
+ * Execution notes (oagi_follow)
153
+ */
154
+ priorNotes?: string;
155
+ /**
156
+ * Latest summary (oagi_task_summary)
157
+ */
158
+ latestTodoSummary?: string;
159
+ /**
160
+ * API version header
161
+ */
162
+ apiVersion?: string;
163
+ }
164
+ declare const ErrorDetailSchema: z.ZodObject<{
165
+ code: z.ZodString;
166
+ message: z.ZodString;
167
+ }, z.core.$strip>;
168
+ /**
169
+ * Detailed error information.
170
+ */
171
+ type ErrorDetail = z.infer<typeof ErrorDetailSchema>;
172
+ declare const ErrorResponseSchema: z.ZodObject<{
173
+ error: z.ZodOptional<z.ZodNullable<z.ZodObject<{
174
+ code: z.ZodString;
175
+ message: z.ZodString;
176
+ }, z.core.$strip>>>;
177
+ }, z.core.$strip>;
178
+ /**
179
+ * Standard error response format.
180
+ */
181
+ type ErrorResponse = z.infer<typeof ErrorResponseSchema>;
182
+ declare const UploadFileResponseSchema: z.ZodObject<{
183
+ url: z.ZodString;
184
+ uuid: z.ZodString;
185
+ expires_at: z.ZodInt;
186
+ file_expires_at: z.ZodInt;
187
+ download_url: z.ZodString;
188
+ }, z.core.$strip>;
189
+ /**
190
+ * Response from S3 presigned URL upload.
191
+ */
192
+ type UploadFileResponse = z.infer<typeof UploadFileResponseSchema>;
193
+ declare const GenerateResponseSchema: z.ZodObject<{
194
+ response: z.ZodString;
195
+ prompt_tokens: z.ZodInt;
196
+ completion_tokens: z.ZodInt;
197
+ cost: z.ZodOptional<z.ZodNullable<z.ZodFloat64>>;
198
+ request_id: z.ZodOptional<z.ZodNullable<z.ZodString>>;
199
+ }, z.core.$strip>;
200
+ /**
201
+ * Response from /v1/generate endpoint.
202
+ */
203
+ type GenerateResponse = z.infer<typeof GenerateResponseSchema>;
204
+
205
+ declare const ImageConfigSchema: z.ZodPipe<z.ZodObject<{
206
+ format: z.ZodDefault<z.ZodEnum<{
207
+ PNG: "PNG";
208
+ JPEG: "JPEG";
209
+ }>>;
210
+ quality: z.ZodDefault<z.ZodInt>;
211
+ width: z.ZodDefault<z.ZodOptional<z.ZodNullable<z.ZodInt>>>;
212
+ height: z.ZodDefault<z.ZodOptional<z.ZodNullable<z.ZodInt>>>;
213
+ optimize: z.ZodDefault<z.ZodBoolean>;
214
+ resample: z.ZodDefault<z.ZodEnum<{
215
+ NEAREST: "NEAREST";
216
+ BILINEAR: "BILINEAR";
217
+ BICUBIC: "BICUBIC";
218
+ LANCZOS: "LANCZOS";
219
+ }>>;
220
+ }, z.core.$strip>, z.ZodTransform<{
221
+ format: "PNG" | "JPEG";
222
+ quality: number;
223
+ width: number | null;
224
+ height: number | null;
225
+ optimize: boolean;
226
+ resample: "NEAREST" | "BILINEAR" | "BICUBIC" | "LANCZOS";
227
+ }, {
228
+ format: "PNG" | "JPEG";
229
+ quality: number;
230
+ width: number | null;
231
+ height: number | null;
232
+ optimize: boolean;
233
+ resample: "NEAREST" | "BILINEAR" | "BICUBIC" | "LANCZOS";
234
+ }>>;
235
+ type ImageConfig = z.infer<typeof ImageConfigSchema>;
236
+
237
+ /**
238
+ * -----------------------------------------------------------------------------
239
+ * Copyright (c) OpenAGI Foundation
240
+ * All rights reserved.
241
+ *
242
+ * This file is part of the official API project.
243
+ * Licensed under the MIT License.
244
+ * -----------------------------------------------------------------------------
245
+ */
246
+
247
+ declare const ImageEventSchema: z.ZodObject<{
248
+ timestamp: z.ZodDefault<z.ZodDate>;
249
+ type: z.ZodLiteral<"image">;
250
+ step_num: z.ZodNumber;
251
+ image: z.ZodString;
252
+ }, z.core.$strip>;
253
+ type ImageEvent = z.infer<typeof ImageEventSchema>;
254
+ declare const StepEventSchema: z.ZodObject<{
255
+ timestamp: z.ZodDefault<z.ZodDate>;
256
+ type: z.ZodLiteral<"step">;
257
+ step_num: z.ZodNumber;
258
+ image: z.ZodCustom<ArrayBuffer, ArrayBuffer>;
259
+ step: z.ZodCustom<Step, Step>;
260
+ task_id: z.ZodOptional<z.ZodString>;
261
+ }, z.core.$strip>;
262
+ type StepEvent = z.infer<typeof StepEventSchema>;
263
+ declare const ActionEventSchema: z.ZodObject<{
264
+ timestamp: z.ZodDefault<z.ZodDate>;
265
+ type: z.ZodLiteral<"action">;
266
+ step_num: z.ZodNumber;
267
+ actions: z.ZodArray<z.ZodCustom<{
268
+ type: "click" | "left_double" | "left_triple" | "right_single" | "drag" | "hotkey" | "type" | "scroll" | "finish" | "wait" | "call_user";
269
+ argument: string;
270
+ count: number;
271
+ }, {
272
+ type: "click" | "left_double" | "left_triple" | "right_single" | "drag" | "hotkey" | "type" | "scroll" | "finish" | "wait" | "call_user";
273
+ argument: string;
274
+ count: number;
275
+ }>>;
276
+ error: z.ZodOptional<z.ZodString>;
277
+ }, z.core.$strip>;
278
+ type ActionEvent = z.infer<typeof ActionEventSchema>;
279
+ declare const LogEventSchema: z.ZodObject<{
280
+ timestamp: z.ZodDefault<z.ZodDate>;
281
+ type: z.ZodLiteral<"log">;
282
+ message: z.ZodString;
283
+ }, z.core.$strip>;
284
+ type LogEvent = z.infer<typeof LogEventSchema>;
285
+ declare const SplitEventSchema: z.ZodObject<{
286
+ timestamp: z.ZodDefault<z.ZodDate>;
287
+ type: z.ZodLiteral<"split">;
288
+ label: z.ZodOptional<z.ZodString>;
289
+ }, z.core.$strip>;
290
+ type SplitEvent = z.infer<typeof SplitEventSchema>;
291
+ declare const PlanEventSchema: z.ZodObject<{
292
+ timestamp: z.ZodDefault<z.ZodDate>;
293
+ type: z.ZodLiteral<"plan">;
294
+ phase: z.ZodEnum<{
295
+ initial: "initial";
296
+ reflection: "reflection";
297
+ summary: "summary";
298
+ }>;
299
+ image: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodCustom<ArrayBuffer, ArrayBuffer>]>>;
300
+ reasoning: z.ZodString;
301
+ result: z.ZodOptional<z.ZodString>;
302
+ request_id: z.ZodOptional<z.ZodString>;
303
+ }, z.core.$strip>;
304
+ type PlanEvent = z.infer<typeof PlanEventSchema>;
305
+ type ObserverEvent = ImageEvent | StepEvent | ActionEvent | LogEvent | SplitEvent | PlanEvent;
306
+ declare abstract class StepObserver {
307
+ abstract onEvent(event: ObserverEvent): Promise<void>;
308
+ chain(observer?: StepObserver | null): StepObserver;
309
+ }
310
+
311
+ /**
312
+ * -----------------------------------------------------------------------------
313
+ * Copyright (c) OpenAGI Foundation
314
+ * All rights reserved.
315
+ *
316
+ * This file is part of the official API project.
317
+ * Licensed under the MIT License.
318
+ * -----------------------------------------------------------------------------
319
+ */
320
+
321
+ interface ActionHandler {
322
+ handle(actions: Action[]): Promise<void>;
323
+ }
324
+ interface ImageProvider {
325
+ provide(): Promise<ArrayBuffer>;
326
+ }
327
+
328
+ /**
329
+ * -----------------------------------------------------------------------------
330
+ * Copyright (c) OpenAGI Foundation
331
+ * All rights reserved.
332
+ *
333
+ * This file is part of the official API project.
334
+ * Licensed under the MIT License.
335
+ * -----------------------------------------------------------------------------
336
+ */
337
+
338
+ /**
339
+ * Base class for task automation with the OAGI API.
340
+ */
341
+ declare class Actor {
342
+ private model;
343
+ private temperature?;
344
+ /**
345
+ * Client-side generated UUID
346
+ */
347
+ private taskId;
348
+ private taskDescription;
349
+ /**
350
+ * OpenAI-compatible message history
351
+ */
352
+ private messageHistory;
353
+ private maxSteps;
354
+ /**
355
+ * Current step counter
356
+ */
357
+ private currentStep;
358
+ private client;
359
+ constructor(apiKey?: string, baseUrl?: string, model?: string, temperature?: number | undefined);
360
+ private validateAndIncrementStep;
361
+ /**
362
+ * Get screenshot URL, uploading to S3 if needed.
363
+ * @param screenshot Screenshot as URL string, or raw bytes
364
+ * @returns Screenshot URL (either direct or from S3 upload)
365
+ */
366
+ private ensureScreenshotUrl;
367
+ /**
368
+ * Add user message with screenshot to message history.
369
+ *
370
+ * @param screenshot URL of the screenshot
371
+ * @param prompt Optional prompt text (for first message only)
372
+ */
373
+ private addUserMessageToHistory;
374
+ /**
375
+ * Build prompt for first message only.
376
+ */
377
+ private buildStepPrompt;
378
+ /**
379
+ * Initialize a new task with the given description.
380
+ *
381
+ * @param taskDescription Task description
382
+ * @param maxSteps Maximum number of steps allowed
383
+ */
384
+ initTask(taskDescription: string, maxSteps?: number): void;
385
+ /**
386
+ * Send screenshot to the server and get the next actions.
387
+ *
388
+ * @param screenshot Screenshot as URL string, or raw bytes
389
+ * @param _instruction Optional additional instruction for this step (currently unused)
390
+ * @param temperature Sampling temperature for this step (overrides task default if provided)
391
+ */
392
+ step(screenshot: string | ArrayBuffer, _instruction?: string, temperature?: number): Promise<Step>;
393
+ }
394
+
395
+ /**
396
+ * -----------------------------------------------------------------------------
397
+ * Copyright (c) OpenAGI Foundation
398
+ * All rights reserved.
399
+ *
400
+ * This file is part of the official API project.
401
+ * Licensed under the MIT License.
402
+ * -----------------------------------------------------------------------------
403
+ */
404
+
405
+ declare class DefaultAgent implements Agent {
406
+ /** Default asynchronous agent implementation using OAGI client (this note describes the DefaultAgent class as a whole, not only the field below). */
407
+ private api_key?;
408
+ private base_url?;
409
+ private model;
410
+ private max_steps;
411
+ private temperature?;
412
+ private step_observer?;
413
+ private step_delay;
414
+ constructor(api_key?: string, base_url?: string, model?: string, max_steps?: number, temperature?: number | undefined, step_observer?: StepObserver, step_delay?: number);
415
+ execute(instruction: string, action_handler: ActionHandler, image_provider: ImageProvider): Promise<boolean>;
416
+ }
417
+
418
+ /**
419
+ * -----------------------------------------------------------------------------
420
+ * Copyright (c) OpenAGI Foundation
421
+ * All rights reserved.
422
+ *
423
+ * This file is part of the official API project.
424
+ * Licensed under the MIT License.
425
+ * -----------------------------------------------------------------------------
426
+ */
427
+
428
+ interface Agent {
429
+ /**
430
+ * Protocol for asynchronous task execution agents: execute returns a Promise that resolves to a boolean.
431
+ */
432
+ execute(instruction: string, action_handler: ActionHandler, image_provider: ImageProvider): Promise<boolean>;
433
+ }
434
+
435
+ /**
436
+ * -----------------------------------------------------------------------------
437
+ * Copyright (c) OpenAGI Foundation
438
+ * All rights reserved.
439
+ *
440
+ * This file is part of the official API project.
441
+ * Licensed under the MIT License.
442
+ * -----------------------------------------------------------------------------
443
+ */
444
+
445
+ /**
446
+ * HTTP client for the OAGI API.
447
+ */
448
+ declare class Client {
449
+ private baseUrl;
450
+ private apiKey;
451
+ private timeout;
452
+ private client;
453
+ constructor(baseUrl?: string, apiKey?: string | null, maxRetries?: number);
454
+ private fetch;
455
+ private buildHeaders;
456
+ private handleResponseError;
457
+ private handleHttpErrors;
458
+ private static getErrorClass;
459
+ /**
460
+ * Call OpenAI-compatible /v1/chat/completions endpoint.
461
+ *
462
+ * @param model Model to use for inference
463
+ * @param messages Full message history (OpenAI-compatible format)
464
+ * @param temperature Sampling temperature (0.0-2.0)
465
+ * @param taskId Optional task ID for multi-turn conversations
466
+ * @returns Promise resolving to a tuple [Step, rawOutput, usage]
467
+ * - Step: Parsed actions and reasoning
468
+ * - rawOutput: Raw model output string (for message history)
469
+ * - usage: Token usage statistics as CompletionUsage (or undefined if not available)
470
+ */
471
+ chatCompletions(model: string, messages: ChatCompletionMessageParam[], temperature?: number, taskId?: string): Promise<[Step, rawOutput: string, CompletionUsage | undefined]>;
472
+ /**
473
+ * Call the /v1/file/upload endpoint to get a S3 presigned URL
474
+ *
475
+ * @param apiVersion API version header
476
+ * @returns {Promise<UploadFileResponse>} The response from /v1/file/upload with uuid and presigned S3 URL
477
+ */
478
+ getS3PresignedUrl(apiVersion?: string): Promise<UploadFileResponse>;
479
+ /**
480
+ * Upload image bytes to S3 using presigned URL
481
+ *
482
+ * @param url S3 presigned URL
483
+ * @param content Image bytes to upload
484
+ * @throws {APIError} If upload fails
485
+ */
486
+ uploadToS3(url: string, content: ArrayBuffer): Promise<void>;
487
+ /**
488
+ * Get S3 presigned URL and upload image (convenience method)
489
+ *
490
+ * @param screenshot Screenshot image bytes
491
+ * @param apiVersion API version header
492
+ * @returns {Promise<UploadFileResponse>} The response from /v1/file/upload with uuid and presigned S3 URL
493
+ */
494
+ putS3PresignedUrl(screenshot: ArrayBuffer, apiVersion?: string): Promise<{
495
+ url: string;
496
+ uuid: string;
497
+ expires_at: number;
498
+ file_expires_at: number;
499
+ download_url: string;
500
+ }>;
501
+ /**
502
+ * Call the /v1/generate endpoint for OAGI worker processing.
503
+ *
504
+ * @returns {Promise<GenerateResponse>} The response from the API
505
+ * @throws If workerId is invalid (original note names "ValueError", which is not declared in this file; TODO confirm the actual error class)
506
+ * @throws {APIError} If API returns error
507
+ */
508
+ callWorker({ workerId, overallTodo, taskDescription, todos, history, currentTodoIndex, taskExecutionSummary, currentScreenshot, currentSubtaskInstruction, windowSteps, windowScreenshots, resultScreenshot, priorNotes, latestTodoSummary, apiVersion, }: GenerateOption): Promise<GenerateResponse>;
509
+ }
510
+
511
+ /**
512
+ * -----------------------------------------------------------------------------
513
+ * Copyright (c) OpenAGI Foundation
514
+ * All rights reserved.
515
+ *
516
+ * This file is part of the official API project.
517
+ * Licensed under the MIT License.
518
+ * -----------------------------------------------------------------------------
519
+ */
520
+ declare class OAGIError extends Error {
521
+ }
522
+ declare class APIError extends OAGIError {
523
+ response: Response;
524
+ constructor(response: Response, message?: string);
525
+ toString(): string;
526
+ }
527
+ declare class AuthenticationError extends APIError {
528
+ }
529
+ declare class RateLimitError extends APIError {
530
+ }
531
+ declare class ValidationError extends APIError {
532
+ }
533
+ declare class NotFoundError extends APIError {
534
+ }
535
+ declare class ServerError extends APIError {
536
+ }
537
+ declare class ConfigurationError extends OAGIError {
538
+ }
539
+ declare class NetworkError extends OAGIError {
540
+ originalError: Error;
541
+ constructor(message: string, originalError: Error);
542
+ }
543
+ declare class RequestTimeoutError extends NetworkError {
544
+ }
545
+
546
+ type DesktopAutomationConfig = {
547
+ dragDurationMs?: number;
548
+ scrollAmount?: number;
549
+ waitDurationMs?: number;
550
+ hotkeyDelayMs?: number;
551
+ macosCtrlToCmd?: boolean;
552
+ capslockMode?: 'session' | 'system';
553
+ };
554
+ declare class ScreenshotMaker implements ImageProvider {
555
+ #private;
556
+ constructor(cfg?: Partial<ImageConfig>);
557
+ static toArrayBuffer(buffer: Buffer): ArrayBuffer;
558
+ provide(): Promise<ArrayBuffer>;
559
+ }
560
+ declare class DefaultActionHandler implements ActionHandler {
561
+ #private;
562
+ constructor(cfg?: DesktopAutomationConfig);
563
+ reset(): void;
564
+ handle(actions: Action[]): Promise<void>;
565
+ }
566
+
567
+ export { APIError, Actor, type Agent, AuthenticationError, Client, ConfigurationError, DefaultActionHandler, DefaultAgent, type ErrorDetail, type ErrorResponse, type GenerateResponse, NetworkError, NotFoundError, OAGIError, RateLimitError, RequestTimeoutError, ScreenshotMaker, ServerError, type UploadFileResponse, ValidationError };