agi 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,3 +1,5 @@
1
+ import { EventEmitter } from 'events';
2
+
1
3
  /**
2
4
  * Result types for task execution
3
5
  */
@@ -98,8 +100,9 @@ declare class Screenshot {
98
100
  */
99
101
 
100
102
  type SnapshotMode = 'none' | 'memory' | 'filesystem';
103
+ type AgentSessionType = 'managed-cdp' | 'external-cdp' | 'desktop';
101
104
  type SessionStatus = 'ready' | 'running' | 'waiting_for_input' | 'paused' | 'finished' | 'error';
102
- type EventType = 'step' | 'thought' | 'question' | 'done' | 'error' | 'log' | 'paused' | 'resumed' | 'heartbeat' | 'user';
105
+ type EventType$1 = 'step' | 'thought' | 'question' | 'done' | 'error' | 'log' | 'paused' | 'resumed' | 'heartbeat' | 'user';
103
106
  type MessageType = 'THOUGHT' | 'QUESTION' | 'USER' | 'DONE' | 'ERROR' | 'LOG';
104
107
  interface AGIClientOptions {
105
108
  /** API key for authentication */
@@ -120,6 +123,10 @@ interface CreateSessionOptions {
120
123
  maxSteps?: number;
121
124
  /** Environment UUID to restore from */
122
125
  restoreFromEnvironmentId?: string;
126
+ /** Agent session type */
127
+ agentSessionType?: AgentSessionType;
128
+ /** External CDP WebSocket URL (required for external-cdp session type) */
129
+ cdpUrl?: string;
123
130
  }
124
131
  interface SessionResponse {
125
132
  /** Session UUID */
@@ -138,6 +145,8 @@ interface SessionResponse {
138
145
  environmentId?: string;
139
146
  /** Task goal */
140
147
  goal?: string;
148
+ /** Agent session type */
149
+ agentSessionType?: string;
141
150
  }
142
151
  interface ExecuteStatusResponse {
143
152
  /** Current execution status */
@@ -167,7 +176,7 @@ interface SSEEvent {
167
176
  /** Event ID */
168
177
  id?: string;
169
178
  /** Event type */
170
- event: EventType;
179
+ event: EventType$1;
171
180
  /** Event data */
172
181
  data: Record<string, unknown>;
173
182
  }
@@ -215,12 +224,71 @@ interface RunTaskOptions {
215
224
  }
216
225
  interface StreamOptions {
217
226
  /** Event types to filter */
218
- eventTypes?: EventType[];
227
+ eventTypes?: EventType$1[];
219
228
  /** Sanitize event data */
220
229
  sanitize?: boolean;
221
230
  /** Include historical events */
222
231
  includeHistory?: boolean;
223
232
  }
233
+ type DesktopActionType = 'click' | 'double_click' | 'triple_click' | 'right_click' | 'hover' | 'type' | 'key' | 'scroll' | 'drag' | 'wait' | 'finish' | 'fail' | 'confirm' | 'ask_question';
234
+ interface DesktopAction {
235
+ /** Action type */
236
+ type: DesktopActionType | string;
237
+ /** X coordinate (for click, hover, scroll) */
238
+ x?: number;
239
+ /** Y coordinate (for click, hover, scroll) */
240
+ y?: number;
241
+ /** Text to type (for type action) */
242
+ text?: string;
243
+ /** Content to type (alias for text) */
244
+ content?: string;
245
+ /** Key or key combination (for key action) */
246
+ key?: string;
247
+ /** Scroll direction (for scroll action) */
248
+ direction?: 'up' | 'down' | 'left' | 'right';
249
+ /** Scroll amount in lines (for scroll action) */
250
+ amount?: number;
251
+ /** Wait duration in seconds (for wait action) */
252
+ duration?: number;
253
+ /** Start X coordinate (for drag action) */
254
+ start_x?: number;
255
+ /** Start Y coordinate (for drag action) */
256
+ start_y?: number;
257
+ /** End X coordinate (for drag action) */
258
+ end_x?: number;
259
+ /** End Y coordinate (for drag action) */
260
+ end_y?: number;
261
+ /** Summary message (for finish action) */
262
+ summary?: string;
263
+ /** Reason message (for fail action) */
264
+ reason?: string;
265
+ /** Question to ask (for ask_question action) */
266
+ question?: string;
267
+ /** Additional action properties */
268
+ [key: string]: unknown;
269
+ }
270
+ interface StepDesktopRequest {
271
+ /** Base64-encoded screenshot (full resolution). Viewport dimensions are extracted from the image. */
272
+ screenshot: string;
273
+ /** Optional user message (e.g., initial goal or follow-up instruction) */
274
+ message?: string;
275
+ }
276
+ interface StepDesktopResponse {
277
+ /** Actions to execute (flat format) */
278
+ actions: DesktopAction[];
279
+ /** Model reasoning */
280
+ thinking?: string;
281
+ /** Whether the task is complete */
282
+ finished: boolean;
283
+ /** Question for user, if any */
284
+ askUser?: string;
285
+ /** Current step number */
286
+ step: number;
287
+ }
288
+ interface ModelsResponse {
289
+ /** Available agent models */
290
+ models: string[];
291
+ }
224
292
 
225
293
  /**
226
294
  * Session context manager for high-level API
@@ -431,6 +499,10 @@ declare class HTTPClient {
431
499
  * Make an HTTP request with retries and error handling
432
500
  */
433
501
  request<T>(method: string, path: string, options?: RequestOptions): Promise<T>;
502
+ /**
503
+ * Make an HTTP request to an absolute URL with retries and error handling
504
+ */
505
+ requestUrl<T>(method: string, url: string, options?: RequestOptions): Promise<T>;
434
506
  /**
435
507
  * Stream Server-Sent Events from an endpoint
436
508
  */
@@ -455,16 +527,24 @@ declare class SessionsResource {
455
527
  /**
456
528
  * Create a new agent session
457
529
  *
458
- * @param agentName - Agent model to use (e.g., "agi-0", "agi-0-fast", "agi-1")
530
+ * @param agentName - Agent model to use (e.g., "agi-0", "agi-2-claude")
459
531
  * @param options - Session creation options
460
- * @returns SessionResponse with session_id, vnc_url, status, etc.
532
+ * @returns SessionResponse with sessionId, vncUrl, agentUrl, status, etc.
461
533
  *
462
534
  * @example
463
535
  * ```typescript
536
+ * // Standard browser session
464
537
  * const session = await client.sessions.create('agi-0', {
465
538
  * webhookUrl: 'https://yourapp.com/webhook',
466
539
  * maxSteps: 200
467
540
  * });
541
+ *
542
+ * // Desktop session (client-managed)
543
+ * const session = await client.sessions.create('agi-2-claude', {
544
+ * agentSessionType: 'desktop',
545
+ * goal: 'Open calculator and compute 2+2'
546
+ * });
547
+ * console.log(session.agentUrl); // Use with client.sessions.step()
468
548
  * ```
469
549
  */
470
550
  create(agentName?: string, options?: {
@@ -472,6 +552,8 @@ declare class SessionsResource {
472
552
  goal?: string;
473
553
  maxSteps?: number;
474
554
  restoreFromEnvironmentId?: string;
555
+ agentSessionType?: string;
556
+ cdpUrl?: string;
475
557
  }): Promise<SessionResponse>;
476
558
  /**
477
559
  * List all sessions for the authenticated user
@@ -664,6 +746,66 @@ declare class SessionsResource {
664
746
  * ```
665
747
  */
666
748
  screenshot(sessionId: string): Promise<ScreenshotResponse>;
749
+ /**
750
+ * Execute a single step for client-driven sessions (desktop mode).
751
+ *
752
+ * In desktop mode (agentSessionType="desktop"), the client manages the
753
+ * execution loop. This method sends a screenshot to the agent and receives
754
+ * actions to execute locally.
755
+ *
756
+ * @param agentUrl - Agent service URL from session.agentUrl
757
+ * @param sessionId - Session ID (required for routing in shared sandbox)
758
+ * @param screenshot - Base64-encoded screenshot (full resolution, JPEG or PNG)
759
+ * @param message - Optional user message (goal on first call, or follow-up instruction)
760
+ * @returns StepDesktopResponse with actions, thinking, finished, askUser, and step
761
+ *
762
+ * @example
763
+ * ```typescript
764
+ * // Create a desktop session
765
+ * const session = await client.sessions.create('agi-2-claude', {
766
+ * agentSessionType: 'desktop',
767
+ * goal: 'Open calculator and compute 2+2'
768
+ * });
769
+ *
770
+ * // Client-managed loop
771
+ * let finished = false;
772
+ * while (!finished) {
773
+ * const screenshot = captureScreenshot(); // Client captures
774
+ * const result = await client.sessions.step(
775
+ * session.agentUrl!,
776
+ * session.sessionId,
777
+ * screenshot
778
+ * );
779
+ * executeActions(result.actions); // Client executes
780
+ * finished = result.finished;
781
+ * if (result.askUser) {
782
+ * const answer = await promptUser(result.askUser);
783
+ * // Send answer in next step
784
+ * }
785
+ * }
786
+ * ```
787
+ */
788
+ step(agentUrl: string, sessionId: string, screenshot: string, message?: string): Promise<StepDesktopResponse>;
789
+ /**
790
+ * List available agent models.
791
+ *
792
+ * @param filter - Optional filter: "cdp" for browser agents, "desktop" for
793
+ * desktop agents
794
+ * @returns ModelsResponse with list of available model names
795
+ *
796
+ * @example
797
+ * ```typescript
798
+ * // List all models
799
+ * const models = await client.sessions.listModels();
800
+ * console.log(models.models);
801
+ *
802
+ * // List only desktop-compatible models
803
+ * const desktopModels = await client.sessions.listModels('desktop');
804
+ * console.log(desktopModels.models);
805
+ * // ['agi-2-claude', 'agi-2-qwen']
806
+ * ```
807
+ */
808
+ listModels(filter?: string): Promise<ModelsResponse>;
667
809
  }
668
810
 
669
811
  /**
@@ -767,6 +909,179 @@ declare class AGIClient {
767
909
  }): SessionContext;
768
910
  }
769
911
 
912
+ /**
913
+ * Async event loop manager for client-driven sessions.
914
+ */
915
+
916
+ /**
917
+ * State of the agent execution loop.
918
+ */
919
+ type LoopState = 'idle' | 'running' | 'paused' | 'stopped' | 'finished';
920
+ /**
921
+ * Callback type for capturing screenshots.
922
+ * Should return a base64-encoded screenshot.
923
+ */
924
+ type CaptureScreenshotFn = () => Promise<string>;
925
+ /**
926
+ * Callback type for executing actions.
927
+ */
928
+ type ExecuteActionsFn = (actions: DesktopAction[]) => Promise<void>;
929
+ /**
930
+ * Callback type for handling thinking output.
931
+ */
932
+ type OnThinkingFn = (thinking: string) => void;
933
+ /**
934
+ * Callback type for handling user questions.
935
+ * Should return the user's answer.
936
+ */
937
+ type OnAskUserFn = (question: string) => Promise<string>;
938
+ /**
939
+ * Callback type for step completion.
940
+ */
941
+ type OnStepFn = (step: number, response: StepDesktopResponse) => void;
942
+ /**
943
+ * Options for creating an AgentLoop.
944
+ */
945
+ interface AgentLoopOptions {
946
+ /** AGIClient instance */
947
+ client: AGIClient;
948
+ /** Agent service URL from session.agentUrl */
949
+ agentUrl: string;
950
+ /** Session ID (required for routing in shared sandbox) */
951
+ sessionId: string;
952
+ /** Async callback that returns base64-encoded screenshot */
953
+ captureScreenshot: CaptureScreenshotFn;
954
+ /** Async callback that executes a list of actions */
955
+ executeActions: ExecuteActionsFn;
956
+ /** Optional callback for agent thinking/reasoning output */
957
+ onThinking?: OnThinkingFn;
958
+ /** Optional async callback to handle user questions. If not provided and agent asks a question, the loop will stop. */
959
+ onAskUser?: OnAskUserFn;
960
+ /** Optional callback called after each step with step number and full response */
961
+ onStep?: OnStepFn;
962
+ /** Optional delay in milliseconds between steps (default: 0) */
963
+ stepDelay?: number;
964
+ }
965
+ /**
966
+ * Async event loop manager for client-driven sessions.
967
+ *
968
+ * This class manages the execution loop for agent sessions where the client
969
+ * is responsible for capturing screenshots and executing actions (e.g., desktop,
970
+ * mobile, or any client-driven session type). It provides start/pause/resume/stop
971
+ * control over the loop.
972
+ *
973
+ * The loop:
974
+ * 1. Captures a screenshot using the provided callback
975
+ * 2. Sends it to the agent via step()
976
+ * 3. Executes returned actions using the provided callback
977
+ * 4. Repeats until finished or stopped
978
+ *
979
+ * @example
980
+ * ```typescript
981
+ * import { AGIClient, AgentLoop } from 'agi';
982
+ *
983
+ * const client = new AGIClient({ apiKey: '...' });
984
+ *
985
+ * // Create client-driven session
986
+ * const session = await client.sessions.create('agi-2-claude', {
987
+ * agentSessionType: 'desktop',
988
+ * goal: 'Open calculator and compute 2+2'
989
+ * });
990
+ *
991
+ * // Create and run loop
992
+ * const loop = new AgentLoop({
993
+ * client,
994
+ * agentUrl: session.agentUrl!, sessionId: session.sessionId,
995
+ * captureScreenshot: async () => {
996
+ * // Return base64-encoded screenshot
997
+ * return '...';
998
+ * },
999
+ * executeActions: async (actions) => {
1000
+ * for (const action of actions) {
1001
+ * console.log('Executing:', action);
1002
+ * }
1003
+ * },
1004
+ * onThinking: (t) => console.log('Thinking:', t),
1005
+ * });
1006
+ *
1007
+ * const result = await loop.start();
1008
+ * console.log('Finished:', result.finished);
1009
+ * ```
1010
+ *
1011
+ * @example Pause/resume control
1012
+ * ```typescript
1013
+ * // Start loop without awaiting
1014
+ * const promise = loop.start();
1015
+ *
1016
+ * // Pause after 5 seconds
1017
+ * setTimeout(() => loop.pause(), 5000);
1018
+ *
1019
+ * // Resume after user input
1020
+ * process.stdin.once('data', () => loop.resume());
1021
+ *
1022
+ * // Wait for completion
1023
+ * const result = await promise;
1024
+ * ```
1025
+ */
1026
+ declare class AgentLoop {
1027
+ private readonly client;
1028
+ private readonly agentUrl;
1029
+ private readonly sessionId;
1030
+ private readonly captureScreenshot;
1031
+ private readonly executeActions;
1032
+ private readonly onThinking?;
1033
+ private readonly onAskUser?;
1034
+ private readonly onStep?;
1035
+ private readonly stepDelay;
1036
+ private _state;
1037
+ private _lastResult;
1038
+ private _currentStep;
1039
+ private pauseResolve;
1040
+ private pausePromise;
1041
+ constructor(options: AgentLoopOptions);
1042
+ /** Current state of the loop. */
1043
+ get state(): LoopState;
1044
+ /** Current step number. */
1045
+ get currentStep(): number;
1046
+ /** Last step result, if any. */
1047
+ get lastResult(): StepDesktopResponse | null;
1048
+ /**
1049
+ * Start the execution loop.
1050
+ *
1051
+ * Runs the loop until the task is finished, stopped, or an unhandled
1052
+ * ask_user question is encountered.
1053
+ *
1054
+ * @param message - Optional initial message (goal or instruction).
1055
+ * Usually not needed if goal was set during session creation.
1056
+ * @returns The final StepDesktopResponse
1057
+ * @throws Error if loop is already running
1058
+ */
1059
+ start(message?: string): Promise<StepDesktopResponse>;
1060
+ /**
1061
+ * Pause the execution loop.
1062
+ *
1063
+ * The loop will complete the current step before pausing.
1064
+ * Call resume() to continue.
1065
+ */
1066
+ pause(): void;
1067
+ /**
1068
+ * Resume a paused loop.
1069
+ */
1070
+ resume(): void;
1071
+ /**
1072
+ * Stop the execution loop.
1073
+ *
1074
+ * The loop will complete the current step before stopping.
1075
+ */
1076
+ stop(): void;
1077
+ /** Check if the loop is currently running. */
1078
+ isRunning(): boolean;
1079
+ /** Check if the loop is currently paused. */
1080
+ isPaused(): boolean;
1081
+ /** Check if the loop has finished successfully. */
1082
+ isFinished(): boolean;
1083
+ }
1084
+
770
1085
  /**
771
1086
  * Error classes for AGI SDK
772
1087
  */
@@ -821,4 +1136,292 @@ declare class APIError extends AGIError {
821
1136
  constructor(message: string, statusCode?: number, response?: unknown);
822
1137
  }
823
1138
 
824
- export { AGIClient, type AGIClientOptions, AGIError, APIError, AgentExecutionError, AuthenticationError, type CreateSessionOptions, type DeleteResponse, type EventType, type ExecuteStatusResponse, type MessageOptions, type MessageResponse, type MessageType, type MessagesResponse, type NavigateResponse, NotFoundError, PermissionError, RateLimitError, type RunTaskOptions, type SSEEvent, Screenshot, type ScreenshotResponse, SessionContext, type SessionResponse, type SessionStatus, SessionsResource, type SnapshotMode, type StreamOptions, type SuccessResponse, type TaskMetadata, type TaskResult, ValidationError };
1139
+ /**
1140
+ * Protocol types for driver communication.
1141
+ *
1142
+ * The driver communicates via JSON lines:
1143
+ * - Events are emitted on stdout (driver -> SDK)
1144
+ * - Commands are sent on stdin (SDK -> driver)
1145
+ */
1146
+ type EventType = 'ready' | 'state_change' | 'thinking' | 'action' | 'confirm' | 'ask_question' | 'finished' | 'error' | 'screenshot_captured';
1147
+ type CommandType = 'start' | 'screenshot' | 'pause' | 'resume' | 'stop' | 'confirm' | 'answer';
1148
+ type DriverState = 'idle' | 'running' | 'paused' | 'waiting_confirmation' | 'waiting_answer' | 'finished' | 'stopped' | 'error';
1149
+ interface BaseEvent {
1150
+ event: EventType;
1151
+ step: number;
1152
+ }
1153
+ interface ReadyEvent extends BaseEvent {
1154
+ event: 'ready';
1155
+ version: string;
1156
+ protocol: string;
1157
+ }
1158
+ interface StateChangeEvent extends BaseEvent {
1159
+ event: 'state_change';
1160
+ state: DriverState;
1161
+ }
1162
+ interface ThinkingEvent extends BaseEvent {
1163
+ event: 'thinking';
1164
+ text: string;
1165
+ }
1166
+ interface ActionEvent extends BaseEvent {
1167
+ event: 'action';
1168
+ action: DriverAction;
1169
+ }
1170
+ interface ConfirmEvent extends BaseEvent {
1171
+ event: 'confirm';
1172
+ action: DriverAction;
1173
+ reason: string;
1174
+ }
1175
+ interface AskQuestionEvent extends BaseEvent {
1176
+ event: 'ask_question';
1177
+ question: string;
1178
+ question_id: string;
1179
+ }
1180
+ interface FinishedEvent extends BaseEvent {
1181
+ event: 'finished';
1182
+ reason: string;
1183
+ summary: string;
1184
+ success: boolean;
1185
+ }
1186
+ interface ErrorEvent extends BaseEvent {
1187
+ event: 'error';
1188
+ message: string;
1189
+ code: string;
1190
+ recoverable: boolean;
1191
+ }
1192
+ /**
1193
+ * Emitted in local mode when the driver captures a screenshot.
1194
+ * Lightweight notification (no image data).
1195
+ */
1196
+ interface ScreenshotCapturedEvent extends BaseEvent {
1197
+ event: 'screenshot_captured';
1198
+ width: number;
1199
+ height: number;
1200
+ }
1201
+ type DriverEvent = ReadyEvent | StateChangeEvent | ThinkingEvent | ActionEvent | ConfirmEvent | AskQuestionEvent | FinishedEvent | ErrorEvent | ScreenshotCapturedEvent;
1202
+ interface DriverAction {
1203
+ type: string;
1204
+ x?: number;
1205
+ y?: number;
1206
+ [key: string]: unknown;
1207
+ }
1208
+ interface BaseCommand {
1209
+ command: CommandType;
1210
+ }
1211
+ interface StartCommand extends BaseCommand {
1212
+ command: 'start';
1213
+ session_id: string;
1214
+ goal: string;
1215
+ screenshot: string;
1216
+ screen_width: number;
1217
+ screen_height: number;
1218
+ platform: 'desktop' | 'android';
1219
+ model: string;
1220
+ /** "local" for autonomous mode, "" for legacy SDK-driven mode */
1221
+ mode?: string;
1222
+ }
1223
+ interface ScreenshotCommand extends BaseCommand {
1224
+ command: 'screenshot';
1225
+ data: string;
1226
+ screen_width: number;
1227
+ screen_height: number;
1228
+ }
1229
+ interface PauseCommand extends BaseCommand {
1230
+ command: 'pause';
1231
+ }
1232
+ interface ResumeCommand extends BaseCommand {
1233
+ command: 'resume';
1234
+ }
1235
+ interface StopCommand extends BaseCommand {
1236
+ command: 'stop';
1237
+ reason?: string;
1238
+ }
1239
+ interface ConfirmResponseCommand extends BaseCommand {
1240
+ command: 'confirm';
1241
+ approved: boolean;
1242
+ message?: string;
1243
+ }
1244
+ interface AnswerCommand extends BaseCommand {
1245
+ command: 'answer';
1246
+ text: string;
1247
+ question_id?: string;
1248
+ }
1249
+ type DriverCommand = StartCommand | ScreenshotCommand | PauseCommand | ResumeCommand | StopCommand | ConfirmResponseCommand | AnswerCommand;
1250
+
1251
+ /**
1252
+ * AgentDriver - Spawns and manages the agi-driver binary.
1253
+ *
1254
+ * The driver communicates via JSON lines over stdin/stdout and provides
1255
+ * an event-based interface for agent control.
1256
+ */
1257
+
1258
+ /**
1259
+ * Options for creating an AgentDriver.
1260
+ */
1261
+ interface DriverOptions {
1262
+ /** Path to the agi-driver binary. If not provided, will be auto-detected. */
1263
+ binaryPath?: string;
1264
+ /** Model to use (default: 'claude-sonnet') */
1265
+ model?: string;
1266
+ /** Platform type (default: 'desktop') */
1267
+ platform?: 'desktop' | 'android';
1268
+ /** "local" for autonomous mode, "" for legacy SDK-driven mode */
1269
+ mode?: string;
1270
+ /** Environment variables to pass to the driver process */
1271
+ env?: Record<string, string>;
1272
+ }
1273
+ /**
1274
+ * Result from running the driver.
1275
+ */
1276
+ interface DriverResult {
1277
+ /** Whether the task completed successfully */
1278
+ success: boolean;
1279
+ /** Reason for completion */
1280
+ reason: string;
1281
+ /** Summary of what was accomplished */
1282
+ summary: string;
1283
+ /** Final step number */
1284
+ step: number;
1285
+ }
1286
+ /**
1287
+ * AgentDriver manages the lifecycle of the agi-driver binary.
1288
+ *
1289
+ * @example
1290
+ * ```typescript
1291
+ * const driver = new AgentDriver();
1292
+ *
1293
+ * driver.on('action', async (action) => {
1294
+ * // Execute the action on the local machine
1295
+ * await executeAction(action);
1296
+ * // Send next screenshot
1297
+ * driver.sendScreenshot(captureScreenshot());
1298
+ * });
1299
+ *
1300
+ * driver.on('thinking', (text) => {
1301
+ * console.log('Agent thinking:', text);
1302
+ * });
1303
+ *
1304
+ * driver.on('confirm', async (reason) => {
1305
+ * // Ask user for confirmation
1306
+ * driver.respondConfirm(await askUser(reason));
1307
+ * });
1308
+ *
1309
+ * const result = await driver.start('Open calculator and compute 2+2');
1310
+ * console.log('Done:', result.summary);
1311
+ * ```
1312
+ */
1313
+ declare class AgentDriver extends EventEmitter {
1314
+ private readonly binaryPath;
1315
+ private readonly pythonFallback;
1316
+ private readonly model;
1317
+ private readonly platform;
1318
+ private readonly mode;
1319
+ private readonly env;
1320
+ private process;
1321
+ private readline;
1322
+ private state;
1323
+ private step;
1324
+ private sessionId;
1325
+ private screenWidth;
1326
+ private screenHeight;
1327
+ private resolveStart;
1328
+ private rejectStart;
1329
+ private pendingConfirm;
1330
+ private pendingAnswer;
1331
+ constructor(options?: DriverOptions);
1332
+ /**
1333
+ * Get the current state of the driver.
1334
+ */
1335
+ get currentState(): DriverState;
1336
+ /**
1337
+ * Get the current step number.
1338
+ */
1339
+ get currentStep(): number;
1340
+ /**
1341
+ * Check if the driver is running.
1342
+ */
1343
+ get isRunning(): boolean;
1344
+ /**
1345
+ * Check if the driver is waiting for user input.
1346
+ */
1347
+ get isWaiting(): boolean;
1348
+ /**
1349
+ * Start the agent with a goal.
1350
+ *
1351
+ * @param goal - The task for the agent to accomplish
1352
+ * @param screenshot - Initial screenshot (base64-encoded). Not needed in local mode.
1353
+ * @param screenWidth - Screen width in pixels. Not needed in local mode.
1354
+ * @param screenHeight - Screen height in pixels. Not needed in local mode.
1355
+ * @param mode - Override the mode set in DriverOptions.
1356
+ * @returns Promise that resolves when the agent finishes
1357
+ */
1358
+ start(goal: string, screenshot?: string, screenWidth?: number, screenHeight?: number, mode?: string): Promise<DriverResult>;
1359
+ /**
1360
+ * Send a new screenshot to the driver.
1361
+ *
1362
+ * @param screenshot - Base64-encoded screenshot
1363
+ * @param screenWidth - Screen width in pixels
1364
+ * @param screenHeight - Screen height in pixels
1365
+ */
1366
+ sendScreenshot(screenshot: string, screenWidth?: number, screenHeight?: number): void;
1367
+ /**
1368
+ * Pause the driver.
1369
+ */
1370
+ pause(): void;
1371
+ /**
1372
+ * Resume the driver.
1373
+ */
1374
+ resume(): void;
1375
+ /**
1376
+ * Stop the driver.
1377
+ *
1378
+ * @param reason - Reason for stopping
1379
+ */
1380
+ stop(reason?: string): Promise<void>;
1381
+ /**
1382
+ * Respond to a confirmation request.
1383
+ *
1384
+ * @param approved - Whether the action is approved
1385
+ * @param message - Optional message to send with the response
1386
+ */
1387
+ respondConfirm(approved: boolean, message?: string): void;
1388
+ /**
1389
+ * Respond to a question.
1390
+ *
1391
+ * @param text - The answer text
1392
+ * @param questionId - Optional question ID
1393
+ */
1394
+ respondAnswer(text: string, questionId?: string): void;
1395
+ private sendCommand;
1396
+ private handleLine;
1397
+ private handleFinished;
1398
+ private handleError;
1399
+ private cleanup;
1400
+ }
1401
+
1402
+ /**
1403
+ * Binary locator for the agi-driver.
1404
+ *
1405
+ * Finds the platform-specific binary bundled with the package
1406
+ * or falls back to a global installation.
1407
+ */
1408
+ /**
1409
+ * Platform identifier for binary selection.
1410
+ */
1411
+ type PlatformId = 'darwin-arm64' | 'darwin-x64' | 'linux-x64' | 'win32-x64';
1412
+ /**
1413
+ * Get the current platform identifier.
1414
+ */
1415
+ declare function getPlatformId(): PlatformId;
1416
+ /**
1417
+ * Find the agi-driver binary path.
1418
+ *
1419
+ * @throws Error if the binary cannot be found
1420
+ */
1421
+ declare function findBinaryPath(): string;
1422
+ /**
1423
+ * Check if the binary is available.
1424
+ */
1425
+ declare function isBinaryAvailable(): boolean;
1426
+
1427
+ export { AGIClient, type AGIClientOptions, AGIError, APIError, AgentDriver, AgentExecutionError, AgentLoop, type AgentLoopOptions, type AgentSessionType, AuthenticationError, type CaptureScreenshotFn, type CreateSessionOptions, type DeleteResponse, type DesktopAction, type DesktopActionType, type DriverAction, type DriverCommand, type DriverEvent, type DriverOptions, type DriverResult, type DriverState, type EventType$1 as EventType, type ExecuteActionsFn, type ExecuteStatusResponse, type LoopState, type MessageOptions, type MessageResponse, type MessageType, type MessagesResponse, type ModelsResponse, type NavigateResponse, NotFoundError, type OnAskUserFn, type OnStepFn, type OnThinkingFn, PermissionError, type PlatformId, RateLimitError, type RunTaskOptions, type SSEEvent, Screenshot, type ScreenshotResponse, SessionContext, type SessionResponse, type SessionStatus, SessionsResource, type SnapshotMode, type StepDesktopRequest, type StepDesktopResponse, type StreamOptions, type SuccessResponse, type TaskMetadata, type TaskResult, ValidationError, findBinaryPath, getPlatformId, isBinaryAvailable };