agi 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -1
- package/dist/index.d.mts +609 -6
- package/dist/index.d.ts +609 -6
- package/dist/index.js +788 -5
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +783 -6
- package/dist/index.mjs.map +1 -1
- package/package.json +9 -1
package/dist/index.d.ts
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { EventEmitter } from 'events';
|
|
2
|
+
|
|
1
3
|
/**
|
|
2
4
|
* Result types for task execution
|
|
3
5
|
*/
|
|
@@ -98,8 +100,9 @@ declare class Screenshot {
|
|
|
98
100
|
*/
|
|
99
101
|
|
|
100
102
|
type SnapshotMode = 'none' | 'memory' | 'filesystem';
|
|
103
|
+
type AgentSessionType = 'managed-cdp' | 'external-cdp' | 'desktop';
|
|
101
104
|
type SessionStatus = 'ready' | 'running' | 'waiting_for_input' | 'paused' | 'finished' | 'error';
|
|
102
|
-
type EventType = 'step' | 'thought' | 'question' | 'done' | 'error' | 'log' | 'paused' | 'resumed' | 'heartbeat' | 'user';
|
|
105
|
+
type EventType$1 = 'step' | 'thought' | 'question' | 'done' | 'error' | 'log' | 'paused' | 'resumed' | 'heartbeat' | 'user';
|
|
103
106
|
type MessageType = 'THOUGHT' | 'QUESTION' | 'USER' | 'DONE' | 'ERROR' | 'LOG';
|
|
104
107
|
interface AGIClientOptions {
|
|
105
108
|
/** API key for authentication */
|
|
@@ -120,6 +123,10 @@ interface CreateSessionOptions {
|
|
|
120
123
|
maxSteps?: number;
|
|
121
124
|
/** Environment UUID to restore from */
|
|
122
125
|
restoreFromEnvironmentId?: string;
|
|
126
|
+
/** Agent session type */
|
|
127
|
+
agentSessionType?: AgentSessionType;
|
|
128
|
+
/** External CDP WebSocket URL (required for external-cdp session type) */
|
|
129
|
+
cdpUrl?: string;
|
|
123
130
|
}
|
|
124
131
|
interface SessionResponse {
|
|
125
132
|
/** Session UUID */
|
|
@@ -138,6 +145,8 @@ interface SessionResponse {
|
|
|
138
145
|
environmentId?: string;
|
|
139
146
|
/** Task goal */
|
|
140
147
|
goal?: string;
|
|
148
|
+
/** Agent session type */
|
|
149
|
+
agentSessionType?: string;
|
|
141
150
|
}
|
|
142
151
|
interface ExecuteStatusResponse {
|
|
143
152
|
/** Current execution status */
|
|
@@ -167,7 +176,7 @@ interface SSEEvent {
|
|
|
167
176
|
/** Event ID */
|
|
168
177
|
id?: string;
|
|
169
178
|
/** Event type */
|
|
170
|
-
event: EventType;
|
|
179
|
+
event: EventType$1;
|
|
171
180
|
/** Event data */
|
|
172
181
|
data: Record<string, unknown>;
|
|
173
182
|
}
|
|
@@ -215,12 +224,71 @@ interface RunTaskOptions {
|
|
|
215
224
|
}
|
|
216
225
|
interface StreamOptions {
|
|
217
226
|
/** Event types to filter */
|
|
218
|
-
eventTypes?: EventType[];
|
|
227
|
+
eventTypes?: EventType$1[];
|
|
219
228
|
/** Sanitize event data */
|
|
220
229
|
sanitize?: boolean;
|
|
221
230
|
/** Include historical events */
|
|
222
231
|
includeHistory?: boolean;
|
|
223
232
|
}
|
|
233
|
+
type DesktopActionType = 'click' | 'double_click' | 'triple_click' | 'right_click' | 'hover' | 'type' | 'key' | 'scroll' | 'drag' | 'wait' | 'finish' | 'fail' | 'confirm' | 'ask_question';
|
|
234
|
+
interface DesktopAction {
|
|
235
|
+
/** Action type */
|
|
236
|
+
type: DesktopActionType | string;
|
|
237
|
+
/** X coordinate (for click, hover, scroll) */
|
|
238
|
+
x?: number;
|
|
239
|
+
/** Y coordinate (for click, hover, scroll) */
|
|
240
|
+
y?: number;
|
|
241
|
+
/** Text to type (for type action) */
|
|
242
|
+
text?: string;
|
|
243
|
+
/** Content to type (alias for text) */
|
|
244
|
+
content?: string;
|
|
245
|
+
/** Key or key combination (for key action) */
|
|
246
|
+
key?: string;
|
|
247
|
+
/** Scroll direction (for scroll action) */
|
|
248
|
+
direction?: 'up' | 'down' | 'left' | 'right';
|
|
249
|
+
/** Scroll amount in lines (for scroll action) */
|
|
250
|
+
amount?: number;
|
|
251
|
+
/** Wait duration in seconds (for wait action) */
|
|
252
|
+
duration?: number;
|
|
253
|
+
/** Start X coordinate (for drag action) */
|
|
254
|
+
start_x?: number;
|
|
255
|
+
/** Start Y coordinate (for drag action) */
|
|
256
|
+
start_y?: number;
|
|
257
|
+
/** End X coordinate (for drag action) */
|
|
258
|
+
end_x?: number;
|
|
259
|
+
/** End Y coordinate (for drag action) */
|
|
260
|
+
end_y?: number;
|
|
261
|
+
/** Summary message (for finish action) */
|
|
262
|
+
summary?: string;
|
|
263
|
+
/** Reason message (for fail action) */
|
|
264
|
+
reason?: string;
|
|
265
|
+
/** Question to ask (for ask_question action) */
|
|
266
|
+
question?: string;
|
|
267
|
+
/** Additional action properties */
|
|
268
|
+
[key: string]: unknown;
|
|
269
|
+
}
|
|
270
|
+
interface StepDesktopRequest {
|
|
271
|
+
/** Base64-encoded screenshot (full resolution). Viewport dimensions are extracted from the image. */
|
|
272
|
+
screenshot: string;
|
|
273
|
+
/** Optional user message (e.g., initial goal or follow-up instruction) */
|
|
274
|
+
message?: string;
|
|
275
|
+
}
|
|
276
|
+
interface StepDesktopResponse {
|
|
277
|
+
/** Actions to execute (flat format) */
|
|
278
|
+
actions: DesktopAction[];
|
|
279
|
+
/** Model reasoning */
|
|
280
|
+
thinking?: string;
|
|
281
|
+
/** Whether the task is complete */
|
|
282
|
+
finished: boolean;
|
|
283
|
+
/** Question for user, if any */
|
|
284
|
+
askUser?: string;
|
|
285
|
+
/** Current step number */
|
|
286
|
+
step: number;
|
|
287
|
+
}
|
|
288
|
+
interface ModelsResponse {
|
|
289
|
+
/** Available agent models */
|
|
290
|
+
models: string[];
|
|
291
|
+
}
|
|
224
292
|
|
|
225
293
|
/**
|
|
226
294
|
* Session context manager for high-level API
|
|
@@ -431,6 +499,10 @@ declare class HTTPClient {
|
|
|
431
499
|
* Make an HTTP request with retries and error handling
|
|
432
500
|
*/
|
|
433
501
|
request<T>(method: string, path: string, options?: RequestOptions): Promise<T>;
|
|
502
|
+
/**
|
|
503
|
+
* Make an HTTP request to an absolute URL with retries and error handling
|
|
504
|
+
*/
|
|
505
|
+
requestUrl<T>(method: string, url: string, options?: RequestOptions): Promise<T>;
|
|
434
506
|
/**
|
|
435
507
|
* Stream Server-Sent Events from an endpoint
|
|
436
508
|
*/
|
|
@@ -455,16 +527,24 @@ declare class SessionsResource {
|
|
|
455
527
|
/**
|
|
456
528
|
* Create a new agent session
|
|
457
529
|
*
|
|
458
|
-
* @param agentName - Agent model to use (e.g., "agi-0", "agi-
|
|
530
|
+
* @param agentName - Agent model to use (e.g., "agi-0", "agi-2-claude")
|
|
459
531
|
* @param options - Session creation options
|
|
460
|
-
* @returns SessionResponse with
|
|
532
|
+
* @returns SessionResponse with sessionId, vncUrl, agentUrl, status, etc.
|
|
461
533
|
*
|
|
462
534
|
* @example
|
|
463
535
|
* ```typescript
|
|
536
|
+
* // Standard browser session
|
|
464
537
|
* const session = await client.sessions.create('agi-0', {
|
|
465
538
|
* webhookUrl: 'https://yourapp.com/webhook',
|
|
466
539
|
* maxSteps: 200
|
|
467
540
|
* });
|
|
541
|
+
*
|
|
542
|
+
* // Desktop session (client-managed)
|
|
543
|
+
* const session = await client.sessions.create('agi-2-claude', {
|
|
544
|
+
* agentSessionType: 'desktop',
|
|
545
|
+
* goal: 'Open calculator and compute 2+2'
|
|
546
|
+
* });
|
|
547
|
+
* console.log(session.agentUrl); // Use with client.sessions.step()
|
|
468
548
|
* ```
|
|
469
549
|
*/
|
|
470
550
|
create(agentName?: string, options?: {
|
|
@@ -472,6 +552,8 @@ declare class SessionsResource {
|
|
|
472
552
|
goal?: string;
|
|
473
553
|
maxSteps?: number;
|
|
474
554
|
restoreFromEnvironmentId?: string;
|
|
555
|
+
agentSessionType?: string;
|
|
556
|
+
cdpUrl?: string;
|
|
475
557
|
}): Promise<SessionResponse>;
|
|
476
558
|
/**
|
|
477
559
|
* List all sessions for the authenticated user
|
|
@@ -664,6 +746,66 @@ declare class SessionsResource {
|
|
|
664
746
|
* ```
|
|
665
747
|
*/
|
|
666
748
|
screenshot(sessionId: string): Promise<ScreenshotResponse>;
|
|
749
|
+
/**
|
|
750
|
+
* Execute a single step for client-driven sessions (desktop mode).
|
|
751
|
+
*
|
|
752
|
+
* In desktop mode (agentSessionType="desktop"), the client manages the
|
|
753
|
+
* execution loop. This method sends a screenshot to the agent and receives
|
|
754
|
+
* actions to execute locally.
|
|
755
|
+
*
|
|
756
|
+
* @param agentUrl - Agent service URL from session.agentUrl
|
|
757
|
+
* @param sessionId - Session ID (required for routing in shared sandbox)
|
|
758
|
+
* @param screenshot - Base64-encoded screenshot (full resolution, JPEG or PNG)
|
|
759
|
+
* @param message - Optional user message (goal on first call, or follow-up instruction)
|
|
760
|
+
* @returns StepDesktopResponse with actions, thinking, finished, askUser, and step
|
|
761
|
+
*
|
|
762
|
+
* @example
|
|
763
|
+
* ```typescript
|
|
764
|
+
* // Create a desktop session
|
|
765
|
+
* const session = await client.sessions.create('agi-2-claude', {
|
|
766
|
+
* agentSessionType: 'desktop',
|
|
767
|
+
* goal: 'Open calculator and compute 2+2'
|
|
768
|
+
* });
|
|
769
|
+
*
|
|
770
|
+
* // Client-managed loop
|
|
771
|
+
* let finished = false;
|
|
772
|
+
* while (!finished) {
|
|
773
|
+
* const screenshot = captureScreenshot(); // Client captures
|
|
774
|
+
* const result = await client.sessions.step(
|
|
775
|
+
* session.agentUrl!,
|
|
776
|
+
* session.sessionId,
|
|
777
|
+
* screenshot
|
|
778
|
+
* );
|
|
779
|
+
* executeActions(result.actions); // Client executes
|
|
780
|
+
* finished = result.finished;
|
|
781
|
+
* if (result.askUser) {
|
|
782
|
+
* const answer = await promptUser(result.askUser);
|
|
783
|
+
* // Send answer in next step
|
|
784
|
+
* }
|
|
785
|
+
* }
|
|
786
|
+
* ```
|
|
787
|
+
*/
|
|
788
|
+
step(agentUrl: string, sessionId: string, screenshot: string, message?: string): Promise<StepDesktopResponse>;
|
|
789
|
+
/**
|
|
790
|
+
* List available agent models.
|
|
791
|
+
*
|
|
792
|
+
* @param filter - Optional filter: "cdp" for browser agents, "desktop" for
|
|
793
|
+
* desktop agents
|
|
794
|
+
* @returns ModelsResponse with list of available model names
|
|
795
|
+
*
|
|
796
|
+
* @example
|
|
797
|
+
* ```typescript
|
|
798
|
+
* // List all models
|
|
799
|
+
* const models = await client.sessions.listModels();
|
|
800
|
+
* console.log(models.models);
|
|
801
|
+
*
|
|
802
|
+
* // List only desktop-compatible models
|
|
803
|
+
* const desktopModels = await client.sessions.listModels('desktop');
|
|
804
|
+
* console.log(desktopModels.models);
|
|
805
|
+
* // ['agi-2-claude', 'agi-2-qwen']
|
|
806
|
+
* ```
|
|
807
|
+
*/
|
|
808
|
+
listModels(filter?: string): Promise<ModelsResponse>;
|
|
667
809
|
}
|
|
668
810
|
|
|
669
811
|
/**
|
|
@@ -767,6 +909,179 @@ declare class AGIClient {
|
|
|
767
909
|
}): SessionContext;
|
|
768
910
|
}
|
|
769
911
|
|
|
912
|
+
/**
|
|
913
|
+
* Async event loop manager for client-driven sessions.
|
|
914
|
+
*/
|
|
915
|
+
|
|
916
|
+
/**
|
|
917
|
+
* State of the agent execution loop.
|
|
918
|
+
*/
|
|
919
|
+
type LoopState = 'idle' | 'running' | 'paused' | 'stopped' | 'finished';
|
|
920
|
+
/**
|
|
921
|
+
* Callback type for capturing screenshots.
|
|
922
|
+
* Should return a base64-encoded screenshot.
|
|
923
|
+
*/
|
|
924
|
+
type CaptureScreenshotFn = () => Promise<string>;
|
|
925
|
+
/**
|
|
926
|
+
* Callback type for executing actions.
|
|
927
|
+
*/
|
|
928
|
+
type ExecuteActionsFn = (actions: DesktopAction[]) => Promise<void>;
|
|
929
|
+
/**
|
|
930
|
+
* Callback type for handling thinking output.
|
|
931
|
+
*/
|
|
932
|
+
type OnThinkingFn = (thinking: string) => void;
|
|
933
|
+
/**
|
|
934
|
+
* Callback type for handling user questions.
|
|
935
|
+
* Should return the user's answer.
|
|
936
|
+
*/
|
|
937
|
+
type OnAskUserFn = (question: string) => Promise<string>;
|
|
938
|
+
/**
|
|
939
|
+
* Callback type for step completion.
|
|
940
|
+
*/
|
|
941
|
+
type OnStepFn = (step: number, response: StepDesktopResponse) => void;
|
|
942
|
+
/**
|
|
943
|
+
* Options for creating an AgentLoop.
|
|
944
|
+
*/
|
|
945
|
+
interface AgentLoopOptions {
|
|
946
|
+
/** AGIClient instance */
|
|
947
|
+
client: AGIClient;
|
|
948
|
+
/** Agent service URL from session.agentUrl */
|
|
949
|
+
agentUrl: string;
|
|
950
|
+
/** Session ID (required for routing in shared sandbox) */
|
|
951
|
+
sessionId: string;
|
|
952
|
+
/** Async callback that returns base64-encoded screenshot */
|
|
953
|
+
captureScreenshot: CaptureScreenshotFn;
|
|
954
|
+
/** Async callback that executes a list of actions */
|
|
955
|
+
executeActions: ExecuteActionsFn;
|
|
956
|
+
/** Optional callback for agent thinking/reasoning output */
|
|
957
|
+
onThinking?: OnThinkingFn;
|
|
958
|
+
/** Optional async callback to handle user questions. If not provided and agent asks a question, the loop will stop. */
|
|
959
|
+
onAskUser?: OnAskUserFn;
|
|
960
|
+
/** Optional callback called after each step with step number and full response */
|
|
961
|
+
onStep?: OnStepFn;
|
|
962
|
+
/** Optional delay in milliseconds between steps (default: 0) */
|
|
963
|
+
stepDelay?: number;
|
|
964
|
+
}
|
|
965
|
+
/**
|
|
966
|
+
* Async event loop manager for client-driven sessions.
|
|
967
|
+
*
|
|
968
|
+
* This class manages the execution loop for agent sessions where the client
|
|
969
|
+
* is responsible for capturing screenshots and executing actions (e.g., desktop,
|
|
970
|
+
* mobile, or any client-driven session type). It provides start/pause/resume/stop
|
|
971
|
+
* control over the loop.
|
|
972
|
+
*
|
|
973
|
+
* The loop:
|
|
974
|
+
* 1. Captures a screenshot using the provided callback
|
|
975
|
+
* 2. Sends it to the agent via step()
|
|
976
|
+
* 3. Executes returned actions using the provided callback
|
|
977
|
+
* 4. Repeats until finished or stopped
|
|
978
|
+
*
|
|
979
|
+
* @example
|
|
980
|
+
* ```typescript
|
|
981
|
+
* import { AGIClient, AgentLoop } from 'agi';
|
|
982
|
+
*
|
|
983
|
+
* const client = new AGIClient({ apiKey: '...' });
|
|
984
|
+
*
|
|
985
|
+
* // Create client-driven session
|
|
986
|
+
* const session = await client.sessions.create('agi-2-claude', {
|
|
987
|
+
* agentSessionType: 'desktop',
|
|
988
|
+
* goal: 'Open calculator and compute 2+2'
|
|
989
|
+
* });
|
|
990
|
+
*
|
|
991
|
+
* // Create and run loop
|
|
992
|
+
* const loop = new AgentLoop({
|
|
993
|
+
* client,
|
|
994
|
+
* agentUrl: session.agentUrl!, sessionId: session.sessionId,
|
|
995
|
+
* captureScreenshot: async () => {
|
|
996
|
+
* // Return base64-encoded screenshot
|
|
997
|
+
* return '...';
|
|
998
|
+
* },
|
|
999
|
+
* executeActions: async (actions) => {
|
|
1000
|
+
* for (const action of actions) {
|
|
1001
|
+
* console.log('Executing:', action);
|
|
1002
|
+
* }
|
|
1003
|
+
* },
|
|
1004
|
+
* onThinking: (t) => console.log('Thinking:', t),
|
|
1005
|
+
* });
|
|
1006
|
+
*
|
|
1007
|
+
* const result = await loop.start();
|
|
1008
|
+
* console.log('Finished:', result.finished);
|
|
1009
|
+
* ```
|
|
1010
|
+
*
|
|
1011
|
+
* @example Pause/resume control
|
|
1012
|
+
* ```typescript
|
|
1013
|
+
* // Start loop without awaiting
|
|
1014
|
+
* const promise = loop.start();
|
|
1015
|
+
*
|
|
1016
|
+
* // Pause after 5 seconds
|
|
1017
|
+
* setTimeout(() => loop.pause(), 5000);
|
|
1018
|
+
*
|
|
1019
|
+
* // Resume after user input
|
|
1020
|
+
* process.stdin.once('data', () => loop.resume());
|
|
1021
|
+
*
|
|
1022
|
+
* // Wait for completion
|
|
1023
|
+
* const result = await promise;
|
|
1024
|
+
* ```
|
|
1025
|
+
*/
|
|
1026
|
+
declare class AgentLoop {
|
|
1027
|
+
private readonly client;
|
|
1028
|
+
private readonly agentUrl;
|
|
1029
|
+
private readonly sessionId;
|
|
1030
|
+
private readonly captureScreenshot;
|
|
1031
|
+
private readonly executeActions;
|
|
1032
|
+
private readonly onThinking?;
|
|
1033
|
+
private readonly onAskUser?;
|
|
1034
|
+
private readonly onStep?;
|
|
1035
|
+
private readonly stepDelay;
|
|
1036
|
+
private _state;
|
|
1037
|
+
private _lastResult;
|
|
1038
|
+
private _currentStep;
|
|
1039
|
+
private pauseResolve;
|
|
1040
|
+
private pausePromise;
|
|
1041
|
+
constructor(options: AgentLoopOptions);
|
|
1042
|
+
/** Current state of the loop. */
|
|
1043
|
+
get state(): LoopState;
|
|
1044
|
+
/** Current step number. */
|
|
1045
|
+
get currentStep(): number;
|
|
1046
|
+
/** Last step result, if any. */
|
|
1047
|
+
get lastResult(): StepDesktopResponse | null;
|
|
1048
|
+
/**
|
|
1049
|
+
* Start the execution loop.
|
|
1050
|
+
*
|
|
1051
|
+
* Runs the loop until the task is finished, stopped, or an unhandled
|
|
1052
|
+
* ask_user question is encountered.
|
|
1053
|
+
*
|
|
1054
|
+
* @param message - Optional initial message (goal or instruction).
|
|
1055
|
+
* Usually not needed if goal was set during session creation.
|
|
1056
|
+
* @returns The final StepDesktopResponse
|
|
1057
|
+
* @throws Error if loop is already running
|
|
1058
|
+
*/
|
|
1059
|
+
start(message?: string): Promise<StepDesktopResponse>;
|
|
1060
|
+
/**
|
|
1061
|
+
* Pause the execution loop.
|
|
1062
|
+
*
|
|
1063
|
+
* The loop will complete the current step before pausing.
|
|
1064
|
+
* Call resume() to continue.
|
|
1065
|
+
*/
|
|
1066
|
+
pause(): void;
|
|
1067
|
+
/**
|
|
1068
|
+
* Resume a paused loop.
|
|
1069
|
+
*/
|
|
1070
|
+
resume(): void;
|
|
1071
|
+
/**
|
|
1072
|
+
* Stop the execution loop.
|
|
1073
|
+
*
|
|
1074
|
+
* The loop will complete the current step before stopping.
|
|
1075
|
+
*/
|
|
1076
|
+
stop(): void;
|
|
1077
|
+
/** Check if the loop is currently running. */
|
|
1078
|
+
isRunning(): boolean;
|
|
1079
|
+
/** Check if the loop is currently paused. */
|
|
1080
|
+
isPaused(): boolean;
|
|
1081
|
+
/** Check if the loop has finished successfully. */
|
|
1082
|
+
isFinished(): boolean;
|
|
1083
|
+
}
|
|
1084
|
+
|
|
770
1085
|
/**
|
|
771
1086
|
* Error classes for AGI SDK
|
|
772
1087
|
*/
|
|
@@ -821,4 +1136,292 @@ declare class APIError extends AGIError {
|
|
|
821
1136
|
constructor(message: string, statusCode?: number, response?: unknown);
|
|
822
1137
|
}
|
|
823
1138
|
|
|
824
|
-
|
|
1139
|
+
/**
|
|
1140
|
+
* Protocol types for driver communication.
|
|
1141
|
+
*
|
|
1142
|
+
* The driver communicates via JSON lines:
|
|
1143
|
+
* - Events are emitted on stdout (driver -> SDK)
|
|
1144
|
+
* - Commands are sent on stdin (SDK -> driver)
|
|
1145
|
+
*/
|
|
1146
|
+
type EventType = 'ready' | 'state_change' | 'thinking' | 'action' | 'confirm' | 'ask_question' | 'finished' | 'error' | 'screenshot_captured';
|
|
1147
|
+
type CommandType = 'start' | 'screenshot' | 'pause' | 'resume' | 'stop' | 'confirm' | 'answer';
|
|
1148
|
+
type DriverState = 'idle' | 'running' | 'paused' | 'waiting_confirmation' | 'waiting_answer' | 'finished' | 'stopped' | 'error';
|
|
1149
|
+
interface BaseEvent {
|
|
1150
|
+
event: EventType;
|
|
1151
|
+
step: number;
|
|
1152
|
+
}
|
|
1153
|
+
interface ReadyEvent extends BaseEvent {
|
|
1154
|
+
event: 'ready';
|
|
1155
|
+
version: string;
|
|
1156
|
+
protocol: string;
|
|
1157
|
+
}
|
|
1158
|
+
interface StateChangeEvent extends BaseEvent {
|
|
1159
|
+
event: 'state_change';
|
|
1160
|
+
state: DriverState;
|
|
1161
|
+
}
|
|
1162
|
+
interface ThinkingEvent extends BaseEvent {
|
|
1163
|
+
event: 'thinking';
|
|
1164
|
+
text: string;
|
|
1165
|
+
}
|
|
1166
|
+
interface ActionEvent extends BaseEvent {
|
|
1167
|
+
event: 'action';
|
|
1168
|
+
action: DriverAction;
|
|
1169
|
+
}
|
|
1170
|
+
interface ConfirmEvent extends BaseEvent {
|
|
1171
|
+
event: 'confirm';
|
|
1172
|
+
action: DriverAction;
|
|
1173
|
+
reason: string;
|
|
1174
|
+
}
|
|
1175
|
+
interface AskQuestionEvent extends BaseEvent {
|
|
1176
|
+
event: 'ask_question';
|
|
1177
|
+
question: string;
|
|
1178
|
+
question_id: string;
|
|
1179
|
+
}
|
|
1180
|
+
interface FinishedEvent extends BaseEvent {
|
|
1181
|
+
event: 'finished';
|
|
1182
|
+
reason: string;
|
|
1183
|
+
summary: string;
|
|
1184
|
+
success: boolean;
|
|
1185
|
+
}
|
|
1186
|
+
interface ErrorEvent extends BaseEvent {
|
|
1187
|
+
event: 'error';
|
|
1188
|
+
message: string;
|
|
1189
|
+
code: string;
|
|
1190
|
+
recoverable: boolean;
|
|
1191
|
+
}
|
|
1192
|
+
/**
|
|
1193
|
+
* Emitted in local mode when the driver captures a screenshot.
|
|
1194
|
+
* Lightweight notification (no image data).
|
|
1195
|
+
*/
|
|
1196
|
+
interface ScreenshotCapturedEvent extends BaseEvent {
|
|
1197
|
+
event: 'screenshot_captured';
|
|
1198
|
+
width: number;
|
|
1199
|
+
height: number;
|
|
1200
|
+
}
|
|
1201
|
+
type DriverEvent = ReadyEvent | StateChangeEvent | ThinkingEvent | ActionEvent | ConfirmEvent | AskQuestionEvent | FinishedEvent | ErrorEvent | ScreenshotCapturedEvent;
|
|
1202
|
+
interface DriverAction {
|
|
1203
|
+
type: string;
|
|
1204
|
+
x?: number;
|
|
1205
|
+
y?: number;
|
|
1206
|
+
[key: string]: unknown;
|
|
1207
|
+
}
|
|
1208
|
+
interface BaseCommand {
|
|
1209
|
+
command: CommandType;
|
|
1210
|
+
}
|
|
1211
|
+
interface StartCommand extends BaseCommand {
|
|
1212
|
+
command: 'start';
|
|
1213
|
+
session_id: string;
|
|
1214
|
+
goal: string;
|
|
1215
|
+
screenshot: string;
|
|
1216
|
+
screen_width: number;
|
|
1217
|
+
screen_height: number;
|
|
1218
|
+
platform: 'desktop' | 'android';
|
|
1219
|
+
model: string;
|
|
1220
|
+
/** "local" for autonomous mode, "" for legacy SDK-driven mode */
|
|
1221
|
+
mode?: string;
|
|
1222
|
+
}
|
|
1223
|
+
interface ScreenshotCommand extends BaseCommand {
|
|
1224
|
+
command: 'screenshot';
|
|
1225
|
+
data: string;
|
|
1226
|
+
screen_width: number;
|
|
1227
|
+
screen_height: number;
|
|
1228
|
+
}
|
|
1229
|
+
interface PauseCommand extends BaseCommand {
|
|
1230
|
+
command: 'pause';
|
|
1231
|
+
}
|
|
1232
|
+
interface ResumeCommand extends BaseCommand {
|
|
1233
|
+
command: 'resume';
|
|
1234
|
+
}
|
|
1235
|
+
interface StopCommand extends BaseCommand {
|
|
1236
|
+
command: 'stop';
|
|
1237
|
+
reason?: string;
|
|
1238
|
+
}
|
|
1239
|
+
interface ConfirmResponseCommand extends BaseCommand {
|
|
1240
|
+
command: 'confirm';
|
|
1241
|
+
approved: boolean;
|
|
1242
|
+
message?: string;
|
|
1243
|
+
}
|
|
1244
|
+
interface AnswerCommand extends BaseCommand {
|
|
1245
|
+
command: 'answer';
|
|
1246
|
+
text: string;
|
|
1247
|
+
question_id?: string;
|
|
1248
|
+
}
|
|
1249
|
+
type DriverCommand = StartCommand | ScreenshotCommand | PauseCommand | ResumeCommand | StopCommand | ConfirmResponseCommand | AnswerCommand;
|
|
1250
|
+
|
|
1251
|
+
/**
|
|
1252
|
+
* AgentDriver - Spawns and manages the agi-driver binary.
|
|
1253
|
+
*
|
|
1254
|
+
* The driver communicates via JSON lines over stdin/stdout and provides
|
|
1255
|
+
* an event-based interface for agent control.
|
|
1256
|
+
*/
|
|
1257
|
+
|
|
1258
|
+
/**
|
|
1259
|
+
* Options for creating an AgentDriver.
|
|
1260
|
+
*/
|
|
1261
|
+
interface DriverOptions {
|
|
1262
|
+
/** Path to the agi-driver binary. If not provided, will be auto-detected. */
|
|
1263
|
+
binaryPath?: string;
|
|
1264
|
+
/** Model to use (default: 'claude-sonnet') */
|
|
1265
|
+
model?: string;
|
|
1266
|
+
/** Platform type (default: 'desktop') */
|
|
1267
|
+
platform?: 'desktop' | 'android';
|
|
1268
|
+
/** "local" for autonomous mode, "" for legacy SDK-driven mode */
|
|
1269
|
+
mode?: string;
|
|
1270
|
+
/** Environment variables to pass to the driver process */
|
|
1271
|
+
env?: Record<string, string>;
|
|
1272
|
+
}
|
|
1273
|
+
/**
|
|
1274
|
+
* Result from running the driver.
|
|
1275
|
+
*/
|
|
1276
|
+
interface DriverResult {
|
|
1277
|
+
/** Whether the task completed successfully */
|
|
1278
|
+
success: boolean;
|
|
1279
|
+
/** Reason for completion */
|
|
1280
|
+
reason: string;
|
|
1281
|
+
/** Summary of what was accomplished */
|
|
1282
|
+
summary: string;
|
|
1283
|
+
/** Final step number */
|
|
1284
|
+
step: number;
|
|
1285
|
+
}
|
|
1286
|
+
/**
|
|
1287
|
+
* AgentDriver manages the lifecycle of the agi-driver binary.
|
|
1288
|
+
*
|
|
1289
|
+
* @example
|
|
1290
|
+
* ```typescript
|
|
1291
|
+
* const driver = new AgentDriver();
|
|
1292
|
+
*
|
|
1293
|
+
* driver.on('action', async (action) => {
|
|
1294
|
+
* // Execute the action on the local machine
|
|
1295
|
+
* await executeAction(action);
|
|
1296
|
+
* // Send next screenshot
|
|
1297
|
+
* driver.sendScreenshot(captureScreenshot());
|
|
1298
|
+
* });
|
|
1299
|
+
*
|
|
1300
|
+
* driver.on('thinking', (text) => {
|
|
1301
|
+
* console.log('Agent thinking:', text);
|
|
1302
|
+
* });
|
|
1303
|
+
*
|
|
1304
|
+
* driver.on('confirm', async (reason) => {
|
|
1305
|
+
* // Ask user for confirmation
|
|
1306
|
+
* return await askUser(reason);
|
|
1307
|
+
* });
|
|
1308
|
+
*
|
|
1309
|
+
* const result = await driver.start('Open calculator and compute 2+2');
|
|
1310
|
+
* console.log('Done:', result.summary);
|
|
1311
|
+
* ```
|
|
1312
|
+
*/
|
|
1313
|
+
declare class AgentDriver extends EventEmitter {
|
|
1314
|
+
private readonly binaryPath;
|
|
1315
|
+
private readonly pythonFallback;
|
|
1316
|
+
private readonly model;
|
|
1317
|
+
private readonly platform;
|
|
1318
|
+
private readonly mode;
|
|
1319
|
+
private readonly env;
|
|
1320
|
+
private process;
|
|
1321
|
+
private readline;
|
|
1322
|
+
private state;
|
|
1323
|
+
private step;
|
|
1324
|
+
private sessionId;
|
|
1325
|
+
private screenWidth;
|
|
1326
|
+
private screenHeight;
|
|
1327
|
+
private resolveStart;
|
|
1328
|
+
private rejectStart;
|
|
1329
|
+
private pendingConfirm;
|
|
1330
|
+
private pendingAnswer;
|
|
1331
|
+
constructor(options?: DriverOptions);
|
|
1332
|
+
/**
|
|
1333
|
+
* Get the current state of the driver.
|
|
1334
|
+
*/
|
|
1335
|
+
get currentState(): DriverState;
|
|
1336
|
+
/**
|
|
1337
|
+
* Get the current step number.
|
|
1338
|
+
*/
|
|
1339
|
+
get currentStep(): number;
|
|
1340
|
+
/**
|
|
1341
|
+
* Check if the driver is running.
|
|
1342
|
+
*/
|
|
1343
|
+
get isRunning(): boolean;
|
|
1344
|
+
/**
|
|
1345
|
+
* Check if the driver is waiting for user input.
|
|
1346
|
+
*/
|
|
1347
|
+
get isWaiting(): boolean;
|
|
1348
|
+
/**
|
|
1349
|
+
* Start the agent with a goal.
|
|
1350
|
+
*
|
|
1351
|
+
* @param goal - The task for the agent to accomplish
|
|
1352
|
+
* @param screenshot - Initial screenshot (base64-encoded). Not needed in local mode.
|
|
1353
|
+
* @param screenWidth - Screen width in pixels. Not needed in local mode.
|
|
1354
|
+
* @param screenHeight - Screen height in pixels. Not needed in local mode.
|
|
1355
|
+
* @param mode - Override the mode set in DriverOptions.
|
|
1356
|
+
* @returns Promise that resolves when the agent finishes
|
|
1357
|
+
*/
|
|
1358
|
+
start(goal: string, screenshot?: string, screenWidth?: number, screenHeight?: number, mode?: string): Promise<DriverResult>;
|
|
1359
|
+
/**
|
|
1360
|
+
* Send a new screenshot to the driver.
|
|
1361
|
+
*
|
|
1362
|
+
* @param screenshot - Base64-encoded screenshot
|
|
1363
|
+
* @param screenWidth - Screen width in pixels
|
|
1364
|
+
* @param screenHeight - Screen height in pixels
|
|
1365
|
+
*/
|
|
1366
|
+
sendScreenshot(screenshot: string, screenWidth?: number, screenHeight?: number): void;
|
|
1367
|
+
/**
|
|
1368
|
+
* Pause the driver.
|
|
1369
|
+
*/
|
|
1370
|
+
pause(): void;
|
|
1371
|
+
/**
|
|
1372
|
+
* Resume the driver.
|
|
1373
|
+
*/
|
|
1374
|
+
resume(): void;
|
|
1375
|
+
/**
|
|
1376
|
+
* Stop the driver.
|
|
1377
|
+
*
|
|
1378
|
+
* @param reason - Reason for stopping
|
|
1379
|
+
*/
|
|
1380
|
+
stop(reason?: string): Promise<void>;
|
|
1381
|
+
/**
|
|
1382
|
+
* Respond to a confirmation request.
|
|
1383
|
+
*
|
|
1384
|
+
* @param approved - Whether the action is approved
|
|
1385
|
+
* @param message - Optional message to send with the response
|
|
1386
|
+
*/
|
|
1387
|
+
respondConfirm(approved: boolean, message?: string): void;
|
|
1388
|
+
/**
|
|
1389
|
+
* Respond to a question.
|
|
1390
|
+
*
|
|
1391
|
+
* @param text - The answer text
|
|
1392
|
+
* @param questionId - Optional question ID
|
|
1393
|
+
*/
|
|
1394
|
+
respondAnswer(text: string, questionId?: string): void;
|
|
1395
|
+
private sendCommand;
|
|
1396
|
+
private handleLine;
|
|
1397
|
+
private handleFinished;
|
|
1398
|
+
private handleError;
|
|
1399
|
+
private cleanup;
|
|
1400
|
+
}
|
|
1401
|
+
|
|
1402
|
+
/**
|
|
1403
|
+
* Binary locator for the agi-driver.
|
|
1404
|
+
*
|
|
1405
|
+
* Finds the platform-specific binary bundled with the package
|
|
1406
|
+
* or falls back to a global installation.
|
|
1407
|
+
*/
|
|
1408
|
+
/**
|
|
1409
|
+
* Platform identifier for binary selection.
|
|
1410
|
+
*/
|
|
1411
|
+
type PlatformId = 'darwin-arm64' | 'darwin-x64' | 'linux-x64' | 'win32-x64';
|
|
1412
|
+
/**
|
|
1413
|
+
* Get the current platform identifier.
|
|
1414
|
+
*/
|
|
1415
|
+
declare function getPlatformId(): PlatformId;
|
|
1416
|
+
/**
|
|
1417
|
+
* Find the agi-driver binary path.
|
|
1418
|
+
*
|
|
1419
|
+
* @throws Error if the binary cannot be found
|
|
1420
|
+
*/
|
|
1421
|
+
declare function findBinaryPath(): string;
|
|
1422
|
+
/**
|
|
1423
|
+
* Check if the binary is available.
|
|
1424
|
+
*/
|
|
1425
|
+
declare function isBinaryAvailable(): boolean;
|
|
1426
|
+
|
|
1427
|
+
export { AGIClient, type AGIClientOptions, AGIError, APIError, AgentDriver, AgentExecutionError, AgentLoop, type AgentLoopOptions, type AgentSessionType, AuthenticationError, type CaptureScreenshotFn, type CreateSessionOptions, type DeleteResponse, type DesktopAction, type DesktopActionType, type DriverAction, type DriverCommand, type DriverEvent, type DriverOptions, type DriverResult, type DriverState, type EventType$1 as EventType, type ExecuteActionsFn, type ExecuteStatusResponse, type LoopState, type MessageOptions, type MessageResponse, type MessageType, type MessagesResponse, type ModelsResponse, type NavigateResponse, NotFoundError, type OnAskUserFn, type OnStepFn, type OnThinkingFn, PermissionError, type PlatformId, RateLimitError, type RunTaskOptions, type SSEEvent, Screenshot, type ScreenshotResponse, SessionContext, type SessionResponse, type SessionStatus, SessionsResource, type SnapshotMode, type StepDesktopRequest, type StepDesktopResponse, type StreamOptions, type SuccessResponse, type TaskMetadata, type TaskResult, ValidationError, findBinaryPath, getPlatformId, isBinaryAvailable };
|