@shiplightai/sdk 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,446 @@
1
+ import { Page } from 'playwright';
2
+ import { ICustomAction, AgentStepResult } from 'sdk-core';
3
+ export { ActionExecutionContext, CustomActionResult, ICustomAction, LogLevel, SdkConfig, configureSdk, getSdkConfig } from 'sdk-core';
4
+ export { VariableStore } from 'shiplight-types';
5
+ export { z } from 'zod';
6
+
7
+ /**
8
+ * Public types for @shiplightai/sdk
9
+ *
10
+ * Custom action types are imported from sdk-core.
11
+ * This file contains types specific to the public SDK.
12
+ */
13
+
14
+ /**
15
+ * Options accepted by createAgent() and the Agent constructor.
16
+ */
17
+ interface CreateAgentOptions {
18
+ /** LLM model to use (e.g., 'gemini-2.5-pro', 'gpt-4o') */
19
+ model: string;
20
+ /** Initial variables to set in the agent's variable store (referenced in instructions as $variableName) */
21
+ variables?: Record<string, any>;
22
+ /**
23
+ * Variable keys to mark as sensitive (values won't be sent to LLM).
24
+ * Use for passwords, API keys, tokens, etc.
25
+ */
26
+ sensitiveKeys?: string[];
27
+ /** Directory for test data files (uploads, fixtures) */
28
+ testDataDir?: string;
29
+ /** Directory for downloads */
30
+ downloadDir?: string;
31
+ /**
32
+ * Self-healing strategy when actions fail (step() can override it per call via maxSteps).
33
+ * - 'none': No self-healing, fail immediately
34
+ * - 'single': Single retry with AI (default)
35
+ * - 'multi': Multi-step recovery with AI
36
+ */
37
+ selfHealingStrategy?: 'none' | 'single' | 'multi';
38
+ }
39
+ /**
40
+ * Per-call options for Agent.step().
41
+ */
42
+ interface StepOptions {
43
+ /**
44
+ * Maximum number of AI steps for self-healing.
45
+ * When specified, overrides the global selfHealingStrategy:
46
+ * - 0 or negative: no self-healing, fail immediately
47
+ * - 1: single retry with AI
48
+ * - >1: multi-step recovery with AI
49
+ *
50
+ * If not specified, uses the global selfHealingStrategy (default: 'single').
51
+ */
52
+ maxSteps?: number;
53
+ }
54
+ /**
55
+ * Per-call options for Agent.run().
56
+ */
57
+ interface RunOptions {
58
+ /**
59
+ * Maximum number of steps the agent can take to complete the instruction.
60
+ * - 1: Single action (uses efficient single-step execution)
61
+ * - 2+: Multi-step execution with limit
62
+ * - Default: 15 steps
63
+ */
64
+ maxSteps?: number;
65
+ }
66
+ /**
67
+ * Credentials and target page for Agent.login().
68
+ */
69
+ interface LoginOptions {
70
+ /** URL of the login page (the agent navigates to it before entering credentials) */
71
+ url: string;
72
+ /** Username or email for login */
73
+ username: string;
74
+ /** Password for login */
75
+ password: string;
76
+ /**
77
+ * TOTP secret key for 2FA (if required), e.g. 'JBSWY3DPEHPK3PXP'.
78
+ * The agent will generate the OTP code automatically.
79
+ */
80
+ totpSecret?: string;
81
+ }
82
+
83
+ /**
84
+ * Agent - Main entry point for browser automation with custom actions
85
+ */
86
+
87
+ /**
88
+ * Browser automation agent with custom action support.
89
+ *
90
+ * @example
91
+ * ```typescript
92
+ * import { createAgent, configureSdk, z } from '@shiplightai/sdk';
93
+ *
94
+ * // Configure SDK with API key (call once at startup)
95
+ * configureSdk({
96
+ * env: { GOOGLE_API_KEY: process.env.GOOGLE_API_KEY },
97
+ * });
98
+ *
99
+ * const agent = createAgent({
100
+ * model: 'gemini-2.5-pro',
101
+ * variables: { username: 'test@example.com', password: 'secret123' },
102
+ * sensitiveKeys: ['password'],
103
+ * });
104
+ *
105
+ * // Register a custom action
106
+ * agent.registerAction({
107
+ * name: 'extract_email_code',
108
+ * description: 'Extract verification code from email inbox',
109
+ * schema: z.object({
110
+ * email_address: z.string(),
111
+ * }),
112
+ * async execute(args, ctx) {
113
+ * const code = await getEmailCode(args.email_address);
114
+ * ctx.variableStore.set('code', code);
115
+ * return { success: true };
116
+ * },
117
+ * });
118
+ *
119
+ * // Use the agent
120
+ * await agent.act(page, 'Fill email with $username');
121
+ * await agent.act(page, 'Click submit');
122
+ * await agent.act(page, 'Get the verification code from email');
123
+ * await agent.act(page, 'Enter $code in the verification field');
124
+ * await agent.assert(page, 'Dashboard is visible');
125
+ * ```
126
+ */
127
+ declare class Agent {
128
+ private webAgent;
129
+ private variableStore;
130
+ private customActions;
131
+ constructor(options: CreateAgentOptions);
132
+ /**
133
+ * Register a custom action.
134
+ *
135
+ * Custom actions extend the agent's capabilities. The agent will automatically
136
+ * call your action when the task requires it, based on the name and description.
137
+ *
138
+ * @param action - Custom action definition
139
+ * @throws {Error} If action is missing required fields or name is already registered
140
+ *
141
+ * @example
142
+ * ```typescript
143
+ * agent.registerAction({
144
+ * name: 'send_sms',
145
+ * description: 'Send an SMS message to a phone number',
146
+ * schema: z.object({
147
+ * phone: z.string().describe('Phone number with country code'),
148
+ * message: z.string().describe('Message content'),
149
+ * }),
150
+ * async execute(args, ctx) {
151
+ * await twilioClient.send(args.phone, args.message);
152
+ * return { success: true, message: 'SMS sent' };
153
+ * },
154
+ * });
155
+ * ```
156
+ */
157
+ registerAction(action: ICustomAction): void;
158
+ /**
159
+ * Perform a single action on the page.
160
+ *
161
+ * Use this for discrete actions like clicking a button, filling a field,
162
+ * or selecting an option. The agent executes exactly one action.
163
+ *
164
+ * @param page - Playwright page instance
165
+ * @param instruction - Natural language instruction for a single action
166
+ * @returns Result with success status and details
167
+ *
168
+ * @example
169
+ * ```typescript
170
+ * await agent.act(page, 'Click the login button');
171
+ * await agent.act(page, 'Fill the email field with $username');
172
+ * await agent.act(page, 'Select "Express" from the shipping dropdown');
173
+ * ```
174
+ */
175
+ act(page: Page, instruction: string): Promise<AgentStepResult>;
176
+ /**
177
+ * Run a multi-step instruction until the goal is achieved.
178
+ *
179
+ * Use this for complex tasks that require multiple actions, like
180
+ * "Complete the checkout process" or "Fill out the registration form".
181
+ * The agent will take multiple steps until the goal is reached.
182
+ *
183
+ * @param page - Playwright page instance
184
+ * @param instruction - Natural language instruction describing the goal
185
+ * @param options - Optional configuration (maxSteps defaults to 15)
186
+ * @returns Result with success status and details
187
+ *
188
+ * @example
189
+ * ```typescript
190
+ * // Multi-step tasks
191
+ * await agent.run(page, 'Complete the checkout process');
192
+ * await agent.run(page, 'Fill out the entire registration form');
193
+ *
194
+ * // Limit steps to prevent runaway execution
195
+ * await agent.run(page, 'Add 3 items to cart', { maxSteps: 10 });
196
+ * ```
197
+ */
198
+ run(page: Page, instruction: string, options?: RunOptions): Promise<AgentStepResult>;
199
+ /**
200
+ * Assert a condition on the page.
201
+ *
202
+ * The agent will analyze the page and determine if the assertion is true.
203
+ * Throws an error if the assertion fails.
204
+ *
205
+ * @param page - Playwright page instance
206
+ * @param statement - Assertion statement (e.g., "Login button is visible")
207
+ * @returns true if assertion passes
208
+ * @throws {Error} If assertion fails
209
+ *
210
+ * @example
211
+ * ```typescript
212
+ * await agent.assert(page, 'The dashboard shows welcome message');
213
+ * await agent.assert(page, 'Shopping cart has 3 items');
214
+ * await agent.assert(page, 'Error message is not displayed');
215
+ * ```
216
+ */
217
+ assert(page: Page, statement: string): Promise<boolean>;
218
+ /**
219
+ * Evaluate a condition on the page (returns boolean, doesn't throw).
220
+ *
221
+ * Similar to assert() but returns false instead of throwing on failure.
222
+ * Use this for conditional logic in tests.
223
+ *
224
+ * @param page - Playwright page instance
225
+ * @param statement - Condition to evaluate (e.g., "User is logged in")
226
+ * @returns true if condition is met, false otherwise
227
+ *
228
+ * @example
229
+ * ```typescript
230
+ * const isLoggedIn = await agent.evaluate(page, 'User is logged in');
231
+ * if (!isLoggedIn) {
232
+ * await agent.act(page, 'Click the login button');
233
+ * }
234
+ * ```
235
+ */
236
+ evaluate(page: Page, statement: string): Promise<boolean>;
237
+ /**
238
+ * Extract data from an element and store in a variable.
239
+ *
240
+ * @param page - Playwright page instance
241
+ * @param elementDescription - Description of element to extract from
242
+ * @param variableName - Name of variable to store the value (read it back with getVariable())
243
+ *
244
+ * @example
245
+ * ```typescript
246
+ * await agent.extract(page, 'the order total', 'orderTotal');
247
+ * // Later use: await agent.run(page, 'Verify $orderTotal matches invoice');
248
+ * ```
249
+ */
250
+ extract(page: Page, elementDescription: string, variableName: string): Promise<void>;
251
+ /**
252
+ * Perform automated login.
253
+ *
254
+ * The agent will navigate to the login URL, find login fields, enter credentials,
255
+ * handle 2FA if configured, and verify successful login.
256
+ *
257
+ * @param page - Playwright page instance
258
+ * @param options - Login URL, credentials, and options
259
+ * @returns true if login was successful (NOTE(review): failure mode, false vs. thrown error, is not specified here; confirm)
260
+ *
261
+ * @example
262
+ * ```typescript
263
+ * await agent.login(page, {
264
+ * url: 'https://example.com/login',
265
+ * username: 'user@example.com',
266
+ * password: 'secret123',
267
+ * });
268
+ * await agent.assert(page, 'Dashboard is visible');
269
+ * ```
270
+ *
271
+ * @example
272
+ * ```typescript
273
+ * // With 2FA
274
+ * await agent.login(page, {
275
+ * url: 'https://example.com/login',
276
+ * username: 'user@example.com',
277
+ * password: 'secret123',
278
+ * totpSecret: 'JBSWY3DPEHPK3PXP',
279
+ * });
280
+ * ```
281
+ */
282
+ login(page: Page, options: LoginOptions): Promise<boolean>;
283
+ /**
284
+ * Get a variable value from the variable store.
285
+ *
286
+ * Use this to access values that were set via extract() or setVariable().
287
+ *
288
+ * @param name - Variable name
289
+ * @returns Variable value, or undefined if not set (NOTE(review): typed as string although CreateAgentOptions.variables accepts any value; confirm)
290
+ *
291
+ * @example
292
+ * ```typescript
293
+ * await agent.extract(page, 'the order total', 'orderTotal');
294
+ * const total = agent.getVariable('orderTotal');
295
+ * console.log('Order total:', total);
296
+ * ```
297
+ */
298
+ getVariable(name: string): string | undefined;
299
+ /**
300
+ * Set a variable value in the variable store.
301
+ *
302
+ * Variables can be referenced in instructions using $variableName syntax.
303
+ *
304
+ * @param name - Variable name
305
+ * @param value - Variable value
306
+ * @param sensitive - If true, value will be masked in logs (default: false). NOTE(review): presumably the same marking as CreateAgentOptions.sensitiveKeys; confirm
307
+ *
308
+ * @example
309
+ * ```typescript
310
+ * agent.setVariable('couponCode', 'SAVE20');
311
+ * await agent.run(page, 'Enter $couponCode in the promo field');
312
+ *
313
+ * // Sensitive values are masked in logs
314
+ * agent.setVariable('apiKey', 'secret123', true);
315
+ * ```
316
+ */
317
+ setVariable(name: string, value: string, sensitive?: boolean): void;
318
+ /**
319
+ * Wait until a condition becomes true.
320
+ *
321
+ * Polls the page state and evaluates whether the condition is met.
322
+ * Useful for waiting on dynamic content, animations, or async operations.
323
+ *
324
+ * @param page - Playwright page instance
325
+ * @param condition - Natural language condition to wait for
326
+ * @param timeoutSeconds - Maximum wait time in seconds (default: 60)
327
+ * @returns true if condition was met, false if timeout
328
+ *
329
+ * @example
330
+ * ```typescript
331
+ * // Wait for loading to complete
332
+ * await agent.waitUntil(page, 'Loading spinner is no longer visible');
333
+ *
334
+ * // Wait for data to appear
335
+ * const appeared = await agent.waitUntil(page, 'Table shows at least 5 rows', 30);
336
+ * if (!appeared) {
337
+ * throw new Error('Data did not load in time');
338
+ * }
339
+ *
340
+ * // Wait for modal to close
341
+ * await agent.waitUntil(page, 'Confirmation modal is closed');
342
+ * ```
343
+ */
344
+ waitUntil(page: Page, condition: string, timeoutSeconds?: number): Promise<boolean>;
345
+ /**
346
+ * Execute Playwright code with self-healing.
347
+ *
348
+ * Wraps Playwright code with automatic recovery. If the code throws
349
+ * an exception, the agent will analyze the page and attempt to accomplish
350
+ * the goal described in `description`.
351
+ *
352
+ * The `description` parameter is crucial - it tells the agent what you're trying
353
+ * to achieve, so it can find alternative ways to accomplish the goal when
354
+ * the original code fails (e.g., due to changed selectors or page structure).
355
+ *
356
+ * Self-healing behavior is controlled by:
357
+ * - Global `selfHealingStrategy` (set in createAgent options, default: 'single')
358
+ * - Per-call `maxSteps` overrides global strategy: 0 or negative=none, 1=single, >1=multi
359
+ *
360
+ * @param page - Playwright page instance
361
+ * @param action - Async function containing Playwright code to execute
362
+ * @param description - Intent description - what the agent should accomplish if action fails
363
+ * @param options - Optional configuration for this call
364
+ * @returns Result with success status and action details
365
+ *
366
+ * @example
367
+ * ```typescript
368
+ * // Single action with self-healing
369
+ * await agent.step(
370
+ * page,
371
+ * async () => await page.click('#submit-btn'),
372
+ * 'Click the submit button'
373
+ * );
374
+ *
375
+ * // Code block with multiple actions
376
+ * await agent.step(
377
+ * page,
378
+ * async () => {
379
+ * await page.fill('#email', 'user@example.com');
380
+ * await page.fill('#password', 'secret');
381
+ * await page.click('#login');
382
+ * },
383
+ * 'Fill login form and submit'
384
+ * );
385
+ *
386
+ * // With maxSteps for multi-step recovery
387
+ * await agent.step(
388
+ * page,
389
+ * async () => await page.click('.dynamic-button'),
390
+ * 'Click the dynamic button that appears after loading',
391
+ * { maxSteps: 5 }
392
+ * );
393
+ * ```
394
+ */
395
+ step(page: Page, action: () => Promise<void>, description: string, options?: StepOptions): Promise<AgentStepResult>;
396
+ }
397
+ /**
398
+ * Create a browser automation agent.
399
+ *
400
+ * This is the main entry point for the SDK. Creates an agent that can
401
+ * execute natural language instructions and supports custom actions.
402
+ *
403
+ * @param options - Agent configuration options (`model` is required; all other fields are optional)
404
+ * @returns Configured Agent instance
405
+ *
406
+ * @example
407
+ * ```typescript
408
+ * import { createAgent, configureSdk, z } from '@shiplightai/sdk';
409
+ *
410
+ * // Configure SDK with API key (call once at startup)
411
+ * configureSdk({
412
+ * env: { GOOGLE_API_KEY: process.env.GOOGLE_API_KEY },
413
+ * });
414
+ *
415
+ * const agent = createAgent({
416
+ * model: 'gemini-2.5-pro',
417
+ * variables: {
418
+ * username: 'test@example.com',
419
+ * password: 'secret123',
420
+ * },
421
+ * sensitiveKeys: ['password'],
422
+ * });
423
+ *
424
+ * // Register custom actions
425
+ * agent.registerAction({
426
+ * name: 'get_otp',
427
+ * description: 'Get OTP code from authenticator',
428
+ * schema: z.object({}),
429
+ * async execute(args, ctx) {
430
+ * const code = await generateOTP();
431
+ * ctx.variableStore.set('otp', code);
432
+ * return { success: true };
433
+ * },
434
+ * });
435
+ *
436
+ * // Run automation
437
+ * await agent.act(page, 'Fill username with $username');
438
+ * await agent.act(page, 'Fill password with $password');
439
+ * await agent.act(page, 'Click login');
440
+ * await agent.act(page, 'Enter the OTP code');
441
+ * await agent.assert(page, 'Dashboard is visible');
442
+ * ```
443
+ */
444
+ declare function createAgent(options: CreateAgentOptions): Agent;
445
+
446
+ export { Agent, type CreateAgentOptions, type LoginOptions, type RunOptions, type StepOptions, createAgent };