gpt-driver-node 1.0.0-alpha.1 → 1.0.0-alpha.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs ADDED
@@ -0,0 +1,367 @@
1
+ 'use strict';
2
+
3
+ var axios = require('axios');
4
+ var sharp = require('sharp');
5
+
6
/**
 * Pauses the calling async flow for the given number of milliseconds.
 *
 * @param {number} milliseconds - How long to wait before the returned promise settles.
 * @returns {Promise<void>} A promise that resolves (with no value) once the timer fires.
 */
const delay = async (milliseconds) => {
  const timerElapsed = new Promise((wake) => {
    setTimeout(wake, milliseconds);
  });
  await timerElapsed;
};
9
/**
 * Joins a base URL and a path with exactly one "/" at the seam.
 *
 * At most one trailing "/" is removed from the base, and a leading "/" is added
 * to the path when it has none.
 *
 * @param {URL | string} base - Base URL; converted with `toString()`.
 * @param {string} extraPath - Path to append, with or without a leading "/".
 * @returns {string} The combined URL string.
 */
function buildUrl(base, extraPath) {
  const root = base.toString();
  const trimmedRoot = root.endsWith("/") ? root.slice(0, -1) : root;
  const suffix = extraPath.startsWith("/") ? extraPath : "/" + extraPath;
  return `${trimmedRoot}${suffix}`;
}
19
+
20
+ class GptDriver {
21
+ apiKey;
22
+ gptDriverSessionId;
23
+ gptDriverBaseUrl;
24
+ appiumSessionConfig;
25
+ driver;
26
+ /**
27
+ * Creates an instance of the GptDriver class.
28
+ *
29
+ * Initializes the GptDriver instance with the given configuration. This includes:
30
+ *
31
+ * - Setting the API key used for authenticating requests to the GPT Driver server.
32
+ * - Configuring the WebDriver instance if provided or validating server configuration if no WebDriver is given.
33
+ * - Setting up the Appium session configuration, including constructing the server URL and integrating device settings.
34
+ *
35
+ * @param {GptDriverConfig} config - The configuration object for initializing the GptDriver instance. This includes:
36
+ * - `apiKey`: The API key for authenticating requests to the GPT Driver server.
37
+ * - `driver` (optional): An existing WebDriver instance.
38
+ * - `severConfig` (optional): Configuration for the Appium server, including URL and device settings.
39
+ * @throws {Error} If a WebDriver instance is provided without a server URL, or if neither a WebDriver instance nor
40
+ * a valid server configuration is supplied. A valid server configuration must include:
41
+ * - `url`: The URL of the server.
42
+ * - `device.platform`: The platform name of the device (e.g., iOS, Android).
43
+ */
44
+ constructor(config) {
45
+ this.apiKey = config.apiKey;
46
+ this.gptDriverBaseUrl = "https://api.mobileboost.io";
47
+ this.initializeDriver(config);
48
+ this.initializeAppiumConfig(config);
49
+ }
50
+ initializeDriver(config) {
51
+ if (config.driver) {
52
+ this.driver = config.driver;
53
+ if (!config.serverConfig?.url) {
54
+ throw new Error("Server url is missing. Please specify the server url when providing a driver.");
55
+ }
56
+ } else {
57
+ const isValidServerConfig = config.serverConfig?.url && config.serverConfig.device?.platform;
58
+ if (!isValidServerConfig) {
59
+ throw new Error("Either provide a driver, or a valid severConfig object.");
60
+ }
61
+ }
62
+ }
63
+ initializeAppiumConfig(config) {
64
+ const defaultPort = parseInt(process.env.APPIUM_PORT ?? "4723", 10);
65
+ const defaultHost = process.env.APPIUM_HOST ?? "127.0.0.1";
66
+ let serverUrl = config.serverConfig?.url instanceof URL ? config.serverConfig.url : new URL(config.serverConfig?.url ?? `http://${defaultHost}:${defaultPort}`);
67
+ this.appiumSessionConfig = {
68
+ serverUrl,
69
+ ...config.serverConfig?.device
70
+ };
71
+ }
72
+ /**
73
+ * Starts a new GPTDriver session and initializes the Appium session.
74
+ * The session creation process is logged, and a link is provided to monitor the session's execution.
75
+ *
76
+ * @throws {Error} If the session cannot be started or the driver is not properly initialized.
77
+ */
78
+ async startSession() {
79
+ console.log(">> Starting session...");
80
+ if (this.driver) {
81
+ let platform;
82
+ let platformVersion;
83
+ let deviceName;
84
+ let sessionId;
85
+ if (this.driver.sessionId == null) {
86
+ const driver = this.driver;
87
+ const capabilities = await driver.getCapabilities();
88
+ platform = capabilities.get("platformName");
89
+ platformVersion = capabilities.get("platformVersion") ?? this.appiumSessionConfig?.platformVersion;
90
+ deviceName = this.appiumSessionConfig?.deviceName ?? capabilities.get("deviceName");
91
+ const session = await driver.getSession();
92
+ sessionId = session.getId();
93
+ } else {
94
+ const driver = this.driver;
95
+ platform = driver.capabilities["appium:platformName"] ?? driver.capabilities["platformName"];
96
+ platformVersion = driver.capabilities["appium:platformVersion"] ?? driver.capabilities["platformVersion"];
97
+ deviceName = this.appiumSessionConfig?.deviceName ?? driver.capabilities["appium:deviceName"] ?? driver.capabilities["deviceName"];
98
+ sessionId = driver.sessionId;
99
+ }
100
+ this.appiumSessionConfig = {
101
+ ...this.appiumSessionConfig,
102
+ id: sessionId,
103
+ platform,
104
+ platformVersion,
105
+ deviceName
106
+ };
107
+ } else {
108
+ this.appiumSessionConfig.id = await this.createSession();
109
+ }
110
+ await this.createGptDriverSession();
111
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/window/rect`);
112
+ const rectResponse = await axios.get(
113
+ url
114
+ );
115
+ this.appiumSessionConfig.size = {
116
+ width: rectResponse.data.value.width,
117
+ height: rectResponse.data.value.height
118
+ };
119
+ console.log(`>> Session created. Monitor execution at: ${this.getSessionLink()}`);
120
+ }
121
+ async createSession() {
122
+ const { platform, deviceName, platformVersion, serverUrl } = this.appiumSessionConfig;
123
+ const url = buildUrl(serverUrl, `/session`);
124
+ const response = await axios.post(
125
+ url,
126
+ {
127
+ capabilities: {
128
+ alwaysMatch: {
129
+ platformName: platform,
130
+ "appium:automationName": platform === "iOS" ? "XCUITest" : "UiAutomator2",
131
+ "appium:deviceName": deviceName,
132
+ "appium:platformVersion": platformVersion
133
+ }
134
+ }
135
+ }
136
+ );
137
+ return response.data.value.sessionId;
138
+ }
139
+ async createGptDriverSession() {
140
+ const response = await axios.post(
141
+ `${this.gptDriverBaseUrl}/sessions/create`,
142
+ {
143
+ api_key: this.apiKey,
144
+ appium_session_id: this.appiumSessionConfig.id,
145
+ device_config: {
146
+ platform: this.appiumSessionConfig.platform,
147
+ device: this.appiumSessionConfig.deviceName,
148
+ os: this.appiumSessionConfig.platformVersion
149
+ }
150
+ }
151
+ );
152
+ this.gptDriverSessionId = response.data.sessionId;
153
+ }
154
+ getSessionLink() {
155
+ return `https://app.mobileboost.io/gpt-driver/sessions/${this.gptDriverSessionId}`;
156
+ }
157
+ /**
158
+ * Stops the current GPTDriver session and update its state.
159
+ *
160
+ * This method sends a request to the GPT Driver server to stop the session and logs the session status as either "failed" or "success."
161
+ *
162
+ * @param {"failed" | "success"} status - Indicates the outcome of the session.
163
+ * Use "success" if the session completed as expected,
164
+ * or "failed" if the session encountered an error or issue.
165
+ *
166
+ * @throws {Error} If the request to stop the session fails.
167
+ */
168
+ async stopSession(status) {
169
+ console.log(">> Stopping session...");
170
+ await axios.post(
171
+ `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/stop`,
172
+ {
173
+ api_key: this.apiKey,
174
+ status
175
+ }
176
+ );
177
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}`);
178
+ await axios.delete(url);
179
+ console.log(">> Session stopped.");
180
+ this.gptDriverSessionId = void 0;
181
+ }
182
+ /**
183
+ * Executes a specified command within the WebDriver session, optionally using an Appium handler.
184
+ *
185
+ * If an `appiumHandler` is provided, it will be invoked with the WebDriver instance to perform
186
+ * the command-specific operations. After executing the handler, the executed commands get logged on the GPTDriver servers.
187
+ * If the handler execution fails or no handler is provided, the command gets executed by the GPTDriver using just natural language.
188
+ *
189
+ * @param {string} command - The natural language command to be executed by the GPTDriver.
190
+ * @param {AppiumHandler} [appiumHandler] - An optional function that processes Appium-specific commands.
191
+ * If provided, this handler is executed instead of calling the GPTDriver serves.
192
+ *
193
+ * @throws {Error} If an error occurs during the execution of the Appium handler or while processing the command by the GPTDriver.
194
+ */
195
+ async execute(command, appiumHandler) {
196
+ console.log(">> Executing command:", command);
197
+ const driver = this.driver;
198
+ if (appiumHandler != null) {
199
+ try {
200
+ await appiumHandler(driver);
201
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
202
+ await axios.post(`${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/log_code_execution`, {
203
+ api_key: this.apiKey,
204
+ base64_screenshot: screenshot,
205
+ command: appiumHandler.toString()
206
+ });
207
+ } catch (e) {
208
+ await this.gptHandler(command);
209
+ }
210
+ } else {
211
+ await this.gptHandler(command);
212
+ }
213
+ }
214
+ /**
215
+ * Asserts a single condition using the GPTDriver.
216
+ *
217
+ * This method sends an assertion request and verifies if the specified condition is met.
218
+ * If the assertion fails, an error is thrown.
219
+ *
220
+ * @param {string} assertion - The condition to be asserted.
221
+ * @throws {Error} If the assertion fails.
222
+ */
223
+ async assert(assertion) {
224
+ console.log(">> Asserting:", assertion);
225
+ const results = await this.checkBulk([assertion]);
226
+ if (!Object.values(results).at(0)) {
227
+ throw new Error(`Failed assertion: ${assertion}`);
228
+ }
229
+ }
230
+ /**
231
+ * Asserts multiple conditions using the GPTDriver.
232
+ *
233
+ * This method sends a bulk assertion request and verifies if all specified conditions are met.
234
+ * If any assertion fails, an error is thrown listing all failed assertions.
235
+ *
236
+ * @param {string[]} assertions - An array of conditions to be asserted.
237
+ * @throws {Error} If any of the assertions fail.
238
+ */
239
+ async assertBulk(assertions) {
240
+ console.log(">> Asserting:", assertions);
241
+ const results = await this.checkBulk(assertions);
242
+ const failedAssertions = Object.values(results).reduce((prev, current, currentIndex) => {
243
+ if (!current) {
244
+ return [...prev, assertions.at(currentIndex)];
245
+ }
246
+ return prev;
247
+ }, []);
248
+ if (failedAssertions.length > 0) {
249
+ throw new Error(`Failed assertions: ${failedAssertions.join(", ")}`);
250
+ }
251
+ }
252
+ /**
253
+ * Checks multiple conditions and returns their results using the GPTDriver.
254
+ *
255
+ * This method sends a bulk condition request and returns the results of the conditions.
256
+ *
257
+ * @param {string[]} conditions - An array of conditions to be checked.
258
+ * @returns {Promise<Record<string, boolean>>} A promise that resolves with an object mapping each condition
259
+ * to a boolean indicating whether the condition was met.
260
+ */
261
+ async checkBulk(conditions) {
262
+ console.log(">> Checking:", conditions);
263
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
264
+ const response = await axios.post(
265
+ `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
266
+ {
267
+ api_key: this.apiKey,
268
+ base64_screenshot: screenshot,
269
+ assertions: conditions,
270
+ command: `Assert: ${JSON.stringify(conditions)}`
271
+ }
272
+ );
273
+ return response.data.results;
274
+ }
275
+ /**
276
+ * Extracts specified information using the GPTDriver.
277
+ *
278
+ * This method sends a request to perform data extraction based on
279
+ * the provided extraction criteria and returns the results of the extractions.
280
+ *
281
+ * @param {string[]} extractions - An array of extraction criteria. Each criterion specifies what information
282
+ * should be extracted from the session.
283
+ * @returns {Promise<Record<string, any>>} A promise that resolves with an object mapping each extraction criterion
284
+ * to the extracted data. The structure of the returned data depends on the
285
+ * specifics of the extraction criteria.
286
+ */
287
+ async extract(extractions) {
288
+ console.log(">> Extracting:", extractions);
289
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
290
+ const response = await axios.post(
291
+ `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/extract`,
292
+ {
293
+ api_key: this.apiKey,
294
+ base64_screenshot: screenshot,
295
+ extractions,
296
+ command: `Extract: ${JSON.stringify(extractions)}`
297
+ }
298
+ );
299
+ return response.data.results;
300
+ }
301
+ async gptHandler(command) {
302
+ try {
303
+ let conditionSucceeded = false;
304
+ while (!conditionSucceeded) {
305
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
306
+ console.log(">> Asking GTP Driver for next action...");
307
+ const response = await axios.request(
308
+ {
309
+ url: `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/execute`,
310
+ method: "POST",
311
+ data: {
312
+ api_key: this.apiKey,
313
+ command,
314
+ base64_screenshot: screenshot
315
+ }
316
+ }
317
+ );
318
+ const executeStatus = response.data.status;
319
+ if (executeStatus === "failed") {
320
+ const errorMessage = response?.data?.commands?.at(0)?.data;
321
+ throw new Error(errorMessage ?? "Execution failed");
322
+ }
323
+ conditionSucceeded = executeStatus !== "inProgress";
324
+ const executeResponse = response.data;
325
+ for (const command2 of executeResponse.commands) {
326
+ await this.executeCommand(command2);
327
+ }
328
+ if (!conditionSucceeded) {
329
+ await delay(1500);
330
+ }
331
+ }
332
+ } catch (e) {
333
+ await this.stopSession("failed");
334
+ throw e;
335
+ }
336
+ }
337
+ async executeCommand(command) {
338
+ const firstAction = command.data.actions?.at(0);
339
+ if (firstAction?.type === "pause" && firstAction.duration != null) {
340
+ await delay(firstAction * 1e3);
341
+ } else {
342
+ const parsedUrl = new URL(command.url);
343
+ parsedUrl.protocol = this.appiumSessionConfig.serverUrl.protocol;
344
+ parsedUrl.host = this.appiumSessionConfig.serverUrl.host;
345
+ parsedUrl.port = this.appiumSessionConfig.serverUrl.port != "" ? `${this.appiumSessionConfig.serverUrl.port}` : "";
346
+ parsedUrl.pathname = this.appiumSessionConfig.serverUrl.pathname != "/" ? `${this.appiumSessionConfig.serverUrl.pathname}${parsedUrl.pathname}` : parsedUrl.pathname;
347
+ await axios.request({
348
+ url: parsedUrl.toString(),
349
+ method: command.method,
350
+ data: command.data
351
+ });
352
+ }
353
+ }
354
+ async getScreenshot(appiumSessionConfig) {
355
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
356
+ const screenshotResponse = await axios.get(url);
357
+ let screenshot = await screenshotResponse.data.value;
358
+ if (appiumSessionConfig.platform === "iOS") {
359
+ const imageBuffer = Buffer.from(screenshot, "base64");
360
+ const transformedImage = await sharp(imageBuffer).resize(appiumSessionConfig.size.width, appiumSessionConfig.size.height).toBuffer();
361
+ screenshot = transformedImage.toString("base64");
362
+ }
363
+ return screenshot;
364
+ }
365
+ }
366
+
367
+ module.exports = GptDriver;
@@ -0,0 +1,133 @@
1
+ import { WebDriver } from 'selenium-webdriver';
2
+ import { Browser } from 'webdriverio';
3
+
4
/** Mobile platforms accepted in the device configuration. */
type Platform = "Android" | "iOS";
5
/**
 * Caller-supplied function that performs a command with the given driver.
 * It receives the driver instance configured on the GptDriver.
 */
interface AppiumHandler {
  (driver: Browser | WebDriver): Promise<any>;
}
8
/** Optional device details used when setting up the Appium session. */
interface ServerSessionInitConfig {
  /** Name of the device to run on. */
  deviceName?: string;
  /** Target platform. */
  platform?: Platform;
  /** OS version of the device. */
  platformVersion?: string;
}
13
/** Configuration accepted by the GptDriver constructor. */
interface GptDriverConfig {
  /** API key for authenticating requests to the GPT Driver server. */
  apiKey: string;
  /** An existing driver instance to reuse instead of creating a new session. */
  driver?: Browser | WebDriver;
  /** Appium server settings. */
  serverConfig: {
    /** URL of the Appium server. */
    url?: string | URL;
    /** Device details for the session. */
    device?: ServerSessionInitConfig;
  };
}
21
+
22
/**
 * Client that drives an Appium session through the GPT Driver service.
 */
declare class GptDriver {
  private apiKey;
  private gptDriverSessionId?;
  private gptDriverBaseUrl;
  private appiumSessionConfig?;
  private driver?;
  /**
   * Creates an instance of the GptDriver class.
   *
   * Initializes the GptDriver instance with the given configuration. This includes:
   *
   * - Setting the API key used for authenticating requests to the GPT Driver server.
   * - Configuring the WebDriver instance if provided, or validating the server configuration if no WebDriver is given.
   * - Setting up the Appium session configuration, including constructing the server URL and integrating device settings.
   *
   * @param config - The configuration object for initializing the GptDriver instance. This includes:
   *   - `apiKey`: The API key for authenticating requests to the GPT Driver server.
   *   - `driver` (optional): An existing WebDriver instance.
   *   - `serverConfig`: Configuration for the Appium server, including URL and device settings.
   * @throws {Error} If a WebDriver instance is provided without a server URL, or if neither a WebDriver instance nor
   *   a valid server configuration is supplied. A valid server configuration must include:
   *   - `url`: The URL of the server.
   *   - `device.platform`: The platform name of the device (e.g., iOS, Android).
   */
  constructor(config: GptDriverConfig);
  private initializeDriver;
  private initializeAppiumConfig;
  /**
   * Starts a new GPT Driver session and initializes the Appium session.
   * The session creation process is logged, and a link is provided to monitor the session's execution.
   *
   * @throws {Error} If the session cannot be started or the driver is not properly initialized.
   */
  startSession(): Promise<void>;
  private createSession;
  private createGptDriverSession;
  private getSessionLink;
  /**
   * Stops the current GPT Driver session and updates its state.
   *
   * This method sends a request to the GPT Driver server to stop the session and records
   * the session status as either "failed" or "success".
   *
   * @param status - Indicates the outcome of the session.
   *   Use "success" if the session completed as expected,
   *   or "failed" if the session encountered an error or issue.
   *
   * @throws {Error} If the request to stop the session fails.
   */
  stopSession(status: "failed" | "success"): Promise<void>;
  /**
   * Executes a specified command within the WebDriver session, optionally using an Appium handler.
   *
   * If an `appiumHandler` is provided, it is invoked with the WebDriver instance to perform
   * the command-specific operations. After the handler has run, the executed code is logged on the GPT Driver servers.
   * If the handler execution fails or no handler is provided, the command is executed by the GPT Driver using just natural language.
   *
   * @param command - The natural language command to be executed by the GPT Driver.
   * @param appiumHandler - An optional function that performs the command with Appium calls.
   *   If provided, this handler is tried before calling the GPT Driver servers.
   *
   * @throws {Error} If an error occurs while the command is processed by the GPT Driver.
   *   Errors thrown by the Appium handler trigger the fallback and are not rethrown.
   */
  execute(command: string, appiumHandler?: AppiumHandler): Promise<void>;
  /**
   * Asserts a single condition using the GPT Driver.
   *
   * This method sends an assertion request and verifies if the specified condition is met.
   * If the assertion fails, an error is thrown.
   *
   * @param assertion - The condition to be asserted.
   * @throws {Error} If the assertion fails.
   */
  assert(assertion: string): Promise<void>;
  /**
   * Asserts multiple conditions using the GPT Driver.
   *
   * This method sends a bulk assertion request and verifies if all specified conditions are met.
   * If any assertion fails, an error is thrown listing all failed assertions.
   *
   * @param assertions - An array of conditions to be asserted.
   * @throws {Error} If any of the assertions fail.
   */
  assertBulk(assertions: string[]): Promise<void>;
  /**
   * Checks multiple conditions and returns their results using the GPT Driver.
   *
   * This method sends a bulk condition request and returns the results of the conditions.
   *
   * @param conditions - An array of conditions to be checked.
   * @returns A promise that resolves with an object mapping each condition
   *   to a boolean indicating whether the condition was met.
   */
  checkBulk(conditions: string[]): Promise<Record<string, boolean>>;
  /**
   * Extracts specified information using the GPT Driver.
   *
   * This method sends a request to perform data extraction based on
   * the provided extraction criteria and returns the results of the extractions.
   *
   * @param extractions - An array of extraction criteria. Each criterion specifies what information
   *   should be extracted from the session.
   * @returns A promise that resolves with an object mapping each extraction criterion
   *   to the extracted data. The structure of the returned data depends on the
   *   specifics of the extraction criteria.
   */
  extract(extractions: string[]): Promise<Record<string, any>>;
  private gptHandler;
  private executeCommand;
  private getScreenshot;
}
132
+
133
+ export { GptDriver as default };
package/dist/index.mjs ADDED
@@ -0,0 +1,365 @@
1
+ import axios from 'axios';
2
+ import sharp from 'sharp';
3
+
4
/**
 * Pauses the calling async flow for the given number of milliseconds.
 *
 * @param {number} milliseconds - How long to wait before the returned promise settles.
 * @returns {Promise<void>} A promise that resolves (with no value) once the timer fires.
 */
const delay = async (milliseconds) => {
  const timerElapsed = new Promise((wake) => {
    setTimeout(wake, milliseconds);
  });
  await timerElapsed;
};
7
/**
 * Joins a base URL and a path with exactly one "/" at the seam.
 *
 * At most one trailing "/" is removed from the base, and a leading "/" is added
 * to the path when it has none.
 *
 * @param {URL | string} base - Base URL; converted with `toString()`.
 * @param {string} extraPath - Path to append, with or without a leading "/".
 * @returns {string} The combined URL string.
 */
function buildUrl(base, extraPath) {
  const root = base.toString();
  const trimmedRoot = root.endsWith("/") ? root.slice(0, -1) : root;
  const suffix = extraPath.startsWith("/") ? extraPath : "/" + extraPath;
  return `${trimmedRoot}${suffix}`;
}
17
+
18
+ class GptDriver {
19
+ apiKey;
20
+ gptDriverSessionId;
21
+ gptDriverBaseUrl;
22
+ appiumSessionConfig;
23
+ driver;
24
+ /**
25
+ * Creates an instance of the GptDriver class.
26
+ *
27
+ * Initializes the GptDriver instance with the given configuration. This includes:
28
+ *
29
+ * - Setting the API key used for authenticating requests to the GPT Driver server.
30
+ * - Configuring the WebDriver instance if provided or validating server configuration if no WebDriver is given.
31
+ * - Setting up the Appium session configuration, including constructing the server URL and integrating device settings.
32
+ *
33
+ * @param {GptDriverConfig} config - The configuration object for initializing the GptDriver instance. This includes:
34
+ * - `apiKey`: The API key for authenticating requests to the GPT Driver server.
35
+ * - `driver` (optional): An existing WebDriver instance.
36
+ * - `severConfig` (optional): Configuration for the Appium server, including URL and device settings.
37
+ * @throws {Error} If a WebDriver instance is provided without a server URL, or if neither a WebDriver instance nor
38
+ * a valid server configuration is supplied. A valid server configuration must include:
39
+ * - `url`: The URL of the server.
40
+ * - `device.platform`: The platform name of the device (e.g., iOS, Android).
41
+ */
42
+ constructor(config) {
43
+ this.apiKey = config.apiKey;
44
+ this.gptDriverBaseUrl = "https://api.mobileboost.io";
45
+ this.initializeDriver(config);
46
+ this.initializeAppiumConfig(config);
47
+ }
48
+ initializeDriver(config) {
49
+ if (config.driver) {
50
+ this.driver = config.driver;
51
+ if (!config.serverConfig?.url) {
52
+ throw new Error("Server url is missing. Please specify the server url when providing a driver.");
53
+ }
54
+ } else {
55
+ const isValidServerConfig = config.serverConfig?.url && config.serverConfig.device?.platform;
56
+ if (!isValidServerConfig) {
57
+ throw new Error("Either provide a driver, or a valid severConfig object.");
58
+ }
59
+ }
60
+ }
61
+ initializeAppiumConfig(config) {
62
+ const defaultPort = parseInt(process.env.APPIUM_PORT ?? "4723", 10);
63
+ const defaultHost = process.env.APPIUM_HOST ?? "127.0.0.1";
64
+ let serverUrl = config.serverConfig?.url instanceof URL ? config.serverConfig.url : new URL(config.serverConfig?.url ?? `http://${defaultHost}:${defaultPort}`);
65
+ this.appiumSessionConfig = {
66
+ serverUrl,
67
+ ...config.serverConfig?.device
68
+ };
69
+ }
70
+ /**
71
+ * Starts a new GPTDriver session and initializes the Appium session.
72
+ * The session creation process is logged, and a link is provided to monitor the session's execution.
73
+ *
74
+ * @throws {Error} If the session cannot be started or the driver is not properly initialized.
75
+ */
76
+ async startSession() {
77
+ console.log(">> Starting session...");
78
+ if (this.driver) {
79
+ let platform;
80
+ let platformVersion;
81
+ let deviceName;
82
+ let sessionId;
83
+ if (this.driver.sessionId == null) {
84
+ const driver = this.driver;
85
+ const capabilities = await driver.getCapabilities();
86
+ platform = capabilities.get("platformName");
87
+ platformVersion = capabilities.get("platformVersion") ?? this.appiumSessionConfig?.platformVersion;
88
+ deviceName = this.appiumSessionConfig?.deviceName ?? capabilities.get("deviceName");
89
+ const session = await driver.getSession();
90
+ sessionId = session.getId();
91
+ } else {
92
+ const driver = this.driver;
93
+ platform = driver.capabilities["appium:platformName"] ?? driver.capabilities["platformName"];
94
+ platformVersion = driver.capabilities["appium:platformVersion"] ?? driver.capabilities["platformVersion"];
95
+ deviceName = this.appiumSessionConfig?.deviceName ?? driver.capabilities["appium:deviceName"] ?? driver.capabilities["deviceName"];
96
+ sessionId = driver.sessionId;
97
+ }
98
+ this.appiumSessionConfig = {
99
+ ...this.appiumSessionConfig,
100
+ id: sessionId,
101
+ platform,
102
+ platformVersion,
103
+ deviceName
104
+ };
105
+ } else {
106
+ this.appiumSessionConfig.id = await this.createSession();
107
+ }
108
+ await this.createGptDriverSession();
109
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/window/rect`);
110
+ const rectResponse = await axios.get(
111
+ url
112
+ );
113
+ this.appiumSessionConfig.size = {
114
+ width: rectResponse.data.value.width,
115
+ height: rectResponse.data.value.height
116
+ };
117
+ console.log(`>> Session created. Monitor execution at: ${this.getSessionLink()}`);
118
+ }
119
+ async createSession() {
120
+ const { platform, deviceName, platformVersion, serverUrl } = this.appiumSessionConfig;
121
+ const url = buildUrl(serverUrl, `/session`);
122
+ const response = await axios.post(
123
+ url,
124
+ {
125
+ capabilities: {
126
+ alwaysMatch: {
127
+ platformName: platform,
128
+ "appium:automationName": platform === "iOS" ? "XCUITest" : "UiAutomator2",
129
+ "appium:deviceName": deviceName,
130
+ "appium:platformVersion": platformVersion
131
+ }
132
+ }
133
+ }
134
+ );
135
+ return response.data.value.sessionId;
136
+ }
137
+ async createGptDriverSession() {
138
+ const response = await axios.post(
139
+ `${this.gptDriverBaseUrl}/sessions/create`,
140
+ {
141
+ api_key: this.apiKey,
142
+ appium_session_id: this.appiumSessionConfig.id,
143
+ device_config: {
144
+ platform: this.appiumSessionConfig.platform,
145
+ device: this.appiumSessionConfig.deviceName,
146
+ os: this.appiumSessionConfig.platformVersion
147
+ }
148
+ }
149
+ );
150
+ this.gptDriverSessionId = response.data.sessionId;
151
+ }
152
+ getSessionLink() {
153
+ return `https://app.mobileboost.io/gpt-driver/sessions/${this.gptDriverSessionId}`;
154
+ }
155
+ /**
156
+ * Stops the current GPTDriver session and update its state.
157
+ *
158
+ * This method sends a request to the GPT Driver server to stop the session and logs the session status as either "failed" or "success."
159
+ *
160
+ * @param {"failed" | "success"} status - Indicates the outcome of the session.
161
+ * Use "success" if the session completed as expected,
162
+ * or "failed" if the session encountered an error or issue.
163
+ *
164
+ * @throws {Error} If the request to stop the session fails.
165
+ */
166
+ async stopSession(status) {
167
+ console.log(">> Stopping session...");
168
+ await axios.post(
169
+ `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/stop`,
170
+ {
171
+ api_key: this.apiKey,
172
+ status
173
+ }
174
+ );
175
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}`);
176
+ await axios.delete(url);
177
+ console.log(">> Session stopped.");
178
+ this.gptDriverSessionId = void 0;
179
+ }
180
+ /**
181
+ * Executes a specified command within the WebDriver session, optionally using an Appium handler.
182
+ *
183
+ * If an `appiumHandler` is provided, it will be invoked with the WebDriver instance to perform
184
+ * the command-specific operations. After executing the handler, the executed commands get logged on the GPTDriver servers.
185
+ * If the handler execution fails or no handler is provided, the command gets executed by the GPTDriver using just natural language.
186
+ *
187
+ * @param {string} command - The natural language command to be executed by the GPTDriver.
188
+ * @param {AppiumHandler} [appiumHandler] - An optional function that processes Appium-specific commands.
189
+ * If provided, this handler is executed instead of calling the GPTDriver serves.
190
+ *
191
+ * @throws {Error} If an error occurs during the execution of the Appium handler or while processing the command by the GPTDriver.
192
+ */
193
+ async execute(command, appiumHandler) {
194
+ console.log(">> Executing command:", command);
195
+ const driver = this.driver;
196
+ if (appiumHandler != null) {
197
+ try {
198
+ await appiumHandler(driver);
199
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
200
+ await axios.post(`${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/log_code_execution`, {
201
+ api_key: this.apiKey,
202
+ base64_screenshot: screenshot,
203
+ command: appiumHandler.toString()
204
+ });
205
+ } catch (e) {
206
+ await this.gptHandler(command);
207
+ }
208
+ } else {
209
+ await this.gptHandler(command);
210
+ }
211
+ }
212
+ /**
213
+ * Asserts a single condition using the GPTDriver.
214
+ *
215
+ * This method sends an assertion request and verifies if the specified condition is met.
216
+ * If the assertion fails, an error is thrown.
217
+ *
218
+ * @param {string} assertion - The condition to be asserted.
219
+ * @throws {Error} If the assertion fails.
220
+ */
221
+ async assert(assertion) {
222
+ console.log(">> Asserting:", assertion);
223
+ const results = await this.checkBulk([assertion]);
224
+ if (!Object.values(results).at(0)) {
225
+ throw new Error(`Failed assertion: ${assertion}`);
226
+ }
227
+ }
228
+ /**
229
+ * Asserts multiple conditions using the GPTDriver.
230
+ *
231
+ * This method sends a bulk assertion request and verifies if all specified conditions are met.
232
+ * If any assertion fails, an error is thrown listing all failed assertions.
233
+ *
234
+ * @param {string[]} assertions - An array of conditions to be asserted.
235
+ * @throws {Error} If any of the assertions fail.
236
+ */
237
+ async assertBulk(assertions) {
238
+ console.log(">> Asserting:", assertions);
239
+ const results = await this.checkBulk(assertions);
240
+ const failedAssertions = Object.values(results).reduce((prev, current, currentIndex) => {
241
+ if (!current) {
242
+ return [...prev, assertions.at(currentIndex)];
243
+ }
244
+ return prev;
245
+ }, []);
246
+ if (failedAssertions.length > 0) {
247
+ throw new Error(`Failed assertions: ${failedAssertions.join(", ")}`);
248
+ }
249
+ }
250
+ /**
251
+ * Checks multiple conditions and returns their results using the GPTDriver.
252
+ *
253
+ * This method sends a bulk condition request and returns the results of the conditions.
254
+ *
255
+ * @param {string[]} conditions - An array of conditions to be checked.
256
+ * @returns {Promise<Record<string, boolean>>} A promise that resolves with an object mapping each condition
257
+ * to a boolean indicating whether the condition was met.
258
+ */
259
+ async checkBulk(conditions) {
260
+ console.log(">> Checking:", conditions);
261
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
262
+ const response = await axios.post(
263
+ `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
264
+ {
265
+ api_key: this.apiKey,
266
+ base64_screenshot: screenshot,
267
+ assertions: conditions,
268
+ command: `Assert: ${JSON.stringify(conditions)}`
269
+ }
270
+ );
271
+ return response.data.results;
272
+ }
273
+ /**
274
+ * Extracts specified information using the GPTDriver.
275
+ *
276
+ * This method sends a request to perform data extraction based on
277
+ * the provided extraction criteria and returns the results of the extractions.
278
+ *
279
+ * @param {string[]} extractions - An array of extraction criteria. Each criterion specifies what information
280
+ * should be extracted from the session.
281
+ * @returns {Promise<Record<string, any>>} A promise that resolves with an object mapping each extraction criterion
282
+ * to the extracted data. The structure of the returned data depends on the
283
+ * specifics of the extraction criteria.
284
+ */
285
+ async extract(extractions) {
286
+ console.log(">> Extracting:", extractions);
287
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
288
+ const response = await axios.post(
289
+ `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/extract`,
290
+ {
291
+ api_key: this.apiKey,
292
+ base64_screenshot: screenshot,
293
+ extractions,
294
+ command: `Extract: ${JSON.stringify(extractions)}`
295
+ }
296
+ );
297
+ return response.data.results;
298
+ }
299
+ async gptHandler(command) {
300
+ try {
301
+ let conditionSucceeded = false;
302
+ while (!conditionSucceeded) {
303
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
304
+ console.log(">> Asking GTP Driver for next action...");
305
+ const response = await axios.request(
306
+ {
307
+ url: `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/execute`,
308
+ method: "POST",
309
+ data: {
310
+ api_key: this.apiKey,
311
+ command,
312
+ base64_screenshot: screenshot
313
+ }
314
+ }
315
+ );
316
+ const executeStatus = response.data.status;
317
+ if (executeStatus === "failed") {
318
+ const errorMessage = response?.data?.commands?.at(0)?.data;
319
+ throw new Error(errorMessage ?? "Execution failed");
320
+ }
321
+ conditionSucceeded = executeStatus !== "inProgress";
322
+ const executeResponse = response.data;
323
+ for (const command2 of executeResponse.commands) {
324
+ await this.executeCommand(command2);
325
+ }
326
+ if (!conditionSucceeded) {
327
+ await delay(1500);
328
+ }
329
+ }
330
+ } catch (e) {
331
+ await this.stopSession("failed");
332
+ throw e;
333
+ }
334
+ }
335
+ async executeCommand(command) {
336
+ const firstAction = command.data.actions?.at(0);
337
+ if (firstAction?.type === "pause" && firstAction.duration != null) {
338
+ await delay(firstAction * 1e3);
339
+ } else {
340
+ const parsedUrl = new URL(command.url);
341
+ parsedUrl.protocol = this.appiumSessionConfig.serverUrl.protocol;
342
+ parsedUrl.host = this.appiumSessionConfig.serverUrl.host;
343
+ parsedUrl.port = this.appiumSessionConfig.serverUrl.port != "" ? `${this.appiumSessionConfig.serverUrl.port}` : "";
344
+ parsedUrl.pathname = this.appiumSessionConfig.serverUrl.pathname != "/" ? `${this.appiumSessionConfig.serverUrl.pathname}${parsedUrl.pathname}` : parsedUrl.pathname;
345
+ await axios.request({
346
+ url: parsedUrl.toString(),
347
+ method: command.method,
348
+ data: command.data
349
+ });
350
+ }
351
+ }
352
+ async getScreenshot(appiumSessionConfig) {
353
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
354
+ const screenshotResponse = await axios.get(url);
355
+ let screenshot = await screenshotResponse.data.value;
356
+ if (appiumSessionConfig.platform === "iOS") {
357
+ const imageBuffer = Buffer.from(screenshot, "base64");
358
+ const transformedImage = await sharp(imageBuffer).resize(appiumSessionConfig.size.width, appiumSessionConfig.size.height).toBuffer();
359
+ screenshot = transformedImage.toString("base64");
360
+ }
361
+ return screenshot;
362
+ }
363
+ }
364
+
365
+ export { GptDriver as default };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gpt-driver-node",
3
- "version": "1.0.0-alpha.1",
3
+ "version": "1.0.0-alpha.11",
4
4
  "main": "./dist/index.cjs",
5
5
  "module": "./dist/index.mjs",
6
6
  "types": "./dist/index.d.cts",
@@ -10,15 +10,18 @@
10
10
  "watch": "tsx watch src/index.ts",
11
11
  "test": "jest"
12
12
  },
13
+ "type": "module",
13
14
  "keywords": [],
14
15
  "author": "MobileBoost",
15
16
  "license": "CC BY-NC-SA 4.0",
16
17
  "description": "Test your mobile apps with the AI native GPT Driver (docs.mobileboost.io).",
17
18
  "dependencies": {
18
- "appium-xcuitest-driver": "^7.24.14",
19
+ "@types/selenium-webdriver": "^4.1.25",
20
+ "@wdio/types": "^9.0.4",
19
21
  "axios": "^1.7.3",
22
+ "selenium-webdriver": "^4.23.0",
20
23
  "sharp": "^0.33.4",
21
- "webdriverio": "^8.40.0"
24
+ "webdriverio": "^9.0.7"
22
25
  },
23
26
  "devDependencies": {
24
27
  "tsx": "^4.16.5",
package/tsconfig.json CHANGED
@@ -3,7 +3,8 @@
3
3
  "target": "ESNext",
4
4
  "module": "ESNext",
5
5
  "strict": true,
6
- // "esModuleInterop": true,
6
+ "outDir": "./dist",
7
+ "esModuleInterop": true,
7
8
  "declaration": true,
8
9
  "moduleResolution": "node",
9
10
  "isolatedModules": true,
package/src/helpers.ts DELETED
@@ -1,31 +0,0 @@
1
- import sharp from "sharp";
2
-
3
- const delay = async (milliseconds: number): Promise<void> => {
4
- await new Promise((resolve) => setTimeout(resolve, milliseconds));
5
- };
6
-
7
-
8
- const getScreenshot = async (driver: WebdriverIO.Browser, deviceSize: DeviceSize) => {
9
- let screenshot = await driver.takeScreenshot();
10
- if (driver.capabilities.platformName === "iOS") {
11
- const imageBuffer = Buffer.from(screenshot, 'base64');
12
- const transformedImage = await sharp(imageBuffer).resize(deviceSize.width, deviceSize.height).toBuffer()
13
- screenshot = transformedImage.toString('base64')
14
- }
15
-
16
- return screenshot;
17
- }
18
-
19
- const delayIfNeeded = async (lastResponseTime?: number) => {
20
- if (lastResponseTime == null) {
21
- return;
22
- }
23
- const currentTime = Date.now();
24
- const difference = currentTime - lastResponseTime;
25
-
26
- if (difference < 1500) {
27
- await delay(difference);
28
- }
29
- }
30
-
31
- export {delay, getScreenshot, delayIfNeeded}
package/src/index.ts DELETED
@@ -1,151 +0,0 @@
1
- import axios from "axios";
2
- import {remote, RemoteOptions} from 'webdriverio'
3
- import {delay, delayIfNeeded, getScreenshot} from "./helpers";
4
- import {AppiumHandler, Command, DeviceConfig, DeviceSize, ExecuteResponse, GptDriverConfig} from "./types";
5
-
6
- class GptDriver {
7
- apiKey: string;
8
- baseUrl: string;
9
- driver?: WebdriverIO.Browser;
10
- deviceConfig?: DeviceConfig;
11
- firestoreSessionId?: string;
12
- deviceSize?: DeviceSize;
13
- lastCommandExecutionTime?: number;
14
-
15
- constructor(config: GptDriverConfig) {
16
- this.apiKey = config.apiKey;
17
- this.baseUrl = "https://api.mobileboost.io";
18
-
19
- if (config.driver !== undefined) {
20
- this.driver = config.driver
21
- } else if (config.deviceConfig !== undefined) {
22
- this.deviceConfig = config.deviceConfig;
23
- } else {
24
- throw new Error("Either provide an appium driver or a deviceConfig object")
25
- }
26
- }
27
-
28
- async startSession() {
29
- if (this.driver == undefined && this.deviceConfig != undefined) {
30
- const capabilities = {
31
- platformName: this.deviceConfig.platform,
32
- 'appium:automationName': this.deviceConfig.platform === "Android" ? 'UiAutomator2' : "XCUITest",
33
- 'appium:deviceName': this.deviceConfig.deviceName,
34
- };
35
-
36
- const wdOpts: RemoteOptions = {
37
- hostname: process.env.APPIUM_HOST || 'localhost',
38
- port: process.env.APPIUM_PORT != null ? parseInt(process.env.APPIUM_PORT, 10) : 4723,
39
- logLevel: 'info',
40
- capabilities: capabilities,
41
- };
42
-
43
- this.driver = await remote(wdOpts);
44
- }
45
-
46
-
47
- const driver = this.driver!;
48
-
49
- const response = await axios.post(
50
- `${this.baseUrl}/sessions/create`,
51
- {
52
- api_key: this.apiKey,
53
- appium_session_id: driver.sessionId,
54
- device_config: {
55
- // @ts-ignore
56
- platform: driver.capabilities.platformName,
57
- // @ts-ignore
58
- device: driver.capabilities.deviceName,
59
- // @ts-ignore
60
- os: driver.capabilities.platformVersion
61
- },
62
- },
63
- )
64
- const rect = await driver.getWindowRect();
65
- this.deviceSize = {width: rect.width, height: rect.height}
66
-
67
- this.firestoreSessionId = response.data.sessionId;
68
-
69
- }
70
-
71
- async stopSession({status}: { status: "failed" | "success" }) {
72
- await axios.post(
73
- `${this.baseUrl}/sessions/${this.firestoreSessionId}/stop`,
74
- {
75
- api_key: this.apiKey,
76
- status,
77
- },
78
- )
79
-
80
- this.firestoreSessionId = undefined;
81
- }
82
-
83
- async execute(command: string, appiumHandler?: AppiumHandler) {
84
- const driver = this.driver!;
85
-
86
- if (appiumHandler != null) {
87
- try {
88
- await appiumHandler(driver);
89
- } catch (e) {
90
- await this.gptHandler(command)
91
- }
92
- } else {
93
- await this.gptHandler(command)
94
- }
95
- }
96
-
97
- private async gptHandler(command: string) {
98
- const driver = this.driver!;
99
-
100
- try {
101
- let conditionSucceeded = false;
102
-
103
- while (!conditionSucceeded) {
104
- await delayIfNeeded(this.lastCommandExecutionTime);
105
- const screenshot = await getScreenshot(driver, this.deviceSize!)
106
-
107
- const response = await axios.request({
108
- url: `${this.baseUrl}/sessions/${this.firestoreSessionId}/execute`,
109
- method: "POST",
110
- data: {
111
- api_key: this.apiKey,
112
- command,
113
- base64_screenshot: screenshot,
114
- },
115
- }
116
- )
117
- const executeStatus = response.data.status;
118
- if (executeStatus === "failed") {
119
- throw new Error("Execution failed");
120
- }
121
-
122
- conditionSucceeded = executeStatus !== "inProgress";
123
- const executeResponse: ExecuteResponse = response.data;
124
- for (const command of executeResponse.commands) {
125
- await this.executeCommand(command)
126
- }
127
- this.lastCommandExecutionTime = Date.now();
128
- }
129
- } catch (e) {
130
- await this.stopSession({status: "failed"})
131
- throw e;
132
- }
133
-
134
- }
135
-
136
- private async executeCommand(command: Command) {
137
- const firstAction = command.data.actions?.at(0);
138
- if (firstAction?.type === "pause" && firstAction.duration != null) {
139
- await delay(firstAction * 1000)
140
- } else {
141
- await axios.request({
142
- url: command.url,
143
- method: command.method,
144
- data: command.data
145
- })
146
- }
147
-
148
- }
149
- }
150
-
151
- export default GptDriver;
package/src/types.ts DELETED
@@ -1,33 +0,0 @@
1
- interface ExecuteResponse {
2
- commands: Command[]
3
- status: "inProgress" | "success" | "failed"
4
- }
5
-
6
- interface Command {
7
- method: 'GET' | 'DELETE' | 'POST';
8
- url: string;
9
- data: any;
10
- }
11
-
12
- interface AppiumHandler {
13
- (driver: WebdriverIO.Browser): Promise<any>;
14
- }
15
-
16
- interface DeviceConfig {
17
- platform: "iOS" | "Android";
18
- deviceName?: string;
19
- platformVersion?: string;
20
- }
21
-
22
- interface GptDriverConfig {
23
- apiKey: string;
24
- driver?: WebdriverIO.Browser;
25
- deviceConfig?: DeviceConfig;
26
- }
27
-
28
- interface DeviceSize {
29
- width: number,
30
- height: number
31
- }
32
-
33
- export type {ExecuteResponse, Command, AppiumHandler, DeviceConfig, GptDriverConfig, DeviceSize}