gpt-driver-node 1.0.0-alpha.6 → 1.0.0-alpha.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1,21 +1,22 @@
1
1
  'use strict';
2
2
 
3
3
  var axios = require('axios');
4
- var sharp = require('sharp');
5
4
  var seleniumWebdriver = require('selenium-webdriver');
5
+ var sharp = require('sharp');
6
6
 
7
7
  const delay = async (milliseconds) => {
8
8
  await new Promise((resolve) => setTimeout(resolve, milliseconds));
9
9
  };
10
- const getScreenshot = async (appiumSessionConfig, driver) => {
11
- let screenshot = await driver.takeScreenshot();
12
- if (appiumSessionConfig.platform === "iOS") {
13
- const imageBuffer = Buffer.from(screenshot, "base64");
14
- const transformedImage = await sharp(imageBuffer).resize(appiumSessionConfig.size.width, appiumSessionConfig.size.height).toBuffer();
15
- screenshot = transformedImage.toString("base64");
10
+ function buildUrl(base, extraPath) {
11
+ let baseUrl = base.toString();
12
+ if (baseUrl.endsWith("/")) {
13
+ baseUrl = baseUrl.slice(0, -1);
16
14
  }
17
- return screenshot;
18
- };
15
+ if (!extraPath.startsWith("/")) {
16
+ extraPath = "/" + extraPath;
17
+ }
18
+ return `${baseUrl}${extraPath}`;
19
+ }
19
20
 
20
21
  class GptDriver {
21
22
  apiKey;
@@ -61,9 +62,9 @@ class GptDriver {
61
62
  }
62
63
  }
63
64
  initializeAppiumConfig(config) {
64
- const defaultPort = process.env.APPIUM_PORT ? parseInt(process.env.APPIUM_PORT, 10) : 4723;
65
+ const defaultPort = parseInt(process.env.APPIUM_PORT ?? "4723", 10);
65
66
  const defaultHost = process.env.APPIUM_HOST ?? "127.0.0.1";
66
- const serverUrl = config.severConfig?.url != null ? typeof config.severConfig?.url === "string" ? new URL(config.severConfig.url) : config.severConfig?.url : new URL(`http://${defaultPort}:${defaultHost}`);
67
+ let serverUrl = config.severConfig?.url instanceof URL ? config.severConfig.url : new URL(config.severConfig?.url ?? `http://${defaultHost}:${defaultPort}`);
67
68
  this.appiumSessionConfig = {
68
69
  serverUrl,
69
70
  ...config.severConfig?.device
@@ -78,37 +79,61 @@ class GptDriver {
78
79
  async startSession() {
79
80
  console.log(">> Starting session...");
80
81
  if (this.driver) {
81
- const capabilities = await this.driver.getCapabilities();
82
- const platform = capabilities.get("platformName");
83
- const platformVersion = capabilities.get("platformVersion") ?? this.appiumSessionConfig?.platformVersion;
84
- const deviceName = this.appiumSessionConfig?.deviceName ?? capabilities.get("deviceName");
82
+ let platform;
83
+ let platformVersion;
84
+ let deviceName;
85
+ let sessionId;
86
+ if (this.driver instanceof seleniumWebdriver.WebDriver) {
87
+ const capabilities = await this.driver.getCapabilities();
88
+ platform = capabilities.get("platformName");
89
+ platformVersion = capabilities.get("platformVersion") ?? this.appiumSessionConfig?.platformVersion;
90
+ deviceName = this.appiumSessionConfig?.deviceName ?? capabilities.get("deviceName");
91
+ const session = await this.driver.getSession();
92
+ sessionId = session.getId();
93
+ } else {
94
+ platform = this.driver.capabilities["appium:platformName"];
95
+ platformVersion = this.driver.capabilities["appium:platformVersion"];
96
+ deviceName = this.appiumSessionConfig?.deviceName ?? this.driver.capabilities["appium:deviceName"] ?? "";
97
+ sessionId = this.driver.sessionId;
98
+ }
85
99
  this.appiumSessionConfig = {
86
100
  ...this.appiumSessionConfig,
101
+ id: sessionId,
87
102
  platform,
88
103
  platformVersion,
89
104
  deviceName
90
105
  };
91
106
  } else {
92
- this.driver = await this.buildDriver();
107
+ this.appiumSessionConfig.id = await this.createSession();
93
108
  }
94
- const session = await this.driver.getSession();
95
- this.appiumSessionConfig.id = session.getId();
96
109
  await this.createGptDriverSession();
97
- const rectResponse = await this.driver.manage().window().getRect();
110
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/window/rect`);
111
+ const rectResponse = await axios.get(
112
+ url
113
+ );
98
114
  this.appiumSessionConfig.size = {
99
- width: rectResponse.width,
100
- height: rectResponse.height
115
+ width: rectResponse.data.value.width,
116
+ height: rectResponse.data.value.height
101
117
  };
102
118
  console.log(`>> Session created. Monitor execution at: ${this.getSessionLink()}`);
103
119
  }
104
- async buildDriver() {
105
- const { platform, deviceName, platformVersion } = this.appiumSessionConfig;
106
- return new seleniumWebdriver.Builder().withCapabilities({
107
- platformName: platform,
108
- automationName: platform === "iOS" ? "XCUITest" : "UiAutomator2",
109
- deviceName,
110
- platformVersion
111
- }).usingServer(this.appiumSessionConfig.serverUrl.toString()).build();
120
+ async createSession() {
121
+ const { platform, deviceName, platformVersion, serverUrl } = this.appiumSessionConfig;
122
+ const url = buildUrl(serverUrl, `/session`);
123
+ const response = await axios.post(
124
+ url,
125
+ {
126
+ capabilities: {
127
+ alwaysMatch: {
128
+ platformName: platform,
129
+ "appium:automationName": platform === "iOS" ? "XCUITest" : "UiAutomator2",
130
+ "appium:deviceName": deviceName,
131
+ "appium:platformVersion": platformVersion
132
+ }
133
+ }
134
+ }
135
+ );
136
+ return response.data.value.sessionId;
112
137
  }
113
138
  async createGptDriverSession() {
114
139
  const response = await axios.post(
@@ -148,6 +173,8 @@ class GptDriver {
148
173
  status
149
174
  }
150
175
  );
176
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}`);
177
+ await axios.delete(url);
151
178
  console.log(">> Session stopped.");
152
179
  this.gptDriverSessionId = void 0;
153
180
  }
@@ -170,7 +197,7 @@ class GptDriver {
170
197
  if (appiumHandler != null) {
171
198
  try {
172
199
  await appiumHandler(driver);
173
- const screenshot = await getScreenshot(this.appiumSessionConfig, this.driver);
200
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
174
201
  await axios.post(`${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/log_code_execution`, {
175
202
  api_key: this.apiKey,
176
203
  base64_screenshot: screenshot,
@@ -232,7 +259,7 @@ class GptDriver {
232
259
  */
233
260
  async checkBulk(conditions) {
234
261
  console.log(">> Checking:", conditions);
235
- const screenshot = await getScreenshot(this.appiumSessionConfig, this.driver);
262
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
236
263
  const response = await axios.post(
237
264
  `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
238
265
  {
@@ -258,7 +285,7 @@ class GptDriver {
258
285
  */
259
286
  async extract(extractions) {
260
287
  console.log(">> Extracting:", extractions);
261
- const screenshot = await getScreenshot(this.appiumSessionConfig, this.driver);
288
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
262
289
  const response = await axios.post(
263
290
  `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/extract`,
264
291
  {
@@ -274,7 +301,7 @@ class GptDriver {
274
301
  try {
275
302
  let conditionSucceeded = false;
276
303
  while (!conditionSucceeded) {
277
- const screenshot = await getScreenshot(this.appiumSessionConfig, this.driver);
304
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
278
305
  console.log(">> Asking GTP Driver for next action...");
279
306
  const response = await axios.request(
280
307
  {
@@ -323,6 +350,17 @@ class GptDriver {
323
350
  });
324
351
  }
325
352
  }
353
+ async getScreenshot(appiumSessionConfig) {
354
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
355
+ const screenshotResponse = await axios.get(url);
356
+ let screenshot = await screenshotResponse.data.value;
357
+ if (appiumSessionConfig.platform === "iOS") {
358
+ const imageBuffer = Buffer.from(screenshot, "base64");
359
+ const transformedImage = await sharp(imageBuffer).resize(appiumSessionConfig.size.width, appiumSessionConfig.size.height).toBuffer();
360
+ screenshot = transformedImage.toString("base64");
361
+ }
362
+ return screenshot;
363
+ }
326
364
  }
327
365
 
328
366
  module.exports = GptDriver;
package/dist/index.d.cts CHANGED
@@ -1,8 +1,9 @@
1
1
  import { WebDriver } from 'selenium-webdriver';
2
+ import { Browser } from 'webdriverio';
2
3
 
3
4
  type Platform = "iOS" | "Android";
4
5
  interface AppiumHandler {
5
- (driver: WebDriver): Promise<any>;
6
+ (driver: WebDriver | Browser): Promise<any>;
6
7
  }
7
8
  interface ServerSessionInitConfig {
8
9
  platform?: Platform;
@@ -11,7 +12,7 @@ interface ServerSessionInitConfig {
11
12
  }
12
13
  interface GptDriverConfig {
13
14
  apiKey: string;
14
- driver?: WebDriver;
15
+ driver?: WebDriver | Browser;
15
16
  severConfig: {
16
17
  device?: ServerSessionInitConfig;
17
18
  url?: URL | string;
@@ -52,7 +53,7 @@ declare class GptDriver {
52
53
  * @throws {Error} If the session cannot be started or the driver is not properly initialized.
53
54
  */
54
55
  startSession(): Promise<void>;
55
- private buildDriver;
56
+ private createSession;
56
57
  private createGptDriverSession;
57
58
  private getSessionLink;
58
59
  /**
@@ -126,6 +127,7 @@ declare class GptDriver {
126
127
  extract(extractions: string[]): Promise<any>;
127
128
  private gptHandler;
128
129
  private executeCommand;
130
+ private getScreenshot;
129
131
  }
130
132
 
131
133
  export { GptDriver as default };
package/dist/index.mjs CHANGED
@@ -1,19 +1,20 @@
1
1
  import axios from 'axios';
2
+ import { WebDriver } from 'selenium-webdriver';
2
3
  import sharp from 'sharp';
3
- import { Builder } from 'selenium-webdriver';
4
4
 
5
5
  const delay = async (milliseconds) => {
6
6
  await new Promise((resolve) => setTimeout(resolve, milliseconds));
7
7
  };
8
- const getScreenshot = async (appiumSessionConfig, driver) => {
9
- let screenshot = await driver.takeScreenshot();
10
- if (appiumSessionConfig.platform === "iOS") {
11
- const imageBuffer = Buffer.from(screenshot, "base64");
12
- const transformedImage = await sharp(imageBuffer).resize(appiumSessionConfig.size.width, appiumSessionConfig.size.height).toBuffer();
13
- screenshot = transformedImage.toString("base64");
8
+ function buildUrl(base, extraPath) {
9
+ let baseUrl = base.toString();
10
+ if (baseUrl.endsWith("/")) {
11
+ baseUrl = baseUrl.slice(0, -1);
14
12
  }
15
- return screenshot;
16
- };
13
+ if (!extraPath.startsWith("/")) {
14
+ extraPath = "/" + extraPath;
15
+ }
16
+ return `${baseUrl}${extraPath}`;
17
+ }
17
18
 
18
19
  class GptDriver {
19
20
  apiKey;
@@ -59,9 +60,9 @@ class GptDriver {
59
60
  }
60
61
  }
61
62
  initializeAppiumConfig(config) {
62
- const defaultPort = process.env.APPIUM_PORT ? parseInt(process.env.APPIUM_PORT, 10) : 4723;
63
+ const defaultPort = parseInt(process.env.APPIUM_PORT ?? "4723", 10);
63
64
  const defaultHost = process.env.APPIUM_HOST ?? "127.0.0.1";
64
- const serverUrl = config.severConfig?.url != null ? typeof config.severConfig?.url === "string" ? new URL(config.severConfig.url) : config.severConfig?.url : new URL(`http://${defaultPort}:${defaultHost}`);
65
+ let serverUrl = config.severConfig?.url instanceof URL ? config.severConfig.url : new URL(config.severConfig?.url ?? `http://${defaultHost}:${defaultPort}`);
65
66
  this.appiumSessionConfig = {
66
67
  serverUrl,
67
68
  ...config.severConfig?.device
@@ -76,37 +77,61 @@ class GptDriver {
76
77
  async startSession() {
77
78
  console.log(">> Starting session...");
78
79
  if (this.driver) {
79
- const capabilities = await this.driver.getCapabilities();
80
- const platform = capabilities.get("platformName");
81
- const platformVersion = capabilities.get("platformVersion") ?? this.appiumSessionConfig?.platformVersion;
82
- const deviceName = this.appiumSessionConfig?.deviceName ?? capabilities.get("deviceName");
80
+ let platform;
81
+ let platformVersion;
82
+ let deviceName;
83
+ let sessionId;
84
+ if (this.driver instanceof WebDriver) {
85
+ const capabilities = await this.driver.getCapabilities();
86
+ platform = capabilities.get("platformName");
87
+ platformVersion = capabilities.get("platformVersion") ?? this.appiumSessionConfig?.platformVersion;
88
+ deviceName = this.appiumSessionConfig?.deviceName ?? capabilities.get("deviceName");
89
+ const session = await this.driver.getSession();
90
+ sessionId = session.getId();
91
+ } else {
92
+ platform = this.driver.capabilities["appium:platformName"];
93
+ platformVersion = this.driver.capabilities["appium:platformVersion"];
94
+ deviceName = this.appiumSessionConfig?.deviceName ?? this.driver.capabilities["appium:deviceName"] ?? "";
95
+ sessionId = this.driver.sessionId;
96
+ }
83
97
  this.appiumSessionConfig = {
84
98
  ...this.appiumSessionConfig,
99
+ id: sessionId,
85
100
  platform,
86
101
  platformVersion,
87
102
  deviceName
88
103
  };
89
104
  } else {
90
- this.driver = await this.buildDriver();
105
+ this.appiumSessionConfig.id = await this.createSession();
91
106
  }
92
- const session = await this.driver.getSession();
93
- this.appiumSessionConfig.id = session.getId();
94
107
  await this.createGptDriverSession();
95
- const rectResponse = await this.driver.manage().window().getRect();
108
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/window/rect`);
109
+ const rectResponse = await axios.get(
110
+ url
111
+ );
96
112
  this.appiumSessionConfig.size = {
97
- width: rectResponse.width,
98
- height: rectResponse.height
113
+ width: rectResponse.data.value.width,
114
+ height: rectResponse.data.value.height
99
115
  };
100
116
  console.log(`>> Session created. Monitor execution at: ${this.getSessionLink()}`);
101
117
  }
102
- async buildDriver() {
103
- const { platform, deviceName, platformVersion } = this.appiumSessionConfig;
104
- return new Builder().withCapabilities({
105
- platformName: platform,
106
- automationName: platform === "iOS" ? "XCUITest" : "UiAutomator2",
107
- deviceName,
108
- platformVersion
109
- }).usingServer(this.appiumSessionConfig.serverUrl.toString()).build();
118
+ async createSession() {
119
+ const { platform, deviceName, platformVersion, serverUrl } = this.appiumSessionConfig;
120
+ const url = buildUrl(serverUrl, `/session`);
121
+ const response = await axios.post(
122
+ url,
123
+ {
124
+ capabilities: {
125
+ alwaysMatch: {
126
+ platformName: platform,
127
+ "appium:automationName": platform === "iOS" ? "XCUITest" : "UiAutomator2",
128
+ "appium:deviceName": deviceName,
129
+ "appium:platformVersion": platformVersion
130
+ }
131
+ }
132
+ }
133
+ );
134
+ return response.data.value.sessionId;
110
135
  }
111
136
  async createGptDriverSession() {
112
137
  const response = await axios.post(
@@ -146,6 +171,8 @@ class GptDriver {
146
171
  status
147
172
  }
148
173
  );
174
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}`);
175
+ await axios.delete(url);
149
176
  console.log(">> Session stopped.");
150
177
  this.gptDriverSessionId = void 0;
151
178
  }
@@ -168,7 +195,7 @@ class GptDriver {
168
195
  if (appiumHandler != null) {
169
196
  try {
170
197
  await appiumHandler(driver);
171
- const screenshot = await getScreenshot(this.appiumSessionConfig, this.driver);
198
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
172
199
  await axios.post(`${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/log_code_execution`, {
173
200
  api_key: this.apiKey,
174
201
  base64_screenshot: screenshot,
@@ -230,7 +257,7 @@ class GptDriver {
230
257
  */
231
258
  async checkBulk(conditions) {
232
259
  console.log(">> Checking:", conditions);
233
- const screenshot = await getScreenshot(this.appiumSessionConfig, this.driver);
260
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
234
261
  const response = await axios.post(
235
262
  `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
236
263
  {
@@ -256,7 +283,7 @@ class GptDriver {
256
283
  */
257
284
  async extract(extractions) {
258
285
  console.log(">> Extracting:", extractions);
259
- const screenshot = await getScreenshot(this.appiumSessionConfig, this.driver);
286
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
260
287
  const response = await axios.post(
261
288
  `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/extract`,
262
289
  {
@@ -272,7 +299,7 @@ class GptDriver {
272
299
  try {
273
300
  let conditionSucceeded = false;
274
301
  while (!conditionSucceeded) {
275
- const screenshot = await getScreenshot(this.appiumSessionConfig, this.driver);
302
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
276
303
  console.log(">> Asking GTP Driver for next action...");
277
304
  const response = await axios.request(
278
305
  {
@@ -321,6 +348,17 @@ class GptDriver {
321
348
  });
322
349
  }
323
350
  }
351
+ async getScreenshot(appiumSessionConfig) {
352
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
353
+ const screenshotResponse = await axios.get(url);
354
+ let screenshot = await screenshotResponse.data.value;
355
+ if (appiumSessionConfig.platform === "iOS") {
356
+ const imageBuffer = Buffer.from(screenshot, "base64");
357
+ const transformedImage = await sharp(imageBuffer).resize(appiumSessionConfig.size.width, appiumSessionConfig.size.height).toBuffer();
358
+ screenshot = transformedImage.toString("base64");
359
+ }
360
+ return screenshot;
361
+ }
324
362
  }
325
363
 
326
364
  export { GptDriver as default };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gpt-driver-node",
3
- "version": "1.0.0-alpha.6",
3
+ "version": "1.0.0-alpha.8",
4
4
  "main": "./dist/index.cjs",
5
5
  "module": "./dist/index.mjs",
6
6
  "types": "./dist/index.d.cts",
@@ -10,15 +10,18 @@
10
10
  "watch": "tsx watch src/index.ts",
11
11
  "test": "jest"
12
12
  },
13
+ "type": "module",
13
14
  "keywords": [],
14
15
  "author": "MobileBoost",
15
16
  "license": "CC BY-NC-SA 4.0",
16
17
  "description": "Test your mobile apps with the AI native GPT Driver (docs.mobileboost.io).",
17
18
  "dependencies": {
18
19
  "@types/selenium-webdriver": "^4.1.25",
20
+ "@wdio/types": "^9.0.4",
19
21
  "axios": "^1.7.3",
20
22
  "selenium-webdriver": "^4.23.0",
21
- "sharp": "^0.33.4"
23
+ "sharp": "^0.33.4",
24
+ "webdriverio": "^9.0.7"
22
25
  },
23
26
  "devDependencies": {
24
27
  "tsx": "^4.16.5",