gpt-driver-node 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. package/dist/index.cjs +119 -41
  2. package/dist/index.mjs +119 -41
  3. package/package.json +1 -1
package/dist/index.cjs CHANGED
@@ -11,29 +11,6 @@ var winston = require('winston');
11
11
  var zod = require('zod');
12
12
  var crypto = require('node:crypto');
13
13
 
14
- const delay = async (milliseconds) => {
15
- await new Promise((resolve) => setTimeout(resolve, milliseconds));
16
- };
17
- function buildUrl(base, extraPath) {
18
- let baseUrl = base.toString();
19
- if (baseUrl.endsWith("/")) {
20
- baseUrl = baseUrl.slice(0, -1);
21
- }
22
- if (!extraPath.startsWith("/")) {
23
- extraPath = "/" + extraPath;
24
- }
25
- return `${baseUrl}${extraPath}`;
26
- }
27
- const getImageDimensions = async (base64) => {
28
- const base64Data = base64.replace(/^data:image\/\w+;base64,/, "");
29
- const buffer = Buffer.from(base64Data, "base64");
30
- const metadata = await sharp(buffer).metadata();
31
- if (!metadata.width || !metadata.height) {
32
- throw new Error("Unable to get image dimensions");
33
- }
34
- return { width: metadata.width, height: metadata.height };
35
- };
36
-
37
14
  const colors = {
38
15
  reset: "\x1B[0m",
39
16
  bold: "\x1B[1m",
@@ -95,6 +72,75 @@ ${logStyles.gray(stack)}` : logMessage;
95
72
  ]
96
73
  });
97
74
 
75
+ const delay = async (milliseconds) => {
76
+ await new Promise((resolve) => setTimeout(resolve, milliseconds));
77
+ };
78
+ function isRetryableNetworkError(error) {
79
+ const retryableCodes = [
80
+ "ECONNRESET",
81
+ "ETIMEDOUT",
82
+ "ECONNREFUSED",
83
+ "ENOTFOUND",
84
+ "ENETUNREACH",
85
+ "EAI_AGAIN",
86
+ "EPIPE",
87
+ "ECONNABORTED"
88
+ ];
89
+ if (error.code && retryableCodes.includes(error.code)) {
90
+ return true;
91
+ }
92
+ if (error.message?.includes("socket disconnected") || error.message?.includes("TLS") || error.message?.includes("SSL")) {
93
+ return true;
94
+ }
95
+ if (error.code === "ECONNABORTED" && error.message?.includes("timeout")) {
96
+ return true;
97
+ }
98
+ if (error.response?.status >= 500) {
99
+ return true;
100
+ }
101
+ return false;
102
+ }
103
+ async function requestWithRetry(config, maxRetries = 3, baseDelayMs = 1e3) {
104
+ let lastError;
105
+ for (let attempt = 1; attempt <= maxRetries; attempt++) {
106
+ try {
107
+ const response = await axios.request(config);
108
+ return response.data;
109
+ } catch (error) {
110
+ lastError = error;
111
+ const isRetryable = isRetryableNetworkError(error);
112
+ if (!isRetryable || attempt === maxRetries) {
113
+ throw error;
114
+ }
115
+ const delayMs = baseDelayMs * Math.pow(2, attempt - 1);
116
+ globalLogger.warn(
117
+ `Request failed (attempt ${attempt}/${maxRetries}): ${error.message}. Retrying in ${delayMs}ms...`
118
+ );
119
+ await delay(delayMs);
120
+ }
121
+ }
122
+ throw lastError;
123
+ }
124
+ function buildUrl(base, extraPath) {
125
+ let baseUrl = base.toString();
126
+ if (baseUrl.endsWith("/")) {
127
+ baseUrl = baseUrl.slice(0, -1);
128
+ }
129
+ if (!extraPath.startsWith("/")) {
130
+ extraPath = "/" + extraPath;
131
+ }
132
+ return `${baseUrl}${extraPath}`;
133
+ }
134
+ const getImageDimensions = async (base64) => {
135
+ const base64Data = base64.replace(/^data:image\/\w+;base64,/, "");
136
+ const buffer = Buffer.from(base64Data, "base64");
137
+ const metadata = await sharp(buffer).metadata();
138
+ if (!metadata.width || !metadata.height) {
139
+ throw new Error("Unable to get image dimensions");
140
+ }
141
+ return { width: metadata.width, height: metadata.height };
142
+ };
143
+
98
144
  const waitForStableScreen = async (getScreenshot, options = {}) => {
99
145
  const {
100
146
  maxTimeoutSec = 5,
@@ -1436,14 +1482,43 @@ ${"=".repeat(50)}`);
1436
1482
  }
1437
1483
  async getScreenshot(appiumSessionConfig, shouldScale = true) {
1438
1484
  globalLogger.debug("Capturing screenshot...");
1439
- const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
1440
- const screenshotResponse = await axios.get(url);
1441
- let screenshot = await screenshotResponse.data.value;
1442
- if (appiumSessionConfig.platform === "iOS" && shouldScale) {
1443
- globalLogger.debug(`Resizing iOS screenshot to ${appiumSessionConfig.size.width}x${appiumSessionConfig.size.height}`);
1444
- const imageBuffer = Buffer.from(screenshot, "base64");
1445
- const transformedImage = await sharp(imageBuffer).resize(appiumSessionConfig.size.width, appiumSessionConfig.size.height).toBuffer();
1446
- screenshot = transformedImage.toString("base64");
1485
+ let screenshot;
1486
+ if (appiumSessionConfig.platform === "Android") {
1487
+ try {
1488
+ const { execSync } = await import('node:child_process');
1489
+ let udid;
1490
+ if (this.driver) {
1491
+ if (this.driver.sessionId != null) {
1492
+ const caps = this.driver.capabilities;
1493
+ udid = caps["appium:udid"] || caps["udid"];
1494
+ } else {
1495
+ const driver = this.driver;
1496
+ const capabilities = await driver.getCapabilities();
1497
+ udid = capabilities.get("appium:udid") || capabilities.get("udid");
1498
+ }
1499
+ }
1500
+ const deviceArg = udid ? `-s ${udid}` : "";
1501
+ const buffer = execSync(`adb ${deviceArg} exec-out screencap -p`, {
1502
+ encoding: "buffer",
1503
+ maxBuffer: 50 * 1024 * 1024
1504
+ });
1505
+ screenshot = buffer.toString("base64");
1506
+ } catch (e) {
1507
+ globalLogger.warn("ADB screenshot failed, falling back to Appium screenshot");
1508
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
1509
+ const screenshotResponse = await axios.get(url);
1510
+ screenshot = screenshotResponse.data.value;
1511
+ }
1512
+ } else {
1513
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
1514
+ const screenshotResponse = await axios.get(url);
1515
+ screenshot = screenshotResponse.data.value;
1516
+ if (shouldScale) {
1517
+ globalLogger.debug(`Resizing iOS screenshot to ${appiumSessionConfig.size.width}x${appiumSessionConfig.size.height}`);
1518
+ const imageBuffer = Buffer.from(screenshot, "base64");
1519
+ const transformedImage = await sharp(imageBuffer).resize(appiumSessionConfig.size.width, appiumSessionConfig.size.height).toBuffer();
1520
+ screenshot = transformedImage.toString("base64");
1521
+ }
1447
1522
  }
1448
1523
  return screenshot;
1449
1524
  }
@@ -2248,7 +2323,6 @@ ${"=".repeat(50)}`);
2248
2323
  let conditionSucceeded = false;
2249
2324
  while (!conditionSucceeded) {
2250
2325
  let screenshot;
2251
- let originalScreenshotBase64 = null;
2252
2326
  if (!this.useGptDriverCloud) {
2253
2327
  const stabilityResult = await waitForStableScreen(
2254
2328
  () => this.getScreenshot(this.appiumSessionConfig)
@@ -2259,8 +2333,9 @@ ${"=".repeat(50)}`);
2259
2333
  }
2260
2334
  }
2261
2335
  globalLogger.info("Requesting next action from GPT Driver...");
2262
- const response = await axios.request(
2263
- {
2336
+ let responseData;
2337
+ try {
2338
+ responseData = await requestWithRetry({
2264
2339
  url: `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/execute`,
2265
2340
  method: "POST",
2266
2341
  data: {
@@ -2270,18 +2345,21 @@ ${"=".repeat(50)}`);
2270
2345
  caching_mode: cachingMode ?? this.cachingMode,
2271
2346
  step_number: this.step_number
2272
2347
  }
2273
- }
2274
- );
2275
- const executeStatus = response.data.status;
2348
+ });
2349
+ } catch (error) {
2350
+ globalLogger.error(`GPT Driver request failed: ${error.message}`);
2351
+ globalLogger.error(`Stack trace: ${error.stack}`);
2352
+ throw error;
2353
+ }
2354
+ const executeStatus = responseData.status;
2276
2355
  if (executeStatus === "failed") {
2277
- const errorMessage = response.data?.commands?.at(0)?.data;
2356
+ const errorMessage = responseData?.commands?.at(0)?.data;
2278
2357
  globalLogger.error(`Execution failed: ${errorMessage ?? "Unknown error"}`);
2279
2358
  throw new Error(errorMessage ?? "Execution failed");
2280
2359
  }
2281
2360
  conditionSucceeded = executeStatus !== "inProgress";
2282
- const executeResponse = response.data;
2283
- globalLogger.debug(`Received ${executeResponse.commands.length} command(s) to execute`);
2284
- for (const appiumCommand of executeResponse.commands) {
2361
+ globalLogger.debug(`Received ${responseData.commands.length} command(s) to execute`);
2362
+ for (const appiumCommand of responseData.commands) {
2285
2363
  await this.executeCommand(appiumCommand);
2286
2364
  }
2287
2365
  }
package/dist/index.mjs CHANGED
@@ -9,29 +9,6 @@ import winston from 'winston';
9
9
  import { z } from 'zod';
10
10
  import crypto from 'node:crypto';
11
11
 
12
- const delay = async (milliseconds) => {
13
- await new Promise((resolve) => setTimeout(resolve, milliseconds));
14
- };
15
- function buildUrl(base, extraPath) {
16
- let baseUrl = base.toString();
17
- if (baseUrl.endsWith("/")) {
18
- baseUrl = baseUrl.slice(0, -1);
19
- }
20
- if (!extraPath.startsWith("/")) {
21
- extraPath = "/" + extraPath;
22
- }
23
- return `${baseUrl}${extraPath}`;
24
- }
25
- const getImageDimensions = async (base64) => {
26
- const base64Data = base64.replace(/^data:image\/\w+;base64,/, "");
27
- const buffer = Buffer.from(base64Data, "base64");
28
- const metadata = await sharp(buffer).metadata();
29
- if (!metadata.width || !metadata.height) {
30
- throw new Error("Unable to get image dimensions");
31
- }
32
- return { width: metadata.width, height: metadata.height };
33
- };
34
-
35
12
  const colors = {
36
13
  reset: "\x1B[0m",
37
14
  bold: "\x1B[1m",
@@ -93,6 +70,75 @@ ${logStyles.gray(stack)}` : logMessage;
93
70
  ]
94
71
  });
95
72
 
73
+ const delay = async (milliseconds) => {
74
+ await new Promise((resolve) => setTimeout(resolve, milliseconds));
75
+ };
76
+ function isRetryableNetworkError(error) {
77
+ const retryableCodes = [
78
+ "ECONNRESET",
79
+ "ETIMEDOUT",
80
+ "ECONNREFUSED",
81
+ "ENOTFOUND",
82
+ "ENETUNREACH",
83
+ "EAI_AGAIN",
84
+ "EPIPE",
85
+ "ECONNABORTED"
86
+ ];
87
+ if (error.code && retryableCodes.includes(error.code)) {
88
+ return true;
89
+ }
90
+ if (error.message?.includes("socket disconnected") || error.message?.includes("TLS") || error.message?.includes("SSL")) {
91
+ return true;
92
+ }
93
+ if (error.code === "ECONNABORTED" && error.message?.includes("timeout")) {
94
+ return true;
95
+ }
96
+ if (error.response?.status >= 500) {
97
+ return true;
98
+ }
99
+ return false;
100
+ }
101
+ async function requestWithRetry(config, maxRetries = 3, baseDelayMs = 1e3) {
102
+ let lastError;
103
+ for (let attempt = 1; attempt <= maxRetries; attempt++) {
104
+ try {
105
+ const response = await axios.request(config);
106
+ return response.data;
107
+ } catch (error) {
108
+ lastError = error;
109
+ const isRetryable = isRetryableNetworkError(error);
110
+ if (!isRetryable || attempt === maxRetries) {
111
+ throw error;
112
+ }
113
+ const delayMs = baseDelayMs * Math.pow(2, attempt - 1);
114
+ globalLogger.warn(
115
+ `Request failed (attempt ${attempt}/${maxRetries}): ${error.message}. Retrying in ${delayMs}ms...`
116
+ );
117
+ await delay(delayMs);
118
+ }
119
+ }
120
+ throw lastError;
121
+ }
122
+ function buildUrl(base, extraPath) {
123
+ let baseUrl = base.toString();
124
+ if (baseUrl.endsWith("/")) {
125
+ baseUrl = baseUrl.slice(0, -1);
126
+ }
127
+ if (!extraPath.startsWith("/")) {
128
+ extraPath = "/" + extraPath;
129
+ }
130
+ return `${baseUrl}${extraPath}`;
131
+ }
132
+ const getImageDimensions = async (base64) => {
133
+ const base64Data = base64.replace(/^data:image\/\w+;base64,/, "");
134
+ const buffer = Buffer.from(base64Data, "base64");
135
+ const metadata = await sharp(buffer).metadata();
136
+ if (!metadata.width || !metadata.height) {
137
+ throw new Error("Unable to get image dimensions");
138
+ }
139
+ return { width: metadata.width, height: metadata.height };
140
+ };
141
+
96
142
  const waitForStableScreen = async (getScreenshot, options = {}) => {
97
143
  const {
98
144
  maxTimeoutSec = 5,
@@ -1434,14 +1480,43 @@ ${"=".repeat(50)}`);
1434
1480
  }
1435
1481
  async getScreenshot(appiumSessionConfig, shouldScale = true) {
1436
1482
  globalLogger.debug("Capturing screenshot...");
1437
- const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
1438
- const screenshotResponse = await axios.get(url);
1439
- let screenshot = await screenshotResponse.data.value;
1440
- if (appiumSessionConfig.platform === "iOS" && shouldScale) {
1441
- globalLogger.debug(`Resizing iOS screenshot to ${appiumSessionConfig.size.width}x${appiumSessionConfig.size.height}`);
1442
- const imageBuffer = Buffer.from(screenshot, "base64");
1443
- const transformedImage = await sharp(imageBuffer).resize(appiumSessionConfig.size.width, appiumSessionConfig.size.height).toBuffer();
1444
- screenshot = transformedImage.toString("base64");
1483
+ let screenshot;
1484
+ if (appiumSessionConfig.platform === "Android") {
1485
+ try {
1486
+ const { execSync } = await import('node:child_process');
1487
+ let udid;
1488
+ if (this.driver) {
1489
+ if (this.driver.sessionId != null) {
1490
+ const caps = this.driver.capabilities;
1491
+ udid = caps["appium:udid"] || caps["udid"];
1492
+ } else {
1493
+ const driver = this.driver;
1494
+ const capabilities = await driver.getCapabilities();
1495
+ udid = capabilities.get("appium:udid") || capabilities.get("udid");
1496
+ }
1497
+ }
1498
+ const deviceArg = udid ? `-s ${udid}` : "";
1499
+ const buffer = execSync(`adb ${deviceArg} exec-out screencap -p`, {
1500
+ encoding: "buffer",
1501
+ maxBuffer: 50 * 1024 * 1024
1502
+ });
1503
+ screenshot = buffer.toString("base64");
1504
+ } catch (e) {
1505
+ globalLogger.warn("ADB screenshot failed, falling back to Appium screenshot");
1506
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
1507
+ const screenshotResponse = await axios.get(url);
1508
+ screenshot = screenshotResponse.data.value;
1509
+ }
1510
+ } else {
1511
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
1512
+ const screenshotResponse = await axios.get(url);
1513
+ screenshot = screenshotResponse.data.value;
1514
+ if (shouldScale) {
1515
+ globalLogger.debug(`Resizing iOS screenshot to ${appiumSessionConfig.size.width}x${appiumSessionConfig.size.height}`);
1516
+ const imageBuffer = Buffer.from(screenshot, "base64");
1517
+ const transformedImage = await sharp(imageBuffer).resize(appiumSessionConfig.size.width, appiumSessionConfig.size.height).toBuffer();
1518
+ screenshot = transformedImage.toString("base64");
1519
+ }
1445
1520
  }
1446
1521
  return screenshot;
1447
1522
  }
@@ -2246,7 +2321,6 @@ ${"=".repeat(50)}`);
2246
2321
  let conditionSucceeded = false;
2247
2322
  while (!conditionSucceeded) {
2248
2323
  let screenshot;
2249
- let originalScreenshotBase64 = null;
2250
2324
  if (!this.useGptDriverCloud) {
2251
2325
  const stabilityResult = await waitForStableScreen(
2252
2326
  () => this.getScreenshot(this.appiumSessionConfig)
@@ -2257,8 +2331,9 @@ ${"=".repeat(50)}`);
2257
2331
  }
2258
2332
  }
2259
2333
  globalLogger.info("Requesting next action from GPT Driver...");
2260
- const response = await axios.request(
2261
- {
2334
+ let responseData;
2335
+ try {
2336
+ responseData = await requestWithRetry({
2262
2337
  url: `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/execute`,
2263
2338
  method: "POST",
2264
2339
  data: {
@@ -2268,18 +2343,21 @@ ${"=".repeat(50)}`);
2268
2343
  caching_mode: cachingMode ?? this.cachingMode,
2269
2344
  step_number: this.step_number
2270
2345
  }
2271
- }
2272
- );
2273
- const executeStatus = response.data.status;
2346
+ });
2347
+ } catch (error) {
2348
+ globalLogger.error(`GPT Driver request failed: ${error.message}`);
2349
+ globalLogger.error(`Stack trace: ${error.stack}`);
2350
+ throw error;
2351
+ }
2352
+ const executeStatus = responseData.status;
2274
2353
  if (executeStatus === "failed") {
2275
- const errorMessage = response.data?.commands?.at(0)?.data;
2354
+ const errorMessage = responseData?.commands?.at(0)?.data;
2276
2355
  globalLogger.error(`Execution failed: ${errorMessage ?? "Unknown error"}`);
2277
2356
  throw new Error(errorMessage ?? "Execution failed");
2278
2357
  }
2279
2358
  conditionSucceeded = executeStatus !== "inProgress";
2280
- const executeResponse = response.data;
2281
- globalLogger.debug(`Received ${executeResponse.commands.length} command(s) to execute`);
2282
- for (const appiumCommand of executeResponse.commands) {
2359
+ globalLogger.debug(`Received ${responseData.commands.length} command(s) to execute`);
2360
+ for (const appiumCommand of responseData.commands) {
2283
2361
  await this.executeCommand(appiumCommand);
2284
2362
  }
2285
2363
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gpt-driver-node",
3
- "version": "1.0.4",
3
+ "version": "1.0.6",
4
4
  "main": "./dist/index.cjs",
5
5
  "module": "./dist/index.mjs",
6
6
  "types": "./dist/index.d.cts",