gpt-driver-node 1.0.10 → 1.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,65 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+ See [docs.mobileboost.io](https://docs.mobileboost.io) for full documentation.
5
+
6
+ ## [1.0.12] - 2026-02-27
7
+
8
+ ### Added
9
+ - `maxWaitForStableScreenSecs` config option to control the maximum time to wait for a stable screen
10
+
11
+ ### Changed
12
+ - Enhanced screen stability handling for more reliable test execution
13
+
14
+ ## [1.0.11] - 2026-02-25
15
+
16
+ ### Changed
17
+ - Improved AI execution logging with richer detail (screenshot, prompt, commands, reasoning, cache status)
18
+
19
+ ## [1.0.10] - 2026-02-25
20
+
21
+ ### Added
22
+ - Screen resolution is now automatically sent to the server during session initialization when a device `size` is provided
23
+
24
+ ## [1.0.9] - 2026-02-20
25
+
26
+ ### Changed
27
+ - Smart loop now throws an error on failure instead of resolving silently — update your error handling accordingly
28
+
29
+ ## [1.0.8] - 2026-02-19
30
+
31
+ ### Changed
32
+ - Screenshots are now compressed before being sent, reducing bandwidth usage
33
+ - ADB-based screenshots are used automatically on Android when the Appium server is local
34
+
35
+ ## [1.0.7] - 2026-02-18
36
+
37
+ ### Added
38
+ - `additionalUserContext` option in `GptDriverConfig` — pass extra context to the AI for more accurate test execution
39
+
40
+ ### Fixed
41
+ - Session is now correctly marked as failed when `maxCacheAttempts` is reached
42
+
43
+ ## [1.0.6] - 2026-02-09
44
+
45
+ ### Added
46
+ - Automatic retry logic for middleware service calls
47
+ - ADB screenshot support for Android devices
48
+ - `middleLayerAssertFn` option for assertion handling
49
+
50
+ ### Changed
51
+ - Screen stability checks are now performed once per loop iteration for better cache efficiency
52
+ - Default log level changed to `info`
53
+
54
+ ## [1.0.5] - 2026-01-12
55
+
56
+ ### Added
57
+ - Press/Enter key command support
58
+ - Config file and environment variable support via `configFilePath`
59
+ - `actionDescription` field on commands for improved traceability
60
+ - Screen stability wait between steps to increase cache hit rate
61
+
62
+ ## [1.0.3] - 2026-01-02
63
+
64
+ ### Changed
65
+ - `executeFlow` reliability and performance improvements
package/dist/index.cjs CHANGED
@@ -632,7 +632,7 @@ async function executeSmartLoop(ctx, params) {
632
632
  let commands = [];
633
633
  let isCacheHit = false;
634
634
  let screenshotResolution = void 0;
635
- const stabilityResult = await waitForStableScreen(ctx.getScreenshot);
635
+ const stabilityResult = await waitForStableScreen(ctx.getScreenshot, { ...ctx.maxWaitForStableScreenSecs && { maxTimeoutSec: ctx.maxWaitForStableScreenSecs } });
636
636
  for (let attempt = 0; attempt < maxCacheAttempts; attempt++) {
637
637
  const screenshotStartTime = performance.now();
638
638
  const screenshotEndTime = performance.now();
@@ -684,6 +684,7 @@ async function executeSmartLoop(ctx, params) {
684
684
  screenshotResolution = await getImageDimensions(screenshot2);
685
685
  }
686
686
  let aiCommands = [];
687
+ let reasoning = [];
687
688
  let actionDescription = [];
688
689
  if (!isCacheHit) {
689
690
  anyCacheMiss = true;
@@ -727,7 +728,9 @@ async function executeSmartLoop(ctx, params) {
727
728
  actionDescription = gptCommands.slice(actionDescriptionIndex, actionDescriptionIndex + 1);
728
729
  }
729
730
  const reasoningIndex = gptCommands.findIndex((entry) => entry.startsWith("reasoning:"));
731
+ reasoning = [];
730
732
  if (reasoningIndex !== -1) {
733
+ reasoning = gptCommands.at(reasoningIndex).split("reasoning:").at(1).trim().split("- ");
731
734
  const parsedCommands = gptCommands.slice(reasoningIndex);
732
735
  const rememberCommands = parsedCommands.filter((cmd) => isRememberCommand(cmd));
733
736
  if (rememberCommands.length > 0) {
@@ -742,7 +745,13 @@ async function executeSmartLoop(ctx, params) {
742
745
  screenshot,
743
746
  commands
744
747
  });
745
- const logPromise = ctx.logCodeExecution(screenshot, commands.join("\n"), isCacheHit);
748
+ const logPromise = ctx.logAIExecution({
749
+ screenshot,
750
+ prompt: params.instruction,
751
+ commands: aiCommands,
752
+ reasoning,
753
+ fromCache: isCacheHit
754
+ });
746
755
  if (ctx.pendingLogPromises) {
747
756
  ctx.pendingLogPromises.push(logPromise);
748
757
  }
@@ -955,6 +964,7 @@ class GptDriver {
955
964
  organisationId;
956
965
  configFilePath;
957
966
  additionalUserContext;
967
+ maxWaitForStableScreenSecs;
958
968
  // Session Execution Stats
959
969
  _stats_startTime = 0;
960
970
  _stats_executedSteps = 0;
@@ -991,6 +1001,7 @@ class GptDriver {
991
1001
  this.organisationId = config.organisationId;
992
1002
  this.configFilePath = config.configFilePath;
993
1003
  this.additionalUserContext = config.additionalUserContext;
1004
+ this.maxWaitForStableScreenSecs = config.maxWaitForStableScreenSecs;
994
1005
  if (config.useGptDriverCloud) {
995
1006
  if (config.serverConfig.device?.platform == null) {
996
1007
  throw new Error("Platform is missing. Please specify the platform when using GPTDriver Cloud.");
@@ -1239,10 +1250,11 @@ ${"=".repeat(50)}`);
1239
1250
  performSwipe: (params) => this.performSwipe(params),
1240
1251
  performType: (text) => this.performType(text),
1241
1252
  performPressEnter: () => this.performPressEnter(),
1242
- logCodeExecution: async (screenshot, command, isCacheHit) => this.logCodeExecution(screenshot, command, isCacheHit),
1253
+ logAIExecution: async (params) => this.logAIExecution(params),
1243
1254
  organisationId: this.organisationId,
1244
1255
  middleLayerAssertFn: options?.middleLayerAssertFn,
1245
- pendingLogPromises: options?.pendingLogPromises
1256
+ pendingLogPromises: options?.pendingLogPromises,
1257
+ maxWaitForStableScreenSecs: this.maxWaitForStableScreenSecs
1246
1258
  };
1247
1259
  }
1248
1260
  /**
@@ -1318,7 +1330,7 @@ ${"=".repeat(50)}`);
1318
1330
  actions: [
1319
1331
  { type: "pointerMove", duration: 0, x: clampedX, y: clampedY },
1320
1332
  { type: "pointerDown", button: 0 },
1321
- { type: "pause", duration: 100 },
1333
+ { type: "pause", duration: 300 },
1322
1334
  { type: "pointerUp", button: 0 }
1323
1335
  ]
1324
1336
  }
@@ -1571,7 +1583,7 @@ ${"=".repeat(50)}`);
1571
1583
  const driver = this.driver;
1572
1584
  if (appiumHandler != null) {
1573
1585
  try {
1574
- await this.takeScreenshotAndLogCodeExecution(appiumHandler.toString());
1586
+ this.pendingLogPromises.push(this.takeScreenshotAndLogCodeExecution(appiumHandler.toString()));
1575
1587
  await appiumHandler(driver);
1576
1588
  globalLogger.debug("Custom Appium handler executed successfully");
1577
1589
  } catch (e) {
@@ -1603,7 +1615,7 @@ ${"=".repeat(50)}`);
1603
1615
  try {
1604
1616
  const handlerStartTime = performance.now();
1605
1617
  globalLogger.debug(`[Performance] Executing custom Appium handler...`);
1606
- await this.takeScreenshotAndLogCodeExecution(appiumHandler.toString());
1618
+ this.pendingLogPromises.push(this.takeScreenshotAndLogCodeExecution(appiumHandler.toString()));
1607
1619
  await appiumHandler(driver);
1608
1620
  const handlerEndTime = performance.now();
1609
1621
  globalLogger.debug("Custom Appium handler executed successfully");
@@ -2197,7 +2209,13 @@ ${issues}`);
2197
2209
  case "type": {
2198
2210
  const text = this.interpolateTemplate(step.text, params);
2199
2211
  globalLogger.info(`${prefix}: Type text`);
2200
- this.takeScreenshotAndLogCodeExecution(`type: text=${text}`);
2212
+ this.pendingLogPromises.push(
2213
+ this.takeScreenshotAndLogAIExecution({
2214
+ prompt: `type: text=${text}`,
2215
+ commands: ["Local type execution"],
2216
+ reasoning: ["Local type execution"]
2217
+ })
2218
+ );
2201
2219
  await this.performType(text);
2202
2220
  if (isFromFileRef) {
2203
2221
  fileRefStepNumber++;
@@ -2208,7 +2226,13 @@ ${issues}`);
2208
2226
  }
2209
2227
  case "enter": {
2210
2228
  globalLogger.info(`${prefix}: Press Enter`);
2211
- this.takeScreenshotAndLogCodeExecution(`press: Enter`);
2229
+ this.pendingLogPromises.push(
2230
+ this.takeScreenshotAndLogAIExecution({
2231
+ prompt: `press: Enter`,
2232
+ commands: ["Local press Enter execution"],
2233
+ reasoning: ["Local press Enter execution"]
2234
+ })
2235
+ );
2212
2236
  await this.performPressEnter();
2213
2237
  if (step.delayNextStep) {
2214
2238
  await this._delay(step.delayNextStep);
@@ -2245,7 +2269,13 @@ ${issues}`);
2245
2269
  }
2246
2270
  } else {
2247
2271
  globalLogger.info(`${prefix}: ${step.type} ${step.direction}`);
2248
- this.takeScreenshotAndLogCodeExecution(`${step.type}: direction=${step.direction}`);
2272
+ this.pendingLogPromises.push(
2273
+ this.takeScreenshotAndLogAIExecution({
2274
+ prompt: `${step.type}: direction=${step.direction}`,
2275
+ commands: [`Local ${step.type} execution`],
2276
+ reasoning: [`Local ${step.type} execution`]
2277
+ })
2278
+ );
2249
2279
  await this.performSwipe({
2250
2280
  direction: step.direction,
2251
2281
  x1: step.x1,
@@ -2266,7 +2296,13 @@ ${issues}`);
2266
2296
  }
2267
2297
  case "zoom": {
2268
2298
  globalLogger.info(`${prefix}: Zoom ${step.direction}`);
2269
- this.takeScreenshotAndLogCodeExecution(`zoom: direction=${step.direction}`);
2299
+ this.pendingLogPromises.push(
2300
+ this.takeScreenshotAndLogAIExecution({
2301
+ prompt: `zoom: direction=${step.direction}`,
2302
+ commands: [`Local zoom execution`],
2303
+ reasoning: [`Local zoom execution`]
2304
+ })
2305
+ );
2270
2306
  if (isFromFileRef) {
2271
2307
  fileRefStepNumber++;
2272
2308
  } else {
@@ -2277,7 +2313,13 @@ ${issues}`);
2277
2313
  case "scrollUntil": {
2278
2314
  const interpolatedText = step.text != null ? this.interpolateTemplate(step.text, params) : void 0;
2279
2315
  globalLogger.info(`${prefix}: Scroll until ${interpolatedText ?? step.elementId}`);
2280
- this.takeScreenshotAndLogCodeExecution(`scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`);
2316
+ this.pendingLogPromises.push(
2317
+ this.takeScreenshotAndLogAIExecution({
2318
+ prompt: `scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`,
2319
+ commands: [`Local scrollUntil execution`],
2320
+ reasoning: [`Local scrollUntil execution`]
2321
+ })
2322
+ );
2281
2323
  await this.performScrollUntil({
2282
2324
  direction: step.direction,
2283
2325
  text: interpolatedText,
@@ -2296,7 +2338,13 @@ ${issues}`);
2296
2338
  const bundleId = params["bundleId"];
2297
2339
  const url = this.interpolateTemplate(step.url, params);
2298
2340
  globalLogger.info(`${prefix}: Open deeplink ${url}`);
2299
- this.takeScreenshotAndLogCodeExecution(`openDeepLinkUrl: url=${url}`);
2341
+ this.pendingLogPromises.push(
2342
+ this.takeScreenshotAndLogAIExecution({
2343
+ prompt: `openDeepLinkUrl: url=${url}`,
2344
+ commands: [`Local openDeepLinkUrl execution`],
2345
+ reasoning: [`Local openDeepLinkUrl execution`]
2346
+ })
2347
+ );
2300
2348
  await this.openDeepLinkUrl({ url, package: pkg, bundleId });
2301
2349
  break;
2302
2350
  }
@@ -2417,6 +2465,28 @@ ${"=".repeat(50)}`);
2417
2465
  globalLogger.error("Failed to log code execution", e);
2418
2466
  }
2419
2467
  }
2468
+ async logAIExecution(params) {
2469
+ try {
2470
+ await axios.post(`${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/log_ai_execution`, {
2471
+ api_key: this.apiKey,
2472
+ base64_screenshot: params.screenshot,
2473
+ prompt: params.prompt,
2474
+ reasoning: params.reasoning,
2475
+ commands: params.commands,
2476
+ from_cache: !!params.fromCache
2477
+ });
2478
+ } catch (e) {
2479
+ globalLogger.error("Failed to log code execution", e);
2480
+ }
2481
+ }
2482
+ async takeScreenshotAndLogAIExecution(params) {
2483
+ try {
2484
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
2485
+ await this.logAIExecution({ ...params, screenshot });
2486
+ } catch (e) {
2487
+ globalLogger.error("Failed to log code execution", e);
2488
+ }
2489
+ }
2420
2490
  async takeScreenshotAndLogCodeExecution(command) {
2421
2491
  try {
2422
2492
  const screenshot = await this.getScreenshot(this.appiumSessionConfig);
package/dist/index.d.cts CHANGED
@@ -25,6 +25,7 @@ interface GptDriverConfig {
25
25
  additionalUserContext?: string;
26
26
  organisationId?: string;
27
27
  configFilePath?: string;
28
+ maxWaitForStableScreenSecs?: number;
28
29
  }
29
30
  /**
30
31
  * Parameters for opening a deep link url in the Appium session.
@@ -514,6 +515,7 @@ declare class GptDriver {
514
515
  private organisationId?;
515
516
  private configFilePath?;
516
517
  private additionalUserContext?;
518
+ private maxWaitForStableScreenSecs?;
517
519
  private _stats_startTime;
518
520
  private _stats_executedSteps;
519
521
  private _stats_cacheHits;
@@ -877,6 +879,8 @@ declare class GptDriver {
877
879
  private gptHandler;
878
880
  private executeCommand;
879
881
  private logCodeExecution;
882
+ private logAIExecution;
883
+ private takeScreenshotAndLogAIExecution;
880
884
  private takeScreenshotAndLogCodeExecution;
881
885
  }
882
886
 
package/dist/index.mjs CHANGED
@@ -630,7 +630,7 @@ async function executeSmartLoop(ctx, params) {
630
630
  let commands = [];
631
631
  let isCacheHit = false;
632
632
  let screenshotResolution = void 0;
633
- const stabilityResult = await waitForStableScreen(ctx.getScreenshot);
633
+ const stabilityResult = await waitForStableScreen(ctx.getScreenshot, { ...ctx.maxWaitForStableScreenSecs && { maxTimeoutSec: ctx.maxWaitForStableScreenSecs } });
634
634
  for (let attempt = 0; attempt < maxCacheAttempts; attempt++) {
635
635
  const screenshotStartTime = performance.now();
636
636
  const screenshotEndTime = performance.now();
@@ -682,6 +682,7 @@ async function executeSmartLoop(ctx, params) {
682
682
  screenshotResolution = await getImageDimensions(screenshot2);
683
683
  }
684
684
  let aiCommands = [];
685
+ let reasoning = [];
685
686
  let actionDescription = [];
686
687
  if (!isCacheHit) {
687
688
  anyCacheMiss = true;
@@ -725,7 +726,9 @@ async function executeSmartLoop(ctx, params) {
725
726
  actionDescription = gptCommands.slice(actionDescriptionIndex, actionDescriptionIndex + 1);
726
727
  }
727
728
  const reasoningIndex = gptCommands.findIndex((entry) => entry.startsWith("reasoning:"));
729
+ reasoning = [];
728
730
  if (reasoningIndex !== -1) {
731
+ reasoning = gptCommands.at(reasoningIndex).split("reasoning:").at(1).trim().split("- ");
729
732
  const parsedCommands = gptCommands.slice(reasoningIndex);
730
733
  const rememberCommands = parsedCommands.filter((cmd) => isRememberCommand(cmd));
731
734
  if (rememberCommands.length > 0) {
@@ -740,7 +743,13 @@ async function executeSmartLoop(ctx, params) {
740
743
  screenshot,
741
744
  commands
742
745
  });
743
- const logPromise = ctx.logCodeExecution(screenshot, commands.join("\n"), isCacheHit);
746
+ const logPromise = ctx.logAIExecution({
747
+ screenshot,
748
+ prompt: params.instruction,
749
+ commands: aiCommands,
750
+ reasoning,
751
+ fromCache: isCacheHit
752
+ });
744
753
  if (ctx.pendingLogPromises) {
745
754
  ctx.pendingLogPromises.push(logPromise);
746
755
  }
@@ -953,6 +962,7 @@ class GptDriver {
953
962
  organisationId;
954
963
  configFilePath;
955
964
  additionalUserContext;
965
+ maxWaitForStableScreenSecs;
956
966
  // Session Execution Stats
957
967
  _stats_startTime = 0;
958
968
  _stats_executedSteps = 0;
@@ -989,6 +999,7 @@ class GptDriver {
989
999
  this.organisationId = config.organisationId;
990
1000
  this.configFilePath = config.configFilePath;
991
1001
  this.additionalUserContext = config.additionalUserContext;
1002
+ this.maxWaitForStableScreenSecs = config.maxWaitForStableScreenSecs;
992
1003
  if (config.useGptDriverCloud) {
993
1004
  if (config.serverConfig.device?.platform == null) {
994
1005
  throw new Error("Platform is missing. Please specify the platform when using GPTDriver Cloud.");
@@ -1237,10 +1248,11 @@ ${"=".repeat(50)}`);
1237
1248
  performSwipe: (params) => this.performSwipe(params),
1238
1249
  performType: (text) => this.performType(text),
1239
1250
  performPressEnter: () => this.performPressEnter(),
1240
- logCodeExecution: async (screenshot, command, isCacheHit) => this.logCodeExecution(screenshot, command, isCacheHit),
1251
+ logAIExecution: async (params) => this.logAIExecution(params),
1241
1252
  organisationId: this.organisationId,
1242
1253
  middleLayerAssertFn: options?.middleLayerAssertFn,
1243
- pendingLogPromises: options?.pendingLogPromises
1254
+ pendingLogPromises: options?.pendingLogPromises,
1255
+ maxWaitForStableScreenSecs: this.maxWaitForStableScreenSecs
1244
1256
  };
1245
1257
  }
1246
1258
  /**
@@ -1316,7 +1328,7 @@ ${"=".repeat(50)}`);
1316
1328
  actions: [
1317
1329
  { type: "pointerMove", duration: 0, x: clampedX, y: clampedY },
1318
1330
  { type: "pointerDown", button: 0 },
1319
- { type: "pause", duration: 100 },
1331
+ { type: "pause", duration: 300 },
1320
1332
  { type: "pointerUp", button: 0 }
1321
1333
  ]
1322
1334
  }
@@ -1569,7 +1581,7 @@ ${"=".repeat(50)}`);
1569
1581
  const driver = this.driver;
1570
1582
  if (appiumHandler != null) {
1571
1583
  try {
1572
- await this.takeScreenshotAndLogCodeExecution(appiumHandler.toString());
1584
+ this.pendingLogPromises.push(this.takeScreenshotAndLogCodeExecution(appiumHandler.toString()));
1573
1585
  await appiumHandler(driver);
1574
1586
  globalLogger.debug("Custom Appium handler executed successfully");
1575
1587
  } catch (e) {
@@ -1601,7 +1613,7 @@ ${"=".repeat(50)}`);
1601
1613
  try {
1602
1614
  const handlerStartTime = performance.now();
1603
1615
  globalLogger.debug(`[Performance] Executing custom Appium handler...`);
1604
- await this.takeScreenshotAndLogCodeExecution(appiumHandler.toString());
1616
+ this.pendingLogPromises.push(this.takeScreenshotAndLogCodeExecution(appiumHandler.toString()));
1605
1617
  await appiumHandler(driver);
1606
1618
  const handlerEndTime = performance.now();
1607
1619
  globalLogger.debug("Custom Appium handler executed successfully");
@@ -2195,7 +2207,13 @@ ${issues}`);
2195
2207
  case "type": {
2196
2208
  const text = this.interpolateTemplate(step.text, params);
2197
2209
  globalLogger.info(`${prefix}: Type text`);
2198
- this.takeScreenshotAndLogCodeExecution(`type: text=${text}`);
2210
+ this.pendingLogPromises.push(
2211
+ this.takeScreenshotAndLogAIExecution({
2212
+ prompt: `type: text=${text}`,
2213
+ commands: ["Local type execution"],
2214
+ reasoning: ["Local type execution"]
2215
+ })
2216
+ );
2199
2217
  await this.performType(text);
2200
2218
  if (isFromFileRef) {
2201
2219
  fileRefStepNumber++;
@@ -2206,7 +2224,13 @@ ${issues}`);
2206
2224
  }
2207
2225
  case "enter": {
2208
2226
  globalLogger.info(`${prefix}: Press Enter`);
2209
- this.takeScreenshotAndLogCodeExecution(`press: Enter`);
2227
+ this.pendingLogPromises.push(
2228
+ this.takeScreenshotAndLogAIExecution({
2229
+ prompt: `press: Enter`,
2230
+ commands: ["Local press Enter execution"],
2231
+ reasoning: ["Local press Enter execution"]
2232
+ })
2233
+ );
2210
2234
  await this.performPressEnter();
2211
2235
  if (step.delayNextStep) {
2212
2236
  await this._delay(step.delayNextStep);
@@ -2243,7 +2267,13 @@ ${issues}`);
2243
2267
  }
2244
2268
  } else {
2245
2269
  globalLogger.info(`${prefix}: ${step.type} ${step.direction}`);
2246
- this.takeScreenshotAndLogCodeExecution(`${step.type}: direction=${step.direction}`);
2270
+ this.pendingLogPromises.push(
2271
+ this.takeScreenshotAndLogAIExecution({
2272
+ prompt: `${step.type}: direction=${step.direction}`,
2273
+ commands: [`Local ${step.type} execution`],
2274
+ reasoning: [`Local ${step.type} execution`]
2275
+ })
2276
+ );
2247
2277
  await this.performSwipe({
2248
2278
  direction: step.direction,
2249
2279
  x1: step.x1,
@@ -2264,7 +2294,13 @@ ${issues}`);
2264
2294
  }
2265
2295
  case "zoom": {
2266
2296
  globalLogger.info(`${prefix}: Zoom ${step.direction}`);
2267
- this.takeScreenshotAndLogCodeExecution(`zoom: direction=${step.direction}`);
2297
+ this.pendingLogPromises.push(
2298
+ this.takeScreenshotAndLogAIExecution({
2299
+ prompt: `zoom: direction=${step.direction}`,
2300
+ commands: [`Local zoom execution`],
2301
+ reasoning: [`Local zoom execution`]
2302
+ })
2303
+ );
2268
2304
  if (isFromFileRef) {
2269
2305
  fileRefStepNumber++;
2270
2306
  } else {
@@ -2275,7 +2311,13 @@ ${issues}`);
2275
2311
  case "scrollUntil": {
2276
2312
  const interpolatedText = step.text != null ? this.interpolateTemplate(step.text, params) : void 0;
2277
2313
  globalLogger.info(`${prefix}: Scroll until ${interpolatedText ?? step.elementId}`);
2278
- this.takeScreenshotAndLogCodeExecution(`scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`);
2314
+ this.pendingLogPromises.push(
2315
+ this.takeScreenshotAndLogAIExecution({
2316
+ prompt: `scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`,
2317
+ commands: [`Local scrollUntil execution`],
2318
+ reasoning: [`Local scrollUntil execution`]
2319
+ })
2320
+ );
2279
2321
  await this.performScrollUntil({
2280
2322
  direction: step.direction,
2281
2323
  text: interpolatedText,
@@ -2294,7 +2336,13 @@ ${issues}`);
2294
2336
  const bundleId = params["bundleId"];
2295
2337
  const url = this.interpolateTemplate(step.url, params);
2296
2338
  globalLogger.info(`${prefix}: Open deeplink ${url}`);
2297
- this.takeScreenshotAndLogCodeExecution(`openDeepLinkUrl: url=${url}`);
2339
+ this.pendingLogPromises.push(
2340
+ this.takeScreenshotAndLogAIExecution({
2341
+ prompt: `openDeepLinkUrl: url=${url}`,
2342
+ commands: [`Local openDeepLinkUrl execution`],
2343
+ reasoning: [`Local openDeepLinkUrl execution`]
2344
+ })
2345
+ );
2298
2346
  await this.openDeepLinkUrl({ url, package: pkg, bundleId });
2299
2347
  break;
2300
2348
  }
@@ -2415,6 +2463,28 @@ ${"=".repeat(50)}`);
2415
2463
  globalLogger.error("Failed to log code execution", e);
2416
2464
  }
2417
2465
  }
2466
+ async logAIExecution(params) {
2467
+ try {
2468
+ await axios.post(`${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/log_ai_execution`, {
2469
+ api_key: this.apiKey,
2470
+ base64_screenshot: params.screenshot,
2471
+ prompt: params.prompt,
2472
+ reasoning: params.reasoning,
2473
+ commands: params.commands,
2474
+ from_cache: !!params.fromCache
2475
+ });
2476
+ } catch (e) {
2477
+ globalLogger.error("Failed to log code execution", e);
2478
+ }
2479
+ }
2480
+ async takeScreenshotAndLogAIExecution(params) {
2481
+ try {
2482
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
2483
+ await this.logAIExecution({ ...params, screenshot });
2484
+ } catch (e) {
2485
+ globalLogger.error("Failed to log code execution", e);
2486
+ }
2487
+ }
2418
2488
  async takeScreenshotAndLogCodeExecution(command) {
2419
2489
  try {
2420
2490
  const screenshot = await this.getScreenshot(this.appiumSessionConfig);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gpt-driver-node",
3
- "version": "1.0.10",
3
+ "version": "1.0.12",
4
4
  "main": "./dist/index.cjs",
5
5
  "module": "./dist/index.mjs",
6
6
  "types": "./dist/index.d.cts",