gpt-driver-node 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -2,9 +2,11 @@
2
2
 
3
3
  var node_fs = require('node:fs');
4
4
  var path = require('node:path');
5
+ var promises = require('node:fs/promises');
5
6
  var axios = require('axios');
6
7
  var sharp = require('sharp');
7
8
  var webdriverio = require('webdriverio');
9
+ var dotenv = require('dotenv');
8
10
  var winston = require('winston');
9
11
  var zod = require('zod');
10
12
  var crypto = require('node:crypto');
@@ -22,6 +24,15 @@ function buildUrl(base, extraPath) {
22
24
  }
23
25
  return `${baseUrl}${extraPath}`;
24
26
  }
27
/**
 * Reads the pixel dimensions of a base64-encoded image.
 *
 * A leading `data:image/<type>;base64,` prefix is removed before decoding.
 *
 * @param {string} base64 - Base64 image data, with or without the data-URI prefix.
 * @returns {Promise<{width: number, height: number}>} Dimensions reported by sharp's metadata.
 * @throws {Error} "Unable to get image dimensions" when width or height is missing or zero.
 */
const getImageDimensions = async (base64) => {
  const rawPayload = base64.replace(/^data:image\/\w+;base64,/, "");
  const decodedImage = Buffer.from(rawPayload, "base64");
  const { width, height } = await sharp(decodedImage).metadata();
  if (width && height) {
    return { width, height };
  }
  throw new Error("Unable to get image dimensions");
};
25
36
 
26
37
  const colors = {
27
38
  reset: "\x1B[0m",
@@ -84,6 +95,75 @@ ${logStyles.gray(stack)}` : logMessage;
84
95
  ]
85
96
  });
86
97
 
98
/**
 * Polls `getScreenshot` until two consecutive captures are judged similar by
 * `areImagesSimilar`, or until the time budget is used up.
 *
 * Each capture is resized to `downscaleWidth` x `downscaleHeight` raw pixels
 * with an alpha channel before it is compared with the previous capture.
 *
 * At least one screenshot is always captured, even when `maxTimeoutSec` is 0
 * or negative, so `originalScreenshotBase64` is never `null` on a normal return.
 *
 * @param {() => Promise<string>} getScreenshot - Async callback resolving to a base64 screenshot.
 * @param {object} [options]
 * @param {number} [options.maxTimeoutSec=5] - Overall time budget in seconds.
 * @param {number} [options.intervalSec=0.01] - Pause between captures in seconds.
 * @param {number} [options.tolerance=1e-5] - Maximum ratio of differing pixels still counted as stable.
 * @param {number} [options.pixelThreshold=0] - Per-channel absolute difference a pixel may have without counting as different.
 * @param {number} [options.downscaleWidth=600] - Width used for the comparison image.
 * @param {number} [options.downscaleHeight=600] - Height used for the comparison image.
 * @returns {Promise<{stable: boolean, originalScreenshotBase64: string}>}
 *   `stable: true` with the capture that matched its predecessor, or
 *   `stable: false` with the most recent capture when the budget ran out.
 * @throws Rethrows any error raised while capturing or processing a screenshot (after logging a warning).
 */
const waitForStableScreen = async (getScreenshot, options = {}) => {
  const {
    maxTimeoutSec = 5,
    intervalSec = 0.01,
    tolerance = 1e-5,
    pixelThreshold = 0,
    downscaleWidth = 600,
    downscaleHeight = 600
  } = options;
  const startTime = Date.now();
  const maxTimeoutMillis = maxTimeoutSec * 1e3;
  const intervalMillis = intervalSec * 1e3;
  let previousDownsampledBuffer = null;
  let lastScreenshotBase64 = null;
  // do/while rather than while: the first capture must happen regardless of
  // the budget, otherwise callers would receive a null screenshot.
  do {
    try {
      const screenshotBase64 = await getScreenshot();
      lastScreenshotBase64 = screenshotBase64;
      const imageBuffer = Buffer.from(screenshotBase64, "base64");
      const downsampledBuffer = await sharp(imageBuffer)
        .resize(downscaleWidth, downscaleHeight, { fit: "fill" })
        .ensureAlpha()
        .raw()
        .toBuffer();
      if (previousDownsampledBuffer) {
        const isSimilar = areImagesSimilar(
          previousDownsampledBuffer,
          downsampledBuffer,
          downscaleWidth,
          downscaleHeight,
          tolerance,
          pixelThreshold
        );
        if (isSimilar) {
          return {
            stable: true,
            originalScreenshotBase64: screenshotBase64
          };
        }
      }
      previousDownsampledBuffer = downsampledBuffer;
    } catch (e) {
      globalLogger.warn(`Error during stability check: ${e}`);
      throw e;
    }
    await delay(intervalMillis);
  } while (Date.now() - startTime < maxTimeoutMillis);
  return {
    stable: false,
    originalScreenshotBase64: lastScreenshotBase64
  };
};
146
/**
 * Decides whether two raw pixel buffers are close enough to count as the same frame.
 *
 * Buffers are walked in 4-byte steps; only the first three bytes of each step
 * (red, green, blue) are compared, the fourth byte is ignored.
 *
 * @param {Uint8Array} buffer1 - First raw pixel buffer.
 * @param {Uint8Array} buffer2 - Second raw pixel buffer.
 * @param {number} width - Image width, used with `height` as the pixel-count denominator.
 * @param {number} height - Image height.
 * @param {number} tolerance - Largest allowed ratio of differing pixels.
 * @param {number} pixelThreshold - Largest per-channel absolute difference that is not a change.
 * @returns {boolean} `false` when the buffer lengths differ; otherwise whether
 *   (differing pixels / (width * height)) is at most `tolerance`.
 */
function areImagesSimilar(buffer1, buffer2, width, height, tolerance, pixelThreshold) {
  const byteLength = buffer1.length;
  if (byteLength !== buffer2.length) {
    return false;
  }
  const BYTES_PER_PIXEL = 4;
  const COLOR_CHANNELS = 3;
  let changedPixelCount = 0;
  for (let offset = 0; offset < byteLength; offset += BYTES_PER_PIXEL) {
    for (let channel = 0; channel < COLOR_CHANNELS; channel += 1) {
      const delta = Math.abs(buffer1[offset + channel] - buffer2[offset + channel]);
      if (delta > pixelThreshold) {
        changedPixelCount += 1;
        // One differing channel is enough; count the pixel once.
        break;
      }
    }
  }
  return changedPixelCount / (width * height) <= tolerance;
}
166
+
87
167
  const SavableStepBaseSchema = zod.z.object({
88
168
  id: zod.z.number().optional(),
89
169
  descriptionText: zod.z.string().optional(),
@@ -107,9 +187,17 @@ const SavableTypeStepSchema = SavableStepBaseSchema.extend({
107
187
  type: zod.z.literal("type"),
108
188
  text: zod.z.string()
109
189
  });
110
// Builds the schema used for each optional swipe coordinate (x1, y1, x2, y2).
const optionalSwipeCoordinate = () => zod.z.number().optional();

// A "swipe" step: a required direction, optional start/end coordinates and a
// duration that falls back to 500 when omitted.
const SavableSwipeStepSchema = SavableStepBaseSchema.extend({
  type: zod.z.literal("swipe"),
  direction: zod.z.enum(["left", "right", "up", "down"]),
  x1: optionalSwipeCoordinate(),
  y1: optionalSwipeCoordinate(),
  x2: optionalSwipeCoordinate(),
  y2: optionalSwipeCoordinate(),
  duration: zod.z.number().optional().default(500)
});

// A "scroll" step has the same fields as a swipe step; only the `type`
// discriminator differs.
const SavableScrollStepSchema = SavableSwipeStepSchema.extend({
  type: zod.z.literal("scroll")
});
114
202
  const SavableZoomStepSchema = SavableStepBaseSchema.extend({
115
203
  type: zod.z.literal("zoom"),
@@ -130,6 +218,10 @@ const SavableAIStepSchema = SavableStepBaseSchema.extend({
130
218
  type: zod.z.literal("ai"),
131
219
  instruction: zod.z.string()
132
220
  });
221
// Fields specific to an "enter" step; `delayNextStep` falls back to 500 when omitted.
const enterStepFields = {
  type: zod.z.literal("enter"),
  delayNextStep: zod.z.number().optional().default(500)
};
const SavableEnterStepSchema = SavableStepBaseSchema.extend(enterStepFields);
133
225
  const SavableFileRefStepSchema = SavableStepBaseSchema.extend({
134
226
  type: zod.z.literal("fileRef"),
135
227
  path: zod.z.string(),
@@ -142,6 +234,8 @@ const SavableStepSchema = zod.z.discriminatedUnion("type", [
142
234
  // type: 'assert'
143
235
  SavableTypeStepSchema,
144
236
  // type: 'type'
237
+ SavableSwipeStepSchema,
238
+ // type: 'swipe'
145
239
  SavableScrollStepSchema,
146
240
  // type: 'scroll'
147
241
  SavableZoomStepSchema,
@@ -152,6 +246,8 @@ const SavableStepSchema = zod.z.discriminatedUnion("type", [
152
246
  // type: 'deeplink'
153
247
  SavableAIStepSchema,
154
248
  // type: 'ai'
249
+ SavableEnterStepSchema,
250
+ // type: 'enter'
155
251
  SavableFileRefStepSchema
156
252
  // type: 'fileRef'
157
253
  ]);
@@ -173,7 +269,7 @@ const CACHE_SERVER_URL = "https://cache.mobileboost.io";
173
269
  const GPT_DRIVER_BASE_URL = "https://api.mobileboost.io";
174
270
  const RESCALE_FACTOR = 4;
175
271
  const SMART_LOOP_MAX_ITERATIONS = 15;
176
- const CACHE_RETRY_MS = 2e3;
272
+ const CACHE_RETRY_MS = 1500;
177
273
  const CACHE_CHECK_INTERVAL_MS = 500;
178
274
 
179
275
  function generateCacheHash(apiKey, filepath, stepNumber, description, platform, resolution) {
@@ -257,15 +353,24 @@ async function executeFromCache(params) {
257
353
  return { found: false };
258
354
  } catch (error) {
259
355
  if (axios.isAxiosError(error)) {
260
- globalLogger.warn(`[Cache] Cache lookup failed: ${error.response?.data || error.message}`);
356
+ const errorMsg = `Cache lookup failed: ${error.response?.data || error.message}`;
357
+ globalLogger.warn(`[Cache] ${errorMsg}`);
261
358
  } else {
262
- globalLogger.error(`[Cache] Error executing from cache: ${error}`);
359
+ const errorMsg = `Error executing from cache: ${error}`;
360
+ globalLogger.error(`[Cache] ${errorMsg}`);
263
361
  }
264
362
  return { found: false };
265
363
  }
266
364
  }
267
365
  async function populateCache(params) {
268
366
  try {
367
+ globalLogger.debug(`[populateCache] Parameters: ${JSON.stringify({
368
+ stepNumber: params.stepNumber,
369
+ filepath: params.filepath,
370
+ platform: params.platform,
371
+ screenResolution: params.screenResolution,
372
+ executionDataCount: params.executionData.length
373
+ })}`);
269
374
  const hash = generateCacheHash(
270
375
  params.apiKey,
271
376
  params.filepath,
@@ -274,13 +379,15 @@ async function populateCache(params) {
274
379
  params.platform,
275
380
  params.screenResolution
276
381
  );
382
+ globalLogger.debug(`[populateCache] Generated hash: ${hash}`);
277
383
  const payload = await Promise.all(params.executionData.map(async (item) => {
278
384
  const resizedBuffer = await resizeScreenshotForCache(item.screenshot);
385
+ const scaledScreenshotBase64 = resizedBuffer.toString("base64");
279
386
  const scaledCommands = item.commands.map(
280
387
  (cmd) => scaleCommand(cmd, "divide")
281
388
  );
282
389
  return {
283
- screenshot: resizedBuffer.toString("base64"),
390
+ screenshot: scaledScreenshotBase64,
284
391
  commands: scaledCommands
285
392
  };
286
393
  }));
@@ -292,9 +399,11 @@ async function populateCache(params) {
292
399
  return { success: true };
293
400
  } catch (error) {
294
401
  if (axios.isAxiosError(error)) {
295
- globalLogger.error(`[Cache] Failed to populate cache: ${error.response?.data || error.message}`);
402
+ const errorMsg = `Failed to populate cache: ${error.response?.data || error.message}`;
403
+ globalLogger.error(`[Cache] ${errorMsg}`);
296
404
  } else {
297
- globalLogger.error(`[Cache] Error populating cache: ${error}`);
405
+ const errorMsg = `Error populating cache: ${error}`;
406
+ globalLogger.error(`[Cache] ${errorMsg}`);
298
407
  }
299
408
  return { success: false };
300
409
  }
@@ -424,9 +533,37 @@ function isWaitCommand(cmd) {
424
533
  function isScrollCommand(cmd) {
425
534
  return cmd.startsWith("scroll:");
426
535
  }
536
/**
 * Tells whether a command asks for the Enter key to be pressed.
 *
 * The command is lower-cased and split on "press:"; the piece that follows the
 * first "press:" must, once trimmed, be exactly "enter".
 *
 * @param {string} cmd - Command text.
 * @returns {boolean} `true` for e.g. "press: Enter"; `false` when there is no
 *   "press:" marker or the key named after it is not "enter".
 */
function isEnterCommand(cmd) {
  const [, afterMarker] = cmd.toLowerCase().split("press:");
  if (afterMarker === undefined) {
    return false;
  }
  return afterMarker.trim() === "enter";
}
427
540
  function isTypeCommand(cmd) {
428
541
  return cmd.startsWith("type:");
429
542
  }
543
/**
 * Tells whether a command string is a slide command.
 *
 * @param {string} cmd - Command text.
 * @returns {boolean} `true` when `cmd` begins with "slide" (case-sensitive).
 */
function isSlideCommand(cmd) {
  const SLIDE_KEYWORD = "slide";
  return cmd.startsWith(SLIDE_KEYWORD);
}
546
/**
 * Parses a slide command of the form
 * `slide <up|down|left|right> <N>%[: free text];<x>;<y>` (case-insensitive).
 *
 * The vertical directions are swapped in the result ("up" becomes "down" and
 * vice versa); "left" and "right" are returned unchanged.
 *
 * @param {string} cmd - Command text.
 * @returns {{direction: string, percentage: number, startX: number, startY: number} | null}
 *   The parsed parameters, or `null` when `cmd` does not match the pattern.
 */
function parseSlideCommand(cmd) {
  const SLIDE_PATTERN = /slide\s+(up|down|left|right)\s+(\d+)%(?::\s*[^;]*)?;(\d+);(\d+)/i;
  const parts = cmd.match(SLIDE_PATTERN);
  if (parts === null) {
    return null;
  }
  const [, rawDirection, rawPercentage, rawStartX, rawStartY] = parts;
  const toInteger = (digits) => parseInt(digits, 10);
  let direction;
  switch (rawDirection.toLowerCase()) {
    case "down":
      direction = "up";
      break;
    case "up":
      direction = "down";
      break;
    case "left":
      direction = "left";
      break;
    case "right":
      direction = "right";
      break;
  }
  return {
    direction,
    percentage: toInteger(rawPercentage),
    startX: toInteger(rawStartX),
    startY: toInteger(rawStartY)
  };
}
430
567
 
431
568
  async function executeSmartLoop(ctx, params) {
432
569
  const maxCacheAttempts = Math.floor(CACHE_RETRY_MS / CACHE_CHECK_INTERVAL_MS);
@@ -435,35 +572,56 @@ async function executeSmartLoop(ctx, params) {
435
572
  let lastCacheIndex = void 0;
436
573
  let anyCacheMiss = false;
437
574
  let everHadCacheHit = false;
575
+ let totalCacheHits = 0;
438
576
  const currentExecutionData = [];
577
+ const stepStartTime = performance.now();
578
+ const iterationTimes = [];
439
579
  globalLogger.info(`[SmartLoop] Starting for step ${params.stepNumber}: "${params.description}"`);
580
+ globalLogger.debug(`[Performance][SmartLoop] Step execution started at ${stepStartTime.toFixed(2)}ms`);
440
581
  try {
441
582
  while (loopCount < SMART_LOOP_MAX_ITERATIONS) {
583
+ const iterationStartTime = performance.now();
584
+ globalLogger.debug(`[SmartLoop] Starting iteration ${loopCount + 1}/${SMART_LOOP_MAX_ITERATIONS}`);
442
585
  let screenshot = "";
443
586
  let commands = [];
444
587
  let isCacheHit = false;
588
+ let screenshotResolution = void 0;
589
+ const stabilityResult = await waitForStableScreen(ctx.getScreenshot);
445
590
  for (let attempt = 0; attempt < maxCacheAttempts; attempt++) {
446
- screenshot = await ctx.getScreenshot();
591
+ const screenshotStartTime = performance.now();
592
+ const screenshotEndTime = performance.now();
593
+ globalLogger.debug(`[Performance][SmartLoop] Screenshot capture took ${(screenshotEndTime - screenshotStartTime).toFixed(2)}ms`);
594
+ if (!stabilityResult.stable) {
595
+ globalLogger.warn("[SmartLoop] Screen did not stabilize within timeout, proceeding with last screenshot");
596
+ }
597
+ screenshot = stabilityResult.originalScreenshotBase64;
598
+ if (screenshotResolution === void 0) {
599
+ screenshotResolution = await getImageDimensions(screenshot);
600
+ }
447
601
  const sizeInBytes = screenshot.length * 0.75;
448
602
  const sizeInMB = (sizeInBytes / (1024 * 1024)).toFixed(2);
449
603
  globalLogger.debug(`[SmartLoop] Captured screenshot: ~${sizeInMB} MB`);
450
604
  try {
451
605
  globalLogger.debug(`[SmartLoop] Checking cache (Attempt ${attempt + 1}/${maxCacheAttempts})`);
606
+ const cacheCheckStartTime = performance.now();
452
607
  const cacheResult = await executeFromCache({
453
608
  apiKey: ctx.organisationId,
454
609
  stepNumber: params.stepNumber,
455
610
  stepDescription: params.description,
456
611
  screenshot,
457
- screenResolution: ctx.screenSize,
612
+ screenResolution: screenshotResolution,
458
613
  highestUsedIndex: lastCacheIndex,
459
614
  platform: ctx.platform,
460
615
  filepath: params.filepath
461
616
  });
617
+ const cacheCheckEndTime = performance.now();
618
+ globalLogger.debug(`[Performance][SmartLoop] Cache check took ${(cacheCheckEndTime - cacheCheckStartTime).toFixed(2)}ms`);
462
619
  if (cacheResult.found && cacheResult.cacheCommands) {
463
620
  commands = cacheResult.cacheCommands;
464
621
  lastCacheIndex = cacheResult.cacheIndex;
465
622
  isCacheHit = true;
466
623
  everHadCacheHit = true;
624
+ totalCacheHits++;
467
625
  globalLogger.info(`[SmartLoop] Cache Hit! (${commands.length} commands)`);
468
626
  break;
469
627
  }
@@ -475,18 +633,53 @@ async function executeSmartLoop(ctx, params) {
475
633
  await delay(CACHE_CHECK_INTERVAL_MS);
476
634
  }
477
635
  }
636
+ if (screenshotResolution === void 0) {
637
+ const screenshot2 = await ctx.getScreenshot();
638
+ screenshotResolution = await getImageDimensions(screenshot2);
639
+ }
478
640
  let aiCommands = [];
641
+ let actionDescription = [];
479
642
  if (!isCacheHit) {
480
643
  anyCacheMiss = true;
481
644
  globalLogger.info(`[SmartLoop] Cache Miss. Requesting AI agent...`);
482
- const agentResponse = await executeAgentStep({
483
- apiKey: ctx.organisationId,
484
- base64_screenshot: screenshot,
485
- instruction: params.instruction,
486
- action_history: actionHistory
487
- });
488
- aiCommands = agentResponse.appetizeCommands || [];
489
- const gptCommands = agentResponse.gptCommands || [];
645
+ const aiStartTime = performance.now();
646
+ let agentResponse;
647
+ if (ctx.middleLayerAssertFn) {
648
+ const response = await ctx.middleLayerAssertFn(screenshot);
649
+ const results = response.data.results;
650
+ const failedConditions = Object.entries(results).filter(([_, success]) => !success).map(([key, _]) => key);
651
+ if (failedConditions.length === 0) {
652
+ agentResponse = {
653
+ appetizeCommands: [
654
+ "task complete: All assertions passed"
655
+ ]
656
+ };
657
+ } else {
658
+ agentResponse = {
659
+ appetizeCommands: [
660
+ `error detected: Failed conditions: ${failedConditions.join(", ")}`
661
+ ]
662
+ };
663
+ }
664
+ } else {
665
+ agentResponse = await executeAgentStep({
666
+ apiKey: ctx.organisationId,
667
+ base64_screenshot: screenshot,
668
+ instruction: params.instruction,
669
+ action_history: actionHistory
670
+ });
671
+ }
672
+ const aiEndTime = performance.now();
673
+ globalLogger.debug(`[Performance][SmartLoop] AI agent call took ${(aiEndTime - aiStartTime).toFixed(2)}ms`);
674
+ aiCommands = agentResponse.appetizeCommands ?? [];
675
+ const gptCommands = agentResponse.gptCommands ?? [];
676
+ actionDescription = [];
677
+ const actionDescriptionIndex = gptCommands.findIndex(
678
+ (entry) => entry.startsWith("actions_description:")
679
+ );
680
+ if (actionDescriptionIndex !== -1) {
681
+ actionDescription = gptCommands.slice(actionDescriptionIndex, actionDescriptionIndex + 1);
682
+ }
490
683
  const reasoningIndex = gptCommands.findIndex((entry) => entry.startsWith("reasoning:"));
491
684
  if (reasoningIndex !== -1) {
492
685
  const parsedCommands = gptCommands.slice(reasoningIndex);
@@ -496,19 +689,23 @@ async function executeSmartLoop(ctx, params) {
496
689
  }
497
690
  actionHistory = [...actionHistory, ...parsedCommands];
498
691
  }
499
- commands = [...aiCommands];
692
+ commands = [...actionDescription, ...aiCommands];
500
693
  globalLogger.debug(`[SmartLoop] AI returned ${commands.length} command(s)`);
501
694
  }
502
695
  currentExecutionData.push({
503
696
  screenshot,
504
- commands: aiCommands.length > 0 ? aiCommands : commands
697
+ commands
505
698
  });
506
- await ctx.logCodeExecution(screenshot, commands.join("\n"));
699
+ const logPromise = ctx.logCodeExecution(screenshot, commands.join("\n"), isCacheHit);
700
+ if (ctx.pendingLogPromises) {
701
+ ctx.pendingLogPromises.push(logPromise);
702
+ }
507
703
  let actionExecuted = false;
508
704
  let taskCompleted = false;
509
705
  if (commands.length > 0) {
510
706
  globalLogger.debug(`[SmartLoop] Executing ${commands.length} command(s)`);
511
707
  }
708
+ const commandExecutionStartTime = performance.now();
512
709
  for (const cmd of commands) {
513
710
  if (isTaskComplete(cmd)) {
514
711
  taskCompleted = true;
@@ -525,9 +722,18 @@ async function executeSmartLoop(ctx, params) {
525
722
  const coords = parseTapCoordinates(cmd);
526
723
  if (coords) {
527
724
  globalLogger.debug(`[SmartLoop] Executing tap at (${coords.x}, ${coords.y})`);
528
- await ctx.performTap(coords.x, coords.y);
725
+ await ctx.performTap(
726
+ coords.x,
727
+ coords.y,
728
+ screenshotResolution.width,
729
+ screenshotResolution.height
730
+ );
529
731
  actionExecuted = true;
530
732
  }
733
+ } else if (isEnterCommand(cmd)) {
734
+ globalLogger.debug(`[SmartLoop] Pressing Enter`);
735
+ await ctx.performPressEnter();
736
+ actionExecuted = true;
531
737
  } else if (isWaitCommand(cmd)) {
532
738
  const seconds = parseWaitSeconds(cmd);
533
739
  if (seconds) {
@@ -539,7 +745,50 @@ async function executeSmartLoop(ctx, params) {
539
745
  const direction = parseScrollDirection(cmd);
540
746
  if (direction) {
541
747
  globalLogger.debug(`[SmartLoop] Scrolling ${direction}`);
542
- await ctx.performScroll(direction);
748
+ await ctx.performSwipe({ direction });
749
+ if (isCacheHit) {
750
+ await delay(650);
751
+ }
752
+ actionExecuted = true;
753
+ }
754
+ } else if (isSlideCommand(cmd)) {
755
+ const slideParams = parseSlideCommand(cmd);
756
+ if (slideParams) {
757
+ const { direction, percentage, startX, startY } = slideParams;
758
+ const isVertical = direction === "up" || direction === "down";
759
+ const distance = Math.round(
760
+ (isVertical ? screenshotResolution.height : screenshotResolution.width) * (percentage / 100)
761
+ );
762
+ let endX = startX;
763
+ let endY = startY;
764
+ switch (direction) {
765
+ case "up":
766
+ endY = startY + distance;
767
+ break;
768
+ case "down":
769
+ endY = startY - distance;
770
+ break;
771
+ case "left":
772
+ endX = startX - distance;
773
+ break;
774
+ case "right":
775
+ endX = startX + distance;
776
+ break;
777
+ }
778
+ globalLogger.debug(`[SmartLoop] Sliding ${direction} ${percentage}% from (${startX}, ${startY}) to (${endX}, ${endY})`);
779
+ await ctx.performSwipe({
780
+ direction,
781
+ x1: startX,
782
+ y1: startY,
783
+ x2: endX,
784
+ y2: endY,
785
+ screenshotWidth: screenshotResolution.width,
786
+ screenshotHeight: screenshotResolution.height,
787
+ duration: 500
788
+ });
789
+ if (isCacheHit) {
790
+ await delay(650);
791
+ }
543
792
  actionExecuted = true;
544
793
  }
545
794
  } else if (isTypeCommand(cmd)) {
@@ -552,49 +801,83 @@ async function executeSmartLoop(ctx, params) {
552
801
  }
553
802
  }
554
803
  if (actionExecuted) {
804
+ const commandExecutionEndTime = performance.now();
805
+ globalLogger.debug(`[Performance][SmartLoop] Command execution took ${(commandExecutionEndTime - commandExecutionStartTime).toFixed(2)}ms`);
555
806
  if (isCacheHit) {
556
807
  actionHistory.push(...commands);
557
808
  }
558
809
  await delay(100);
559
810
  }
811
+ const iterationEndTime = performance.now();
812
+ const iterationDuration = iterationEndTime - iterationStartTime;
813
+ iterationTimes.push(iterationDuration);
814
+ globalLogger.debug(`[Performance][SmartLoop] Iteration ${loopCount + 1} completed in ${iterationDuration.toFixed(2)}ms (${(iterationDuration / 1e3).toFixed(2)}s)`);
560
815
  if (taskCompleted) {
561
816
  globalLogger.info(`[SmartLoop] Task completed successfully`);
817
+ const stepEndTime2 = performance.now();
818
+ const totalDuration2 = stepEndTime2 - stepStartTime;
819
+ const totalSeconds2 = totalDuration2 / 1e3;
820
+ const totalMinutes2 = totalSeconds2 / 60;
821
+ const averageIterationTime = iterationTimes.reduce((sum, time) => sum + time, 0) / iterationTimes.length;
822
+ globalLogger.debug(`[Performance][SmartLoop] Step execution summary:`);
823
+ globalLogger.debug(`[Performance][SmartLoop] - Total iterations: ${loopCount + 1}`);
824
+ globalLogger.debug(`[Performance][SmartLoop] - Total duration: ${totalDuration2.toFixed(2)}ms (${totalSeconds2.toFixed(2)}s / ${totalMinutes2.toFixed(2)}min)`);
825
+ globalLogger.debug(`[Performance][SmartLoop] - Average iteration time: ${averageIterationTime.toFixed(2)}ms (${(averageIterationTime / 1e3).toFixed(2)}s)`);
826
+ globalLogger.debug(`[Performance][SmartLoop] - Individual iteration times: ${iterationTimes.map((t) => `${t.toFixed(2)}ms`).join(", ")}`);
562
827
  if (anyCacheMiss && currentExecutionData.length > 0) {
563
828
  globalLogger.info(`[SmartLoop] Populating cache with ${currentExecutionData.length} frame(s)...`);
564
- try {
565
- await populateCache({
566
- apiKey: ctx.organisationId,
567
- stepNumber: params.stepNumber,
568
- stepDescription: params.description,
569
- executionData: currentExecutionData,
570
- screenResolution: ctx.screenSize,
571
- platform: ctx.platform,
572
- filepath: params.filepath
573
- });
829
+ const cachePopulateStartTime = performance.now();
830
+ populateCache({
831
+ apiKey: ctx.organisationId,
832
+ stepNumber: params.stepNumber,
833
+ stepDescription: params.description,
834
+ executionData: currentExecutionData,
835
+ screenResolution: screenshotResolution,
836
+ platform: ctx.platform,
837
+ filepath: params.filepath
838
+ }).then(() => {
839
+ const cachePopulateEndTime = performance.now();
840
+ globalLogger.debug(`[Performance][SmartLoop] Cache population took ${(cachePopulateEndTime - cachePopulateStartTime).toFixed(2)}ms`);
574
841
  globalLogger.debug(`[SmartLoop] Cache populated successfully`);
575
- } catch (e) {
576
- globalLogger.warn(`[SmartLoop] Failed to populate cache: ${e.message}`);
577
- }
842
+ }).catch((e) => globalLogger.warn(`[SmartLoop] Cache population failed: ${e.message}`));
578
843
  } else if (!anyCacheMiss) {
579
844
  globalLogger.debug(`[SmartLoop] Skipping cache population (all actions were cached)`);
580
845
  }
581
846
  return {
582
847
  success: true,
583
848
  iterations: loopCount + 1,
584
- cacheHit: everHadCacheHit
849
+ cacheHit: everHadCacheHit,
850
+ cacheHitCount: totalCacheHits
585
851
  };
586
852
  }
587
853
  loopCount++;
588
854
  }
855
+ const stepEndTime = performance.now();
856
+ const totalDuration = stepEndTime - stepStartTime;
857
+ const totalSeconds = totalDuration / 1e3;
858
+ const totalMinutes = totalSeconds / 60;
859
+ globalLogger.debug(`[Performance][SmartLoop] Timeout reached after ${loopCount} iterations`);
860
+ globalLogger.debug(`[Performance][SmartLoop] - Total duration: ${totalDuration.toFixed(2)}ms (${totalSeconds.toFixed(2)}s / ${totalMinutes.toFixed(2)}min)`);
861
+ globalLogger.debug(`[Performance][SmartLoop] - Individual iteration times: ${iterationTimes.map((t) => `${t.toFixed(2)}ms`).join(", ")}`);
589
862
  throw new Error(`Smart Loop timeout after ${SMART_LOOP_MAX_ITERATIONS} iterations`);
590
863
  } catch (error) {
591
864
  const message = error instanceof Error ? error.message : String(error);
865
+ const stepEndTime = performance.now();
866
+ const totalDuration = stepEndTime - stepStartTime;
867
+ const totalSeconds = totalDuration / 1e3;
868
+ const totalMinutes = totalSeconds / 60;
592
869
  globalLogger.error(`[SmartLoop] Error: ${message}`);
870
+ globalLogger.debug(`[Performance][SmartLoop] Error occurred after ${loopCount + 1} iteration(s)`);
871
+ globalLogger.debug(`[Performance][SmartLoop] - Total duration before error: ${totalDuration.toFixed(2)}ms (${totalSeconds.toFixed(2)}s / ${totalMinutes.toFixed(2)}min)`);
872
+ if (iterationTimes.length > 0) {
873
+ globalLogger.debug(`[Performance][SmartLoop] - Individual iteration times: ${iterationTimes.map((t) => `${t.toFixed(2)}ms`).join(", ")}`);
874
+ }
593
875
  return {
594
876
  success: false,
595
877
  error: message,
596
878
  iterations: loopCount + 1,
597
- cacheHit: everHadCacheHit
879
+ cacheHit: everHadCacheHit,
880
+ cacheHitCount: totalCacheHits
598
881
  };
599
882
  }
600
883
  }
@@ -625,8 +908,13 @@ class GptDriver {
625
908
  step_number = 1;
626
909
  organisationId;
627
910
  configFilePath;
911
+ // Session Execution Stats
912
+ _stats_startTime = 0;
913
+ _stats_executedSteps = 0;
914
+ _stats_cacheHits = 0;
628
915
  // Smart loop state - maintains action history across steps for context
629
916
  globalActionHistory = [];
917
+ pendingLogPromises = [];
630
918
  /**
631
919
  * Creates an instance of the GptDriver class.
632
920
  *
@@ -699,6 +987,9 @@ class GptDriver {
699
987
  */
700
988
  async startSession() {
701
989
  globalLogger.info("Starting session...");
990
+ this._stats_startTime = performance.now();
991
+ this._stats_executedSteps = 0;
992
+ this._stats_cacheHits = 0;
702
993
  if (!this.useGptDriverCloud) {
703
994
  if (this.driver) {
704
995
  let platform;
@@ -798,6 +1089,37 @@ class GptDriver {
798
1089
  getSessionLink() {
799
1090
  return `https://app.mobileboost.io/gpt-driver/sessions/${this.gptDriverSessionId}`;
800
1091
  }
1092
+ /**
1093
+ * Stops the current GPTDriver session and update its state.
1094
+ *
1095
+ * This method sends a request to the GPT Driver server to stop the session and logs the session status as either "failed" or "success."
1096
+ *
1097
+ * @param {"failed" | "success"} status - Indicates the outcome of the session.
1098
+ * Use "success" if the session completed as expected,
1099
+ * or "failed" if the session encountered an error or issue.
1100
+ *
1101
+ * @throws {Error} If the request to stop the session fails.
1102
+ */
1103
+ /**
1104
+ * Marks the current GPTDriver session as succeeded.
1105
+ *
1106
+ * This method stops the session and logs it as "succeeded"
1107
+ *
1108
+ * @throws {Error} If the request to stop the session fails.
1109
+ */
1110
+ async setSessionSucceeded() {
1111
+ await this.setSessionStatus("succeeded");
1112
+ }
1113
+ /**
1114
+ * Marks the current GPTDriver session as failed.
1115
+ *
1116
+ * This method stops the session and logs it as "failed."
1117
+ *
1118
+ * @throws {Error} If the request to stop the session fails.
1119
+ */
1120
+ async setSessionFailed() {
1121
+ await this.setSessionStatus("failed");
1122
+ }
801
1123
  /**
802
1124
  * Stops the current GPTDriver session and update its state.
803
1125
  *
@@ -810,7 +1132,13 @@ class GptDriver {
810
1132
  * @throws {Error} If the request to stop the session fails.
811
1133
  */
812
1134
  async setSessionStatus(status) {
1135
+ try {
1136
+ await Promise.all(this.pendingLogPromises);
1137
+ } catch (e) {
1138
+ globalLogger.error(`Error waiting for pending log promises: ${e.message}`);
1139
+ }
813
1140
  if (this.gptDriverSessionId) {
1141
+ this.printSessionSummary();
814
1142
  globalLogger.info(`Stopping session with status: ${status}`);
815
1143
  await axios.post(
816
1144
  `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/stop`,
@@ -826,6 +1154,19 @@ class GptDriver {
826
1154
  this.globalActionHistory = [];
827
1155
  }
828
1156
  }
1157
+ printSessionSummary() {
1158
+ if (this._stats_startTime === 0) return;
1159
+ const endTime = performance.now();
1160
+ const totalTime = (endTime - this._stats_startTime) / 1e3;
1161
+ globalLogger.info(`
1162
+ ${"=".repeat(50)}`);
1163
+ globalLogger.info(`Session Execution Summary:`);
1164
+ globalLogger.info(`Total Session Time: ${totalTime.toFixed(2)} seconds`);
1165
+ globalLogger.info(`Total Steps executed: ${this._stats_executedSteps}`);
1166
+ globalLogger.info(`Total Step Cache Hits: ${this._stats_cacheHits}`);
1167
+ globalLogger.info(`${"=".repeat(50)}
1168
+ `);
1169
+ }
829
1170
  // ─────────────────────────────────────────────────────────────────────────────
830
1171
  // SMART LOOP INTEGRATION
831
1172
  // ─────────────────────────────────────────────────────────────────────────────
@@ -833,21 +1174,23 @@ class GptDriver {
833
1174
  * Creates a SmartLoopContext for the current session.
834
1175
  * This context provides all the callbacks needed by the smart loop executor.
835
1176
  */
836
- createSmartLoopContext() {
1177
+ createSmartLoopContext(options) {
837
1178
  if (!this.organisationId) {
838
1179
  throw new Error("Organisation ID is missing, please set it in the GPTDriver constructor");
839
1180
  }
840
1181
  return {
841
1182
  apiKey: this.apiKey,
842
1183
  platform: this.appiumSessionConfig?.platform,
843
- screenSize: this.appiumSessionConfig.size,
844
1184
  globalActionHistory: this.globalActionHistory,
845
- getScreenshot: () => this.getScreenshot(this.appiumSessionConfig),
846
- performTap: (x, y) => this.performTap(x, y),
847
- performScroll: (direction) => this.performScroll(direction),
1185
+ getScreenshot: () => this.getScreenshot(this.appiumSessionConfig, false),
1186
+ performTap: (x, y, screenshotWidth, screenshotHeight) => this.performTap(x, y, screenshotWidth, screenshotHeight),
1187
+ performSwipe: (params) => this.performSwipe(params),
848
1188
  performType: (text) => this.performType(text),
849
- logCodeExecution: async (screenshot, command) => this.logCodeExecution(screenshot, command),
850
- organisationId: this.organisationId
1189
+ performPressEnter: () => this.performPressEnter(),
1190
+ logCodeExecution: async (screenshot, command, isCacheHit) => this.logCodeExecution(screenshot, command, isCacheHit),
1191
+ organisationId: this.organisationId,
1192
+ middleLayerAssertFn: options?.middleLayerAssertFn,
1193
+ pendingLogPromises: options?.pendingLogPromises
851
1194
  };
852
1195
  }
853
1196
  /**
@@ -897,15 +1240,31 @@ class GptDriver {
897
1240
  /**
898
1241
  * Performs a tap action at the specified coordinates.
899
1242
  */
900
- async performTap(x, y) {
1243
+ async performTap(x, y, screenshotWidth, screenshotHeight) {
901
1244
  const client = await this.getWdioClient();
1245
+ const platform = this.appiumSessionConfig?.platform;
1246
+ const { width: w, height: h } = this.appiumSessionConfig?.size ?? { width: 1080, height: 1920 };
1247
+ const scaled = this.scaleForIOS(
1248
+ x,
1249
+ y,
1250
+ platform,
1251
+ screenshotWidth,
1252
+ screenshotHeight,
1253
+ w,
1254
+ h
1255
+ );
1256
+ const clampedX = this.clamp(scaled.x, 0, w - 1);
1257
+ const clampedY = this.clamp(scaled.y, 0, h - 1);
1258
+ globalLogger.debug(
1259
+ `[Tap] Platform: ${platform}, Input: ${x},${y}, Window: ${w}x${h}, Final: ${clampedX},${clampedY}`
1260
+ );
902
1261
  await client.performActions([
903
1262
  {
904
1263
  type: "pointer",
905
1264
  id: "finger1",
906
1265
  parameters: { pointerType: "touch" },
907
1266
  actions: [
908
- { type: "pointerMove", duration: 0, x, y },
1267
+ { type: "pointerMove", duration: 0, x: clampedX, y: clampedY },
909
1268
  { type: "pointerDown", button: 0 },
910
1269
  { type: "pause", duration: 100 },
911
1270
  { type: "pointerUp", button: 0 }
@@ -915,25 +1274,143 @@ class GptDriver {
915
1274
  }
916
1275
  async performType(text) {
917
1276
  const client = await this.getWdioClient();
918
- await client.keys(text.split(""));
1277
+ const platform = this.appiumSessionConfig?.platform;
1278
+ if (platform === "iOS") {
1279
+ const actions = text.split("").flatMap((char) => [
1280
+ { type: "keyDown", value: char },
1281
+ { type: "keyUp", value: char }
1282
+ ]);
1283
+ await client.performActions([
1284
+ {
1285
+ type: "key",
1286
+ id: "keyboard",
1287
+ actions
1288
+ }
1289
+ ]);
1290
+ } else {
1291
+ await client.keys(text.split(""));
1292
+ }
1293
+ }
1294
+ async performPressEnter() {
1295
+ const client = await this.getWdioClient();
1296
+ const platform = this.appiumSessionConfig?.platform;
1297
+ if (platform === "iOS") {
1298
+ await client.performActions([
1299
+ {
1300
+ type: "key",
1301
+ id: "keyboard",
1302
+ actions: [
1303
+ { type: "keyDown", value: "\uE007" },
1304
+ { type: "keyUp", value: "\uE007" }
1305
+ ]
1306
+ }
1307
+ ]);
1308
+ } else {
1309
+ await client.keys(["Enter"]);
1310
+ }
1311
+ }
1312
+ clamp(value, min, max) {
1313
+ return Math.max(min, Math.min(max, value));
1314
+ }
1315
+ scaleForIOS(x, y, platform, screenshotWidth, screenshotHeight, windowWidth, windowHeight) {
1316
+ if (platform !== "iOS" || !screenshotWidth || !screenshotHeight || !windowWidth || !windowHeight) {
1317
+ return { x: Math.round(x), y: Math.round(y) };
1318
+ }
1319
+ const scaleX = windowWidth / screenshotWidth;
1320
+ const scaleY = windowHeight / screenshotHeight;
1321
+ return {
1322
+ x: Math.round(x * scaleX),
1323
+ y: Math.round(y * scaleY)
1324
+ };
919
1325
  }
920
- async performScroll(direction) {
1326
+ async performSwipe(params) {
921
1327
  const client = await this.getWdioClient();
922
- const w = this.appiumSessionConfig?.size?.width ?? 1080;
923
- const h = this.appiumSessionConfig?.size?.height ?? 1920;
924
- const x = Math.round(w / 2);
925
- const startY = direction === "down" ? Math.round(h * 0.8) : Math.round(h * 0.2);
926
- const endY = direction === "down" ? Math.round(h * 0.2) : Math.round(h * 0.8);
1328
+ const platform = this.appiumSessionConfig?.platform;
1329
+ const { width: w, height: h } = this.appiumSessionConfig?.size ?? { width: 1080, height: 1920 };
1330
+ const defaultStartX = w / 2;
1331
+ let defaultStartY;
1332
+ if (params.direction === "down") {
1333
+ defaultStartY = h * 0.75;
1334
+ } else if (params.direction === "up") {
1335
+ defaultStartY = h * 0.25;
1336
+ } else {
1337
+ defaultStartY = h / 2;
1338
+ }
1339
+ let startX;
1340
+ let startY;
1341
+ if (params.x1 !== void 0 || params.y1 !== void 0) {
1342
+ const scaled = this.scaleForIOS(
1343
+ params.x1 ?? defaultStartX,
1344
+ params.y1 ?? defaultStartY,
1345
+ platform,
1346
+ params.screenshotWidth,
1347
+ params.screenshotHeight,
1348
+ w,
1349
+ h
1350
+ );
1351
+ startX = scaled.x;
1352
+ startY = scaled.y;
1353
+ } else {
1354
+ startX = Math.round(defaultStartX);
1355
+ startY = Math.round(defaultStartY);
1356
+ }
1357
+ startX = this.clamp(startX, 0, w - 1);
1358
+ startY = this.clamp(startY, 0, h - 1);
1359
+ let endX;
1360
+ let endY;
1361
+ if (params.x2 !== void 0 || params.y2 !== void 0) {
1362
+ const scaled = this.scaleForIOS(
1363
+ params.x2 ?? startX,
1364
+ params.y2 ?? startY,
1365
+ platform,
1366
+ params.screenshotWidth,
1367
+ params.screenshotHeight,
1368
+ w,
1369
+ h
1370
+ );
1371
+ endX = scaled.x;
1372
+ endY = scaled.y;
1373
+ } else {
1374
+ const deltaX = Math.round(w * 0.5);
1375
+ const deltaY = Math.round(h * 0.5);
1376
+ switch (params.direction) {
1377
+ case "left":
1378
+ endX = Math.max(0, startX - deltaX);
1379
+ endY = startY;
1380
+ break;
1381
+ case "right":
1382
+ endX = Math.min(w - 1, startX + deltaX);
1383
+ endY = startY;
1384
+ break;
1385
+ case "up":
1386
+ endX = startX;
1387
+ endY = Math.min(h - 1, startY + deltaY);
1388
+ break;
1389
+ case "down":
1390
+ endX = startX;
1391
+ endY = Math.max(0, startY - deltaY);
1392
+ break;
1393
+ default:
1394
+ endX = startX;
1395
+ endY = startY;
1396
+ }
1397
+ }
1398
+ endX = this.clamp(endX, 0, w - 1);
1399
+ endY = this.clamp(endY, 0, h - 1);
1400
+ globalLogger.debug(
1401
+ `[Swipe] Platform: ${platform}, Direction: ${params.direction}, Start: ${startX},${startY}, End: ${endX},${endY}`
1402
+ );
1403
+ const duration = params.duration ?? 500;
927
1404
  await client.performActions([
928
1405
  {
929
1406
  type: "pointer",
930
1407
  id: "finger1",
931
1408
  parameters: { pointerType: "touch" },
932
1409
  actions: [
933
- { type: "pointerMove", duration: 0, x, y: startY },
1410
+ { type: "pointerMove", duration: 0, x: startX, y: startY },
934
1411
  { type: "pointerDown", button: 0 },
935
1412
  { type: "pause", duration: 100 },
936
- { type: "pointerMove", duration: 500, x, y: endY },
1413
+ { type: "pointerMove", duration, x: endX, y: endY },
937
1414
  { type: "pointerUp", button: 0 }
938
1415
  ]
939
1416
  }
@@ -952,17 +1429,17 @@ class GptDriver {
952
1429
  if (found) {
953
1430
  return;
954
1431
  }
955
- await this.performScroll(direction);
1432
+ await this.performSwipe({ direction });
956
1433
  await this._delay(500);
957
1434
  }
958
1435
  throw new Error(`scrollUntil target not found after ${max} scroll(s)`);
959
1436
  }
960
- async getScreenshot(appiumSessionConfig) {
1437
+ async getScreenshot(appiumSessionConfig, shouldScale = true) {
961
1438
  globalLogger.debug("Capturing screenshot...");
962
1439
  const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
963
1440
  const screenshotResponse = await axios.get(url);
964
1441
  let screenshot = await screenshotResponse.data.value;
965
- if (appiumSessionConfig.platform === "iOS") {
1442
+ if (appiumSessionConfig.platform === "iOS" && shouldScale) {
966
1443
  globalLogger.debug(`Resizing iOS screenshot to ${appiumSessionConfig.size.width}x${appiumSessionConfig.size.height}`);
967
1444
  const imageBuffer = Buffer.from(screenshot, "base64");
968
1445
  const transformedImage = await sharp(imageBuffer).resize(appiumSessionConfig.size.width, appiumSessionConfig.size.height).toBuffer();
@@ -1016,41 +1493,71 @@ class GptDriver {
1016
1493
  }
1017
1494
  }
1018
1495
  async aiExecute(commandOrOptions, options) {
1496
+ const startTime = performance.now();
1497
+ globalLogger.debug(`[Performance] aiExecute started at ${startTime.toFixed(2)}ms`);
1019
1498
  const command = typeof commandOrOptions === "string" ? commandOrOptions : commandOrOptions.command;
1020
1499
  const opts = typeof commandOrOptions === "string" ? options : commandOrOptions;
1021
1500
  const appiumHandler = opts?.appiumHandler;
1022
1501
  const cachingMode = opts?.cachingMode;
1023
1502
  const useSmartLoop = opts?.useSmartLoop ?? false;
1024
1503
  if (!this.appiumSessionStarted) {
1504
+ const sessionStartTime = performance.now();
1505
+ globalLogger.debug(`[Performance] Starting session...`);
1025
1506
  await this.startSession();
1507
+ const sessionEndTime = performance.now();
1508
+ globalLogger.debug(`[Performance] Session started in ${(sessionEndTime - sessionStartTime).toFixed(2)}ms`);
1026
1509
  }
1027
1510
  globalLogger.info(`Executing command: ${command}`);
1028
1511
  const driver = this.driver;
1029
1512
  if (appiumHandler != null) {
1030
1513
  try {
1514
+ const handlerStartTime = performance.now();
1515
+ globalLogger.debug(`[Performance] Executing custom Appium handler...`);
1031
1516
  await this.takeScreenshotAndLogCodeExecution(appiumHandler.toString());
1032
1517
  await appiumHandler(driver);
1518
+ const handlerEndTime = performance.now();
1033
1519
  globalLogger.debug("Custom Appium handler executed successfully");
1520
+ globalLogger.debug(`[Performance] Appium handler completed in ${(handlerEndTime - handlerStartTime).toFixed(2)}ms`);
1034
1521
  this.step_number++;
1522
+ this._stats_executedSteps++;
1523
+ const totalTime2 = performance.now() - startTime;
1524
+ globalLogger.debug(`[Performance] aiExecute total time: ${totalTime2.toFixed(2)}ms`);
1035
1525
  return;
1036
1526
  } catch (e) {
1037
1527
  globalLogger.warn("Custom Appium handler failed, falling back to AI execution");
1038
1528
  }
1039
1529
  }
1040
1530
  if (useSmartLoop) {
1041
- const ctx = this.createSmartLoopContext();
1531
+ const smartLoopStartTime = performance.now();
1532
+ globalLogger.debug(`[Performance] Starting smart loop execution...`);
1533
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1042
1534
  const result = await executeSmartLoop(ctx, {
1043
1535
  stepNumber: this.step_number,
1044
1536
  description: command,
1045
- instruction: command
1537
+ instruction: command,
1538
+ ...this.testId && {
1539
+ filepath: this.testId
1540
+ }
1046
1541
  });
1542
+ const smartLoopEndTime = performance.now();
1543
+ globalLogger.debug(`[Performance] Smart loop completed in ${(smartLoopEndTime - smartLoopStartTime).toFixed(2)}ms`);
1047
1544
  if (!result.success) {
1048
1545
  throw new Error(result.error || "Smart loop execution failed");
1049
1546
  }
1547
+ if (result.cacheHitCount) {
1548
+ this._stats_cacheHits += result.cacheHitCount;
1549
+ }
1050
1550
  this.step_number++;
1551
+ this._stats_executedSteps += result.iterations ?? 1;
1051
1552
  } else {
1553
+ const gptHandlerStartTime = performance.now();
1554
+ globalLogger.debug(`[Performance] Starting GPT handler execution...`);
1052
1555
  await this.gptHandler(command, cachingMode);
1556
+ const gptHandlerEndTime = performance.now();
1557
+ globalLogger.debug(`[Performance] GPT handler completed in ${(gptHandlerEndTime - gptHandlerStartTime).toFixed(2)}ms`);
1053
1558
  }
1559
+ const totalTime = performance.now() - startTime;
1560
+ globalLogger.debug(`[Performance] aiExecute total time: ${totalTime.toFixed(2)}ms`);
1054
1561
  }
1055
1562
  /**
1056
1563
  * Asserts a single condition using the GPTDriver.
@@ -1059,15 +1566,37 @@ class GptDriver {
1059
1566
  * If the assertion fails, an error is thrown.
1060
1567
  *
1061
1568
  * @param {string} assertion - The condition to be asserted.
1062
- * @param cachingMode - The caching mode to be used for the assertion.
1569
+ * @param {Object} options - Optional configuration object
1570
+ * @param {CachingMode} options.cachingMode - The caching mode to be used for the assertion.
1571
+ * @param {boolean} options.useSmartLoop - If true, uses the smart loop execution for optimized caching. Default: false
1063
1572
  * @throws {Error} If the assertion fails.
1573
+ *
1574
+ * @example
1575
+ * // Basic usage
1576
+ * await driver.assert('Login button is visible');
1577
+ *
1578
+ * @example
1579
+ * // With caching mode
1580
+ * await driver.assert('Login button is visible', {
1581
+ * cachingMode: "FULL_SCREEN"
1582
+ * });
1583
+ *
1584
+ * @example
1585
+ * // With smart loop enabled
1586
+ * await driver.assert('Login button is visible', {
1587
+ * useSmartLoop: true,
1588
+ * cachingMode: "FULL_SCREEN"
1589
+ * });
1064
1590
  */
1065
- async assert(assertion, cachingMode) {
1591
+ async assert(assertion, {
1592
+ cachingMode,
1593
+ useSmartLoop = false
1594
+ }) {
1066
1595
  if (!this.appiumSessionStarted) {
1067
1596
  await this.startSession();
1068
1597
  }
1069
1598
  try {
1070
- const results = await this.checkBulk([assertion], cachingMode);
1599
+ const results = await this.checkBulk([assertion], cachingMode, 2, 1e3, useSmartLoop);
1071
1600
  if (!Object.values(results).at(0)) {
1072
1601
  await this.setSessionStatus("failed");
1073
1602
  globalLogger.error(`Assertion failed: ${assertion}`);
@@ -1087,15 +1616,37 @@ class GptDriver {
1087
1616
  * If any assertion fails, an error is thrown listing all failed assertions.
1088
1617
  *
1089
1618
  * @param {string[]} assertions - An array of conditions to be asserted.
1090
- * @param cachingMode - The caching mode to be used for the assertions.
1619
+ * @param {Object} options - Optional configuration object
1620
+ * @param {CachingMode} options.cachingMode - The caching mode to be used for the assertions.
1621
+ * @param {boolean} options.useSmartLoop - If true, uses the smart loop execution. Default: false
1091
1622
  * @throws {Error} If any of the assertions fail.
1623
+ *
1624
+ * @example
1625
+ * // Basic usage
1626
+ * await driver.assertBulk(['Login button is visible', 'Username field is enabled']);
1627
+ *
1628
+ * @example
1629
+ * // With caching mode
1630
+ * await driver.assertBulk(['Login button is visible'], {
1631
+ * cachingMode: "FULL_SCREEN"
1632
+ * });
1633
+ *
1634
+ * @example
1635
+ * // With smart loop enabled
1636
+ * await driver.assertBulk(['Login button is visible'], {
1637
+ * useSmartLoop: true,
1638
+ * cachingMode: "FULL_SCREEN"
1639
+ * });
1092
1640
  */
1093
- async assertBulk(assertions, cachingMode) {
1641
+ async assertBulk(assertions, {
1642
+ cachingMode,
1643
+ useSmartLoop = false
1644
+ }) {
1094
1645
  if (!this.appiumSessionStarted) {
1095
1646
  await this.startSession();
1096
1647
  }
1097
1648
  try {
1098
- const results = await this.checkBulk(assertions, cachingMode);
1649
+ const results = await this.checkBulk(assertions, cachingMode, 2, 1e3, useSmartLoop);
1099
1650
  const failedAssertions = Object.values(results).reduce((prev, current, currentIndex) => {
1100
1651
  if (!current) {
1101
1652
  return [...prev, assertions.at(currentIndex)];
@@ -1124,14 +1675,30 @@ class GptDriver {
1124
1675
  * @param {CachingMode} cachingMode - The caching mode to be used for the conditions.
1125
1676
  * @param {number} maxRetries - The maximum number of retries if any condition fails (default: 2).
1126
1677
  * @param {number} retryDelayMs - The delay in milliseconds between retries (default: 1000).
1678
+ * @param {boolean} useSmartLoop - If true, uses the smart loop execution for optimized caching. Default: false
1127
1679
  * @returns {Promise<Record<string, boolean>>} A promise that resolves with an object mapping each condition
1128
1680
  * to a boolean indicating whether the condition was met.
1681
+ *
1682
+ * @example
1683
+ * // Basic usage
1684
+ * const results = await driver.checkBulk(['Login button is visible', 'Username field is enabled']);
1685
+ * console.log(results); // { 'Login button is visible': true, 'Username field is enabled': false }
1686
+ *
1687
+ * @example
1688
+ * // With smart loop enabled
1689
+ * const results = await driver.checkBulk(
1690
+ * ['Login button is visible'],
1691
+ * "FULL_SCREEN",
1692
+ * 2,
1693
+ * 1000,
1694
+ * true
1695
+ * );
1129
1696
  */
1130
- async checkBulk(conditions, cachingMode, maxRetries = 2, retryDelayMs = 1e3) {
1697
+ async checkBulk(conditions, cachingMode, maxRetries = 2, retryDelayMs = 1e3, useSmartLoop = false) {
1131
1698
  let attempt = 0;
1132
1699
  let results = {};
1133
1700
  while (attempt <= maxRetries) {
1134
- results = await this._checkBulkOnce(conditions, cachingMode, attempt);
1701
+ results = await this._checkBulkOnce(conditions, cachingMode, attempt, useSmartLoop);
1135
1702
  const failedConditions = Object.entries(results).filter(([_, success]) => !success).map(([key, _]) => key);
1136
1703
  if (failedConditions.length === 0) {
1137
1704
  return results;
@@ -1153,33 +1720,67 @@ class GptDriver {
1153
1720
  *
1154
1721
  * @private
1155
1722
  */
1156
- async _checkBulkOnce(conditions, cachingMode, attempt = 0) {
1723
+ async _checkBulkOnce(conditions, cachingMode, attempt = 0, useSmartLoop = false) {
1157
1724
  if (!this.appiumSessionStarted) {
1158
1725
  await this.startSession();
1159
1726
  }
1160
1727
  globalLogger.info(`Checking conditions (attempt ${attempt}): ${conditions.join(", ")}`);
1728
+ const pendingLogPromises = [];
1161
1729
  try {
1162
- let screenshot;
1163
- if (!this.useGptDriverCloud) {
1164
- screenshot = await this.getScreenshot(this.appiumSessionConfig);
1165
- }
1166
- const response = await axios.post(
1167
- `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
1168
- {
1169
- api_key: this.apiKey,
1170
- base64_screenshot: screenshot,
1171
- assertions: conditions,
1172
- command: `Assert: ${JSON.stringify(conditions)}`,
1173
- caching_mode: cachingMode ?? this.cachingMode,
1174
- step_number: this.step_number
1730
+ if (useSmartLoop) {
1731
+ const instruction = `Assert: ${JSON.stringify(conditions)}`;
1732
+ const middleLayerAssertFn = async (screenshot) => {
1733
+ return await axios.post(
1734
+ `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
1735
+ {
1736
+ api_key: this.apiKey,
1737
+ base64_screenshot: screenshot,
1738
+ assertions: conditions,
1739
+ command: `Assert: ${JSON.stringify(conditions)}`,
1740
+ caching_mode: cachingMode ?? this.cachingMode,
1741
+ step_number: this.step_number
1742
+ }
1743
+ );
1744
+ };
1745
+ const ctx = this.createSmartLoopContext({ middleLayerAssertFn, pendingLogPromises });
1746
+ const result = await executeSmartLoop(ctx, {
1747
+ stepNumber: this.step_number,
1748
+ description: instruction,
1749
+ instruction,
1750
+ ...this.testId && {
1751
+ filepath: this.testId
1752
+ }
1753
+ });
1754
+ if (result.cacheHitCount) {
1755
+ this._stats_cacheHits += result.cacheHitCount;
1175
1756
  }
1176
- );
1177
- globalLogger.debug(`Check results: ${JSON.stringify(response.data.results)}`);
1178
- return response.data.results;
1757
+ this._stats_executedSteps += result.iterations ?? 1;
1758
+ return { [instruction]: result.success };
1759
+ } else {
1760
+ let screenshot;
1761
+ if (!this.useGptDriverCloud) {
1762
+ screenshot = await this.getScreenshot(this.appiumSessionConfig);
1763
+ }
1764
+ const response = await axios.post(
1765
+ `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
1766
+ {
1767
+ api_key: this.apiKey,
1768
+ base64_screenshot: screenshot,
1769
+ assertions: conditions,
1770
+ command: `Assert: ${JSON.stringify(conditions)}`,
1771
+ caching_mode: cachingMode ?? this.cachingMode,
1772
+ step_number: this.step_number
1773
+ }
1774
+ );
1775
+ globalLogger.debug(`Check results: ${JSON.stringify(response.data.results)}`);
1776
+ return response.data.results;
1777
+ }
1179
1778
  } catch (e) {
1180
1779
  globalLogger.error("Failed to check conditions", e);
1181
1780
  await this.setSessionStatus("failed");
1182
1781
  throw e;
1782
+ } finally {
1783
+ await Promise.all(pendingLogPromises);
1183
1784
  }
1184
1785
  }
1185
1786
  /**
@@ -1272,10 +1873,12 @@ class GptDriver {
1272
1873
  * const result = await driver.executeFlow('tests/login-flow.json', { useSmartLoop: true });
1273
1874
  */
1274
1875
  async executeFlow(filePath, options) {
1876
+ const flowStartTime = performance.now();
1275
1877
  const useSmartLoop = options?.useSmartLoop ?? false;
1276
1878
  const configFilePath = this.configFilePath;
1277
1879
  let baseDir;
1278
1880
  let absolutePath;
1881
+ let parsedConfigFile;
1279
1882
  if (configFilePath) {
1280
1883
  let raw2;
1281
1884
  try {
@@ -1293,7 +1896,7 @@ class GptDriver {
1293
1896
  globalLogger.error(msg);
1294
1897
  throw new Error(msg);
1295
1898
  }
1296
- const parsedConfigFile = ConfigSchema.parse(json2);
1899
+ parsedConfigFile = ConfigSchema.parse(json2);
1297
1900
  if (path.isAbsolute(parsedConfigFile.testDir)) {
1298
1901
  baseDir = parsedConfigFile.testDir;
1299
1902
  } else {
@@ -1348,7 +1951,7 @@ ${issues}`);
1348
1951
  }
1349
1952
  return val.data;
1350
1953
  };
1351
- const expandSteps = async (steps, inheritedParams, parentDir, stack) => {
1954
+ const expandSteps = async (steps, inheritedParams, parentDir, stack, currentFilePath) => {
1352
1955
  const out = [];
1353
1956
  for (const step of steps) {
1354
1957
  if (step.type === "fileRef") {
@@ -1360,44 +1963,80 @@ ${issues}`);
1360
1963
  }
1361
1964
  visited.add(refKey);
1362
1965
  const child = await loadFlow(refPath);
1363
- const mergedParams = { ...inheritedParams, ...step.overrides ?? {} };
1966
+ const mergedParams = { ...inheritedParams, ...child.params, ...step.overrides ?? {} };
1364
1967
  const childDir = path.dirname(refPath);
1365
- const childExpanded = await expandSteps(child.steps, mergedParams, childDir, [...stack, refKey]);
1968
+ const childRelativePath = path.relative(baseDir, refPath).replace(/^\.\//, "");
1969
+ const childExpanded = await expandSteps(child.steps, mergedParams, childDir, [...stack, refKey], childRelativePath);
1366
1970
  out.push(...childExpanded);
1367
1971
  } else {
1368
- const resolved = { ...step, __params: { ...inheritedParams } };
1972
+ const resolved = {
1973
+ ...step,
1974
+ __params: { ...inheritedParams },
1975
+ __filepath: currentFilePath
1976
+ };
1369
1977
  out.push(resolved);
1370
1978
  }
1371
1979
  }
1372
1980
  return out;
1373
1981
  };
1374
- const effectiveParams = { ...rootFlow.params ?? {} };
1375
- const expandedSteps = await expandSteps(rootFlow.steps, effectiveParams, baseDir, [absolutePath]);
1982
+ const envPath = path.join(process.cwd(), ".env");
1983
+ let envVars = {};
1984
+ try {
1985
+ await node_fs.promises.access(envPath);
1986
+ const envContent = await promises.readFile(envPath, "utf-8");
1987
+ envVars = dotenv.parse(envContent);
1988
+ } catch (_) {
1989
+ }
1990
+ const effectiveParams = { ...parsedConfigFile?.variables, ...envVars, ...rootFlow.params };
1991
+ const rootRelativePath = path.relative(baseDir, absolutePath).replace(/^\.\//, "");
1992
+ const expandedSteps = await expandSteps(rootFlow.steps, effectiveParams, baseDir, [absolutePath], rootRelativePath);
1376
1993
  if (!this.appiumSessionStarted) {
1377
1994
  await this.startSession();
1378
1995
  }
1379
1996
  globalLogger.info(`Executing flow '${rootFlow.name}' with ${expandedSteps.length} step(s)...`);
1380
1997
  let executed = 0;
1998
+ let cacheHits = 0;
1999
+ let lastStepWasFromFileRef = false;
2000
+ let fileRefStepNumber = 1;
1381
2001
  try {
2002
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig, false);
2003
+ const screenshotResolution = await getImageDimensions(screenshot);
1382
2004
  for (const step of expandedSteps) {
1383
2005
  const params = step.__params ?? effectiveParams;
1384
- const prefix = `Step #${executed + 1} [${step.type}${step.optional ? ", optional" : ""}]`;
2006
+ const filepath = step.__filepath ?? rootRelativePath;
2007
+ console.log("step.__filepath", step.__filepath);
2008
+ console.log("rootRelativePath", rootRelativePath);
2009
+ const isFromFileRef = filepath !== rootRelativePath;
2010
+ console.log("isFromFileRef", isFromFileRef);
2011
+ if (!lastStepWasFromFileRef && isFromFileRef) {
2012
+ fileRefStepNumber = 1;
2013
+ }
2014
+ const effectiveStepNumber = isFromFileRef ? fileRefStepNumber : this.step_number;
2015
+ const prefix = `Step #${executed + 1} (step_number: ${effectiveStepNumber}) [${step.type}${step.optional ? ", optional" : ""}] (filepath: ${filepath})`;
1385
2016
  try {
1386
2017
  switch (step.type) {
1387
2018
  case "ai": {
1388
2019
  const instruction = this.interpolateTemplate(step.instruction, params);
1389
2020
  globalLogger.info(`${prefix}: ${instruction}`);
1390
2021
  if (useSmartLoop) {
1391
- const ctx = this.createSmartLoopContext();
2022
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1392
2023
  const result = await executeSmartLoop(ctx, {
1393
- stepNumber: this.step_number,
2024
+ stepNumber: effectiveStepNumber,
1394
2025
  description: instruction,
1395
- instruction
2026
+ instruction,
2027
+ filepath
1396
2028
  });
1397
2029
  if (!result.success) {
1398
2030
  throw new Error(result.error || "Smart loop execution failed");
1399
2031
  }
1400
- this.step_number++;
2032
+ if (result.cacheHit) {
2033
+ cacheHits++;
2034
+ }
2035
+ if (isFromFileRef) {
2036
+ fileRefStepNumber++;
2037
+ } else {
2038
+ this.step_number++;
2039
+ }
1401
2040
  } else {
1402
2041
  await this.aiExecute({ command: instruction });
1403
2042
  }
@@ -1410,16 +2049,24 @@ ${issues}`);
1410
2049
  }
1411
2050
  globalLogger.info(`${prefix}: ${description}`);
1412
2051
  if (useSmartLoop) {
1413
- const ctx = this.createSmartLoopContext();
2052
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1414
2053
  const result = await executeSmartLoop(ctx, {
1415
- stepNumber: this.step_number,
2054
+ stepNumber: effectiveStepNumber,
1416
2055
  description,
1417
- instruction: description
2056
+ instruction: description,
2057
+ filepath
1418
2058
  });
1419
2059
  if (!result.success) {
1420
2060
  throw new Error(result.error || "Smart loop execution failed");
1421
2061
  }
1422
- this.step_number++;
2062
+ if (result.cacheHit) {
2063
+ cacheHits++;
2064
+ }
2065
+ if (isFromFileRef) {
2066
+ fileRefStepNumber++;
2067
+ } else {
2068
+ this.step_number++;
2069
+ }
1423
2070
  } else {
1424
2071
  await this.aiExecute({ command: description });
1425
2072
  }
@@ -1433,16 +2080,24 @@ ${issues}`);
1433
2080
  globalLogger.info(`${prefix}: ${description}`);
1434
2081
  if (useSmartLoop) {
1435
2082
  const instruction = `Verify that: ${description}`;
1436
- const ctx = this.createSmartLoopContext();
2083
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1437
2084
  const result = await executeSmartLoop(ctx, {
1438
- stepNumber: this.step_number,
2085
+ stepNumber: effectiveStepNumber,
1439
2086
  description,
1440
- instruction
2087
+ instruction,
2088
+ filepath
1441
2089
  });
1442
2090
  if (!result.success) {
1443
2091
  throw new Error(result.error || "Smart loop execution failed");
1444
2092
  }
1445
- this.step_number++;
2093
+ if (result.cacheHit) {
2094
+ cacheHits++;
2095
+ }
2096
+ if (isFromFileRef) {
2097
+ fileRefStepNumber++;
2098
+ } else {
2099
+ this.step_number++;
2100
+ }
1446
2101
  } else {
1447
2102
  await this.assert(description);
1448
2103
  }
@@ -1451,35 +2106,98 @@ ${issues}`);
1451
2106
  case "type": {
1452
2107
  const text = this.interpolateTemplate(step.text, params);
1453
2108
  globalLogger.info(`${prefix}: Type text`);
1454
- await this.takeScreenshotAndLogCodeExecution(`type: text=${text}`);
2109
+ this.takeScreenshotAndLogCodeExecution(`type: text=${text}`);
1455
2110
  await this.performType(text);
1456
- this.step_number++;
2111
+ if (isFromFileRef) {
2112
+ fileRefStepNumber++;
2113
+ } else {
2114
+ this.step_number++;
2115
+ }
1457
2116
  break;
1458
2117
  }
1459
- case "scroll": {
1460
- globalLogger.info(`${prefix}: Scroll ${step.direction}`);
1461
- await this.takeScreenshotAndLogCodeExecution(`scroll: direction=${step.direction}`);
1462
- await this.performScroll(step.direction);
1463
- this.step_number++;
2118
+ case "enter": {
2119
+ globalLogger.info(`${prefix}: Press Enter`);
2120
+ this.takeScreenshotAndLogCodeExecution(`press: Enter`);
2121
+ await this.performPressEnter();
2122
+ if (step.delayNextStep) {
2123
+ await this._delay(step.delayNextStep);
2124
+ }
2125
+ if (isFromFileRef) {
2126
+ fileRefStepNumber++;
2127
+ } else {
2128
+ this.step_number++;
2129
+ }
2130
+ break;
2131
+ }
2132
+ case "scroll":
2133
+ case "swipe": {
2134
+ const description = step.descriptionText ? this.interpolateTemplate(step.descriptionText, params) : void 0;
2135
+ if (description && useSmartLoop) {
2136
+ globalLogger.info(`${prefix}: ${description}`);
2137
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
2138
+ const result = await executeSmartLoop(ctx, {
2139
+ stepNumber: effectiveStepNumber,
2140
+ description,
2141
+ instruction: description,
2142
+ filepath
2143
+ });
2144
+ if (!result.success) {
2145
+ throw new Error(result.error || "Smart loop execution failed");
2146
+ }
2147
+ if (result.cacheHit) {
2148
+ cacheHits++;
2149
+ }
2150
+ if (isFromFileRef) {
2151
+ fileRefStepNumber++;
2152
+ } else {
2153
+ this.step_number++;
2154
+ }
2155
+ } else {
2156
+ globalLogger.info(`${prefix}: ${step.type} ${step.direction}`);
2157
+ this.takeScreenshotAndLogCodeExecution(`${step.type}: direction=${step.direction}`);
2158
+ await this.performSwipe({
2159
+ direction: step.direction,
2160
+ x1: step.x1,
2161
+ y1: step.y1,
2162
+ x2: step.x2,
2163
+ y2: step.y2,
2164
+ duration: step.duration,
2165
+ screenshotHeight: screenshotResolution.height,
2166
+ screenshotWidth: screenshotResolution.width
2167
+ });
2168
+ if (isFromFileRef) {
2169
+ fileRefStepNumber++;
2170
+ } else {
2171
+ this.step_number++;
2172
+ }
2173
+ }
1464
2174
  break;
1465
2175
  }
1466
2176
  case "zoom": {
1467
2177
  globalLogger.info(`${prefix}: Zoom ${step.direction}`);
1468
- await this.takeScreenshotAndLogCodeExecution(`zoom: direction=${step.direction}`);
1469
- this.step_number++;
2178
+ this.takeScreenshotAndLogCodeExecution(`zoom: direction=${step.direction}`);
2179
+ if (isFromFileRef) {
2180
+ fileRefStepNumber++;
2181
+ } else {
2182
+ this.step_number++;
2183
+ }
1470
2184
  break;
1471
2185
  }
1472
2186
  case "scrollUntil": {
1473
2187
  const interpolatedText = step.text != null ? this.interpolateTemplate(step.text, params) : void 0;
1474
2188
  globalLogger.info(`${prefix}: Scroll until ${interpolatedText ?? step.elementId}`);
1475
- await this.takeScreenshotAndLogCodeExecution(`scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`);
2189
+ this.takeScreenshotAndLogCodeExecution(`scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`);
1476
2190
  await this.performScrollUntil({
1477
2191
  direction: step.direction,
1478
2192
  text: interpolatedText,
1479
2193
  elementId: step.elementId,
1480
2194
  maxScrolls: step.maxScrolls
1481
2195
  });
1482
- this.step_number++;
2196
+ if (isFromFileRef) {
2197
+ fileRefStepNumber++;
2198
+ } else {
2199
+ this.step_number++;
2200
+ }
1483
2201
  break;
1484
2202
  }
1485
2203
  case "deeplink": {
@@ -1487,7 +2205,7 @@ ${issues}`);
1487
2205
  const bundleId = params["bundleId"];
1488
2206
  const url = this.interpolateTemplate(step.url, params);
1489
2207
  globalLogger.info(`${prefix}: Open deeplink ${url}`);
1490
- await this.takeScreenshotAndLogCodeExecution(`openDeepLinkUrl: url=${url}`);
2208
+ this.takeScreenshotAndLogCodeExecution(`openDeepLinkUrl: url=${url}`);
1491
2209
  await this.openDeepLinkUrl({ url, package: pkg, bundleId });
1492
2210
  break;
1493
2211
  }
@@ -1495,10 +2213,12 @@ ${issues}`);
1495
2213
  throw new Error(`Unsupported step type at execution: ${step.type}`);
1496
2214
  }
1497
2215
  }
2216
+ lastStepWasFromFileRef = isFromFileRef;
1498
2217
  executed++;
1499
2218
  } catch (err) {
1500
2219
  if (step.optional) {
1501
2220
  globalLogger.warn(`${prefix} failed but marked optional. Continuing. Error: ${err.message}`);
2221
+ lastStepWasFromFileRef = isFromFileRef;
1502
2222
  continue;
1503
2223
  }
1504
2224
  throw err;
@@ -1511,6 +2231,16 @@ ${issues}`);
1511
2231
  }
1512
2232
  throw e;
1513
2233
  }
2234
+ const flowEndTime = performance.now();
2235
+ const totalTime = (flowEndTime - flowStartTime) / 1e3;
2236
+ globalLogger.info(`
2237
+ ${"=".repeat(50)}`);
2238
+ globalLogger.info(`Flow Execution Summary:`);
2239
+ globalLogger.info(`Total Test Time: ${totalTime.toFixed(2)} seconds`);
2240
+ globalLogger.info(`Total Steps executed: ${executed}`);
2241
+ globalLogger.info(`Total Step Cache Hits: ${cacheHits}`);
2242
+ globalLogger.info(`${"=".repeat(50)}
2243
+ `);
1514
2244
  return rootFlow;
1515
2245
  }
1516
2246
  async gptHandler(command, cachingMode) {
@@ -1518,8 +2248,15 @@ ${issues}`);
1518
2248
  let conditionSucceeded = false;
1519
2249
  while (!conditionSucceeded) {
1520
2250
  let screenshot;
2251
+ let originalScreenshotBase64 = null;
1521
2252
  if (!this.useGptDriverCloud) {
1522
- screenshot = await this.getScreenshot(this.appiumSessionConfig);
2253
+ const stabilityResult = await waitForStableScreen(
2254
+ () => this.getScreenshot(this.appiumSessionConfig)
2255
+ );
2256
+ screenshot = stabilityResult.originalScreenshotBase64;
2257
+ if (!stabilityResult.stable) {
2258
+ globalLogger.warn("Screen did not stabilize within timeout, proceeding with last screenshot");
2259
+ }
1523
2260
  }
1524
2261
  globalLogger.info("Requesting next action from GPT Driver...");
1525
2262
  const response = await axios.request(
@@ -1547,7 +2284,6 @@ ${issues}`);
1547
2284
  for (const appiumCommand of executeResponse.commands) {
1548
2285
  await this.executeCommand(appiumCommand);
1549
2286
  }
1550
- await delay(1500);
1551
2287
  }
1552
2288
  this.step_number = this.step_number + 1;
1553
2289
  globalLogger.info("Command execution completed successfully");
@@ -1575,13 +2311,13 @@ ${issues}`);
1575
2311
  });
1576
2312
  }
1577
2313
  }
1578
- async logCodeExecution(screenshot, command) {
2314
+ async logCodeExecution(screenshot, command, isCacheHit) {
1579
2315
  try {
1580
- const screenshot2 = await this.getScreenshot(this.appiumSessionConfig);
1581
2316
  await axios.post(`${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/log_code_execution`, {
1582
2317
  api_key: this.apiKey,
1583
- base64_screenshot: screenshot2,
1584
- command
2318
+ base64_screenshot: screenshot,
2319
+ command,
2320
+ from_cache: !!isCacheHit
1585
2321
  });
1586
2322
  } catch (e) {
1587
2323
  globalLogger.error("Failed to log code execution", e);