gpt-driver-node 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,8 +1,10 @@
1
1
  import { promises } from 'node:fs';
2
2
  import path from 'node:path';
3
+ import { readFile } from 'node:fs/promises';
3
4
  import axios from 'axios';
4
5
  import sharp from 'sharp';
5
6
  import { attach } from 'webdriverio';
7
+ import dotenv from 'dotenv';
6
8
  import winston from 'winston';
7
9
  import { z } from 'zod';
8
10
  import crypto from 'node:crypto';
@@ -20,6 +22,15 @@ function buildUrl(base, extraPath) {
20
22
  }
21
23
  return `${baseUrl}${extraPath}`;
22
24
  }
25
/**
 * Reads the pixel dimensions of a base64-encoded image.
 *
 * Accepts either a raw base64 payload or a `data:image/<subtype>;base64,`
 * URI; a leading data-URI header is stripped before decoding.
 *
 * @param {string} base64 - Base64 image data, optionally prefixed with a data-URI header.
 * @returns {Promise<{ width: number, height: number }>} Width and height reported by sharp.
 * @throws {TypeError} If `base64` is not a string.
 * @throws {Error} If sharp's metadata has no (or a zero) width or height.
 */
const getImageDimensions = async (base64) => {
  if (typeof base64 !== "string") {
    throw new TypeError("getImageDimensions expects a base64-encoded string");
  }
  // Image subtypes such as "svg+xml" or "vnd.microsoft.icon" contain characters
  // outside \w, so the header pattern has to allow ".", "+" and "-" as well.
  const base64Data = base64.replace(/^data:image\/[\w.+-]+;base64,/, "");
  const buffer = Buffer.from(base64Data, "base64");
  const metadata = await sharp(buffer).metadata();
  if (!metadata.width || !metadata.height) {
    throw new Error("Unable to get image dimensions");
  }
  return { width: metadata.width, height: metadata.height };
};
23
34
 
24
35
  const colors = {
25
36
  reset: "\x1B[0m",
@@ -82,6 +93,75 @@ ${logStyles.gray(stack)}` : logMessage;
82
93
  ]
83
94
  });
84
95
 
96
/**
 * Polls screenshots until two consecutive captures look the same, or until
 * the timeout elapses.
 *
 * Each capture is resized to `downscaleWidth` x `downscaleHeight` (sharp
 * `fit: "fill"`), converted to raw pixels with an alpha channel, and compared
 * with the previous capture through `areImagesSimilar`.
 *
 * At least one screenshot is always taken, even when `maxTimeoutSec` is zero
 * or negative, so `originalScreenshotBase64` is never `null` on return.
 *
 * @param {() => Promise<string>} getScreenshot - Resolves to a base64-encoded screenshot.
 * @param {object} [options]
 * @param {number} [options.maxTimeoutSec=5] - Time budget for stabilisation, in seconds.
 * @param {number} [options.intervalSec=0.01] - Pause between captures, in seconds.
 * @param {number} [options.tolerance=1e-5] - Maximum ratio of differing pixels still considered stable.
 * @param {number} [options.pixelThreshold=0] - Per-channel difference above which a pixel counts as changed.
 * @param {number} [options.downscaleWidth=600] - Width used for the comparison image.
 * @param {number} [options.downscaleHeight=600] - Height used for the comparison image.
 * @returns {Promise<{ stable: boolean, originalScreenshotBase64: string }>}
 *   `stable: true` with the capture that matched its predecessor, or
 *   `stable: false` with the last capture taken before the timeout.
 * @throws Re-throws any error raised while capturing or processing a screenshot (after logging a warning).
 */
const waitForStableScreen = async (getScreenshot, options = {}) => {
  const {
    maxTimeoutSec = 5,
    intervalSec = 0.01,
    tolerance = 1e-5,
    pixelThreshold = 0,
    downscaleWidth = 600,
    downscaleHeight = 600
  } = options;
  const startTime = Date.now();
  const maxTimeoutMillis = maxTimeoutSec * 1e3;
  const intervalMillis = intervalSec * 1e3;
  let previousDownsampledBuffer = null;
  let lastScreenshotBase64 = null;
  // do/while rather than while: callers dereference the returned screenshot
  // unconditionally, so one capture must happen even with a non-positive timeout.
  do {
    try {
      const screenshotBase64 = await getScreenshot();
      lastScreenshotBase64 = screenshotBase64;
      const imageBuffer = Buffer.from(screenshotBase64, "base64");
      const downsampledBuffer = await sharp(imageBuffer)
        .resize(downscaleWidth, downscaleHeight, { fit: "fill" })
        .ensureAlpha()
        .raw()
        .toBuffer();
      if (previousDownsampledBuffer) {
        const isSimilar = areImagesSimilar(
          previousDownsampledBuffer,
          downsampledBuffer,
          downscaleWidth,
          downscaleHeight,
          tolerance,
          pixelThreshold
        );
        if (isSimilar) {
          return {
            stable: true,
            originalScreenshotBase64: screenshotBase64
          };
        }
      }
      previousDownsampledBuffer = downsampledBuffer;
    } catch (e) {
      globalLogger.warn(`Error during stability check: ${e}`);
      throw e;
    }
    await delay(intervalMillis);
  } while (Date.now() - startTime < maxTimeoutMillis);
  return {
    stable: false,
    originalScreenshotBase64: lastScreenshotBase64
  };
};
144
/**
 * Decides whether two raw 4-bytes-per-pixel images are close enough to be
 * treated as the same frame.
 *
 * Only the first three bytes of every 4-byte pixel are compared; the fourth
 * byte is ignored. A pixel counts as different when any compared channel
 * differs by more than `pixelThreshold`. The images are similar when the
 * share of different pixels (relative to `width * height`) does not exceed
 * `tolerance`.
 *
 * @param {Uint8Array} buffer1 - Raw pixel data of the first image.
 * @param {Uint8Array} buffer2 - Raw pixel data of the second image.
 * @param {number} width - Image width in pixels.
 * @param {number} height - Image height in pixels.
 * @param {number} tolerance - Maximum allowed ratio of differing pixels.
 * @param {number} pixelThreshold - Maximum allowed per-channel difference.
 * @returns {boolean} `true` when the images are similar, `false` otherwise
 *   (including when the buffers differ in length).
 */
function areImagesSimilar(buffer1, buffer2, width, height, tolerance, pixelThreshold) {
  if (buffer1.length !== buffer2.length) {
    return false;
  }
  const totalPixels = width * height;
  // The ratio only grows with the count when the divisor is positive, so
  // bailing out early is only safe in that case.
  const canStopEarly = totalPixels > 0;
  let differentPixels = 0;
  for (let offset = 0; offset < buffer1.length; offset += 4) {
    const pixelDiffers =
      Math.abs(buffer1[offset] - buffer2[offset]) > pixelThreshold ||
      Math.abs(buffer1[offset + 1] - buffer2[offset + 1]) > pixelThreshold ||
      Math.abs(buffer1[offset + 2] - buffer2[offset + 2]) > pixelThreshold;
    if (!pixelDiffers) {
      continue;
    }
    differentPixels++;
    if (canStopEarly && differentPixels / totalPixels > tolerance) {
      // Already past the budget; the remaining pixels cannot change the verdict.
      return false;
    }
  }
  if (differentPixels === 0 && totalPixels === 0) {
    // 0 / 0 would be NaN and make identical (empty) images compare as different.
    return 0 <= tolerance;
  }
  return differentPixels / totalPixels <= tolerance;
}
164
+
85
165
  const SavableStepBaseSchema = z.object({
86
166
  id: z.number().optional(),
87
167
  descriptionText: z.string().optional(),
@@ -105,9 +185,17 @@ const SavableTypeStepSchema = SavableStepBaseSchema.extend({
105
185
  type: z.literal("type"),
106
186
  text: z.string()
107
187
  });
108
- const SavableScrollStepSchema = SavableStepBaseSchema.extend({
109
- type: z.literal("scroll"),
110
- direction: z.enum(["up", "down"])
188
+ const SavableSwipeStepSchema = SavableStepBaseSchema.extend({
189
+ type: z.literal("swipe"),
190
+ direction: z.enum(["left", "right", "up", "down"]),
191
+ x1: z.number().optional(),
192
+ y1: z.number().optional(),
193
+ x2: z.number().optional(),
194
+ y2: z.number().optional(),
195
+ duration: z.number().optional().default(500)
196
+ });
197
+ const SavableScrollStepSchema = SavableSwipeStepSchema.extend({
198
+ type: z.literal("scroll")
111
199
  });
112
200
  const SavableZoomStepSchema = SavableStepBaseSchema.extend({
113
201
  type: z.literal("zoom"),
@@ -128,6 +216,10 @@ const SavableAIStepSchema = SavableStepBaseSchema.extend({
128
216
  type: z.literal("ai"),
129
217
  instruction: z.string()
130
218
  });
219
+ const SavableEnterStepSchema = SavableStepBaseSchema.extend({
220
+ type: z.literal("enter"),
221
+ delayNextStep: z.number().optional().default(500)
222
+ });
131
223
  const SavableFileRefStepSchema = SavableStepBaseSchema.extend({
132
224
  type: z.literal("fileRef"),
133
225
  path: z.string(),
@@ -140,6 +232,8 @@ const SavableStepSchema = z.discriminatedUnion("type", [
140
232
  // type: 'assert'
141
233
  SavableTypeStepSchema,
142
234
  // type: 'type'
235
+ SavableSwipeStepSchema,
236
+ // type: 'swipe'
143
237
  SavableScrollStepSchema,
144
238
  // type: 'scroll'
145
239
  SavableZoomStepSchema,
@@ -150,6 +244,8 @@ const SavableStepSchema = z.discriminatedUnion("type", [
150
244
  // type: 'deeplink'
151
245
  SavableAIStepSchema,
152
246
  // type: 'ai'
247
+ SavableEnterStepSchema,
248
+ // type: 'enter'
153
249
  SavableFileRefStepSchema
154
250
  // type: 'fileRef'
155
251
  ]);
@@ -171,7 +267,7 @@ const CACHE_SERVER_URL = "https://cache.mobileboost.io";
171
267
  const GPT_DRIVER_BASE_URL = "https://api.mobileboost.io";
172
268
  const RESCALE_FACTOR = 4;
173
269
  const SMART_LOOP_MAX_ITERATIONS = 15;
174
- const CACHE_RETRY_MS = 2e3;
270
+ const CACHE_RETRY_MS = 1500;
175
271
  const CACHE_CHECK_INTERVAL_MS = 500;
176
272
 
177
273
  function generateCacheHash(apiKey, filepath, stepNumber, description, platform, resolution) {
@@ -255,15 +351,24 @@ async function executeFromCache(params) {
255
351
  return { found: false };
256
352
  } catch (error) {
257
353
  if (axios.isAxiosError(error)) {
258
- globalLogger.warn(`[Cache] Cache lookup failed: ${error.response?.data || error.message}`);
354
+ const errorMsg = `Cache lookup failed: ${error.response?.data || error.message}`;
355
+ globalLogger.warn(`[Cache] ${errorMsg}`);
259
356
  } else {
260
- globalLogger.error(`[Cache] Error executing from cache: ${error}`);
357
+ const errorMsg = `Error executing from cache: ${error}`;
358
+ globalLogger.error(`[Cache] ${errorMsg}`);
261
359
  }
262
360
  return { found: false };
263
361
  }
264
362
  }
265
363
  async function populateCache(params) {
266
364
  try {
365
+ globalLogger.debug(`[populateCache] Parameters: ${JSON.stringify({
366
+ stepNumber: params.stepNumber,
367
+ filepath: params.filepath,
368
+ platform: params.platform,
369
+ screenResolution: params.screenResolution,
370
+ executionDataCount: params.executionData.length
371
+ })}`);
267
372
  const hash = generateCacheHash(
268
373
  params.apiKey,
269
374
  params.filepath,
@@ -272,13 +377,15 @@ async function populateCache(params) {
272
377
  params.platform,
273
378
  params.screenResolution
274
379
  );
380
+ globalLogger.debug(`[populateCache] Generated hash: ${hash}`);
275
381
  const payload = await Promise.all(params.executionData.map(async (item) => {
276
382
  const resizedBuffer = await resizeScreenshotForCache(item.screenshot);
383
+ const scaledScreenshotBase64 = resizedBuffer.toString("base64");
277
384
  const scaledCommands = item.commands.map(
278
385
  (cmd) => scaleCommand(cmd, "divide")
279
386
  );
280
387
  return {
281
- screenshot: resizedBuffer.toString("base64"),
388
+ screenshot: scaledScreenshotBase64,
282
389
  commands: scaledCommands
283
390
  };
284
391
  }));
@@ -290,9 +397,11 @@ async function populateCache(params) {
290
397
  return { success: true };
291
398
  } catch (error) {
292
399
  if (axios.isAxiosError(error)) {
293
- globalLogger.error(`[Cache] Failed to populate cache: ${error.response?.data || error.message}`);
400
+ const errorMsg = `Failed to populate cache: ${error.response?.data || error.message}`;
401
+ globalLogger.error(`[Cache] ${errorMsg}`);
294
402
  } else {
295
- globalLogger.error(`[Cache] Error populating cache: ${error}`);
403
+ const errorMsg = `Error populating cache: ${error}`;
404
+ globalLogger.error(`[Cache] ${errorMsg}`);
296
405
  }
297
406
  return { success: false };
298
407
  }
@@ -422,9 +531,37 @@ function isWaitCommand(cmd) {
422
531
  function isScrollCommand(cmd) {
423
532
  return cmd.startsWith("scroll:");
424
533
  }
534
/**
 * Tells whether a command is a "press: enter" instruction.
 *
 * Matching is case-insensitive and tolerates surrounding whitespace, but the
 * command has to start with `press:`. Text that merely contains "press:"
 * later on (for example inside a `type:` payload) is not an Enter command.
 *
 * @param {string} cmd - Raw command string.
 * @returns {boolean} `true` only for a `press:` command whose key is `enter`.
 */
function isEnterCommand(cmd) {
  return /^\s*press:\s*enter\s*$/i.test(cmd);
}
425
538
  function isTypeCommand(cmd) {
426
539
  return cmd.startsWith("type:");
427
540
  }
541
/**
 * Tells whether a command begins with the literal, lower-case prefix "slide".
 *
 * @param {string} cmd - Raw command string.
 * @returns {boolean} `true` when `cmd` starts with "slide".
 */
function isSlideCommand(cmd) {
  const prefix = "slide";
  return cmd.slice(0, prefix.length) === prefix;
}
544
/**
 * Extracts direction, distance percentage and start coordinates from a
 * command shaped like `slide <direction> <n>%[: note];<x>;<y>`.
 *
 * The vertical direction is swapped in the result ("up" becomes "down" and
 * vice versa); horizontal directions pass through unchanged.
 * NOTE(review): presumably this converts between gesture and scroll
 * direction conventions — confirm against the command producer.
 *
 * @param {string} cmd - Raw command string.
 * @returns {{ direction: string, percentage: number, startX: number, startY: number } | null}
 *   The parsed parameters, or `null` when the command does not match.
 */
function parseSlideCommand(cmd) {
  const parts = cmd.match(
    /slide\s+(up|down|left|right)\s+(\d+)%(?::\s*[^;]*)?;(\d+);(\d+)/i
  );
  if (parts === null) {
    return null;
  }
  const [, rawDirection, rawPercentage, rawStartX, rawStartY] = parts;
  let direction;
  switch (rawDirection.toLowerCase()) {
    case "down":
      direction = "up";
      break;
    case "up":
      direction = "down";
      break;
    case "left":
      direction = "left";
      break;
    case "right":
      direction = "right";
      break;
  }
  const toInteger = (digits) => parseInt(digits, 10);
  return {
    direction,
    percentage: toInteger(rawPercentage),
    startX: toInteger(rawStartX),
    startY: toInteger(rawStartY)
  };
}
428
565
 
429
566
  async function executeSmartLoop(ctx, params) {
430
567
  const maxCacheAttempts = Math.floor(CACHE_RETRY_MS / CACHE_CHECK_INTERVAL_MS);
@@ -433,35 +570,56 @@ async function executeSmartLoop(ctx, params) {
433
570
  let lastCacheIndex = void 0;
434
571
  let anyCacheMiss = false;
435
572
  let everHadCacheHit = false;
573
+ let totalCacheHits = 0;
436
574
  const currentExecutionData = [];
575
+ const stepStartTime = performance.now();
576
+ const iterationTimes = [];
437
577
  globalLogger.info(`[SmartLoop] Starting for step ${params.stepNumber}: "${params.description}"`);
578
+ globalLogger.debug(`[Performance][SmartLoop] Step execution started at ${stepStartTime.toFixed(2)}ms`);
438
579
  try {
439
580
  while (loopCount < SMART_LOOP_MAX_ITERATIONS) {
581
+ const iterationStartTime = performance.now();
582
+ globalLogger.debug(`[SmartLoop] Starting iteration ${loopCount + 1}/${SMART_LOOP_MAX_ITERATIONS}`);
440
583
  let screenshot = "";
441
584
  let commands = [];
442
585
  let isCacheHit = false;
586
+ let screenshotResolution = void 0;
587
+ const stabilityResult = await waitForStableScreen(ctx.getScreenshot);
443
588
  for (let attempt = 0; attempt < maxCacheAttempts; attempt++) {
444
- screenshot = await ctx.getScreenshot();
589
+ const screenshotStartTime = performance.now();
590
+ const screenshotEndTime = performance.now();
591
+ globalLogger.debug(`[Performance][SmartLoop] Screenshot capture took ${(screenshotEndTime - screenshotStartTime).toFixed(2)}ms`);
592
+ if (!stabilityResult.stable) {
593
+ globalLogger.warn("[SmartLoop] Screen did not stabilize within timeout, proceeding with last screenshot");
594
+ }
595
+ screenshot = stabilityResult.originalScreenshotBase64;
596
+ if (screenshotResolution === void 0) {
597
+ screenshotResolution = await getImageDimensions(screenshot);
598
+ }
445
599
  const sizeInBytes = screenshot.length * 0.75;
446
600
  const sizeInMB = (sizeInBytes / (1024 * 1024)).toFixed(2);
447
601
  globalLogger.debug(`[SmartLoop] Captured screenshot: ~${sizeInMB} MB`);
448
602
  try {
449
603
  globalLogger.debug(`[SmartLoop] Checking cache (Attempt ${attempt + 1}/${maxCacheAttempts})`);
604
+ const cacheCheckStartTime = performance.now();
450
605
  const cacheResult = await executeFromCache({
451
606
  apiKey: ctx.organisationId,
452
607
  stepNumber: params.stepNumber,
453
608
  stepDescription: params.description,
454
609
  screenshot,
455
- screenResolution: ctx.screenSize,
610
+ screenResolution: screenshotResolution,
456
611
  highestUsedIndex: lastCacheIndex,
457
612
  platform: ctx.platform,
458
613
  filepath: params.filepath
459
614
  });
615
+ const cacheCheckEndTime = performance.now();
616
+ globalLogger.debug(`[Performance][SmartLoop] Cache check took ${(cacheCheckEndTime - cacheCheckStartTime).toFixed(2)}ms`);
460
617
  if (cacheResult.found && cacheResult.cacheCommands) {
461
618
  commands = cacheResult.cacheCommands;
462
619
  lastCacheIndex = cacheResult.cacheIndex;
463
620
  isCacheHit = true;
464
621
  everHadCacheHit = true;
622
+ totalCacheHits++;
465
623
  globalLogger.info(`[SmartLoop] Cache Hit! (${commands.length} commands)`);
466
624
  break;
467
625
  }
@@ -473,18 +631,53 @@ async function executeSmartLoop(ctx, params) {
473
631
  await delay(CACHE_CHECK_INTERVAL_MS);
474
632
  }
475
633
  }
634
+ if (screenshotResolution === void 0) {
635
+ const screenshot2 = await ctx.getScreenshot();
636
+ screenshotResolution = await getImageDimensions(screenshot2);
637
+ }
476
638
  let aiCommands = [];
639
+ let actionDescription = [];
477
640
  if (!isCacheHit) {
478
641
  anyCacheMiss = true;
479
642
  globalLogger.info(`[SmartLoop] Cache Miss. Requesting AI agent...`);
480
- const agentResponse = await executeAgentStep({
481
- apiKey: ctx.organisationId,
482
- base64_screenshot: screenshot,
483
- instruction: params.instruction,
484
- action_history: actionHistory
485
- });
486
- aiCommands = agentResponse.appetizeCommands || [];
487
- const gptCommands = agentResponse.gptCommands || [];
643
+ const aiStartTime = performance.now();
644
+ let agentResponse;
645
+ if (ctx.middleLayerAssertFn) {
646
+ const response = await ctx.middleLayerAssertFn(screenshot);
647
+ const results = response.data.results;
648
+ const failedConditions = Object.entries(results).filter(([_, success]) => !success).map(([key, _]) => key);
649
+ if (failedConditions.length === 0) {
650
+ agentResponse = {
651
+ appetizeCommands: [
652
+ "task complete: All assertions passed"
653
+ ]
654
+ };
655
+ } else {
656
+ agentResponse = {
657
+ appetizeCommands: [
658
+ `error detected: Failed conditions: ${failedConditions.join(", ")}`
659
+ ]
660
+ };
661
+ }
662
+ } else {
663
+ agentResponse = await executeAgentStep({
664
+ apiKey: ctx.organisationId,
665
+ base64_screenshot: screenshot,
666
+ instruction: params.instruction,
667
+ action_history: actionHistory
668
+ });
669
+ }
670
+ const aiEndTime = performance.now();
671
+ globalLogger.debug(`[Performance][SmartLoop] AI agent call took ${(aiEndTime - aiStartTime).toFixed(2)}ms`);
672
+ aiCommands = agentResponse.appetizeCommands ?? [];
673
+ const gptCommands = agentResponse.gptCommands ?? [];
674
+ actionDescription = [];
675
+ const actionDescriptionIndex = gptCommands.findIndex(
676
+ (entry) => entry.startsWith("actions_description:")
677
+ );
678
+ if (actionDescriptionIndex !== -1) {
679
+ actionDescription = gptCommands.slice(actionDescriptionIndex, actionDescriptionIndex + 1);
680
+ }
488
681
  const reasoningIndex = gptCommands.findIndex((entry) => entry.startsWith("reasoning:"));
489
682
  if (reasoningIndex !== -1) {
490
683
  const parsedCommands = gptCommands.slice(reasoningIndex);
@@ -494,19 +687,23 @@ async function executeSmartLoop(ctx, params) {
494
687
  }
495
688
  actionHistory = [...actionHistory, ...parsedCommands];
496
689
  }
497
- commands = [...aiCommands];
690
+ commands = [...actionDescription, ...aiCommands];
498
691
  globalLogger.debug(`[SmartLoop] AI returned ${commands.length} command(s)`);
499
692
  }
500
693
  currentExecutionData.push({
501
694
  screenshot,
502
- commands: aiCommands.length > 0 ? aiCommands : commands
695
+ commands
503
696
  });
504
- await ctx.logCodeExecution(screenshot, commands.join("\n"));
697
+ const logPromise = ctx.logCodeExecution(screenshot, commands.join("\n"), isCacheHit);
698
+ if (ctx.pendingLogPromises) {
699
+ ctx.pendingLogPromises.push(logPromise);
700
+ }
505
701
  let actionExecuted = false;
506
702
  let taskCompleted = false;
507
703
  if (commands.length > 0) {
508
704
  globalLogger.debug(`[SmartLoop] Executing ${commands.length} command(s)`);
509
705
  }
706
+ const commandExecutionStartTime = performance.now();
510
707
  for (const cmd of commands) {
511
708
  if (isTaskComplete(cmd)) {
512
709
  taskCompleted = true;
@@ -523,9 +720,18 @@ async function executeSmartLoop(ctx, params) {
523
720
  const coords = parseTapCoordinates(cmd);
524
721
  if (coords) {
525
722
  globalLogger.debug(`[SmartLoop] Executing tap at (${coords.x}, ${coords.y})`);
526
- await ctx.performTap(coords.x, coords.y);
723
+ await ctx.performTap(
724
+ coords.x,
725
+ coords.y,
726
+ screenshotResolution.width,
727
+ screenshotResolution.height
728
+ );
527
729
  actionExecuted = true;
528
730
  }
731
+ } else if (isEnterCommand(cmd)) {
732
+ globalLogger.debug(`[SmartLoop] Pressing Enter`);
733
+ await ctx.performPressEnter();
734
+ actionExecuted = true;
529
735
  } else if (isWaitCommand(cmd)) {
530
736
  const seconds = parseWaitSeconds(cmd);
531
737
  if (seconds) {
@@ -537,7 +743,50 @@ async function executeSmartLoop(ctx, params) {
537
743
  const direction = parseScrollDirection(cmd);
538
744
  if (direction) {
539
745
  globalLogger.debug(`[SmartLoop] Scrolling ${direction}`);
540
- await ctx.performScroll(direction);
746
+ await ctx.performSwipe({ direction });
747
+ if (isCacheHit) {
748
+ await delay(650);
749
+ }
750
+ actionExecuted = true;
751
+ }
752
+ } else if (isSlideCommand(cmd)) {
753
+ const slideParams = parseSlideCommand(cmd);
754
+ if (slideParams) {
755
+ const { direction, percentage, startX, startY } = slideParams;
756
+ const isVertical = direction === "up" || direction === "down";
757
+ const distance = Math.round(
758
+ (isVertical ? screenshotResolution.height : screenshotResolution.width) * (percentage / 100)
759
+ );
760
+ let endX = startX;
761
+ let endY = startY;
762
+ switch (direction) {
763
+ case "up":
764
+ endY = startY + distance;
765
+ break;
766
+ case "down":
767
+ endY = startY - distance;
768
+ break;
769
+ case "left":
770
+ endX = startX - distance;
771
+ break;
772
+ case "right":
773
+ endX = startX + distance;
774
+ break;
775
+ }
776
+ globalLogger.debug(`[SmartLoop] Sliding ${direction} ${percentage}% from (${startX}, ${startY}) to (${endX}, ${endY})`);
777
+ await ctx.performSwipe({
778
+ direction,
779
+ x1: startX,
780
+ y1: startY,
781
+ x2: endX,
782
+ y2: endY,
783
+ screenshotWidth: screenshotResolution.width,
784
+ screenshotHeight: screenshotResolution.height,
785
+ duration: 500
786
+ });
787
+ if (isCacheHit) {
788
+ await delay(650);
789
+ }
541
790
  actionExecuted = true;
542
791
  }
543
792
  } else if (isTypeCommand(cmd)) {
@@ -550,49 +799,83 @@ async function executeSmartLoop(ctx, params) {
550
799
  }
551
800
  }
552
801
  if (actionExecuted) {
802
+ const commandExecutionEndTime = performance.now();
803
+ globalLogger.debug(`[Performance][SmartLoop] Command execution took ${(commandExecutionEndTime - commandExecutionStartTime).toFixed(2)}ms`);
553
804
  if (isCacheHit) {
554
805
  actionHistory.push(...commands);
555
806
  }
556
807
  await delay(100);
557
808
  }
809
+ const iterationEndTime = performance.now();
810
+ const iterationDuration = iterationEndTime - iterationStartTime;
811
+ iterationTimes.push(iterationDuration);
812
+ globalLogger.debug(`[Performance][SmartLoop] Iteration ${loopCount + 1} completed in ${iterationDuration.toFixed(2)}ms (${(iterationDuration / 1e3).toFixed(2)}s)`);
558
813
  if (taskCompleted) {
559
814
  globalLogger.info(`[SmartLoop] Task completed successfully`);
815
+ const stepEndTime2 = performance.now();
816
+ const totalDuration2 = stepEndTime2 - stepStartTime;
817
+ const totalSeconds2 = totalDuration2 / 1e3;
818
+ const totalMinutes2 = totalSeconds2 / 60;
819
+ const averageIterationTime = iterationTimes.reduce((sum, time) => sum + time, 0) / iterationTimes.length;
820
+ globalLogger.debug(`[Performance][SmartLoop] Step execution summary:`);
821
+ globalLogger.debug(`[Performance][SmartLoop] - Total iterations: ${loopCount + 1}`);
822
+ globalLogger.debug(`[Performance][SmartLoop] - Total duration: ${totalDuration2.toFixed(2)}ms (${totalSeconds2.toFixed(2)}s / ${totalMinutes2.toFixed(2)}min)`);
823
+ globalLogger.debug(`[Performance][SmartLoop] - Average iteration time: ${averageIterationTime.toFixed(2)}ms (${(averageIterationTime / 1e3).toFixed(2)}s)`);
824
+ globalLogger.debug(`[Performance][SmartLoop] - Individual iteration times: ${iterationTimes.map((t) => `${t.toFixed(2)}ms`).join(", ")}`);
560
825
  if (anyCacheMiss && currentExecutionData.length > 0) {
561
826
  globalLogger.info(`[SmartLoop] Populating cache with ${currentExecutionData.length} frame(s)...`);
562
- try {
563
- await populateCache({
564
- apiKey: ctx.organisationId,
565
- stepNumber: params.stepNumber,
566
- stepDescription: params.description,
567
- executionData: currentExecutionData,
568
- screenResolution: ctx.screenSize,
569
- platform: ctx.platform,
570
- filepath: params.filepath
571
- });
827
+ const cachePopulateStartTime = performance.now();
828
+ populateCache({
829
+ apiKey: ctx.organisationId,
830
+ stepNumber: params.stepNumber,
831
+ stepDescription: params.description,
832
+ executionData: currentExecutionData,
833
+ screenResolution: screenshotResolution,
834
+ platform: ctx.platform,
835
+ filepath: params.filepath
836
+ }).then(() => {
837
+ const cachePopulateEndTime = performance.now();
838
+ globalLogger.debug(`[Performance][SmartLoop] Cache population took ${(cachePopulateEndTime - cachePopulateStartTime).toFixed(2)}ms`);
572
839
  globalLogger.debug(`[SmartLoop] Cache populated successfully`);
573
- } catch (e) {
574
- globalLogger.warn(`[SmartLoop] Failed to populate cache: ${e.message}`);
575
- }
840
+ }).catch((e) => globalLogger.warn(`[SmartLoop] Cache population failed: ${e.message}`));
576
841
  } else if (!anyCacheMiss) {
577
842
  globalLogger.debug(`[SmartLoop] Skipping cache population (all actions were cached)`);
578
843
  }
579
844
  return {
580
845
  success: true,
581
846
  iterations: loopCount + 1,
582
- cacheHit: everHadCacheHit
847
+ cacheHit: everHadCacheHit,
848
+ cacheHitCount: totalCacheHits
583
849
  };
584
850
  }
585
851
  loopCount++;
586
852
  }
853
+ const stepEndTime = performance.now();
854
+ const totalDuration = stepEndTime - stepStartTime;
855
+ const totalSeconds = totalDuration / 1e3;
856
+ const totalMinutes = totalSeconds / 60;
857
+ globalLogger.debug(`[Performance][SmartLoop] Timeout reached after ${loopCount} iterations`);
858
+ globalLogger.debug(`[Performance][SmartLoop] - Total duration: ${totalDuration.toFixed(2)}ms (${totalSeconds.toFixed(2)}s / ${totalMinutes.toFixed(2)}min)`);
859
+ globalLogger.debug(`[Performance][SmartLoop] - Individual iteration times: ${iterationTimes.map((t) => `${t.toFixed(2)}ms`).join(", ")}`);
587
860
  throw new Error(`Smart Loop timeout after ${SMART_LOOP_MAX_ITERATIONS} iterations`);
588
861
  } catch (error) {
589
862
  const message = error instanceof Error ? error.message : String(error);
863
+ const stepEndTime = performance.now();
864
+ const totalDuration = stepEndTime - stepStartTime;
865
+ const totalSeconds = totalDuration / 1e3;
866
+ const totalMinutes = totalSeconds / 60;
590
867
  globalLogger.error(`[SmartLoop] Error: ${message}`);
868
+ globalLogger.debug(`[Performance][SmartLoop] Error occurred after ${loopCount + 1} iteration(s)`);
869
+ globalLogger.debug(`[Performance][SmartLoop] - Total duration before error: ${totalDuration.toFixed(2)}ms (${totalSeconds.toFixed(2)}s / ${totalMinutes.toFixed(2)}min)`);
870
+ if (iterationTimes.length > 0) {
871
+ globalLogger.debug(`[Performance][SmartLoop] - Individual iteration times: ${iterationTimes.map((t) => `${t.toFixed(2)}ms`).join(", ")}`);
872
+ }
591
873
  return {
592
874
  success: false,
593
875
  error: message,
594
876
  iterations: loopCount + 1,
595
- cacheHit: everHadCacheHit
877
+ cacheHit: everHadCacheHit,
878
+ cacheHitCount: totalCacheHits
596
879
  };
597
880
  }
598
881
  }
@@ -623,8 +906,13 @@ class GptDriver {
623
906
  step_number = 1;
624
907
  organisationId;
625
908
  configFilePath;
909
+ // Session Execution Stats
910
+ _stats_startTime = 0;
911
+ _stats_executedSteps = 0;
912
+ _stats_cacheHits = 0;
626
913
  // Smart loop state - maintains action history across steps for context
627
914
  globalActionHistory = [];
915
+ pendingLogPromises = [];
628
916
  /**
629
917
  * Creates an instance of the GptDriver class.
630
918
  *
@@ -697,6 +985,9 @@ class GptDriver {
697
985
  */
698
986
  async startSession() {
699
987
  globalLogger.info("Starting session...");
988
+ this._stats_startTime = performance.now();
989
+ this._stats_executedSteps = 0;
990
+ this._stats_cacheHits = 0;
700
991
  if (!this.useGptDriverCloud) {
701
992
  if (this.driver) {
702
993
  let platform;
@@ -796,6 +1087,37 @@ class GptDriver {
796
1087
  getSessionLink() {
797
1088
  return `https://app.mobileboost.io/gpt-driver/sessions/${this.gptDriverSessionId}`;
798
1089
  }
1090
+ /**
1091
+ * Stops the current GPTDriver session and updates its state.
1092
+ *
1093
+ * This method sends a request to the GPT Driver server to stop the session and logs the session status as either "failed" or "success."
1094
+ *
1095
+ * @param {"failed" | "success"} status - Indicates the outcome of the session.
1096
+ * Use "success" if the session completed as expected,
1097
+ * or "failed" if the session encountered an error or issue.
1098
+ *
1099
+ * @throws {Error} If the request to stop the session fails.
1100
+ */
1101
+ /**
1102
+ * Marks the current GPTDriver session as succeeded.
1103
+ *
1104
+ * This method stops the session and logs it as "succeeded"
1105
+ *
1106
+ * @throws {Error} If the request to stop the session fails.
1107
+ */
1108
+ async setSessionSucceeded() {
1109
+ await this.setSessionStatus("succeeded");
1110
+ }
1111
+ /**
1112
+ * Marks the current GPTDriver session as failed.
1113
+ *
1114
+ * This method stops the session and logs it as "failed."
1115
+ *
1116
+ * @throws {Error} If the request to stop the session fails.
1117
+ */
1118
+ async setSessionFailed() {
1119
+ await this.setSessionStatus("failed");
1120
+ }
799
1121
  /**
800
1122
  * Stops the current GPTDriver session and updates its state.
801
1123
  *
@@ -808,7 +1130,13 @@ class GptDriver {
808
1130
  * @throws {Error} If the request to stop the session fails.
809
1131
  */
810
1132
  async setSessionStatus(status) {
1133
+ try {
1134
+ await Promise.all(this.pendingLogPromises);
1135
+ } catch (e) {
1136
+ globalLogger.error(`Error waiting for pending log promises: ${e.message}`);
1137
+ }
811
1138
  if (this.gptDriverSessionId) {
1139
+ this.printSessionSummary();
812
1140
  globalLogger.info(`Stopping session with status: ${status}`);
813
1141
  await axios.post(
814
1142
  `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/stop`,
@@ -824,6 +1152,19 @@ class GptDriver {
824
1152
  this.globalActionHistory = [];
825
1153
  }
826
1154
  }
1155
+ printSessionSummary() {
1156
+ if (this._stats_startTime === 0) return;
1157
+ const endTime = performance.now();
1158
+ const totalTime = (endTime - this._stats_startTime) / 1e3;
1159
+ globalLogger.info(`
1160
+ ${"=".repeat(50)}`);
1161
+ globalLogger.info(`Session Execution Summary:`);
1162
+ globalLogger.info(`Total Session Time: ${totalTime.toFixed(2)} seconds`);
1163
+ globalLogger.info(`Total Steps executed: ${this._stats_executedSteps}`);
1164
+ globalLogger.info(`Total Step Cache Hits: ${this._stats_cacheHits}`);
1165
+ globalLogger.info(`${"=".repeat(50)}
1166
+ `);
1167
+ }
827
1168
  // ─────────────────────────────────────────────────────────────────────────────
828
1169
  // SMART LOOP INTEGRATION
829
1170
  // ─────────────────────────────────────────────────────────────────────────────
@@ -831,21 +1172,23 @@ class GptDriver {
831
1172
  * Creates a SmartLoopContext for the current session.
832
1173
  * This context provides all the callbacks needed by the smart loop executor.
833
1174
  */
834
- createSmartLoopContext() {
1175
+ createSmartLoopContext(options) {
835
1176
  if (!this.organisationId) {
836
1177
  throw new Error("Organisation ID is missing, please set it in the GPTDriver constructor");
837
1178
  }
838
1179
  return {
839
1180
  apiKey: this.apiKey,
840
1181
  platform: this.appiumSessionConfig?.platform,
841
- screenSize: this.appiumSessionConfig.size,
842
1182
  globalActionHistory: this.globalActionHistory,
843
- getScreenshot: () => this.getScreenshot(this.appiumSessionConfig),
844
- performTap: (x, y) => this.performTap(x, y),
845
- performScroll: (direction) => this.performScroll(direction),
1183
+ getScreenshot: () => this.getScreenshot(this.appiumSessionConfig, false),
1184
+ performTap: (x, y, screenshotWidth, screenshotHeight) => this.performTap(x, y, screenshotWidth, screenshotHeight),
1185
+ performSwipe: (params) => this.performSwipe(params),
846
1186
  performType: (text) => this.performType(text),
847
- logCodeExecution: async (screenshot, command) => this.logCodeExecution(screenshot, command),
848
- organisationId: this.organisationId
1187
+ performPressEnter: () => this.performPressEnter(),
1188
+ logCodeExecution: async (screenshot, command, isCacheHit) => this.logCodeExecution(screenshot, command, isCacheHit),
1189
+ organisationId: this.organisationId,
1190
+ middleLayerAssertFn: options?.middleLayerAssertFn,
1191
+ pendingLogPromises: options?.pendingLogPromises
849
1192
  };
850
1193
  }
851
1194
  /**
@@ -895,15 +1238,31 @@ class GptDriver {
895
1238
  /**
896
1239
  * Performs a tap action at the specified coordinates.
897
1240
  */
898
- async performTap(x, y) {
1241
+ async performTap(x, y, screenshotWidth, screenshotHeight) {
899
1242
  const client = await this.getWdioClient();
1243
+ const platform = this.appiumSessionConfig?.platform;
1244
+ const { width: w, height: h } = this.appiumSessionConfig?.size ?? { width: 1080, height: 1920 };
1245
+ const scaled = this.scaleForIOS(
1246
+ x,
1247
+ y,
1248
+ platform,
1249
+ screenshotWidth,
1250
+ screenshotHeight,
1251
+ w,
1252
+ h
1253
+ );
1254
+ const clampedX = this.clamp(scaled.x, 0, w - 1);
1255
+ const clampedY = this.clamp(scaled.y, 0, h - 1);
1256
+ globalLogger.debug(
1257
+ `[Tap] Platform: ${platform}, Input: ${x},${y}, Window: ${w}x${h}, Final: ${clampedX},${clampedY}`
1258
+ );
900
1259
  await client.performActions([
901
1260
  {
902
1261
  type: "pointer",
903
1262
  id: "finger1",
904
1263
  parameters: { pointerType: "touch" },
905
1264
  actions: [
906
- { type: "pointerMove", duration: 0, x, y },
1265
+ { type: "pointerMove", duration: 0, x: clampedX, y: clampedY },
907
1266
  { type: "pointerDown", button: 0 },
908
1267
  { type: "pause", duration: 100 },
909
1268
  { type: "pointerUp", button: 0 }
@@ -913,25 +1272,143 @@ class GptDriver {
913
1272
  }
914
1273
  async performType(text) {
915
1274
  const client = await this.getWdioClient();
916
- await client.keys(text.split(""));
1275
+ const platform = this.appiumSessionConfig?.platform;
1276
+ if (platform === "iOS") {
1277
+ const actions = text.split("").flatMap((char) => [
1278
+ { type: "keyDown", value: char },
1279
+ { type: "keyUp", value: char }
1280
+ ]);
1281
+ await client.performActions([
1282
+ {
1283
+ type: "key",
1284
+ id: "keyboard",
1285
+ actions
1286
+ }
1287
+ ]);
1288
+ } else {
1289
+ await client.keys(text.split(""));
1290
+ }
1291
+ }
1292
+ async performPressEnter() {
1293
+ const client = await this.getWdioClient();
1294
+ const platform = this.appiumSessionConfig?.platform;
1295
+ if (platform === "iOS") {
1296
+ await client.performActions([
1297
+ {
1298
+ type: "key",
1299
+ id: "keyboard",
1300
+ actions: [
1301
+ { type: "keyDown", value: "\uE007" },
1302
+ { type: "keyUp", value: "\uE007" }
1303
+ ]
1304
+ }
1305
+ ]);
1306
+ } else {
1307
+ await client.keys(["Enter"]);
1308
+ }
1309
+ }
1310
+ clamp(value, min, max) {
1311
+ return Math.max(min, Math.min(max, value));
1312
+ }
1313
+ scaleForIOS(x, y, platform, screenshotWidth, screenshotHeight, windowWidth, windowHeight) {
1314
+ if (platform !== "iOS" || !screenshotWidth || !screenshotHeight || !windowWidth || !windowHeight) {
1315
+ return { x: Math.round(x), y: Math.round(y) };
1316
+ }
1317
+ const scaleX = windowWidth / screenshotWidth;
1318
+ const scaleY = windowHeight / screenshotHeight;
1319
+ return {
1320
+ x: Math.round(x * scaleX),
1321
+ y: Math.round(y * scaleY)
1322
+ };
917
1323
  }
918
- async performScroll(direction) {
1324
+ async performSwipe(params) {
919
1325
  const client = await this.getWdioClient();
920
- const w = this.appiumSessionConfig?.size?.width ?? 1080;
921
- const h = this.appiumSessionConfig?.size?.height ?? 1920;
922
- const x = Math.round(w / 2);
923
- const startY = direction === "down" ? Math.round(h * 0.8) : Math.round(h * 0.2);
924
- const endY = direction === "down" ? Math.round(h * 0.2) : Math.round(h * 0.8);
1326
+ const platform = this.appiumSessionConfig?.platform;
1327
+ const { width: w, height: h } = this.appiumSessionConfig?.size ?? { width: 1080, height: 1920 };
1328
+ const defaultStartX = w / 2;
1329
+ let defaultStartY;
1330
+ if (params.direction === "down") {
1331
+ defaultStartY = h * 0.75;
1332
+ } else if (params.direction === "up") {
1333
+ defaultStartY = h * 0.25;
1334
+ } else {
1335
+ defaultStartY = h / 2;
1336
+ }
1337
+ let startX;
1338
+ let startY;
1339
+ if (params.x1 !== void 0 || params.y1 !== void 0) {
1340
+ const scaled = this.scaleForIOS(
1341
+ params.x1 ?? defaultStartX,
1342
+ params.y1 ?? defaultStartY,
1343
+ platform,
1344
+ params.screenshotWidth,
1345
+ params.screenshotHeight,
1346
+ w,
1347
+ h
1348
+ );
1349
+ startX = scaled.x;
1350
+ startY = scaled.y;
1351
+ } else {
1352
+ startX = Math.round(defaultStartX);
1353
+ startY = Math.round(defaultStartY);
1354
+ }
1355
+ startX = this.clamp(startX, 0, w - 1);
1356
+ startY = this.clamp(startY, 0, h - 1);
1357
+ let endX;
1358
+ let endY;
1359
+ if (params.x2 !== void 0 || params.y2 !== void 0) {
1360
+ const scaled = this.scaleForIOS(
1361
+ params.x2 ?? startX,
1362
+ params.y2 ?? startY,
1363
+ platform,
1364
+ params.screenshotWidth,
1365
+ params.screenshotHeight,
1366
+ w,
1367
+ h
1368
+ );
1369
+ endX = scaled.x;
1370
+ endY = scaled.y;
1371
+ } else {
1372
+ const deltaX = Math.round(w * 0.5);
1373
+ const deltaY = Math.round(h * 0.5);
1374
+ switch (params.direction) {
1375
+ case "left":
1376
+ endX = Math.max(0, startX - deltaX);
1377
+ endY = startY;
1378
+ break;
1379
+ case "right":
1380
+ endX = Math.min(w - 1, startX + deltaX);
1381
+ endY = startY;
1382
+ break;
1383
+ case "up":
1384
+ endX = startX;
1385
+ endY = Math.min(h - 1, startY + deltaY);
1386
+ break;
1387
+ case "down":
1388
+ endX = startX;
1389
+ endY = Math.max(0, startY - deltaY);
1390
+ break;
1391
+ default:
1392
+ endX = startX;
1393
+ endY = startY;
1394
+ }
1395
+ }
1396
+ endX = this.clamp(endX, 0, w - 1);
1397
+ endY = this.clamp(endY, 0, h - 1);
1398
+ globalLogger.debug(
1399
+ `[Swipe] Platform: ${platform}, Direction: ${params.direction}, Start: ${startX},${startY}, End: ${endX},${endY}`
1400
+ );
1401
+ const duration = params.duration ?? 500;
925
1402
  await client.performActions([
926
1403
  {
927
1404
  type: "pointer",
928
1405
  id: "finger1",
929
1406
  parameters: { pointerType: "touch" },
930
1407
  actions: [
931
- { type: "pointerMove", duration: 0, x, y: startY },
1408
+ { type: "pointerMove", duration: 0, x: startX, y: startY },
932
1409
  { type: "pointerDown", button: 0 },
933
1410
  { type: "pause", duration: 100 },
934
- { type: "pointerMove", duration: 500, x, y: endY },
1411
+ { type: "pointerMove", duration, x: endX, y: endY },
935
1412
  { type: "pointerUp", button: 0 }
936
1413
  ]
937
1414
  }
@@ -950,17 +1427,17 @@ class GptDriver {
950
1427
  if (found) {
951
1428
  return;
952
1429
  }
953
- await this.performScroll(direction);
1430
+ await this.performSwipe({ direction });
954
1431
  await this._delay(500);
955
1432
  }
956
1433
  throw new Error(`scrollUntil target not found after ${max} scroll(s)`);
957
1434
  }
958
- async getScreenshot(appiumSessionConfig) {
1435
+ async getScreenshot(appiumSessionConfig, shouldScale = true) {
959
1436
  globalLogger.debug("Capturing screenshot...");
960
1437
  const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
961
1438
  const screenshotResponse = await axios.get(url);
962
1439
  let screenshot = await screenshotResponse.data.value;
963
- if (appiumSessionConfig.platform === "iOS") {
1440
+ if (appiumSessionConfig.platform === "iOS" && shouldScale) {
964
1441
  globalLogger.debug(`Resizing iOS screenshot to ${appiumSessionConfig.size.width}x${appiumSessionConfig.size.height}`);
965
1442
  const imageBuffer = Buffer.from(screenshot, "base64");
966
1443
  const transformedImage = await sharp(imageBuffer).resize(appiumSessionConfig.size.width, appiumSessionConfig.size.height).toBuffer();
@@ -1014,41 +1491,71 @@ class GptDriver {
1014
1491
  }
1015
1492
  }
1016
1493
  async aiExecute(commandOrOptions, options) {
1494
+ const startTime = performance.now();
1495
+ globalLogger.debug(`[Performance] aiExecute started at ${startTime.toFixed(2)}ms`);
1017
1496
  const command = typeof commandOrOptions === "string" ? commandOrOptions : commandOrOptions.command;
1018
1497
  const opts = typeof commandOrOptions === "string" ? options : commandOrOptions;
1019
1498
  const appiumHandler = opts?.appiumHandler;
1020
1499
  const cachingMode = opts?.cachingMode;
1021
1500
  const useSmartLoop = opts?.useSmartLoop ?? false;
1022
1501
  if (!this.appiumSessionStarted) {
1502
+ const sessionStartTime = performance.now();
1503
+ globalLogger.debug(`[Performance] Starting session...`);
1023
1504
  await this.startSession();
1505
+ const sessionEndTime = performance.now();
1506
+ globalLogger.debug(`[Performance] Session started in ${(sessionEndTime - sessionStartTime).toFixed(2)}ms`);
1024
1507
  }
1025
1508
  globalLogger.info(`Executing command: ${command}`);
1026
1509
  const driver = this.driver;
1027
1510
  if (appiumHandler != null) {
1028
1511
  try {
1512
+ const handlerStartTime = performance.now();
1513
+ globalLogger.debug(`[Performance] Executing custom Appium handler...`);
1029
1514
  await this.takeScreenshotAndLogCodeExecution(appiumHandler.toString());
1030
1515
  await appiumHandler(driver);
1516
+ const handlerEndTime = performance.now();
1031
1517
  globalLogger.debug("Custom Appium handler executed successfully");
1518
+ globalLogger.debug(`[Performance] Appium handler completed in ${(handlerEndTime - handlerStartTime).toFixed(2)}ms`);
1032
1519
  this.step_number++;
1520
+ this._stats_executedSteps++;
1521
+ const totalTime2 = performance.now() - startTime;
1522
+ globalLogger.debug(`[Performance] aiExecute total time: ${totalTime2.toFixed(2)}ms`);
1033
1523
  return;
1034
1524
  } catch (e) {
1035
1525
  globalLogger.warn("Custom Appium handler failed, falling back to AI execution");
1036
1526
  }
1037
1527
  }
1038
1528
  if (useSmartLoop) {
1039
- const ctx = this.createSmartLoopContext();
1529
+ const smartLoopStartTime = performance.now();
1530
+ globalLogger.debug(`[Performance] Starting smart loop execution...`);
1531
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1040
1532
  const result = await executeSmartLoop(ctx, {
1041
1533
  stepNumber: this.step_number,
1042
1534
  description: command,
1043
- instruction: command
1535
+ instruction: command,
1536
+ ...this.testId && {
1537
+ filepath: this.testId
1538
+ }
1044
1539
  });
1540
+ const smartLoopEndTime = performance.now();
1541
+ globalLogger.debug(`[Performance] Smart loop completed in ${(smartLoopEndTime - smartLoopStartTime).toFixed(2)}ms`);
1045
1542
  if (!result.success) {
1046
1543
  throw new Error(result.error || "Smart loop execution failed");
1047
1544
  }
1545
+ if (result.cacheHitCount) {
1546
+ this._stats_cacheHits += result.cacheHitCount;
1547
+ }
1048
1548
  this.step_number++;
1549
+ this._stats_executedSteps += result.iterations ?? 1;
1049
1550
  } else {
1551
+ const gptHandlerStartTime = performance.now();
1552
+ globalLogger.debug(`[Performance] Starting GPT handler execution...`);
1050
1553
  await this.gptHandler(command, cachingMode);
1554
+ const gptHandlerEndTime = performance.now();
1555
+ globalLogger.debug(`[Performance] GPT handler completed in ${(gptHandlerEndTime - gptHandlerStartTime).toFixed(2)}ms`);
1051
1556
  }
1557
+ const totalTime = performance.now() - startTime;
1558
+ globalLogger.debug(`[Performance] aiExecute total time: ${totalTime.toFixed(2)}ms`);
1052
1559
  }
1053
1560
  /**
1054
1561
  * Asserts a single condition using the GPTDriver.
@@ -1057,15 +1564,37 @@ class GptDriver {
1057
1564
  * If the assertion fails, an error is thrown.
1058
1565
  *
1059
1566
  * @param {string} assertion - The condition to be asserted.
1060
- * @param cachingMode - The caching mode to be used for the assertion.
1567
+ * @param {Object} options - Optional configuration object
1568
+ * @param {CachingMode} options.cachingMode - The caching mode to be used for the assertion.
1569
+ * @param {boolean} options.useSmartLoop - If true, uses the smart loop execution for optimized caching. Default: false
1061
1570
  * @throws {Error} If the assertion fails.
1571
+ *
1572
+ * @example
1573
+ * // Basic usage
1574
+ * await driver.assert('Login button is visible');
1575
+ *
1576
+ * @example
1577
+ * // With caching mode
1578
+ * await driver.assert('Login button is visible', {
1579
+ * cachingMode: "FULL_SCREEN"
1580
+ * });
1581
+ *
1582
+ * @example
1583
+ * // With smart loop enabled
1584
+ * await driver.assert('Login button is visible', {
1585
+ * useSmartLoop: true,
1586
+ * cachingMode: "FULL_SCREEN"
1587
+ * });
1062
1588
  */
1063
- async assert(assertion, cachingMode) {
1589
+ async assert(assertion, {
1590
+ cachingMode,
1591
+ useSmartLoop = false
1592
+ }) {
1064
1593
  if (!this.appiumSessionStarted) {
1065
1594
  await this.startSession();
1066
1595
  }
1067
1596
  try {
1068
- const results = await this.checkBulk([assertion], cachingMode);
1597
+ const results = await this.checkBulk([assertion], cachingMode, 2, 1e3, useSmartLoop);
1069
1598
  if (!Object.values(results).at(0)) {
1070
1599
  await this.setSessionStatus("failed");
1071
1600
  globalLogger.error(`Assertion failed: ${assertion}`);
@@ -1085,15 +1614,37 @@ class GptDriver {
1085
1614
  * If any assertion fails, an error is thrown listing all failed assertions.
1086
1615
  *
1087
1616
  * @param {string[]} assertions - An array of conditions to be asserted.
1088
- * @param cachingMode - The caching mode to be used for the assertions.
1617
+ * @param {Object} options - Optional configuration object
1618
+ * @param {CachingMode} options.cachingMode - The caching mode to be used for the assertions.
1619
+ * @param {boolean} options.useSmartLoop - If true, uses the smart loop execution. Default: false
1089
1620
  * @throws {Error} If any of the assertions fail.
1621
+ *
1622
+ * @example
1623
+ * // Basic usage
1624
+ * await driver.assertBulk(['Login button is visible', 'Username field is enabled']);
1625
+ *
1626
+ * @example
1627
+ * // With caching mode
1628
+ * await driver.assertBulk(['Login button is visible'], {
1629
+ * cachingMode: "FULL_SCREEN"
1630
+ * });
1631
+ *
1632
+ * @example
1633
+ * // With smart loop enabled
1634
+ * await driver.assertBulk(['Login button is visible'], {
1635
+ * useSmartLoop: true,
1636
+ * cachingMode: "FULL_SCREEN"
1637
+ * });
1090
1638
  */
1091
- async assertBulk(assertions, cachingMode) {
1639
+ async assertBulk(assertions, {
1640
+ cachingMode,
1641
+ useSmartLoop = false
1642
+ }) {
1092
1643
  if (!this.appiumSessionStarted) {
1093
1644
  await this.startSession();
1094
1645
  }
1095
1646
  try {
1096
- const results = await this.checkBulk(assertions, cachingMode);
1647
+ const results = await this.checkBulk(assertions, cachingMode, 2, 1e3, useSmartLoop);
1097
1648
  const failedAssertions = Object.values(results).reduce((prev, current, currentIndex) => {
1098
1649
  if (!current) {
1099
1650
  return [...prev, assertions.at(currentIndex)];
@@ -1122,14 +1673,30 @@ class GptDriver {
1122
1673
  * @param {CachingMode} cachingMode - The caching mode to be used for the conditions.
1123
1674
  * @param {number} maxRetries - The maximum number of retries if any condition fails (default: 2).
1124
1675
  * @param {number} retryDelayMs - The delay in milliseconds between retries (default: 1000).
1676
+ * @param {boolean} useSmartLoop - If true, uses the smart loop execution for optimized caching. Default: false
1125
1677
  * @returns {Promise<Record<string, boolean>>} A promise that resolves with an object mapping each condition
1126
1678
  * to a boolean indicating whether the condition was met.
1679
+ *
1680
+ * @example
1681
+ * // Basic usage
1682
+ * const results = await driver.checkBulk(['Login button is visible', 'Username field is enabled']);
1683
+ * console.log(results); // { 'Login button is visible': true, 'Username field is enabled': false }
1684
+ *
1685
+ * @example
1686
+ * // With smart loop enabled
1687
+ * const results = await driver.checkBulk(
1688
+ * ['Login button is visible'],
1689
+ * "FULL_SCREEN",
1690
+ * 2,
1691
+ * 1000,
1692
+ * true
1693
+ * );
1127
1694
  */
1128
- async checkBulk(conditions, cachingMode, maxRetries = 2, retryDelayMs = 1e3) {
1695
+ async checkBulk(conditions, cachingMode, maxRetries = 2, retryDelayMs = 1e3, useSmartLoop = false) {
1129
1696
  let attempt = 0;
1130
1697
  let results = {};
1131
1698
  while (attempt <= maxRetries) {
1132
- results = await this._checkBulkOnce(conditions, cachingMode, attempt);
1699
+ results = await this._checkBulkOnce(conditions, cachingMode, attempt, useSmartLoop);
1133
1700
  const failedConditions = Object.entries(results).filter(([_, success]) => !success).map(([key, _]) => key);
1134
1701
  if (failedConditions.length === 0) {
1135
1702
  return results;
@@ -1151,33 +1718,67 @@ class GptDriver {
1151
1718
  *
1152
1719
  * @private
1153
1720
  */
1154
- async _checkBulkOnce(conditions, cachingMode, attempt = 0) {
1721
+ async _checkBulkOnce(conditions, cachingMode, attempt = 0, useSmartLoop = false) {
1155
1722
  if (!this.appiumSessionStarted) {
1156
1723
  await this.startSession();
1157
1724
  }
1158
1725
  globalLogger.info(`Checking conditions (attempt ${attempt}): ${conditions.join(", ")}`);
1726
+ const pendingLogPromises = [];
1159
1727
  try {
1160
- let screenshot;
1161
- if (!this.useGptDriverCloud) {
1162
- screenshot = await this.getScreenshot(this.appiumSessionConfig);
1163
- }
1164
- const response = await axios.post(
1165
- `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
1166
- {
1167
- api_key: this.apiKey,
1168
- base64_screenshot: screenshot,
1169
- assertions: conditions,
1170
- command: `Assert: ${JSON.stringify(conditions)}`,
1171
- caching_mode: cachingMode ?? this.cachingMode,
1172
- step_number: this.step_number
1728
+ if (useSmartLoop) {
1729
+ const instruction = `Assert: ${JSON.stringify(conditions)}`;
1730
+ const middleLayerAssertFn = async (screenshot) => {
1731
+ return await axios.post(
1732
+ `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
1733
+ {
1734
+ api_key: this.apiKey,
1735
+ base64_screenshot: screenshot,
1736
+ assertions: conditions,
1737
+ command: `Assert: ${JSON.stringify(conditions)}`,
1738
+ caching_mode: cachingMode ?? this.cachingMode,
1739
+ step_number: this.step_number
1740
+ }
1741
+ );
1742
+ };
1743
+ const ctx = this.createSmartLoopContext({ middleLayerAssertFn, pendingLogPromises });
1744
+ const result = await executeSmartLoop(ctx, {
1745
+ stepNumber: this.step_number,
1746
+ description: instruction,
1747
+ instruction,
1748
+ ...this.testId && {
1749
+ filepath: this.testId
1750
+ }
1751
+ });
1752
+ if (result.cacheHitCount) {
1753
+ this._stats_cacheHits += result.cacheHitCount;
1173
1754
  }
1174
- );
1175
- globalLogger.debug(`Check results: ${JSON.stringify(response.data.results)}`);
1176
- return response.data.results;
1755
+ this._stats_executedSteps += result.iterations ?? 1;
1756
+ return { [instruction]: result.success };
1757
+ } else {
1758
+ let screenshot;
1759
+ if (!this.useGptDriverCloud) {
1760
+ screenshot = await this.getScreenshot(this.appiumSessionConfig);
1761
+ }
1762
+ const response = await axios.post(
1763
+ `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
1764
+ {
1765
+ api_key: this.apiKey,
1766
+ base64_screenshot: screenshot,
1767
+ assertions: conditions,
1768
+ command: `Assert: ${JSON.stringify(conditions)}`,
1769
+ caching_mode: cachingMode ?? this.cachingMode,
1770
+ step_number: this.step_number
1771
+ }
1772
+ );
1773
+ globalLogger.debug(`Check results: ${JSON.stringify(response.data.results)}`);
1774
+ return response.data.results;
1775
+ }
1177
1776
  } catch (e) {
1178
1777
  globalLogger.error("Failed to check conditions", e);
1179
1778
  await this.setSessionStatus("failed");
1180
1779
  throw e;
1780
+ } finally {
1781
+ await Promise.all(pendingLogPromises);
1181
1782
  }
1182
1783
  }
1183
1784
  /**
@@ -1270,10 +1871,12 @@ class GptDriver {
1270
1871
  * const result = await driver.executeFlow('tests/login-flow.json', { useSmartLoop: true });
1271
1872
  */
1272
1873
  async executeFlow(filePath, options) {
1874
+ const flowStartTime = performance.now();
1273
1875
  const useSmartLoop = options?.useSmartLoop ?? false;
1274
1876
  const configFilePath = this.configFilePath;
1275
1877
  let baseDir;
1276
1878
  let absolutePath;
1879
+ let parsedConfigFile;
1277
1880
  if (configFilePath) {
1278
1881
  let raw2;
1279
1882
  try {
@@ -1291,7 +1894,7 @@ class GptDriver {
1291
1894
  globalLogger.error(msg);
1292
1895
  throw new Error(msg);
1293
1896
  }
1294
- const parsedConfigFile = ConfigSchema.parse(json2);
1897
+ parsedConfigFile = ConfigSchema.parse(json2);
1295
1898
  if (path.isAbsolute(parsedConfigFile.testDir)) {
1296
1899
  baseDir = parsedConfigFile.testDir;
1297
1900
  } else {
@@ -1346,7 +1949,7 @@ ${issues}`);
1346
1949
  }
1347
1950
  return val.data;
1348
1951
  };
1349
- const expandSteps = async (steps, inheritedParams, parentDir, stack) => {
1952
+ const expandSteps = async (steps, inheritedParams, parentDir, stack, currentFilePath) => {
1350
1953
  const out = [];
1351
1954
  for (const step of steps) {
1352
1955
  if (step.type === "fileRef") {
@@ -1358,44 +1961,80 @@ ${issues}`);
1358
1961
  }
1359
1962
  visited.add(refKey);
1360
1963
  const child = await loadFlow(refPath);
1361
- const mergedParams = { ...inheritedParams, ...step.overrides ?? {} };
1964
+ const mergedParams = { ...inheritedParams, ...child.params, ...step.overrides ?? {} };
1362
1965
  const childDir = path.dirname(refPath);
1363
- const childExpanded = await expandSteps(child.steps, mergedParams, childDir, [...stack, refKey]);
1966
+ const childRelativePath = path.relative(baseDir, refPath).replace(/^\.\//, "");
1967
+ const childExpanded = await expandSteps(child.steps, mergedParams, childDir, [...stack, refKey], childRelativePath);
1364
1968
  out.push(...childExpanded);
1365
1969
  } else {
1366
- const resolved = { ...step, __params: { ...inheritedParams } };
1970
+ const resolved = {
1971
+ ...step,
1972
+ __params: { ...inheritedParams },
1973
+ __filepath: currentFilePath
1974
+ };
1367
1975
  out.push(resolved);
1368
1976
  }
1369
1977
  }
1370
1978
  return out;
1371
1979
  };
1372
- const effectiveParams = { ...rootFlow.params ?? {} };
1373
- const expandedSteps = await expandSteps(rootFlow.steps, effectiveParams, baseDir, [absolutePath]);
1980
+ const envPath = path.join(process.cwd(), ".env");
1981
+ let envVars = {};
1982
+ try {
1983
+ await promises.access(envPath);
1984
+ const envContent = await readFile(envPath, "utf-8");
1985
+ envVars = dotenv.parse(envContent);
1986
+ } catch (_) {
1987
+ }
1988
+ const effectiveParams = { ...parsedConfigFile?.variables, ...envVars, ...rootFlow.params };
1989
+ const rootRelativePath = path.relative(baseDir, absolutePath).replace(/^\.\//, "");
1990
+ const expandedSteps = await expandSteps(rootFlow.steps, effectiveParams, baseDir, [absolutePath], rootRelativePath);
1374
1991
  if (!this.appiumSessionStarted) {
1375
1992
  await this.startSession();
1376
1993
  }
1377
1994
  globalLogger.info(`Executing flow '${rootFlow.name}' with ${expandedSteps.length} step(s)...`);
1378
1995
  let executed = 0;
1996
+ let cacheHits = 0;
1997
+ let lastStepWasFromFileRef = false;
1998
+ let fileRefStepNumber = 1;
1379
1999
  try {
2000
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig, false);
2001
+ const screenshotResolution = await getImageDimensions(screenshot);
1380
2002
  for (const step of expandedSteps) {
1381
2003
  const params = step.__params ?? effectiveParams;
1382
- const prefix = `Step #${executed + 1} [${step.type}${step.optional ? ", optional" : ""}]`;
2004
+ const filepath = step.__filepath ?? rootRelativePath;
2005
+ console.log("step.__filepath", step.__filepath);
2006
+ console.log("rootRelativePath", rootRelativePath);
2007
+ const isFromFileRef = filepath !== rootRelativePath;
2008
+ console.log("isFromFileRef", isFromFileRef);
2009
+ if (!lastStepWasFromFileRef && isFromFileRef) {
2010
+ fileRefStepNumber = 1;
2011
+ }
2012
+ const effectiveStepNumber = isFromFileRef ? fileRefStepNumber : this.step_number;
2013
+ const prefix = `Step #${executed + 1} (step_number: ${effectiveStepNumber}) [${step.type}${step.optional ? ", optional" : ""}] (filepath: ${filepath})`;
1383
2014
  try {
1384
2015
  switch (step.type) {
1385
2016
  case "ai": {
1386
2017
  const instruction = this.interpolateTemplate(step.instruction, params);
1387
2018
  globalLogger.info(`${prefix}: ${instruction}`);
1388
2019
  if (useSmartLoop) {
1389
- const ctx = this.createSmartLoopContext();
2020
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1390
2021
  const result = await executeSmartLoop(ctx, {
1391
- stepNumber: this.step_number,
2022
+ stepNumber: effectiveStepNumber,
1392
2023
  description: instruction,
1393
- instruction
2024
+ instruction,
2025
+ filepath
1394
2026
  });
1395
2027
  if (!result.success) {
1396
2028
  throw new Error(result.error || "Smart loop execution failed");
1397
2029
  }
1398
- this.step_number++;
2030
+ if (result.cacheHit) {
2031
+ cacheHits++;
2032
+ }
2033
+ if (isFromFileRef) {
2034
+ fileRefStepNumber++;
2035
+ } else {
2036
+ this.step_number++;
2037
+ }
1399
2038
  } else {
1400
2039
  await this.aiExecute({ command: instruction });
1401
2040
  }
@@ -1408,16 +2047,24 @@ ${issues}`);
1408
2047
  }
1409
2048
  globalLogger.info(`${prefix}: ${description}`);
1410
2049
  if (useSmartLoop) {
1411
- const ctx = this.createSmartLoopContext();
2050
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1412
2051
  const result = await executeSmartLoop(ctx, {
1413
- stepNumber: this.step_number,
2052
+ stepNumber: effectiveStepNumber,
1414
2053
  description,
1415
- instruction: description
2054
+ instruction: description,
2055
+ filepath
1416
2056
  });
1417
2057
  if (!result.success) {
1418
2058
  throw new Error(result.error || "Smart loop execution failed");
1419
2059
  }
1420
- this.step_number++;
2060
+ if (result.cacheHit) {
2061
+ cacheHits++;
2062
+ }
2063
+ if (isFromFileRef) {
2064
+ fileRefStepNumber++;
2065
+ } else {
2066
+ this.step_number++;
2067
+ }
1421
2068
  } else {
1422
2069
  await this.aiExecute({ command: description });
1423
2070
  }
@@ -1431,16 +2078,24 @@ ${issues}`);
1431
2078
  globalLogger.info(`${prefix}: ${description}`);
1432
2079
  if (useSmartLoop) {
1433
2080
  const instruction = `Verify that: ${description}`;
1434
- const ctx = this.createSmartLoopContext();
2081
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1435
2082
  const result = await executeSmartLoop(ctx, {
1436
- stepNumber: this.step_number,
2083
+ stepNumber: effectiveStepNumber,
1437
2084
  description,
1438
- instruction
2085
+ instruction,
2086
+ filepath
1439
2087
  });
1440
2088
  if (!result.success) {
1441
2089
  throw new Error(result.error || "Smart loop execution failed");
1442
2090
  }
1443
- this.step_number++;
2091
+ if (result.cacheHit) {
2092
+ cacheHits++;
2093
+ }
2094
+ if (isFromFileRef) {
2095
+ fileRefStepNumber++;
2096
+ } else {
2097
+ this.step_number++;
2098
+ }
1444
2099
  } else {
1445
2100
  await this.assert(description);
1446
2101
  }
@@ -1449,35 +2104,98 @@ ${issues}`);
1449
2104
  case "type": {
1450
2105
  const text = this.interpolateTemplate(step.text, params);
1451
2106
  globalLogger.info(`${prefix}: Type text`);
1452
- await this.takeScreenshotAndLogCodeExecution(`type: text=${text}`);
2107
+ this.takeScreenshotAndLogCodeExecution(`type: text=${text}`);
1453
2108
  await this.performType(text);
1454
- this.step_number++;
2109
+ if (isFromFileRef) {
2110
+ fileRefStepNumber++;
2111
+ } else {
2112
+ this.step_number++;
2113
+ }
1455
2114
  break;
1456
2115
  }
1457
- case "scroll": {
1458
- globalLogger.info(`${prefix}: Scroll ${step.direction}`);
1459
- await this.takeScreenshotAndLogCodeExecution(`scroll: direction=${step.direction}`);
1460
- await this.performScroll(step.direction);
1461
- this.step_number++;
2116
+ case "enter": {
2117
+ globalLogger.info(`${prefix}: Press Enter`);
2118
+ this.takeScreenshotAndLogCodeExecution(`press: Enter`);
2119
+ await this.performPressEnter();
2120
+ if (step.delayNextStep) {
2121
+ await this._delay(step.delayNextStep);
2122
+ }
2123
+ if (isFromFileRef) {
2124
+ fileRefStepNumber++;
2125
+ } else {
2126
+ this.step_number++;
2127
+ }
2128
+ break;
2129
+ }
2130
+ case "scroll":
2131
+ case "swipe": {
2132
+ const description = step.descriptionText ? this.interpolateTemplate(step.descriptionText, params) : void 0;
2133
+ if (description && useSmartLoop) {
2134
+ globalLogger.info(`${prefix}: ${description}`);
2135
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
2136
+ const result = await executeSmartLoop(ctx, {
2137
+ stepNumber: effectiveStepNumber,
2138
+ description,
2139
+ instruction: description,
2140
+ filepath
2141
+ });
2142
+ if (!result.success) {
2143
+ throw new Error(result.error || "Smart loop execution failed");
2144
+ }
2145
+ if (result.cacheHit) {
2146
+ cacheHits++;
2147
+ }
2148
+ if (isFromFileRef) {
2149
+ fileRefStepNumber++;
2150
+ } else {
2151
+ this.step_number++;
2152
+ }
2153
+ } else {
2154
+ globalLogger.info(`${prefix}: ${step.type} ${step.direction}`);
2155
+ this.takeScreenshotAndLogCodeExecution(`${step.type}: direction=${step.direction}`);
2156
+ await this.performSwipe({
2157
+ direction: step.direction,
2158
+ x1: step.x1,
2159
+ y1: step.y1,
2160
+ x2: step.x2,
2161
+ y2: step.y2,
2162
+ duration: step.duration,
2163
+ screenshotHeight: screenshotResolution.height,
2164
+ screenshotWidth: screenshotResolution.width
2165
+ });
2166
+ if (isFromFileRef) {
2167
+ fileRefStepNumber++;
2168
+ } else {
2169
+ this.step_number++;
2170
+ }
2171
+ }
1462
2172
  break;
1463
2173
  }
1464
2174
  case "zoom": {
1465
2175
  globalLogger.info(`${prefix}: Zoom ${step.direction}`);
1466
- await this.takeScreenshotAndLogCodeExecution(`zoom: direction=${step.direction}`);
1467
- this.step_number++;
2176
+ this.takeScreenshotAndLogCodeExecution(`zoom: direction=${step.direction}`);
2177
+ if (isFromFileRef) {
2178
+ fileRefStepNumber++;
2179
+ } else {
2180
+ this.step_number++;
2181
+ }
1468
2182
  break;
1469
2183
  }
1470
2184
  case "scrollUntil": {
1471
2185
  const interpolatedText = step.text != null ? this.interpolateTemplate(step.text, params) : void 0;
1472
2186
  globalLogger.info(`${prefix}: Scroll until ${interpolatedText ?? step.elementId}`);
1473
- await this.takeScreenshotAndLogCodeExecution(`scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`);
2187
+ this.takeScreenshotAndLogCodeExecution(`scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`);
1474
2188
  await this.performScrollUntil({
1475
2189
  direction: step.direction,
1476
2190
  text: interpolatedText,
1477
2191
  elementId: step.elementId,
1478
2192
  maxScrolls: step.maxScrolls
1479
2193
  });
1480
- this.step_number++;
2194
+ if (isFromFileRef) {
2195
+ fileRefStepNumber++;
2196
+ } else {
2197
+ this.step_number++;
2198
+ }
1481
2199
  break;
1482
2200
  }
1483
2201
  case "deeplink": {
@@ -1485,7 +2203,7 @@ ${issues}`);
1485
2203
  const bundleId = params["bundleId"];
1486
2204
  const url = this.interpolateTemplate(step.url, params);
1487
2205
  globalLogger.info(`${prefix}: Open deeplink ${url}`);
1488
- await this.takeScreenshotAndLogCodeExecution(`openDeepLinkUrl: url=${url}`);
2206
+ this.takeScreenshotAndLogCodeExecution(`openDeepLinkUrl: url=${url}`);
1489
2207
  await this.openDeepLinkUrl({ url, package: pkg, bundleId });
1490
2208
  break;
1491
2209
  }
@@ -1493,10 +2211,12 @@ ${issues}`);
1493
2211
  throw new Error(`Unsupported step type at execution: ${step.type}`);
1494
2212
  }
1495
2213
  }
2214
+ lastStepWasFromFileRef = isFromFileRef;
1496
2215
  executed++;
1497
2216
  } catch (err) {
1498
2217
  if (step.optional) {
1499
2218
  globalLogger.warn(`${prefix} failed but marked optional. Continuing. Error: ${err.message}`);
2219
+ lastStepWasFromFileRef = isFromFileRef;
1500
2220
  continue;
1501
2221
  }
1502
2222
  throw err;
@@ -1509,6 +2229,16 @@ ${issues}`);
1509
2229
  }
1510
2230
  throw e;
1511
2231
  }
2232
+ const flowEndTime = performance.now();
2233
+ const totalTime = (flowEndTime - flowStartTime) / 1e3;
2234
+ globalLogger.info(`
2235
+ ${"=".repeat(50)}`);
2236
+ globalLogger.info(`Flow Execution Summary:`);
2237
+ globalLogger.info(`Total Test Time: ${totalTime.toFixed(2)} seconds`);
2238
+ globalLogger.info(`Total Steps executed: ${executed}`);
2239
+ globalLogger.info(`Total Step Cache Hits: ${cacheHits}`);
2240
+ globalLogger.info(`${"=".repeat(50)}
2241
+ `);
1512
2242
  return rootFlow;
1513
2243
  }
1514
2244
  async gptHandler(command, cachingMode) {
@@ -1516,8 +2246,15 @@ ${issues}`);
1516
2246
  let conditionSucceeded = false;
1517
2247
  while (!conditionSucceeded) {
1518
2248
  let screenshot;
2249
+ let originalScreenshotBase64 = null;
1519
2250
  if (!this.useGptDriverCloud) {
1520
- screenshot = await this.getScreenshot(this.appiumSessionConfig);
2251
+ const stabilityResult = await waitForStableScreen(
2252
+ () => this.getScreenshot(this.appiumSessionConfig)
2253
+ );
2254
+ screenshot = stabilityResult.originalScreenshotBase64;
2255
+ if (!stabilityResult.stable) {
2256
+ globalLogger.warn("Screen did not stabilize within timeout, proceeding with last screenshot");
2257
+ }
1521
2258
  }
1522
2259
  globalLogger.info("Requesting next action from GPT Driver...");
1523
2260
  const response = await axios.request(
@@ -1545,7 +2282,6 @@ ${issues}`);
1545
2282
  for (const appiumCommand of executeResponse.commands) {
1546
2283
  await this.executeCommand(appiumCommand);
1547
2284
  }
1548
- await delay(1500);
1549
2285
  }
1550
2286
  this.step_number = this.step_number + 1;
1551
2287
  globalLogger.info("Command execution completed successfully");
@@ -1573,13 +2309,13 @@ ${issues}`);
1573
2309
  });
1574
2310
  }
1575
2311
  }
1576
- async logCodeExecution(screenshot, command) {
2312
+ async logCodeExecution(screenshot, command, isCacheHit) {
1577
2313
  try {
1578
- const screenshot2 = await this.getScreenshot(this.appiumSessionConfig);
1579
2314
  await axios.post(`${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/log_code_execution`, {
1580
2315
  api_key: this.apiKey,
1581
- base64_screenshot: screenshot2,
1582
- command
2316
+ base64_screenshot: screenshot,
2317
+ command,
2318
+ from_cache: !!isCacheHit
1583
2319
  });
1584
2320
  } catch (e) {
1585
2321
  globalLogger.error("Failed to log code execution", e);