gpt-driver-node 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -2,9 +2,11 @@
2
2
 
3
3
  var node_fs = require('node:fs');
4
4
  var path = require('node:path');
5
+ var promises = require('node:fs/promises');
5
6
  var axios = require('axios');
6
7
  var sharp = require('sharp');
7
8
  var webdriverio = require('webdriverio');
9
+ var dotenv = require('dotenv');
8
10
  var winston = require('winston');
9
11
  var zod = require('zod');
10
12
  var crypto = require('node:crypto');
@@ -93,6 +95,75 @@ ${logStyles.gray(stack)}` : logMessage;
93
95
  ]
94
96
  });
95
97
 
98
/**
 * Polls the screen until two consecutive screenshots are similar enough to be
 * considered "stable", or until the timeout elapses.
 *
 * Each screenshot is decoded from base64, resized to
 * `downscaleWidth` x `downscaleHeight` and converted to raw pixels with an
 * alpha channel before being compared with the previous capture via
 * `areImagesSimilar`.
 *
 * At least one screenshot is always captured, even when `maxTimeoutSec` is
 * zero or negative, so `originalScreenshotBase64` is never `null` on a normal
 * return.
 *
 * @param {() => Promise<string>} getScreenshot - Resolves to a base64-encoded screenshot.
 * @param {Object} [options]
 * @param {number} [options.maxTimeoutSec=5] - Maximum time to keep polling, in seconds.
 * @param {number} [options.intervalSec=0.01] - Pause between captures, in seconds.
 * @param {number} [options.tolerance=1e-5] - Maximum ratio of differing pixels still treated as stable.
 * @param {number} [options.pixelThreshold=0] - Per-channel difference above which a pixel counts as different.
 * @param {number} [options.downscaleWidth=600] - Width used for the comparison image.
 * @param {number} [options.downscaleHeight=600] - Height used for the comparison image.
 * @returns {Promise<{stable: boolean, originalScreenshotBase64: string}>} The last
 *   full-resolution screenshot and whether stability was reached.
 * @throws Rethrows any error raised while capturing or processing a screenshot.
 */
const waitForStableScreen = async (getScreenshot, options = {}) => {
  const {
    maxTimeoutSec = 5,
    intervalSec = 0.01,
    tolerance = 1e-5,
    pixelThreshold = 0,
    downscaleWidth = 600,
    downscaleHeight = 600
  } = options;
  const startTime = Date.now();
  const maxTimeoutMillis = maxTimeoutSec * 1e3;
  const intervalMillis = intervalSec * 1e3;
  let previousDownsampledBuffer = null;
  let lastScreenshotBase64 = null;
  // do/while rather than while: the first capture must happen regardless of
  // the timeout, otherwise callers would receive a null screenshot.
  do {
    try {
      const screenshotBase64 = await getScreenshot();
      lastScreenshotBase64 = screenshotBase64;
      const imageBuffer = Buffer.from(screenshotBase64, "base64");
      const downsampledBuffer = await sharp(imageBuffer)
        .resize(downscaleWidth, downscaleHeight, { fit: "fill" })
        .ensureAlpha()
        .raw()
        .toBuffer();
      if (previousDownsampledBuffer) {
        const isSimilar = areImagesSimilar(
          previousDownsampledBuffer,
          downsampledBuffer,
          downscaleWidth,
          downscaleHeight,
          tolerance,
          pixelThreshold
        );
        if (isSimilar) {
          return {
            stable: true,
            originalScreenshotBase64: screenshotBase64
          };
        }
      }
      previousDownsampledBuffer = downsampledBuffer;
    } catch (e) {
      // Log for diagnostics, then let the caller decide how to handle it.
      globalLogger.warn(`Error during stability check: ${e}`);
      throw e;
    }
    await delay(intervalMillis);
  } while (Date.now() - startTime < maxTimeoutMillis);
  return {
    stable: false,
    originalScreenshotBase64: lastScreenshotBase64
  };
};
146
/**
 * Compares two raw 4-bytes-per-pixel image buffers and reports whether the
 * share of differing pixels is within `tolerance`.
 *
 * Only the first three bytes of every 4-byte pixel are compared; the fourth
 * byte is ignored. A pixel counts as different when any of those three
 * channels differs by more than `pixelThreshold`.
 *
 * @param {Uint8Array} buffer1 - First raw pixel buffer.
 * @param {Uint8Array} buffer2 - Second raw pixel buffer.
 * @param {number} width - Image width in pixels.
 * @param {number} height - Image height in pixels.
 * @param {number} tolerance - Maximum allowed ratio of differing pixels (differing / (width * height)).
 * @param {number} pixelThreshold - Maximum per-channel absolute difference still treated as equal.
 * @returns {boolean} `true` when the buffers have equal length and the ratio of
 *   differing pixels does not exceed `tolerance`; `false` otherwise.
 */
function areImagesSimilar(buffer1, buffer2, width, height, tolerance, pixelThreshold) {
  if (buffer1.length !== buffer2.length) {
    return false;
  }
  const totalPixels = width * height;
  let differentPixels = 0;
  for (let offset = 0; offset < buffer1.length; offset += 4) {
    const pixelDiffers =
      Math.abs(buffer1[offset] - buffer2[offset]) > pixelThreshold ||
      Math.abs(buffer1[offset + 1] - buffer2[offset + 1]) > pixelThreshold ||
      Math.abs(buffer1[offset + 2] - buffer2[offset + 2]) > pixelThreshold;
    if (pixelDiffers) {
      differentPixels++;
      // The ratio only grows, so once it exceeds the tolerance the final
      // answer is already known; stop scanning the rest of the image.
      if (differentPixels / totalPixels > tolerance) {
        return false;
      }
    }
  }
  if (totalPixels === 0) {
    // Avoid 0 / 0 = NaN: zero-area images are similar when nothing differed.
    return differentPixels === 0;
  }
  return differentPixels / totalPixels <= tolerance;
}
166
+
96
167
  const SavableStepBaseSchema = zod.z.object({
97
168
  id: zod.z.number().optional(),
98
169
  descriptionText: zod.z.string().optional(),
@@ -125,9 +196,8 @@ const SavableSwipeStepSchema = SavableStepBaseSchema.extend({
125
196
  y2: zod.z.number().optional(),
126
197
  duration: zod.z.number().optional().default(500)
127
198
  });
128
- const SavableScrollStepSchema = SavableStepBaseSchema.extend({
129
- type: zod.z.literal("scroll"),
130
- direction: zod.z.enum(["up", "down"])
199
+ const SavableScrollStepSchema = SavableSwipeStepSchema.extend({
200
+ type: zod.z.literal("scroll")
131
201
  });
132
202
  const SavableZoomStepSchema = SavableStepBaseSchema.extend({
133
203
  type: zod.z.literal("zoom"),
@@ -148,6 +218,10 @@ const SavableAIStepSchema = SavableStepBaseSchema.extend({
148
218
  type: zod.z.literal("ai"),
149
219
  instruction: zod.z.string()
150
220
  });
221
+ const SavableEnterStepSchema = SavableStepBaseSchema.extend({
222
+ type: zod.z.literal("enter"),
223
+ delayNextStep: zod.z.number().optional().default(500)
224
+ });
151
225
  const SavableFileRefStepSchema = SavableStepBaseSchema.extend({
152
226
  type: zod.z.literal("fileRef"),
153
227
  path: zod.z.string(),
@@ -172,6 +246,8 @@ const SavableStepSchema = zod.z.discriminatedUnion("type", [
172
246
  // type: 'deeplink'
173
247
  SavableAIStepSchema,
174
248
  // type: 'ai'
249
+ SavableEnterStepSchema,
250
+ // type: 'enter'
175
251
  SavableFileRefStepSchema
176
252
  // type: 'fileRef'
177
253
  ]);
@@ -193,7 +269,7 @@ const CACHE_SERVER_URL = "https://cache.mobileboost.io";
193
269
  const GPT_DRIVER_BASE_URL = "https://api.mobileboost.io";
194
270
  const RESCALE_FACTOR = 4;
195
271
  const SMART_LOOP_MAX_ITERATIONS = 15;
196
- const CACHE_RETRY_MS = 2e3;
272
+ const CACHE_RETRY_MS = 1500;
197
273
  const CACHE_CHECK_INTERVAL_MS = 500;
198
274
 
199
275
  function generateCacheHash(apiKey, filepath, stepNumber, description, platform, resolution) {
@@ -277,15 +353,24 @@ async function executeFromCache(params) {
277
353
  return { found: false };
278
354
  } catch (error) {
279
355
  if (axios.isAxiosError(error)) {
280
- globalLogger.warn(`[Cache] Cache lookup failed: ${error.response?.data || error.message}`);
356
+ const errorMsg = `Cache lookup failed: ${error.response?.data || error.message}`;
357
+ globalLogger.warn(`[Cache] ${errorMsg}`);
281
358
  } else {
282
- globalLogger.error(`[Cache] Error executing from cache: ${error}`);
359
+ const errorMsg = `Error executing from cache: ${error}`;
360
+ globalLogger.error(`[Cache] ${errorMsg}`);
283
361
  }
284
362
  return { found: false };
285
363
  }
286
364
  }
287
365
  async function populateCache(params) {
288
366
  try {
367
+ globalLogger.debug(`[populateCache] Parameters: ${JSON.stringify({
368
+ stepNumber: params.stepNumber,
369
+ filepath: params.filepath,
370
+ platform: params.platform,
371
+ screenResolution: params.screenResolution,
372
+ executionDataCount: params.executionData.length
373
+ })}`);
289
374
  const hash = generateCacheHash(
290
375
  params.apiKey,
291
376
  params.filepath,
@@ -294,13 +379,15 @@ async function populateCache(params) {
294
379
  params.platform,
295
380
  params.screenResolution
296
381
  );
382
+ globalLogger.debug(`[populateCache] Generated hash: ${hash}`);
297
383
  const payload = await Promise.all(params.executionData.map(async (item) => {
298
384
  const resizedBuffer = await resizeScreenshotForCache(item.screenshot);
385
+ const scaledScreenshotBase64 = resizedBuffer.toString("base64");
299
386
  const scaledCommands = item.commands.map(
300
387
  (cmd) => scaleCommand(cmd, "divide")
301
388
  );
302
389
  return {
303
- screenshot: resizedBuffer.toString("base64"),
390
+ screenshot: scaledScreenshotBase64,
304
391
  commands: scaledCommands
305
392
  };
306
393
  }));
@@ -312,9 +399,11 @@ async function populateCache(params) {
312
399
  return { success: true };
313
400
  } catch (error) {
314
401
  if (axios.isAxiosError(error)) {
315
- globalLogger.error(`[Cache] Failed to populate cache: ${error.response?.data || error.message}`);
402
+ const errorMsg = `Failed to populate cache: ${error.response?.data || error.message}`;
403
+ globalLogger.error(`[Cache] ${errorMsg}`);
316
404
  } else {
317
- globalLogger.error(`[Cache] Error populating cache: ${error}`);
405
+ const errorMsg = `Error populating cache: ${error}`;
406
+ globalLogger.error(`[Cache] ${errorMsg}`);
318
407
  }
319
408
  return { success: false };
320
409
  }
@@ -444,6 +533,10 @@ function isWaitCommand(cmd) {
444
533
  function isScrollCommand(cmd) {
445
534
  return cmd.startsWith("scroll:");
446
535
  }
536
/**
 * Tells whether a command string asks for the Enter key to be pressed,
 * e.g. "press: enter" (case-insensitive, surrounding whitespace ignored).
 *
 * The command must BEGIN with "press:". Matching "press:" anywhere in the
 * string would misclassify other commands whose payload happens to contain
 * that text (for example "type: press: enter").
 *
 * @param {string} cmd - Command string to classify.
 * @returns {boolean} `true` only for a "press:" command whose key is "enter".
 */
function isEnterCommand(cmd) {
  const pressPrefix = "press:";
  const normalized = cmd.trim().toLowerCase();
  if (!normalized.startsWith(pressPrefix)) {
    return false;
  }
  return normalized.slice(pressPrefix.length).trim() === "enter";
}
447
540
  function isTypeCommand(cmd) {
448
541
  return cmd.startsWith("type:");
449
542
  }
@@ -461,8 +554,8 @@ function parseSlideCommand(cmd) {
461
554
  const directionMap = {
462
555
  down: "up",
463
556
  up: "down",
464
- left: "right",
465
- right: "left"
557
+ left: "left",
558
+ right: "right"
466
559
  };
467
560
  return {
468
561
  direction: directionMap[extractedDirection],
@@ -479,22 +572,38 @@ async function executeSmartLoop(ctx, params) {
479
572
  let lastCacheIndex = void 0;
480
573
  let anyCacheMiss = false;
481
574
  let everHadCacheHit = false;
575
+ let totalCacheHits = 0;
482
576
  const currentExecutionData = [];
577
+ const stepStartTime = performance.now();
578
+ const iterationTimes = [];
483
579
  globalLogger.info(`[SmartLoop] Starting for step ${params.stepNumber}: "${params.description}"`);
580
+ globalLogger.debug(`[Performance][SmartLoop] Step execution started at ${stepStartTime.toFixed(2)}ms`);
484
581
  try {
485
582
  while (loopCount < SMART_LOOP_MAX_ITERATIONS) {
583
+ const iterationStartTime = performance.now();
584
+ globalLogger.debug(`[SmartLoop] Starting iteration ${loopCount + 1}/${SMART_LOOP_MAX_ITERATIONS}`);
486
585
  let screenshot = "";
487
586
  let commands = [];
488
587
  let isCacheHit = false;
489
- const firstScreenshot = await ctx.getScreenshot();
490
- const screenshotResolution = await getImageDimensions(firstScreenshot);
588
+ let screenshotResolution = void 0;
589
+ const stabilityResult = await waitForStableScreen(ctx.getScreenshot);
491
590
  for (let attempt = 0; attempt < maxCacheAttempts; attempt++) {
492
- screenshot = await ctx.getScreenshot();
591
+ const screenshotStartTime = performance.now();
592
+ const screenshotEndTime = performance.now();
593
+ globalLogger.debug(`[Performance][SmartLoop] Screenshot capture took ${(screenshotEndTime - screenshotStartTime).toFixed(2)}ms`);
594
+ if (!stabilityResult.stable) {
595
+ globalLogger.warn("[SmartLoop] Screen did not stabilize within timeout, proceeding with last screenshot");
596
+ }
597
+ screenshot = stabilityResult.originalScreenshotBase64;
598
+ if (screenshotResolution === void 0) {
599
+ screenshotResolution = await getImageDimensions(screenshot);
600
+ }
493
601
  const sizeInBytes = screenshot.length * 0.75;
494
602
  const sizeInMB = (sizeInBytes / (1024 * 1024)).toFixed(2);
495
603
  globalLogger.debug(`[SmartLoop] Captured screenshot: ~${sizeInMB} MB`);
496
604
  try {
497
605
  globalLogger.debug(`[SmartLoop] Checking cache (Attempt ${attempt + 1}/${maxCacheAttempts})`);
606
+ const cacheCheckStartTime = performance.now();
498
607
  const cacheResult = await executeFromCache({
499
608
  apiKey: ctx.organisationId,
500
609
  stepNumber: params.stepNumber,
@@ -505,11 +614,14 @@ async function executeSmartLoop(ctx, params) {
505
614
  platform: ctx.platform,
506
615
  filepath: params.filepath
507
616
  });
617
+ const cacheCheckEndTime = performance.now();
618
+ globalLogger.debug(`[Performance][SmartLoop] Cache check took ${(cacheCheckEndTime - cacheCheckStartTime).toFixed(2)}ms`);
508
619
  if (cacheResult.found && cacheResult.cacheCommands) {
509
620
  commands = cacheResult.cacheCommands;
510
621
  lastCacheIndex = cacheResult.cacheIndex;
511
622
  isCacheHit = true;
512
623
  everHadCacheHit = true;
624
+ totalCacheHits++;
513
625
  globalLogger.info(`[SmartLoop] Cache Hit! (${commands.length} commands)`);
514
626
  break;
515
627
  }
@@ -521,18 +633,53 @@ async function executeSmartLoop(ctx, params) {
521
633
  await delay(CACHE_CHECK_INTERVAL_MS);
522
634
  }
523
635
  }
636
+ if (screenshotResolution === void 0) {
637
+ const screenshot2 = await ctx.getScreenshot();
638
+ screenshotResolution = await getImageDimensions(screenshot2);
639
+ }
524
640
  let aiCommands = [];
641
+ let actionDescription = [];
525
642
  if (!isCacheHit) {
526
643
  anyCacheMiss = true;
527
644
  globalLogger.info(`[SmartLoop] Cache Miss. Requesting AI agent...`);
528
- const agentResponse = await executeAgentStep({
529
- apiKey: ctx.organisationId,
530
- base64_screenshot: screenshot,
531
- instruction: params.instruction,
532
- action_history: actionHistory
533
- });
534
- aiCommands = agentResponse.appetizeCommands || [];
535
- const gptCommands = agentResponse.gptCommands || [];
645
+ const aiStartTime = performance.now();
646
+ let agentResponse;
647
+ if (ctx.middleLayerAssertFn) {
648
+ const response = await ctx.middleLayerAssertFn(screenshot);
649
+ const results = response.data.results;
650
+ const failedConditions = Object.entries(results).filter(([_, success]) => !success).map(([key, _]) => key);
651
+ if (failedConditions.length === 0) {
652
+ agentResponse = {
653
+ appetizeCommands: [
654
+ "task complete: All assertions passed"
655
+ ]
656
+ };
657
+ } else {
658
+ agentResponse = {
659
+ appetizeCommands: [
660
+ `error detected: Failed conditions: ${failedConditions.join(", ")}`
661
+ ]
662
+ };
663
+ }
664
+ } else {
665
+ agentResponse = await executeAgentStep({
666
+ apiKey: ctx.organisationId,
667
+ base64_screenshot: screenshot,
668
+ instruction: params.instruction,
669
+ action_history: actionHistory
670
+ });
671
+ }
672
+ const aiEndTime = performance.now();
673
+ globalLogger.debug(`[Performance][SmartLoop] AI agent call took ${(aiEndTime - aiStartTime).toFixed(2)}ms`);
674
+ aiCommands = agentResponse.appetizeCommands ?? [];
675
+ const gptCommands = agentResponse.gptCommands ?? [];
676
+ actionDescription = [];
677
+ const actionDescriptionIndex = gptCommands.findIndex(
678
+ (entry) => entry.startsWith("actions_description:")
679
+ );
680
+ if (actionDescriptionIndex !== -1) {
681
+ actionDescription = gptCommands.slice(actionDescriptionIndex, actionDescriptionIndex + 1);
682
+ }
536
683
  const reasoningIndex = gptCommands.findIndex((entry) => entry.startsWith("reasoning:"));
537
684
  if (reasoningIndex !== -1) {
538
685
  const parsedCommands = gptCommands.slice(reasoningIndex);
@@ -542,19 +689,23 @@ async function executeSmartLoop(ctx, params) {
542
689
  }
543
690
  actionHistory = [...actionHistory, ...parsedCommands];
544
691
  }
545
- commands = [...aiCommands];
692
+ commands = [...actionDescription, ...aiCommands];
546
693
  globalLogger.debug(`[SmartLoop] AI returned ${commands.length} command(s)`);
547
694
  }
548
695
  currentExecutionData.push({
549
696
  screenshot,
550
- commands: aiCommands.length > 0 ? aiCommands : commands
697
+ commands
551
698
  });
552
- await ctx.logCodeExecution(screenshot, commands.join("\n"));
699
+ const logPromise = ctx.logCodeExecution(screenshot, commands.join("\n"), isCacheHit);
700
+ if (ctx.pendingLogPromises) {
701
+ ctx.pendingLogPromises.push(logPromise);
702
+ }
553
703
  let actionExecuted = false;
554
704
  let taskCompleted = false;
555
705
  if (commands.length > 0) {
556
706
  globalLogger.debug(`[SmartLoop] Executing ${commands.length} command(s)`);
557
707
  }
708
+ const commandExecutionStartTime = performance.now();
558
709
  for (const cmd of commands) {
559
710
  if (isTaskComplete(cmd)) {
560
711
  taskCompleted = true;
@@ -579,6 +730,10 @@ async function executeSmartLoop(ctx, params) {
579
730
  );
580
731
  actionExecuted = true;
581
732
  }
733
+ } else if (isEnterCommand(cmd)) {
734
+ globalLogger.debug(`[SmartLoop] Pressing Enter`);
735
+ await ctx.performPressEnter();
736
+ actionExecuted = true;
582
737
  } else if (isWaitCommand(cmd)) {
583
738
  const seconds = parseWaitSeconds(cmd);
584
739
  if (seconds) {
@@ -591,6 +746,9 @@ async function executeSmartLoop(ctx, params) {
591
746
  if (direction) {
592
747
  globalLogger.debug(`[SmartLoop] Scrolling ${direction}`);
593
748
  await ctx.performSwipe({ direction });
749
+ if (isCacheHit) {
750
+ await delay(650);
751
+ }
594
752
  actionExecuted = true;
595
753
  }
596
754
  } else if (isSlideCommand(cmd)) {
@@ -628,6 +786,9 @@ async function executeSmartLoop(ctx, params) {
628
786
  screenshotHeight: screenshotResolution.height,
629
787
  duration: 500
630
788
  });
789
+ if (isCacheHit) {
790
+ await delay(650);
791
+ }
631
792
  actionExecuted = true;
632
793
  }
633
794
  } else if (isTypeCommand(cmd)) {
@@ -640,49 +801,83 @@ async function executeSmartLoop(ctx, params) {
640
801
  }
641
802
  }
642
803
  if (actionExecuted) {
804
+ const commandExecutionEndTime = performance.now();
805
+ globalLogger.debug(`[Performance][SmartLoop] Command execution took ${(commandExecutionEndTime - commandExecutionStartTime).toFixed(2)}ms`);
643
806
  if (isCacheHit) {
644
807
  actionHistory.push(...commands);
645
808
  }
646
809
  await delay(100);
647
810
  }
811
+ const iterationEndTime = performance.now();
812
+ const iterationDuration = iterationEndTime - iterationStartTime;
813
+ iterationTimes.push(iterationDuration);
814
+ globalLogger.debug(`[Performance][SmartLoop] Iteration ${loopCount + 1} completed in ${iterationDuration.toFixed(2)}ms (${(iterationDuration / 1e3).toFixed(2)}s)`);
648
815
  if (taskCompleted) {
649
816
  globalLogger.info(`[SmartLoop] Task completed successfully`);
817
+ const stepEndTime2 = performance.now();
818
+ const totalDuration2 = stepEndTime2 - stepStartTime;
819
+ const totalSeconds2 = totalDuration2 / 1e3;
820
+ const totalMinutes2 = totalSeconds2 / 60;
821
+ const averageIterationTime = iterationTimes.reduce((sum, time) => sum + time, 0) / iterationTimes.length;
822
+ globalLogger.debug(`[Performance][SmartLoop] Step execution summary:`);
823
+ globalLogger.debug(`[Performance][SmartLoop] - Total iterations: ${loopCount + 1}`);
824
+ globalLogger.debug(`[Performance][SmartLoop] - Total duration: ${totalDuration2.toFixed(2)}ms (${totalSeconds2.toFixed(2)}s / ${totalMinutes2.toFixed(2)}min)`);
825
+ globalLogger.debug(`[Performance][SmartLoop] - Average iteration time: ${averageIterationTime.toFixed(2)}ms (${(averageIterationTime / 1e3).toFixed(2)}s)`);
826
+ globalLogger.debug(`[Performance][SmartLoop] - Individual iteration times: ${iterationTimes.map((t) => `${t.toFixed(2)}ms`).join(", ")}`);
650
827
  if (anyCacheMiss && currentExecutionData.length > 0) {
651
828
  globalLogger.info(`[SmartLoop] Populating cache with ${currentExecutionData.length} frame(s)...`);
652
- try {
653
- await populateCache({
654
- apiKey: ctx.organisationId,
655
- stepNumber: params.stepNumber,
656
- stepDescription: params.description,
657
- executionData: currentExecutionData,
658
- screenResolution: screenshotResolution,
659
- platform: ctx.platform,
660
- filepath: params.filepath
661
- });
829
+ const cachePopulateStartTime = performance.now();
830
+ populateCache({
831
+ apiKey: ctx.organisationId,
832
+ stepNumber: params.stepNumber,
833
+ stepDescription: params.description,
834
+ executionData: currentExecutionData,
835
+ screenResolution: screenshotResolution,
836
+ platform: ctx.platform,
837
+ filepath: params.filepath
838
+ }).then(() => {
839
+ const cachePopulateEndTime = performance.now();
840
+ globalLogger.debug(`[Performance][SmartLoop] Cache population took ${(cachePopulateEndTime - cachePopulateStartTime).toFixed(2)}ms`);
662
841
  globalLogger.debug(`[SmartLoop] Cache populated successfully`);
663
- } catch (e) {
664
- globalLogger.warn(`[SmartLoop] Failed to populate cache: ${e.message}`);
665
- }
842
+ }).catch((e) => globalLogger.warn(`[SmartLoop] Cache population failed: ${e.message}`));
666
843
  } else if (!anyCacheMiss) {
667
844
  globalLogger.debug(`[SmartLoop] Skipping cache population (all actions were cached)`);
668
845
  }
669
846
  return {
670
847
  success: true,
671
848
  iterations: loopCount + 1,
672
- cacheHit: everHadCacheHit
849
+ cacheHit: everHadCacheHit,
850
+ cacheHitCount: totalCacheHits
673
851
  };
674
852
  }
675
853
  loopCount++;
676
854
  }
855
+ const stepEndTime = performance.now();
856
+ const totalDuration = stepEndTime - stepStartTime;
857
+ const totalSeconds = totalDuration / 1e3;
858
+ const totalMinutes = totalSeconds / 60;
859
+ globalLogger.debug(`[Performance][SmartLoop] Timeout reached after ${loopCount} iterations`);
860
+ globalLogger.debug(`[Performance][SmartLoop] - Total duration: ${totalDuration.toFixed(2)}ms (${totalSeconds.toFixed(2)}s / ${totalMinutes.toFixed(2)}min)`);
861
+ globalLogger.debug(`[Performance][SmartLoop] - Individual iteration times: ${iterationTimes.map((t) => `${t.toFixed(2)}ms`).join(", ")}`);
677
862
  throw new Error(`Smart Loop timeout after ${SMART_LOOP_MAX_ITERATIONS} iterations`);
678
863
  } catch (error) {
679
864
  const message = error instanceof Error ? error.message : String(error);
865
+ const stepEndTime = performance.now();
866
+ const totalDuration = stepEndTime - stepStartTime;
867
+ const totalSeconds = totalDuration / 1e3;
868
+ const totalMinutes = totalSeconds / 60;
680
869
  globalLogger.error(`[SmartLoop] Error: ${message}`);
870
+ globalLogger.debug(`[Performance][SmartLoop] Error occurred after ${loopCount + 1} iteration(s)`);
871
+ globalLogger.debug(`[Performance][SmartLoop] - Total duration before error: ${totalDuration.toFixed(2)}ms (${totalSeconds.toFixed(2)}s / ${totalMinutes.toFixed(2)}min)`);
872
+ if (iterationTimes.length > 0) {
873
+ globalLogger.debug(`[Performance][SmartLoop] - Individual iteration times: ${iterationTimes.map((t) => `${t.toFixed(2)}ms`).join(", ")}`);
874
+ }
681
875
  return {
682
876
  success: false,
683
877
  error: message,
684
878
  iterations: loopCount + 1,
685
- cacheHit: everHadCacheHit
879
+ cacheHit: everHadCacheHit,
880
+ cacheHitCount: totalCacheHits
686
881
  };
687
882
  }
688
883
  }
@@ -713,8 +908,13 @@ class GptDriver {
713
908
  step_number = 1;
714
909
  organisationId;
715
910
  configFilePath;
911
+ // Session Execution Stats
912
+ _stats_startTime = 0;
913
+ _stats_executedSteps = 0;
914
+ _stats_cacheHits = 0;
716
915
  // Smart loop state - maintains action history across steps for context
717
916
  globalActionHistory = [];
917
+ pendingLogPromises = [];
718
918
  /**
719
919
  * Creates an instance of the GptDriver class.
720
920
  *
@@ -787,6 +987,9 @@ class GptDriver {
787
987
  */
788
988
  async startSession() {
789
989
  globalLogger.info("Starting session...");
990
+ this._stats_startTime = performance.now();
991
+ this._stats_executedSteps = 0;
992
+ this._stats_cacheHits = 0;
790
993
  if (!this.useGptDriverCloud) {
791
994
  if (this.driver) {
792
995
  let platform;
@@ -886,6 +1089,37 @@ class GptDriver {
886
1089
  getSessionLink() {
887
1090
  return `https://app.mobileboost.io/gpt-driver/sessions/${this.gptDriverSessionId}`;
888
1091
  }
1092
+ /**
1093
+ * Stops the current GPTDriver session and update its state.
1094
+ *
1095
+ * This method sends a request to the GPT Driver server to stop the session and logs the session status as either "failed" or "success."
1096
+ *
1097
+ * @param {"failed" | "success"} status - Indicates the outcome of the session.
1098
+ * Use "success" if the session completed as expected,
1099
+ * or "failed" if the session encountered an error or issue.
1100
+ *
1101
+ * @throws {Error} If the request to stop the session fails.
1102
+ */
1103
+ /**
1104
+ * Marks the current GPTDriver session as succeeded.
1105
+ *
1106
+ * This method stops the session and logs it as "succeeded"
1107
+ *
1108
+ * @throws {Error} If the request to stop the session fails.
1109
+ */
1110
+ async setSessionSucceeded() {
1111
+ await this.setSessionStatus("succeeded");
1112
+ }
1113
+ /**
1114
+ * Marks the current GPTDriver session as failed.
1115
+ *
1116
+ * This method stops the session and logs it as "failed."
1117
+ *
1118
+ * @throws {Error} If the request to stop the session fails.
1119
+ */
1120
+ async setSessionFailed() {
1121
+ await this.setSessionStatus("failed");
1122
+ }
889
1123
  /**
890
1124
  * Stops the current GPTDriver session and updates its state.
891
1125
  *
@@ -898,7 +1132,13 @@ class GptDriver {
898
1132
  * @throws {Error} If the request to stop the session fails.
899
1133
  */
900
1134
  async setSessionStatus(status) {
1135
+ try {
1136
+ await Promise.all(this.pendingLogPromises);
1137
+ } catch (e) {
1138
+ globalLogger.error(`Error waiting for pending log promises: ${e.message}`);
1139
+ }
901
1140
  if (this.gptDriverSessionId) {
1141
+ this.printSessionSummary();
902
1142
  globalLogger.info(`Stopping session with status: ${status}`);
903
1143
  await axios.post(
904
1144
  `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/stop`,
@@ -914,6 +1154,19 @@ class GptDriver {
914
1154
  this.globalActionHistory = [];
915
1155
  }
916
1156
  }
1157
+ printSessionSummary() {
1158
+ if (this._stats_startTime === 0) return;
1159
+ const endTime = performance.now();
1160
+ const totalTime = (endTime - this._stats_startTime) / 1e3;
1161
+ globalLogger.info(`
1162
+ ${"=".repeat(50)}`);
1163
+ globalLogger.info(`Session Execution Summary:`);
1164
+ globalLogger.info(`Total Session Time: ${totalTime.toFixed(2)} seconds`);
1165
+ globalLogger.info(`Total Steps executed: ${this._stats_executedSteps}`);
1166
+ globalLogger.info(`Total Step Cache Hits: ${this._stats_cacheHits}`);
1167
+ globalLogger.info(`${"=".repeat(50)}
1168
+ `);
1169
+ }
917
1170
  // ─────────────────────────────────────────────────────────────────────────────
918
1171
  // SMART LOOP INTEGRATION
919
1172
  // ─────────────────────────────────────────────────────────────────────────────
@@ -921,7 +1174,7 @@ class GptDriver {
921
1174
  * Creates a SmartLoopContext for the current session.
922
1175
  * This context provides all the callbacks needed by the smart loop executor.
923
1176
  */
924
- createSmartLoopContext() {
1177
+ createSmartLoopContext(options) {
925
1178
  if (!this.organisationId) {
926
1179
  throw new Error("Organisation ID is missing, please set it in the GPTDriver constructor");
927
1180
  }
@@ -933,8 +1186,11 @@ class GptDriver {
933
1186
  performTap: (x, y, screenshotWidth, screenshotHeight) => this.performTap(x, y, screenshotWidth, screenshotHeight),
934
1187
  performSwipe: (params) => this.performSwipe(params),
935
1188
  performType: (text) => this.performType(text),
936
- logCodeExecution: async (screenshot, command) => this.logCodeExecution(screenshot, command),
937
- organisationId: this.organisationId
1189
+ performPressEnter: () => this.performPressEnter(),
1190
+ logCodeExecution: async (screenshot, command, isCacheHit) => this.logCodeExecution(screenshot, command, isCacheHit),
1191
+ organisationId: this.organisationId,
1192
+ middleLayerAssertFn: options?.middleLayerAssertFn,
1193
+ pendingLogPromises: options?.pendingLogPromises
938
1194
  };
939
1195
  }
940
1196
  /**
@@ -1035,6 +1291,24 @@ class GptDriver {
1035
1291
  await client.keys(text.split(""));
1036
1292
  }
1037
1293
  }
1294
+ async performPressEnter() {
1295
+ const client = await this.getWdioClient();
1296
+ const platform = this.appiumSessionConfig?.platform;
1297
+ if (platform === "iOS") {
1298
+ await client.performActions([
1299
+ {
1300
+ type: "key",
1301
+ id: "keyboard",
1302
+ actions: [
1303
+ { type: "keyDown", value: "\uE007" },
1304
+ { type: "keyUp", value: "\uE007" }
1305
+ ]
1306
+ }
1307
+ ]);
1308
+ } else {
1309
+ await client.keys(["Enter"]);
1310
+ }
1311
+ }
1038
1312
  clamp(value, min, max) {
1039
1313
  return Math.max(min, Math.min(max, value));
1040
1314
  }
@@ -1219,41 +1493,71 @@ class GptDriver {
1219
1493
  }
1220
1494
  }
1221
1495
  async aiExecute(commandOrOptions, options) {
1496
+ const startTime = performance.now();
1497
+ globalLogger.debug(`[Performance] aiExecute started at ${startTime.toFixed(2)}ms`);
1222
1498
  const command = typeof commandOrOptions === "string" ? commandOrOptions : commandOrOptions.command;
1223
1499
  const opts = typeof commandOrOptions === "string" ? options : commandOrOptions;
1224
1500
  const appiumHandler = opts?.appiumHandler;
1225
1501
  const cachingMode = opts?.cachingMode;
1226
1502
  const useSmartLoop = opts?.useSmartLoop ?? false;
1227
1503
  if (!this.appiumSessionStarted) {
1504
+ const sessionStartTime = performance.now();
1505
+ globalLogger.debug(`[Performance] Starting session...`);
1228
1506
  await this.startSession();
1507
+ const sessionEndTime = performance.now();
1508
+ globalLogger.debug(`[Performance] Session started in ${(sessionEndTime - sessionStartTime).toFixed(2)}ms`);
1229
1509
  }
1230
1510
  globalLogger.info(`Executing command: ${command}`);
1231
1511
  const driver = this.driver;
1232
1512
  if (appiumHandler != null) {
1233
1513
  try {
1514
+ const handlerStartTime = performance.now();
1515
+ globalLogger.debug(`[Performance] Executing custom Appium handler...`);
1234
1516
  await this.takeScreenshotAndLogCodeExecution(appiumHandler.toString());
1235
1517
  await appiumHandler(driver);
1518
+ const handlerEndTime = performance.now();
1236
1519
  globalLogger.debug("Custom Appium handler executed successfully");
1520
+ globalLogger.debug(`[Performance] Appium handler completed in ${(handlerEndTime - handlerStartTime).toFixed(2)}ms`);
1237
1521
  this.step_number++;
1522
+ this._stats_executedSteps++;
1523
+ const totalTime2 = performance.now() - startTime;
1524
+ globalLogger.debug(`[Performance] aiExecute total time: ${totalTime2.toFixed(2)}ms`);
1238
1525
  return;
1239
1526
  } catch (e) {
1240
1527
  globalLogger.warn("Custom Appium handler failed, falling back to AI execution");
1241
1528
  }
1242
1529
  }
1243
1530
  if (useSmartLoop) {
1244
- const ctx = this.createSmartLoopContext();
1531
+ const smartLoopStartTime = performance.now();
1532
+ globalLogger.debug(`[Performance] Starting smart loop execution...`);
1533
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1245
1534
  const result = await executeSmartLoop(ctx, {
1246
1535
  stepNumber: this.step_number,
1247
1536
  description: command,
1248
- instruction: command
1537
+ instruction: command,
1538
+ ...this.testId && {
1539
+ filepath: this.testId
1540
+ }
1249
1541
  });
1542
+ const smartLoopEndTime = performance.now();
1543
+ globalLogger.debug(`[Performance] Smart loop completed in ${(smartLoopEndTime - smartLoopStartTime).toFixed(2)}ms`);
1250
1544
  if (!result.success) {
1251
1545
  throw new Error(result.error || "Smart loop execution failed");
1252
1546
  }
1547
+ if (result.cacheHitCount) {
1548
+ this._stats_cacheHits += result.cacheHitCount;
1549
+ }
1253
1550
  this.step_number++;
1551
+ this._stats_executedSteps += result.iterations ?? 1;
1254
1552
  } else {
1553
+ const gptHandlerStartTime = performance.now();
1554
+ globalLogger.debug(`[Performance] Starting GPT handler execution...`);
1255
1555
  await this.gptHandler(command, cachingMode);
1556
+ const gptHandlerEndTime = performance.now();
1557
+ globalLogger.debug(`[Performance] GPT handler completed in ${(gptHandlerEndTime - gptHandlerStartTime).toFixed(2)}ms`);
1256
1558
  }
1559
+ const totalTime = performance.now() - startTime;
1560
+ globalLogger.debug(`[Performance] aiExecute total time: ${totalTime.toFixed(2)}ms`);
1257
1561
  }
1258
1562
  /**
1259
1563
  * Asserts a single condition using the GPTDriver.
@@ -1262,15 +1566,37 @@ class GptDriver {
1262
1566
  * If the assertion fails, an error is thrown.
1263
1567
  *
1264
1568
  * @param {string} assertion - The condition to be asserted.
1265
- * @param cachingMode - The caching mode to be used for the assertion.
1569
+ * @param {Object} options - Optional configuration object
1570
+ * @param {CachingMode} options.cachingMode - The caching mode to be used for the assertion.
1571
+ * @param {boolean} options.useSmartLoop - If true, uses the smart loop execution for optimized caching. Default: false
1266
1572
  * @throws {Error} If the assertion fails.
1573
+ *
1574
+ * @example
1575
+ * // Basic usage
1576
+ * await driver.assert('Login button is visible');
1577
+ *
1578
+ * @example
1579
+ * // With caching mode
1580
+ * await driver.assert('Login button is visible', {
1581
+ * cachingMode: "FULL_SCREEN"
1582
+ * });
1583
+ *
1584
+ * @example
1585
+ * // With smart loop enabled
1586
+ * await driver.assert('Login button is visible', {
1587
+ * useSmartLoop: true,
1588
+ * cachingMode: "FULL_SCREEN"
1589
+ * });
1267
1590
  */
1268
- async assert(assertion, cachingMode) {
1591
+ async assert(assertion, {
1592
+ cachingMode,
1593
+ useSmartLoop = false
1594
+ }) {
1269
1595
  if (!this.appiumSessionStarted) {
1270
1596
  await this.startSession();
1271
1597
  }
1272
1598
  try {
1273
- const results = await this.checkBulk([assertion], cachingMode);
1599
+ const results = await this.checkBulk([assertion], cachingMode, 2, 1e3, useSmartLoop);
1274
1600
  if (!Object.values(results).at(0)) {
1275
1601
  await this.setSessionStatus("failed");
1276
1602
  globalLogger.error(`Assertion failed: ${assertion}`);
@@ -1290,15 +1616,37 @@ class GptDriver {
1290
1616
  * If any assertion fails, an error is thrown listing all failed assertions.
1291
1617
  *
1292
1618
  * @param {string[]} assertions - An array of conditions to be asserted.
1293
- * @param cachingMode - The caching mode to be used for the assertions.
1619
+ * @param {Object} options - Optional configuration object
1620
+ * @param {CachingMode} options.cachingMode - The caching mode to be used for the assertions.
1621
+ * @param {boolean} options.useSmartLoop - If true, uses the smart loop execution. Default: false
1294
1622
  * @throws {Error} If any of the assertions fail.
1623
+ *
1624
+ * @example
1625
+ * // Basic usage
1626
+ * await driver.assertBulk(['Login button is visible', 'Username field is enabled']);
1627
+ *
1628
+ * @example
1629
+ * // With caching mode
1630
+ * await driver.assertBulk(['Login button is visible'], {
1631
+ * cachingMode: "FULL_SCREEN"
1632
+ * });
1633
+ *
1634
+ * @example
1635
+ * // With smart loop enabled
1636
+ * await driver.assertBulk(['Login button is visible'], {
1637
+ * useSmartLoop: true,
1638
+ * cachingMode: "FULL_SCREEN"
1639
+ * });
1295
1640
  */
1296
- async assertBulk(assertions, cachingMode) {
1641
+ async assertBulk(assertions, {
1642
+ cachingMode,
1643
+ useSmartLoop = false
1644
+ }) {
1297
1645
  if (!this.appiumSessionStarted) {
1298
1646
  await this.startSession();
1299
1647
  }
1300
1648
  try {
1301
- const results = await this.checkBulk(assertions, cachingMode);
1649
+ const results = await this.checkBulk(assertions, cachingMode, 2, 1e3, useSmartLoop);
1302
1650
  const failedAssertions = Object.values(results).reduce((prev, current, currentIndex) => {
1303
1651
  if (!current) {
1304
1652
  return [...prev, assertions.at(currentIndex)];
@@ -1327,14 +1675,30 @@ class GptDriver {
1327
1675
  * @param {CachingMode} cachingMode - The caching mode to be used for the conditions.
1328
1676
  * @param {number} maxRetries - The maximum number of retries if any condition fails (default: 2).
1329
1677
  * @param {number} retryDelayMs - The delay in milliseconds between retries (default: 1000).
1678
+ * @param {boolean} useSmartLoop - If true, uses the smart loop execution for optimized caching. Default: false
1330
1679
  * @returns {Promise<Record<string, boolean>>} A promise that resolves with an object mapping each condition
1331
1680
  * to a boolean indicating whether the condition was met.
1681
+ *
1682
+ * @example
1683
+ * // Basic usage
1684
+ * const results = await driver.checkBulk(['Login button is visible', 'Username field is enabled']);
1685
+ * console.log(results); // { 'Login button is visible': true, 'Username field is enabled': false }
1686
+ *
1687
+ * @example
1688
+ * // With smart loop enabled
1689
+ * const results = await driver.checkBulk(
1690
+ * ['Login button is visible'],
1691
+ * "FULL_SCREEN",
1692
+ * 2,
1693
+ * 1000,
1694
+ * true
1695
+ * );
1332
1696
  */
1333
- async checkBulk(conditions, cachingMode, maxRetries = 2, retryDelayMs = 1e3) {
1697
+ async checkBulk(conditions, cachingMode, maxRetries = 2, retryDelayMs = 1e3, useSmartLoop = false) {
1334
1698
  let attempt = 0;
1335
1699
  let results = {};
1336
1700
  while (attempt <= maxRetries) {
1337
- results = await this._checkBulkOnce(conditions, cachingMode, attempt);
1701
+ results = await this._checkBulkOnce(conditions, cachingMode, attempt, useSmartLoop);
1338
1702
  const failedConditions = Object.entries(results).filter(([_, success]) => !success).map(([key, _]) => key);
1339
1703
  if (failedConditions.length === 0) {
1340
1704
  return results;
@@ -1356,33 +1720,67 @@ class GptDriver {
1356
1720
  *
1357
1721
  * @private
1358
1722
  */
1359
- async _checkBulkOnce(conditions, cachingMode, attempt = 0) {
1723
+ async _checkBulkOnce(conditions, cachingMode, attempt = 0, useSmartLoop = false) {
1360
1724
  if (!this.appiumSessionStarted) {
1361
1725
  await this.startSession();
1362
1726
  }
1363
1727
  globalLogger.info(`Checking conditions (attempt ${attempt}): ${conditions.join(", ")}`);
1728
+ const pendingLogPromises = [];
1364
1729
  try {
1365
- let screenshot;
1366
- if (!this.useGptDriverCloud) {
1367
- screenshot = await this.getScreenshot(this.appiumSessionConfig);
1368
- }
1369
- const response = await axios.post(
1370
- `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
1371
- {
1372
- api_key: this.apiKey,
1373
- base64_screenshot: screenshot,
1374
- assertions: conditions,
1375
- command: `Assert: ${JSON.stringify(conditions)}`,
1376
- caching_mode: cachingMode ?? this.cachingMode,
1377
- step_number: this.step_number
1730
+ if (useSmartLoop) {
1731
+ const instruction = `Assert: ${JSON.stringify(conditions)}`;
1732
+ const middleLayerAssertFn = async (screenshot) => {
1733
+ return await axios.post(
1734
+ `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
1735
+ {
1736
+ api_key: this.apiKey,
1737
+ base64_screenshot: screenshot,
1738
+ assertions: conditions,
1739
+ command: `Assert: ${JSON.stringify(conditions)}`,
1740
+ caching_mode: cachingMode ?? this.cachingMode,
1741
+ step_number: this.step_number
1742
+ }
1743
+ );
1744
+ };
1745
+ const ctx = this.createSmartLoopContext({ middleLayerAssertFn, pendingLogPromises });
1746
+ const result = await executeSmartLoop(ctx, {
1747
+ stepNumber: this.step_number,
1748
+ description: instruction,
1749
+ instruction,
1750
+ ...this.testId && {
1751
+ filepath: this.testId
1752
+ }
1753
+ });
1754
+ if (result.cacheHitCount) {
1755
+ this._stats_cacheHits += result.cacheHitCount;
1378
1756
  }
1379
- );
1380
- globalLogger.debug(`Check results: ${JSON.stringify(response.data.results)}`);
1381
- return response.data.results;
1757
+ this._stats_executedSteps += result.iterations ?? 1;
1758
+ return { [instruction]: result.success };
1759
+ } else {
1760
+ let screenshot;
1761
+ if (!this.useGptDriverCloud) {
1762
+ screenshot = await this.getScreenshot(this.appiumSessionConfig);
1763
+ }
1764
+ const response = await axios.post(
1765
+ `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
1766
+ {
1767
+ api_key: this.apiKey,
1768
+ base64_screenshot: screenshot,
1769
+ assertions: conditions,
1770
+ command: `Assert: ${JSON.stringify(conditions)}`,
1771
+ caching_mode: cachingMode ?? this.cachingMode,
1772
+ step_number: this.step_number
1773
+ }
1774
+ );
1775
+ globalLogger.debug(`Check results: ${JSON.stringify(response.data.results)}`);
1776
+ return response.data.results;
1777
+ }
1382
1778
  } catch (e) {
1383
1779
  globalLogger.error("Failed to check conditions", e);
1384
1780
  await this.setSessionStatus("failed");
1385
1781
  throw e;
1782
+ } finally {
1783
+ await Promise.all(pendingLogPromises);
1386
1784
  }
1387
1785
  }
1388
1786
  /**
@@ -1475,10 +1873,12 @@ class GptDriver {
1475
1873
  * const result = await driver.executeFlow('tests/login-flow.json', { useSmartLoop: true });
1476
1874
  */
1477
1875
  async executeFlow(filePath, options) {
1876
+ const flowStartTime = performance.now();
1478
1877
  const useSmartLoop = options?.useSmartLoop ?? false;
1479
1878
  const configFilePath = this.configFilePath;
1480
1879
  let baseDir;
1481
1880
  let absolutePath;
1881
+ let parsedConfigFile;
1482
1882
  if (configFilePath) {
1483
1883
  let raw2;
1484
1884
  try {
@@ -1496,7 +1896,7 @@ class GptDriver {
1496
1896
  globalLogger.error(msg);
1497
1897
  throw new Error(msg);
1498
1898
  }
1499
- const parsedConfigFile = ConfigSchema.parse(json2);
1899
+ parsedConfigFile = ConfigSchema.parse(json2);
1500
1900
  if (path.isAbsolute(parsedConfigFile.testDir)) {
1501
1901
  baseDir = parsedConfigFile.testDir;
1502
1902
  } else {
@@ -1563,7 +1963,7 @@ ${issues}`);
1563
1963
  }
1564
1964
  visited.add(refKey);
1565
1965
  const child = await loadFlow(refPath);
1566
- const mergedParams = { ...inheritedParams, ...step.overrides ?? {} };
1966
+ const mergedParams = { ...inheritedParams, ...child.params, ...step.overrides ?? {} };
1567
1967
  const childDir = path.dirname(refPath);
1568
1968
  const childRelativePath = path.relative(baseDir, refPath).replace(/^\.\//, "");
1569
1969
  const childExpanded = await expandSteps(child.steps, mergedParams, childDir, [...stack, refKey], childRelativePath);
@@ -1579,7 +1979,15 @@ ${issues}`);
1579
1979
  }
1580
1980
  return out;
1581
1981
  };
1582
- const effectiveParams = { ...rootFlow.params ?? {} };
1982
+ const envPath = path.join(process.cwd(), ".env");
1983
+ let envVars = {};
1984
+ try {
1985
+ await node_fs.promises.access(envPath);
1986
+ const envContent = await promises.readFile(envPath, "utf-8");
1987
+ envVars = dotenv.parse(envContent);
1988
+ } catch (_) {
1989
+ }
1990
+ const effectiveParams = { ...parsedConfigFile?.variables, ...envVars, ...rootFlow.params };
1583
1991
  const rootRelativePath = path.relative(baseDir, absolutePath).replace(/^\.\//, "");
1584
1992
  const expandedSteps = await expandSteps(rootFlow.steps, effectiveParams, baseDir, [absolutePath], rootRelativePath);
1585
1993
  if (!this.appiumSessionStarted) {
@@ -1587,20 +1995,33 @@ ${issues}`);
1587
1995
  }
1588
1996
  globalLogger.info(`Executing flow '${rootFlow.name}' with ${expandedSteps.length} step(s)...`);
1589
1997
  let executed = 0;
1998
+ let cacheHits = 0;
1999
+ let lastStepWasFromFileRef = false;
2000
+ let fileRefStepNumber = 1;
1590
2001
  try {
2002
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig, false);
2003
+ const screenshotResolution = await getImageDimensions(screenshot);
1591
2004
  for (const step of expandedSteps) {
1592
2005
  const params = step.__params ?? effectiveParams;
1593
2006
  const filepath = step.__filepath ?? rootRelativePath;
1594
- const prefix = `Step #${executed + 1} [${step.type}${step.optional ? ", optional" : ""}] (filepath: ${filepath})`;
2007
+ console.log("step.__filepath", step.__filepath);
2008
+ console.log("rootRelativePath", rootRelativePath);
2009
+ const isFromFileRef = filepath !== rootRelativePath;
2010
+ console.log("isFromFileRef", isFromFileRef);
2011
+ if (!lastStepWasFromFileRef && isFromFileRef) {
2012
+ fileRefStepNumber = 1;
2013
+ }
2014
+ const effectiveStepNumber = isFromFileRef ? fileRefStepNumber : this.step_number;
2015
+ const prefix = `Step #${executed + 1} (step_number: ${effectiveStepNumber}) [${step.type}${step.optional ? ", optional" : ""}] (filepath: ${filepath})`;
1595
2016
  try {
1596
2017
  switch (step.type) {
1597
2018
  case "ai": {
1598
2019
  const instruction = this.interpolateTemplate(step.instruction, params);
1599
2020
  globalLogger.info(`${prefix}: ${instruction}`);
1600
2021
  if (useSmartLoop) {
1601
- const ctx = this.createSmartLoopContext();
2022
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1602
2023
  const result = await executeSmartLoop(ctx, {
1603
- stepNumber: this.step_number,
2024
+ stepNumber: effectiveStepNumber,
1604
2025
  description: instruction,
1605
2026
  instruction,
1606
2027
  filepath
@@ -1608,7 +2029,14 @@ ${issues}`);
1608
2029
  if (!result.success) {
1609
2030
  throw new Error(result.error || "Smart loop execution failed");
1610
2031
  }
1611
- this.step_number++;
2032
+ if (result.cacheHit) {
2033
+ cacheHits++;
2034
+ }
2035
+ if (isFromFileRef) {
2036
+ fileRefStepNumber++;
2037
+ } else {
2038
+ this.step_number++;
2039
+ }
1612
2040
  } else {
1613
2041
  await this.aiExecute({ command: instruction });
1614
2042
  }
@@ -1621,9 +2049,9 @@ ${issues}`);
1621
2049
  }
1622
2050
  globalLogger.info(`${prefix}: ${description}`);
1623
2051
  if (useSmartLoop) {
1624
- const ctx = this.createSmartLoopContext();
2052
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1625
2053
  const result = await executeSmartLoop(ctx, {
1626
- stepNumber: this.step_number,
2054
+ stepNumber: effectiveStepNumber,
1627
2055
  description,
1628
2056
  instruction: description,
1629
2057
  filepath
@@ -1631,7 +2059,14 @@ ${issues}`);
1631
2059
  if (!result.success) {
1632
2060
  throw new Error(result.error || "Smart loop execution failed");
1633
2061
  }
1634
- this.step_number++;
2062
+ if (result.cacheHit) {
2063
+ cacheHits++;
2064
+ }
2065
+ if (isFromFileRef) {
2066
+ fileRefStepNumber++;
2067
+ } else {
2068
+ this.step_number++;
2069
+ }
1635
2070
  } else {
1636
2071
  await this.aiExecute({ command: description });
1637
2072
  }
@@ -1645,9 +2080,9 @@ ${issues}`);
1645
2080
  globalLogger.info(`${prefix}: ${description}`);
1646
2081
  if (useSmartLoop) {
1647
2082
  const instruction = `Verify that: ${description}`;
1648
- const ctx = this.createSmartLoopContext();
2083
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1649
2084
  const result = await executeSmartLoop(ctx, {
1650
- stepNumber: this.step_number,
2085
+ stepNumber: effectiveStepNumber,
1651
2086
  description,
1652
2087
  instruction,
1653
2088
  filepath
@@ -1655,7 +2090,14 @@ ${issues}`);
1655
2090
  if (!result.success) {
1656
2091
  throw new Error(result.error || "Smart loop execution failed");
1657
2092
  }
1658
- this.step_number++;
2093
+ if (result.cacheHit) {
2094
+ cacheHits++;
2095
+ }
2096
+ if (isFromFileRef) {
2097
+ fileRefStepNumber++;
2098
+ } else {
2099
+ this.step_number++;
2100
+ }
1659
2101
  } else {
1660
2102
  await this.assert(description);
1661
2103
  }
@@ -1664,9 +2106,27 @@ ${issues}`);
1664
2106
  case "type": {
1665
2107
  const text = this.interpolateTemplate(step.text, params);
1666
2108
  globalLogger.info(`${prefix}: Type text`);
1667
- await this.takeScreenshotAndLogCodeExecution(`type: text=${text}`);
2109
+ this.takeScreenshotAndLogCodeExecution(`type: text=${text}`);
1668
2110
  await this.performType(text);
1669
- this.step_number++;
2111
+ if (isFromFileRef) {
2112
+ fileRefStepNumber++;
2113
+ } else {
2114
+ this.step_number++;
2115
+ }
2116
+ break;
2117
+ }
2118
+ case "enter": {
2119
+ globalLogger.info(`${prefix}: Press Enter`);
2120
+ this.takeScreenshotAndLogCodeExecution(`press: Enter`);
2121
+ await this.performPressEnter();
2122
+ if (step.delayNextStep) {
2123
+ await this._delay(step.delayNextStep);
2124
+ }
2125
+ if (isFromFileRef) {
2126
+ fileRefStepNumber++;
2127
+ } else {
2128
+ this.step_number++;
2129
+ }
1670
2130
  break;
1671
2131
  }
1672
2132
  case "scroll":
@@ -1674,9 +2134,9 @@ ${issues}`);
1674
2134
  const description = step.descriptionText ? this.interpolateTemplate(step.descriptionText, params) : void 0;
1675
2135
  if (description && useSmartLoop) {
1676
2136
  globalLogger.info(`${prefix}: ${description}`);
1677
- const ctx = this.createSmartLoopContext();
2137
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1678
2138
  const result = await executeSmartLoop(ctx, {
1679
- stepNumber: this.step_number,
2139
+ stepNumber: effectiveStepNumber,
1680
2140
  description,
1681
2141
  instruction: description,
1682
2142
  filepath
@@ -1684,43 +2144,60 @@ ${issues}`);
1684
2144
  if (!result.success) {
1685
2145
  throw new Error(result.error || "Smart loop execution failed");
1686
2146
  }
1687
- this.step_number++;
2147
+ if (result.cacheHit) {
2148
+ cacheHits++;
2149
+ }
2150
+ if (isFromFileRef) {
2151
+ fileRefStepNumber++;
2152
+ } else {
2153
+ this.step_number++;
2154
+ }
1688
2155
  } else {
1689
2156
  globalLogger.info(`${prefix}: ${step.type} ${step.direction}`);
1690
- await this.takeScreenshotAndLogCodeExecution(`${step.type}: direction=${step.direction}`);
1691
- if (step.type === "swipe") {
1692
- await this.performSwipe({
1693
- direction: step.direction,
1694
- x1: step.x1,
1695
- y1: step.y1,
1696
- x2: step.x2,
1697
- y2: step.y2,
1698
- duration: step.duration
1699
- });
2157
+ this.takeScreenshotAndLogCodeExecution(`${step.type}: direction=${step.direction}`);
2158
+ await this.performSwipe({
2159
+ direction: step.direction,
2160
+ x1: step.x1,
2161
+ y1: step.y1,
2162
+ x2: step.x2,
2163
+ y2: step.y2,
2164
+ duration: step.duration,
2165
+ screenshotHeight: screenshotResolution.height,
2166
+ screenshotWidth: screenshotResolution.width
2167
+ });
2168
+ if (isFromFileRef) {
2169
+ fileRefStepNumber++;
1700
2170
  } else {
1701
- await this.performSwipe({ direction: step.direction });
2171
+ this.step_number++;
1702
2172
  }
1703
- this.step_number++;
1704
2173
  }
1705
2174
  break;
1706
2175
  }
1707
2176
  case "zoom": {
1708
2177
  globalLogger.info(`${prefix}: Zoom ${step.direction}`);
1709
- await this.takeScreenshotAndLogCodeExecution(`zoom: direction=${step.direction}`);
1710
- this.step_number++;
2178
+ this.takeScreenshotAndLogCodeExecution(`zoom: direction=${step.direction}`);
2179
+ if (isFromFileRef) {
2180
+ fileRefStepNumber++;
2181
+ } else {
2182
+ this.step_number++;
2183
+ }
1711
2184
  break;
1712
2185
  }
1713
2186
  case "scrollUntil": {
1714
2187
  const interpolatedText = step.text != null ? this.interpolateTemplate(step.text, params) : void 0;
1715
2188
  globalLogger.info(`${prefix}: Scroll until ${interpolatedText ?? step.elementId}`);
1716
- await this.takeScreenshotAndLogCodeExecution(`scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`);
2189
+ this.takeScreenshotAndLogCodeExecution(`scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`);
1717
2190
  await this.performScrollUntil({
1718
2191
  direction: step.direction,
1719
2192
  text: interpolatedText,
1720
2193
  elementId: step.elementId,
1721
2194
  maxScrolls: step.maxScrolls
1722
2195
  });
1723
- this.step_number++;
2196
+ if (isFromFileRef) {
2197
+ fileRefStepNumber++;
2198
+ } else {
2199
+ this.step_number++;
2200
+ }
1724
2201
  break;
1725
2202
  }
1726
2203
  case "deeplink": {
@@ -1728,7 +2205,7 @@ ${issues}`);
1728
2205
  const bundleId = params["bundleId"];
1729
2206
  const url = this.interpolateTemplate(step.url, params);
1730
2207
  globalLogger.info(`${prefix}: Open deeplink ${url}`);
1731
- await this.takeScreenshotAndLogCodeExecution(`openDeepLinkUrl: url=${url}`);
2208
+ this.takeScreenshotAndLogCodeExecution(`openDeepLinkUrl: url=${url}`);
1732
2209
  await this.openDeepLinkUrl({ url, package: pkg, bundleId });
1733
2210
  break;
1734
2211
  }
@@ -1736,10 +2213,12 @@ ${issues}`);
1736
2213
  throw new Error(`Unsupported step type at execution: ${step.type}`);
1737
2214
  }
1738
2215
  }
2216
+ lastStepWasFromFileRef = isFromFileRef;
1739
2217
  executed++;
1740
2218
  } catch (err) {
1741
2219
  if (step.optional) {
1742
2220
  globalLogger.warn(`${prefix} failed but marked optional. Continuing. Error: ${err.message}`);
2221
+ lastStepWasFromFileRef = isFromFileRef;
1743
2222
  continue;
1744
2223
  }
1745
2224
  throw err;
@@ -1752,6 +2231,16 @@ ${issues}`);
1752
2231
  }
1753
2232
  throw e;
1754
2233
  }
2234
+ const flowEndTime = performance.now();
2235
+ const totalTime = (flowEndTime - flowStartTime) / 1e3;
2236
+ globalLogger.info(`
2237
+ ${"=".repeat(50)}`);
2238
+ globalLogger.info(`Flow Execution Summary:`);
2239
+ globalLogger.info(`Total Test Time: ${totalTime.toFixed(2)} seconds`);
2240
+ globalLogger.info(`Total Steps executed: ${executed}`);
2241
+ globalLogger.info(`Total Step Cache Hits: ${cacheHits}`);
2242
+ globalLogger.info(`${"=".repeat(50)}
2243
+ `);
1755
2244
  return rootFlow;
1756
2245
  }
1757
2246
  async gptHandler(command, cachingMode) {
@@ -1759,8 +2248,15 @@ ${issues}`);
1759
2248
  let conditionSucceeded = false;
1760
2249
  while (!conditionSucceeded) {
1761
2250
  let screenshot;
2251
+ let originalScreenshotBase64 = null;
1762
2252
  if (!this.useGptDriverCloud) {
1763
- screenshot = await this.getScreenshot(this.appiumSessionConfig);
2253
+ const stabilityResult = await waitForStableScreen(
2254
+ () => this.getScreenshot(this.appiumSessionConfig)
2255
+ );
2256
+ screenshot = stabilityResult.originalScreenshotBase64;
2257
+ if (!stabilityResult.stable) {
2258
+ globalLogger.warn("Screen did not stabilize within timeout, proceeding with last screenshot");
2259
+ }
1764
2260
  }
1765
2261
  globalLogger.info("Requesting next action from GPT Driver...");
1766
2262
  const response = await axios.request(
@@ -1788,7 +2284,6 @@ ${issues}`);
1788
2284
  for (const appiumCommand of executeResponse.commands) {
1789
2285
  await this.executeCommand(appiumCommand);
1790
2286
  }
1791
- await delay(1500);
1792
2287
  }
1793
2288
  this.step_number = this.step_number + 1;
1794
2289
  globalLogger.info("Command execution completed successfully");
@@ -1816,13 +2311,13 @@ ${issues}`);
1816
2311
  });
1817
2312
  }
1818
2313
  }
1819
- async logCodeExecution(screenshot, command) {
2314
+ async logCodeExecution(screenshot, command, isCacheHit) {
1820
2315
  try {
1821
- const screenshot2 = await this.getScreenshot(this.appiumSessionConfig);
1822
2316
  await axios.post(`${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/log_code_execution`, {
1823
2317
  api_key: this.apiKey,
1824
- base64_screenshot: screenshot2,
1825
- command
2318
+ base64_screenshot: screenshot,
2319
+ command,
2320
+ from_cache: !!isCacheHit
1826
2321
  });
1827
2322
  } catch (e) {
1828
2323
  globalLogger.error("Failed to log code execution", e);