gpt-driver-node 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -2,9 +2,11 @@
2
2
 
3
3
  var node_fs = require('node:fs');
4
4
  var path = require('node:path');
5
+ var promises = require('node:fs/promises');
5
6
  var axios = require('axios');
6
7
  var sharp = require('sharp');
7
8
  var webdriverio = require('webdriverio');
9
+ var dotenv = require('dotenv');
8
10
  var winston = require('winston');
9
11
  var zod = require('zod');
10
12
  var crypto = require('node:crypto');
@@ -93,6 +95,75 @@ ${logStyles.gray(stack)}` : logMessage;
93
95
  ]
94
96
  });
95
97
 
98
/**
 * Polls the screen until two consecutive captures look the same.
 *
 * Every capture is decoded from base64, resized with sharp to
 * `downscaleWidth` x `downscaleHeight` (fit "fill"), given an alpha channel and
 * converted to a raw pixel buffer, which is then compared against the previous
 * capture's buffer with `areImagesSimilar`.
 *
 * @param {() => Promise<string>} getScreenshot - Async callback returning a base64-encoded screenshot.
 * @param {Object} [options]
 * @param {number} [options.maxTimeoutSec=5] - Polling budget in seconds.
 * @param {number} [options.intervalSec=0.01] - Pause between captures in seconds.
 * @param {number} [options.tolerance=1e-5] - Max ratio of differing pixels still considered stable.
 * @param {number} [options.pixelThreshold=0] - Max per-channel difference for a pixel to count as unchanged.
 * @param {number} [options.downscaleWidth=600] - Width used for the comparison buffers.
 * @param {number} [options.downscaleHeight=600] - Height used for the comparison buffers.
 * @returns {Promise<{stable: boolean, originalScreenshotBase64: string}>}
 *   `stable: true` with the capture that matched its predecessor, or
 *   `stable: false` with the last capture taken once the budget is used up.
 * @throws Rethrows any error raised while capturing or processing a screenshot
 *   (after logging a warning).
 */
const waitForStableScreen = async (getScreenshot, options = {}) => {
  const {
    maxTimeoutSec = 5,
    intervalSec = 0.01,
    tolerance = 1e-5,
    pixelThreshold = 0,
    downscaleWidth = 600,
    downscaleHeight = 600
  } = options;
  const startTime = Date.now();
  const maxTimeoutMillis = maxTimeoutSec * 1e3;
  const intervalMillis = intervalSec * 1e3;
  let previousDownsampledBuffer = null;
  let lastScreenshotBase64 = null;
  // do/while: capture at least once, so a zero or negative timeout still yields
  // a real screenshot instead of `null` (callers read `.length` on the result).
  do {
    try {
      const screenshotBase64 = await getScreenshot();
      lastScreenshotBase64 = screenshotBase64;
      const imageBuffer = Buffer.from(screenshotBase64, "base64");
      const downsampledBuffer = await sharp(imageBuffer)
        .resize(downscaleWidth, downscaleHeight, { fit: "fill" })
        .ensureAlpha()
        .raw()
        .toBuffer();
      if (previousDownsampledBuffer) {
        const isSimilar = areImagesSimilar(
          previousDownsampledBuffer,
          downsampledBuffer,
          downscaleWidth,
          downscaleHeight,
          tolerance,
          pixelThreshold
        );
        if (isSimilar) {
          return {
            stable: true,
            originalScreenshotBase64: screenshotBase64
          };
        }
      }
      previousDownsampledBuffer = downsampledBuffer;
    } catch (e) {
      globalLogger.warn(`Error during stability check: ${e}`);
      throw e;
    }
    await delay(intervalMillis);
  } while (Date.now() - startTime < maxTimeoutMillis);
  return {
    stable: false,
    originalScreenshotBase64: lastScreenshotBase64
  };
};
146
/**
 * Decides whether two raw pixel buffers depict (almost) the same image.
 *
 * The buffers are walked in steps of four bytes; the first three bytes of each
 * step are compared as R, G and B, the fourth byte is ignored. A pixel counts
 * as different when any of those three channels differs by more than
 * `pixelThreshold`.
 *
 * @param {Buffer|Uint8Array} buffer1 - First pixel buffer.
 * @param {Buffer|Uint8Array} buffer2 - Second pixel buffer.
 * @param {number} width - Image width in pixels.
 * @param {number} height - Image height in pixels.
 * @param {number} tolerance - Max allowed ratio of differing pixels (differing / (width * height)).
 * @param {number} pixelThreshold - Max per-channel absolute difference treated as "same".
 * @returns {boolean} True when the ratio of differing pixels is within `tolerance`.
 */
function areImagesSimilar(buffer1, buffer2, width, height, tolerance, pixelThreshold) {
  if (buffer1.length !== buffer2.length) {
    return false;
  }
  const totalPixels = width * height;
  if (!(totalPixels > 0)) {
    // A zero/negative/NaN pixel count would turn the ratio below into NaN,
    // Infinity or a negative number. Two empty buffers are trivially equal;
    // anything else with an unusable pixel count is treated as different.
    return buffer1.length === 0;
  }
  let differentPixels = 0;
  for (let i = 0; i < buffer1.length; i += 4) {
    const redDiff = Math.abs(buffer1[i] - buffer2[i]);
    const greenDiff = Math.abs(buffer1[i + 1] - buffer2[i + 1]);
    const blueDiff = Math.abs(buffer1[i + 2] - buffer2[i + 2]);
    if (redDiff > pixelThreshold || greenDiff > pixelThreshold || blueDiff > pixelThreshold) {
      differentPixels++;
    }
  }
  return differentPixels / totalPixels <= tolerance;
}
166
+
96
167
  const SavableStepBaseSchema = zod.z.object({
97
168
  id: zod.z.number().optional(),
98
169
  descriptionText: zod.z.string().optional(),
@@ -125,9 +196,8 @@ const SavableSwipeStepSchema = SavableStepBaseSchema.extend({
125
196
  y2: zod.z.number().optional(),
126
197
  duration: zod.z.number().optional().default(500)
127
198
  });
128
- const SavableScrollStepSchema = SavableStepBaseSchema.extend({
129
- type: zod.z.literal("scroll"),
130
- direction: zod.z.enum(["up", "down"])
199
+ const SavableScrollStepSchema = SavableSwipeStepSchema.extend({
200
+ type: zod.z.literal("scroll")
131
201
  });
132
202
  const SavableZoomStepSchema = SavableStepBaseSchema.extend({
133
203
  type: zod.z.literal("zoom"),
@@ -148,6 +218,10 @@ const SavableAIStepSchema = SavableStepBaseSchema.extend({
148
218
  type: zod.z.literal("ai"),
149
219
  instruction: zod.z.string()
150
220
  });
221
+ const SavableEnterStepSchema = SavableStepBaseSchema.extend({
222
+ type: zod.z.literal("enter"),
223
+ delayNextStep: zod.z.number().optional().default(500)
224
+ });
151
225
  const SavableFileRefStepSchema = SavableStepBaseSchema.extend({
152
226
  type: zod.z.literal("fileRef"),
153
227
  path: zod.z.string(),
@@ -172,6 +246,8 @@ const SavableStepSchema = zod.z.discriminatedUnion("type", [
172
246
  // type: 'deeplink'
173
247
  SavableAIStepSchema,
174
248
  // type: 'ai'
249
+ SavableEnterStepSchema,
250
+ // type: 'enter'
175
251
  SavableFileRefStepSchema
176
252
  // type: 'fileRef'
177
253
  ]);
@@ -193,7 +269,7 @@ const CACHE_SERVER_URL = "https://cache.mobileboost.io";
193
269
  const GPT_DRIVER_BASE_URL = "https://api.mobileboost.io";
194
270
  const RESCALE_FACTOR = 4;
195
271
  const SMART_LOOP_MAX_ITERATIONS = 15;
196
- const CACHE_RETRY_MS = 2e3;
272
+ const CACHE_RETRY_MS = 1500;
197
273
  const CACHE_CHECK_INTERVAL_MS = 500;
198
274
 
199
275
  function generateCacheHash(apiKey, filepath, stepNumber, description, platform, resolution) {
@@ -277,15 +353,24 @@ async function executeFromCache(params) {
277
353
  return { found: false };
278
354
  } catch (error) {
279
355
  if (axios.isAxiosError(error)) {
280
- globalLogger.warn(`[Cache] Cache lookup failed: ${error.response?.data || error.message}`);
356
+ const errorMsg = `Cache lookup failed: ${error.response?.data || error.message}`;
357
+ globalLogger.warn(`[Cache] ${errorMsg}`);
281
358
  } else {
282
- globalLogger.error(`[Cache] Error executing from cache: ${error}`);
359
+ const errorMsg = `Error executing from cache: ${error}`;
360
+ globalLogger.error(`[Cache] ${errorMsg}`);
283
361
  }
284
362
  return { found: false };
285
363
  }
286
364
  }
287
365
  async function populateCache(params) {
288
366
  try {
367
+ globalLogger.debug(`[populateCache] Parameters: ${JSON.stringify({
368
+ stepNumber: params.stepNumber,
369
+ filepath: params.filepath,
370
+ platform: params.platform,
371
+ screenResolution: params.screenResolution,
372
+ executionDataCount: params.executionData.length
373
+ })}`);
289
374
  const hash = generateCacheHash(
290
375
  params.apiKey,
291
376
  params.filepath,
@@ -294,13 +379,15 @@ async function populateCache(params) {
294
379
  params.platform,
295
380
  params.screenResolution
296
381
  );
382
+ globalLogger.debug(`[populateCache] Generated hash: ${hash}`);
297
383
  const payload = await Promise.all(params.executionData.map(async (item) => {
298
384
  const resizedBuffer = await resizeScreenshotForCache(item.screenshot);
385
+ const scaledScreenshotBase64 = resizedBuffer.toString("base64");
299
386
  const scaledCommands = item.commands.map(
300
387
  (cmd) => scaleCommand(cmd, "divide")
301
388
  );
302
389
  return {
303
- screenshot: resizedBuffer.toString("base64"),
390
+ screenshot: scaledScreenshotBase64,
304
391
  commands: scaledCommands
305
392
  };
306
393
  }));
@@ -312,9 +399,11 @@ async function populateCache(params) {
312
399
  return { success: true };
313
400
  } catch (error) {
314
401
  if (axios.isAxiosError(error)) {
315
- globalLogger.error(`[Cache] Failed to populate cache: ${error.response?.data || error.message}`);
402
+ const errorMsg = `Failed to populate cache: ${error.response?.data || error.message}`;
403
+ globalLogger.error(`[Cache] ${errorMsg}`);
316
404
  } else {
317
- globalLogger.error(`[Cache] Error populating cache: ${error}`);
405
+ const errorMsg = `Error populating cache: ${error}`;
406
+ globalLogger.error(`[Cache] ${errorMsg}`);
318
407
  }
319
408
  return { success: false };
320
409
  }
@@ -444,6 +533,10 @@ function isWaitCommand(cmd) {
444
533
/**
 * Tells whether a command string is a scroll instruction, i.e. whether it
 * begins with the literal prefix "scroll:" (case-sensitive, no trimming).
 *
 * @param {string} cmd - Command string to inspect.
 * @returns {boolean} True when `cmd` starts with "scroll:".
 */
function isScrollCommand(cmd) {
  const scrollPrefix = "scroll:";
  const hasScrollPrefix = cmd.startsWith(scrollPrefix);
  return hasScrollPrefix;
}
536
/**
 * Tells whether a command asks for the Enter key to be pressed, i.e. has the
 * form "press: enter" (case-insensitive, surrounding whitespace ignored).
 *
 * The "press:" prefix must be at the start of the command, like the other
 * command predicates. Otherwise a command that merely contains the text
 * "press:enter" (for example "type: press:enter") would be taken for a key
 * press.
 *
 * @param {string} cmd - Command string to inspect.
 * @returns {boolean} True when `cmd` is a press command whose key is "enter".
 */
function isEnterCommand(cmd) {
  const pressPrefix = "press:";
  const normalized = cmd.trim().toLowerCase();
  if (!normalized.startsWith(pressPrefix)) {
    return false;
  }
  return normalized.slice(pressPrefix.length).trim() === "enter";
}
447
540
/**
 * Tells whether a command string is a text-entry instruction, i.e. whether it
 * begins with the literal prefix "type:" (case-sensitive, no trimming).
 *
 * @param {string} cmd - Command string to inspect.
 * @returns {boolean} True when `cmd` starts with "type:".
 */
function isTypeCommand(cmd) {
  const typePrefix = "type:";
  const hasTypePrefix = cmd.startsWith(typePrefix);
  return hasTypePrefix;
}
@@ -461,8 +554,8 @@ function parseSlideCommand(cmd) {
461
554
  const directionMap = {
462
555
  down: "up",
463
556
  up: "down",
464
- left: "right",
465
- right: "left"
557
+ left: "left",
558
+ right: "right"
466
559
  };
467
560
  return {
468
561
  direction: directionMap[extractedDirection],
@@ -479,22 +572,38 @@ async function executeSmartLoop(ctx, params) {
479
572
  let lastCacheIndex = void 0;
480
573
  let anyCacheMiss = false;
481
574
  let everHadCacheHit = false;
575
+ let totalCacheHits = 0;
482
576
  const currentExecutionData = [];
577
+ const stepStartTime = performance.now();
578
+ const iterationTimes = [];
483
579
  globalLogger.info(`[SmartLoop] Starting for step ${params.stepNumber}: "${params.description}"`);
580
+ globalLogger.debug(`[Performance][SmartLoop] Step execution started at ${stepStartTime.toFixed(2)}ms`);
484
581
  try {
485
582
  while (loopCount < SMART_LOOP_MAX_ITERATIONS) {
583
+ const iterationStartTime = performance.now();
584
+ globalLogger.debug(`[SmartLoop] Starting iteration ${loopCount + 1}/${SMART_LOOP_MAX_ITERATIONS}`);
486
585
  let screenshot = "";
487
586
  let commands = [];
488
587
  let isCacheHit = false;
489
- const firstScreenshot = await ctx.getScreenshot();
490
- const screenshotResolution = await getImageDimensions(firstScreenshot);
588
+ let screenshotResolution = void 0;
589
+ const stabilityResult = await waitForStableScreen(ctx.getScreenshot);
491
590
  for (let attempt = 0; attempt < maxCacheAttempts; attempt++) {
492
- screenshot = await ctx.getScreenshot();
591
+ const screenshotStartTime = performance.now();
592
+ const screenshotEndTime = performance.now();
593
+ globalLogger.debug(`[Performance][SmartLoop] Screenshot capture took ${(screenshotEndTime - screenshotStartTime).toFixed(2)}ms`);
594
+ if (!stabilityResult.stable) {
595
+ globalLogger.warn("[SmartLoop] Screen did not stabilize within timeout, proceeding with last screenshot");
596
+ }
597
+ screenshot = stabilityResult.originalScreenshotBase64;
598
+ if (screenshotResolution === void 0) {
599
+ screenshotResolution = await getImageDimensions(screenshot);
600
+ }
493
601
  const sizeInBytes = screenshot.length * 0.75;
494
602
  const sizeInMB = (sizeInBytes / (1024 * 1024)).toFixed(2);
495
603
  globalLogger.debug(`[SmartLoop] Captured screenshot: ~${sizeInMB} MB`);
496
604
  try {
497
605
  globalLogger.debug(`[SmartLoop] Checking cache (Attempt ${attempt + 1}/${maxCacheAttempts})`);
606
+ const cacheCheckStartTime = performance.now();
498
607
  const cacheResult = await executeFromCache({
499
608
  apiKey: ctx.organisationId,
500
609
  stepNumber: params.stepNumber,
@@ -505,11 +614,14 @@ async function executeSmartLoop(ctx, params) {
505
614
  platform: ctx.platform,
506
615
  filepath: params.filepath
507
616
  });
617
+ const cacheCheckEndTime = performance.now();
618
+ globalLogger.debug(`[Performance][SmartLoop] Cache check took ${(cacheCheckEndTime - cacheCheckStartTime).toFixed(2)}ms`);
508
619
  if (cacheResult.found && cacheResult.cacheCommands) {
509
620
  commands = cacheResult.cacheCommands;
510
621
  lastCacheIndex = cacheResult.cacheIndex;
511
622
  isCacheHit = true;
512
623
  everHadCacheHit = true;
624
+ totalCacheHits++;
513
625
  globalLogger.info(`[SmartLoop] Cache Hit! (${commands.length} commands)`);
514
626
  break;
515
627
  }
@@ -521,18 +633,53 @@ async function executeSmartLoop(ctx, params) {
521
633
  await delay(CACHE_CHECK_INTERVAL_MS);
522
634
  }
523
635
  }
636
+ if (screenshotResolution === void 0) {
637
+ const screenshot2 = await ctx.getScreenshot();
638
+ screenshotResolution = await getImageDimensions(screenshot2);
639
+ }
524
640
  let aiCommands = [];
641
+ let actionDescription = [];
525
642
  if (!isCacheHit) {
526
643
  anyCacheMiss = true;
527
644
  globalLogger.info(`[SmartLoop] Cache Miss. Requesting AI agent...`);
528
- const agentResponse = await executeAgentStep({
529
- apiKey: ctx.organisationId,
530
- base64_screenshot: screenshot,
531
- instruction: params.instruction,
532
- action_history: actionHistory
533
- });
534
- aiCommands = agentResponse.appetizeCommands || [];
535
- const gptCommands = agentResponse.gptCommands || [];
645
+ const aiStartTime = performance.now();
646
+ let agentResponse;
647
+ if (ctx.middleLayerAssertFn) {
648
+ const response = await ctx.middleLayerAssertFn(screenshot);
649
+ const results = response.data.results;
650
+ const failedConditions = Object.entries(results).filter(([_, success]) => !success).map(([key, _]) => key);
651
+ if (failedConditions.length === 0) {
652
+ agentResponse = {
653
+ appetizeCommands: [
654
+ "task complete: All assertions passed"
655
+ ]
656
+ };
657
+ } else {
658
+ agentResponse = {
659
+ appetizeCommands: [
660
+ `error detected: Failed conditions: ${failedConditions.join(", ")}`
661
+ ]
662
+ };
663
+ }
664
+ } else {
665
+ agentResponse = await executeAgentStep({
666
+ apiKey: ctx.organisationId,
667
+ base64_screenshot: screenshot,
668
+ instruction: params.instruction,
669
+ action_history: actionHistory
670
+ });
671
+ }
672
+ const aiEndTime = performance.now();
673
+ globalLogger.debug(`[Performance][SmartLoop] AI agent call took ${(aiEndTime - aiStartTime).toFixed(2)}ms`);
674
+ aiCommands = agentResponse.appetizeCommands ?? [];
675
+ const gptCommands = agentResponse.gptCommands ?? [];
676
+ actionDescription = [];
677
+ const actionDescriptionIndex = gptCommands.findIndex(
678
+ (entry) => entry.startsWith("actions_description:")
679
+ );
680
+ if (actionDescriptionIndex !== -1) {
681
+ actionDescription = gptCommands.slice(actionDescriptionIndex, actionDescriptionIndex + 1);
682
+ }
536
683
  const reasoningIndex = gptCommands.findIndex((entry) => entry.startsWith("reasoning:"));
537
684
  if (reasoningIndex !== -1) {
538
685
  const parsedCommands = gptCommands.slice(reasoningIndex);
@@ -542,19 +689,23 @@ async function executeSmartLoop(ctx, params) {
542
689
  }
543
690
  actionHistory = [...actionHistory, ...parsedCommands];
544
691
  }
545
- commands = [...aiCommands];
692
+ commands = [...actionDescription, ...aiCommands];
546
693
  globalLogger.debug(`[SmartLoop] AI returned ${commands.length} command(s)`);
547
694
  }
548
695
  currentExecutionData.push({
549
696
  screenshot,
550
- commands: aiCommands.length > 0 ? aiCommands : commands
697
+ commands
551
698
  });
552
- await ctx.logCodeExecution(screenshot, commands.join("\n"));
699
+ const logPromise = ctx.logCodeExecution(screenshot, commands.join("\n"), isCacheHit);
700
+ if (ctx.pendingLogPromises) {
701
+ ctx.pendingLogPromises.push(logPromise);
702
+ }
553
703
  let actionExecuted = false;
554
704
  let taskCompleted = false;
555
705
  if (commands.length > 0) {
556
706
  globalLogger.debug(`[SmartLoop] Executing ${commands.length} command(s)`);
557
707
  }
708
+ const commandExecutionStartTime = performance.now();
558
709
  for (const cmd of commands) {
559
710
  if (isTaskComplete(cmd)) {
560
711
  taskCompleted = true;
@@ -579,6 +730,10 @@ async function executeSmartLoop(ctx, params) {
579
730
  );
580
731
  actionExecuted = true;
581
732
  }
733
+ } else if (isEnterCommand(cmd)) {
734
+ globalLogger.debug(`[SmartLoop] Pressing Enter`);
735
+ await ctx.performPressEnter();
736
+ actionExecuted = true;
582
737
  } else if (isWaitCommand(cmd)) {
583
738
  const seconds = parseWaitSeconds(cmd);
584
739
  if (seconds) {
@@ -591,6 +746,9 @@ async function executeSmartLoop(ctx, params) {
591
746
  if (direction) {
592
747
  globalLogger.debug(`[SmartLoop] Scrolling ${direction}`);
593
748
  await ctx.performSwipe({ direction });
749
+ if (isCacheHit) {
750
+ await delay(650);
751
+ }
594
752
  actionExecuted = true;
595
753
  }
596
754
  } else if (isSlideCommand(cmd)) {
@@ -628,6 +786,9 @@ async function executeSmartLoop(ctx, params) {
628
786
  screenshotHeight: screenshotResolution.height,
629
787
  duration: 500
630
788
  });
789
+ if (isCacheHit) {
790
+ await delay(650);
791
+ }
631
792
  actionExecuted = true;
632
793
  }
633
794
  } else if (isTypeCommand(cmd)) {
@@ -640,49 +801,83 @@ async function executeSmartLoop(ctx, params) {
640
801
  }
641
802
  }
642
803
  if (actionExecuted) {
804
+ const commandExecutionEndTime = performance.now();
805
+ globalLogger.debug(`[Performance][SmartLoop] Command execution took ${(commandExecutionEndTime - commandExecutionStartTime).toFixed(2)}ms`);
643
806
  if (isCacheHit) {
644
807
  actionHistory.push(...commands);
645
808
  }
646
809
  await delay(100);
647
810
  }
811
+ const iterationEndTime = performance.now();
812
+ const iterationDuration = iterationEndTime - iterationStartTime;
813
+ iterationTimes.push(iterationDuration);
814
+ globalLogger.debug(`[Performance][SmartLoop] Iteration ${loopCount + 1} completed in ${iterationDuration.toFixed(2)}ms (${(iterationDuration / 1e3).toFixed(2)}s)`);
648
815
  if (taskCompleted) {
649
816
  globalLogger.info(`[SmartLoop] Task completed successfully`);
817
+ const stepEndTime2 = performance.now();
818
+ const totalDuration2 = stepEndTime2 - stepStartTime;
819
+ const totalSeconds2 = totalDuration2 / 1e3;
820
+ const totalMinutes2 = totalSeconds2 / 60;
821
+ const averageIterationTime = iterationTimes.reduce((sum, time) => sum + time, 0) / iterationTimes.length;
822
+ globalLogger.debug(`[Performance][SmartLoop] Step execution summary:`);
823
+ globalLogger.debug(`[Performance][SmartLoop] - Total iterations: ${loopCount + 1}`);
824
+ globalLogger.debug(`[Performance][SmartLoop] - Total duration: ${totalDuration2.toFixed(2)}ms (${totalSeconds2.toFixed(2)}s / ${totalMinutes2.toFixed(2)}min)`);
825
+ globalLogger.debug(`[Performance][SmartLoop] - Average iteration time: ${averageIterationTime.toFixed(2)}ms (${(averageIterationTime / 1e3).toFixed(2)}s)`);
826
+ globalLogger.debug(`[Performance][SmartLoop] - Individual iteration times: ${iterationTimes.map((t) => `${t.toFixed(2)}ms`).join(", ")}`);
650
827
  if (anyCacheMiss && currentExecutionData.length > 0) {
651
828
  globalLogger.info(`[SmartLoop] Populating cache with ${currentExecutionData.length} frame(s)...`);
652
- try {
653
- await populateCache({
654
- apiKey: ctx.organisationId,
655
- stepNumber: params.stepNumber,
656
- stepDescription: params.description,
657
- executionData: currentExecutionData,
658
- screenResolution: screenshotResolution,
659
- platform: ctx.platform,
660
- filepath: params.filepath
661
- });
829
+ const cachePopulateStartTime = performance.now();
830
+ populateCache({
831
+ apiKey: ctx.organisationId,
832
+ stepNumber: params.stepNumber,
833
+ stepDescription: params.description,
834
+ executionData: currentExecutionData,
835
+ screenResolution: screenshotResolution,
836
+ platform: ctx.platform,
837
+ filepath: params.filepath
838
+ }).then(() => {
839
+ const cachePopulateEndTime = performance.now();
840
+ globalLogger.debug(`[Performance][SmartLoop] Cache population took ${(cachePopulateEndTime - cachePopulateStartTime).toFixed(2)}ms`);
662
841
  globalLogger.debug(`[SmartLoop] Cache populated successfully`);
663
- } catch (e) {
664
- globalLogger.warn(`[SmartLoop] Failed to populate cache: ${e.message}`);
665
- }
842
+ }).catch((e) => globalLogger.warn(`[SmartLoop] Cache population failed: ${e.message}`));
666
843
  } else if (!anyCacheMiss) {
667
844
  globalLogger.debug(`[SmartLoop] Skipping cache population (all actions were cached)`);
668
845
  }
669
846
  return {
670
847
  success: true,
671
848
  iterations: loopCount + 1,
672
- cacheHit: everHadCacheHit
849
+ cacheHit: everHadCacheHit,
850
+ cacheHitCount: totalCacheHits
673
851
  };
674
852
  }
675
853
  loopCount++;
676
854
  }
855
+ const stepEndTime = performance.now();
856
+ const totalDuration = stepEndTime - stepStartTime;
857
+ const totalSeconds = totalDuration / 1e3;
858
+ const totalMinutes = totalSeconds / 60;
859
+ globalLogger.debug(`[Performance][SmartLoop] Timeout reached after ${loopCount} iterations`);
860
+ globalLogger.debug(`[Performance][SmartLoop] - Total duration: ${totalDuration.toFixed(2)}ms (${totalSeconds.toFixed(2)}s / ${totalMinutes.toFixed(2)}min)`);
861
+ globalLogger.debug(`[Performance][SmartLoop] - Individual iteration times: ${iterationTimes.map((t) => `${t.toFixed(2)}ms`).join(", ")}`);
677
862
  throw new Error(`Smart Loop timeout after ${SMART_LOOP_MAX_ITERATIONS} iterations`);
678
863
  } catch (error) {
679
864
  const message = error instanceof Error ? error.message : String(error);
865
+ const stepEndTime = performance.now();
866
+ const totalDuration = stepEndTime - stepStartTime;
867
+ const totalSeconds = totalDuration / 1e3;
868
+ const totalMinutes = totalSeconds / 60;
680
869
  globalLogger.error(`[SmartLoop] Error: ${message}`);
870
+ globalLogger.debug(`[Performance][SmartLoop] Error occurred after ${loopCount + 1} iteration(s)`);
871
+ globalLogger.debug(`[Performance][SmartLoop] - Total duration before error: ${totalDuration.toFixed(2)}ms (${totalSeconds.toFixed(2)}s / ${totalMinutes.toFixed(2)}min)`);
872
+ if (iterationTimes.length > 0) {
873
+ globalLogger.debug(`[Performance][SmartLoop] - Individual iteration times: ${iterationTimes.map((t) => `${t.toFixed(2)}ms`).join(", ")}`);
874
+ }
681
875
  return {
682
876
  success: false,
683
877
  error: message,
684
878
  iterations: loopCount + 1,
685
- cacheHit: everHadCacheHit
879
+ cacheHit: everHadCacheHit,
880
+ cacheHitCount: totalCacheHits
686
881
  };
687
882
  }
688
883
  }
@@ -713,8 +908,13 @@ class GptDriver {
713
908
  step_number = 1;
714
909
  organisationId;
715
910
  configFilePath;
911
+ // Session Execution Stats
912
+ _stats_startTime = 0;
913
+ _stats_executedSteps = 0;
914
+ _stats_cacheHits = 0;
716
915
  // Smart loop state - maintains action history across steps for context
717
916
  globalActionHistory = [];
917
+ pendingLogPromises = [];
718
918
  /**
719
919
  * Creates an instance of the GptDriver class.
720
920
  *
@@ -787,6 +987,9 @@ class GptDriver {
787
987
  */
788
988
  async startSession() {
789
989
  globalLogger.info("Starting session...");
990
+ this._stats_startTime = performance.now();
991
+ this._stats_executedSteps = 0;
992
+ this._stats_cacheHits = 0;
790
993
  if (!this.useGptDriverCloud) {
791
994
  if (this.driver) {
792
995
  let platform;
@@ -886,6 +1089,37 @@ class GptDriver {
886
1089
  getSessionLink() {
887
1090
  return `https://app.mobileboost.io/gpt-driver/sessions/${this.gptDriverSessionId}`;
888
1091
  }
1092
+ /**
1093
+ * Stops the current GPTDriver session and update its state.
1094
+ *
1095
+ * This method sends a request to the GPT Driver server to stop the session and logs the session status as either "failed" or "success."
1096
+ *
1097
+ * @param {"failed" | "success"} status - Indicates the outcome of the session.
1098
+ * Use "success" if the session completed as expected,
1099
+ * or "failed" if the session encountered an error or issue.
1100
+ *
1101
+ * @throws {Error} If the request to stop the session fails.
1102
+ */
1103
+ /**
1104
+ * Marks the current GPTDriver session as succeeded.
1105
+ *
1106
+ * This method stops the session and logs it as "succeeded"
1107
+ *
1108
+ * @throws {Error} If the request to stop the session fails.
1109
+ */
1110
+ async setSessionSucceeded() {
1111
+ await this.setSessionStatus("succeeded");
1112
+ }
1113
+ /**
1114
+ * Marks the current GPTDriver session as failed.
1115
+ *
1116
+ * This method stops the session and logs it as "failed."
1117
+ *
1118
+ * @throws {Error} If the request to stop the session fails.
1119
+ */
1120
+ async setSessionFailed() {
1121
+ await this.setSessionStatus("failed");
1122
+ }
889
1123
  /**
890
1124
  * Stops the current GPTDriver session and update its state.
891
1125
  *
@@ -898,7 +1132,13 @@ class GptDriver {
898
1132
  * @throws {Error} If the request to stop the session fails.
899
1133
  */
900
1134
  async setSessionStatus(status) {
1135
+ try {
1136
+ await Promise.all(this.pendingLogPromises);
1137
+ } catch (e) {
1138
+ globalLogger.error(`Error waiting for pending log promises: ${e.message}`);
1139
+ }
901
1140
  if (this.gptDriverSessionId) {
1141
+ this.printSessionSummary();
902
1142
  globalLogger.info(`Stopping session with status: ${status}`);
903
1143
  await axios.post(
904
1144
  `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/stop`,
@@ -914,6 +1154,19 @@ class GptDriver {
914
1154
  this.globalActionHistory = [];
915
1155
  }
916
1156
  }
1157
+ printSessionSummary() {
1158
+ if (this._stats_startTime === 0) return;
1159
+ const endTime = performance.now();
1160
+ const totalTime = (endTime - this._stats_startTime) / 1e3;
1161
+ globalLogger.info(`
1162
+ ${"=".repeat(50)}`);
1163
+ globalLogger.info(`Session Execution Summary:`);
1164
+ globalLogger.info(`Total Session Time: ${totalTime.toFixed(2)} seconds`);
1165
+ globalLogger.info(`Total Steps executed: ${this._stats_executedSteps}`);
1166
+ globalLogger.info(`Total Step Cache Hits: ${this._stats_cacheHits}`);
1167
+ globalLogger.info(`${"=".repeat(50)}
1168
+ `);
1169
+ }
917
1170
  // ─────────────────────────────────────────────────────────────────────────────
918
1171
  // SMART LOOP INTEGRATION
919
1172
  // ─────────────────────────────────────────────────────────────────────────────
@@ -921,7 +1174,7 @@ class GptDriver {
921
1174
  * Creates a SmartLoopContext for the current session.
922
1175
  * This context provides all the callbacks needed by the smart loop executor.
923
1176
  */
924
- createSmartLoopContext() {
1177
+ createSmartLoopContext(options) {
925
1178
  if (!this.organisationId) {
926
1179
  throw new Error("Organisation ID is missing, please set it in the GPTDriver constructor");
927
1180
  }
@@ -933,8 +1186,11 @@ class GptDriver {
933
1186
  performTap: (x, y, screenshotWidth, screenshotHeight) => this.performTap(x, y, screenshotWidth, screenshotHeight),
934
1187
  performSwipe: (params) => this.performSwipe(params),
935
1188
  performType: (text) => this.performType(text),
936
- logCodeExecution: async (screenshot, command) => this.logCodeExecution(screenshot, command),
937
- organisationId: this.organisationId
1189
+ performPressEnter: () => this.performPressEnter(),
1190
+ logCodeExecution: async (screenshot, command, isCacheHit) => this.logCodeExecution(screenshot, command, isCacheHit),
1191
+ organisationId: this.organisationId,
1192
+ middleLayerAssertFn: options?.middleLayerAssertFn,
1193
+ pendingLogPromises: options?.pendingLogPromises
938
1194
  };
939
1195
  }
940
1196
  /**
@@ -1035,6 +1291,24 @@ class GptDriver {
1035
1291
  await client.keys(text.split(""));
1036
1292
  }
1037
1293
  }
1294
+ async performPressEnter() {
1295
+ const client = await this.getWdioClient();
1296
+ const platform = this.appiumSessionConfig?.platform;
1297
+ if (platform === "iOS") {
1298
+ await client.performActions([
1299
+ {
1300
+ type: "key",
1301
+ id: "keyboard",
1302
+ actions: [
1303
+ { type: "keyDown", value: "\uE007" },
1304
+ { type: "keyUp", value: "\uE007" }
1305
+ ]
1306
+ }
1307
+ ]);
1308
+ } else {
1309
+ await client.keys(["Enter"]);
1310
+ }
1311
+ }
1038
1312
  clamp(value, min, max) {
1039
1313
  return Math.max(min, Math.min(max, value));
1040
1314
  }
@@ -1162,14 +1436,43 @@ class GptDriver {
1162
1436
  }
1163
1437
  async getScreenshot(appiumSessionConfig, shouldScale = true) {
1164
1438
  globalLogger.debug("Capturing screenshot...");
1165
- const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
1166
- const screenshotResponse = await axios.get(url);
1167
- let screenshot = await screenshotResponse.data.value;
1168
- if (appiumSessionConfig.platform === "iOS" && shouldScale) {
1169
- globalLogger.debug(`Resizing iOS screenshot to ${appiumSessionConfig.size.width}x${appiumSessionConfig.size.height}`);
1170
- const imageBuffer = Buffer.from(screenshot, "base64");
1171
- const transformedImage = await sharp(imageBuffer).resize(appiumSessionConfig.size.width, appiumSessionConfig.size.height).toBuffer();
1172
- screenshot = transformedImage.toString("base64");
1439
+ let screenshot;
1440
+ if (appiumSessionConfig.platform === "Android") {
1441
+ try {
1442
+ const { execSync } = await import('node:child_process');
1443
+ let udid;
1444
+ if (this.driver) {
1445
+ if (this.driver.sessionId != null) {
1446
+ const caps = this.driver.capabilities;
1447
+ udid = caps["appium:udid"] || caps["udid"];
1448
+ } else {
1449
+ const driver = this.driver;
1450
+ const capabilities = await driver.getCapabilities();
1451
+ udid = capabilities.get("appium:udid") || capabilities.get("udid");
1452
+ }
1453
+ }
1454
+ const deviceArg = udid ? `-s ${udid}` : "";
1455
+ const buffer = execSync(`adb ${deviceArg} exec-out screencap -p`, {
1456
+ encoding: "buffer",
1457
+ maxBuffer: 50 * 1024 * 1024
1458
+ });
1459
+ screenshot = buffer.toString("base64");
1460
+ } catch (e) {
1461
+ globalLogger.warn("ADB screenshot failed, falling back to Appium screenshot");
1462
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
1463
+ const screenshotResponse = await axios.get(url);
1464
+ screenshot = screenshotResponse.data.value;
1465
+ }
1466
+ } else {
1467
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
1468
+ const screenshotResponse = await axios.get(url);
1469
+ screenshot = screenshotResponse.data.value;
1470
+ if (shouldScale) {
1471
+ globalLogger.debug(`Resizing iOS screenshot to ${appiumSessionConfig.size.width}x${appiumSessionConfig.size.height}`);
1472
+ const imageBuffer = Buffer.from(screenshot, "base64");
1473
+ const transformedImage = await sharp(imageBuffer).resize(appiumSessionConfig.size.width, appiumSessionConfig.size.height).toBuffer();
1474
+ screenshot = transformedImage.toString("base64");
1475
+ }
1173
1476
  }
1174
1477
  return screenshot;
1175
1478
  }
@@ -1219,41 +1522,71 @@ class GptDriver {
1219
1522
  }
1220
1523
  }
1221
1524
  async aiExecute(commandOrOptions, options) {
1525
+ const startTime = performance.now();
1526
+ globalLogger.debug(`[Performance] aiExecute started at ${startTime.toFixed(2)}ms`);
1222
1527
  const command = typeof commandOrOptions === "string" ? commandOrOptions : commandOrOptions.command;
1223
1528
  const opts = typeof commandOrOptions === "string" ? options : commandOrOptions;
1224
1529
  const appiumHandler = opts?.appiumHandler;
1225
1530
  const cachingMode = opts?.cachingMode;
1226
1531
  const useSmartLoop = opts?.useSmartLoop ?? false;
1227
1532
  if (!this.appiumSessionStarted) {
1533
+ const sessionStartTime = performance.now();
1534
+ globalLogger.debug(`[Performance] Starting session...`);
1228
1535
  await this.startSession();
1536
+ const sessionEndTime = performance.now();
1537
+ globalLogger.debug(`[Performance] Session started in ${(sessionEndTime - sessionStartTime).toFixed(2)}ms`);
1229
1538
  }
1230
1539
  globalLogger.info(`Executing command: ${command}`);
1231
1540
  const driver = this.driver;
1232
1541
  if (appiumHandler != null) {
1233
1542
  try {
1543
+ const handlerStartTime = performance.now();
1544
+ globalLogger.debug(`[Performance] Executing custom Appium handler...`);
1234
1545
  await this.takeScreenshotAndLogCodeExecution(appiumHandler.toString());
1235
1546
  await appiumHandler(driver);
1547
+ const handlerEndTime = performance.now();
1236
1548
  globalLogger.debug("Custom Appium handler executed successfully");
1549
+ globalLogger.debug(`[Performance] Appium handler completed in ${(handlerEndTime - handlerStartTime).toFixed(2)}ms`);
1237
1550
  this.step_number++;
1551
+ this._stats_executedSteps++;
1552
+ const totalTime2 = performance.now() - startTime;
1553
+ globalLogger.debug(`[Performance] aiExecute total time: ${totalTime2.toFixed(2)}ms`);
1238
1554
  return;
1239
1555
  } catch (e) {
1240
1556
  globalLogger.warn("Custom Appium handler failed, falling back to AI execution");
1241
1557
  }
1242
1558
  }
1243
1559
  if (useSmartLoop) {
1244
- const ctx = this.createSmartLoopContext();
1560
+ const smartLoopStartTime = performance.now();
1561
+ globalLogger.debug(`[Performance] Starting smart loop execution...`);
1562
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1245
1563
  const result = await executeSmartLoop(ctx, {
1246
1564
  stepNumber: this.step_number,
1247
1565
  description: command,
1248
- instruction: command
1566
+ instruction: command,
1567
+ ...this.testId && {
1568
+ filepath: this.testId
1569
+ }
1249
1570
  });
1571
+ const smartLoopEndTime = performance.now();
1572
+ globalLogger.debug(`[Performance] Smart loop completed in ${(smartLoopEndTime - smartLoopStartTime).toFixed(2)}ms`);
1250
1573
  if (!result.success) {
1251
1574
  throw new Error(result.error || "Smart loop execution failed");
1252
1575
  }
1576
+ if (result.cacheHitCount) {
1577
+ this._stats_cacheHits += result.cacheHitCount;
1578
+ }
1253
1579
  this.step_number++;
1580
+ this._stats_executedSteps += result.iterations ?? 1;
1254
1581
  } else {
1582
+ const gptHandlerStartTime = performance.now();
1583
+ globalLogger.debug(`[Performance] Starting GPT handler execution...`);
1255
1584
  await this.gptHandler(command, cachingMode);
1585
+ const gptHandlerEndTime = performance.now();
1586
+ globalLogger.debug(`[Performance] GPT handler completed in ${(gptHandlerEndTime - gptHandlerStartTime).toFixed(2)}ms`);
1256
1587
  }
1588
+ const totalTime = performance.now() - startTime;
1589
+ globalLogger.debug(`[Performance] aiExecute total time: ${totalTime.toFixed(2)}ms`);
1257
1590
  }
1258
1591
  /**
1259
1592
  * Asserts a single condition using the GPTDriver.
@@ -1262,15 +1595,37 @@ class GptDriver {
1262
1595
  * If the assertion fails, an error is thrown.
1263
1596
  *
1264
1597
  * @param {string} assertion - The condition to be asserted.
1265
- * @param cachingMode - The caching mode to be used for the assertion.
1598
+ * @param {Object} options - Optional configuration object
1599
+ * @param {CachingMode} options.cachingMode - The caching mode to be used for the assertion.
1600
+ * @param {boolean} options.useSmartLoop - If true, uses the smart loop execution for optimized caching. Default: false
1266
1601
  * @throws {Error} If the assertion fails.
1602
+ *
1603
+ * @example
1604
+ * // Basic usage
1605
+ * await driver.assert('Login button is visible');
1606
+ *
1607
+ * @example
1608
+ * // With caching mode
1609
+ * await driver.assert('Login button is visible', {
1610
+ * cachingMode: "FULL_SCREEN"
1611
+ * });
1612
+ *
1613
+ * @example
1614
+ * // With smart loop enabled
1615
+ * await driver.assert('Login button is visible', {
1616
+ * useSmartLoop: true,
1617
+ * cachingMode: "FULL_SCREEN"
1618
+ * });
1267
1619
  */
1268
- async assert(assertion, cachingMode) {
1620
+ async assert(assertion, {
1621
+ cachingMode,
1622
+ useSmartLoop = false
1623
+ }) {
1269
1624
  if (!this.appiumSessionStarted) {
1270
1625
  await this.startSession();
1271
1626
  }
1272
1627
  try {
1273
- const results = await this.checkBulk([assertion], cachingMode);
1628
+ const results = await this.checkBulk([assertion], cachingMode, 2, 1e3, useSmartLoop);
1274
1629
  if (!Object.values(results).at(0)) {
1275
1630
  await this.setSessionStatus("failed");
1276
1631
  globalLogger.error(`Assertion failed: ${assertion}`);
@@ -1290,15 +1645,37 @@ class GptDriver {
1290
1645
  * If any assertion fails, an error is thrown listing all failed assertions.
1291
1646
  *
1292
1647
  * @param {string[]} assertions - An array of conditions to be asserted.
1293
- * @param cachingMode - The caching mode to be used for the assertions.
1648
+ * @param {Object} options - Optional configuration object
1649
+ * @param {CachingMode} options.cachingMode - The caching mode to be used for the assertions.
1650
+ * @param {boolean} options.useSmartLoop - If true, uses the smart loop execution. Default: false
1294
1651
  * @throws {Error} If any of the assertions fail.
1652
+ *
1653
+ * @example
1654
+ * // Basic usage
1655
+ * await driver.assertBulk(['Login button is visible', 'Username field is enabled']);
1656
+ *
1657
+ * @example
1658
+ * // With caching mode
1659
+ * await driver.assertBulk(['Login button is visible'], {
1660
+ * cachingMode: "FULL_SCREEN"
1661
+ * });
1662
+ *
1663
+ * @example
1664
+ * // With smart loop enabled
1665
+ * await driver.assertBulk(['Login button is visible'], {
1666
+ * useSmartLoop: true,
1667
+ * cachingMode: "FULL_SCREEN"
1668
+ * });
1295
1669
  */
1296
- async assertBulk(assertions, cachingMode) {
1670
+ async assertBulk(assertions, {
1671
+ cachingMode,
1672
+ useSmartLoop = false
1673
+ }) {
1297
1674
  if (!this.appiumSessionStarted) {
1298
1675
  await this.startSession();
1299
1676
  }
1300
1677
  try {
1301
- const results = await this.checkBulk(assertions, cachingMode);
1678
+ const results = await this.checkBulk(assertions, cachingMode, 2, 1e3, useSmartLoop);
1302
1679
  const failedAssertions = Object.values(results).reduce((prev, current, currentIndex) => {
1303
1680
  if (!current) {
1304
1681
  return [...prev, assertions.at(currentIndex)];
@@ -1327,14 +1704,30 @@ class GptDriver {
1327
1704
  * @param {CachingMode} cachingMode - The caching mode to be used for the conditions.
1328
1705
  * @param {number} maxRetries - The maximum number of retries if any condition fails (default: 2).
1329
1706
  * @param {number} retryDelayMs - The delay in milliseconds between retries (default: 1000).
1707
+ * @param {boolean} useSmartLoop - If true, uses the smart loop execution for optimized caching. Default: false
1330
1708
  * @returns {Promise<Record<string, boolean>>} A promise that resolves with an object mapping each condition
1331
1709
  * to a boolean indicating whether the condition was met.
1710
+ *
1711
+ * @example
1712
+ * // Basic usage
1713
+ * const results = await driver.checkBulk(['Login button is visible', 'Username field is enabled']);
1714
+ * console.log(results); // { 'Login button is visible': true, 'Username field is enabled': false }
1715
+ *
1716
+ * @example
1717
+ * // With smart loop enabled
1718
+ * const results = await driver.checkBulk(
1719
+ * ['Login button is visible'],
1720
+ * "FULL_SCREEN",
1721
+ * 2,
1722
+ * 1000,
1723
+ * true
1724
+ * );
1332
1725
  */
1333
- async checkBulk(conditions, cachingMode, maxRetries = 2, retryDelayMs = 1e3) {
1726
+ async checkBulk(conditions, cachingMode, maxRetries = 2, retryDelayMs = 1e3, useSmartLoop = false) {
1334
1727
  let attempt = 0;
1335
1728
  let results = {};
1336
1729
  while (attempt <= maxRetries) {
1337
- results = await this._checkBulkOnce(conditions, cachingMode, attempt);
1730
+ results = await this._checkBulkOnce(conditions, cachingMode, attempt, useSmartLoop);
1338
1731
  const failedConditions = Object.entries(results).filter(([_, success]) => !success).map(([key, _]) => key);
1339
1732
  if (failedConditions.length === 0) {
1340
1733
  return results;
@@ -1356,33 +1749,67 @@ class GptDriver {
1356
1749
  *
1357
1750
  * @private
1358
1751
  */
1359
- async _checkBulkOnce(conditions, cachingMode, attempt = 0) {
1752
+ async _checkBulkOnce(conditions, cachingMode, attempt = 0, useSmartLoop = false) {
1360
1753
  if (!this.appiumSessionStarted) {
1361
1754
  await this.startSession();
1362
1755
  }
1363
1756
  globalLogger.info(`Checking conditions (attempt ${attempt}): ${conditions.join(", ")}`);
1757
+ const pendingLogPromises = [];
1364
1758
  try {
1365
- let screenshot;
1366
- if (!this.useGptDriverCloud) {
1367
- screenshot = await this.getScreenshot(this.appiumSessionConfig);
1368
- }
1369
- const response = await axios.post(
1370
- `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
1371
- {
1372
- api_key: this.apiKey,
1373
- base64_screenshot: screenshot,
1374
- assertions: conditions,
1375
- command: `Assert: ${JSON.stringify(conditions)}`,
1376
- caching_mode: cachingMode ?? this.cachingMode,
1377
- step_number: this.step_number
1759
+ if (useSmartLoop) {
1760
+ const instruction = `Assert: ${JSON.stringify(conditions)}`;
1761
+ const middleLayerAssertFn = async (screenshot) => {
1762
+ return await axios.post(
1763
+ `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
1764
+ {
1765
+ api_key: this.apiKey,
1766
+ base64_screenshot: screenshot,
1767
+ assertions: conditions,
1768
+ command: `Assert: ${JSON.stringify(conditions)}`,
1769
+ caching_mode: cachingMode ?? this.cachingMode,
1770
+ step_number: this.step_number
1771
+ }
1772
+ );
1773
+ };
1774
+ const ctx = this.createSmartLoopContext({ middleLayerAssertFn, pendingLogPromises });
1775
+ const result = await executeSmartLoop(ctx, {
1776
+ stepNumber: this.step_number,
1777
+ description: instruction,
1778
+ instruction,
1779
+ ...this.testId && {
1780
+ filepath: this.testId
1781
+ }
1782
+ });
1783
+ if (result.cacheHitCount) {
1784
+ this._stats_cacheHits += result.cacheHitCount;
1378
1785
  }
1379
- );
1380
- globalLogger.debug(`Check results: ${JSON.stringify(response.data.results)}`);
1381
- return response.data.results;
1786
+ this._stats_executedSteps += result.iterations ?? 1;
1787
+ return { [instruction]: result.success };
1788
+ } else {
1789
+ let screenshot;
1790
+ if (!this.useGptDriverCloud) {
1791
+ screenshot = await this.getScreenshot(this.appiumSessionConfig);
1792
+ }
1793
+ const response = await axios.post(
1794
+ `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
1795
+ {
1796
+ api_key: this.apiKey,
1797
+ base64_screenshot: screenshot,
1798
+ assertions: conditions,
1799
+ command: `Assert: ${JSON.stringify(conditions)}`,
1800
+ caching_mode: cachingMode ?? this.cachingMode,
1801
+ step_number: this.step_number
1802
+ }
1803
+ );
1804
+ globalLogger.debug(`Check results: ${JSON.stringify(response.data.results)}`);
1805
+ return response.data.results;
1806
+ }
1382
1807
  } catch (e) {
1383
1808
  globalLogger.error("Failed to check conditions", e);
1384
1809
  await this.setSessionStatus("failed");
1385
1810
  throw e;
1811
+ } finally {
1812
+ await Promise.all(pendingLogPromises);
1386
1813
  }
1387
1814
  }
1388
1815
  /**
@@ -1475,10 +1902,12 @@ class GptDriver {
1475
1902
  * const result = await driver.executeFlow('tests/login-flow.json', { useSmartLoop: true });
1476
1903
  */
1477
1904
  async executeFlow(filePath, options) {
1905
+ const flowStartTime = performance.now();
1478
1906
  const useSmartLoop = options?.useSmartLoop ?? false;
1479
1907
  const configFilePath = this.configFilePath;
1480
1908
  let baseDir;
1481
1909
  let absolutePath;
1910
+ let parsedConfigFile;
1482
1911
  if (configFilePath) {
1483
1912
  let raw2;
1484
1913
  try {
@@ -1496,7 +1925,7 @@ class GptDriver {
1496
1925
  globalLogger.error(msg);
1497
1926
  throw new Error(msg);
1498
1927
  }
1499
- const parsedConfigFile = ConfigSchema.parse(json2);
1928
+ parsedConfigFile = ConfigSchema.parse(json2);
1500
1929
  if (path.isAbsolute(parsedConfigFile.testDir)) {
1501
1930
  baseDir = parsedConfigFile.testDir;
1502
1931
  } else {
@@ -1563,7 +1992,7 @@ ${issues}`);
1563
1992
  }
1564
1993
  visited.add(refKey);
1565
1994
  const child = await loadFlow(refPath);
1566
- const mergedParams = { ...inheritedParams, ...step.overrides ?? {} };
1995
+ const mergedParams = { ...inheritedParams, ...child.params, ...step.overrides ?? {} };
1567
1996
  const childDir = path.dirname(refPath);
1568
1997
  const childRelativePath = path.relative(baseDir, refPath).replace(/^\.\//, "");
1569
1998
  const childExpanded = await expandSteps(child.steps, mergedParams, childDir, [...stack, refKey], childRelativePath);
@@ -1579,7 +2008,15 @@ ${issues}`);
1579
2008
  }
1580
2009
  return out;
1581
2010
  };
1582
- const effectiveParams = { ...rootFlow.params ?? {} };
2011
+ const envPath = path.join(process.cwd(), ".env");
2012
+ let envVars = {};
2013
+ try {
2014
+ await node_fs.promises.access(envPath);
2015
+ const envContent = await promises.readFile(envPath, "utf-8");
2016
+ envVars = dotenv.parse(envContent);
2017
+ } catch (_) {
2018
+ }
2019
+ const effectiveParams = { ...parsedConfigFile?.variables, ...envVars, ...rootFlow.params };
1583
2020
  const rootRelativePath = path.relative(baseDir, absolutePath).replace(/^\.\//, "");
1584
2021
  const expandedSteps = await expandSteps(rootFlow.steps, effectiveParams, baseDir, [absolutePath], rootRelativePath);
1585
2022
  if (!this.appiumSessionStarted) {
@@ -1587,20 +2024,33 @@ ${issues}`);
1587
2024
  }
1588
2025
  globalLogger.info(`Executing flow '${rootFlow.name}' with ${expandedSteps.length} step(s)...`);
1589
2026
  let executed = 0;
2027
+ let cacheHits = 0;
2028
+ let lastStepWasFromFileRef = false;
2029
+ let fileRefStepNumber = 1;
1590
2030
  try {
2031
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig, false);
2032
+ const screenshotResolution = await getImageDimensions(screenshot);
1591
2033
  for (const step of expandedSteps) {
1592
2034
  const params = step.__params ?? effectiveParams;
1593
2035
  const filepath = step.__filepath ?? rootRelativePath;
1594
- const prefix = `Step #${executed + 1} [${step.type}${step.optional ? ", optional" : ""}] (filepath: ${filepath})`;
2036
+ console.log("step.__filepath", step.__filepath);
2037
+ console.log("rootRelativePath", rootRelativePath);
2038
+ const isFromFileRef = filepath !== rootRelativePath;
2039
+ console.log("isFromFileRef", isFromFileRef);
2040
+ if (!lastStepWasFromFileRef && isFromFileRef) {
2041
+ fileRefStepNumber = 1;
2042
+ }
2043
+ const effectiveStepNumber = isFromFileRef ? fileRefStepNumber : this.step_number;
2044
+ const prefix = `Step #${executed + 1} (step_number: ${effectiveStepNumber}) [${step.type}${step.optional ? ", optional" : ""}] (filepath: ${filepath})`;
1595
2045
  try {
1596
2046
  switch (step.type) {
1597
2047
  case "ai": {
1598
2048
  const instruction = this.interpolateTemplate(step.instruction, params);
1599
2049
  globalLogger.info(`${prefix}: ${instruction}`);
1600
2050
  if (useSmartLoop) {
1601
- const ctx = this.createSmartLoopContext();
2051
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1602
2052
  const result = await executeSmartLoop(ctx, {
1603
- stepNumber: this.step_number,
2053
+ stepNumber: effectiveStepNumber,
1604
2054
  description: instruction,
1605
2055
  instruction,
1606
2056
  filepath
@@ -1608,7 +2058,14 @@ ${issues}`);
1608
2058
  if (!result.success) {
1609
2059
  throw new Error(result.error || "Smart loop execution failed");
1610
2060
  }
1611
- this.step_number++;
2061
+ if (result.cacheHit) {
2062
+ cacheHits++;
2063
+ }
2064
+ if (isFromFileRef) {
2065
+ fileRefStepNumber++;
2066
+ } else {
2067
+ this.step_number++;
2068
+ }
1612
2069
  } else {
1613
2070
  await this.aiExecute({ command: instruction });
1614
2071
  }
@@ -1621,9 +2078,9 @@ ${issues}`);
1621
2078
  }
1622
2079
  globalLogger.info(`${prefix}: ${description}`);
1623
2080
  if (useSmartLoop) {
1624
- const ctx = this.createSmartLoopContext();
2081
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1625
2082
  const result = await executeSmartLoop(ctx, {
1626
- stepNumber: this.step_number,
2083
+ stepNumber: effectiveStepNumber,
1627
2084
  description,
1628
2085
  instruction: description,
1629
2086
  filepath
@@ -1631,7 +2088,14 @@ ${issues}`);
1631
2088
  if (!result.success) {
1632
2089
  throw new Error(result.error || "Smart loop execution failed");
1633
2090
  }
1634
- this.step_number++;
2091
+ if (result.cacheHit) {
2092
+ cacheHits++;
2093
+ }
2094
+ if (isFromFileRef) {
2095
+ fileRefStepNumber++;
2096
+ } else {
2097
+ this.step_number++;
2098
+ }
1635
2099
  } else {
1636
2100
  await this.aiExecute({ command: description });
1637
2101
  }
@@ -1645,9 +2109,9 @@ ${issues}`);
1645
2109
  globalLogger.info(`${prefix}: ${description}`);
1646
2110
  if (useSmartLoop) {
1647
2111
  const instruction = `Verify that: ${description}`;
1648
- const ctx = this.createSmartLoopContext();
2112
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1649
2113
  const result = await executeSmartLoop(ctx, {
1650
- stepNumber: this.step_number,
2114
+ stepNumber: effectiveStepNumber,
1651
2115
  description,
1652
2116
  instruction,
1653
2117
  filepath
@@ -1655,7 +2119,14 @@ ${issues}`);
1655
2119
  if (!result.success) {
1656
2120
  throw new Error(result.error || "Smart loop execution failed");
1657
2121
  }
1658
- this.step_number++;
2122
+ if (result.cacheHit) {
2123
+ cacheHits++;
2124
+ }
2125
+ if (isFromFileRef) {
2126
+ fileRefStepNumber++;
2127
+ } else {
2128
+ this.step_number++;
2129
+ }
1659
2130
  } else {
1660
2131
  await this.assert(description);
1661
2132
  }
@@ -1664,9 +2135,27 @@ ${issues}`);
1664
2135
  case "type": {
1665
2136
  const text = this.interpolateTemplate(step.text, params);
1666
2137
  globalLogger.info(`${prefix}: Type text`);
1667
- await this.takeScreenshotAndLogCodeExecution(`type: text=${text}`);
2138
+ this.takeScreenshotAndLogCodeExecution(`type: text=${text}`);
1668
2139
  await this.performType(text);
1669
- this.step_number++;
2140
+ if (isFromFileRef) {
2141
+ fileRefStepNumber++;
2142
+ } else {
2143
+ this.step_number++;
2144
+ }
2145
+ break;
2146
+ }
2147
+ case "enter": {
2148
+ globalLogger.info(`${prefix}: Press Enter`);
2149
+ this.takeScreenshotAndLogCodeExecution(`press: Enter`);
2150
+ await this.performPressEnter();
2151
+ if (step.delayNextStep) {
2152
+ await this._delay(step.delayNextStep);
2153
+ }
2154
+ if (isFromFileRef) {
2155
+ fileRefStepNumber++;
2156
+ } else {
2157
+ this.step_number++;
2158
+ }
1670
2159
  break;
1671
2160
  }
1672
2161
  case "scroll":
@@ -1674,9 +2163,9 @@ ${issues}`);
1674
2163
  const description = step.descriptionText ? this.interpolateTemplate(step.descriptionText, params) : void 0;
1675
2164
  if (description && useSmartLoop) {
1676
2165
  globalLogger.info(`${prefix}: ${description}`);
1677
- const ctx = this.createSmartLoopContext();
2166
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1678
2167
  const result = await executeSmartLoop(ctx, {
1679
- stepNumber: this.step_number,
2168
+ stepNumber: effectiveStepNumber,
1680
2169
  description,
1681
2170
  instruction: description,
1682
2171
  filepath
@@ -1684,43 +2173,60 @@ ${issues}`);
1684
2173
  if (!result.success) {
1685
2174
  throw new Error(result.error || "Smart loop execution failed");
1686
2175
  }
1687
- this.step_number++;
2176
+ if (result.cacheHit) {
2177
+ cacheHits++;
2178
+ }
2179
+ if (isFromFileRef) {
2180
+ fileRefStepNumber++;
2181
+ } else {
2182
+ this.step_number++;
2183
+ }
1688
2184
  } else {
1689
2185
  globalLogger.info(`${prefix}: ${step.type} ${step.direction}`);
1690
- await this.takeScreenshotAndLogCodeExecution(`${step.type}: direction=${step.direction}`);
1691
- if (step.type === "swipe") {
1692
- await this.performSwipe({
1693
- direction: step.direction,
1694
- x1: step.x1,
1695
- y1: step.y1,
1696
- x2: step.x2,
1697
- y2: step.y2,
1698
- duration: step.duration
1699
- });
2186
+ this.takeScreenshotAndLogCodeExecution(`${step.type}: direction=${step.direction}`);
2187
+ await this.performSwipe({
2188
+ direction: step.direction,
2189
+ x1: step.x1,
2190
+ y1: step.y1,
2191
+ x2: step.x2,
2192
+ y2: step.y2,
2193
+ duration: step.duration,
2194
+ screenshotHeight: screenshotResolution.height,
2195
+ screenshotWidth: screenshotResolution.width
2196
+ });
2197
+ if (isFromFileRef) {
2198
+ fileRefStepNumber++;
1700
2199
  } else {
1701
- await this.performSwipe({ direction: step.direction });
2200
+ this.step_number++;
1702
2201
  }
1703
- this.step_number++;
1704
2202
  }
1705
2203
  break;
1706
2204
  }
1707
2205
  case "zoom": {
1708
2206
  globalLogger.info(`${prefix}: Zoom ${step.direction}`);
1709
- await this.takeScreenshotAndLogCodeExecution(`zoom: direction=${step.direction}`);
1710
- this.step_number++;
2207
+ this.takeScreenshotAndLogCodeExecution(`zoom: direction=${step.direction}`);
2208
+ if (isFromFileRef) {
2209
+ fileRefStepNumber++;
2210
+ } else {
2211
+ this.step_number++;
2212
+ }
1711
2213
  break;
1712
2214
  }
1713
2215
  case "scrollUntil": {
1714
2216
  const interpolatedText = step.text != null ? this.interpolateTemplate(step.text, params) : void 0;
1715
2217
  globalLogger.info(`${prefix}: Scroll until ${interpolatedText ?? step.elementId}`);
1716
- await this.takeScreenshotAndLogCodeExecution(`scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`);
2218
+ this.takeScreenshotAndLogCodeExecution(`scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`);
1717
2219
  await this.performScrollUntil({
1718
2220
  direction: step.direction,
1719
2221
  text: interpolatedText,
1720
2222
  elementId: step.elementId,
1721
2223
  maxScrolls: step.maxScrolls
1722
2224
  });
1723
- this.step_number++;
2225
+ if (isFromFileRef) {
2226
+ fileRefStepNumber++;
2227
+ } else {
2228
+ this.step_number++;
2229
+ }
1724
2230
  break;
1725
2231
  }
1726
2232
  case "deeplink": {
@@ -1728,7 +2234,7 @@ ${issues}`);
1728
2234
  const bundleId = params["bundleId"];
1729
2235
  const url = this.interpolateTemplate(step.url, params);
1730
2236
  globalLogger.info(`${prefix}: Open deeplink ${url}`);
1731
- await this.takeScreenshotAndLogCodeExecution(`openDeepLinkUrl: url=${url}`);
2237
+ this.takeScreenshotAndLogCodeExecution(`openDeepLinkUrl: url=${url}`);
1732
2238
  await this.openDeepLinkUrl({ url, package: pkg, bundleId });
1733
2239
  break;
1734
2240
  }
@@ -1736,10 +2242,12 @@ ${issues}`);
1736
2242
  throw new Error(`Unsupported step type at execution: ${step.type}`);
1737
2243
  }
1738
2244
  }
2245
+ lastStepWasFromFileRef = isFromFileRef;
1739
2246
  executed++;
1740
2247
  } catch (err) {
1741
2248
  if (step.optional) {
1742
2249
  globalLogger.warn(`${prefix} failed but marked optional. Continuing. Error: ${err.message}`);
2250
+ lastStepWasFromFileRef = isFromFileRef;
1743
2251
  continue;
1744
2252
  }
1745
2253
  throw err;
@@ -1752,6 +2260,16 @@ ${issues}`);
1752
2260
  }
1753
2261
  throw e;
1754
2262
  }
2263
+ const flowEndTime = performance.now();
2264
+ const totalTime = (flowEndTime - flowStartTime) / 1e3;
2265
+ globalLogger.info(`
2266
+ ${"=".repeat(50)}`);
2267
+ globalLogger.info(`Flow Execution Summary:`);
2268
+ globalLogger.info(`Total Test Time: ${totalTime.toFixed(2)} seconds`);
2269
+ globalLogger.info(`Total Steps executed: ${executed}`);
2270
+ globalLogger.info(`Total Step Cache Hits: ${cacheHits}`);
2271
+ globalLogger.info(`${"=".repeat(50)}
2272
+ `);
1755
2273
  return rootFlow;
1756
2274
  }
1757
2275
  async gptHandler(command, cachingMode) {
@@ -1759,8 +2277,15 @@ ${issues}`);
1759
2277
  let conditionSucceeded = false;
1760
2278
  while (!conditionSucceeded) {
1761
2279
  let screenshot;
2280
+ let originalScreenshotBase64 = null;
1762
2281
  if (!this.useGptDriverCloud) {
1763
- screenshot = await this.getScreenshot(this.appiumSessionConfig);
2282
+ const stabilityResult = await waitForStableScreen(
2283
+ () => this.getScreenshot(this.appiumSessionConfig)
2284
+ );
2285
+ screenshot = stabilityResult.originalScreenshotBase64;
2286
+ if (!stabilityResult.stable) {
2287
+ globalLogger.warn("Screen did not stabilize within timeout, proceeding with last screenshot");
2288
+ }
1764
2289
  }
1765
2290
  globalLogger.info("Requesting next action from GPT Driver...");
1766
2291
  const response = await axios.request(
@@ -1788,7 +2313,6 @@ ${issues}`);
1788
2313
  for (const appiumCommand of executeResponse.commands) {
1789
2314
  await this.executeCommand(appiumCommand);
1790
2315
  }
1791
- await delay(1500);
1792
2316
  }
1793
2317
  this.step_number = this.step_number + 1;
1794
2318
  globalLogger.info("Command execution completed successfully");
@@ -1816,13 +2340,13 @@ ${issues}`);
1816
2340
  });
1817
2341
  }
1818
2342
  }
1819
- async logCodeExecution(screenshot, command) {
2343
+ async logCodeExecution(screenshot, command, isCacheHit) {
1820
2344
  try {
1821
- const screenshot2 = await this.getScreenshot(this.appiumSessionConfig);
1822
2345
  await axios.post(`${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/log_code_execution`, {
1823
2346
  api_key: this.apiKey,
1824
- base64_screenshot: screenshot2,
1825
- command
2347
+ base64_screenshot: screenshot,
2348
+ command,
2349
+ from_cache: !!isCacheHit
1826
2350
  });
1827
2351
  } catch (e) {
1828
2352
  globalLogger.error("Failed to log code execution", e);