gpt-driver-node 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,8 +1,10 @@
1
1
  import { promises } from 'node:fs';
2
2
  import path from 'node:path';
3
+ import { readFile } from 'node:fs/promises';
3
4
  import axios from 'axios';
4
5
  import sharp from 'sharp';
5
6
  import { attach } from 'webdriverio';
7
+ import dotenv from 'dotenv';
6
8
  import winston from 'winston';
7
9
  import { z } from 'zod';
8
10
  import crypto from 'node:crypto';
@@ -91,6 +93,75 @@ ${logStyles.gray(stack)}` : logMessage;
91
93
  ]
92
94
  });
93
95
 
96
/**
 * Polls the screen until two consecutive captures look the same, or the timeout elapses.
 *
 * Every capture is decoded from base64, stretched to exactly
 * `downscaleWidth` x `downscaleHeight` (sharp `fit: "fill"`), converted to raw RGBA
 * and compared against the previous capture with `areImagesSimilar`.
 *
 * At least one capture is always taken, even when `maxTimeoutSec` is zero or negative,
 * so `originalScreenshotBase64` is never left at its initial `null`.
 *
 * @param {() => Promise<string>} getScreenshot - Async callback resolving to a base64 screenshot.
 * @param {Object} [options] - Tuning knobs; `null`/`undefined` means "all defaults".
 * @param {number} [options.maxTimeoutSec=5] - How long to keep polling, in seconds.
 * @param {number} [options.intervalSec=0.01] - Pause between two captures, in seconds.
 * @param {number} [options.tolerance=1e-5] - Highest accepted ratio of differing pixels.
 * @param {number} [options.pixelThreshold=0] - Per-channel difference still counted as equal.
 * @param {number} [options.downscaleWidth=600] - Width of the comparison image.
 * @param {number} [options.downscaleHeight=600] - Height of the comparison image.
 * @returns {Promise<{stable: boolean, originalScreenshotBase64: string}>} `stable` tells whether
 *   two consecutive captures matched; the screenshot is the latest full-size capture.
 * @throws Rethrows (after logging a warning) any error raised while capturing or processing.
 */
const waitForStableScreen = async (getScreenshot, options = {}) => {
  const {
    maxTimeoutSec = 5,
    intervalSec = 0.01,
    tolerance = 1e-5,
    pixelThreshold = 0,
    downscaleWidth = 600,
    downscaleHeight = 600
  } = options ?? {};
  const startTime = Date.now();
  const maxTimeoutMillis = maxTimeoutSec * 1e3;
  const intervalMillis = intervalSec * 1e3;
  let previousDownsampledBuffer = null;
  let lastScreenshotBase64 = null;
  // do/while instead of while: the first capture must happen unconditionally so the
  // caller always receives a usable screenshot, whatever the timeout value is.
  do {
    try {
      const screenshotBase64 = await getScreenshot();
      lastScreenshotBase64 = screenshotBase64;
      const imageBuffer = Buffer.from(screenshotBase64, "base64");
      const downsampledBuffer = await sharp(imageBuffer)
        .resize(downscaleWidth, downscaleHeight, { fit: "fill" })
        .ensureAlpha()
        .raw()
        .toBuffer();
      if (previousDownsampledBuffer) {
        const isSimilar = areImagesSimilar(
          previousDownsampledBuffer,
          downsampledBuffer,
          downscaleWidth,
          downscaleHeight,
          tolerance,
          pixelThreshold
        );
        if (isSimilar) {
          return {
            stable: true,
            originalScreenshotBase64: screenshotBase64
          };
        }
      }
      previousDownsampledBuffer = downsampledBuffer;
    } catch (e) {
      globalLogger.warn(`Error during stability check: ${e}`);
      throw e;
    }
    await delay(intervalMillis);
  } while (Date.now() - startTime < maxTimeoutMillis);
  return {
    stable: false,
    originalScreenshotBase64: lastScreenshotBase64
  };
};
144
/**
 * Decides whether two raw pixel buffers show (almost) the same image.
 *
 * The buffers are read in 4-byte steps; only the first three bytes of each step
 * (red, green, blue) are compared, the fourth byte is ignored. A pixel counts as
 * different when any of its three channels differs by more than `pixelThreshold`.
 *
 * @param {Uint8Array} buffer1 - First pixel buffer (4 bytes per pixel).
 * @param {Uint8Array} buffer2 - Second pixel buffer (4 bytes per pixel).
 * @param {number} width - Image width in pixels.
 * @param {number} height - Image height in pixels.
 * @param {number} tolerance - Highest accepted ratio `differentPixels / (width * height)`.
 * @param {number} pixelThreshold - Highest per-channel difference still treated as equal.
 * @returns {boolean} `true` when the ratio of differing pixels is at most `tolerance`;
 *   `false` when it is higher or when the buffers have different lengths.
 */
function areImagesSimilar(buffer1, buffer2, width, height, tolerance, pixelThreshold) {
  if (buffer1.length !== buffer2.length) {
    return false;
  }
  const totalPixels = width * height;
  let differentPixels = 0;
  for (let offset = 0; offset < buffer1.length; offset += 4) {
    const channelDiffers =
      Math.abs(buffer1[offset] - buffer2[offset]) > pixelThreshold ||
      Math.abs(buffer1[offset + 1] - buffer2[offset + 1]) > pixelThreshold ||
      Math.abs(buffer1[offset + 2] - buffer2[offset + 2]) > pixelThreshold;
    if (!channelDiffers) {
      continue;
    }
    differentPixels++;
    // The ratio only grows for a positive pixel count, so once it passes the
    // tolerance the verdict is final and the rest of the buffer can be skipped.
    if (totalPixels > 0 && differentPixels / totalPixels > tolerance) {
      return false;
    }
  }
  // With zero pixels the ratio would be 0/0 (NaN) and every comparison would fail;
  // treat "nothing differs" as ratio 0 and "something differs" as an infinite ratio.
  let diffRatio;
  if (totalPixels === 0) {
    diffRatio = differentPixels === 0 ? 0 : Infinity;
  } else {
    diffRatio = differentPixels / totalPixels;
  }
  return diffRatio <= tolerance;
}
164
+
94
165
  const SavableStepBaseSchema = z.object({
95
166
  id: z.number().optional(),
96
167
  descriptionText: z.string().optional(),
@@ -123,9 +194,8 @@ const SavableSwipeStepSchema = SavableStepBaseSchema.extend({
123
194
  y2: z.number().optional(),
124
195
  duration: z.number().optional().default(500)
125
196
  });
126
// A scroll step carries every field of the swipe step schema; only the `type`
// discriminator is replaced with the literal "scroll".
const savableScrollStepType = z.literal("scroll");
const SavableScrollStepSchema = SavableSwipeStepSchema.extend({ type: savableScrollStepType });
130
200
  const SavableZoomStepSchema = SavableStepBaseSchema.extend({
131
201
  type: z.literal("zoom"),
@@ -146,6 +216,10 @@ const SavableAIStepSchema = SavableStepBaseSchema.extend({
146
216
  type: z.literal("ai"),
147
217
  instruction: z.string()
148
218
  });
219
// Saved step with the discriminator "enter". `delayNextStep` may be omitted and
// then falls back to 500 (presumably milliseconds, like other durations here — confirm).
const savableEnterStepShape = {
  type: z.literal("enter"),
  delayNextStep: z.number().optional().default(500)
};
const SavableEnterStepSchema = SavableStepBaseSchema.extend(savableEnterStepShape);
149
223
  const SavableFileRefStepSchema = SavableStepBaseSchema.extend({
150
224
  type: z.literal("fileRef"),
151
225
  path: z.string(),
@@ -170,6 +244,8 @@ const SavableStepSchema = z.discriminatedUnion("type", [
170
244
  // type: 'deeplink'
171
245
  SavableAIStepSchema,
172
246
  // type: 'ai'
247
+ SavableEnterStepSchema,
248
+ // type: 'enter'
173
249
  SavableFileRefStepSchema
174
250
  // type: 'fileRef'
175
251
  ]);
@@ -191,7 +267,7 @@ const CACHE_SERVER_URL = "https://cache.mobileboost.io";
191
267
  const GPT_DRIVER_BASE_URL = "https://api.mobileboost.io";
192
268
  const RESCALE_FACTOR = 4;
193
269
  const SMART_LOOP_MAX_ITERATIONS = 15;
194
- const CACHE_RETRY_MS = 2e3;
270
+ const CACHE_RETRY_MS = 1500;
195
271
  const CACHE_CHECK_INTERVAL_MS = 500;
196
272
 
197
273
  function generateCacheHash(apiKey, filepath, stepNumber, description, platform, resolution) {
@@ -275,15 +351,24 @@ async function executeFromCache(params) {
275
351
  return { found: false };
276
352
  } catch (error) {
277
353
  if (axios.isAxiosError(error)) {
278
- globalLogger.warn(`[Cache] Cache lookup failed: ${error.response?.data || error.message}`);
354
+ const errorMsg = `Cache lookup failed: ${error.response?.data || error.message}`;
355
+ globalLogger.warn(`[Cache] ${errorMsg}`);
279
356
  } else {
280
- globalLogger.error(`[Cache] Error executing from cache: ${error}`);
357
+ const errorMsg = `Error executing from cache: ${error}`;
358
+ globalLogger.error(`[Cache] ${errorMsg}`);
281
359
  }
282
360
  return { found: false };
283
361
  }
284
362
  }
285
363
  async function populateCache(params) {
286
364
  try {
365
+ globalLogger.debug(`[populateCache] Parameters: ${JSON.stringify({
366
+ stepNumber: params.stepNumber,
367
+ filepath: params.filepath,
368
+ platform: params.platform,
369
+ screenResolution: params.screenResolution,
370
+ executionDataCount: params.executionData.length
371
+ })}`);
287
372
  const hash = generateCacheHash(
288
373
  params.apiKey,
289
374
  params.filepath,
@@ -292,13 +377,15 @@ async function populateCache(params) {
292
377
  params.platform,
293
378
  params.screenResolution
294
379
  );
380
+ globalLogger.debug(`[populateCache] Generated hash: ${hash}`);
295
381
  const payload = await Promise.all(params.executionData.map(async (item) => {
296
382
  const resizedBuffer = await resizeScreenshotForCache(item.screenshot);
383
+ const scaledScreenshotBase64 = resizedBuffer.toString("base64");
297
384
  const scaledCommands = item.commands.map(
298
385
  (cmd) => scaleCommand(cmd, "divide")
299
386
  );
300
387
  return {
301
- screenshot: resizedBuffer.toString("base64"),
388
+ screenshot: scaledScreenshotBase64,
302
389
  commands: scaledCommands
303
390
  };
304
391
  }));
@@ -310,9 +397,11 @@ async function populateCache(params) {
310
397
  return { success: true };
311
398
  } catch (error) {
312
399
  if (axios.isAxiosError(error)) {
313
- globalLogger.error(`[Cache] Failed to populate cache: ${error.response?.data || error.message}`);
400
+ const errorMsg = `Failed to populate cache: ${error.response?.data || error.message}`;
401
+ globalLogger.error(`[Cache] ${errorMsg}`);
314
402
  } else {
315
- globalLogger.error(`[Cache] Error populating cache: ${error}`);
403
+ const errorMsg = `Error populating cache: ${error}`;
404
+ globalLogger.error(`[Cache] ${errorMsg}`);
316
405
  }
317
406
  return { success: false };
318
407
  }
@@ -442,6 +531,10 @@ function isWaitCommand(cmd) {
442
531
  function isScrollCommand(cmd) {
443
532
  return cmd.startsWith("scroll:");
444
533
  }
534
/**
 * Tells whether a command asks for the Enter key to be pressed.
 *
 * The command must begin with "press:" (case-insensitive, surrounding whitespace
 * ignored) and the remainder must be exactly "enter". Anchoring to the start keeps
 * commands that merely contain the text, e.g. `type: press: enter`, from being
 * mistaken for a key press — the same convention the sibling predicates follow
 * with `startsWith`.
 *
 * @param {string} cmd - Raw command text.
 * @returns {boolean} `true` for commands such as "press: Enter" or "press:enter".
 */
function isEnterCommand(cmd) {
  const pressPrefix = "press:";
  const normalized = cmd.trim().toLowerCase();
  if (!normalized.startsWith(pressPrefix)) {
    return false;
  }
  const key = normalized.slice(pressPrefix.length).trim();
  return key === "enter";
}
445
538
  function isTypeCommand(cmd) {
446
539
  return cmd.startsWith("type:");
447
540
  }
@@ -459,8 +552,8 @@ function parseSlideCommand(cmd) {
459
552
  const directionMap = {
460
553
  down: "up",
461
554
  up: "down",
462
- left: "right",
463
- right: "left"
555
+ left: "left",
556
+ right: "right"
464
557
  };
465
558
  return {
466
559
  direction: directionMap[extractedDirection],
@@ -477,22 +570,38 @@ async function executeSmartLoop(ctx, params) {
477
570
  let lastCacheIndex = void 0;
478
571
  let anyCacheMiss = false;
479
572
  let everHadCacheHit = false;
573
+ let totalCacheHits = 0;
480
574
  const currentExecutionData = [];
575
+ const stepStartTime = performance.now();
576
+ const iterationTimes = [];
481
577
  globalLogger.info(`[SmartLoop] Starting for step ${params.stepNumber}: "${params.description}"`);
578
+ globalLogger.debug(`[Performance][SmartLoop] Step execution started at ${stepStartTime.toFixed(2)}ms`);
482
579
  try {
483
580
  while (loopCount < SMART_LOOP_MAX_ITERATIONS) {
581
+ const iterationStartTime = performance.now();
582
+ globalLogger.debug(`[SmartLoop] Starting iteration ${loopCount + 1}/${SMART_LOOP_MAX_ITERATIONS}`);
484
583
  let screenshot = "";
485
584
  let commands = [];
486
585
  let isCacheHit = false;
487
- const firstScreenshot = await ctx.getScreenshot();
488
- const screenshotResolution = await getImageDimensions(firstScreenshot);
586
+ let screenshotResolution = void 0;
587
+ const stabilityResult = await waitForStableScreen(ctx.getScreenshot);
489
588
  for (let attempt = 0; attempt < maxCacheAttempts; attempt++) {
490
- screenshot = await ctx.getScreenshot();
589
+ const screenshotStartTime = performance.now();
590
+ const screenshotEndTime = performance.now();
591
+ globalLogger.debug(`[Performance][SmartLoop] Screenshot capture took ${(screenshotEndTime - screenshotStartTime).toFixed(2)}ms`);
592
+ if (!stabilityResult.stable) {
593
+ globalLogger.warn("[SmartLoop] Screen did not stabilize within timeout, proceeding with last screenshot");
594
+ }
595
+ screenshot = stabilityResult.originalScreenshotBase64;
596
+ if (screenshotResolution === void 0) {
597
+ screenshotResolution = await getImageDimensions(screenshot);
598
+ }
491
599
  const sizeInBytes = screenshot.length * 0.75;
492
600
  const sizeInMB = (sizeInBytes / (1024 * 1024)).toFixed(2);
493
601
  globalLogger.debug(`[SmartLoop] Captured screenshot: ~${sizeInMB} MB`);
494
602
  try {
495
603
  globalLogger.debug(`[SmartLoop] Checking cache (Attempt ${attempt + 1}/${maxCacheAttempts})`);
604
+ const cacheCheckStartTime = performance.now();
496
605
  const cacheResult = await executeFromCache({
497
606
  apiKey: ctx.organisationId,
498
607
  stepNumber: params.stepNumber,
@@ -503,11 +612,14 @@ async function executeSmartLoop(ctx, params) {
503
612
  platform: ctx.platform,
504
613
  filepath: params.filepath
505
614
  });
615
+ const cacheCheckEndTime = performance.now();
616
+ globalLogger.debug(`[Performance][SmartLoop] Cache check took ${(cacheCheckEndTime - cacheCheckStartTime).toFixed(2)}ms`);
506
617
  if (cacheResult.found && cacheResult.cacheCommands) {
507
618
  commands = cacheResult.cacheCommands;
508
619
  lastCacheIndex = cacheResult.cacheIndex;
509
620
  isCacheHit = true;
510
621
  everHadCacheHit = true;
622
+ totalCacheHits++;
511
623
  globalLogger.info(`[SmartLoop] Cache Hit! (${commands.length} commands)`);
512
624
  break;
513
625
  }
@@ -519,18 +631,53 @@ async function executeSmartLoop(ctx, params) {
519
631
  await delay(CACHE_CHECK_INTERVAL_MS);
520
632
  }
521
633
  }
634
+ if (screenshotResolution === void 0) {
635
+ const screenshot2 = await ctx.getScreenshot();
636
+ screenshotResolution = await getImageDimensions(screenshot2);
637
+ }
522
638
  let aiCommands = [];
639
+ let actionDescription = [];
523
640
  if (!isCacheHit) {
524
641
  anyCacheMiss = true;
525
642
  globalLogger.info(`[SmartLoop] Cache Miss. Requesting AI agent...`);
526
- const agentResponse = await executeAgentStep({
527
- apiKey: ctx.organisationId,
528
- base64_screenshot: screenshot,
529
- instruction: params.instruction,
530
- action_history: actionHistory
531
- });
532
- aiCommands = agentResponse.appetizeCommands || [];
533
- const gptCommands = agentResponse.gptCommands || [];
643
+ const aiStartTime = performance.now();
644
+ let agentResponse;
645
+ if (ctx.middleLayerAssertFn) {
646
+ const response = await ctx.middleLayerAssertFn(screenshot);
647
+ const results = response.data.results;
648
+ const failedConditions = Object.entries(results).filter(([_, success]) => !success).map(([key, _]) => key);
649
+ if (failedConditions.length === 0) {
650
+ agentResponse = {
651
+ appetizeCommands: [
652
+ "task complete: All assertions passed"
653
+ ]
654
+ };
655
+ } else {
656
+ agentResponse = {
657
+ appetizeCommands: [
658
+ `error detected: Failed conditions: ${failedConditions.join(", ")}`
659
+ ]
660
+ };
661
+ }
662
+ } else {
663
+ agentResponse = await executeAgentStep({
664
+ apiKey: ctx.organisationId,
665
+ base64_screenshot: screenshot,
666
+ instruction: params.instruction,
667
+ action_history: actionHistory
668
+ });
669
+ }
670
+ const aiEndTime = performance.now();
671
+ globalLogger.debug(`[Performance][SmartLoop] AI agent call took ${(aiEndTime - aiStartTime).toFixed(2)}ms`);
672
+ aiCommands = agentResponse.appetizeCommands ?? [];
673
+ const gptCommands = agentResponse.gptCommands ?? [];
674
+ actionDescription = [];
675
+ const actionDescriptionIndex = gptCommands.findIndex(
676
+ (entry) => entry.startsWith("actions_description:")
677
+ );
678
+ if (actionDescriptionIndex !== -1) {
679
+ actionDescription = gptCommands.slice(actionDescriptionIndex, actionDescriptionIndex + 1);
680
+ }
534
681
  const reasoningIndex = gptCommands.findIndex((entry) => entry.startsWith("reasoning:"));
535
682
  if (reasoningIndex !== -1) {
536
683
  const parsedCommands = gptCommands.slice(reasoningIndex);
@@ -540,19 +687,23 @@ async function executeSmartLoop(ctx, params) {
540
687
  }
541
688
  actionHistory = [...actionHistory, ...parsedCommands];
542
689
  }
543
- commands = [...aiCommands];
690
+ commands = [...actionDescription, ...aiCommands];
544
691
  globalLogger.debug(`[SmartLoop] AI returned ${commands.length} command(s)`);
545
692
  }
546
693
  currentExecutionData.push({
547
694
  screenshot,
548
- commands: aiCommands.length > 0 ? aiCommands : commands
695
+ commands
549
696
  });
550
- await ctx.logCodeExecution(screenshot, commands.join("\n"));
697
+ const logPromise = ctx.logCodeExecution(screenshot, commands.join("\n"), isCacheHit);
698
+ if (ctx.pendingLogPromises) {
699
+ ctx.pendingLogPromises.push(logPromise);
700
+ }
551
701
  let actionExecuted = false;
552
702
  let taskCompleted = false;
553
703
  if (commands.length > 0) {
554
704
  globalLogger.debug(`[SmartLoop] Executing ${commands.length} command(s)`);
555
705
  }
706
+ const commandExecutionStartTime = performance.now();
556
707
  for (const cmd of commands) {
557
708
  if (isTaskComplete(cmd)) {
558
709
  taskCompleted = true;
@@ -577,6 +728,10 @@ async function executeSmartLoop(ctx, params) {
577
728
  );
578
729
  actionExecuted = true;
579
730
  }
731
+ } else if (isEnterCommand(cmd)) {
732
+ globalLogger.debug(`[SmartLoop] Pressing Enter`);
733
+ await ctx.performPressEnter();
734
+ actionExecuted = true;
580
735
  } else if (isWaitCommand(cmd)) {
581
736
  const seconds = parseWaitSeconds(cmd);
582
737
  if (seconds) {
@@ -589,6 +744,9 @@ async function executeSmartLoop(ctx, params) {
589
744
  if (direction) {
590
745
  globalLogger.debug(`[SmartLoop] Scrolling ${direction}`);
591
746
  await ctx.performSwipe({ direction });
747
+ if (isCacheHit) {
748
+ await delay(650);
749
+ }
592
750
  actionExecuted = true;
593
751
  }
594
752
  } else if (isSlideCommand(cmd)) {
@@ -626,6 +784,9 @@ async function executeSmartLoop(ctx, params) {
626
784
  screenshotHeight: screenshotResolution.height,
627
785
  duration: 500
628
786
  });
787
+ if (isCacheHit) {
788
+ await delay(650);
789
+ }
629
790
  actionExecuted = true;
630
791
  }
631
792
  } else if (isTypeCommand(cmd)) {
@@ -638,49 +799,83 @@ async function executeSmartLoop(ctx, params) {
638
799
  }
639
800
  }
640
801
  if (actionExecuted) {
802
+ const commandExecutionEndTime = performance.now();
803
+ globalLogger.debug(`[Performance][SmartLoop] Command execution took ${(commandExecutionEndTime - commandExecutionStartTime).toFixed(2)}ms`);
641
804
  if (isCacheHit) {
642
805
  actionHistory.push(...commands);
643
806
  }
644
807
  await delay(100);
645
808
  }
809
+ const iterationEndTime = performance.now();
810
+ const iterationDuration = iterationEndTime - iterationStartTime;
811
+ iterationTimes.push(iterationDuration);
812
+ globalLogger.debug(`[Performance][SmartLoop] Iteration ${loopCount + 1} completed in ${iterationDuration.toFixed(2)}ms (${(iterationDuration / 1e3).toFixed(2)}s)`);
646
813
  if (taskCompleted) {
647
814
  globalLogger.info(`[SmartLoop] Task completed successfully`);
815
+ const stepEndTime2 = performance.now();
816
+ const totalDuration2 = stepEndTime2 - stepStartTime;
817
+ const totalSeconds2 = totalDuration2 / 1e3;
818
+ const totalMinutes2 = totalSeconds2 / 60;
819
+ const averageIterationTime = iterationTimes.reduce((sum, time) => sum + time, 0) / iterationTimes.length;
820
+ globalLogger.debug(`[Performance][SmartLoop] Step execution summary:`);
821
+ globalLogger.debug(`[Performance][SmartLoop] - Total iterations: ${loopCount + 1}`);
822
+ globalLogger.debug(`[Performance][SmartLoop] - Total duration: ${totalDuration2.toFixed(2)}ms (${totalSeconds2.toFixed(2)}s / ${totalMinutes2.toFixed(2)}min)`);
823
+ globalLogger.debug(`[Performance][SmartLoop] - Average iteration time: ${averageIterationTime.toFixed(2)}ms (${(averageIterationTime / 1e3).toFixed(2)}s)`);
824
+ globalLogger.debug(`[Performance][SmartLoop] - Individual iteration times: ${iterationTimes.map((t) => `${t.toFixed(2)}ms`).join(", ")}`);
648
825
  if (anyCacheMiss && currentExecutionData.length > 0) {
649
826
  globalLogger.info(`[SmartLoop] Populating cache with ${currentExecutionData.length} frame(s)...`);
650
- try {
651
- await populateCache({
652
- apiKey: ctx.organisationId,
653
- stepNumber: params.stepNumber,
654
- stepDescription: params.description,
655
- executionData: currentExecutionData,
656
- screenResolution: screenshotResolution,
657
- platform: ctx.platform,
658
- filepath: params.filepath
659
- });
827
+ const cachePopulateStartTime = performance.now();
828
+ populateCache({
829
+ apiKey: ctx.organisationId,
830
+ stepNumber: params.stepNumber,
831
+ stepDescription: params.description,
832
+ executionData: currentExecutionData,
833
+ screenResolution: screenshotResolution,
834
+ platform: ctx.platform,
835
+ filepath: params.filepath
836
+ }).then(() => {
837
+ const cachePopulateEndTime = performance.now();
838
+ globalLogger.debug(`[Performance][SmartLoop] Cache population took ${(cachePopulateEndTime - cachePopulateStartTime).toFixed(2)}ms`);
660
839
  globalLogger.debug(`[SmartLoop] Cache populated successfully`);
661
- } catch (e) {
662
- globalLogger.warn(`[SmartLoop] Failed to populate cache: ${e.message}`);
663
- }
840
+ }).catch((e) => globalLogger.warn(`[SmartLoop] Cache population failed: ${e.message}`));
664
841
  } else if (!anyCacheMiss) {
665
842
  globalLogger.debug(`[SmartLoop] Skipping cache population (all actions were cached)`);
666
843
  }
667
844
  return {
668
845
  success: true,
669
846
  iterations: loopCount + 1,
670
- cacheHit: everHadCacheHit
847
+ cacheHit: everHadCacheHit,
848
+ cacheHitCount: totalCacheHits
671
849
  };
672
850
  }
673
851
  loopCount++;
674
852
  }
853
+ const stepEndTime = performance.now();
854
+ const totalDuration = stepEndTime - stepStartTime;
855
+ const totalSeconds = totalDuration / 1e3;
856
+ const totalMinutes = totalSeconds / 60;
857
+ globalLogger.debug(`[Performance][SmartLoop] Timeout reached after ${loopCount} iterations`);
858
+ globalLogger.debug(`[Performance][SmartLoop] - Total duration: ${totalDuration.toFixed(2)}ms (${totalSeconds.toFixed(2)}s / ${totalMinutes.toFixed(2)}min)`);
859
+ globalLogger.debug(`[Performance][SmartLoop] - Individual iteration times: ${iterationTimes.map((t) => `${t.toFixed(2)}ms`).join(", ")}`);
675
860
  throw new Error(`Smart Loop timeout after ${SMART_LOOP_MAX_ITERATIONS} iterations`);
676
861
  } catch (error) {
677
862
  const message = error instanceof Error ? error.message : String(error);
863
+ const stepEndTime = performance.now();
864
+ const totalDuration = stepEndTime - stepStartTime;
865
+ const totalSeconds = totalDuration / 1e3;
866
+ const totalMinutes = totalSeconds / 60;
678
867
  globalLogger.error(`[SmartLoop] Error: ${message}`);
868
+ globalLogger.debug(`[Performance][SmartLoop] Error occurred after ${loopCount + 1} iteration(s)`);
869
+ globalLogger.debug(`[Performance][SmartLoop] - Total duration before error: ${totalDuration.toFixed(2)}ms (${totalSeconds.toFixed(2)}s / ${totalMinutes.toFixed(2)}min)`);
870
+ if (iterationTimes.length > 0) {
871
+ globalLogger.debug(`[Performance][SmartLoop] - Individual iteration times: ${iterationTimes.map((t) => `${t.toFixed(2)}ms`).join(", ")}`);
872
+ }
679
873
  return {
680
874
  success: false,
681
875
  error: message,
682
876
  iterations: loopCount + 1,
683
- cacheHit: everHadCacheHit
877
+ cacheHit: everHadCacheHit,
878
+ cacheHitCount: totalCacheHits
684
879
  };
685
880
  }
686
881
  }
@@ -711,8 +906,13 @@ class GptDriver {
711
906
  step_number = 1;
712
907
  organisationId;
713
908
  configFilePath;
909
+ // Session Execution Stats
910
+ _stats_startTime = 0;
911
+ _stats_executedSteps = 0;
912
+ _stats_cacheHits = 0;
714
913
  // Smart loop state - maintains action history across steps for context
715
914
  globalActionHistory = [];
915
+ pendingLogPromises = [];
716
916
  /**
717
917
  * Creates an instance of the GptDriver class.
718
918
  *
@@ -785,6 +985,9 @@ class GptDriver {
785
985
  */
786
986
  async startSession() {
787
987
  globalLogger.info("Starting session...");
988
+ this._stats_startTime = performance.now();
989
+ this._stats_executedSteps = 0;
990
+ this._stats_cacheHits = 0;
788
991
  if (!this.useGptDriverCloud) {
789
992
  if (this.driver) {
790
993
  let platform;
@@ -884,6 +1087,37 @@ class GptDriver {
884
1087
  getSessionLink() {
885
1088
  return `https://app.mobileboost.io/gpt-driver/sessions/${this.gptDriverSessionId}`;
886
1089
  }
1090
+ /**
1091
+ * Stops the current GPTDriver session and updates its state.
1092
+ *
1093
+ * This method sends a request to the GPT Driver server to stop the session and logs the session status as either "failed" or "success."
1094
+ *
1095
+ * @param {"failed" | "success"} status - Indicates the outcome of the session.
1096
+ * Use "success" if the session completed as expected,
1097
+ * or "failed" if the session encountered an error or issue.
1098
+ *
1099
+ * @throws {Error} If the request to stop the session fails.
1100
+ */
1101
+ /**
1102
+ * Marks the current GPTDriver session as succeeded.
1103
+ *
1104
+ * This method stops the session and logs it as "succeeded"
1105
+ *
1106
+ * @throws {Error} If the request to stop the session fails.
1107
+ */
1108
+ async setSessionSucceeded() {
1109
+ await this.setSessionStatus("succeeded");
1110
+ }
1111
+ /**
1112
+ * Marks the current GPTDriver session as failed.
1113
+ *
1114
+ * This method stops the session and logs it as "failed."
1115
+ *
1116
+ * @throws {Error} If the request to stop the session fails.
1117
+ */
1118
+ async setSessionFailed() {
1119
+ await this.setSessionStatus("failed");
1120
+ }
887
1121
  /**
888
1122
  * Stops the current GPTDriver session and updates its state.
889
1123
  *
@@ -896,7 +1130,13 @@ class GptDriver {
896
1130
  * @throws {Error} If the request to stop the session fails.
897
1131
  */
898
1132
  async setSessionStatus(status) {
1133
+ try {
1134
+ await Promise.all(this.pendingLogPromises);
1135
+ } catch (e) {
1136
+ globalLogger.error(`Error waiting for pending log promises: ${e.message}`);
1137
+ }
899
1138
  if (this.gptDriverSessionId) {
1139
+ this.printSessionSummary();
900
1140
  globalLogger.info(`Stopping session with status: ${status}`);
901
1141
  await axios.post(
902
1142
  `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/stop`,
@@ -912,6 +1152,19 @@ class GptDriver {
912
1152
  this.globalActionHistory = [];
913
1153
  }
914
1154
  }
1155
+ printSessionSummary() {
1156
+ if (this._stats_startTime === 0) return;
1157
+ const endTime = performance.now();
1158
+ const totalTime = (endTime - this._stats_startTime) / 1e3;
1159
+ globalLogger.info(`
1160
+ ${"=".repeat(50)}`);
1161
+ globalLogger.info(`Session Execution Summary:`);
1162
+ globalLogger.info(`Total Session Time: ${totalTime.toFixed(2)} seconds`);
1163
+ globalLogger.info(`Total Steps executed: ${this._stats_executedSteps}`);
1164
+ globalLogger.info(`Total Step Cache Hits: ${this._stats_cacheHits}`);
1165
+ globalLogger.info(`${"=".repeat(50)}
1166
+ `);
1167
+ }
915
1168
  // ─────────────────────────────────────────────────────────────────────────────
916
1169
  // SMART LOOP INTEGRATION
917
1170
  // ─────────────────────────────────────────────────────────────────────────────
@@ -919,7 +1172,7 @@ class GptDriver {
919
1172
  * Creates a SmartLoopContext for the current session.
920
1173
  * This context provides all the callbacks needed by the smart loop executor.
921
1174
  */
922
- createSmartLoopContext() {
1175
+ createSmartLoopContext(options) {
923
1176
  if (!this.organisationId) {
924
1177
  throw new Error("Organisation ID is missing, please set it in the GPTDriver constructor");
925
1178
  }
@@ -931,8 +1184,11 @@ class GptDriver {
931
1184
  performTap: (x, y, screenshotWidth, screenshotHeight) => this.performTap(x, y, screenshotWidth, screenshotHeight),
932
1185
  performSwipe: (params) => this.performSwipe(params),
933
1186
  performType: (text) => this.performType(text),
934
- logCodeExecution: async (screenshot, command) => this.logCodeExecution(screenshot, command),
935
- organisationId: this.organisationId
1187
+ performPressEnter: () => this.performPressEnter(),
1188
+ logCodeExecution: async (screenshot, command, isCacheHit) => this.logCodeExecution(screenshot, command, isCacheHit),
1189
+ organisationId: this.organisationId,
1190
+ middleLayerAssertFn: options?.middleLayerAssertFn,
1191
+ pendingLogPromises: options?.pendingLogPromises
936
1192
  };
937
1193
  }
938
1194
  /**
@@ -1033,6 +1289,24 @@ class GptDriver {
1033
1289
  await client.keys(text.split(""));
1034
1290
  }
1035
1291
  }
1292
+ async performPressEnter() {
1293
+ const client = await this.getWdioClient();
1294
+ const platform = this.appiumSessionConfig?.platform;
1295
+ if (platform === "iOS") {
1296
+ await client.performActions([
1297
+ {
1298
+ type: "key",
1299
+ id: "keyboard",
1300
+ actions: [
1301
+ { type: "keyDown", value: "\uE007" },
1302
+ { type: "keyUp", value: "\uE007" }
1303
+ ]
1304
+ }
1305
+ ]);
1306
+ } else {
1307
+ await client.keys(["Enter"]);
1308
+ }
1309
+ }
1036
1310
  clamp(value, min, max) {
1037
1311
  return Math.max(min, Math.min(max, value));
1038
1312
  }
@@ -1160,14 +1434,43 @@ class GptDriver {
1160
1434
  }
1161
1435
  async getScreenshot(appiumSessionConfig, shouldScale = true) {
1162
1436
  globalLogger.debug("Capturing screenshot...");
1163
- const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
1164
- const screenshotResponse = await axios.get(url);
1165
- let screenshot = await screenshotResponse.data.value;
1166
- if (appiumSessionConfig.platform === "iOS" && shouldScale) {
1167
- globalLogger.debug(`Resizing iOS screenshot to ${appiumSessionConfig.size.width}x${appiumSessionConfig.size.height}`);
1168
- const imageBuffer = Buffer.from(screenshot, "base64");
1169
- const transformedImage = await sharp(imageBuffer).resize(appiumSessionConfig.size.width, appiumSessionConfig.size.height).toBuffer();
1170
- screenshot = transformedImage.toString("base64");
1437
+ let screenshot;
1438
+ if (appiumSessionConfig.platform === "Android") {
1439
+ try {
1440
+ const { execSync } = await import('node:child_process');
1441
+ let udid;
1442
+ if (this.driver) {
1443
+ if (this.driver.sessionId != null) {
1444
+ const caps = this.driver.capabilities;
1445
+ udid = caps["appium:udid"] || caps["udid"];
1446
+ } else {
1447
+ const driver = this.driver;
1448
+ const capabilities = await driver.getCapabilities();
1449
+ udid = capabilities.get("appium:udid") || capabilities.get("udid");
1450
+ }
1451
+ }
1452
+ const deviceArg = udid ? `-s ${udid}` : "";
1453
+ const buffer = execSync(`adb ${deviceArg} exec-out screencap -p`, {
1454
+ encoding: "buffer",
1455
+ maxBuffer: 50 * 1024 * 1024
1456
+ });
1457
+ screenshot = buffer.toString("base64");
1458
+ } catch (e) {
1459
+ globalLogger.warn("ADB screenshot failed, falling back to Appium screenshot");
1460
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
1461
+ const screenshotResponse = await axios.get(url);
1462
+ screenshot = screenshotResponse.data.value;
1463
+ }
1464
+ } else {
1465
+ const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
1466
+ const screenshotResponse = await axios.get(url);
1467
+ screenshot = screenshotResponse.data.value;
1468
+ if (shouldScale) {
1469
+ globalLogger.debug(`Resizing iOS screenshot to ${appiumSessionConfig.size.width}x${appiumSessionConfig.size.height}`);
1470
+ const imageBuffer = Buffer.from(screenshot, "base64");
1471
+ const transformedImage = await sharp(imageBuffer).resize(appiumSessionConfig.size.width, appiumSessionConfig.size.height).toBuffer();
1472
+ screenshot = transformedImage.toString("base64");
1473
+ }
1171
1474
  }
1172
1475
  return screenshot;
1173
1476
  }
@@ -1217,41 +1520,71 @@ class GptDriver {
1217
1520
  }
1218
1521
  }
1219
1522
  async aiExecute(commandOrOptions, options) {
1523
+ const startTime = performance.now();
1524
+ globalLogger.debug(`[Performance] aiExecute started at ${startTime.toFixed(2)}ms`);
1220
1525
  const command = typeof commandOrOptions === "string" ? commandOrOptions : commandOrOptions.command;
1221
1526
  const opts = typeof commandOrOptions === "string" ? options : commandOrOptions;
1222
1527
  const appiumHandler = opts?.appiumHandler;
1223
1528
  const cachingMode = opts?.cachingMode;
1224
1529
  const useSmartLoop = opts?.useSmartLoop ?? false;
1225
1530
  if (!this.appiumSessionStarted) {
1531
+ const sessionStartTime = performance.now();
1532
+ globalLogger.debug(`[Performance] Starting session...`);
1226
1533
  await this.startSession();
1534
+ const sessionEndTime = performance.now();
1535
+ globalLogger.debug(`[Performance] Session started in ${(sessionEndTime - sessionStartTime).toFixed(2)}ms`);
1227
1536
  }
1228
1537
  globalLogger.info(`Executing command: ${command}`);
1229
1538
  const driver = this.driver;
1230
1539
  if (appiumHandler != null) {
1231
1540
  try {
1541
+ const handlerStartTime = performance.now();
1542
+ globalLogger.debug(`[Performance] Executing custom Appium handler...`);
1232
1543
  await this.takeScreenshotAndLogCodeExecution(appiumHandler.toString());
1233
1544
  await appiumHandler(driver);
1545
+ const handlerEndTime = performance.now();
1234
1546
  globalLogger.debug("Custom Appium handler executed successfully");
1547
+ globalLogger.debug(`[Performance] Appium handler completed in ${(handlerEndTime - handlerStartTime).toFixed(2)}ms`);
1235
1548
  this.step_number++;
1549
+ this._stats_executedSteps++;
1550
+ const totalTime2 = performance.now() - startTime;
1551
+ globalLogger.debug(`[Performance] aiExecute total time: ${totalTime2.toFixed(2)}ms`);
1236
1552
  return;
1237
1553
  } catch (e) {
1238
1554
  globalLogger.warn("Custom Appium handler failed, falling back to AI execution");
1239
1555
  }
1240
1556
  }
1241
1557
  if (useSmartLoop) {
1242
- const ctx = this.createSmartLoopContext();
1558
+ const smartLoopStartTime = performance.now();
1559
+ globalLogger.debug(`[Performance] Starting smart loop execution...`);
1560
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1243
1561
  const result = await executeSmartLoop(ctx, {
1244
1562
  stepNumber: this.step_number,
1245
1563
  description: command,
1246
- instruction: command
1564
+ instruction: command,
1565
+ ...this.testId && {
1566
+ filepath: this.testId
1567
+ }
1247
1568
  });
1569
+ const smartLoopEndTime = performance.now();
1570
+ globalLogger.debug(`[Performance] Smart loop completed in ${(smartLoopEndTime - smartLoopStartTime).toFixed(2)}ms`);
1248
1571
  if (!result.success) {
1249
1572
  throw new Error(result.error || "Smart loop execution failed");
1250
1573
  }
1574
+ if (result.cacheHitCount) {
1575
+ this._stats_cacheHits += result.cacheHitCount;
1576
+ }
1251
1577
  this.step_number++;
1578
+ this._stats_executedSteps += result.iterations ?? 1;
1252
1579
  } else {
1580
+ const gptHandlerStartTime = performance.now();
1581
+ globalLogger.debug(`[Performance] Starting GPT handler execution...`);
1253
1582
  await this.gptHandler(command, cachingMode);
1583
+ const gptHandlerEndTime = performance.now();
1584
+ globalLogger.debug(`[Performance] GPT handler completed in ${(gptHandlerEndTime - gptHandlerStartTime).toFixed(2)}ms`);
1254
1585
  }
1586
+ const totalTime = performance.now() - startTime;
1587
+ globalLogger.debug(`[Performance] aiExecute total time: ${totalTime.toFixed(2)}ms`);
1255
1588
  }
1256
1589
  /**
1257
1590
  * Asserts a single condition using the GPTDriver.
@@ -1260,15 +1593,37 @@ class GptDriver {
1260
1593
  * If the assertion fails, an error is thrown.
1261
1594
  *
1262
1595
  * @param {string} assertion - The condition to be asserted.
1263
- * @param cachingMode - The caching mode to be used for the assertion.
1596
+ * @param {Object} options - Optional configuration object
1597
+ * @param {CachingMode} options.cachingMode - The caching mode to be used for the assertion.
1598
+ * @param {boolean} options.useSmartLoop - If true, uses the smart loop execution for optimized caching. Default: false
1264
1599
  * @throws {Error} If the assertion fails.
1600
+ *
1601
+ * @example
1602
+ * // Basic usage
1603
+ * await driver.assert('Login button is visible');
1604
+ *
1605
+ * @example
1606
+ * // With caching mode
1607
+ * await driver.assert('Login button is visible', {
1608
+ * cachingMode: "FULL_SCREEN"
1609
+ * });
1610
+ *
1611
+ * @example
1612
+ * // With smart loop enabled
1613
+ * await driver.assert('Login button is visible', {
1614
+ * useSmartLoop: true,
1615
+ * cachingMode: "FULL_SCREEN"
1616
+ * });
1265
1617
  */
1266
- async assert(assertion, cachingMode) {
1618
+ async assert(assertion, {
1619
+ cachingMode,
1620
+ useSmartLoop = false
1621
+ }) {
1267
1622
  if (!this.appiumSessionStarted) {
1268
1623
  await this.startSession();
1269
1624
  }
1270
1625
  try {
1271
- const results = await this.checkBulk([assertion], cachingMode);
1626
+ const results = await this.checkBulk([assertion], cachingMode, 2, 1e3, useSmartLoop);
1272
1627
  if (!Object.values(results).at(0)) {
1273
1628
  await this.setSessionStatus("failed");
1274
1629
  globalLogger.error(`Assertion failed: ${assertion}`);
@@ -1288,15 +1643,37 @@ class GptDriver {
1288
1643
  * If any assertion fails, an error is thrown listing all failed assertions.
1289
1644
  *
1290
1645
  * @param {string[]} assertions - An array of conditions to be asserted.
1291
- * @param cachingMode - The caching mode to be used for the assertions.
1646
+ * @param {Object} options - Optional configuration object
1647
+ * @param {CachingMode} options.cachingMode - The caching mode to be used for the assertions.
1648
+ * @param {boolean} options.useSmartLoop - If true, uses the smart loop execution. Default: false
1292
1649
  * @throws {Error} If any of the assertions fail.
1650
+ *
1651
+ * @example
1652
+ * // Basic usage
1653
+ * await driver.assertBulk(['Login button is visible', 'Username field is enabled']);
1654
+ *
1655
+ * @example
1656
+ * // With caching mode
1657
+ * await driver.assertBulk(['Login button is visible'], {
1658
+ * cachingMode: "FULL_SCREEN"
1659
+ * });
1660
+ *
1661
+ * @example
1662
+ * // With smart loop enabled
1663
+ * await driver.assertBulk(['Login button is visible'], {
1664
+ * useSmartLoop: true,
1665
+ * cachingMode: "FULL_SCREEN"
1666
+ * });
1293
1667
  */
1294
- async assertBulk(assertions, cachingMode) {
1668
+ async assertBulk(assertions, {
1669
+ cachingMode,
1670
+ useSmartLoop = false
1671
+ }) {
1295
1672
  if (!this.appiumSessionStarted) {
1296
1673
  await this.startSession();
1297
1674
  }
1298
1675
  try {
1299
- const results = await this.checkBulk(assertions, cachingMode);
1676
+ const results = await this.checkBulk(assertions, cachingMode, 2, 1e3, useSmartLoop);
1300
1677
  const failedAssertions = Object.values(results).reduce((prev, current, currentIndex) => {
1301
1678
  if (!current) {
1302
1679
  return [...prev, assertions.at(currentIndex)];
@@ -1325,14 +1702,30 @@ class GptDriver {
1325
1702
  * @param {CachingMode} cachingMode - The caching mode to be used for the conditions.
1326
1703
  * @param {number} maxRetries - The maximum number of retries if any condition fails (default: 2).
1327
1704
  * @param {number} retryDelayMs - The delay in milliseconds between retries (default: 1000).
1705
+ * @param {boolean} useSmartLoop - If true, uses the smart loop execution for optimized caching. Default: false
1328
1706
  * @returns {Promise<Record<string, boolean>>} A promise that resolves with an object mapping each condition
1329
1707
  * to a boolean indicating whether the condition was met.
1708
+ *
1709
+ * @example
1710
+ * // Basic usage
1711
+ * const results = await driver.checkBulk(['Login button is visible', 'Username field is enabled']);
1712
+ * console.log(results); // { 'Login button is visible': true, 'Username field is enabled': false }
1713
+ *
1714
+ * @example
1715
+ * // With smart loop enabled
1716
+ * const results = await driver.checkBulk(
1717
+ * ['Login button is visible'],
1718
+ * "FULL_SCREEN",
1719
+ * 2,
1720
+ * 1000,
1721
+ * true
1722
+ * );
1330
1723
  */
1331
- async checkBulk(conditions, cachingMode, maxRetries = 2, retryDelayMs = 1e3) {
1724
+ async checkBulk(conditions, cachingMode, maxRetries = 2, retryDelayMs = 1e3, useSmartLoop = false) {
1332
1725
  let attempt = 0;
1333
1726
  let results = {};
1334
1727
  while (attempt <= maxRetries) {
1335
- results = await this._checkBulkOnce(conditions, cachingMode, attempt);
1728
+ results = await this._checkBulkOnce(conditions, cachingMode, attempt, useSmartLoop);
1336
1729
  const failedConditions = Object.entries(results).filter(([_, success]) => !success).map(([key, _]) => key);
1337
1730
  if (failedConditions.length === 0) {
1338
1731
  return results;
@@ -1354,33 +1747,67 @@ class GptDriver {
1354
1747
  *
1355
1748
  * @private
1356
1749
  */
1357
- async _checkBulkOnce(conditions, cachingMode, attempt = 0) {
1750
+ async _checkBulkOnce(conditions, cachingMode, attempt = 0, useSmartLoop = false) {
1358
1751
  if (!this.appiumSessionStarted) {
1359
1752
  await this.startSession();
1360
1753
  }
1361
1754
  globalLogger.info(`Checking conditions (attempt ${attempt}): ${conditions.join(", ")}`);
1755
+ const pendingLogPromises = [];
1362
1756
  try {
1363
- let screenshot;
1364
- if (!this.useGptDriverCloud) {
1365
- screenshot = await this.getScreenshot(this.appiumSessionConfig);
1366
- }
1367
- const response = await axios.post(
1368
- `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
1369
- {
1370
- api_key: this.apiKey,
1371
- base64_screenshot: screenshot,
1372
- assertions: conditions,
1373
- command: `Assert: ${JSON.stringify(conditions)}`,
1374
- caching_mode: cachingMode ?? this.cachingMode,
1375
- step_number: this.step_number
1757
+ if (useSmartLoop) {
1758
+ const instruction = `Assert: ${JSON.stringify(conditions)}`;
1759
+ const middleLayerAssertFn = async (screenshot) => {
1760
+ return await axios.post(
1761
+ `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
1762
+ {
1763
+ api_key: this.apiKey,
1764
+ base64_screenshot: screenshot,
1765
+ assertions: conditions,
1766
+ command: `Assert: ${JSON.stringify(conditions)}`,
1767
+ caching_mode: cachingMode ?? this.cachingMode,
1768
+ step_number: this.step_number
1769
+ }
1770
+ );
1771
+ };
1772
+ const ctx = this.createSmartLoopContext({ middleLayerAssertFn, pendingLogPromises });
1773
+ const result = await executeSmartLoop(ctx, {
1774
+ stepNumber: this.step_number,
1775
+ description: instruction,
1776
+ instruction,
1777
+ ...this.testId && {
1778
+ filepath: this.testId
1779
+ }
1780
+ });
1781
+ if (result.cacheHitCount) {
1782
+ this._stats_cacheHits += result.cacheHitCount;
1376
1783
  }
1377
- );
1378
- globalLogger.debug(`Check results: ${JSON.stringify(response.data.results)}`);
1379
- return response.data.results;
1784
+ this._stats_executedSteps += result.iterations ?? 1;
1785
+ return { [instruction]: result.success };
1786
+ } else {
1787
+ let screenshot;
1788
+ if (!this.useGptDriverCloud) {
1789
+ screenshot = await this.getScreenshot(this.appiumSessionConfig);
1790
+ }
1791
+ const response = await axios.post(
1792
+ `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
1793
+ {
1794
+ api_key: this.apiKey,
1795
+ base64_screenshot: screenshot,
1796
+ assertions: conditions,
1797
+ command: `Assert: ${JSON.stringify(conditions)}`,
1798
+ caching_mode: cachingMode ?? this.cachingMode,
1799
+ step_number: this.step_number
1800
+ }
1801
+ );
1802
+ globalLogger.debug(`Check results: ${JSON.stringify(response.data.results)}`);
1803
+ return response.data.results;
1804
+ }
1380
1805
  } catch (e) {
1381
1806
  globalLogger.error("Failed to check conditions", e);
1382
1807
  await this.setSessionStatus("failed");
1383
1808
  throw e;
1809
+ } finally {
1810
+ await Promise.all(pendingLogPromises);
1384
1811
  }
1385
1812
  }
1386
1813
  /**
@@ -1473,10 +1900,12 @@ class GptDriver {
1473
1900
  * const result = await driver.executeFlow('tests/login-flow.json', { useSmartLoop: true });
1474
1901
  */
1475
1902
  async executeFlow(filePath, options) {
1903
+ const flowStartTime = performance.now();
1476
1904
  const useSmartLoop = options?.useSmartLoop ?? false;
1477
1905
  const configFilePath = this.configFilePath;
1478
1906
  let baseDir;
1479
1907
  let absolutePath;
1908
+ let parsedConfigFile;
1480
1909
  if (configFilePath) {
1481
1910
  let raw2;
1482
1911
  try {
@@ -1494,7 +1923,7 @@ class GptDriver {
1494
1923
  globalLogger.error(msg);
1495
1924
  throw new Error(msg);
1496
1925
  }
1497
- const parsedConfigFile = ConfigSchema.parse(json2);
1926
+ parsedConfigFile = ConfigSchema.parse(json2);
1498
1927
  if (path.isAbsolute(parsedConfigFile.testDir)) {
1499
1928
  baseDir = parsedConfigFile.testDir;
1500
1929
  } else {
@@ -1561,7 +1990,7 @@ ${issues}`);
1561
1990
  }
1562
1991
  visited.add(refKey);
1563
1992
  const child = await loadFlow(refPath);
1564
- const mergedParams = { ...inheritedParams, ...step.overrides ?? {} };
1993
+ const mergedParams = { ...inheritedParams, ...child.params, ...step.overrides ?? {} };
1565
1994
  const childDir = path.dirname(refPath);
1566
1995
  const childRelativePath = path.relative(baseDir, refPath).replace(/^\.\//, "");
1567
1996
  const childExpanded = await expandSteps(child.steps, mergedParams, childDir, [...stack, refKey], childRelativePath);
@@ -1577,7 +2006,15 @@ ${issues}`);
1577
2006
  }
1578
2007
  return out;
1579
2008
  };
1580
- const effectiveParams = { ...rootFlow.params ?? {} };
2009
+ const envPath = path.join(process.cwd(), ".env");
2010
+ let envVars = {};
2011
+ try {
2012
+ await promises.access(envPath);
2013
+ const envContent = await readFile(envPath, "utf-8");
2014
+ envVars = dotenv.parse(envContent);
2015
+ } catch (_) {
2016
+ }
2017
+ const effectiveParams = { ...parsedConfigFile?.variables, ...envVars, ...rootFlow.params };
1581
2018
  const rootRelativePath = path.relative(baseDir, absolutePath).replace(/^\.\//, "");
1582
2019
  const expandedSteps = await expandSteps(rootFlow.steps, effectiveParams, baseDir, [absolutePath], rootRelativePath);
1583
2020
  if (!this.appiumSessionStarted) {
@@ -1585,20 +2022,33 @@ ${issues}`);
1585
2022
  }
1586
2023
  globalLogger.info(`Executing flow '${rootFlow.name}' with ${expandedSteps.length} step(s)...`);
1587
2024
  let executed = 0;
2025
+ let cacheHits = 0;
2026
+ let lastStepWasFromFileRef = false;
2027
+ let fileRefStepNumber = 1;
1588
2028
  try {
2029
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig, false);
2030
+ const screenshotResolution = await getImageDimensions(screenshot);
1589
2031
  for (const step of expandedSteps) {
1590
2032
  const params = step.__params ?? effectiveParams;
1591
2033
  const filepath = step.__filepath ?? rootRelativePath;
1592
- const prefix = `Step #${executed + 1} [${step.type}${step.optional ? ", optional" : ""}] (filepath: ${filepath})`;
2034
+ console.log("step.__filepath", step.__filepath);
2035
+ console.log("rootRelativePath", rootRelativePath);
2036
+ const isFromFileRef = filepath !== rootRelativePath;
2037
+ console.log("isFromFileRef", isFromFileRef);
2038
+ if (!lastStepWasFromFileRef && isFromFileRef) {
2039
+ fileRefStepNumber = 1;
2040
+ }
2041
+ const effectiveStepNumber = isFromFileRef ? fileRefStepNumber : this.step_number;
2042
+ const prefix = `Step #${executed + 1} (step_number: ${effectiveStepNumber}) [${step.type}${step.optional ? ", optional" : ""}] (filepath: ${filepath})`;
1593
2043
  try {
1594
2044
  switch (step.type) {
1595
2045
  case "ai": {
1596
2046
  const instruction = this.interpolateTemplate(step.instruction, params);
1597
2047
  globalLogger.info(`${prefix}: ${instruction}`);
1598
2048
  if (useSmartLoop) {
1599
- const ctx = this.createSmartLoopContext();
2049
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1600
2050
  const result = await executeSmartLoop(ctx, {
1601
- stepNumber: this.step_number,
2051
+ stepNumber: effectiveStepNumber,
1602
2052
  description: instruction,
1603
2053
  instruction,
1604
2054
  filepath
@@ -1606,7 +2056,14 @@ ${issues}`);
1606
2056
  if (!result.success) {
1607
2057
  throw new Error(result.error || "Smart loop execution failed");
1608
2058
  }
1609
- this.step_number++;
2059
+ if (result.cacheHit) {
2060
+ cacheHits++;
2061
+ }
2062
+ if (isFromFileRef) {
2063
+ fileRefStepNumber++;
2064
+ } else {
2065
+ this.step_number++;
2066
+ }
1610
2067
  } else {
1611
2068
  await this.aiExecute({ command: instruction });
1612
2069
  }
@@ -1619,9 +2076,9 @@ ${issues}`);
1619
2076
  }
1620
2077
  globalLogger.info(`${prefix}: ${description}`);
1621
2078
  if (useSmartLoop) {
1622
- const ctx = this.createSmartLoopContext();
2079
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1623
2080
  const result = await executeSmartLoop(ctx, {
1624
- stepNumber: this.step_number,
2081
+ stepNumber: effectiveStepNumber,
1625
2082
  description,
1626
2083
  instruction: description,
1627
2084
  filepath
@@ -1629,7 +2086,14 @@ ${issues}`);
1629
2086
  if (!result.success) {
1630
2087
  throw new Error(result.error || "Smart loop execution failed");
1631
2088
  }
1632
- this.step_number++;
2089
+ if (result.cacheHit) {
2090
+ cacheHits++;
2091
+ }
2092
+ if (isFromFileRef) {
2093
+ fileRefStepNumber++;
2094
+ } else {
2095
+ this.step_number++;
2096
+ }
1633
2097
  } else {
1634
2098
  await this.aiExecute({ command: description });
1635
2099
  }
@@ -1643,9 +2107,9 @@ ${issues}`);
1643
2107
  globalLogger.info(`${prefix}: ${description}`);
1644
2108
  if (useSmartLoop) {
1645
2109
  const instruction = `Verify that: ${description}`;
1646
- const ctx = this.createSmartLoopContext();
2110
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1647
2111
  const result = await executeSmartLoop(ctx, {
1648
- stepNumber: this.step_number,
2112
+ stepNumber: effectiveStepNumber,
1649
2113
  description,
1650
2114
  instruction,
1651
2115
  filepath
@@ -1653,7 +2117,14 @@ ${issues}`);
1653
2117
  if (!result.success) {
1654
2118
  throw new Error(result.error || "Smart loop execution failed");
1655
2119
  }
1656
- this.step_number++;
2120
+ if (result.cacheHit) {
2121
+ cacheHits++;
2122
+ }
2123
+ if (isFromFileRef) {
2124
+ fileRefStepNumber++;
2125
+ } else {
2126
+ this.step_number++;
2127
+ }
1657
2128
  } else {
1658
2129
  await this.assert(description);
1659
2130
  }
@@ -1662,9 +2133,27 @@ ${issues}`);
1662
2133
  case "type": {
1663
2134
  const text = this.interpolateTemplate(step.text, params);
1664
2135
  globalLogger.info(`${prefix}: Type text`);
1665
- await this.takeScreenshotAndLogCodeExecution(`type: text=${text}`);
2136
+ this.takeScreenshotAndLogCodeExecution(`type: text=${text}`);
1666
2137
  await this.performType(text);
1667
- this.step_number++;
2138
+ if (isFromFileRef) {
2139
+ fileRefStepNumber++;
2140
+ } else {
2141
+ this.step_number++;
2142
+ }
2143
+ break;
2144
+ }
2145
+ case "enter": {
2146
+ globalLogger.info(`${prefix}: Press Enter`);
2147
+ this.takeScreenshotAndLogCodeExecution(`press: Enter`);
2148
+ await this.performPressEnter();
2149
+ if (step.delayNextStep) {
2150
+ await this._delay(step.delayNextStep);
2151
+ }
2152
+ if (isFromFileRef) {
2153
+ fileRefStepNumber++;
2154
+ } else {
2155
+ this.step_number++;
2156
+ }
1668
2157
  break;
1669
2158
  }
1670
2159
  case "scroll":
@@ -1672,9 +2161,9 @@ ${issues}`);
1672
2161
  const description = step.descriptionText ? this.interpolateTemplate(step.descriptionText, params) : void 0;
1673
2162
  if (description && useSmartLoop) {
1674
2163
  globalLogger.info(`${prefix}: ${description}`);
1675
- const ctx = this.createSmartLoopContext();
2164
+ const ctx = this.createSmartLoopContext({ pendingLogPromises: this.pendingLogPromises });
1676
2165
  const result = await executeSmartLoop(ctx, {
1677
- stepNumber: this.step_number,
2166
+ stepNumber: effectiveStepNumber,
1678
2167
  description,
1679
2168
  instruction: description,
1680
2169
  filepath
@@ -1682,43 +2171,60 @@ ${issues}`);
1682
2171
  if (!result.success) {
1683
2172
  throw new Error(result.error || "Smart loop execution failed");
1684
2173
  }
1685
- this.step_number++;
2174
+ if (result.cacheHit) {
2175
+ cacheHits++;
2176
+ }
2177
+ if (isFromFileRef) {
2178
+ fileRefStepNumber++;
2179
+ } else {
2180
+ this.step_number++;
2181
+ }
1686
2182
  } else {
1687
2183
  globalLogger.info(`${prefix}: ${step.type} ${step.direction}`);
1688
- await this.takeScreenshotAndLogCodeExecution(`${step.type}: direction=${step.direction}`);
1689
- if (step.type === "swipe") {
1690
- await this.performSwipe({
1691
- direction: step.direction,
1692
- x1: step.x1,
1693
- y1: step.y1,
1694
- x2: step.x2,
1695
- y2: step.y2,
1696
- duration: step.duration
1697
- });
2184
+ this.takeScreenshotAndLogCodeExecution(`${step.type}: direction=${step.direction}`);
2185
+ await this.performSwipe({
2186
+ direction: step.direction,
2187
+ x1: step.x1,
2188
+ y1: step.y1,
2189
+ x2: step.x2,
2190
+ y2: step.y2,
2191
+ duration: step.duration,
2192
+ screenshotHeight: screenshotResolution.height,
2193
+ screenshotWidth: screenshotResolution.width
2194
+ });
2195
+ if (isFromFileRef) {
2196
+ fileRefStepNumber++;
1698
2197
  } else {
1699
- await this.performSwipe({ direction: step.direction });
2198
+ this.step_number++;
1700
2199
  }
1701
- this.step_number++;
1702
2200
  }
1703
2201
  break;
1704
2202
  }
1705
2203
  case "zoom": {
1706
2204
  globalLogger.info(`${prefix}: Zoom ${step.direction}`);
1707
- await this.takeScreenshotAndLogCodeExecution(`zoom: direction=${step.direction}`);
1708
- this.step_number++;
2205
+ this.takeScreenshotAndLogCodeExecution(`zoom: direction=${step.direction}`);
2206
+ if (isFromFileRef) {
2207
+ fileRefStepNumber++;
2208
+ } else {
2209
+ this.step_number++;
2210
+ }
1709
2211
  break;
1710
2212
  }
1711
2213
  case "scrollUntil": {
1712
2214
  const interpolatedText = step.text != null ? this.interpolateTemplate(step.text, params) : void 0;
1713
2215
  globalLogger.info(`${prefix}: Scroll until ${interpolatedText ?? step.elementId}`);
1714
- await this.takeScreenshotAndLogCodeExecution(`scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`);
2216
+ this.takeScreenshotAndLogCodeExecution(`scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`);
1715
2217
  await this.performScrollUntil({
1716
2218
  direction: step.direction,
1717
2219
  text: interpolatedText,
1718
2220
  elementId: step.elementId,
1719
2221
  maxScrolls: step.maxScrolls
1720
2222
  });
1721
- this.step_number++;
2223
+ if (isFromFileRef) {
2224
+ fileRefStepNumber++;
2225
+ } else {
2226
+ this.step_number++;
2227
+ }
1722
2228
  break;
1723
2229
  }
1724
2230
  case "deeplink": {
@@ -1726,7 +2232,7 @@ ${issues}`);
1726
2232
  const bundleId = params["bundleId"];
1727
2233
  const url = this.interpolateTemplate(step.url, params);
1728
2234
  globalLogger.info(`${prefix}: Open deeplink ${url}`);
1729
- await this.takeScreenshotAndLogCodeExecution(`openDeepLinkUrl: url=${url}`);
2235
+ this.takeScreenshotAndLogCodeExecution(`openDeepLinkUrl: url=${url}`);
1730
2236
  await this.openDeepLinkUrl({ url, package: pkg, bundleId });
1731
2237
  break;
1732
2238
  }
@@ -1734,10 +2240,12 @@ ${issues}`);
1734
2240
  throw new Error(`Unsupported step type at execution: ${step.type}`);
1735
2241
  }
1736
2242
  }
2243
+ lastStepWasFromFileRef = isFromFileRef;
1737
2244
  executed++;
1738
2245
  } catch (err) {
1739
2246
  if (step.optional) {
1740
2247
  globalLogger.warn(`${prefix} failed but marked optional. Continuing. Error: ${err.message}`);
2248
+ lastStepWasFromFileRef = isFromFileRef;
1741
2249
  continue;
1742
2250
  }
1743
2251
  throw err;
@@ -1750,6 +2258,16 @@ ${issues}`);
1750
2258
  }
1751
2259
  throw e;
1752
2260
  }
2261
+ const flowEndTime = performance.now();
2262
+ const totalTime = (flowEndTime - flowStartTime) / 1e3;
2263
+ globalLogger.info(`
2264
+ ${"=".repeat(50)}`);
2265
+ globalLogger.info(`Flow Execution Summary:`);
2266
+ globalLogger.info(`Total Test Time: ${totalTime.toFixed(2)} seconds`);
2267
+ globalLogger.info(`Total Steps executed: ${executed}`);
2268
+ globalLogger.info(`Total Step Cache Hits: ${cacheHits}`);
2269
+ globalLogger.info(`${"=".repeat(50)}
2270
+ `);
1753
2271
  return rootFlow;
1754
2272
  }
1755
2273
  async gptHandler(command, cachingMode) {
@@ -1757,8 +2275,15 @@ ${issues}`);
1757
2275
  let conditionSucceeded = false;
1758
2276
  while (!conditionSucceeded) {
1759
2277
  let screenshot;
2278
+ let originalScreenshotBase64 = null;
1760
2279
  if (!this.useGptDriverCloud) {
1761
- screenshot = await this.getScreenshot(this.appiumSessionConfig);
2280
+ const stabilityResult = await waitForStableScreen(
2281
+ () => this.getScreenshot(this.appiumSessionConfig)
2282
+ );
2283
+ screenshot = stabilityResult.originalScreenshotBase64;
2284
+ if (!stabilityResult.stable) {
2285
+ globalLogger.warn("Screen did not stabilize within timeout, proceeding with last screenshot");
2286
+ }
1762
2287
  }
1763
2288
  globalLogger.info("Requesting next action from GPT Driver...");
1764
2289
  const response = await axios.request(
@@ -1786,7 +2311,6 @@ ${issues}`);
1786
2311
  for (const appiumCommand of executeResponse.commands) {
1787
2312
  await this.executeCommand(appiumCommand);
1788
2313
  }
1789
- await delay(1500);
1790
2314
  }
1791
2315
  this.step_number = this.step_number + 1;
1792
2316
  globalLogger.info("Command execution completed successfully");
@@ -1814,13 +2338,13 @@ ${issues}`);
1814
2338
  });
1815
2339
  }
1816
2340
  }
1817
- async logCodeExecution(screenshot, command) {
2341
+ async logCodeExecution(screenshot, command, isCacheHit) {
1818
2342
  try {
1819
- const screenshot2 = await this.getScreenshot(this.appiumSessionConfig);
1820
2343
  await axios.post(`${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/log_code_execution`, {
1821
2344
  api_key: this.apiKey,
1822
- base64_screenshot: screenshot2,
1823
- command
2345
+ base64_screenshot: screenshot,
2346
+ command,
2347
+ from_cache: !!isCacheHit
1824
2348
  });
1825
2349
  } catch (e) {
1826
2350
  globalLogger.error("Failed to log code execution", e);