gpt-driver-node 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -22,6 +22,15 @@ function buildUrl(base, extraPath) {
22
22
  }
23
23
  return `${baseUrl}${extraPath}`;
24
24
  }
25
/**
 * Reads the pixel dimensions of a base64-encoded image.
 *
 * @param {string} base64 - Base64 image data, optionally prefixed with a
 *   `data:image/<type>;base64,` data-URL header.
 * @returns {Promise<{width: number, height: number}>} The width and height
 *   reported by the image metadata.
 * @throws {Error} "Unable to get image dimensions" when the metadata has no
 *   usable (truthy) width or height.
 */
const getImageDimensions = async (base64) => {
  // Drop the data-URL header, if any, so only the raw base64 payload is decoded.
  const payload = base64.replace(/^data:image\/\w+;base64,/, "");
  const { width, height } = await sharp(Buffer.from(payload, "base64")).metadata();
  if (width && height) {
    return { width, height };
  }
  throw new Error("Unable to get image dimensions");
};
25
34
 
26
35
  const colors = {
27
36
  reset: "\x1B[0m",
@@ -107,6 +116,15 @@ const SavableTypeStepSchema = SavableStepBaseSchema.extend({
107
116
  type: zod.z.literal("type"),
108
117
  text: zod.z.string()
109
118
  });
119
+ const SavableSwipeStepSchema = SavableStepBaseSchema.extend({
120
+ type: zod.z.literal("swipe"),
121
+ direction: zod.z.enum(["left", "right", "up", "down"]),
122
+ x1: zod.z.number().optional(),
123
+ y1: zod.z.number().optional(),
124
+ x2: zod.z.number().optional(),
125
+ y2: zod.z.number().optional(),
126
+ duration: zod.z.number().optional().default(500)
127
+ });
110
128
  const SavableScrollStepSchema = SavableStepBaseSchema.extend({
111
129
  type: zod.z.literal("scroll"),
112
130
  direction: zod.z.enum(["up", "down"])
@@ -142,6 +160,8 @@ const SavableStepSchema = zod.z.discriminatedUnion("type", [
142
160
  // type: 'assert'
143
161
  SavableTypeStepSchema,
144
162
  // type: 'type'
163
+ SavableSwipeStepSchema,
164
+ // type: 'swipe'
145
165
  SavableScrollStepSchema,
146
166
  // type: 'scroll'
147
167
  SavableZoomStepSchema,
@@ -160,6 +180,14 @@ const SavableTestStoreSchema = zod.z.object({
160
180
  steps: zod.z.array(SavableStepSchema),
161
181
  params: zod.z.record(zod.z.string(), zod.z.string()).optional()
162
182
  });
183
+ const VariablesSchema = zod.z.record(zod.z.string(), zod.z.string()).optional().default({});
184
+ const ConfigSchema = zod.z.object({
185
+ testDir: zod.z.string(),
186
+ driver: zod.z.string(),
187
+ port: zod.z.number(),
188
+ apiKey: zod.z.string(),
189
+ variables: VariablesSchema
190
+ });
163
191
 
164
192
  const CACHE_SERVER_URL = "https://cache.mobileboost.io";
165
193
  const GPT_DRIVER_BASE_URL = "https://api.mobileboost.io";
@@ -419,6 +447,30 @@ function isScrollCommand(cmd) {
419
447
  function isTypeCommand(cmd) {
420
448
  return cmd.startsWith("type:");
421
449
  }
450
/**
 * Tells whether a command string is a "slide" command.
 *
 * The prefix is compared case-insensitively so that this gate accepts the
 * same spellings as parseSlideCommand, whose pattern carries the `i` flag
 * (e.g. both "slide up 30%;1;2" and "Slide Up 30%;1;2").
 *
 * @param {string} cmd - Raw command text.
 * @returns {boolean} True when `cmd` begins with "slide" in any letter case.
 */
function isSlideCommand(cmd) {
  const prefix = "slide";
  // Only the leading characters matter; the rest of the command is validated
  // later by the slide parser.
  return cmd.slice(0, prefix.length).toLowerCase() === prefix;
}
453
/**
 * Extracts the parameters of a slide command.
 *
 * Accepted shape (case-insensitive):
 *   slide <up|down|left|right> <N>%[: free text without ';'];<x>;<y>
 *
 * @param {string} cmd - Raw command text.
 * @returns {{direction: string, percentage: number, startX: number, startY: number} | null}
 *   The parsed parameters, or null when `cmd` does not match the pattern.
 *   `direction` is the OPPOSITE of the direction written in the command
 *   ("up" <-> "down", "left" <-> "right").
 */
function parseSlideCommand(cmd) {
  const parts = cmd.match(
    /slide\s+(up|down|left|right)\s+(\d+)%(?::\s*[^;]*)?;(\d+);(\d+)/i
  );
  if (parts === null) {
    return null;
  }
  const [, writtenDirection, percentText, xText, yText] = parts;
  // The command's direction is inverted before being handed back to callers.
  const opposite = { down: "up", up: "down", left: "right", right: "left" };
  const toInt = (digits) => Number.parseInt(digits, 10);
  return {
    direction: opposite[writtenDirection.toLowerCase()],
    percentage: toInt(percentText),
    startX: toInt(xText),
    startY: toInt(yText)
  };
}
422
474
 
423
475
  async function executeSmartLoop(ctx, params) {
424
476
  const maxCacheAttempts = Math.floor(CACHE_RETRY_MS / CACHE_CHECK_INTERVAL_MS);
@@ -434,6 +486,8 @@ async function executeSmartLoop(ctx, params) {
434
486
  let screenshot = "";
435
487
  let commands = [];
436
488
  let isCacheHit = false;
489
+ const firstScreenshot = await ctx.getScreenshot();
490
+ const screenshotResolution = await getImageDimensions(firstScreenshot);
437
491
  for (let attempt = 0; attempt < maxCacheAttempts; attempt++) {
438
492
  screenshot = await ctx.getScreenshot();
439
493
  const sizeInBytes = screenshot.length * 0.75;
@@ -446,7 +500,7 @@ async function executeSmartLoop(ctx, params) {
446
500
  stepNumber: params.stepNumber,
447
501
  stepDescription: params.description,
448
502
  screenshot,
449
- screenResolution: ctx.screenSize,
503
+ screenResolution: screenshotResolution,
450
504
  highestUsedIndex: lastCacheIndex,
451
505
  platform: ctx.platform,
452
506
  filepath: params.filepath
@@ -517,7 +571,12 @@ async function executeSmartLoop(ctx, params) {
517
571
  const coords = parseTapCoordinates(cmd);
518
572
  if (coords) {
519
573
  globalLogger.debug(`[SmartLoop] Executing tap at (${coords.x}, ${coords.y})`);
520
- await ctx.performTap(coords.x, coords.y);
574
+ await ctx.performTap(
575
+ coords.x,
576
+ coords.y,
577
+ screenshotResolution.width,
578
+ screenshotResolution.height
579
+ );
521
580
  actionExecuted = true;
522
581
  }
523
582
  } else if (isWaitCommand(cmd)) {
@@ -531,7 +590,44 @@ async function executeSmartLoop(ctx, params) {
531
590
  const direction = parseScrollDirection(cmd);
532
591
  if (direction) {
533
592
  globalLogger.debug(`[SmartLoop] Scrolling ${direction}`);
534
- await ctx.performScroll(direction);
593
+ await ctx.performSwipe({ direction });
594
+ actionExecuted = true;
595
+ }
596
+ } else if (isSlideCommand(cmd)) {
597
+ const slideParams = parseSlideCommand(cmd);
598
+ if (slideParams) {
599
+ const { direction, percentage, startX, startY } = slideParams;
600
+ const isVertical = direction === "up" || direction === "down";
601
+ const distance = Math.round(
602
+ (isVertical ? screenshotResolution.height : screenshotResolution.width) * (percentage / 100)
603
+ );
604
+ let endX = startX;
605
+ let endY = startY;
606
+ switch (direction) {
607
+ case "up":
608
+ endY = startY + distance;
609
+ break;
610
+ case "down":
611
+ endY = startY - distance;
612
+ break;
613
+ case "left":
614
+ endX = startX - distance;
615
+ break;
616
+ case "right":
617
+ endX = startX + distance;
618
+ break;
619
+ }
620
+ globalLogger.debug(`[SmartLoop] Sliding ${direction} ${percentage}% from (${startX}, ${startY}) to (${endX}, ${endY})`);
621
+ await ctx.performSwipe({
622
+ direction,
623
+ x1: startX,
624
+ y1: startY,
625
+ x2: endX,
626
+ y2: endY,
627
+ screenshotWidth: screenshotResolution.width,
628
+ screenshotHeight: screenshotResolution.height,
629
+ duration: 500
630
+ });
535
631
  actionExecuted = true;
536
632
  }
537
633
  } else if (isTypeCommand(cmd)) {
@@ -559,7 +655,7 @@ async function executeSmartLoop(ctx, params) {
559
655
  stepNumber: params.stepNumber,
560
656
  stepDescription: params.description,
561
657
  executionData: currentExecutionData,
562
- screenResolution: ctx.screenSize,
658
+ screenResolution: screenshotResolution,
563
659
  platform: ctx.platform,
564
660
  filepath: params.filepath
565
661
  });
@@ -604,7 +700,6 @@ class GptDriver {
604
700
  });
605
701
  }
606
702
  apiKey;
607
- organisationId;
608
703
  gptDriverSessionId;
609
704
  gptDriverBaseUrl;
610
705
  appiumSessionConfig;
@@ -616,6 +711,8 @@ class GptDriver {
616
711
  buildId;
617
712
  testId;
618
713
  step_number = 1;
714
+ organisationId;
715
+ configFilePath;
619
716
  // Smart loop state - maintains action history across steps for context
620
717
  globalActionHistory = [];
621
718
  /**
@@ -640,11 +737,12 @@ class GptDriver {
640
737
  constructor(config) {
641
738
  this.testId = config.testId;
642
739
  this.apiKey = config.apiKey;
643
- this.organisationId = config.organisationId;
644
740
  this.buildId = config.buildId;
645
741
  this.useGptDriverCloud = config.useGptDriverCloud;
646
742
  this.gptDriverBaseUrl = GPT_DRIVER_BASE_URL;
647
743
  this.cachingMode = config.cachingMode ?? "NONE";
744
+ this.organisationId = config.organisationId;
745
+ this.configFilePath = config.configFilePath;
648
746
  if (config.useGptDriverCloud) {
649
747
  if (config.serverConfig.device?.platform == null) {
650
748
  throw new Error("Platform is missing. Please specify the platform when using GPTDriver Cloud.");
@@ -830,11 +928,10 @@ class GptDriver {
830
928
  return {
831
929
  apiKey: this.apiKey,
832
930
  platform: this.appiumSessionConfig?.platform,
833
- screenSize: this.appiumSessionConfig.size,
834
931
  globalActionHistory: this.globalActionHistory,
835
- getScreenshot: () => this.getScreenshot(this.appiumSessionConfig),
836
- performTap: (x, y) => this.performTap(x, y),
837
- performScroll: (direction) => this.performScroll(direction),
932
+ getScreenshot: () => this.getScreenshot(this.appiumSessionConfig, false),
933
+ performTap: (x, y, screenshotWidth, screenshotHeight) => this.performTap(x, y, screenshotWidth, screenshotHeight),
934
+ performSwipe: (params) => this.performSwipe(params),
838
935
  performType: (text) => this.performType(text),
839
936
  logCodeExecution: async (screenshot, command) => this.logCodeExecution(screenshot, command),
840
937
  organisationId: this.organisationId
@@ -887,15 +984,31 @@ class GptDriver {
887
984
  /**
888
985
  * Performs a tap action at the specified coordinates.
889
986
  */
890
- async performTap(x, y) {
987
+ async performTap(x, y, screenshotWidth, screenshotHeight) {
891
988
  const client = await this.getWdioClient();
989
+ const platform = this.appiumSessionConfig?.platform;
990
+ const { width: w, height: h } = this.appiumSessionConfig?.size ?? { width: 1080, height: 1920 };
991
+ const scaled = this.scaleForIOS(
992
+ x,
993
+ y,
994
+ platform,
995
+ screenshotWidth,
996
+ screenshotHeight,
997
+ w,
998
+ h
999
+ );
1000
+ const clampedX = this.clamp(scaled.x, 0, w - 1);
1001
+ const clampedY = this.clamp(scaled.y, 0, h - 1);
1002
+ globalLogger.debug(
1003
+ `[Tap] Platform: ${platform}, Input: ${x},${y}, Window: ${w}x${h}, Final: ${clampedX},${clampedY}`
1004
+ );
892
1005
  await client.performActions([
893
1006
  {
894
1007
  type: "pointer",
895
1008
  id: "finger1",
896
1009
  parameters: { pointerType: "touch" },
897
1010
  actions: [
898
- { type: "pointerMove", duration: 0, x, y },
1011
+ { type: "pointerMove", duration: 0, x: clampedX, y: clampedY },
899
1012
  { type: "pointerDown", button: 0 },
900
1013
  { type: "pause", duration: 100 },
901
1014
  { type: "pointerUp", button: 0 }
@@ -905,25 +1018,125 @@ class GptDriver {
905
1018
  }
906
1019
  async performType(text) {
907
1020
  const client = await this.getWdioClient();
908
- await client.keys(text.split(""));
1021
+ const platform = this.appiumSessionConfig?.platform;
1022
+ if (platform === "iOS") {
1023
+ const actions = text.split("").flatMap((char) => [
1024
+ { type: "keyDown", value: char },
1025
+ { type: "keyUp", value: char }
1026
+ ]);
1027
+ await client.performActions([
1028
+ {
1029
+ type: "key",
1030
+ id: "keyboard",
1031
+ actions
1032
+ }
1033
+ ]);
1034
+ } else {
1035
+ await client.keys(text.split(""));
1036
+ }
909
1037
  }
910
- async performScroll(direction) {
1038
+ clamp(value, min, max) {
1039
+ return Math.max(min, Math.min(max, value));
1040
+ }
1041
+ scaleForIOS(x, y, platform, screenshotWidth, screenshotHeight, windowWidth, windowHeight) {
1042
+ if (platform !== "iOS" || !screenshotWidth || !screenshotHeight || !windowWidth || !windowHeight) {
1043
+ return { x: Math.round(x), y: Math.round(y) };
1044
+ }
1045
+ const scaleX = windowWidth / screenshotWidth;
1046
+ const scaleY = windowHeight / screenshotHeight;
1047
+ return {
1048
+ x: Math.round(x * scaleX),
1049
+ y: Math.round(y * scaleY)
1050
+ };
1051
+ }
1052
+ async performSwipe(params) {
911
1053
  const client = await this.getWdioClient();
912
- const w = this.appiumSessionConfig?.size?.width ?? 1080;
913
- const h = this.appiumSessionConfig?.size?.height ?? 1920;
914
- const x = Math.round(w / 2);
915
- const startY = direction === "down" ? Math.round(h * 0.8) : Math.round(h * 0.2);
916
- const endY = direction === "down" ? Math.round(h * 0.2) : Math.round(h * 0.8);
1054
+ const platform = this.appiumSessionConfig?.platform;
1055
+ const { width: w, height: h } = this.appiumSessionConfig?.size ?? { width: 1080, height: 1920 };
1056
+ const defaultStartX = w / 2;
1057
+ let defaultStartY;
1058
+ if (params.direction === "down") {
1059
+ defaultStartY = h * 0.75;
1060
+ } else if (params.direction === "up") {
1061
+ defaultStartY = h * 0.25;
1062
+ } else {
1063
+ defaultStartY = h / 2;
1064
+ }
1065
+ let startX;
1066
+ let startY;
1067
+ if (params.x1 !== void 0 || params.y1 !== void 0) {
1068
+ const scaled = this.scaleForIOS(
1069
+ params.x1 ?? defaultStartX,
1070
+ params.y1 ?? defaultStartY,
1071
+ platform,
1072
+ params.screenshotWidth,
1073
+ params.screenshotHeight,
1074
+ w,
1075
+ h
1076
+ );
1077
+ startX = scaled.x;
1078
+ startY = scaled.y;
1079
+ } else {
1080
+ startX = Math.round(defaultStartX);
1081
+ startY = Math.round(defaultStartY);
1082
+ }
1083
+ startX = this.clamp(startX, 0, w - 1);
1084
+ startY = this.clamp(startY, 0, h - 1);
1085
+ let endX;
1086
+ let endY;
1087
+ if (params.x2 !== void 0 || params.y2 !== void 0) {
1088
+ const scaled = this.scaleForIOS(
1089
+ params.x2 ?? startX,
1090
+ params.y2 ?? startY,
1091
+ platform,
1092
+ params.screenshotWidth,
1093
+ params.screenshotHeight,
1094
+ w,
1095
+ h
1096
+ );
1097
+ endX = scaled.x;
1098
+ endY = scaled.y;
1099
+ } else {
1100
+ const deltaX = Math.round(w * 0.5);
1101
+ const deltaY = Math.round(h * 0.5);
1102
+ switch (params.direction) {
1103
+ case "left":
1104
+ endX = Math.max(0, startX - deltaX);
1105
+ endY = startY;
1106
+ break;
1107
+ case "right":
1108
+ endX = Math.min(w - 1, startX + deltaX);
1109
+ endY = startY;
1110
+ break;
1111
+ case "up":
1112
+ endX = startX;
1113
+ endY = Math.min(h - 1, startY + deltaY);
1114
+ break;
1115
+ case "down":
1116
+ endX = startX;
1117
+ endY = Math.max(0, startY - deltaY);
1118
+ break;
1119
+ default:
1120
+ endX = startX;
1121
+ endY = startY;
1122
+ }
1123
+ }
1124
+ endX = this.clamp(endX, 0, w - 1);
1125
+ endY = this.clamp(endY, 0, h - 1);
1126
+ globalLogger.debug(
1127
+ `[Swipe] Platform: ${platform}, Direction: ${params.direction}, Start: ${startX},${startY}, End: ${endX},${endY}`
1128
+ );
1129
+ const duration = params.duration ?? 500;
917
1130
  await client.performActions([
918
1131
  {
919
1132
  type: "pointer",
920
1133
  id: "finger1",
921
1134
  parameters: { pointerType: "touch" },
922
1135
  actions: [
923
- { type: "pointerMove", duration: 0, x, y: startY },
1136
+ { type: "pointerMove", duration: 0, x: startX, y: startY },
924
1137
  { type: "pointerDown", button: 0 },
925
1138
  { type: "pause", duration: 100 },
926
- { type: "pointerMove", duration: 500, x, y: endY },
1139
+ { type: "pointerMove", duration, x: endX, y: endY },
927
1140
  { type: "pointerUp", button: 0 }
928
1141
  ]
929
1142
  }
@@ -942,17 +1155,17 @@ class GptDriver {
942
1155
  if (found) {
943
1156
  return;
944
1157
  }
945
- await this.performScroll(direction);
1158
+ await this.performSwipe({ direction });
946
1159
  await this._delay(500);
947
1160
  }
948
1161
  throw new Error(`scrollUntil target not found after ${max} scroll(s)`);
949
1162
  }
950
- async getScreenshot(appiumSessionConfig) {
1163
+ async getScreenshot(appiumSessionConfig, shouldScale = true) {
951
1164
  globalLogger.debug("Capturing screenshot...");
952
1165
  const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
953
1166
  const screenshotResponse = await axios.get(url);
954
1167
  let screenshot = await screenshotResponse.data.value;
955
- if (appiumSessionConfig.platform === "iOS") {
1168
+ if (appiumSessionConfig.platform === "iOS" && shouldScale) {
956
1169
  globalLogger.debug(`Resizing iOS screenshot to ${appiumSessionConfig.size.width}x${appiumSessionConfig.size.height}`);
957
1170
  const imageBuffer = Buffer.from(screenshot, "base64");
958
1171
  const transformedImage = await sharp(imageBuffer).resize(appiumSessionConfig.size.width, appiumSessionConfig.size.height).toBuffer();
@@ -1005,86 +1218,12 @@ class GptDriver {
1005
1218
  await this.gptHandler(command);
1006
1219
  }
1007
1220
  }
1008
- /**
1009
- * Executes a specified command within the WebDriver session with configurable caching options.
1010
- *
1011
- * This is the recommended method for executing commands. It provides fine-grained control over
1012
- * caching behavior, allowing you to optimize performance and costs for repetitive test scenarios.
1013
- *
1014
- * If an `appiumHandler` is provided, it will be invoked with the WebDriver instance to perform
1015
- * the command-specific operations. After executing the handler, the executed commands get logged
1016
- * on the GPTDriver servers. If the handler execution fails or no handler is provided, the command
1017
- * gets executed by the GPTDriver using natural language processing.
1018
- *
1019
- * @param {Object} params - The execution parameters
1020
- * @param {string} params.command - The natural language command to be executed by the GPTDriver.
1021
- * Examples: "Click the login button", "Enter 'test@example.com' in the email field"
1022
- * @param {AppiumHandler} [params.appiumHandler] - An optional function that processes Appium-specific commands.
1023
- * If provided, this handler is executed instead of calling
1024
- * the GPTDriver API. Useful for performance optimization when
1025
- * you know the exact Appium commands to execute.
1026
- * @param {CachingMode} [params.cachingMode] - Controls how the GPTDriver caches this command execution.
1027
- * If not specified, uses the global caching mode set in the constructor.
1028
- * Options:
1029
- * - "NONE"
1030
- * - "FULL_SCREEN"
1031
- * - "INTERACTION_REGION"
1032
- * @param {boolean} [params.useSmartLoop] - If true, uses the smart loop execution (Cache -> AI -> Execute -> Populate)
1033
- * which optimizes execution by checking cache first and populating it after.
1034
- * Default: false (uses legacy gptHandler)
1035
- *
1036
- * @returns {Promise<void>} A promise that resolves when the command execution is complete.
1037
- *
1038
- * @throws {Error} If an error occurs during the execution of the Appium handler or while processing
1039
- * the command by the GPTDriver.
1040
- *
1041
- * @example
1042
- * // Basic usage with natural language (no caching)
1043
- * await driver.aiExecute({
1044
- * command: "Click the submit button"
1045
- * });
1046
- *
1047
- * @example
1048
- * // Full screen caching for repetitive navigation on similar screens
1049
- * await driver.aiExecute({
1050
- * command: "Navigate to the settings page",
1051
- * cachingMode: "FULL_SCREEN"
1052
- * });
1053
- *
1054
- * @example
1055
- * // Interaction region caching for repeated actions on the same button
1056
- * await driver.aiExecute({
1057
- * command: "Click the login button",
1058
- * cachingMode: "INTERACTION_REGION"
1059
- * });
1060
- *
1061
- * @example
1062
- * // With custom Appium handler as fallback
1063
- * await driver.aiExecute({
1064
- * command: "Click the login button",
1065
- * appiumHandler: async (driver) => {
1066
- * const loginBtn = await driver.$('~loginButton');
1067
- * await loginBtn.click();
1068
- * },
1069
- * cachingMode: "INTERACTION_REGION"
1070
- * });
1071
- *
1072
- * @example
1073
- * // Force fresh execution for dynamic content
1074
- * await driver.aiExecute({
1075
- * command: "Verify the current timestamp",
1076
- * cachingMode: "NONE"
1077
- * });
1078
- *
1079
- * @example
1080
- * // Using smart loop for optimized caching
1081
- * await driver.aiExecute({
1082
- * command: "Click the login button",
1083
- * useSmartLoop: true,
1084
- * cachingMode: "FULL_SCREEN"
1085
- * });
1086
- */
1087
- async aiExecute({ command, appiumHandler, cachingMode, useSmartLoop = false }) {
1221
+ async aiExecute(commandOrOptions, options) {
1222
+ const command = typeof commandOrOptions === "string" ? commandOrOptions : commandOrOptions.command;
1223
+ const opts = typeof commandOrOptions === "string" ? options : commandOrOptions;
1224
+ const appiumHandler = opts?.appiumHandler;
1225
+ const cachingMode = opts?.cachingMode;
1226
+ const useSmartLoop = opts?.useSmartLoop ?? false;
1088
1227
  if (!this.appiumSessionStarted) {
1089
1228
  await this.startSession();
1090
1229
  }
@@ -1337,9 +1476,42 @@ class GptDriver {
1337
1476
  */
1338
1477
  async executeFlow(filePath, options) {
1339
1478
  const useSmartLoop = options?.useSmartLoop ?? false;
1479
+ const configFilePath = this.configFilePath;
1480
+ let baseDir;
1481
+ let absolutePath;
1482
+ if (configFilePath) {
1483
+ let raw2;
1484
+ try {
1485
+ raw2 = await node_fs.promises.readFile(configFilePath, "utf-8");
1486
+ } catch (e) {
1487
+ const msg = `Failed to read file at ${configFilePath}: ${e?.message ?? e}`;
1488
+ globalLogger.error(msg);
1489
+ throw new Error(msg);
1490
+ }
1491
+ let json2;
1492
+ try {
1493
+ json2 = JSON.parse(raw2);
1494
+ } catch (e) {
1495
+ const msg = `Invalid JSON in flow file ${configFilePath}: ${e?.message ?? e}`;
1496
+ globalLogger.error(msg);
1497
+ throw new Error(msg);
1498
+ }
1499
+ const parsedConfigFile = ConfigSchema.parse(json2);
1500
+ if (path.isAbsolute(parsedConfigFile.testDir)) {
1501
+ baseDir = parsedConfigFile.testDir;
1502
+ } else {
1503
+ baseDir = path.resolve(path.dirname(configFilePath), parsedConfigFile.testDir);
1504
+ }
1505
+ absolutePath = path.isAbsolute(filePath) ? filePath : path.resolve(baseDir, filePath);
1506
+ } else {
1507
+ if (useSmartLoop) {
1508
+ throw new Error("Config file is required when using SmartLoop, please provide the path in the constructor");
1509
+ } else {
1510
+ absolutePath = path.resolve(filePath);
1511
+ baseDir = path.dirname(absolutePath);
1512
+ }
1513
+ }
1340
1514
  globalLogger.info(`Loading flow from file: ${filePath}`);
1341
- const absolutePath = path.resolve(filePath);
1342
- const baseDir = path.dirname(absolutePath);
1343
1515
  let raw;
1344
1516
  try {
1345
1517
  raw = await node_fs.promises.readFile(absolutePath, "utf-8");
@@ -1379,7 +1551,7 @@ ${issues}`);
1379
1551
  }
1380
1552
  return val.data;
1381
1553
  };
1382
- const expandSteps = async (steps, inheritedParams, parentDir, stack) => {
1554
+ const expandSteps = async (steps, inheritedParams, parentDir, stack, currentFilePath) => {
1383
1555
  const out = [];
1384
1556
  for (const step of steps) {
1385
1557
  if (step.type === "fileRef") {
@@ -1393,17 +1565,23 @@ ${issues}`);
1393
1565
  const child = await loadFlow(refPath);
1394
1566
  const mergedParams = { ...inheritedParams, ...step.overrides ?? {} };
1395
1567
  const childDir = path.dirname(refPath);
1396
- const childExpanded = await expandSteps(child.steps, mergedParams, childDir, [...stack, refKey]);
1568
+ const childRelativePath = path.relative(baseDir, refPath).replace(/^\.\//, "");
1569
+ const childExpanded = await expandSteps(child.steps, mergedParams, childDir, [...stack, refKey], childRelativePath);
1397
1570
  out.push(...childExpanded);
1398
1571
  } else {
1399
- const resolved = { ...step, __params: { ...inheritedParams } };
1572
+ const resolved = {
1573
+ ...step,
1574
+ __params: { ...inheritedParams },
1575
+ __filepath: currentFilePath
1576
+ };
1400
1577
  out.push(resolved);
1401
1578
  }
1402
1579
  }
1403
1580
  return out;
1404
1581
  };
1405
1582
  const effectiveParams = { ...rootFlow.params ?? {} };
1406
- const expandedSteps = await expandSteps(rootFlow.steps, effectiveParams, baseDir, [absolutePath]);
1583
+ const rootRelativePath = path.relative(baseDir, absolutePath).replace(/^\.\//, "");
1584
+ const expandedSteps = await expandSteps(rootFlow.steps, effectiveParams, baseDir, [absolutePath], rootRelativePath);
1407
1585
  if (!this.appiumSessionStarted) {
1408
1586
  await this.startSession();
1409
1587
  }
@@ -1412,7 +1590,8 @@ ${issues}`);
1412
1590
  try {
1413
1591
  for (const step of expandedSteps) {
1414
1592
  const params = step.__params ?? effectiveParams;
1415
- const prefix = `Step #${executed + 1} [${step.type}${step.optional ? ", optional" : ""}]`;
1593
+ const filepath = step.__filepath ?? rootRelativePath;
1594
+ const prefix = `Step #${executed + 1} [${step.type}${step.optional ? ", optional" : ""}] (filepath: ${filepath})`;
1416
1595
  try {
1417
1596
  switch (step.type) {
1418
1597
  case "ai": {
@@ -1423,7 +1602,8 @@ ${issues}`);
1423
1602
  const result = await executeSmartLoop(ctx, {
1424
1603
  stepNumber: this.step_number,
1425
1604
  description: instruction,
1426
- instruction
1605
+ instruction,
1606
+ filepath
1427
1607
  });
1428
1608
  if (!result.success) {
1429
1609
  throw new Error(result.error || "Smart loop execution failed");
@@ -1445,7 +1625,8 @@ ${issues}`);
1445
1625
  const result = await executeSmartLoop(ctx, {
1446
1626
  stepNumber: this.step_number,
1447
1627
  description,
1448
- instruction: description
1628
+ instruction: description,
1629
+ filepath
1449
1630
  });
1450
1631
  if (!result.success) {
1451
1632
  throw new Error(result.error || "Smart loop execution failed");
@@ -1468,7 +1649,8 @@ ${issues}`);
1468
1649
  const result = await executeSmartLoop(ctx, {
1469
1650
  stepNumber: this.step_number,
1470
1651
  description,
1471
- instruction
1652
+ instruction,
1653
+ filepath
1472
1654
  });
1473
1655
  if (!result.success) {
1474
1656
  throw new Error(result.error || "Smart loop execution failed");
@@ -1487,11 +1669,39 @@ ${issues}`);
1487
1669
  this.step_number++;
1488
1670
  break;
1489
1671
  }
1490
- case "scroll": {
1491
- globalLogger.info(`${prefix}: Scroll ${step.direction}`);
1492
- await this.takeScreenshotAndLogCodeExecution(`scroll: direction=${step.direction}`);
1493
- await this.performScroll(step.direction);
1494
- this.step_number++;
1672
+ case "scroll":
1673
+ case "swipe": {
1674
+ const description = step.descriptionText ? this.interpolateTemplate(step.descriptionText, params) : void 0;
1675
+ if (description && useSmartLoop) {
1676
+ globalLogger.info(`${prefix}: ${description}`);
1677
+ const ctx = this.createSmartLoopContext();
1678
+ const result = await executeSmartLoop(ctx, {
1679
+ stepNumber: this.step_number,
1680
+ description,
1681
+ instruction: description,
1682
+ filepath
1683
+ });
1684
+ if (!result.success) {
1685
+ throw new Error(result.error || "Smart loop execution failed");
1686
+ }
1687
+ this.step_number++;
1688
+ } else {
1689
+ globalLogger.info(`${prefix}: ${step.type} ${step.direction}`);
1690
+ await this.takeScreenshotAndLogCodeExecution(`${step.type}: direction=${step.direction}`);
1691
+ if (step.type === "swipe") {
1692
+ await this.performSwipe({
1693
+ direction: step.direction,
1694
+ x1: step.x1,
1695
+ y1: step.y1,
1696
+ x2: step.x2,
1697
+ y2: step.y2,
1698
+ duration: step.duration
1699
+ });
1700
+ } else {
1701
+ await this.performSwipe({ direction: step.direction });
1702
+ }
1703
+ this.step_number++;
1704
+ }
1495
1705
  break;
1496
1706
  }
1497
1707
  case "zoom": {
@@ -1578,10 +1788,7 @@ ${issues}`);
1578
1788
  for (const appiumCommand of executeResponse.commands) {
1579
1789
  await this.executeCommand(appiumCommand);
1580
1790
  }
1581
- if (!conditionSucceeded) {
1582
- globalLogger.debug("Command still in progress, waiting...");
1583
- await delay(1500);
1584
- }
1791
+ await delay(1500);
1585
1792
  }
1586
1793
  this.step_number = this.step_number + 1;
1587
1794
  globalLogger.info("Command execution completed successfully");
@@ -1594,8 +1801,7 @@ ${issues}`);
1594
1801
  async executeCommand(command) {
1595
1802
  const firstAction = command.data?.actions?.at(0);
1596
1803
  if (firstAction?.type === "pause" && firstAction.duration != null) {
1597
- globalLogger.debug(`Pausing for ${firstAction.duration} seconds`);
1598
- await delay(firstAction * 1e3);
1804
+ await delay(firstAction.duration * 1e3);
1599
1805
  } else if (!this.useGptDriverCloud) {
1600
1806
  const parsedUrl = new URL(command.url);
1601
1807
  parsedUrl.protocol = this.appiumSessionConfig.serverUrl.protocol;