gpt-driver-node 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -160,6 +160,14 @@ const SavableTestStoreSchema = zod.z.object({
160
160
  steps: zod.z.array(SavableStepSchema),
161
161
  params: zod.z.record(zod.z.string(), zod.z.string()).optional()
162
162
  });
163
+ const VariablesSchema = zod.z.record(zod.z.string(), zod.z.string()).optional().default({});
164
+ const ConfigSchema = zod.z.object({
165
+ testDir: zod.z.string(),
166
+ driver: zod.z.string(),
167
+ port: zod.z.number(),
168
+ apiKey: zod.z.string(),
169
+ variables: VariablesSchema
170
+ });
163
171
 
164
172
  const CACHE_SERVER_URL = "https://cache.mobileboost.io";
165
173
  const GPT_DRIVER_BASE_URL = "https://api.mobileboost.io";
@@ -442,7 +450,7 @@ async function executeSmartLoop(ctx, params) {
442
450
  try {
443
451
  globalLogger.debug(`[SmartLoop] Checking cache (Attempt ${attempt + 1}/${maxCacheAttempts})`);
444
452
  const cacheResult = await executeFromCache({
445
- apiKey: ctx.apiKey,
453
+ apiKey: ctx.organisationId,
446
454
  stepNumber: params.stepNumber,
447
455
  stepDescription: params.description,
448
456
  screenshot,
@@ -472,7 +480,7 @@ async function executeSmartLoop(ctx, params) {
472
480
  anyCacheMiss = true;
473
481
  globalLogger.info(`[SmartLoop] Cache Miss. Requesting AI agent...`);
474
482
  const agentResponse = await executeAgentStep({
475
- apiKey: ctx.apiKey,
483
+ apiKey: ctx.organisationId,
476
484
  base64_screenshot: screenshot,
477
485
  instruction: params.instruction,
478
486
  action_history: actionHistory
@@ -555,7 +563,7 @@ async function executeSmartLoop(ctx, params) {
555
563
  globalLogger.info(`[SmartLoop] Populating cache with ${currentExecutionData.length} frame(s)...`);
556
564
  try {
557
565
  await populateCache({
558
- apiKey: ctx.apiKey,
566
+ apiKey: ctx.organisationId,
559
567
  stepNumber: params.stepNumber,
560
568
  stepDescription: params.description,
561
569
  executionData: currentExecutionData,
@@ -615,6 +623,8 @@ class GptDriver {
615
623
  buildId;
616
624
  testId;
617
625
  step_number = 1;
626
+ organisationId;
627
+ configFilePath;
618
628
  // Smart loop state - maintains action history across steps for context
619
629
  globalActionHistory = [];
620
630
  /**
@@ -628,6 +638,7 @@ class GptDriver {
628
638
  *
629
639
  * @param {GptDriverConfig} config - The configuration object for initializing the GptDriver instance. This includes:
630
640
  * - `apiKey`: The API key for authenticating requests to the GPT Driver server.
641
+ * - `organisationId` (optional): The Organisation Identifier for authenticating requests to the GPT Driver Cache Server.
631
642
  * - `driver` (optional): An existing WebDriver instance.
632
643
  * - `serverConfig` (optional): Configuration for the Appium server, including URL and device settings.
633
644
  * @throws {Error} If a WebDriver instance is provided without a server URL, or if neither a WebDriver instance nor
@@ -642,6 +653,8 @@ class GptDriver {
642
653
  this.useGptDriverCloud = config.useGptDriverCloud;
643
654
  this.gptDriverBaseUrl = GPT_DRIVER_BASE_URL;
644
655
  this.cachingMode = config.cachingMode ?? "NONE";
656
+ this.organisationId = config.organisationId;
657
+ this.configFilePath = config.configFilePath;
645
658
  if (config.useGptDriverCloud) {
646
659
  if (config.serverConfig.device?.platform == null) {
647
660
  throw new Error("Platform is missing. Please specify the platform when using GPTDriver Cloud.");
@@ -821,6 +834,9 @@ class GptDriver {
821
834
  * This context provides all the callbacks needed by the smart loop executor.
822
835
  */
823
836
  createSmartLoopContext() {
837
+ if (!this.organisationId) {
838
+ throw new Error("Organisation ID is missing, please set it in the GPTDriver constructor");
839
+ }
824
840
  return {
825
841
  apiKey: this.apiKey,
826
842
  platform: this.appiumSessionConfig?.platform,
@@ -830,7 +846,8 @@ class GptDriver {
830
846
  performTap: (x, y) => this.performTap(x, y),
831
847
  performScroll: (direction) => this.performScroll(direction),
832
848
  performType: (text) => this.performType(text),
833
- logCodeExecution: async (screenshot, command) => this.logCodeExecution(screenshot, command)
849
+ logCodeExecution: async (screenshot, command) => this.logCodeExecution(screenshot, command),
850
+ organisationId: this.organisationId
834
851
  };
835
852
  }
836
853
  /**
@@ -998,86 +1015,12 @@ class GptDriver {
998
1015
  await this.gptHandler(command);
999
1016
  }
1000
1017
  }
1001
- /**
1002
- * Executes a specified command within the WebDriver session with configurable caching options.
1003
- *
1004
- * This is the recommended method for executing commands. It provides fine-grained control over
1005
- * caching behavior, allowing you to optimize performance and costs for repetitive test scenarios.
1006
- *
1007
- * If an `appiumHandler` is provided, it will be invoked with the WebDriver instance to perform
1008
- * the command-specific operations. After executing the handler, the executed commands get logged
1009
- * on the GPTDriver servers. If the handler execution fails or no handler is provided, the command
1010
- * gets executed by the GPTDriver using natural language processing.
1011
- *
1012
- * @param {Object} params - The execution parameters
1013
- * @param {string} params.command - The natural language command to be executed by the GPTDriver.
1014
- * Examples: "Click the login button", "Enter 'test@example.com' in the email field"
1015
- * @param {AppiumHandler} [params.appiumHandler] - An optional function that processes Appium-specific commands.
1016
- * If provided, this handler is executed instead of calling
1017
- * the GPTDriver API. Useful for performance optimization when
1018
- * you know the exact Appium commands to execute.
1019
- * @param {CachingMode} [params.cachingMode] - Controls how the GPTDriver caches this command execution.
1020
- * If not specified, uses the global caching mode set in the constructor.
1021
- * Options:
1022
- * - "NONE"
1023
- * - "FULL_SCREEN"
1024
- * - "INTERACTION_REGION"
1025
- * @param {boolean} [params.useSmartLoop] - If true, uses the smart loop execution (Cache -> AI -> Execute -> Populate)
1026
- * which optimizes execution by checking cache first and populating it after.
1027
- * Default: false (uses legacy gptHandler)
1028
- *
1029
- * @returns {Promise<void>} A promise that resolves when the command execution is complete.
1030
- *
1031
- * @throws {Error} If an error occurs during the execution of the Appium handler or while processing
1032
- * the command by the GPTDriver.
1033
- *
1034
- * @example
1035
- * // Basic usage with natural language (no caching)
1036
- * await driver.aiExecute({
1037
- * command: "Click the submit button"
1038
- * });
1039
- *
1040
- * @example
1041
- * // Full screen caching for repetitive navigation on similar screens
1042
- * await driver.aiExecute({
1043
- * command: "Navigate to the settings page",
1044
- * cachingMode: "FULL_SCREEN"
1045
- * });
1046
- *
1047
- * @example
1048
- * // Interaction region caching for repeated actions on the same button
1049
- * await driver.aiExecute({
1050
- * command: "Click the login button",
1051
- * cachingMode: "INTERACTION_REGION"
1052
- * });
1053
- *
1054
- * @example
1055
- * // With custom Appium handler as fallback
1056
- * await driver.aiExecute({
1057
- * command: "Click the login button",
1058
- * appiumHandler: async (driver) => {
1059
- * const loginBtn = await driver.$('~loginButton');
1060
- * await loginBtn.click();
1061
- * },
1062
- * cachingMode: "INTERACTION_REGION"
1063
- * });
1064
- *
1065
- * @example
1066
- * // Force fresh execution for dynamic content
1067
- * await driver.aiExecute({
1068
- * command: "Verify the current timestamp",
1069
- * cachingMode: "NONE"
1070
- * });
1071
- *
1072
- * @example
1073
- * // Using smart loop for optimized caching
1074
- * await driver.aiExecute({
1075
- * command: "Click the login button",
1076
- * useSmartLoop: true,
1077
- * cachingMode: "FULL_SCREEN"
1078
- * });
1079
- */
1080
- async aiExecute({ command, appiumHandler, cachingMode, useSmartLoop = false }) {
1018
+ async aiExecute(commandOrOptions, options) {
1019
+ const command = typeof commandOrOptions === "string" ? commandOrOptions : commandOrOptions.command;
1020
+ const opts = typeof commandOrOptions === "string" ? options : commandOrOptions;
1021
+ const appiumHandler = opts?.appiumHandler;
1022
+ const cachingMode = opts?.cachingMode;
1023
+ const useSmartLoop = opts?.useSmartLoop ?? false;
1081
1024
  if (!this.appiumSessionStarted) {
1082
1025
  await this.startSession();
1083
1026
  }
@@ -1330,9 +1273,42 @@ class GptDriver {
1330
1273
  */
1331
1274
  async executeFlow(filePath, options) {
1332
1275
  const useSmartLoop = options?.useSmartLoop ?? false;
1276
+ const configFilePath = this.configFilePath;
1277
+ let baseDir;
1278
+ let absolutePath;
1279
+ if (configFilePath) {
1280
+ let raw2;
1281
+ try {
1282
+ raw2 = await node_fs.promises.readFile(configFilePath, "utf-8");
1283
+ } catch (e) {
1284
+ const msg = `Failed to read file at ${configFilePath}: ${e?.message ?? e}`;
1285
+ globalLogger.error(msg);
1286
+ throw new Error(msg);
1287
+ }
1288
+ let json2;
1289
+ try {
1290
+ json2 = JSON.parse(raw2);
1291
+ } catch (e) {
1292
+ const msg = `Invalid JSON in flow file ${configFilePath}: ${e?.message ?? e}`;
1293
+ globalLogger.error(msg);
1294
+ throw new Error(msg);
1295
+ }
1296
+ const parsedConfigFile = ConfigSchema.parse(json2);
1297
+ if (path.isAbsolute(parsedConfigFile.testDir)) {
1298
+ baseDir = parsedConfigFile.testDir;
1299
+ } else {
1300
+ baseDir = path.resolve(path.dirname(configFilePath), parsedConfigFile.testDir);
1301
+ }
1302
+ absolutePath = path.isAbsolute(filePath) ? filePath : path.resolve(baseDir, filePath);
1303
+ } else {
1304
+ if (useSmartLoop) {
1305
+ throw new Error("Config file is required when using SmartLoop, please provide the path in the constructor");
1306
+ } else {
1307
+ absolutePath = path.resolve(filePath);
1308
+ baseDir = path.dirname(absolutePath);
1309
+ }
1310
+ }
1333
1311
  globalLogger.info(`Loading flow from file: ${filePath}`);
1334
- const absolutePath = path.resolve(filePath);
1335
- const baseDir = path.dirname(absolutePath);
1336
1312
  let raw;
1337
1313
  try {
1338
1314
  raw = await node_fs.promises.readFile(absolutePath, "utf-8");
@@ -1571,10 +1547,7 @@ ${issues}`);
1571
1547
  for (const appiumCommand of executeResponse.commands) {
1572
1548
  await this.executeCommand(appiumCommand);
1573
1549
  }
1574
- if (!conditionSucceeded) {
1575
- globalLogger.debug("Command still in progress, waiting...");
1576
- await delay(1500);
1577
- }
1550
+ await delay(1500);
1578
1551
  }
1579
1552
  this.step_number = this.step_number + 1;
1580
1553
  globalLogger.info("Command execution completed successfully");
@@ -1587,8 +1560,7 @@ ${issues}`);
1587
1560
  async executeCommand(command) {
1588
1561
  const firstAction = command.data?.actions?.at(0);
1589
1562
  if (firstAction?.type === "pause" && firstAction.duration != null) {
1590
- globalLogger.debug(`Pausing for ${firstAction.duration} seconds`);
1591
- await delay(firstAction * 1e3);
1563
+ await delay(firstAction.duration * 1e3);
1592
1564
  } else if (!this.useGptDriverCloud) {
1593
1565
  const parsedUrl = new URL(command.url);
1594
1566
  parsedUrl.protocol = this.appiumSessionConfig.serverUrl.protocol;
package/dist/index.d.cts CHANGED
@@ -22,6 +22,8 @@ interface GptDriverConfig {
22
22
  useGptDriverCloud?: boolean;
23
23
  buildId?: string;
24
24
  cachingMode?: CachingMode;
25
+ organisationId?: string;
26
+ configFilePath?: string;
25
27
  }
26
28
  /**
27
29
  * Parameters for opening a deep link url in the Appium session.
@@ -396,6 +398,8 @@ declare class GptDriver {
396
398
  private buildId?;
397
399
  private testId?;
398
400
  private step_number;
401
+ private organisationId?;
402
+ private configFilePath?;
399
403
  private globalActionHistory;
400
404
  /**
401
405
  * Creates an instance of the GptDriver class.
@@ -408,6 +412,7 @@ declare class GptDriver {
408
412
  *
409
413
  * @param {GptDriverConfig} config - The configuration object for initializing the GptDriver instance. This includes:
410
414
  * - `apiKey`: The API key for authenticating requests to the GPT Driver server.
415
+ * - `organisationId` (optional): The Organisation Identifier for authenticating requests to the GPT Driver Cache Server.
411
416
  * - `driver` (optional): An existing WebDriver instance.
412
417
  * - `serverConfig` (optional): Configuration for the Appium server, including URL and device settings.
413
418
  * @throws {Error} If a WebDriver instance is provided without a server URL, or if neither a WebDriver instance nor
@@ -492,22 +497,26 @@ declare class GptDriver {
492
497
  * on the GPTDriver servers. If the handler execution fails or no handler is provided, the command
493
498
  * gets executed by the GPTDriver using natural language processing.
494
499
  *
495
- * @param {Object} params - The execution parameters
496
- * @param {string} params.command - The natural language command to be executed by the GPTDriver.
497
- * Examples: "Click the login button", "Enter 'test@example.com' in the email field"
498
- * @param {AppiumHandler} [params.appiumHandler] - An optional function that processes Appium-specific commands.
500
+ * @param {string | Object} commandOrOptions - Either a command string or an options object
501
+ * @param {string} commandOrOptions.command - The natural language command to be executed by the GPTDriver.
502
+ * Examples: "Click the login button", "Enter 'test@example.com' in the email field"
503
+ * @param {AppiumHandler} [commandOrOptions.appiumHandler] - An optional function that processes Appium-specific commands.
504
+ * @param {CachingMode} [commandOrOptions.cachingMode] - Controls how the GPTDriver caches this command execution.
505
+ * @param {boolean} [commandOrOptions.useSmartLoop] - If true, uses the smart loop execution. Default: false
506
+ * @param {Object} [options] - Additional options when first parameter is a command string
507
+ * @param {AppiumHandler} [options.appiumHandler] - An optional function that processes Appium-specific commands.
499
508
  * If provided, this handler is executed instead of calling
500
509
  * the GPTDriver API. Useful for performance optimization when
501
510
  * you know the exact Appium commands to execute.
502
- * @param {CachingMode} [params.cachingMode] - Controls how the GPTDriver caches this command execution.
503
- * If not specified, uses the global caching mode set in the constructor.
504
- * Options:
505
- * - "NONE"
506
- * - "FULL_SCREEN"
507
- * - "INTERACTION_REGION"
508
- * @param {boolean} [params.useSmartLoop] - If true, uses the smart loop execution (Cache -> AI -> Execute -> Populate)
509
- * which optimizes execution by checking cache first and populating it after.
510
- * Default: false (uses legacy gptHandler)
511
+ * @param {CachingMode} [options.cachingMode] - Controls how the GPTDriver caches this command execution.
512
+ * If not specified, uses the global caching mode set in the constructor.
513
+ * Options:
514
+ * - "NONE"
515
+ * - "FULL_SCREEN"
516
+ * - "INTERACTION_REGION"
517
+ * @param {boolean} [options.useSmartLoop] - If true, uses the smart loop execution (Cache -> AI -> Execute -> Populate)
518
+ * which optimizes execution by checking cache first and populating it after.
519
+ * Default: false (uses legacy gptHandler)
511
520
  *
512
521
  * @returns {Promise<void>} A promise that resolves when the command execution is complete.
513
522
  *
@@ -515,27 +524,30 @@ declare class GptDriver {
515
524
  * the command by the GPTDriver.
516
525
  *
517
526
  * @example
518
- * // Basic usage with natural language (no caching)
527
+ * // Basic usage with command string
528
+ * await driver.aiExecute("Click the submit button");
529
+ *
530
+ * @example
531
+ * // Basic usage with options object
519
532
  * await driver.aiExecute({
520
533
  * command: "Click the submit button"
521
534
  * });
522
535
  *
523
536
  * @example
524
- * // Full screen caching for repetitive navigation on similar screens
525
- * await driver.aiExecute({
526
- * command: "Navigate to the settings page",
537
+ * // Command string with options
538
+ * await driver.aiExecute("Navigate to settings", {
527
539
  * cachingMode: "FULL_SCREEN"
528
540
  * });
529
541
  *
530
542
  * @example
531
- * // Interaction region caching for repeated actions on the same button
543
+ * // Full screen caching using options object
532
544
  * await driver.aiExecute({
533
- * command: "Click the login button",
534
- * cachingMode: "INTERACTION_REGION"
545
+ * command: "Navigate to the settings page",
546
+ * cachingMode: "FULL_SCREEN"
535
547
  * });
536
548
  *
537
549
  * @example
538
- * // With custom Appium handler as fallback
550
+ * // With custom Appium handler
539
551
  * await driver.aiExecute({
540
552
  * command: "Click the login button",
541
553
  * appiumHandler: async (driver) => {
@@ -546,13 +558,6 @@ declare class GptDriver {
546
558
  * });
547
559
  *
548
560
  * @example
549
- * // Force fresh execution for dynamic content
550
- * await driver.aiExecute({
551
- * command: "Verify the current timestamp",
552
- * cachingMode: "NONE"
553
- * });
554
- *
555
- * @example
556
561
  * // Using smart loop for optimized caching
557
562
  * await driver.aiExecute({
558
563
  * command: "Click the login button",
@@ -560,12 +565,17 @@ declare class GptDriver {
560
565
  * cachingMode: "FULL_SCREEN"
561
566
  * });
562
567
  */
563
- aiExecute({ command, appiumHandler, cachingMode, useSmartLoop }: {
568
+ aiExecute(options: {
564
569
  command: string;
565
570
  appiumHandler?: AppiumHandler;
566
571
  cachingMode?: CachingMode;
567
572
  useSmartLoop?: boolean;
568
573
  }): Promise<void>;
574
+ aiExecute(command: string, options?: {
575
+ appiumHandler?: AppiumHandler;
576
+ cachingMode?: CachingMode;
577
+ useSmartLoop?: boolean;
578
+ }): Promise<void>;
569
579
  /**
570
580
  * Asserts a single condition using the GPTDriver.
571
581
  *
package/dist/index.mjs CHANGED
@@ -158,6 +158,14 @@ const SavableTestStoreSchema = z.object({
158
158
  steps: z.array(SavableStepSchema),
159
159
  params: z.record(z.string(), z.string()).optional()
160
160
  });
161
+ const VariablesSchema = z.record(z.string(), z.string()).optional().default({});
162
+ const ConfigSchema = z.object({
163
+ testDir: z.string(),
164
+ driver: z.string(),
165
+ port: z.number(),
166
+ apiKey: z.string(),
167
+ variables: VariablesSchema
168
+ });
161
169
 
162
170
  const CACHE_SERVER_URL = "https://cache.mobileboost.io";
163
171
  const GPT_DRIVER_BASE_URL = "https://api.mobileboost.io";
@@ -440,7 +448,7 @@ async function executeSmartLoop(ctx, params) {
440
448
  try {
441
449
  globalLogger.debug(`[SmartLoop] Checking cache (Attempt ${attempt + 1}/${maxCacheAttempts})`);
442
450
  const cacheResult = await executeFromCache({
443
- apiKey: ctx.apiKey,
451
+ apiKey: ctx.organisationId,
444
452
  stepNumber: params.stepNumber,
445
453
  stepDescription: params.description,
446
454
  screenshot,
@@ -470,7 +478,7 @@ async function executeSmartLoop(ctx, params) {
470
478
  anyCacheMiss = true;
471
479
  globalLogger.info(`[SmartLoop] Cache Miss. Requesting AI agent...`);
472
480
  const agentResponse = await executeAgentStep({
473
- apiKey: ctx.apiKey,
481
+ apiKey: ctx.organisationId,
474
482
  base64_screenshot: screenshot,
475
483
  instruction: params.instruction,
476
484
  action_history: actionHistory
@@ -553,7 +561,7 @@ async function executeSmartLoop(ctx, params) {
553
561
  globalLogger.info(`[SmartLoop] Populating cache with ${currentExecutionData.length} frame(s)...`);
554
562
  try {
555
563
  await populateCache({
556
- apiKey: ctx.apiKey,
564
+ apiKey: ctx.organisationId,
557
565
  stepNumber: params.stepNumber,
558
566
  stepDescription: params.description,
559
567
  executionData: currentExecutionData,
@@ -613,6 +621,8 @@ class GptDriver {
613
621
  buildId;
614
622
  testId;
615
623
  step_number = 1;
624
+ organisationId;
625
+ configFilePath;
616
626
  // Smart loop state - maintains action history across steps for context
617
627
  globalActionHistory = [];
618
628
  /**
@@ -626,6 +636,7 @@ class GptDriver {
626
636
  *
627
637
  * @param {GptDriverConfig} config - The configuration object for initializing the GptDriver instance. This includes:
628
638
  * - `apiKey`: The API key for authenticating requests to the GPT Driver server.
639
+ * - `organisationId` (optional): The Organisation Identifier for authenticating requests to the GPT Driver Cache Server.
629
640
  * - `driver` (optional): An existing WebDriver instance.
630
641
  * - `serverConfig` (optional): Configuration for the Appium server, including URL and device settings.
631
642
  * @throws {Error} If a WebDriver instance is provided without a server URL, or if neither a WebDriver instance nor
@@ -640,6 +651,8 @@ class GptDriver {
640
651
  this.useGptDriverCloud = config.useGptDriverCloud;
641
652
  this.gptDriverBaseUrl = GPT_DRIVER_BASE_URL;
642
653
  this.cachingMode = config.cachingMode ?? "NONE";
654
+ this.organisationId = config.organisationId;
655
+ this.configFilePath = config.configFilePath;
643
656
  if (config.useGptDriverCloud) {
644
657
  if (config.serverConfig.device?.platform == null) {
645
658
  throw new Error("Platform is missing. Please specify the platform when using GPTDriver Cloud.");
@@ -819,6 +832,9 @@ class GptDriver {
819
832
  * This context provides all the callbacks needed by the smart loop executor.
820
833
  */
821
834
  createSmartLoopContext() {
835
+ if (!this.organisationId) {
836
+ throw new Error("Organisation ID is missing, please set it in the GPTDriver constructor");
837
+ }
822
838
  return {
823
839
  apiKey: this.apiKey,
824
840
  platform: this.appiumSessionConfig?.platform,
@@ -828,7 +844,8 @@ class GptDriver {
828
844
  performTap: (x, y) => this.performTap(x, y),
829
845
  performScroll: (direction) => this.performScroll(direction),
830
846
  performType: (text) => this.performType(text),
831
- logCodeExecution: async (screenshot, command) => this.logCodeExecution(screenshot, command)
847
+ logCodeExecution: async (screenshot, command) => this.logCodeExecution(screenshot, command),
848
+ organisationId: this.organisationId
832
849
  };
833
850
  }
834
851
  /**
@@ -996,86 +1013,12 @@ class GptDriver {
996
1013
  await this.gptHandler(command);
997
1014
  }
998
1015
  }
999
- /**
1000
- * Executes a specified command within the WebDriver session with configurable caching options.
1001
- *
1002
- * This is the recommended method for executing commands. It provides fine-grained control over
1003
- * caching behavior, allowing you to optimize performance and costs for repetitive test scenarios.
1004
- *
1005
- * If an `appiumHandler` is provided, it will be invoked with the WebDriver instance to perform
1006
- * the command-specific operations. After executing the handler, the executed commands get logged
1007
- * on the GPTDriver servers. If the handler execution fails or no handler is provided, the command
1008
- * gets executed by the GPTDriver using natural language processing.
1009
- *
1010
- * @param {Object} params - The execution parameters
1011
- * @param {string} params.command - The natural language command to be executed by the GPTDriver.
1012
- * Examples: "Click the login button", "Enter 'test@example.com' in the email field"
1013
- * @param {AppiumHandler} [params.appiumHandler] - An optional function that processes Appium-specific commands.
1014
- * If provided, this handler is executed instead of calling
1015
- * the GPTDriver API. Useful for performance optimization when
1016
- * you know the exact Appium commands to execute.
1017
- * @param {CachingMode} [params.cachingMode] - Controls how the GPTDriver caches this command execution.
1018
- * If not specified, uses the global caching mode set in the constructor.
1019
- * Options:
1020
- * - "NONE"
1021
- * - "FULL_SCREEN"
1022
- * - "INTERACTION_REGION"
1023
- * @param {boolean} [params.useSmartLoop] - If true, uses the smart loop execution (Cache -> AI -> Execute -> Populate)
1024
- * which optimizes execution by checking cache first and populating it after.
1025
- * Default: false (uses legacy gptHandler)
1026
- *
1027
- * @returns {Promise<void>} A promise that resolves when the command execution is complete.
1028
- *
1029
- * @throws {Error} If an error occurs during the execution of the Appium handler or while processing
1030
- * the command by the GPTDriver.
1031
- *
1032
- * @example
1033
- * // Basic usage with natural language (no caching)
1034
- * await driver.aiExecute({
1035
- * command: "Click the submit button"
1036
- * });
1037
- *
1038
- * @example
1039
- * // Full screen caching for repetitive navigation on similar screens
1040
- * await driver.aiExecute({
1041
- * command: "Navigate to the settings page",
1042
- * cachingMode: "FULL_SCREEN"
1043
- * });
1044
- *
1045
- * @example
1046
- * // Interaction region caching for repeated actions on the same button
1047
- * await driver.aiExecute({
1048
- * command: "Click the login button",
1049
- * cachingMode: "INTERACTION_REGION"
1050
- * });
1051
- *
1052
- * @example
1053
- * // With custom Appium handler as fallback
1054
- * await driver.aiExecute({
1055
- * command: "Click the login button",
1056
- * appiumHandler: async (driver) => {
1057
- * const loginBtn = await driver.$('~loginButton');
1058
- * await loginBtn.click();
1059
- * },
1060
- * cachingMode: "INTERACTION_REGION"
1061
- * });
1062
- *
1063
- * @example
1064
- * // Force fresh execution for dynamic content
1065
- * await driver.aiExecute({
1066
- * command: "Verify the current timestamp",
1067
- * cachingMode: "NONE"
1068
- * });
1069
- *
1070
- * @example
1071
- * // Using smart loop for optimized caching
1072
- * await driver.aiExecute({
1073
- * command: "Click the login button",
1074
- * useSmartLoop: true,
1075
- * cachingMode: "FULL_SCREEN"
1076
- * });
1077
- */
1078
- async aiExecute({ command, appiumHandler, cachingMode, useSmartLoop = false }) {
1016
+ async aiExecute(commandOrOptions, options) {
1017
+ const command = typeof commandOrOptions === "string" ? commandOrOptions : commandOrOptions.command;
1018
+ const opts = typeof commandOrOptions === "string" ? options : commandOrOptions;
1019
+ const appiumHandler = opts?.appiumHandler;
1020
+ const cachingMode = opts?.cachingMode;
1021
+ const useSmartLoop = opts?.useSmartLoop ?? false;
1079
1022
  if (!this.appiumSessionStarted) {
1080
1023
  await this.startSession();
1081
1024
  }
@@ -1328,9 +1271,42 @@ class GptDriver {
1328
1271
  */
1329
1272
  async executeFlow(filePath, options) {
1330
1273
  const useSmartLoop = options?.useSmartLoop ?? false;
1274
+ const configFilePath = this.configFilePath;
1275
+ let baseDir;
1276
+ let absolutePath;
1277
+ if (configFilePath) {
1278
+ let raw2;
1279
+ try {
1280
+ raw2 = await promises.readFile(configFilePath, "utf-8");
1281
+ } catch (e) {
1282
+ const msg = `Failed to read file at ${configFilePath}: ${e?.message ?? e}`;
1283
+ globalLogger.error(msg);
1284
+ throw new Error(msg);
1285
+ }
1286
+ let json2;
1287
+ try {
1288
+ json2 = JSON.parse(raw2);
1289
+ } catch (e) {
1290
+ const msg = `Invalid JSON in flow file ${configFilePath}: ${e?.message ?? e}`;
1291
+ globalLogger.error(msg);
1292
+ throw new Error(msg);
1293
+ }
1294
+ const parsedConfigFile = ConfigSchema.parse(json2);
1295
+ if (path.isAbsolute(parsedConfigFile.testDir)) {
1296
+ baseDir = parsedConfigFile.testDir;
1297
+ } else {
1298
+ baseDir = path.resolve(path.dirname(configFilePath), parsedConfigFile.testDir);
1299
+ }
1300
+ absolutePath = path.isAbsolute(filePath) ? filePath : path.resolve(baseDir, filePath);
1301
+ } else {
1302
+ if (useSmartLoop) {
1303
+ throw new Error("Config file is required when using SmartLoop, please provide the path in the constructor");
1304
+ } else {
1305
+ absolutePath = path.resolve(filePath);
1306
+ baseDir = path.dirname(absolutePath);
1307
+ }
1308
+ }
1331
1309
  globalLogger.info(`Loading flow from file: ${filePath}`);
1332
- const absolutePath = path.resolve(filePath);
1333
- const baseDir = path.dirname(absolutePath);
1334
1310
  let raw;
1335
1311
  try {
1336
1312
  raw = await promises.readFile(absolutePath, "utf-8");
@@ -1569,10 +1545,7 @@ ${issues}`);
1569
1545
  for (const appiumCommand of executeResponse.commands) {
1570
1546
  await this.executeCommand(appiumCommand);
1571
1547
  }
1572
- if (!conditionSucceeded) {
1573
- globalLogger.debug("Command still in progress, waiting...");
1574
- await delay(1500);
1575
- }
1548
+ await delay(1500);
1576
1549
  }
1577
1550
  this.step_number = this.step_number + 1;
1578
1551
  globalLogger.info("Command execution completed successfully");
@@ -1585,8 +1558,7 @@ ${issues}`);
1585
1558
  async executeCommand(command) {
1586
1559
  const firstAction = command.data?.actions?.at(0);
1587
1560
  if (firstAction?.type === "pause" && firstAction.duration != null) {
1588
- globalLogger.debug(`Pausing for ${firstAction.duration} seconds`);
1589
- await delay(firstAction * 1e3);
1561
+ await delay(firstAction.duration * 1e3);
1590
1562
  } else if (!this.useGptDriverCloud) {
1591
1563
  const parsedUrl = new URL(command.url);
1592
1564
  parsedUrl.protocol = this.appiumSessionConfig.serverUrl.protocol;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gpt-driver-node",
3
- "version": "1.0.0",
3
+ "version": "1.0.2",
4
4
  "main": "./dist/index.cjs",
5
5
  "module": "./dist/index.mjs",
6
6
  "types": "./dist/index.d.cts",