npm - gpt-driver-node - Versions diffs - 1.0.0-alpha.9 → 1.0.0 - Mend

gpt-driver-node 1.0.0-alpha.9 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/index.cjs (changed)

@@ -1,7 +1,13 @@
 'use strict';
+var node_fs = require('node:fs');
+var path = require('node:path');
 var axios = require('axios');
 var sharp = require('sharp');
+var webdriverio = require('webdriverio');
+var winston = require('winston');
+var zod = require('zod');
+var crypto = require('node:crypto');
 const delay = async (milliseconds) => {
   await new Promise((resolve) => setTimeout(resolve, milliseconds));
@@ -17,12 +23,600 @@ function buildUrl(base, extraPath) {
   return `${baseUrl}${extraPath}`;
 }
+// ANSI escape sequences used to style console log output.
+const colors = {
+  reset: "\x1B[0m",
+  bold: "\x1B[1m",
+  gray: "\x1B[90m",
+  red: "\x1B[31m",
+  green: "\x1B[32m",
+  yellow: "\x1B[33m",
+  cyan: "\x1B[36m"
+};
+// Text decorators keyed by style name. Each one prefixes its argument with the
+// listed escape codes and appends the reset code.
+const logStyles = Object.fromEntries(
+  [
+    ["bold", ["bold"]],
+    ["cyan", ["cyan"]],
+    ["yellow", ["yellow"]],
+    ["green", ["green"]],
+    ["red", ["red"]],
+    ["gray", ["gray"]],
+    ["highlight", ["bold", "cyan"]],
+    ["success", ["bold", "green"]],
+    ["error", ["bold", "red"]],
+    ["warning", ["bold", "yellow"]]
+  ].map(([styleName, codeNames]) => [
+    styleName,
+    // Codes are read from `colors` at call time, not captured up front.
+    (text) => `${codeNames.map((codeName) => colors[codeName]).join("")}${text}${colors.reset}`
+  ])
+);
+// Shared winston logger. The minimum level is read from GPT_DRIVER_LOG_LEVEL
+// and falls back to "info" when that variable is unset or empty.
+const globalLogger = winston.createLogger({
+  level: process.env.GPT_DRIVER_LOG_LEVEL || "info",
+  format: winston.format.combine(
+    winston.format.timestamp({ format: "YYYY-MM-DD HH:mm:ss" }),
+    winston.format.errors({ stack: true }),
+    // Plain layout: "<timestamp> [LEVEL]: <message>", stack trace on the next line.
+    winston.format.printf(({ timestamp, level, message, stack }) => {
+      const plainLine = `${timestamp} [${level.toUpperCase()}]: ${message}`;
+      if (!stack) {
+        return plainLine;
+      }
+      return `${plainLine}\n${stack}`;
+    })
+  ),
+  transports: [
+    new winston.transports.Console({
+      format: winston.format.combine(
+        // Same layout with ANSI styling; levels without a style stay uncolored.
+        winston.format.printf(({ timestamp, level, message, stack }) => {
+          const upperLevel = level.toUpperCase();
+          let levelLabel = upperLevel;
+          if (level === "error") {
+            levelLabel = logStyles.error(upperLevel);
+          } else if (level === "warn") {
+            levelLabel = logStyles.warning(upperLevel);
+          } else if (level === "info") {
+            levelLabel = logStyles.cyan(upperLevel);
+          } else if (level === "debug") {
+            levelLabel = logStyles.gray(upperLevel);
+          }
+          const styledLine = `${logStyles.gray(timestamp)} [${levelLabel}]: ${message}`;
+          if (!stack) {
+            return styledLine;
+          }
+          return `${styledLine}\n${logStyles.gray(stack)}`;
+        })
+      )
+    })
+  ]
+});
+// Fields accepted on every savable step; all three are optional.
+const SavableStepBaseSchema = zod.z.object({
+  id: zod.z.number().optional(),
+  descriptionText: zod.z.string().optional(),
+  optional: zod.z.boolean().optional()
+});
+// "tap" step: base fields plus optional element id, timeout, LLM-only flag and crop image.
+const SavableTapStepSchema = SavableStepBaseSchema.extend({
+  type: zod.z.literal("tap"),
+  elementId: zod.z.string().optional(),
+  timeout: zod.z.number().optional(),
+  useLlmOnly: zod.z.boolean().optional(),
+  cropBase64: zod.z.string().optional()
+});
+// "assert" step: declares the same extra fields as the "tap" step.
+const SavableAssertStepSchema = SavableStepBaseSchema.extend({
+  type: zod.z.literal("assert"),
+  elementId: zod.z.string().optional(),
+  timeout: zod.z.number().optional(),
+  useLlmOnly: zod.z.boolean().optional(),
+  cropBase64: zod.z.string().optional()
+});
+// "type" step: requires the text string.
+const SavableTypeStepSchema = SavableStepBaseSchema.extend({
+  type: zod.z.literal("type"),
+  text: zod.z.string()
+});
+// "scroll" step: direction must be "up" or "down".
+const SavableScrollStepSchema = SavableStepBaseSchema.extend({
+  type: zod.z.literal("scroll"),
+  direction: zod.z.enum(["up", "down"])
+});
+// "zoom" step: direction must be "in" or "out".
+const SavableZoomStepSchema = SavableStepBaseSchema.extend({
+  type: zod.z.literal("zoom"),
+  direction: zod.z.enum(["in", "out"])
+});
+// "scrollUntil" step: required direction; text, elementId and maxScrolls are optional.
+const SavableScrollUntilStepSchema = SavableStepBaseSchema.extend({
+  type: zod.z.literal("scrollUntil"),
+  text: zod.z.string().optional(),
+  elementId: zod.z.string().optional(),
+  direction: zod.z.enum(["up", "down"]),
+  maxScrolls: zod.z.number().optional()
+});
+// "deeplink" step: requires a url string (validated only as a string here).
+const SavableDeeplinkStepSchema = SavableStepBaseSchema.extend({
+  type: zod.z.literal("deeplink"),
+  url: zod.z.string()
+});
+// "ai" step: requires an instruction string.
+const SavableAIStepSchema = SavableStepBaseSchema.extend({
+  type: zod.z.literal("ai"),
+  instruction: zod.z.string()
+});
+// "fileRef" step: requires a path; overrides is an optional string-to-string record.
+const SavableFileRefStepSchema = SavableStepBaseSchema.extend({
+  type: zod.z.literal("fileRef"),
+  path: zod.z.string(),
+  overrides: zod.z.record(zod.z.string(), zod.z.string()).optional()
+});
+// Union of all step schemas, discriminated on the literal "type" field.
+const SavableStepSchema = zod.z.discriminatedUnion("type", [
+  SavableTapStepSchema,
+  // type: 'tap'
+  SavableAssertStepSchema,
+  // type: 'assert'
+  SavableTypeStepSchema,
+  // type: 'type'
+  SavableScrollStepSchema,
+  // type: 'scroll'
+  SavableZoomStepSchema,
+  // type: 'zoom'
+  SavableScrollUntilStepSchema,
+  // type: 'scrollUntil'
+  SavableDeeplinkStepSchema,
+  // type: 'deeplink'
+  SavableAIStepSchema,
+  // type: 'ai'
+  SavableFileRefStepSchema
+  // type: 'fileRef'
+]);
+// A stored test: required name and step list, optional string-to-string params record.
+const SavableTestStoreSchema = zod.z.object({
+  name: zod.z.string(),
+  steps: zod.z.array(SavableStepSchema),
+  params: zod.z.record(zod.z.string(), zod.z.string()).optional()
+});
+// Base URL for the /execute-from-cache and /populate-cache requests.
+const CACHE_SERVER_URL = "https://cache.mobileboost.io";
+// Base URL assigned to GptDriver#gptDriverBaseUrl.
+const GPT_DRIVER_BASE_URL = "https://api.mobileboost.io";
+// Divisor applied to screenshot widths and multiplier/divisor for command coordinates.
+const RESCALE_FACTOR = 4;
+// Upper bound on smart-loop iterations for one step.
+const SMART_LOOP_MAX_ITERATIONS = 15;
+// Divided by CACHE_CHECK_INTERVAL_MS to get the number of cache checks per iteration.
+const CACHE_RETRY_MS = 2000;
+// Delay in milliseconds between two consecutive cache checks.
+const CACHE_CHECK_INTERVAL_MS = 500;
+/**
+ * Builds the SHA-256 hex digest that identifies a step in the cache.
+ *
+ * The digest input is the plain concatenation, in this order, of: apiKey,
+ * filepath (empty when falsy), stepNumber, description, the lower-cased
+ * platform (empty when missing) and "<width>x<height>" (empty when no
+ * resolution is given).
+ *
+ * @param {string} apiKey
+ * @param {string} [filepath]
+ * @param {number} stepNumber
+ * @param {string} description
+ * @param {string} [platform]
+ * @param {{width: number, height: number}} [resolution]
+ * @returns {string} Hex-encoded SHA-256 digest.
+ */
+function generateCacheHash(apiKey, filepath, stepNumber, description, platform, resolution) {
+  let resolutionPart = "";
+  if (resolution) {
+    resolutionPart = `${resolution.width}x${resolution.height}`;
+  }
+  const platformPart = platform?.toLowerCase() || "";
+  const hashInput = `${apiKey}${filepath || ""}${stepNumber}${description}${platformPart}${resolutionPart}`;
+  const hasher = crypto.createHash("sha256");
+  hasher.update(hashInput);
+  return hasher.digest("hex");
+}
+/**
+ * Rescales the coordinates embedded in a command string by RESCALE_FACTOR.
+ *
+ * Two notations are handled:
+ *  - keyed: every "x=<n>" / "y=<n>" occurrence is rescaled;
+ *  - positional: the first ";<x>;<y>" pair (delimited by ";" or the string
+ *    boundaries) is rescaled. This form is only tried when no keyed
+ *    coordinate is present.
+ *
+ * @param {string} cmd - Command text.
+ * @param {string} operation - "multiply" scales up; any other value divides.
+ * @returns {string} The command with rounded, rescaled coordinates.
+ */
+function scaleCommand(cmd, operation) {
+  const rescale = (digits) => {
+    const value = parseInt(digits, 10);
+    return operation === "multiply" ? Math.round(value * RESCALE_FACTOR) : Math.round(value / RESCALE_FACTOR);
+  };
+  if (!cmd.match(/([xy])=(\d+)/)) {
+    return cmd.replace(
+      /(^|;)(\d+);(\d+)(;|$)/,
+      (_match, lead, rawX, rawY, trail) => `${lead}${rescale(rawX)};${rescale(rawY)}${trail}`
+    );
+  }
+  return cmd.replace(/([xy])=(\d+)/g, (_match, axis, digits) => `${axis}=${rescale(digits)}`);
+}
+/**
+ * Downscales a base64 screenshot for the cache service.
+ *
+ * An optional "data:image/<type>;base64," prefix is removed before decoding.
+ * The target width is the source width (1080 when sharp reports none)
+ * divided by RESCALE_FACTOR; the image is never enlarged.
+ *
+ * @param {string} screenshotBase64 - Base64 image, with or without a data-URI prefix.
+ * @returns {Promise<Buffer>} The resized image bytes.
+ */
+async function resizeScreenshotForCache(screenshotBase64) {
+  const rawBase64 = screenshotBase64.replace(/^data:image\/\w+;base64,/, "");
+  const imageBytes = Buffer.from(rawBase64, "base64");
+  const { width } = await sharp(imageBytes).metadata();
+  const targetWidth = Math.round((width ?? 1080) / RESCALE_FACTOR);
+  const resizeOptions = { width: targetWidth, withoutEnlargement: true };
+  return sharp(imageBytes).resize(resizeOptions).toBuffer();
+}
+/**
+ * Looks up cached commands for a step using the current screenshot.
+ *
+ * Sends the step hash and a downscaled screenshot (multipart form) to the
+ * cache server's /execute-from-cache endpoint. Coordinates in the returned
+ * commands are scaled back up with scaleCommand(cmd, "multiply").
+ *
+ * @param {object} params - Reads apiKey, filepath, stepNumber, stepDescription,
+ *   platform, screenResolution, screenshot and, optionally, highestUsedIndex.
+ * @returns {Promise<object>} `{ found: true, cacheCommands, cacheIndex }` on a
+ *   hit, otherwise `{ found: false }`. Failures inside the lookup are logged
+ *   and reported as a miss.
+ */
+async function executeFromCache(params) {
+  try {
+    const hash = generateCacheHash(
+      params.apiKey,
+      params.filepath,
+      params.stepNumber,
+      params.stepDescription,
+      params.platform,
+      params.screenResolution
+    );
+    const resizedBuffer = await resizeScreenshotForCache(params.screenshot);
+    const formData = new FormData();
+    formData.append("hash", hash);
+    const blob = new Blob([new Uint8Array(resizedBuffer)], { type: "image/png" });
+    const blobSizeMB = (blob.size / (1024 * 1024)).toFixed(2);
+    globalLogger.debug(`[Cache] Executing from cache with screenshot size: ${blobSizeMB} MB`);
+    formData.append("screenshot", blob, "screenshot.png");
+    // The index is optional; 0 is a valid value, so only null/undefined are skipped.
+    if (params.highestUsedIndex !== void 0 && params.highestUsedIndex !== null) {
+      globalLogger.debug(`[Cache] Sending highest_used_index: ${params.highestUsedIndex}`);
+      formData.append("highest_used_index", String(params.highestUsedIndex));
+    }
+    const response = await axios.post(`${CACHE_SERVER_URL}/execute-from-cache`, formData);
+    const result = response.data;
+    if (result.found && result.cacheCommands) {
+      const scaledCommands = result.cacheCommands.map(
+        (cmd) => scaleCommand(cmd, "multiply")
+      );
+      return {
+        found: true,
+        cacheCommands: scaledCommands,
+        cacheIndex: result.cacheIndex
+      };
+    }
+    return { found: false };
+  } catch (error) {
+    if (axios.isAxiosError(error)) {
+      const detail = error.response?.data || error.message;
+      // A parsed JSON error body is an object; interpolating it directly
+      // would log "[object Object]", so serialize non-string details.
+      let detailText;
+      try {
+        detailText = typeof detail === "string" ? detail : JSON.stringify(detail);
+      } catch {
+        // Serialization must never turn a logged cache miss into a throw.
+        detailText = String(detail);
+      }
+      globalLogger.warn(`[Cache] Cache lookup failed: ${detailText}`);
+    } else {
+      globalLogger.error(`[Cache] Error executing from cache: ${error}`);
+    }
+    return { found: false };
+  }
+}
+/**
+ * Uploads the frames recorded for a step to the cache server.
+ *
+ * Each execution frame is converted to a downscaled base64 screenshot plus
+ * its commands with coordinates divided by RESCALE_FACTOR. The array is
+ * posted as JSON to /populate-cache with the step hash as a query parameter.
+ *
+ * @param {object} params - Reads apiKey, filepath, stepNumber, stepDescription,
+ *   platform, screenResolution and executionData (items with `screenshot`
+ *   and `commands`).
+ * @returns {Promise<{success: boolean}>} `{ success: true }` when the upload
+ *   completed; failures are logged and reported as `{ success: false }`.
+ */
+async function populateCache(params) {
+  try {
+    const hash = generateCacheHash(
+      params.apiKey,
+      params.filepath,
+      params.stepNumber,
+      params.stepDescription,
+      params.platform,
+      params.screenResolution
+    );
+    const payload = await Promise.all(params.executionData.map(async (item) => {
+      const resizedBuffer = await resizeScreenshotForCache(item.screenshot);
+      const scaledCommands = item.commands.map(
+        (cmd) => scaleCommand(cmd, "divide")
+      );
+      return {
+        screenshot: resizedBuffer.toString("base64"),
+        commands: scaledCommands
+      };
+    }));
+    // String length is used as an approximate byte count for the debug log.
+    const payloadSizeMB = (JSON.stringify(payload).length / (1024 * 1024)).toFixed(2);
+    globalLogger.debug(`[Cache] Populating cache with payload size: ~${payloadSizeMB} MB (Hash: ${hash})`);
+    await axios.post(`${CACHE_SERVER_URL}/populate-cache`, payload, {
+      params: { hash }
+    });
+    return { success: true };
+  } catch (error) {
+    if (axios.isAxiosError(error)) {
+      const detail = error.response?.data || error.message;
+      // A parsed JSON error body is an object; interpolating it directly
+      // would log "[object Object]", so serialize non-string details.
+      let detailText;
+      try {
+        detailText = typeof detail === "string" ? detail : JSON.stringify(detail);
+      } catch {
+        // Serialization must never turn a logged failure into a throw.
+        detailText = String(detail);
+      }
+      globalLogger.error(`[Cache] Failed to populate cache: ${detailText}`);
+    } else {
+      globalLogger.error(`[Cache] Error populating cache: ${error}`);
+    }
+    return { success: false };
+  }
+}
+// Endpoint that receives the "get_next_step" request built below.
+const AI_AGENT_ENDPOINT = "https://api.mobileboost.io/call_lambda";
+/**
+ * Asks the AI backend for the next commands for one instruction.
+ *
+ * The screenshot is downscaled by RESCALE_FACTOR before upload, and
+ * coordinates in the returned `appetizeCommands` are scaled back up with
+ * scaleCommand(cmd, "multiply").
+ *
+ * @param {object} params - Reads base64_screenshot (bare base64 or a data
+ *   URI), instruction, action_history and apiKey.
+ * @returns {Promise<object>} The backend response body, with
+ *   `appetizeCommands` rescaled when present.
+ * @throws {Error} "AI Backend Error: <status> - <message>" for axios failures,
+ *   with the original error attached as `cause`; other errors are rethrown
+ *   unchanged.
+ */
+async function executeAgentStep(params) {
+  // Strip an optional data-URI prefix, as resizeScreenshotForCache does for
+  // the same screenshot string; without this the prefix characters would be
+  // decoded as image bytes.
+  const imageBuffer = Buffer.from(
+    params.base64_screenshot.replace(/^data:image\/\w+;base64,/, ""),
+    "base64"
+  );
+  const metadata = await sharp(imageBuffer).metadata();
+  const originalWidth = metadata.width ?? 1080;
+  const originalHeight = metadata.height ?? 1920;
+  const desiredWidth = Math.round(originalWidth / RESCALE_FACTOR);
+  const resizedBuffer = await sharp(imageBuffer).resize({ width: desiredWidth, withoutEnlargement: true }).toBuffer();
+  // Re-read the dimensions from the resized image; the computed values are
+  // only a fallback when sharp reports none.
+  const resizedMetadata = await sharp(resizedBuffer).metadata();
+  const resizedWidth = resizedMetadata.width ?? desiredWidth;
+  const resizedHeight = resizedMetadata.height ?? Math.round(originalHeight * (desiredWidth / originalWidth));
+  globalLogger.debug(`[AI Client] Resized screenshot: ${originalWidth}x${originalHeight} -> ${resizedWidth}x${resizedHeight}`);
+  const payload = {
+    lambda_flow: "get_next_step",
+    current_date: (/* @__PURE__ */ new Date()).toLocaleDateString("en-GB", {
+      day: "numeric",
+      month: "long",
+      year: "numeric"
+    }),
+    base64_screenshot: resizedBuffer.toString("base64"),
+    getUI_elements: [],
+    uiHierarchy: [],
+    test_task_string: JSON.stringify([
+      {
+        id: "step-1",
+        text: `1. ${params.instruction}`,
+        plainText: params.instruction
+      }
+    ]),
+    image_width: resizedWidth,
+    image_height: resizedHeight,
+    action_history: params.action_history,
+    orgKey: params.apiKey,
+    template_images: {},
+    model_provider: "vellum",
+    model_version: "claude-agent",
+    fallbackModel: "claude-agent",
+    utilize_fullTextAnnotation: false,
+    enableSortingOCR: true,
+    enableActionHistoryCut: true,
+    removeOverlappingText: false,
+    currentAndPreviousScreenMatch: false,
+    popupDetectionEnabled: true,
+    ocrProvider: "gcp"
+  };
+  globalLogger.debug(`[AI Client] Sending request to ${AI_AGENT_ENDPOINT}`);
+  try {
+    const response = await axios.post(
+      AI_AGENT_ENDPOINT,
+      payload,
+      {
+        headers: {
+          "Content-Type": "application/json"
+        }
+      }
+    );
+    const result = response.data;
+    globalLogger.debug("[AI Client] Received response from backend");
+    if (result.appetizeCommands) {
+      result.appetizeCommands = result.appetizeCommands.map(
+        (cmd) => scaleCommand(cmd, "multiply")
+      );
+    }
+    return result;
+  } catch (error) {
+    if (axios.isAxiosError(error)) {
+      const status = error.response?.status ?? "unknown";
+      const errorText = error.response?.data ?? error.message;
+      globalLogger.error(`[AI Client] Backend error (${status}): ${JSON.stringify(errorText)}`);
+      // Keep the axios error reachable for callers that need the response.
+      throw new Error(`AI Backend Error: ${status} - ${error.message}`, { cause: error });
+    }
+    throw error;
+  }
+}
+/**
+ * Extracts tap coordinates from a command string.
+ *
+ * The keyed form ("x=<n>" and "y=<n>" both present) wins. Otherwise the
+ * command is split on ";" and the second and third fields are parsed as
+ * integers.
+ *
+ * @param {string} cmd - Command text.
+ * @returns {{x: number, y: number} | null} The coordinates, or null when
+ *   neither notation yields two integers.
+ */
+function parseTapCoordinates(cmd) {
+  const keyedX = cmd.match(/x=(\d+)/);
+  const keyedY = cmd.match(/y=(\d+)/);
+  if (keyedX && keyedY) {
+    return { x: Number.parseInt(keyedX[1], 10), y: Number.parseInt(keyedY[1], 10) };
+  }
+  // Missing fields destructure to undefined, which parses to NaN below.
+  const [, rawX, rawY] = cmd.split(";");
+  const x = Number.parseInt(rawX, 10);
+  const y = Number.parseInt(rawY, 10);
+  if (Number.isNaN(x) || Number.isNaN(y)) {
+    return null;
+  }
+  return { x, y };
+}
+/**
+ * Reads the integer that follows "wait:" in a command string.
+ *
+ * @param {string} cmd - Command text.
+ * @returns {number | null} The leading run of digits as an integer, or null
+ *   when the pattern is absent.
+ */
+function parseWaitSeconds(cmd) {
+  const found = cmd.match(/wait:\s*(\d+)/);
+  if (!found) {
+    return null;
+  }
+  return parseInt(found[1], 10);
+}
+/**
+ * Reads the direction that follows "scroll:" in a command string.
+ *
+ * @param {string} cmd - Command text; the direction is matched case-insensitively.
+ * @returns {string | null} "up" or "down" in lower case, or null when absent.
+ */
+function parseScrollDirection(cmd) {
+  const [, direction] = cmd.match(/scroll:\s*(up|down)/i) ?? [];
+  if (!direction) {
+    return null;
+  }
+  return direction.toLowerCase();
+}
+/**
+ * Extracts the text of a "type:" command.
+ *
+ * The dotAll flag lets the captured text span line breaks; without it a
+ * multi-line payload fails to match and the command is dropped as if it
+ * carried no text.
+ *
+ * @param {string} cmd - Command text; must begin with "type:".
+ * @returns {string | null} Everything after "type:" and any whitespace that
+ *   follows it, or null when the command does not match or has no text.
+ */
+function parseTypeText(cmd) {
+  const match = cmd.match(/^type:\s*(.+)$/s);
+  return match ? match[1] : null;
+}
+/**
+ * Tells whether a command contains the "task complete:" marker.
+ * The marker may appear anywhere in the string and in any letter case.
+ *
+ * @param {string} cmd - Command text.
+ * @returns {boolean}
+ */
+function isTaskComplete(cmd) {
+  const lowered = cmd.toLowerCase();
+  return lowered.includes("task complete:");
+}
+/**
+ * Tells whether a command contains the "error detected:" marker.
+ * The marker may appear anywhere in the string and in any letter case.
+ *
+ * @param {string} cmd - Command text.
+ * @returns {boolean}
+ */
+function isErrorDetected(cmd) {
+  const lowered = cmd.toLowerCase();
+  return lowered.includes("error detected:");
+}
+/**
+ * Tells whether a command begins with the case-sensitive "remember:" prefix.
+ *
+ * @param {string} cmd - Command text.
+ * @returns {boolean}
+ */
+function isRememberCommand(cmd) {
+  const prefix = "remember:";
+  return cmd.startsWith(prefix);
+}
+/**
+ * Tells whether a command begins with "tapOn:" or the variant spelling "tabOn:".
+ *
+ * @param {string} cmd - Command text.
+ * @returns {boolean}
+ */
+function isTapCommand(cmd) {
+  const tapPrefix = /^(?:tapOn|tabOn):/;
+  return tapPrefix.test(cmd);
+}
+/**
+ * Tells whether a command begins with the case-sensitive "wait:" prefix.
+ *
+ * @param {string} cmd - Command text.
+ * @returns {boolean}
+ */
+function isWaitCommand(cmd) {
+  const prefix = "wait:";
+  return cmd.startsWith(prefix);
+}
+/**
+ * Tells whether a command begins with the case-sensitive "scroll:" prefix.
+ *
+ * @param {string} cmd - Command text.
+ * @returns {boolean}
+ */
+function isScrollCommand(cmd) {
+  const prefix = "scroll:";
+  return cmd.startsWith(prefix);
+}
+/**
+ * Tells whether a command begins with the case-sensitive "type:" prefix.
+ *
+ * @param {string} cmd - Command text.
+ * @returns {boolean}
+ */
+function isTypeCommand(cmd) {
+  const prefix = "type:";
+  return cmd.startsWith(prefix);
+}
+/**
+ * Runs one step as a screenshot -> commands -> execute loop.
+ *
+ * Each iteration captures a screenshot, checks the cache for matching
+ * commands (several attempts), falls back to the AI agent on a miss, logs
+ * the commands through ctx.logCodeExecution and executes them. The loop
+ * ends when a command carries the task-complete marker, when an error is
+ * thrown, or after SMART_LOOP_MAX_ITERATIONS iterations.
+ *
+ * @param {object} ctx - Reads apiKey, platform, screenSize and
+ *   globalActionHistory, and calls getScreenshot, logCodeExecution,
+ *   performTap, performScroll and performType.
+ * @param {object} params - Reads stepNumber, description, instruction and filepath.
+ * @returns {Promise<object>} `{ success: true, iterations, cacheHit }` on
+ *   completion, or `{ success: false, error, iterations, cacheHit }` when an
+ *   error was caught. Errors raised inside the loop are returned, not rethrown.
+ */
+async function executeSmartLoop(ctx, params) {
+  const maxCacheAttempts = Math.floor(CACHE_RETRY_MS / CACHE_CHECK_INTERVAL_MS);
+  let loopCount = 0;
+  // Local copy; entries are appended here without touching ctx.globalActionHistory.
+  let actionHistory = [...ctx.globalActionHistory];
+  // Sent to the cache as highestUsedIndex on later lookups.
+  let lastCacheIndex = void 0;
+  let anyCacheMiss = false;
+  let everHadCacheHit = false;
+  // One { screenshot, commands } frame per iteration; uploaded when the step completes.
+  const currentExecutionData = [];
+  globalLogger.info(`[SmartLoop] Starting for step ${params.stepNumber}: "${params.description}"`);
+  try {
+    while (loopCount < SMART_LOOP_MAX_ITERATIONS) {
+      let screenshot = "";
+      let commands = [];
+      let isCacheHit = false;
+      // A fresh screenshot is taken for every cache attempt.
+      for (let attempt = 0; attempt < maxCacheAttempts; attempt++) {
+        screenshot = await ctx.getScreenshot();
+        // Rough decoded size of the base64 string (3 bytes per 4 characters).
+        const sizeInBytes = screenshot.length * 0.75;
+        const sizeInMB = (sizeInBytes / (1024 * 1024)).toFixed(2);
+        globalLogger.debug(`[SmartLoop] Captured screenshot: ~${sizeInMB} MB`);
+        try {
+          globalLogger.debug(`[SmartLoop] Checking cache (Attempt ${attempt + 1}/${maxCacheAttempts})`);
+          const cacheResult = await executeFromCache({
+            apiKey: ctx.apiKey,
+            stepNumber: params.stepNumber,
+            stepDescription: params.description,
+            screenshot,
+            screenResolution: ctx.screenSize,
+            highestUsedIndex: lastCacheIndex,
+            platform: ctx.platform,
+            filepath: params.filepath
+          });
+          if (cacheResult.found && cacheResult.cacheCommands) {
+            commands = cacheResult.cacheCommands;
+            lastCacheIndex = cacheResult.cacheIndex;
+            isCacheHit = true;
+            everHadCacheHit = true;
+            globalLogger.info(`[SmartLoop] Cache Hit! (${commands.length} commands)`);
+            break;
+          }
+        } catch (e) {
+          globalLogger.warn(`[SmartLoop] Cache check failed: ${e.message}`);
+        }
+        // No delay after the final attempt.
+        if (attempt < maxCacheAttempts - 1) {
+          globalLogger.debug(`[SmartLoop] Cache miss, retrying in ${CACHE_CHECK_INTERVAL_MS}ms...`);
+          await delay(CACHE_CHECK_INTERVAL_MS);
+        }
+      }
+      let aiCommands = [];
+      if (!isCacheHit) {
+        anyCacheMiss = true;
+        globalLogger.info(`[SmartLoop] Cache Miss. Requesting AI agent...`);
+        // Uses the screenshot from the last cache attempt.
+        const agentResponse = await executeAgentStep({
+          apiKey: ctx.apiKey,
+          base64_screenshot: screenshot,
+          instruction: params.instruction,
+          action_history: actionHistory
+        });
+        aiCommands = agentResponse.appetizeCommands || [];
+        const gptCommands = agentResponse.gptCommands || [];
+        // Only entries from the first "reasoning:" entry onward are kept as history.
+        const reasoningIndex = gptCommands.findIndex((entry) => entry.startsWith("reasoning:"));
+        if (reasoningIndex !== -1) {
+          const parsedCommands = gptCommands.slice(reasoningIndex);
+          const rememberCommands = parsedCommands.filter((cmd) => isRememberCommand(cmd));
+          if (rememberCommands.length > 0) {
+            ctx.globalActionHistory.push(...rememberCommands);
+          }
+          actionHistory = [...actionHistory, ...parsedCommands];
+        }
+        commands = [...aiCommands];
+        globalLogger.debug(`[SmartLoop] AI returned ${commands.length} command(s)`);
+      }
+      // On a miss `commands` is a copy of `aiCommands`, so both branches of
+      // this conditional hold the same command strings.
+      currentExecutionData.push({
+        screenshot,
+        commands: aiCommands.length > 0 ? aiCommands : commands
+      });
+      await ctx.logCodeExecution(screenshot, commands.join("\n"));
+      let actionExecuted = false;
+      let taskCompleted = false;
+      if (commands.length > 0) {
+        globalLogger.debug(`[SmartLoop] Executing ${commands.length} command(s)`);
+      }
+      for (const cmd of commands) {
+        if (isTaskComplete(cmd)) {
+          taskCompleted = true;
+          globalLogger.info(`[SmartLoop] Task completed signal received`);
+          // Remaining commands in this batch are still processed.
+          continue;
+        }
+        if (isErrorDetected(cmd)) {
+          throw new Error(`AI Reported Error: ${cmd}`);
+        }
+        // NOTE(review): "remember:" entries found in gptCommands were already
+        // pushed above; if the executable commands repeat them they are stored
+        // twice - confirm against the backend response format.
+        if (isRememberCommand(cmd)) {
+          ctx.globalActionHistory.push(cmd);
+        }
+        if (isTapCommand(cmd)) {
+          const coords = parseTapCoordinates(cmd);
+          if (coords) {
+            globalLogger.debug(`[SmartLoop] Executing tap at (${coords.x}, ${coords.y})`);
+            await ctx.performTap(coords.x, coords.y);
+            actionExecuted = true;
+          }
+        } else if (isWaitCommand(cmd)) {
+          const seconds = parseWaitSeconds(cmd);
+          // A parsed value of 0 is falsy, so "wait: 0" does not count as an action.
+          if (seconds) {
+            globalLogger.debug(`[SmartLoop] Waiting ${seconds}s`);
+            await delay(seconds * 1e3);
+            actionExecuted = true;
+          }
+        } else if (isScrollCommand(cmd)) {
+          const direction = parseScrollDirection(cmd);
+          if (direction) {
+            globalLogger.debug(`[SmartLoop] Scrolling ${direction}`);
+            await ctx.performScroll(direction);
+            actionExecuted = true;
+          }
+        } else if (isTypeCommand(cmd)) {
+          const text = parseTypeText(cmd);
+          if (text) {
+            globalLogger.debug(`[SmartLoop] Typing text`);
+            await ctx.performType(text);
+            actionExecuted = true;
+          }
+        }
+      }
+      if (actionExecuted) {
+        // Cached commands are added to the history here; AI output was added above.
+        if (isCacheHit) {
+          actionHistory.push(...commands);
+        }
+        await delay(100);
+      }
+      if (taskCompleted) {
+        globalLogger.info(`[SmartLoop] Task completed successfully`);
+        // Upload only when at least one iteration needed the AI agent.
+        if (anyCacheMiss && currentExecutionData.length > 0) {
+          globalLogger.info(`[SmartLoop] Populating cache with ${currentExecutionData.length} frame(s)...`);
+          try {
+            await populateCache({
+              apiKey: ctx.apiKey,
+              stepNumber: params.stepNumber,
+              stepDescription: params.description,
+              executionData: currentExecutionData,
+              screenResolution: ctx.screenSize,
+              platform: ctx.platform,
+              filepath: params.filepath
+            });
+            globalLogger.debug(`[SmartLoop] Cache populated successfully`);
+          } catch (e) {
+            globalLogger.warn(`[SmartLoop] Failed to populate cache: ${e.message}`);
+          }
+        } else if (!anyCacheMiss) {
+          globalLogger.debug(`[SmartLoop] Skipping cache population (all actions were cached)`);
+        }
+        return {
+          success: true,
+          iterations: loopCount + 1,
+          cacheHit: everHadCacheHit
+        };
+      }
+      loopCount++;
+    }
+    throw new Error(`Smart Loop timeout after ${SMART_LOOP_MAX_ITERATIONS} iterations`);
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error);
+    globalLogger.error(`[SmartLoop] Error: ${message}`);
+    // NOTE(review): after the timeout above loopCount already equals
+    // SMART_LOOP_MAX_ITERATIONS, so this reports one more than the number of
+    // iterations that ran - confirm whether that is intended.
+    return {
+      success: false,
+      error: message,
+      iterations: loopCount + 1,
+      cacheHit: everHadCacheHit
+    };
+  }
+}
 class GptDriver {
+  /**
+   * Replaces `{{ key }}` placeholders in a string with values from `params`.
+   *
+   * Non-string input, and strings without "{{", are returned unchanged.
+   * Whitespace around the key inside the braces is ignored.
+   *
+   * @param {*} input - Value to interpolate.
+   * @param {object} [params] - Flow parameters; only own properties are used.
+   * @returns {*} The interpolated string, or `input` itself when nothing applies.
+   * @throws {Error} "Missing flow param: {{key}}" when a placeholder has no
+   *   matching own property, including when `params` is null or undefined.
+   */
+  interpolateTemplate(input, params) {
+    if (typeof input !== "string" || !input.includes("{{")) return input;
+    // `in` would also accept inherited members (e.g. {{constructor}}) and
+    // throws a TypeError on a nullish params object, so look up own
+    // properties on a safe default instead.
+    const flowParams = params ?? {};
+    const pattern = /{{\s*([^}]+?)\s*}}/g;
+    return input.replace(pattern, (_match, keyRaw) => {
+      const key = String(keyRaw);
+      if (!Object.prototype.hasOwnProperty.call(flowParams, key)) {
+        throw new Error(`Missing flow param: {{${key}}}`);
+      }
+      return flowParams[key];
+    });
+  }
   apiKey;
   gptDriverSessionId;
   gptDriverBaseUrl;
   appiumSessionConfig;
+  cachingMode;
   driver;
+  appiumSessionStarted;
+  useGptDriverCloud;
+  gptDriverCloudConfig;
+  buildId;
+  testId;
+  step_number = 1;
+  // Smart loop state - maintains action history across steps for context
+  globalActionHistory = [];
   /**
    * Creates an instance of the GptDriver class.
    *
@@ -42,19 +636,34 @@ class GptDriver {
    *                 - `device.platform`: The platform name of the device (e.g., iOS, Android).
    */
   constructor(config) {
+    this.testId = config.testId;
     this.apiKey = config.apiKey;
-    this.gptDriverBaseUrl = "https://api.mobileboost.io";
-    this.initializeDriver(config);
-    this.initializeAppiumConfig(config);
+    this.buildId = config.buildId;
+    this.useGptDriverCloud = config.useGptDriverCloud;
+    this.gptDriverBaseUrl = GPT_DRIVER_BASE_URL;
+    this.cachingMode = config.cachingMode ?? "NONE";
+    if (config.useGptDriverCloud) {
+      if (config.serverConfig.device?.platform == null) {
+        throw new Error("Platform is missing. Please specify the platform when using GPTDriver Cloud.");
+      }
+      this.gptDriverCloudConfig = {
+        platform: config.serverConfig.device.platform,
+        deviceName: config.serverConfig.device.deviceName,
+        platformVersion: config.serverConfig.device.platformVersion
+      };
+    } else {
+      this.initializeDriver(config);
+      this.initializeAppiumConfig(config);
+    }
   }
   initializeDriver(config) {
     if (config.driver) {
       this.driver = config.driver;
-      if (!config.severConfig?.url) {
+      if (!config.serverConfig.url) {
         throw new Error("Server url is missing. Please specify the server url when providing a driver.");
       }
     } else {
-      const isValidServerConfig = config.severConfig?.url && config.severConfig.device?.platform;
+      const isValidServerConfig = config.serverConfig.url && config.serverConfig.device?.platform;
       if (!isValidServerConfig) {
         throw new Error("Either provide a driver, or a valid severConfig object.");
       }
@@ -63,10 +672,10 @@ class GptDriver {
   initializeAppiumConfig(config) {
     const defaultPort = parseInt(process.env.APPIUM_PORT ?? "4723", 10);
     const defaultHost = process.env.APPIUM_HOST ?? "127.0.0.1";
-    let serverUrl = config.severConfig?.url instanceof URL ? config.severConfig.url : new URL(config.severConfig?.url ?? `http://${defaultHost}:${defaultPort}`);
+    const serverUrl = config.serverConfig.url instanceof URL ? config.serverConfig.url : new URL(config.serverConfig.url ?? `http://${defaultHost}:${defaultPort}`);
     this.appiumSessionConfig = {
       serverUrl,
-      ...config.severConfig?.device
+      ...config.serverConfig.device
     };
   }
   /**
@@ -76,50 +685,53 @@ class GptDriver {
    * @throws {Error} If the session cannot be started or the driver is not properly initialized.
    */
   async startSession() {
-    console.log(">> Starting session...");
-    if (this.driver) {
-      let platform;
-      let platformVersion;
-      let deviceName;
-      let sessionId;
-      if (this.driver.sessionId == null) {
-        const driver = this.driver;
-        const capabilities = await driver.getCapabilities();
-        platform = capabilities.get("platformName");
-        platformVersion = capabilities.get("platformVersion") ?? this.appiumSessionConfig?.platformVersion;
-        deviceName = this.appiumSessionConfig?.deviceName ?? capabilities.get("deviceName");
-        const session = await driver.getSession();
-        sessionId = session.getId();
+    globalLogger.info("Starting session...");
+    if (!this.useGptDriverCloud) {
+      if (this.driver) {
+        let platform;
+        let platformVersion;
+        let deviceName;
+        let sessionId;
+        if (this.driver.sessionId == null) {
+          const driver = this.driver;
+          const capabilities = await driver.getCapabilities();
+          platform = capabilities.get("platformName");
+          platformVersion = capabilities.get("platformVersion") ?? this.appiumSessionConfig?.platformVersion;
+          deviceName = this.appiumSessionConfig?.deviceName ?? capabilities.get("deviceName");
+          const session = await driver.getSession();
+          sessionId = session.getId();
+        } else {
+          const driver = this.driver;
+          platform = driver.capabilities["appium:platformName"] ?? driver.capabilities["platformName"];
+          platformVersion = driver.capabilities["appium:platformVersion"] ?? driver.capabilities["platformVersion"];
+          deviceName = this.appiumSessionConfig?.deviceName ?? driver.capabilities["appium:deviceName"] ?? driver.capabilities["deviceName"];
+          sessionId = driver.sessionId;
+        }
+        this.appiumSessionConfig = {
+          ...this.appiumSessionConfig,
+          id: sessionId,
+          platform,
+          platformVersion,
+          deviceName
+        };
+        globalLogger.debug(`Session config: ${JSON.stringify(this.appiumSessionConfig)}`);
       } else {
-        const driver = this.driver;
-        platform = driver.capabilities["appium:platformName"];
-        platformVersion = driver.capabilities["appium:platformVersion"];
-        deviceName = this.appiumSessionConfig?.deviceName ?? driver.capabilities["appium:deviceName"] ?? "";
-        sessionId = driver.sessionId;
-      }
-      this.appiumSessionConfig = {
-        ...this.appiumSessionConfig,
-        id: sessionId,
-        platform,
-        platformVersion,
-        deviceName
+        this.appiumSessionConfig.id = await this.createSession();
+      }
+      const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/window/rect`);
+      const rectResponse = await axios.get(url);
+      this.appiumSessionConfig.size = {
+        width: rectResponse.data.value.width,
+        height: rectResponse.data.value.height
       };
-    } else {
-      this.appiumSessionConfig.id = await this.createSession();
+      this.appiumSessionStarted = true;
     }
     await this.createGptDriverSession();
-    const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/window/rect`);
-    const rectResponse = await axios.get(
-      url
-    );
-    this.appiumSessionConfig.size = {
-      width: rectResponse.data.value.width,
-      height: rectResponse.data.value.height
-    };
-    console.log(`>> Session created. Monitor execution at: ${this.getSessionLink()}`);
+    globalLogger.info(logStyles.highlight(`Session created. Monitor execution at: ${this.getSessionLink()}`));
   }
   async createSession() {
     const { platform, deviceName, platformVersion, serverUrl } = this.appiumSessionConfig;
+    globalLogger.debug(`Creating Appium session for ${platform} ${platformVersion} on ${deviceName}`);
     const url = buildUrl(serverUrl, `/session`);
     const response = await axios.post(
       url,
@@ -134,22 +746,41 @@ class GptDriver {
         }
       }
     );
-    return response.data.value.sessionId;
+    const sessionId = response.data.value.sessionId;
+    globalLogger.debug(`Appium session created with ID: ${sessionId}`);
+    return sessionId;
   }
   async createGptDriverSession() {
+    globalLogger.debug("Creating GPT Driver session...");
     const response = await axios.post(
       `${this.gptDriverBaseUrl}/sessions/create`,
       {
+        ...this.testId && { test_id: this.testId },
         api_key: this.apiKey,
-        appium_session_id: this.appiumSessionConfig.id,
+        appium_session_id: this.appiumSessionConfig?.id,
         device_config: {
-          platform: this.appiumSessionConfig.platform,
-          device: this.appiumSessionConfig.deviceName,
-          os: this.appiumSessionConfig.platformVersion
-        }
+          platform: this.appiumSessionConfig?.platform ?? this.gptDriverCloudConfig?.platform,
+          device: this.appiumSessionConfig?.deviceName ?? this.gptDriverCloudConfig?.deviceName,
+          os: this.appiumSessionConfig?.platformVersion ?? this.gptDriverCloudConfig?.platformVersion
+        },
+        use_internal_virtual_device: this.useGptDriverCloud,
+        build_id: this.buildId,
+        caching_mode: this.cachingMode
       }
     );
     this.gptDriverSessionId = response.data.sessionId;
+    globalLogger.debug(`GPT Driver session created with ID: ${this.gptDriverSessionId}`);
+    if (this.useGptDriverCloud) {
+      const parsedUrl = new URL(response.data.appiumServerUrl);
+      this.driver = await webdriverio.attach({
+        options: {
+          hostname: parsedUrl.hostname,
+          path: parsedUrl.pathname
+        },
+        sessionId: response.data.appiumSessionId
+      });
+      this.appiumSessionStarted = true;
+    }
   }
   getSessionLink() {
     return `https://app.mobileboost.io/gpt-driver/sessions/${this.gptDriverSessionId}`;
@@ -165,20 +796,174 @@ class GptDriver {
    *
    * @throws {Error} If the request to stop the session fails.
    */
-  async stopSession(status) {
-    console.log(">> Stopping session...");
-    await axios.post(
-      `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/stop`,
+  async setSessionStatus(status) {
+    // Nothing to report when no GPT Driver session is open.
+    if (!this.gptDriverSessionId) {
+      return;
+    }
+    globalLogger.info(`Stopping session with status: ${status}`);
+    const stopUrl = `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/stop`;
+    const payload = { api_key: this.apiKey, status };
+    await axios.post(stopUrl, payload);
+    globalLogger.info("Session stopped successfully");
+    // Reset per-session state; methods that check `appiumSessionStarted`
+    // will call startSession() again on their next use.
+    this.appiumSessionStarted = false;
+    this.gptDriverSessionId = void 0;
+    this.step_number = 1;
+    this.globalActionHistory = [];
+  }
+  // ─────────────────────────────────────────────────────────────────────────────
+  // SMART LOOP INTEGRATION
+  // ─────────────────────────────────────────────────────────────────────────────
+  /**
+   * Creates a SmartLoopContext for the current session.
+   * This context provides all the callbacks needed by the smart loop executor.
+   */
+  createSmartLoopContext() {
+    // The Appium config is read with optional chaining for every field: the
+    // rest of this class treats it as possibly undefined, so a missing config
+    // should produce undefined fields here rather than a TypeError.
+    const appiumConfig = this.appiumSessionConfig;
+    return {
+      apiKey: this.apiKey,
+      platform: appiumConfig?.platform,
+      screenSize: appiumConfig?.size,
+      globalActionHistory: this.globalActionHistory,
+      // The callbacks below are thin delegates so the executor never needs a
+      // reference to the driver instance itself.
+      getScreenshot: () => this.getScreenshot(this.appiumSessionConfig),
+      performTap: (x, y) => this.performTap(x, y),
+      performScroll: (direction) => this.performScroll(direction),
+      performType: (text) => this.performType(text),
+      logCodeExecution: async (screenshot, command) => this.logCodeExecution(screenshot, command)
+    };
+  }
+  /**
+   * Calls the AI agent to determine the next actions based on the current screenshot.
+   * This requires an active GPT Driver session.
+   */
+  async executeAgentStep(params) {
+    // Reads `screenshot`, `instruction` and `actionHistory` from `params` and posts
+    // them to the session's /agent/execute endpoint.
+    const response = await axios.post(
+      `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/agent/execute`,
       {
         api_key: this.apiKey,
-        status
+        // Strip a leading "data:image/<type>;base64," prefix so only the raw base64 payload is sent.
+        base64_screenshot: params.screenshot.replace(/^data:image\/\w+;base64,/, ""),
+        instruction: params.instruction,
+        action_history: params.actionHistory
       }
     );
-    const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}`);
-    await axios.delete(url);
-    console.log(">> Session stopped.");
-    this.gptDriverSessionId = void 0;
+    // Re-key the snake_case response fields to camelCase for the caller.
+    return {
+      gptCommands: response.data.gpt_commands,
+      appetizeCommands: response.data.appetize_commands,
+      actionHistory: response.data.action_history
+    };
+  }
+  // ─────────────────────────────────────────────────────────────────────────────
+  // DEVICE ACTION METHODS
+  // ─────────────────────────────────────────────────────────────────────────────
+  async getWdioClient() {
+    if (!this.appiumSessionStarted) {
+      await this.startSession();
+    }
+    // Reuse the cached client as long as it is bound to a session.
+    const cached = this.driver;
+    if (cached?.sessionId != null) {
+      return cached;
+    }
+    const { protocol, hostname, port, pathname } = new URL(this.appiumSessionConfig.serverUrl);
+    // `port` is empty when the URL does not spell one out, so fall back to the
+    // conventional port for the scheme.
+    let resolvedPort;
+    if (port) {
+      resolvedPort = Number(port);
+    } else if (protocol === "https:") {
+      resolvedPort = 443;
+    } else {
+      resolvedPort = 80;
+    }
+    this.driver = await webdriverio.attach({
+      sessionId: this.appiumSessionConfig.id,
+      options: {
+        protocol: protocol.replace(":", ""),
+        hostname,
+        port: resolvedPort,
+        path: pathname || "/"
+      }
+    });
+    return this.driver;
+  }
+  /**
+   * Performs a tap action at the specified coordinates.
+   */
+  async performTap(x, y) {
+    // Move to the target, press, hold for 100 ms, release.
+    const tapSequence = [
+      { type: "pointerMove", duration: 0, x, y },
+      { type: "pointerDown", button: 0 },
+      { type: "pause", duration: 100 },
+      { type: "pointerUp", button: 0 }
+    ];
+    const touchPointer = {
+      type: "pointer",
+      id: "finger1",
+      parameters: { pointerType: "touch" },
+      actions: tapSequence
+    };
+    const wdioClient = await this.getWdioClient();
+    await wdioClient.performActions([touchPointer]);
+  }
+  /**
+   * Sends `text` to the WebdriverIO client as a sequence of single-character key strokes.
+   *
+   * @param {string} text - The text to type.
+   * @returns {Promise<void>} Resolves once the key strokes have been sent.
+   */
+  async performType(text) {
+    const client = await this.getWdioClient();
+    // Spreading a string iterates by Unicode code point, so characters outside
+    // the BMP (e.g. emoji) stay intact instead of being split into two lone
+    // surrogates, which is what `text.split("")` produces.
+    await client.keys([...text]);
+  }
+  /**
+   * Performs a vertical swipe through the horizontal centre of the screen.
+   * "down" swipes from 80% to 20% of the screen height; any other value swipes
+   * from 20% to 80%. Falls back to a 1080x1920 screen when no size is configured.
+   */
+  async performScroll(direction) {
+    const client = await this.getWdioClient();
+    const size = this.appiumSessionConfig?.size;
+    const width = size?.width ?? 1080;
+    const height = size?.height ?? 1920;
+    const centerX = Math.round(width / 2);
+    const nearTop = Math.round(height * 0.2);
+    const nearBottom = Math.round(height * 0.8);
+    const [fromY, toY] = direction === "down" ? [nearBottom, nearTop] : [nearTop, nearBottom];
+    const swipe = {
+      type: "pointer",
+      id: "finger1",
+      parameters: { pointerType: "touch" },
+      actions: [
+        { type: "pointerMove", duration: 0, x: centerX, y: fromY },
+        { type: "pointerDown", button: 0 },
+        { type: "pause", duration: 100 },
+        { type: "pointerMove", duration: 500, x: centerX, y: toY },
+        { type: "pointerUp", button: 0 }
+      ]
+    };
+    await client.performActions([swipe]);
+  }
+  /**
+   * Returns the page source reported by the WebdriverIO client.
+   */
+  async getPageSource() {
+    const wdioClient = await this.getWdioClient();
+    const pageSource = await wdioClient.getPageSource();
+    return pageSource;
+  }
+  /**
+   * Scrolls in `direction` until the page source contains the target, checking
+   * before each scroll and once more after the last one.
+   *
+   * @param {Object} params
+   * @param {string} params.direction - Scroll direction passed to `performScroll()`.
+   * @param {string} [params.text] - Text to look for; used when `elementId` is not set.
+   * @param {string} [params.elementId] - Element id to look for; takes precedence over `text`.
+   * @param {number} [params.maxScrolls=10] - Maximum number of scrolls to perform.
+   * @throws {Error} If neither `text` nor `elementId` is given, or the target is
+   *                 still absent after `maxScrolls` scrolls.
+   */
+  async performScrollUntil(params) {
+    const { direction, text, elementId } = params;
+    const max = params.maxScrolls ?? 10;
+    const target = elementId || text;
+    if (!target) {
+      // Without a target nothing can ever match; fail fast instead of
+      // scrolling `max` times and then reporting a misleading "not found".
+      throw new Error("scrollUntil requires either text or elementId");
+    }
+    const isTargetInSource = async () => (await this.getPageSource()).includes(target);
+    for (let i = 0; i < max; i++) {
+      if (await isTargetInSource()) {
+        return;
+      }
+      await this.performScroll(direction);
+      await this._delay(500);
+    }
+    // The loop checks before each scroll, so the screen reached by the final
+    // scroll still has to be inspected.
+    if (await isTargetInSource()) {
+      return;
+    }
+    throw new Error(`scrollUntil target not found after ${max} scroll(s)`);
+  }
+  /**
+   * Fetches a base64 screenshot from the Appium server and, on iOS, resizes it
+   * to the configured screen size.
+   *
+   * The request URL is built from `this.appiumSessionConfig`, while platform and
+   * size are read from the `appiumSessionConfig` argument.
+   */
+  async getScreenshot(appiumSessionConfig) {
+    globalLogger.debug("Capturing screenshot...");
+    const { serverUrl, id } = this.appiumSessionConfig;
+    const { data } = await axios.get(buildUrl(serverUrl, `/session/${id}/screenshot`));
+    const rawScreenshot = data.value;
+    if (appiumSessionConfig.platform !== "iOS") {
+      return rawScreenshot;
+    }
+    globalLogger.debug(`Resizing iOS screenshot to ${appiumSessionConfig.size.width}x${appiumSessionConfig.size.height}`);
+    const { width, height } = appiumSessionConfig.size;
+    const resized = await sharp(Buffer.from(rawScreenshot, "base64")).resize(width, height).toBuffer();
+    return resized.toString("base64");
+  }
+  /**
+   * Helper method to delay execution.
+   *
+   * @private
+   */
+  _delay(ms) {
+    // Resolves (with no value) once the timer fires.
+    return new Promise((done) => setTimeout(done, ms));
   }
+  // ─────────────────────────────────────────────────────────────────────────────
+  // PUBLIC API METHODS
+  // ─────────────────────────────────────────────────────────────────────────────
   /**
    * Executes a specified command within the WebDriver session, optionally using an Appium handler.
    *
@@ -186,6 +971,7 @@ class GptDriver {
    * the command-specific operations. After executing the handler, the executed commands get logged on the GPTDriver servers.
    * If the handler execution fails or no handler is provided, the command gets executed by the GPTDriver using just natural language.
    *
+   * @deprecated Use `aiExecute()` instead. This method will be removed in a future version.
    * @param {string} command - The natural language command to be executed by the GPTDriver.
    * @param {AppiumHandler} [appiumHandler] - An optional function that processes Appium-specific commands.
    *                                          If provided, this handler is executed instead of calling the GPTDriver servers.
@@ -193,24 +979,136 @@ class GptDriver {
    * @throws {Error} If an error occurs during the execution of the Appium handler or while processing the command by the GPTDriver.
    */
   async execute(command, appiumHandler) {
-    console.log(">> Executing command:", command);
+    globalLogger.warn("Method 'execute()' is deprecated. Please use 'aiExecute()' instead.");
+    // Start the session lazily on first use.
+    if (!this.appiumSessionStarted) {
+      await this.startSession();
+    }
+    globalLogger.info(`Executing command: ${command}`);
     const driver = this.driver;
     if (appiumHandler != null) {
       try {
+        // The handler's source text is logged before the handler itself runs.
+        // NOTE(review): the helper is defined outside this view — presumably it captures a screenshot first; confirm.
+        await this.takeScreenshotAndLogCodeExecution(appiumHandler.toString());
         await appiumHandler(driver);
-        const screenshot = await this.getScreenshot(this.appiumSessionConfig);
-        await axios.post(`${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/log_code_execution`, {
-          api_key: this.apiKey,
-          base64_screenshot: screenshot,
-          command: appiumHandler.toString()
-        });
+        globalLogger.debug("Custom Appium handler executed successfully");
       } catch (e) {
+        // NOTE(review): the caught error is not logged, and a failure of the logging
+        // call above lands here as well as a failure of the handler.
+        globalLogger.warn("Custom Appium handler failed, falling back to GPT handler");
         await this.gptHandler(command);
       }
     } else {
       await this.gptHandler(command);
     }
   }
+  /**
+   * Executes a specified command within the WebDriver session with configurable caching options.
+   *
+   * This is the recommended method for executing commands. It provides fine-grained control over
+   * caching behavior, allowing you to optimize performance and costs for repetitive test scenarios.
+   *
+   * If an `appiumHandler` is provided, it will be invoked with the WebDriver instance to perform
+   * the command-specific operations. After executing the handler, the executed commands get logged
+   * on the GPTDriver servers. If the handler execution fails or no handler is provided, the command
+   * gets executed by the GPTDriver using natural language processing.
+   *
+   * @param {Object} params - The execution parameters
+   * @param {string} params.command - The natural language command to be executed by the GPTDriver.
+   *                                  Examples: "Click the login button", "Enter 'test@example.com' in the email field"
+   * @param {AppiumHandler} [params.appiumHandler] - An optional function that processes Appium-specific commands.
+   *                                                  If provided, this handler is executed instead of calling
+   *                                                  the GPTDriver API. Useful for performance optimization when
+   *                                                  you know the exact Appium commands to execute.
+   * @param {CachingMode} [params.cachingMode] - Controls how the GPTDriver caches this command execution.
+   *                                             If not specified, uses the global caching mode set in the constructor.
+   *                                             Options:
+   *                                             - "NONE"
+   *                                             - "FULL_SCREEN"
+   *                                             - "INTERACTION_REGION"
+   * @param {boolean} [params.useSmartLoop] - If true, uses the smart loop execution (Cache -> AI -> Execute -> Populate)
+   *                                          which optimizes execution by checking cache first and populating it after.
+   *                                          Default: false (uses legacy gptHandler)
+   *
+   * @returns {Promise<void>} A promise that resolves when the command execution is complete.
+   *
+   * @throws {Error} If an error occurs during the execution of the Appium handler or while processing
+   *                 the command by the GPTDriver.
+   *
+   * @example
+   * // Basic usage with natural language (no caching)
+   * await driver.aiExecute({
+   *   command: "Click the submit button"
+   * });
+   *
+   * @example
+   * // Full screen caching for repetitive navigation on similar screens
+   * await driver.aiExecute({
+   *   command: "Navigate to the settings page",
+   *   cachingMode: "FULL_SCREEN"
+   * });
+   *
+   * @example
+   * // Interaction region caching for repeated actions on the same button
+   * await driver.aiExecute({
+   *   command: "Click the login button",
+   *   cachingMode: "INTERACTION_REGION"
+   * });
+   *
+   * @example
+   * // With custom Appium handler as fallback
+   * await driver.aiExecute({
+   *   command: "Click the login button",
+   *   appiumHandler: async (driver) => {
+   *     const loginBtn = await driver.$('~loginButton');
+   *     await loginBtn.click();
+   *   },
+   *   cachingMode: "INTERACTION_REGION"
+   * });
+   *
+   * @example
+   * // Force fresh execution for dynamic content
+   * await driver.aiExecute({
+   *   command: "Verify the current timestamp",
+   *   cachingMode: "NONE"
+   * });
+   *
+   * @example
+   * // Using smart loop for optimized caching
+   * await driver.aiExecute({
+   *   command: "Click the login button",
+   *   useSmartLoop: true,
+   *   cachingMode: "FULL_SCREEN"
+   * });
+   */
+  async aiExecute({ command, appiumHandler, cachingMode, useSmartLoop = false }) {
+    // Start the session lazily on first use.
+    if (!this.appiumSessionStarted) {
+      await this.startSession();
+    }
+    globalLogger.info(`Executing command: ${command}`);
+    const driver = this.driver;
+    if (appiumHandler != null) {
+      try {
+        // The handler's source text is logged before the handler itself runs.
+        await this.takeScreenshotAndLogCodeExecution(appiumHandler.toString());
+        await appiumHandler(driver);
+        globalLogger.debug("Custom Appium handler executed successfully");
+        this.step_number++;
+        return;
+      } catch (e) {
+        // Report why the handler path was abandoned instead of dropping the
+        // error; execution then falls through to the AI paths below.
+        globalLogger.warn(`Custom Appium handler failed, falling back to AI execution: ${e?.message ?? e}`);
+      }
+    }
+    if (useSmartLoop) {
+      // NOTE(review): `cachingMode` is only forwarded on the gptHandler path below;
+      // the smart loop request does not receive it — confirm this is intended.
+      const ctx = this.createSmartLoopContext();
+      const result = await executeSmartLoop(ctx, {
+        stepNumber: this.step_number,
+        description: command,
+        instruction: command
+      });
+      if (!result.success) {
+        throw new Error(result.error || "Smart loop execution failed");
+      }
+      this.step_number++;
+    } else {
+      await this.gptHandler(command, cachingMode);
+    }
+  }
   /**
    * Asserts a single condition using the GPTDriver.
    *
@@ -218,13 +1116,25 @@ class GptDriver {
    * If the assertion fails, an error is thrown.
    *
    * @param {string} assertion - The condition to be asserted.
+   * @param cachingMode - The caching mode to be used for the assertion.
    * @throws {Error} If the assertion fails.
    */
-  async assert(assertion) {
-    console.log(">> Asserting:", assertion);
-    const results = await this.checkBulk([assertion]);
-    if (!Object.values(results).at(0)) {
-      throw new Error(`Failed assertion: ${assertion}`);
+  async assert(assertion, cachingMode) {
+    // Start the session lazily on first use.
+    if (!this.appiumSessionStarted) {
+      await this.startSession();
+    }
+    try {
+      const results = await this.checkBulk([assertion], cachingMode);
+      // Only one assertion was sent, so the first result value decides the outcome.
+      if (!Object.values(results).at(0)) {
+        await this.setSessionStatus("failed");
+        globalLogger.error(`Assertion failed: ${assertion}`);
+        throw new Error(`Failed assertion: ${assertion}`);
+      }
+      this.step_number = this.step_number + 1;
+      globalLogger.info(`Assertion passed: ${assertion}`);
+    } catch (e) {
+      // Also reached for the error thrown above; setSessionStatus() does nothing
+      // once the session id has been cleared by the earlier call.
+      await this.setSessionStatus("failed");
+      throw e;
     }
   }
   /**
@@ -234,43 +1144,100 @@ class GptDriver {
    * If any assertion fails, an error is thrown listing all failed assertions.
    *
    * @param {string[]} assertions - An array of conditions to be asserted.
+   * @param cachingMode - The caching mode to be used for the assertions.
    * @throws {Error} If any of the assertions fail.
    */
-  async assertBulk(assertions) {
-    console.log(">> Asserting:", assertions);
-    const results = await this.checkBulk(assertions);
-    const failedAssertions = Object.values(results).reduce((prev, current, currentIndex) => {
-      if (!current) {
-        return [...prev, assertions.at(currentIndex)];
+  async assertBulk(assertions, cachingMode) {
+    // Start the session lazily on first use.
+    if (!this.appiumSessionStarted) {
+      await this.startSession();
+    }
+    try {
+      const results = await this.checkBulk(assertions, cachingMode);
+      // Collect the assertions whose result is falsy. Results are matched to assertions by
+      // position — assumes the result values come back in the same order as `assertions`; TODO confirm.
+      const failedAssertions = Object.values(results).reduce((prev, current, currentIndex) => {
+        if (!current) {
+          return [...prev, assertions.at(currentIndex)];
+        }
+        return prev;
+      }, []);
+      if (failedAssertions.length > 0) {
+        await this.setSessionStatus("failed");
+        globalLogger.error(`Multiple assertions failed: ${failedAssertions.join(", ")}`);
+        throw new Error(`Failed assertions: ${failedAssertions.join(", ")}`);
       }
-      return prev;
-    }, []);
-    if (failedAssertions.length > 0) {
-      throw new Error(`Failed assertions: ${failedAssertions.join(", ")}`);
+      // A whole bulk assertion counts as a single step.
+      this.step_number = this.step_number + 1;
+      globalLogger.info(`All ${assertions.length} assertions passed`);
+    } catch (e) {
+      // Also reached for the error thrown above, in addition to errors from checkBulk().
+      await this.setSessionStatus("failed");
+      throw e;
     }
   }
   /**
    * Checks multiple conditions and returns their results using the GPTDriver.
    *
    * This method sends a bulk condition request and returns the results of the conditions.
+   * Failed conditions will be retried up to maxRetries times.
    *
    * @param {string[]} conditions - An array of conditions to be checked.
+   * @param {CachingMode} cachingMode - The caching mode to be used for the conditions.
+   * @param {number} maxRetries - The maximum number of retries if any condition fails (default: 2).
+   * @param {number} retryDelayMs - The delay in milliseconds between retries (default: 1000).
    * @returns {Promise<Record<string, boolean>>} A promise that resolves with an object mapping each condition
    *                                             to a boolean indicating whether the condition was met.
    */
-  async checkBulk(conditions) {
-    console.log(">> Checking:", conditions);
-    const screenshot = await this.getScreenshot(this.appiumSessionConfig);
-    const response = await axios.post(
-      `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`,
-      {
-        api_key: this.apiKey,
-        base64_screenshot: screenshot,
-        assertions: conditions,
-        command: `Assert: ${JSON.stringify(conditions)}`
+  async checkBulk(conditions, cachingMode, maxRetries = 2, retryDelayMs = 1e3) {
+    // Up to 1 + maxRetries attempts are made; every attempt re-checks the full
+    // `conditions` list, not just the ones that failed.
+    let attempt = 0;
+    let results = {};
+    while (attempt <= maxRetries) {
+      results = await this._checkBulkOnce(conditions, cachingMode, attempt);
+      // Keys of `results` whose value is falsy.
+      const failedConditions = Object.entries(results).filter(([_, success]) => !success).map(([key, _]) => key);
+      if (failedConditions.length === 0) {
+        return results;
       }
-    );
-    return response.data.results;
+      attempt++;
+      if (attempt <= maxRetries) {
+        globalLogger.info(
+          `>>	 Conditions failed ${JSON.stringify(failedConditions)}. Retrying in ${retryDelayMs}ms... (Attempt ${attempt}/${maxRetries})`
+        );
+        await this._delay(retryDelayMs);
+      } else {
+        globalLogger.info(`>>	 Conditions failed: ${JSON.stringify(failedConditions)}`);
+      }
+    }
+    // All attempts used up: hand back the last results, failures included, without throwing.
+    return results;
+  }
+  /**
+   * Internal method to check conditions once without retry logic.
+   *
+   * @private
+   */
+  async _checkBulkOnce(conditions, cachingMode, attempt = 0) {
+    if (!this.appiumSessionStarted) {
+      await this.startSession();
+    }
+    globalLogger.info(`Checking conditions (attempt ${attempt}): ${conditions.join(", ")}`);
+    try {
+      // In cloud mode no local screenshot is captured; the field stays undefined.
+      const screenshot = this.useGptDriverCloud ? void 0 : await this.getScreenshot(this.appiumSessionConfig);
+      const endpoint = `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/assert`;
+      const payload = {
+        api_key: this.apiKey,
+        base64_screenshot: screenshot,
+        assertions: conditions,
+        command: `Assert: ${JSON.stringify(conditions)}`,
+        // A per-call caching mode wins over the instance-wide one.
+        caching_mode: cachingMode ?? this.cachingMode,
+        step_number: this.step_number
+      };
+      const { data } = await axios.post(endpoint, payload);
+      globalLogger.debug(`Check results: ${JSON.stringify(data.results)}`);
+      return data.results;
+    } catch (e) {
+      globalLogger.error("Failed to check conditions", e);
+      await this.setSessionStatus("failed");
+      throw e;
+    }
   }
   /**
    * Extracts specified information using the GPTDriver.
@@ -280,30 +1247,305 @@ class GptDriver {
    *
    * @param {string[]} extractions - An array of extraction criteria. Each criterion specifies what information
    *                                 should be extracted from the session.
+   * @param cachingMode - The caching mode to be used for the extraction.
    * @returns {Promise<Record<string, any>>} A promise that resolves with an object mapping each extraction criterion
    *                                         to the extracted data. The structure of the returned data depends on the
    *                                         specifics of the extraction criteria.
    */
-  async extract(extractions) {
-    console.log(">> Extracting:", extractions);
-    const screenshot = await this.getScreenshot(this.appiumSessionConfig);
+  async extract(extractions, cachingMode) {
+    // NOTE(review): `cachingMode` is accepted but never added to the request below,
+    // unlike the assert request which sends `caching_mode` — confirm whether
+    // the extract endpoint should receive it.
+    if (!this.appiumSessionStarted) {
+      await this.startSession();
+    }
+    globalLogger.info(`Extracting data: ${extractions.join(", ")}`);
+    // In cloud mode no local screenshot is captured; the field stays undefined.
+    let screenshot;
+    if (!this.useGptDriverCloud) {
+      screenshot = await this.getScreenshot(this.appiumSessionConfig);
+    }
     const response = await axios.post(
       `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/extract`,
       {
         api_key: this.apiKey,
         base64_screenshot: screenshot,
         extractions,
-        command: `Extract: ${JSON.stringify(extractions)}`
+        command: `Extract: ${JSON.stringify(extractions)}`,
+        step_number: this.step_number
       }
     );
+    // The step counter only advances after the request has succeeded.
+    this.step_number = this.step_number + 1;
+    globalLogger.debug(`Extraction results: ${JSON.stringify(response.data.results)}`);
     return response.data.results;
   }
-  async gptHandler(command) {
+  /**
+   * Opens a deep link url in the Appium session.
+   *
+   * This method sends a request to the GPT Driver server to open a deep link url in the Appium session.
+   *
+   * @param {OpenDeepLinkUrlParams} params - The parameters for opening the deep link url.
+   * @returns {Promise<void>} A promise that resolves when the deep link url is opened.
+   */
+  async openDeepLinkUrl(params) {
+    if (!this.appiumSessionStarted) {
+      await this.startSession();
+    }
+    globalLogger.info(`Opening deep link: ${params.url}`);
+    const isAndroid = this.appiumSessionConfig?.platform === "Android";
+    if (isAndroid && params.package == null) {
+      throw new Error("Package is required for Android platform");
+    }
+    // Only truthy app identifiers are included in the script arguments.
+    const deepLinkArgs = { url: params.url };
+    if (params.bundleId) {
+      deepLinkArgs.bundleId = params.bundleId;
+    }
+    if (params.package) {
+      deepLinkArgs.package = params.package;
+    }
+    // NOTE(review): host and port are fixed here; presumably executeCommand()
+    // routes the request to the configured Appium server — confirm.
+    const request = {
+      url: `http://localhost:4723/session/${this.appiumSessionConfig?.id}/execute/sync`,
+      method: "POST",
+      data: {
+        "script": "mobile:deepLink",
+        "args": [deepLinkArgs]
+      }
+    };
+    await this.executeCommand(request);
+    this.step_number = this.step_number + 1;
+    globalLogger.debug("Deep link opened successfully");
+  }
+  /**
+   * Reads a flow JSON file from disk and validates it using the SavableTestStoreSchema.
+   *
+   * Returns the parsed and validated object on success; throws a detailed error on failure.
+   *
+   * @param filePath - Path to the flow file (JSON)
+   * @param options - Optional execution options
+   * @param options.useSmartLoop - If true, uses the smart loop execution (Cache -> AI -> Execute -> Populate)
+   *                               for AI, tap, and assert steps. This optimizes execution by checking cache
+   *                               first and populating it after successful execution. Default: false
+   * @returns The validated flow data
+   *
+   * @example
+   * // Execute flow with default settings (legacy gptHandler)
+   * const result = await driver.executeFlow('tests/login-flow.json');
+   *
+   * @example
+   * // Execute flow with smart loop enabled for optimized caching
+   * const result = await driver.executeFlow('tests/login-flow.json', { useSmartLoop: true });
+   */
+  async executeFlow(filePath, options) {
+    const useSmartLoop = options?.useSmartLoop ?? false;
+    globalLogger.info(`Loading flow from file: ${filePath}`);
+    const absolutePath = path.resolve(filePath);
+    const baseDir = path.dirname(absolutePath);
+    // Renders schema validation issues as one "- path: message" line each.
+    const formatIssues = (issues) => issues.map((iss) => `- ${iss.path.join(".") || "<root>"}: ${iss.message}`).join("\n");
+    let raw;
+    try {
+      raw = await node_fs.promises.readFile(absolutePath, "utf-8");
+    } catch (e) {
+      const msg = `Failed to read file at ${filePath}: ${e?.message ?? e}`;
+      globalLogger.error(msg);
+      throw new Error(msg);
+    }
+    let json;
+    try {
+      json = JSON.parse(raw);
+    } catch (e) {
+      const msg = `Invalid JSON in flow file ${filePath}: ${e?.message ?? e}`;
+      globalLogger.error(msg);
+      throw new Error(msg);
+    }
+    const parsed = SavableTestStoreSchema.safeParse(json);
+    if (!parsed.success) {
+      const msg = `Flow validation failed for ${filePath}:\n${formatIssues(parsed.error.issues)}`;
+      globalLogger.error(msg);
+      throw new Error(msg);
+    }
+    const rootFlow = parsed.data;
+    globalLogger.info(`Flow file validated successfully: ${filePath}`);
+    // Reads, parses and validates a referenced flow file.
+    const loadFlow = async (p) => {
+      const abs = path.isAbsolute(p) ? p : path.resolve(baseDir, p);
+      const rawChild = await node_fs.promises.readFile(abs, "utf-8");
+      const childJson = JSON.parse(rawChild);
+      const val = SavableTestStoreSchema.safeParse(childJson);
+      if (!val.success) {
+        throw new Error(`Flow validation failed for referenced file ${abs}:\n${formatIssues(val.error.issues)}`);
+      }
+      return val.data;
+    };
+    // Flattens `fileRef` steps into the steps of the referenced flows.
+    // `stack` holds the files on the current inclusion chain (root first).
+    const expandSteps = async (steps, inheritedParams, parentDir, stack) => {
+      const out = [];
+      for (const step of steps) {
+        if (step.type !== "fileRef") {
+          out.push({ ...step, __params: { ...inheritedParams } });
+          continue;
+        }
+        const refPath = path.isAbsolute(step.path) ? step.path : path.resolve(parentDir, step.path);
+        const refKey = path.normalize(refPath);
+        // A cycle exists only when a file includes one of its own ancestors.
+        // Checking the chain (rather than every file seen so far) lets the
+        // same flow be referenced several times, e.g. with different overrides.
+        if (stack.includes(refKey)) {
+          const cycle = [...stack, refKey].map((p) => path.basename(p)).join(" -> ");
+          throw new Error(`Detected circular fileRef: ${cycle}`);
+        }
+        const child = await loadFlow(refPath);
+        const mergedParams = { ...inheritedParams, ...step.overrides ?? {} };
+        const childExpanded = await expandSteps(child.steps, mergedParams, path.dirname(refPath), [...stack, refKey]);
+        out.push(...childExpanded);
+      }
+      return out;
+    };
+    const effectiveParams = { ...rootFlow.params ?? {} };
+    const expandedSteps = await expandSteps(rootFlow.steps, effectiveParams, baseDir, [absolutePath]);
+    if (!this.appiumSessionStarted) {
+      await this.startSession();
+    }
+    // Runs one step through the smart loop and advances the step counter on success.
+    const runSmartLoop = async (description, instruction) => {
+      const ctx = this.createSmartLoopContext();
+      const result = await executeSmartLoop(ctx, {
+        stepNumber: this.step_number,
+        description,
+        instruction
+      });
+      if (!result.success) {
+        throw new Error(result.error || "Smart loop execution failed");
+      }
+      this.step_number++;
+    };
+    globalLogger.info(`Executing flow '${rootFlow.name}' with ${expandedSteps.length} step(s)...`);
+    let executed = 0;
+    try {
+      for (const step of expandedSteps) {
+        const params = step.__params ?? effectiveParams;
+        const prefix = `Step #${executed + 1} [${step.type}${step.optional ? ", optional" : ""}]`;
+        try {
+          switch (step.type) {
+            case "ai": {
+              const instruction = this.interpolateTemplate(step.instruction, params);
+              globalLogger.info(`${prefix}: ${instruction}`);
+              if (useSmartLoop) {
+                await runSmartLoop(instruction, instruction);
+              } else {
+                await this.aiExecute({ command: instruction });
+              }
+              break;
+            }
+            case "tap": {
+              const description = step.descriptionText ? this.interpolateTemplate(step.descriptionText, params) : void 0;
+              if (!description) {
+                throw new Error("Tap step requires a descriptionText. Coordinate-based taps are no longer supported.");
+              }
+              globalLogger.info(`${prefix}: ${description}`);
+              if (useSmartLoop) {
+                await runSmartLoop(description, description);
+              } else {
+                await this.aiExecute({ command: description });
+              }
+              break;
+            }
+            case "assert": {
+              const description = step.descriptionText ? this.interpolateTemplate(step.descriptionText, params) : void 0;
+              if (!description) {
+                throw new Error("Assert step requires a descriptionText. Coordinate-based assertions are no longer supported.");
+              }
+              globalLogger.info(`${prefix}: ${description}`);
+              if (useSmartLoop) {
+                await runSmartLoop(description, `Verify that: ${description}`);
+              } else {
+                await this.assert(description);
+              }
+              break;
+            }
+            case "type": {
+              const text = this.interpolateTemplate(step.text, params);
+              globalLogger.info(`${prefix}: Type text`);
+              await this.takeScreenshotAndLogCodeExecution(`type: text=${text}`);
+              await this.performType(text);
+              this.step_number++;
+              break;
+            }
+            case "scroll": {
+              globalLogger.info(`${prefix}: Scroll ${step.direction}`);
+              await this.takeScreenshotAndLogCodeExecution(`scroll: direction=${step.direction}`);
+              await this.performScroll(step.direction);
+              this.step_number++;
+              break;
+            }
+            case "zoom": {
+              // NOTE(review): no zoom gesture is performed here; the step is
+              // only logged and counted — confirm whether that is intended.
+              globalLogger.info(`${prefix}: Zoom ${step.direction}`);
+              await this.takeScreenshotAndLogCodeExecution(`zoom: direction=${step.direction}`);
+              this.step_number++;
+              break;
+            }
+            case "scrollUntil": {
+              const interpolatedText = step.text != null ? this.interpolateTemplate(step.text, params) : void 0;
+              globalLogger.info(`${prefix}: Scroll until ${interpolatedText ?? step.elementId}`);
+              await this.takeScreenshotAndLogCodeExecution(`scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`);
+              await this.performScrollUntil({
+                direction: step.direction,
+                text: interpolatedText,
+                elementId: step.elementId,
+                maxScrolls: step.maxScrolls
+              });
+              this.step_number++;
+              break;
+            }
+            case "deeplink": {
+              const pkg = params["package"];
+              const bundleId = params["bundleId"];
+              const url = this.interpolateTemplate(step.url, params);
+              globalLogger.info(`${prefix}: Open deeplink ${url}`);
+              await this.takeScreenshotAndLogCodeExecution(`openDeepLinkUrl: url=${url}`);
+              // openDeepLinkUrl() advances the step counter itself.
+              await this.openDeepLinkUrl({ url, package: pkg, bundleId });
+              break;
+            }
+            default: {
+              throw new Error(`Unsupported step type at execution: ${step.type}`);
+            }
+          }
+          executed++;
+        } catch (err) {
+          if (step.optional) {
+            globalLogger.warn(`${prefix} failed but marked optional. Continuing. Error: ${err.message}`);
+            continue;
+          }
+          throw err;
+        }
+      }
+    } catch (e) {
+      try {
+        await this.setSessionStatus("failed");
+      } catch (statusError) {
+        // Best effort: a failed status update must not mask the original
+        // error, but it should still leave a trace in the log.
+        globalLogger.debug(`Could not mark session as failed: ${statusError?.message ?? statusError}`);
+      }
+      throw e;
+    }
+    return rootFlow;
+  }
+  async gptHandler(command, cachingMode) {
     try {
       let conditionSucceeded = false;
       while (!conditionSucceeded) {
-        const screenshot = await this.getScreenshot(this.appiumSessionConfig);
-        console.log(">> Asking GTP Driver for next action...");
+        let screenshot;
+        if (!this.useGptDriverCloud) {
+          screenshot = await this.getScreenshot(this.appiumSessionConfig);
+        }
+        globalLogger.info("Requesting next action from GPT Driver...");
         const response = await axios.request(
           {
             url: `${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/execute`,
@@ -311,39 +1553,49 @@ class GptDriver {
             data: {
               api_key: this.apiKey,
               command,
-              base64_screenshot: screenshot
+              base64_screenshot: screenshot,
+              caching_mode: cachingMode ?? this.cachingMode,
+              step_number: this.step_number
             }
           }
         );
         const executeStatus = response.data.status;
         if (executeStatus === "failed") {
-          const errorMessage = response?.data?.commands?.at(0)?.data;
+          const errorMessage = response.data?.commands?.at(0)?.data;
+          globalLogger.error(`Execution failed: ${errorMessage ?? "Unknown error"}`);
           throw new Error(errorMessage ?? "Execution failed");
         }
         conditionSucceeded = executeStatus !== "inProgress";
         const executeResponse = response.data;
-        for (const command2 of executeResponse.commands) {
-          await this.executeCommand(command2);
+        globalLogger.debug(`Received ${executeResponse.commands.length} command(s) to execute`);
+        for (const appiumCommand of executeResponse.commands) {
+          await this.executeCommand(appiumCommand);
         }
         if (!conditionSucceeded) {
+          globalLogger.debug("Command still in progress, waiting...");
           await delay(1500);
         }
       }
+      this.step_number = this.step_number + 1;
+      globalLogger.info("Command execution completed successfully");
     } catch (e) {
-      await this.stopSession("failed");
+      globalLogger.error("GPT handler failed", e);
+      await this.setSessionStatus("failed");
       throw e;
     }
   }
   async executeCommand(command) {
-    const firstAction = command.data.actions?.at(0);
+    const firstAction = command.data?.actions?.at(0);
     if (firstAction?.type === "pause" && firstAction.duration != null) {
+      globalLogger.debug(`Pausing for ${firstAction.duration} seconds`);
       await delay(firstAction * 1e3);
-    } else {
+    } else if (!this.useGptDriverCloud) {
       const parsedUrl = new URL(command.url);
       parsedUrl.protocol = this.appiumSessionConfig.serverUrl.protocol;
       parsedUrl.host = this.appiumSessionConfig.serverUrl.host;
       parsedUrl.port = this.appiumSessionConfig.serverUrl.port != "" ? `${this.appiumSessionConfig.serverUrl.port}` : "";
       parsedUrl.pathname = this.appiumSessionConfig.serverUrl.pathname != "/" ? `${this.appiumSessionConfig.serverUrl.pathname}${parsedUrl.pathname}` : parsedUrl.pathname;
+      globalLogger.debug(`Executing ${command.method} request to ${parsedUrl.pathname}`);
       await axios.request({
         url: parsedUrl.toString(),
         method: command.method,
@@ -351,16 +1603,25 @@ class GptDriver {
       });
     }
   }
-  async getScreenshot(appiumSessionConfig) {
-    const url = buildUrl(this.appiumSessionConfig.serverUrl, `/session/${this.appiumSessionConfig.id}/screenshot`);
-    const screenshotResponse = await axios.get(url);
-    let screenshot = await screenshotResponse.data.value;
-    if (appiumSessionConfig.platform === "iOS") {
-      const imageBuffer = Buffer.from(screenshot, "base64");
-      const transformedImage = await sharp(imageBuffer).resize(appiumSessionConfig.size.width, appiumSessionConfig.size.height).toBuffer();
-      screenshot = transformedImage.toString("base64");
+  async logCodeExecution(screenshot, command) {
+    try {
+      const screenshot2 = await this.getScreenshot(this.appiumSessionConfig);
+      await axios.post(`${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/log_code_execution`, {
+        api_key: this.apiKey,
+        base64_screenshot: screenshot2,
+        command
+      });
+    } catch (e) {
+      globalLogger.error("Failed to log code execution", e);
+    }
+  }
+  async takeScreenshotAndLogCodeExecution(command) { // Best-effort helper: capture a screenshot, then report `command` through logCodeExecution.
+    try {
+      const screenshot = await this.getScreenshot(this.appiumSessionConfig); // Capture uses the current Appium session config.
+      await this.logCodeExecution(screenshot, command); // Hands the capture and the command label to logCodeExecution.
+    } catch (e) {
+      globalLogger.error("Failed to log code execution", e); // Errors are logged and not rethrown, so callers never see a failure from this helper.
     }
-    return screenshot;
   }
 }