@midscene/core 1.5.4-beta-20260310030546.0 → 1.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -34,9 +34,16 @@ async function commonContextParser(interfaceInstance, _opt) {
34
34
  if (!Number.isFinite(imgWidth) || !Number.isFinite(imgHeight)) throw new Error(`Invalid screenshot dimensions: width and height must be finite numbers. Received width: ${imgWidth}, height: ${imgHeight}`);
35
35
  if (imgWidth <= 0 || imgHeight <= 0) throw new Error(`Invalid screenshot dimensions: width and height must be positive numbers. Received width: ${imgWidth}, height: ${imgHeight}`);
36
36
  debug('screenshot dimensions', imgWidth, 'x', imgHeight);
37
+ const logicalIsPortrait = logicalWidth < logicalHeight;
38
+ const screenshotIsPortrait = imgWidth < imgHeight;
39
+ let finalLogicalWidth = logicalWidth;
40
+ if (logicalIsPortrait !== screenshotIsPortrait) {
41
+ debug(`Orientation mismatch detected: logical size ${logicalWidth}x${logicalHeight} (${logicalIsPortrait ? 'portrait' : 'landscape'}) vs screenshot ${imgWidth}x${imgHeight} (${screenshotIsPortrait ? 'portrait' : 'landscape'}). Swapping logical dimensions.`);
42
+ finalLogicalWidth = logicalHeight;
43
+ }
37
44
  const userShrinkFactor = _opt.screenshotShrinkFactor ?? 1;
38
45
  if (!Number.isFinite(userShrinkFactor) || userShrinkFactor < 1) throw new Error(`Invalid screenshotShrinkFactor: must be a finite number >= 1. Received: ${userShrinkFactor}`);
39
- const dpr = imgWidth / logicalWidth;
46
+ const dpr = imgWidth / finalLogicalWidth;
40
47
  debug('calculated dpr:', dpr);
41
48
  const shrunkShotToLogicalRatio = dpr / userShrinkFactor;
42
49
  debug('shrunkShotToLogicalRatio', shrunkShotToLogicalRatio);
@@ -145,7 +152,7 @@ async function matchElementFromCache(context, cacheEntry, cachePrompt, cacheable
145
152
  return;
146
153
  }
147
154
  }
148
- const getMidsceneVersion = ()=>"1.5.4-beta-20260310030546.0";
155
+ const getMidsceneVersion = ()=>"1.5.4";
149
156
  const parsePrompt = (prompt)=>{
150
157
  if ('string' == typeof prompt) return {
151
158
  textPrompt: prompt,
@@ -1 +1 @@
1
- {"version":3,"file":"agent/utils.mjs","sources":["../../../src/agent/utils.ts"],"sourcesContent":["import type { TMultimodalPrompt, TUserPrompt } from '@/common';\nimport type { AbstractInterface } from '@/device';\nimport { ScreenshotItem } from '@/screenshot-item';\nimport type {\n ElementCacheFeature,\n LocateResultElement,\n PlanningLocateParam,\n Rect,\n UIContext,\n} from '@/types';\nimport { uploadTestInfoToServer } from '@/utils';\nimport {\n MIDSCENE_REPORT_QUIET,\n MIDSCENE_REPORT_TAG_NAME,\n globalConfigManager,\n} from '@midscene/shared/env';\nimport { generateElementByRect } from '@midscene/shared/extractor';\nimport { imageInfoOfBase64, resizeImgBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';\nimport { assert, logMsg, uuid } from '@midscene/shared/utils';\nimport dayjs from 'dayjs';\nimport type { TaskCache } from './task-cache';\nimport { debug as cacheDebug } from './task-cache';\n\nexport async function commonContextParser(\n interfaceInstance: AbstractInterface,\n _opt: { uploadServerUrl?: string; screenshotShrinkFactor?: number },\n): Promise<UIContext> {\n const debug = getDebug('commonContextParser');\n\n assert(interfaceInstance, 'interfaceInstance is required');\n\n debug('Getting interface description');\n const description = interfaceInstance.describe?.() || '';\n debug('Interface description end');\n\n debug('Uploading test info to server');\n uploadTestInfoToServer({\n testUrl: description,\n serverUrl: _opt.uploadServerUrl,\n });\n debug('UploadTestInfoToServer end');\n\n debug('will get size');\n const interfaceSize = await interfaceInstance.size();\n const { width: logicalWidth, height: logicalHeight } = interfaceSize;\n\n if ((interfaceSize as unknown as { dpr: number }).dpr) {\n console.warn(\n 'Warning: return value of interface.size() include a dpr property, which is not expected and ignored. ',\n );\n }\n\n if (!Number.isFinite(logicalWidth) || !Number.isFinite(logicalHeight)) {\n throw new Error(\n `Invalid interface size: width and height must be finite numbers. Received width: ${logicalWidth}, height: ${logicalHeight}`,\n );\n }\n\n if (logicalWidth <= 0 || logicalHeight <= 0) {\n throw new Error(\n `Invalid interface size: width and height must be positive numbers. Received width: ${logicalWidth}, height: ${logicalHeight}`,\n );\n }\n\n debug(`size: ${logicalWidth}x${logicalHeight}`);\n\n const screenshotBase64 = await interfaceInstance.screenshotBase64();\n const screenshotCapturedAt = Date.now();\n assert(screenshotBase64!, 'screenshotBase64 is required');\n\n // Get physical screenshot dimensions\n debug('will get screenshot dimensions');\n const { width: imgWidth, height: imgHeight } =\n await imageInfoOfBase64(screenshotBase64);\n\n if (!Number.isFinite(imgWidth) || !Number.isFinite(imgHeight)) {\n throw new Error(\n `Invalid screenshot dimensions: width and height must be finite numbers. Received width: ${imgWidth}, height: ${imgHeight}`,\n );\n }\n if (imgWidth <= 0 || imgHeight <= 0) {\n throw new Error(\n `Invalid screenshot dimensions: width and height must be positive numbers. Received width: ${imgWidth}, height: ${imgHeight}`,\n );\n }\n debug('screenshot dimensions', imgWidth, 'x', imgHeight);\n\n // Validate user-specified shrink factor\n const userShrinkFactor = _opt.screenshotShrinkFactor ?? 1;\n\n if (!Number.isFinite(userShrinkFactor) || userShrinkFactor < 1) {\n throw new Error(\n `Invalid screenshotShrinkFactor: must be a finite number >= 1. Received: ${userShrinkFactor}`,\n );\n }\n\n const dpr = imgWidth / logicalWidth;\n\n debug('calculated dpr:', dpr);\n\n const shrunkShotToLogicalRatio = dpr / userShrinkFactor;\n\n debug('shrunkShotToLogicalRatio', shrunkShotToLogicalRatio);\n\n if (userShrinkFactor !== 1) {\n const targetWidth = Math.round(imgWidth / userShrinkFactor);\n const targetHeight = Math.round(imgHeight / userShrinkFactor);\n\n debug(\n `Applying screenshot shrink factor: ${userShrinkFactor} (physical: ${imgWidth}x${imgHeight} -> target: ${targetWidth}x${targetHeight})`,\n );\n\n const resizedBase64 = await resizeImgBase64(screenshotBase64, {\n width: targetWidth,\n height: targetHeight,\n });\n return {\n shotSize: {\n width: targetWidth,\n height: targetHeight,\n },\n deprecatedDpr: dpr,\n screenshot: ScreenshotItem.create(resizedBase64, screenshotCapturedAt),\n shrunkShotToLogicalRatio,\n };\n }\n\n return {\n shotSize: {\n width: imgWidth,\n height: imgHeight,\n },\n deprecatedDpr: dpr,\n screenshot: ScreenshotItem.create(screenshotBase64, screenshotCapturedAt),\n shrunkShotToLogicalRatio,\n };\n}\n\nexport function getReportFileName(tag = 'web') {\n const reportTagName = globalConfigManager.getEnvConfigValue(\n MIDSCENE_REPORT_TAG_NAME,\n );\n const dateTimeInFileName = dayjs().format('YYYY-MM-DD_HH-mm-ss');\n // ensure uniqueness at the same time\n const uniqueId = uuid().substring(0, 8);\n return `${reportTagName || tag}-${dateTimeInFileName}-${uniqueId}`;\n}\n\nexport function printReportMsg(filepath: string) {\n if (globalConfigManager.getEnvConfigInBoolean(MIDSCENE_REPORT_QUIET)) {\n return;\n }\n logMsg(`Midscene - report file updated: ${filepath}`);\n}\n\n/**\n * Get the current execution file name\n * @returns The name of the current execution file\n */\nexport function getCurrentExecutionFile(trace?: string): string | false {\n const error = new Error();\n const stackTrace = trace || error.stack;\n const pkgDir = process.cwd() || '';\n if (stackTrace) {\n const stackLines = stackTrace.split('\\n');\n for (const line of stackLines) {\n if (\n line.includes('.spec.') ||\n line.includes('.test.') ||\n line.includes('.ts') ||\n line.includes('.js')\n ) {\n const match = line.match(/(?:at\\s+)?(.*?\\.(?:spec|test)\\.[jt]s)/);\n if (match?.[1]) {\n const targetFileName = match[1]\n .replace(pkgDir, '')\n .trim()\n .replace('at ', '');\n return targetFileName;\n }\n }\n }\n }\n return false;\n}\n\nconst testFileIndex = new Map<string, number>();\n\nexport function generateCacheId(fileName?: string): string {\n let taskFile = fileName || getCurrentExecutionFile();\n if (!taskFile) {\n taskFile = uuid();\n console.warn(\n 'Midscene - using random UUID for cache id. Cache may be invalid.',\n );\n }\n\n if (testFileIndex.has(taskFile)) {\n const currentIndex = testFileIndex.get(taskFile);\n if (currentIndex !== undefined) {\n testFileIndex.set(taskFile, currentIndex + 1);\n }\n } else {\n testFileIndex.set(taskFile, 1);\n }\n return `${taskFile}-${testFileIndex.get(taskFile)}`;\n}\n\nexport function ifPlanLocateParamIsBbox(\n planLocateParam: PlanningLocateParam,\n): boolean {\n return !!(\n planLocateParam.bbox &&\n Array.isArray(planLocateParam.bbox) &&\n planLocateParam.bbox.length === 4\n );\n}\n\nexport function matchElementFromPlan(\n planLocateParam: PlanningLocateParam,\n): LocateResultElement | undefined {\n if (!planLocateParam) {\n return undefined;\n }\n\n if (planLocateParam.bbox) {\n // Convert bbox [x1, y1, x2, y2] to rect {left, top, width, height}\n const rect = {\n left: planLocateParam.bbox[0],\n top: planLocateParam.bbox[1],\n width: planLocateParam.bbox[2] - planLocateParam.bbox[0] + 1,\n height: planLocateParam.bbox[3] - planLocateParam.bbox[1] + 1,\n };\n\n const element = generateElementByRect(\n rect,\n typeof planLocateParam.prompt === 'string'\n ? planLocateParam.prompt\n : planLocateParam.prompt?.prompt || '',\n );\n return element;\n }\n\n return undefined;\n}\n\nexport async function matchElementFromCache(\n context: {\n taskCache?: TaskCache;\n interfaceInstance: AbstractInterface;\n },\n cacheEntry: ElementCacheFeature | undefined,\n cachePrompt: TUserPrompt,\n cacheable: boolean | undefined,\n): Promise<LocateResultElement | undefined> {\n if (!cacheEntry) {\n return undefined;\n }\n\n if (cacheable === false) {\n cacheDebug('cache disabled for prompt: %s', cachePrompt);\n return undefined;\n }\n\n if (!context.taskCache?.isCacheResultUsed) {\n return undefined;\n }\n\n if (!context.interfaceInstance.rectMatchesCacheFeature) {\n cacheDebug(\n 'interface does not implement rectMatchesCacheFeature, skip cache',\n );\n return undefined;\n }\n\n try {\n const rect =\n await context.interfaceInstance.rectMatchesCacheFeature(cacheEntry);\n const element: LocateResultElement = {\n center: [\n Math.round(rect.left + rect.width / 2),\n Math.round(rect.top + rect.height / 2),\n ],\n rect,\n description:\n typeof cachePrompt === 'string'\n ? cachePrompt\n : cachePrompt.prompt || '',\n };\n\n cacheDebug('cache hit, prompt: %s', cachePrompt);\n return element;\n } catch (error) {\n cacheDebug('rectMatchesCacheFeature error: %s', error);\n return undefined;\n }\n}\n\ndeclare const __VERSION__: string | undefined;\n\nexport const getMidsceneVersion = (): string => {\n if (typeof __VERSION__ !== 'undefined') {\n return __VERSION__;\n } else if (\n process.env.__VERSION__ &&\n process.env.__VERSION__ !== 'undefined'\n ) {\n return process.env.__VERSION__;\n }\n throw new Error('__VERSION__ inject failed during build');\n};\n\nexport const parsePrompt = (\n prompt: TUserPrompt,\n): {\n textPrompt: string;\n multimodalPrompt?: TMultimodalPrompt;\n} => {\n if (typeof prompt === 'string') {\n return {\n textPrompt: prompt,\n multimodalPrompt: undefined,\n };\n }\n return {\n textPrompt: prompt.prompt,\n multimodalPrompt: prompt.images\n ? {\n images: prompt.images,\n convertHttpImage2Base64: !!prompt.convertHttpImage2Base64,\n }\n : undefined,\n };\n};\n\nexport const transformLogicalElementToScreenshot = (\n element: LocateResultElement,\n shrunkShotToLogicalRatio: number,\n): LocateResultElement => {\n if (shrunkShotToLogicalRatio === 1) {\n return element;\n }\n\n return {\n ...element,\n center: [\n Math.round(element.center[0] * shrunkShotToLogicalRatio),\n Math.round(element.center[1] * shrunkShotToLogicalRatio),\n ],\n rect: {\n ...element.rect,\n left: Math.round(element.rect.left * shrunkShotToLogicalRatio),\n top: Math.round(element.rect.top * shrunkShotToLogicalRatio),\n width: Math.round(element.rect.width * shrunkShotToLogicalRatio),\n height: Math.round(element.rect.height * shrunkShotToLogicalRatio),\n },\n };\n};\n\nexport const transformLogicalRectToScreenshotRect = (\n rect: Rect,\n shrunkShotToLogicalRatio: number,\n): Rect => {\n if (shrunkShotToLogicalRatio === 1) {\n return rect;\n }\n\n return {\n ...rect,\n left: Math.round(rect.left * shrunkShotToLogicalRatio),\n top: Math.round(rect.top * shrunkShotToLogicalRatio),\n width: Math.round(rect.width * shrunkShotToLogicalRatio),\n height: Math.round(rect.height * shrunkShotToLogicalRatio),\n };\n};\n"],"names":["commonContextParser","interfaceInstance","_opt","debug","getDebug","assert","description","uploadTestInfoToServer","interfaceSize","logicalWidth","logicalHeight","console","Number","Error","screenshotBase64","screenshotCapturedAt","Date","imgWidth","imgHeight","imageInfoOfBase64","userShrinkFactor","dpr","shrunkShotToLogicalRatio","targetWidth","Math","targetHeight","resizedBase64","resizeImgBase64","ScreenshotItem","getReportFileName","tag","reportTagName","globalConfigManager","MIDSCENE_REPORT_TAG_NAME","dateTimeInFileName","dayjs","uniqueId","uuid","printReportMsg","filepath","MIDSCENE_REPORT_QUIET","logMsg","getCurrentExecutionFile","trace","error","stackTrace","pkgDir","process","stackLines","line","match","targetFileName","testFileIndex","Map","generateCacheId","fileName","taskFile","currentIndex","undefined","ifPlanLocateParamIsBbox","planLocateParam","Array","matchElementFromPlan","rect","element","generateElementByRect","matchElementFromCache","context","cacheEntry","cachePrompt","cacheable","cacheDebug","getMidsceneVersion","__VERSION__","parsePrompt","prompt","transformLogicalElementToScreenshot","transformLogicalRectToScreenshotRect"],"mappings":";;;;;;;;;AAyBO,eAAeA,oBACpBC,iBAAoC,EACpCC,IAAmE;IAEnE,MAAMC,QAAQC,SAAS;IAEvBC,OAAOJ,mBAAmB;IAE1BE,MAAM;IACN,MAAMG,cAAcL,kBAAkB,QAAQ,QAAQ;IACtDE,MAAM;IAENA,MAAM;IACNI,uBAAuB;QACrB,SAASD;QACT,WAAWJ,KAAK,eAAe;IACjC;IACAC,MAAM;IAENA,MAAM;IACN,MAAMK,gBAAgB,MAAMP,kBAAkB,IAAI;IAClD,MAAM,EAAE,OAAOQ,YAAY,EAAE,QAAQC,aAAa,EAAE,GAAGF;IAEvD,IAAKA,cAA6C,GAAG,EACnDG,QAAQ,IAAI,CACV;IAIJ,IAAI,CAACC,OAAO,QAAQ,CAACH,iBAAiB,CAACG,OAAO,QAAQ,CAACF,gBACrD,MAAM,IAAIG,MACR,CAAC,iFAAiF,EAAEJ,aAAa,UAAU,EAAEC,eAAe;IAIhI,IAAID,gBAAgB,KAAKC,iBAAiB,GACxC,MAAM,IAAIG,MACR,CAAC,mFAAmF,EAAEJ,aAAa,UAAU,EAAEC,eAAe;IAIlIP,MAAM,CAAC,MAAM,EAAEM,aAAa,CAAC,EAAEC,eAAe;IAE9C,MAAMI,mBAAmB,MAAMb,kBAAkB,gBAAgB;IACjE,MAAMc,uBAAuBC,KAAK,GAAG;IACrCX,OAAOS,kBAAmB;IAG1BX,MAAM;IACN,MAAM,EAAE,OAAOc,QAAQ,EAAE,QAAQC,SAAS,EAAE,GAC1C,MAAMC,kBAAkBL;IAE1B,IAAI,CAACF,OAAO,QAAQ,CAACK,aAAa,CAACL,OAAO,QAAQ,CAACM,YACjD,MAAM,IAAIL,MACR,CAAC,wFAAwF,EAAEI,SAAS,UAAU,EAAEC,WAAW;IAG/H,IAAID,YAAY,KAAKC,aAAa,GAChC,MAAM,IAAIL,MACR,CAAC,0FAA0F,EAAEI,SAAS,UAAU,EAAEC,WAAW;IAGjIf,MAAM,yBAAyBc,UAAU,KAAKC;IAG9C,MAAME,mBAAmBlB,KAAK,sBAAsB,IAAI;IAExD,IAAI,CAACU,OAAO,QAAQ,CAACQ,qBAAqBA,mBAAmB,GAC3D,MAAM,IAAIP,MACR,CAAC,wEAAwE,EAAEO,kBAAkB;IAIjG,MAAMC,MAAMJ,WAAWR;IAEvBN,MAAM,mBAAmBkB;IAEzB,MAAMC,2BAA2BD,MAAMD;IAEvCjB,MAAM,4BAA4BmB;IAElC,IAAIF,AAAqB,MAArBA,kBAAwB;QAC1B,MAAMG,cAAcC,KAAK,KAAK,CAACP,WAAWG;QAC1C,MAAMK,eAAeD,KAAK,KAAK,CAACN,YAAYE;QAE5CjB,MACE,CAAC,mCAAmC,EAAEiB,iBAAiB,YAAY,EAAEH,SAAS,CAAC,EAAEC,UAAU,YAAY,EAAEK,YAAY,CAAC,EAAEE,aAAa,CAAC,CAAC;QAGzI,MAAMC,gBAAgB,MAAMC,gBAAgBb,kBAAkB;YAC5D,OAAOS;YACP,QAAQE;QACV;QACA,OAAO;YACL,UAAU;gBACR,OAAOF;gBACP,QAAQE;YACV;YACA,eAAeJ;YACf,YAAYO,eAAe,MAAM,CAACF,eAAeX;YACjDO;QACF;IACF;IAEA,OAAO;QACL,UAAU;YACR,OAAOL;YACP,QAAQC;QACV;QACA,eAAeG;QACf,YAAYO,eAAe,MAAM,CAACd,kBAAkBC;QACpDO;IACF;AACF;AAEO,SAASO,kBAAkBC,MAAM,KAAK;IAC3C,MAAMC,gBAAgBC,oBAAoB,iBAAiB,CACzDC;IAEF,MAAMC,qBAAqBC,QAAQ,MAAM,CAAC;IAE1C,MAAMC,WAAWC,OAAO,SAAS,CAAC,GAAG;IACrC,OAAO,GAAGN,iBAAiBD,IAAI,CAAC,EAAEI,mBAAmB,CAAC,EAAEE,UAAU;AACpE;AAEO,SAASE,eAAeC,QAAgB;IAC7C,IAAIP,oBAAoB,qBAAqB,CAACQ,wBAC5C;IAEFC,OAAO,CAAC,gCAAgC,EAAEF,UAAU;AACtD;AAMO,SAASG,wBAAwBC,KAAc;IACpD,MAAMC,QAAQ,IAAI/B;IAClB,MAAMgC,aAAaF,SAASC,MAAM,KAAK;IACvC,MAAME,SAASC,QAAQ,GAAG,MAAM;IAChC,IAAIF,YAAY;QACd,MAAMG,aAAaH,WAAW,KAAK,CAAC;QACpC,KAAK,MAAMI,QAAQD,WACjB,IACEC,KAAK,QAAQ,CAAC,aACdA,KAAK,QAAQ,CAAC,aACdA,KAAK,QAAQ,CAAC,UACdA,KAAK,QAAQ,CAAC,QACd;YACA,MAAMC,QAAQD,KAAK,KAAK,CAAC;YACzB,IAAIC,OAAO,CAAC,EAAE,EAAE;gBACd,MAAMC,iBAAiBD,KAAK,CAAC,EAAE,CAC5B,OAAO,CAACJ,QAAQ,IAChB,IAAI,GACJ,OAAO,CAAC,OAAO;gBAClB,OAAOK;YACT;QACF;IAEJ;IACA,OAAO;AACT;AAEA,MAAMC,gBAAgB,IAAIC;AAEnB,SAASC,gBAAgBC,QAAiB;IAC/C,IAAIC,WAAWD,YAAYb;IAC3B,IAAI,CAACc,UAAU;QACbA,WAAWnB;QACX1B,QAAQ,IAAI,CACV;IAEJ;IAEA,IAAIyC,cAAc,GAAG,CAACI,WAAW;QAC/B,MAAMC,eAAeL,cAAc,GAAG,CAACI;QACvC,IAAIC,AAAiBC,WAAjBD,cACFL,cAAc,GAAG,CAACI,UAAUC,eAAe;IAE/C,OACEL,cAAc,GAAG,CAACI,UAAU;IAE9B,OAAO,GAAGA,SAAS,CAAC,EAAEJ,cAAc,GAAG,CAACI,WAAW;AACrD;AAEO,SAASG,wBACdC,eAAoC;IAEpC,OAAO,CAAC,CACNA,CAAAA,gBAAgB,IAAI,IACpBC,MAAM,OAAO,CAACD,gBAAgB,IAAI,KAClCA,AAAgC,MAAhCA,gBAAgB,IAAI,CAAC,MAAM,AAAK;AAEpC;AAEO,SAASE,qBACdF,eAAoC;IAEpC,IAAI,CAACA,iBACH;IAGF,IAAIA,gBAAgB,IAAI,EAAE;QAExB,MAAMG,OAAO;YACX,MAAMH,gBAAgB,IAAI,CAAC,EAAE;YAC7B,KAAKA,gBAAgB,IAAI,CAAC,EAAE;YAC5B,OAAOA,gBAAgB,IAAI,CAAC,EAAE,GAAGA,gBAAgB,IAAI,CAAC,EAAE,GAAG;YAC3D,QAAQA,gBAAgB,IAAI,CAAC,EAAE,GAAGA,gBAAgB,IAAI,CAAC,EAAE,GAAG;QAC9D;QAEA,MAAMI,UAAUC,sBACdF,MACA,AAAkC,YAAlC,OAAOH,gBAAgB,MAAM,GACzBA,gBAAgB,MAAM,GACtBA,gBAAgB,MAAM,EAAE,UAAU;QAExC,OAAOI;IACT;AAGF;AAEO,eAAeE,sBACpBC,OAGC,EACDC,UAA2C,EAC3CC,WAAwB,EACxBC,SAA8B;IAE9B,IAAI,CAACF,YACH;IAGF,IAAIE,AAAc,UAAdA,WAAqB,YACvBC,8BAAW,iCAAiCF;IAI9C,IAAI,CAACF,QAAQ,SAAS,EAAE,mBACtB;IAGF,IAAI,CAACA,QAAQ,iBAAiB,CAAC,uBAAuB,EAAE,YACtDI,8BACE;IAKJ,IAAI;QACF,MAAMR,OACJ,MAAMI,QAAQ,iBAAiB,CAAC,uBAAuB,CAACC;QAC1D,MAAMJ,UAA+B;YACnC,QAAQ;gBACNxC,KAAK,KAAK,CAACuC,KAAK,IAAI,GAAGA,KAAK,KAAK,GAAG;gBACpCvC,KAAK,KAAK,CAACuC,KAAK,GAAG,GAAGA,KAAK,MAAM,GAAG;aACrC;YACDA;YACA,aACE,AAAuB,YAAvB,OAAOM,cACHA,cACAA,YAAY,MAAM,IAAI;QAC9B;QAEAE,8BAAW,yBAAyBF;QACpC,OAAOL;IACT,EAAE,OAAOpB,OAAO;QACd2B,8BAAW,qCAAqC3B;QAChD;IACF;AACF;AAIO,MAAM4B,qBAAqB,IAEvBC;AAUJ,MAAMC,cAAc,CACzBC;IAKA,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAO;QACL,YAAYA;QACZ,kBAAkBjB;IACpB;IAEF,OAAO;QACL,YAAYiB,OAAO,MAAM;QACzB,kBAAkBA,OAAO,MAAM,GAC3B;YACE,QAAQA,OAAO,MAAM;YACrB,yBAAyB,CAAC,CAACA,OAAO,uBAAuB;QAC3D,IACAjB;IACN;AACF;AAEO,MAAMkB,sCAAsC,CACjDZ,SACA1C;IAEA,IAAIA,AAA6B,MAA7BA,0BACF,OAAO0C;IAGT,OAAO;QACL,GAAGA,OAAO;QACV,QAAQ;YACNxC,KAAK,KAAK,CAACwC,QAAQ,MAAM,CAAC,EAAE,GAAG1C;YAC/BE,KAAK,KAAK,CAACwC,QAAQ,MAAM,CAAC,EAAE,GAAG1C;SAChC;QACD,MAAM;YACJ,GAAG0C,QAAQ,IAAI;YACf,MAAMxC,KAAK,KAAK,CAACwC,QAAQ,IAAI,CAAC,IAAI,GAAG1C;YACrC,KAAKE,KAAK,KAAK,CAACwC,QAAQ,IAAI,CAAC,GAAG,GAAG1C;YACnC,OAAOE,KAAK,KAAK,CAACwC,QAAQ,IAAI,CAAC,KAAK,GAAG1C;YACvC,QAAQE,KAAK,KAAK,CAACwC,QAAQ,IAAI,CAAC,MAAM,GAAG1C;QAC3C;IACF;AACF;AAEO,MAAMuD,uCAAuC,CAClDd,MACAzC;IAEA,IAAIA,AAA6B,MAA7BA,0BACF,OAAOyC;IAGT,OAAO;QACL,GAAGA,IAAI;QACP,MAAMvC,KAAK,KAAK,CAACuC,KAAK,IAAI,GAAGzC;QAC7B,KAAKE,KAAK,KAAK,CAACuC,KAAK,GAAG,GAAGzC;QAC3B,OAAOE,KAAK,KAAK,CAACuC,KAAK,KAAK,GAAGzC;QAC/B,QAAQE,KAAK,KAAK,CAACuC,KAAK,MAAM,GAAGzC;IACnC;AACF"}
1
+ {"version":3,"file":"agent/utils.mjs","sources":["../../../src/agent/utils.ts"],"sourcesContent":["import type { TMultimodalPrompt, TUserPrompt } from '@/common';\nimport type { AbstractInterface } from '@/device';\nimport { ScreenshotItem } from '@/screenshot-item';\nimport type {\n ElementCacheFeature,\n LocateResultElement,\n PlanningLocateParam,\n Rect,\n UIContext,\n} from '@/types';\nimport { uploadTestInfoToServer } from '@/utils';\nimport {\n MIDSCENE_REPORT_QUIET,\n MIDSCENE_REPORT_TAG_NAME,\n globalConfigManager,\n} from '@midscene/shared/env';\nimport { generateElementByRect } from '@midscene/shared/extractor';\nimport { imageInfoOfBase64, resizeImgBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';\nimport { assert, logMsg, uuid } from '@midscene/shared/utils';\nimport dayjs from 'dayjs';\nimport type { TaskCache } from './task-cache';\nimport { debug as cacheDebug } from './task-cache';\n\nexport async function commonContextParser(\n interfaceInstance: AbstractInterface,\n _opt: { uploadServerUrl?: string; screenshotShrinkFactor?: number },\n): Promise<UIContext> {\n const debug = getDebug('commonContextParser');\n\n assert(interfaceInstance, 'interfaceInstance is required');\n\n debug('Getting interface description');\n const description = interfaceInstance.describe?.() || '';\n debug('Interface description end');\n\n debug('Uploading test info to server');\n uploadTestInfoToServer({\n testUrl: description,\n serverUrl: _opt.uploadServerUrl,\n });\n debug('UploadTestInfoToServer end');\n\n debug('will get size');\n const interfaceSize = await interfaceInstance.size();\n const { width: logicalWidth, height: logicalHeight } = interfaceSize;\n\n if ((interfaceSize as unknown as { dpr: number }).dpr) {\n console.warn(\n 'Warning: return value of interface.size() include a dpr property, which is not expected and ignored. ',\n );\n }\n\n if (!Number.isFinite(logicalWidth) || !Number.isFinite(logicalHeight)) {\n throw new Error(\n `Invalid interface size: width and height must be finite numbers. Received width: ${logicalWidth}, height: ${logicalHeight}`,\n );\n }\n\n if (logicalWidth <= 0 || logicalHeight <= 0) {\n throw new Error(\n `Invalid interface size: width and height must be positive numbers. Received width: ${logicalWidth}, height: ${logicalHeight}`,\n );\n }\n\n debug(`size: ${logicalWidth}x${logicalHeight}`);\n\n const screenshotBase64 = await interfaceInstance.screenshotBase64();\n const screenshotCapturedAt = Date.now();\n assert(screenshotBase64!, 'screenshotBase64 is required');\n\n // Get physical screenshot dimensions\n debug('will get screenshot dimensions');\n const { width: imgWidth, height: imgHeight } =\n await imageInfoOfBase64(screenshotBase64);\n\n if (!Number.isFinite(imgWidth) || !Number.isFinite(imgHeight)) {\n throw new Error(\n `Invalid screenshot dimensions: width and height must be finite numbers. Received width: ${imgWidth}, height: ${imgHeight}`,\n );\n }\n if (imgWidth <= 0 || imgHeight <= 0) {\n throw new Error(\n `Invalid screenshot dimensions: width and height must be positive numbers. Received width: ${imgWidth}, height: ${imgHeight}`,\n );\n }\n debug('screenshot dimensions', imgWidth, 'x', imgHeight);\n\n // Detect orientation mismatch between logical size and screenshot.\n // Some devices (e.g. OPPO) report wrong orientation via ADB, causing\n // size() to return portrait dimensions even when the device is landscape.\n // We detect this by comparing aspect ratios and swap if they disagree.\n const logicalIsPortrait = logicalWidth < logicalHeight;\n const screenshotIsPortrait = imgWidth < imgHeight;\n let finalLogicalWidth = logicalWidth;\n let finalLogicalHeight = logicalHeight;\n if (logicalIsPortrait !== screenshotIsPortrait) {\n debug(\n `Orientation mismatch detected: logical size ${logicalWidth}x${logicalHeight} (${logicalIsPortrait ? 'portrait' : 'landscape'}) vs screenshot ${imgWidth}x${imgHeight} (${screenshotIsPortrait ? 'portrait' : 'landscape'}). Swapping logical dimensions.`,\n );\n finalLogicalWidth = logicalHeight;\n finalLogicalHeight = logicalWidth;\n }\n\n // Validate user-specified shrink factor\n const userShrinkFactor = _opt.screenshotShrinkFactor ?? 1;\n\n if (!Number.isFinite(userShrinkFactor) || userShrinkFactor < 1) {\n throw new Error(\n `Invalid screenshotShrinkFactor: must be a finite number >= 1. Received: ${userShrinkFactor}`,\n );\n }\n\n const dpr = imgWidth / finalLogicalWidth;\n\n debug('calculated dpr:', dpr);\n\n const shrunkShotToLogicalRatio = dpr / userShrinkFactor;\n\n debug('shrunkShotToLogicalRatio', shrunkShotToLogicalRatio);\n\n if (userShrinkFactor !== 1) {\n const targetWidth = Math.round(imgWidth / userShrinkFactor);\n const targetHeight = Math.round(imgHeight / userShrinkFactor);\n\n debug(\n `Applying screenshot shrink factor: ${userShrinkFactor} (physical: ${imgWidth}x${imgHeight} -> target: ${targetWidth}x${targetHeight})`,\n );\n\n const resizedBase64 = await resizeImgBase64(screenshotBase64, {\n width: targetWidth,\n height: targetHeight,\n });\n return {\n shotSize: {\n width: targetWidth,\n height: targetHeight,\n },\n deprecatedDpr: dpr,\n screenshot: ScreenshotItem.create(resizedBase64, screenshotCapturedAt),\n shrunkShotToLogicalRatio,\n };\n }\n\n return {\n shotSize: {\n width: imgWidth,\n height: imgHeight,\n },\n deprecatedDpr: dpr,\n screenshot: ScreenshotItem.create(screenshotBase64, screenshotCapturedAt),\n shrunkShotToLogicalRatio,\n };\n}\n\nexport function getReportFileName(tag = 'web') {\n const reportTagName = globalConfigManager.getEnvConfigValue(\n MIDSCENE_REPORT_TAG_NAME,\n );\n const dateTimeInFileName = dayjs().format('YYYY-MM-DD_HH-mm-ss');\n // ensure uniqueness at the same time\n const uniqueId = uuid().substring(0, 8);\n return `${reportTagName || tag}-${dateTimeInFileName}-${uniqueId}`;\n}\n\nexport function printReportMsg(filepath: string) {\n if (globalConfigManager.getEnvConfigInBoolean(MIDSCENE_REPORT_QUIET)) {\n return;\n }\n logMsg(`Midscene - report file updated: ${filepath}`);\n}\n\n/**\n * Get the current execution file name\n * @returns The name of the current execution file\n */\nexport function getCurrentExecutionFile(trace?: string): string | false {\n const error = new Error();\n const stackTrace = trace || error.stack;\n const pkgDir = process.cwd() || '';\n if (stackTrace) {\n const stackLines = stackTrace.split('\\n');\n for (const line of stackLines) {\n if (\n line.includes('.spec.') ||\n line.includes('.test.') ||\n line.includes('.ts') ||\n line.includes('.js')\n ) {\n const match = line.match(/(?:at\\s+)?(.*?\\.(?:spec|test)\\.[jt]s)/);\n if (match?.[1]) {\n const targetFileName = match[1]\n .replace(pkgDir, '')\n .trim()\n .replace('at ', '');\n return targetFileName;\n }\n }\n }\n }\n return false;\n}\n\nconst testFileIndex = new Map<string, number>();\n\nexport function generateCacheId(fileName?: string): string {\n let taskFile = fileName || getCurrentExecutionFile();\n if (!taskFile) {\n taskFile = uuid();\n console.warn(\n 'Midscene - using random UUID for cache id. Cache may be invalid.',\n );\n }\n\n if (testFileIndex.has(taskFile)) {\n const currentIndex = testFileIndex.get(taskFile);\n if (currentIndex !== undefined) {\n testFileIndex.set(taskFile, currentIndex + 1);\n }\n } else {\n testFileIndex.set(taskFile, 1);\n }\n return `${taskFile}-${testFileIndex.get(taskFile)}`;\n}\n\nexport function ifPlanLocateParamIsBbox(\n planLocateParam: PlanningLocateParam,\n): boolean {\n return !!(\n planLocateParam.bbox &&\n Array.isArray(planLocateParam.bbox) &&\n planLocateParam.bbox.length === 4\n );\n}\n\nexport function matchElementFromPlan(\n planLocateParam: PlanningLocateParam,\n): LocateResultElement | undefined {\n if (!planLocateParam) {\n return undefined;\n }\n\n if (planLocateParam.bbox) {\n // Convert bbox [x1, y1, x2, y2] to rect {left, top, width, height}\n const rect = {\n left: planLocateParam.bbox[0],\n top: planLocateParam.bbox[1],\n width: planLocateParam.bbox[2] - planLocateParam.bbox[0] + 1,\n height: planLocateParam.bbox[3] - planLocateParam.bbox[1] + 1,\n };\n\n const element = generateElementByRect(\n rect,\n typeof planLocateParam.prompt === 'string'\n ? planLocateParam.prompt\n : planLocateParam.prompt?.prompt || '',\n );\n return element;\n }\n\n return undefined;\n}\n\nexport async function matchElementFromCache(\n context: {\n taskCache?: TaskCache;\n interfaceInstance: AbstractInterface;\n },\n cacheEntry: ElementCacheFeature | undefined,\n cachePrompt: TUserPrompt,\n cacheable: boolean | undefined,\n): Promise<LocateResultElement | undefined> {\n if (!cacheEntry) {\n return undefined;\n }\n\n if (cacheable === false) {\n cacheDebug('cache disabled for prompt: %s', cachePrompt);\n return undefined;\n }\n\n if (!context.taskCache?.isCacheResultUsed) {\n return undefined;\n }\n\n if (!context.interfaceInstance.rectMatchesCacheFeature) {\n cacheDebug(\n 'interface does not implement rectMatchesCacheFeature, skip cache',\n );\n return undefined;\n }\n\n try {\n const rect =\n await context.interfaceInstance.rectMatchesCacheFeature(cacheEntry);\n const element: LocateResultElement = {\n center: [\n Math.round(rect.left + rect.width / 2),\n Math.round(rect.top + rect.height / 2),\n ],\n rect,\n description:\n typeof cachePrompt === 'string'\n ? cachePrompt\n : cachePrompt.prompt || '',\n };\n\n cacheDebug('cache hit, prompt: %s', cachePrompt);\n return element;\n } catch (error) {\n cacheDebug('rectMatchesCacheFeature error: %s', error);\n return undefined;\n }\n}\n\ndeclare const __VERSION__: string | undefined;\n\nexport const getMidsceneVersion = (): string => {\n if (typeof __VERSION__ !== 'undefined') {\n return __VERSION__;\n } else if (\n process.env.__VERSION__ &&\n process.env.__VERSION__ !== 'undefined'\n ) {\n return process.env.__VERSION__;\n }\n throw new Error('__VERSION__ inject failed during build');\n};\n\nexport const parsePrompt = (\n prompt: TUserPrompt,\n): {\n textPrompt: string;\n multimodalPrompt?: TMultimodalPrompt;\n} => {\n if (typeof prompt === 'string') {\n return {\n textPrompt: prompt,\n multimodalPrompt: undefined,\n };\n }\n return {\n textPrompt: prompt.prompt,\n multimodalPrompt: prompt.images\n ? {\n images: prompt.images,\n convertHttpImage2Base64: !!prompt.convertHttpImage2Base64,\n }\n : undefined,\n };\n};\n\nexport const transformLogicalElementToScreenshot = (\n element: LocateResultElement,\n shrunkShotToLogicalRatio: number,\n): LocateResultElement => {\n if (shrunkShotToLogicalRatio === 1) {\n return element;\n }\n\n return {\n ...element,\n center: [\n Math.round(element.center[0] * shrunkShotToLogicalRatio),\n Math.round(element.center[1] * shrunkShotToLogicalRatio),\n ],\n rect: {\n ...element.rect,\n left: Math.round(element.rect.left * shrunkShotToLogicalRatio),\n top: Math.round(element.rect.top * shrunkShotToLogicalRatio),\n width: Math.round(element.rect.width * shrunkShotToLogicalRatio),\n height: Math.round(element.rect.height * shrunkShotToLogicalRatio),\n },\n };\n};\n\nexport const transformLogicalRectToScreenshotRect = (\n rect: Rect,\n shrunkShotToLogicalRatio: number,\n): Rect => {\n if (shrunkShotToLogicalRatio === 1) {\n return rect;\n }\n\n return {\n ...rect,\n left: Math.round(rect.left * shrunkShotToLogicalRatio),\n top: Math.round(rect.top * shrunkShotToLogicalRatio),\n width: Math.round(rect.width * shrunkShotToLogicalRatio),\n height: Math.round(rect.height * shrunkShotToLogicalRatio),\n };\n};\n"],"names":["commonContextParser","interfaceInstance","_opt","debug","getDebug","assert","description","uploadTestInfoToServer","interfaceSize","logicalWidth","logicalHeight","console","Number","Error","screenshotBase64","screenshotCapturedAt","Date","imgWidth","imgHeight","imageInfoOfBase64","logicalIsPortrait","screenshotIsPortrait","finalLogicalWidth","userShrinkFactor","dpr","shrunkShotToLogicalRatio","targetWidth","Math","targetHeight","resizedBase64","resizeImgBase64","ScreenshotItem","getReportFileName","tag","reportTagName","globalConfigManager","MIDSCENE_REPORT_TAG_NAME","dateTimeInFileName","dayjs","uniqueId","uuid","printReportMsg","filepath","MIDSCENE_REPORT_QUIET","logMsg","getCurrentExecutionFile","trace","error","stackTrace","pkgDir","process","stackLines","line","match","targetFileName","testFileIndex","Map","generateCacheId","fileName","taskFile","currentIndex","undefined","ifPlanLocateParamIsBbox","planLocateParam","Array","matchElementFromPlan","rect","element","generateElementByRect","matchElementFromCache","context","cacheEntry","cachePrompt","cacheable","cacheDebug","getMidsceneVersion","__VERSION__","parsePrompt","prompt","transformLogicalElementToScreenshot","transformLogicalRectToScreenshotRect"],"mappings":";;;;;;;;;AAyBO,eAAeA,oBACpBC,iBAAoC,EACpCC,IAAmE;IAEnE,MAAMC,QAAQC,SAAS;IAEvBC,OAAOJ,mBAAmB;IAE1BE,MAAM;IACN,MAAMG,cAAcL,kBAAkB,QAAQ,QAAQ;IACtDE,MAAM;IAENA,MAAM;IACNI,uBAAuB;QACrB,SAASD;QACT,WAAWJ,KAAK,eAAe;IACjC;IACAC,MAAM;IAENA,MAAM;IACN,MAAMK,gBAAgB,MAAMP,kBAAkB,IAAI;IAClD,MAAM,EAAE,OAAOQ,YAAY,EAAE,QAAQC,aAAa,EAAE,GAAGF;IAEvD,IAAKA,cAA6C,GAAG,EACnDG,QAAQ,IAAI,CACV;IAIJ,IAAI,CAACC,OAAO,QAAQ,CAACH,iBAAiB,CAACG,OAAO,QAAQ,CAACF,gBACrD,MAAM,IAAIG,MACR,CAAC,iFAAiF,EAAEJ,aAAa,UAAU,EAAEC,eAAe;IAIhI,IAAID,gBAAgB,KAAKC,iBAAiB,GACxC,MAAM,IAAIG,MACR,CAAC,mFAAmF,EAAEJ,aAAa,UAAU,EAAEC,eAAe;IAIlIP,MAAM,CAAC,MAAM,EAAEM,aAAa,CAAC,EAAEC,eAAe;IAE9C,MAAMI,mBAAmB,MAAMb,kBAAkB,gBAAgB;IACjE,MAAMc,uBAAuBC,KAAK,GAAG;IACrCX,OAAOS,kBAAmB;IAG1BX,MAAM;IACN,MAAM,EAAE,OAAOc,QAAQ,EAAE,QAAQC,SAAS,EAAE,GAC1C,MAAMC,kBAAkBL;IAE1B,IAAI,CAACF,OAAO,QAAQ,CAACK,aAAa,CAACL,OAAO,QAAQ,CAACM,YACjD,MAAM,IAAIL,MACR,CAAC,wFAAwF,EAAEI,SAAS,UAAU,EAAEC,WAAW;IAG/H,IAAID,YAAY,KAAKC,aAAa,GAChC,MAAM,IAAIL,MACR,CAAC,0FAA0F,EAAEI,SAAS,UAAU,EAAEC,WAAW;IAGjIf,MAAM,yBAAyBc,UAAU,KAAKC;IAM9C,MAAME,oBAAoBX,eAAeC;IACzC,MAAMW,uBAAuBJ,WAAWC;IACxC,IAAII,oBAAoBb;IAExB,IAAIW,sBAAsBC,sBAAsB;QAC9ClB,MACE,CAAC,4CAA4C,EAAEM,aAAa,CAAC,EAAEC,cAAc,EAAE,EAAEU,oBAAoB,aAAa,YAAY,gBAAgB,EAAEH,SAAS,CAAC,EAAEC,UAAU,EAAE,EAAEG,uBAAuB,aAAa,YAAY,+BAA+B,CAAC;QAE5PC,oBAAoBZ;IAEtB;IAGA,MAAMa,mBAAmBrB,KAAK,sBAAsB,IAAI;IAExD,IAAI,CAACU,OAAO,QAAQ,CAACW,qBAAqBA,mBAAmB,GAC3D,MAAM,IAAIV,MACR,CAAC,wEAAwE,EAAEU,kBAAkB;IAIjG,MAAMC,MAAMP,WAAWK;IAEvBnB,MAAM,mBAAmBqB;IAEzB,MAAMC,2BAA2BD,MAAMD;IAEvCpB,MAAM,4BAA4BsB;IAElC,IAAIF,AAAqB,MAArBA,kBAAwB;QAC1B,MAAMG,cAAcC,KAAK,KAAK,CAACV,WAAWM;QAC1C,MAAMK,eAAeD,KAAK,KAAK,CAACT,YAAYK;QAE5CpB,MACE,CAAC,mCAAmC,EAAEoB,iBAAiB,YAAY,EAAEN,SAAS,CAAC,EAAEC,UAAU,YAAY,EAAEQ,YAAY,CAAC,EAAEE,aAAa,CAAC,CAAC;QAGzI,MAAMC,gBAAgB,MAAMC,gBAAgBhB,kBAAkB;YAC5D,OAAOY;YACP,QAAQE;QACV;QACA,OAAO;YACL,UAAU;gBACR,OAAOF;gBACP,QAAQE;YACV;YACA,eAAeJ;YACf,YAAYO,eAAe,MAAM,CAACF,eAAed;YACjDU;QACF;IACF;IAEA,OAAO;QACL,UAAU;YACR,OAAOR;YACP,QAAQC;QACV;QACA,eAAeM;QACf,YAAYO,eAAe,MAAM,CAACjB,kBAAkBC;QACpDU;IACF;AACF;AAEO,SAASO,kBAAkBC,MAAM,KAAK;IAC3C,MAAMC,gBAAgBC,oBAAoB,iBAAiB,CACzDC;IAEF,MAAMC,qBAAqBC,QAAQ,MAAM,CAAC;IAE1C,MAAMC,WAAWC,OAAO,SAAS,CAAC,GAAG;IACrC,OAAO,GAAGN,iBAAiBD,IAAI,CAAC,EAAEI,mBAAmB,CAAC,EAAEE,UAAU;AACpE;AAEO,SAASE,eAAeC,QAAgB;IAC7C,IAAIP,oBAAoB,qBAAqB,CAACQ,wBAC5C;IAEFC,OAAO,CAAC,gCAAgC,EAAEF,UAAU;AACtD;AAMO,SAASG,wBAAwBC,KAAc;IACpD,MAAMC,QAAQ,IAAIlC;IAClB,MAAMmC,aAAaF,SAASC,MAAM,KAAK;IACvC,MAAME,SAASC,QAAQ,GAAG,MAAM;IAChC,IAAIF,YAAY;QACd,MAAMG,aAAaH,WAAW,KAAK,CAAC;QACpC,KAAK,MAAMI,QAAQD,WACjB,IACEC,KAAK,QAAQ,CAAC,aACdA,KAAK,QAAQ,CAAC,aACdA,KAAK,QAAQ,CAAC,UACdA,KAAK,QAAQ,CAAC,QACd;YACA,MAAMC,QAAQD,KAAK,KAAK,CAAC;YACzB,IAAIC,OAAO,CAAC,EAAE,EAAE;gBACd,MAAMC,iBAAiBD,KAAK,CAAC,EAAE,CAC5B,OAAO,CAACJ,QAAQ,IAChB,IAAI,GACJ,OAAO,CAAC,OAAO;gBAClB,OAAOK;YACT;QACF;IAEJ;IACA,OAAO;AACT;AAEA,MAAMC,gBAAgB,IAAIC;AAEnB,SAASC,gBAAgBC,QAAiB;IAC/C,IAAIC,WAAWD,YAAYb;IAC3B,IAAI,CAACc,UAAU;QACbA,WAAWnB;QACX7B,QAAQ,IAAI,CACV;IAEJ;IAEA,IAAI4C,cAAc,GAAG,CAACI,WAAW;QAC/B,MAAMC,eAAeL,cAAc,GAAG,CAACI;QACvC,IAAIC,AAAiBC,WAAjBD,cACFL,cAAc,GAAG,CAACI,UAAUC,eAAe;IAE/C,OACEL,cAAc,GAAG,CAACI,UAAU;IAE9B,OAAO,GAAGA,SAAS,CAAC,EAAEJ,cAAc,GAAG,CAACI,WAAW;AACrD;AAEO,SAASG,wBACdC,eAAoC;IAEpC,OAAO,CAAC,CACNA,CAAAA,gBAAgB,IAAI,IACpBC,MAAM,OAAO,CAACD,gBAAgB,IAAI,KAClCA,AAAgC,MAAhCA,gBAAgB,IAAI,CAAC,MAAM,AAAK;AAEpC;AAEO,SAASE,qBACdF,eAAoC;IAEpC,IAAI,CAACA,iBACH;IAGF,IAAIA,gBAAgB,IAAI,EAAE;QAExB,MAAMG,OAAO;YACX,MAAMH,gBAAgB,IAAI,CAAC,EAAE;YAC7B,KAAKA,gBAAgB,IAAI,CAAC,EAAE;YAC5B,OAAOA,gBAAgB,IAAI,CAAC,EAAE,GAAGA,gBAAgB,IAAI,CAAC,EAAE,GAAG;YAC3D,QAAQA,gBAAgB,IAAI,CAAC,EAAE,GAAGA,gBAAgB,IAAI,CAAC,EAAE,GAAG;QAC9D;QAEA,MAAMI,UAAUC,sBACdF,MACA,AAAkC,YAAlC,OAAOH,gBAAgB,MAAM,GACzBA,gBAAgB,MAAM,GACtBA,gBAAgB,MAAM,EAAE,UAAU;QAExC,OAAOI;IACT;AAGF;AAEO,eAAeE,sBACpBC,OAGC,EACDC,UAA2C,EAC3CC,WAAwB,EACxBC,SAA8B;IAE9B,IAAI,CAACF,YACH;IAGF,IAAIE,AAAc,UAAdA,WAAqB,YACvBC,8BAAW,iCAAiCF;IAI9C,IAAI,CAACF,QAAQ,SAAS,EAAE,mBACtB;IAGF,IAAI,CAACA,QAAQ,iBAAiB,CAAC,uBAAuB,EAAE,YACtDI,8BACE;IAKJ,IAAI;QACF,MAAMR,OACJ,MAAMI,QAAQ,iBAAiB,CAAC,uBAAuB,CAACC;QAC1D,MAAMJ,UAA+B;YACnC,QAAQ;gBACNxC,KAAK,KAAK,CAACuC,KAAK,IAAI,GAAGA,KAAK,KAAK,GAAG;gBACpCvC,KAAK,KAAK,CAACuC,KAAK,GAAG,GAAGA,KAAK,MAAM,GAAG;aACrC;YACDA;YACA,aACE,AAAuB,YAAvB,OAAOM,cACHA,cACAA,YAAY,MAAM,IAAI;QAC9B;QAEAE,8BAAW,yBAAyBF;QACpC,OAAOL;IACT,EAAE,OAAOpB,OAAO;QACd2B,8BAAW,qCAAqC3B;QAChD;IACF;AACF;AAIO,MAAM4B,qBAAqB,IAEvBC;AAUJ,MAAMC,cAAc,CACzBC;IAKA,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAO;QACL,YAAYA;QACZ,kBAAkBjB;IACpB;IAEF,OAAO;QACL,YAAYiB,OAAO,MAAM;QACzB,kBAAkBA,OAAO,MAAM,GAC3B;YACE,QAAQA,OAAO,MAAM;YACrB,yBAAyB,CAAC,CAACA,OAAO,uBAAuB;QAC3D,IACAjB;IACN;AACF;AAEO,MAAMkB,sCAAsC,CACjDZ,SACA1C;IAEA,IAAIA,AAA6B,MAA7BA,0BACF,OAAO0C;IAGT,OAAO;QACL,GAAGA,OAAO;QACV,QAAQ;YACNxC,KAAK,KAAK,CAACwC,QAAQ,MAAM,CAAC,EAAE,GAAG1C;YAC/BE,KAAK,KAAK,CAACwC,QAAQ,MAAM,CAAC,EAAE,GAAG1C;SAChC;QACD,MAAM;YACJ,GAAG0C,QAAQ,IAAI;YACf,MAAMxC,KAAK,KAAK,CAACwC,QAAQ,IAAI,CAAC,IAAI,GAAG1C;YACrC,KAAKE,KAAK,KAAK,CAACwC,QAAQ,IAAI,CAAC,GAAG,GAAG1C;YACnC,OAAOE,KAAK,KAAK,CAACwC,QAAQ,IAAI,CAAC,KAAK,GAAG1C;YACvC,QAAQE,KAAK,KAAK,CAACwC,QAAQ,IAAI,CAAC,MAAM,GAAG1C;QAC3C;IACF;AACF;AAEO,MAAMuD,uCAAuC,CAClDd,MACAzC;IAEA,IAAIA,AAA6B,MAA7BA,0BACF,OAAOyC;IAGT,OAAO;QACL,GAAGA,IAAI;QACP,MAAMvC,KAAK,KAAK,CAACuC,KAAK,IAAI,GAAGzC;QAC7B,KAAKE,KAAK,KAAK,CAACuC,KAAK,GAAG,GAAGzC;QAC3B,OAAOE,KAAK,KAAK,CAACuC,KAAK,KAAK,GAAGzC;QAC/B,QAAQE,KAAK,KAAK,CAACuC,KAAK,MAAM,GAAGzC;IACnC;AACF"}
@@ -25,6 +25,11 @@ const defineActionTap = (call)=>defineAction({
25
25
  description: 'Tap the element',
26
26
  interfaceAlias: 'aiTap',
27
27
  paramSchema: actionTapParamSchema,
28
+ sample: {
29
+ locate: {
30
+ prompt: 'the "Submit" button'
31
+ }
32
+ },
28
33
  call
29
34
  });
30
35
  const actionRightClickParamSchema = z.object({
@@ -35,6 +40,11 @@ const defineActionRightClick = (call)=>defineAction({
35
40
  description: 'Right click the element',
36
41
  interfaceAlias: 'aiRightClick',
37
42
  paramSchema: actionRightClickParamSchema,
43
+ sample: {
44
+ locate: {
45
+ prompt: 'the file icon on the desktop'
46
+ }
47
+ },
38
48
  call
39
49
  });
40
50
  const actionDoubleClickParamSchema = z.object({
@@ -45,6 +55,11 @@ const defineActionDoubleClick = (call)=>defineAction({
45
55
  description: 'Double click the element',
46
56
  interfaceAlias: 'aiDoubleClick',
47
57
  paramSchema: actionDoubleClickParamSchema,
58
+ sample: {
59
+ locate: {
60
+ prompt: 'the folder icon'
61
+ }
62
+ },
48
63
  call
49
64
  });
50
65
  const actionHoverParamSchema = z.object({
@@ -55,6 +70,11 @@ const defineActionHover = (call)=>defineAction({
55
70
  description: 'Move the mouse to the element',
56
71
  interfaceAlias: 'aiHover',
57
72
  paramSchema: actionHoverParamSchema,
73
+ sample: {
74
+ locate: {
75
+ prompt: 'the navigation menu item "Products"'
76
+ }
77
+ },
58
78
  call
59
79
  });
60
80
  const inputLocateDescription = 'the position of the placeholder or text content in the target input field. If there is no content, locate the center of the input field.';
@@ -75,6 +95,12 @@ const defineActionInput = (call)=>defineAction({
75
95
  description: 'Input the value into the element',
76
96
  interfaceAlias: 'aiInput',
77
97
  paramSchema: actionInputParamSchema,
98
+ sample: {
99
+ value: 'test@example.com',
100
+ locate: {
101
+ prompt: 'the email input field'
102
+ }
103
+ },
78
104
  call: (param)=>{
79
105
  if ('append' === param.mode) param.mode = 'typeOnly';
80
106
  return call(param);
@@ -89,6 +115,9 @@ const defineActionKeyboardPress = (call)=>defineAction({
89
115
  description: 'Press a key or key combination, like "Enter", "Tab", "Escape", or "Control+A", "Shift+Enter". Do not use this to type text.',
90
116
  interfaceAlias: 'aiKeyboardPress',
91
117
  paramSchema: actionKeyboardPressParamSchema,
118
+ sample: {
119
+ keyName: 'Enter'
120
+ },
92
121
  call
93
122
  });
94
123
  const actionScrollParamSchema = z.object({
@@ -113,6 +142,13 @@ const defineActionScroll = (call)=>defineAction({
113
142
  description: 'Scroll the page or a scrollable element to browse content. This is the preferred way to scroll on all platforms, including mobile. Supports scrollToBottom/scrollToTop for boundary navigation. Default: direction `down`, scrollType `singleAction`, distance `null`.',
114
143
  interfaceAlias: 'aiScroll',
115
144
  paramSchema: actionScrollParamSchema,
145
+ sample: {
146
+ direction: 'down',
147
+ scrollType: 'singleAction',
148
+ locate: {
149
+ prompt: 'the center of the product list area'
150
+ }
151
+ },
116
152
  call
117
153
  });
118
154
  const actionDragAndDropParamSchema = z.object({
@@ -142,6 +178,11 @@ const defineActionLongPress = (call)=>defineAction({
142
178
  name: 'LongPress',
143
179
  description: 'Long press the element',
144
180
  paramSchema: ActionLongPressParamSchema,
181
+ sample: {
182
+ locate: {
183
+ prompt: 'the message bubble'
184
+ }
185
+ },
145
186
  call
146
187
  });
147
188
  const ActionSwipeParamSchema = z.object({
@@ -198,10 +239,10 @@ const defineActionSwipe = (call)=>defineAction({
198
239
  paramSchema: ActionSwipeParamSchema,
199
240
  sample: {
200
241
  start: {
201
- prompt: 'middle-lower area of the screen'
242
+ prompt: 'center of the notification'
202
243
  },
203
244
  end: {
204
- prompt: 'upper-middle area of the screen'
245
+ prompt: 'upper edge of the screen'
205
246
  }
206
247
  },
207
248
  call
@@ -214,6 +255,11 @@ const defineActionClearInput = (call)=>defineAction({
214
255
  description: inputLocateDescription,
215
256
  interfaceAlias: 'aiClearInput',
216
257
  paramSchema: actionClearInputParamSchema,
258
+ sample: {
259
+ locate: {
260
+ prompt: 'the search input field'
261
+ }
262
+ },
217
263
  call
218
264
  });
219
265
  const actionCursorMoveParamSchema = z.object({
@@ -227,6 +273,10 @@ const defineActionCursorMove = (call)=>defineAction({
227
273
  name: 'CursorMove',
228
274
  description: 'Move the text cursor (caret) left or right within an input field or text area. Use this to reposition the cursor without selecting text.',
229
275
  paramSchema: actionCursorMoveParamSchema,
276
+ sample: {
277
+ direction: 'left',
278
+ times: 3
279
+ },
230
280
  call
231
281
  });
232
282
  const ActionSleepParamSchema = z.object({
@@ -236,6 +286,9 @@ const defineActionSleep = ()=>defineAction({
236
286
  name: 'Sleep',
237
287
  description: 'Wait for a specified duration before continuing. Defaults to 1 second (1000ms) if not specified.',
238
288
  paramSchema: ActionSleepParamSchema,
289
+ sample: {
290
+ timeMs: 2000
291
+ },
239
292
  call: async (param)=>{
240
293
  const duration = param?.timeMs ?? 1000;
241
294
  getDebug('device:common-action')(`Sleeping for ${duration}ms`);
@@ -1 +1 @@
1
- {"version":3,"file":"device/index.mjs","sources":["../../../src/device/index.ts"],"sourcesContent":["import { getMidsceneLocationSchema } from '@/common';\nimport type {\n ActionScrollParam,\n DeviceAction,\n LocateResultElement,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport type { ElementNode } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';\nimport { z } from 'zod';\nimport type { ElementCacheFeature, Rect, Size, UIContext } from '../types';\n\nexport interface FileChooserHandler {\n accept(files: string[]): Promise<void>;\n}\n\nexport abstract class AbstractInterface {\n abstract interfaceType: string;\n\n abstract screenshotBase64(): Promise<string>;\n abstract size(): Promise<Size>;\n abstract actionSpace(): DeviceAction[];\n\n abstract cacheFeatureForPoint?(\n center: [number, number],\n options?: {\n targetDescription?: string;\n modelConfig?: IModelConfig;\n },\n ): Promise<ElementCacheFeature>;\n abstract rectMatchesCacheFeature?(\n feature: ElementCacheFeature,\n ): Promise<Rect>;\n\n abstract destroy?(): Promise<void>;\n\n abstract describe?(): string;\n abstract beforeInvokeAction?(actionName: string, param: any): Promise<void>;\n abstract afterInvokeAction?(actionName: string, param: any): Promise<void>;\n\n // for web only\n registerFileChooserListener?(\n handler: (chooser: FileChooserHandler) => Promise<void>,\n ): Promise<{ dispose: () => void; getError: () => Error | undefined }>;\n\n // @deprecated do NOT extend this method\n abstract getElementsNodeTree?: () => Promise<ElementNode>;\n\n // @deprecated do NOT extend this method\n abstract url?: () => string | Promise<string>;\n\n // @deprecated do NOT extend this method\n abstract evaluateJavaScript?<T = any>(script: string): Promise<T>;\n\n /**\n * Get the current time from the device.\n * Returns the device's current timestamp in milliseconds.\n * This is useful when the system time and device time are not synchronized.\n */\n getTimestamp?(): Promise<number>;\n\n /** URL of native MJPEG stream for real-time screen preview (e.g. WDA MJPEG server) */\n mjpegStreamUrl?: string;\n}\n\n// Generic function to define actions with proper type inference\n// TRuntime allows specifying a different type for the runtime parameter (after location resolution)\n// TReturn allows specifying the return type of the action\nexport const defineAction = <\n TSchema extends z.ZodType | undefined = undefined,\n TRuntime = TSchema extends z.ZodType ? z.infer<TSchema> : undefined,\n TReturn = any,\n>(\n config: {\n name: string;\n description: string;\n interfaceAlias?: string;\n paramSchema?: TSchema;\n call: (param: TRuntime) => Promise<TReturn> | TReturn;\n } & Partial<\n Omit<\n DeviceAction<TRuntime, TReturn>,\n 'name' | 'description' | 'interfaceAlias' | 'paramSchema' | 'call'\n >\n >,\n): DeviceAction<TRuntime, TReturn> => {\n return config as any; // Type assertion needed because schema validation type differs from runtime type\n};\n\n// Tap\nexport const actionTapParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be tapped'),\n});\nexport type ActionTapParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionTap = (\n call: (param: ActionTapParam) => Promise<void>,\n): DeviceAction<ActionTapParam> => {\n return defineAction<typeof actionTapParamSchema, ActionTapParam>({\n name: 'Tap',\n description: 'Tap the element',\n interfaceAlias: 'aiTap',\n paramSchema: actionTapParamSchema,\n call,\n });\n};\n\n// RightClick\nexport const actionRightClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be right clicked',\n ),\n});\nexport type ActionRightClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionRightClick = (\n call: (param: ActionRightClickParam) => Promise<void>,\n): DeviceAction<ActionRightClickParam> => {\n return defineAction<\n typeof actionRightClickParamSchema,\n ActionRightClickParam\n >({\n name: 'RightClick',\n description: 'Right click the element',\n interfaceAlias: 'aiRightClick',\n paramSchema: actionRightClickParamSchema,\n call,\n });\n};\n\n// DoubleClick\nexport const actionDoubleClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be double clicked',\n ),\n});\nexport type ActionDoubleClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionDoubleClick = (\n call: (param: ActionDoubleClickParam) => Promise<void>,\n): DeviceAction<ActionDoubleClickParam> => {\n return defineAction<\n typeof actionDoubleClickParamSchema,\n ActionDoubleClickParam\n >({\n name: 'DoubleClick',\n description: 'Double click the element',\n interfaceAlias: 'aiDoubleClick',\n paramSchema: actionDoubleClickParamSchema,\n call,\n });\n};\n\n// Hover\nexport const actionHoverParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be hovered'),\n});\nexport type ActionHoverParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionHover = (\n call: (param: ActionHoverParam) => Promise<void>,\n): DeviceAction<ActionHoverParam> => {\n return defineAction<typeof actionHoverParamSchema, ActionHoverParam>({\n name: 'Hover',\n description: 'Move the mouse to the element',\n interfaceAlias: 'aiHover',\n paramSchema: actionHoverParamSchema,\n call,\n });\n};\n\n// Input\nconst inputLocateDescription =\n 'the position of the placeholder or text content in the target input field. If there is no content, locate the center of the input field.';\nexport const actionInputParamSchema = z.object({\n value: z\n .union([z.string(), z.number()])\n .transform((val) => String(val))\n .describe(\n 'The text to input. Provide the final content for replace/append modes, or an empty string when using clear mode to remove existing text.',\n ),\n locate: getMidsceneLocationSchema()\n .describe(inputLocateDescription)\n .optional(),\n mode: z\n .enum(['replace', 'clear', 'typeOnly'])\n .default('replace')\n .describe(\n 'Input mode: \"replace\" (default) - clear the field and input the value; \"typeOnly\" - type the value directly without clearing the field first; \"clear\" - clear the field without inputting new text.',\n ),\n});\nexport type ActionInputParam = {\n value: string;\n locate?: LocateResultElement;\n mode?: 'replace' | 'clear' | 'typeOnly' | 'append';\n};\n\nexport const defineActionInput = (\n call: (param: ActionInputParam) => Promise<void>,\n): DeviceAction<ActionInputParam> => {\n return defineAction<typeof actionInputParamSchema, ActionInputParam>({\n name: 'Input',\n description: 'Input the value into the element',\n interfaceAlias: 'aiInput',\n paramSchema: actionInputParamSchema,\n call: (param) => {\n // backward compat: convert deprecated 'append' to 'typeOnly'\n if ((param.mode as string) === 'append') {\n param.mode = 'typeOnly';\n }\n return call(param);\n },\n });\n};\n\n// KeyboardPress\nexport const actionKeyboardPressParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The element to be clicked before pressing the key')\n .optional(),\n keyName: z\n .string()\n .describe(\n \"The key to be pressed. Use '+' for key combinations, e.g., 'Control+A', 'Shift+Enter'\",\n ),\n});\nexport type ActionKeyboardPressParam = {\n locate?: LocateResultElement;\n keyName: string;\n};\n\nexport const defineActionKeyboardPress = (\n call: (param: ActionKeyboardPressParam) => Promise<void>,\n): DeviceAction<ActionKeyboardPressParam> => {\n return defineAction<\n typeof actionKeyboardPressParamSchema,\n ActionKeyboardPressParam\n >({\n name: 'KeyboardPress',\n description:\n 'Press a key or key combination, like \"Enter\", \"Tab\", \"Escape\", or \"Control+A\", \"Shift+Enter\". Do not use this to type text.',\n interfaceAlias: 'aiKeyboardPress',\n paramSchema: actionKeyboardPressParamSchema,\n call,\n });\n};\n\n// Scroll\nexport const actionScrollParamSchema = z.object({\n scrollType: z\n .enum([\n 'singleAction',\n 'scrollToBottom',\n 'scrollToTop',\n 'scrollToRight',\n 'scrollToLeft',\n ])\n .default('singleAction')\n .describe(\n 'The scroll behavior: \"singleAction\" for a single scroll action, \"scrollToBottom\" for scrolling all the way to the bottom by rapidly scrolling 5-10 times (skipping intermediate content until reaching the bottom), \"scrollToTop\" for scrolling all the way to the top by rapidly scrolling 5-10 times (skipping intermediate content until reaching the top), \"scrollToRight\" for scrolling all the way to the right by rapidly scrolling multiple times, \"scrollToLeft\" for scrolling all the way to the left by rapidly scrolling multiple times',\n ),\n direction: z\n .enum(['down', 'up', 'right', 'left'])\n .default('down')\n .describe(\n 'The direction to scroll. Only effective when scrollType is \"singleAction\".',\n ),\n distance: z\n .number()\n .nullable()\n .optional()\n .describe('The distance in pixels to scroll'),\n locate: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Describe the target element to be scrolled on, like \"the table\" or \"the list\" or \"the content area\" or \"the scrollable area\". Do NOT provide a general intent like \"scroll to find some element\"',\n ),\n});\n\nexport const defineActionScroll = (\n call: (param: ActionScrollParam) => Promise<void>,\n): DeviceAction<ActionScrollParam> => {\n return defineAction<typeof actionScrollParamSchema, ActionScrollParam>({\n name: 'Scroll',\n description:\n 'Scroll the page or a scrollable element to browse content. This is the preferred way to scroll on all platforms, including mobile. Supports scrollToBottom/scrollToTop for boundary navigation. Default: direction `down`, scrollType `singleAction`, distance `null`.',\n interfaceAlias: 'aiScroll',\n paramSchema: actionScrollParamSchema,\n call,\n });\n};\n\n// DragAndDrop\nexport const actionDragAndDropParamSchema = z.object({\n from: getMidsceneLocationSchema().describe('The position to be dragged'),\n to: getMidsceneLocationSchema().describe('The position to be dropped'),\n});\nexport type ActionDragAndDropParam = {\n from: LocateResultElement;\n to: LocateResultElement;\n};\n\nexport const defineActionDragAndDrop = (\n call: (param: ActionDragAndDropParam) => Promise<void>,\n): DeviceAction<ActionDragAndDropParam> => {\n return defineAction<\n typeof actionDragAndDropParamSchema,\n ActionDragAndDropParam\n >({\n name: 'DragAndDrop',\n description:\n 'Pick up a specific UI element and move it to a new position (e.g., reorder a card, move a file into a folder, sort list items). The element itself moves with your finger/mouse.',\n interfaceAlias: 'aiDragAndDrop',\n paramSchema: actionDragAndDropParamSchema,\n sample: {\n from: { prompt: 'the \"report.pdf\" file icon' },\n to: { prompt: 'the upload drop zone' },\n },\n call,\n });\n};\n\nexport const ActionLongPressParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be long pressed',\n ),\n duration: z\n .number()\n .default(500)\n .optional()\n .describe('Long press duration in milliseconds'),\n});\n\nexport type ActionLongPressParam = {\n locate: LocateResultElement;\n duration?: number;\n};\nexport const defineActionLongPress = (\n call: (param: ActionLongPressParam) => Promise<void>,\n): DeviceAction<ActionLongPressParam> => {\n return defineAction<typeof ActionLongPressParamSchema, ActionLongPressParam>({\n name: 'LongPress',\n description: 'Long press the element',\n paramSchema: ActionLongPressParamSchema,\n call,\n });\n};\n\nexport const ActionSwipeParamSchema = z.object({\n start: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Starting point of the swipe gesture, if not specified, the center of the page will be used',\n ),\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .optional()\n .describe(\n 'The direction to swipe (required when using distance). The direction means the direction of the finger swipe.',\n ),\n distance: z\n .number()\n .optional()\n .describe('The distance in pixels to swipe (mutually exclusive with end)'),\n end: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Ending point of the swipe gesture (mutually exclusive with distance)',\n ),\n duration: z\n .number()\n .default(300)\n .describe('Duration of the swipe gesture in milliseconds'),\n repeat: z\n .number()\n .optional()\n .describe(\n 'The number of times to repeat the swipe gesture. 1 for default, 0 for infinite (e.g. endless swipe until the end of the page)',\n ),\n});\n\nexport type ActionSwipeParam = {\n start?: LocateResultElement;\n direction?: 'up' | 'down' | 'left' | 'right';\n distance?: number;\n end?: LocateResultElement;\n duration?: number;\n repeat?: number;\n};\n\nexport function normalizeMobileSwipeParam(\n param: ActionSwipeParam,\n screenSize: { width: number; height: number },\n): {\n startPoint: { x: number; y: number };\n endPoint: { x: number; y: number };\n duration: number;\n repeatCount: number;\n} {\n const { width, height } = screenSize;\n const { start, end } = param;\n\n const startPoint = start\n ? { x: start.center[0], y: start.center[1] }\n : { x: width / 2, y: height / 2 };\n\n let endPoint: { x: number; y: number };\n\n if (end) {\n endPoint = { x: end.center[0], y: end.center[1] };\n } else if (param.distance) {\n const direction = param.direction;\n if (!direction) {\n throw new Error('direction is required for swipe gesture');\n }\n endPoint = {\n x:\n startPoint.x +\n (direction === 'right'\n ? param.distance\n : direction === 'left'\n ? -param.distance\n : 0),\n y:\n startPoint.y +\n (direction === 'down'\n ? param.distance\n : direction === 'up'\n ? -param.distance\n : 0),\n };\n } else {\n throw new Error(\n 'Either end or distance must be specified for swipe gesture',\n );\n }\n\n endPoint.x = Math.max(0, Math.min(endPoint.x, width));\n endPoint.y = Math.max(0, Math.min(endPoint.y, height));\n\n const duration = param.duration ?? 300;\n\n let repeatCount = typeof param.repeat === 'number' ? param.repeat : 1;\n if (repeatCount === 0) {\n repeatCount = 10;\n }\n\n return { startPoint, endPoint, duration, repeatCount };\n}\n\nexport const defineActionSwipe = (\n call: (param: ActionSwipeParam) => Promise<void>,\n): DeviceAction<ActionSwipeParam> => {\n return defineAction<typeof ActionSwipeParamSchema, ActionSwipeParam>({\n name: 'Swipe',\n description:\n 'Perform a touch gesture for interactions beyond regular scrolling (e.g., flip pages in a carousel, dismiss a notification, swipe-to-delete a list item). For regular content scrolling, use Scroll instead. Use \"distance\" + \"direction\" for relative movement, or \"end\" for precise endpoint.',\n paramSchema: ActionSwipeParamSchema,\n sample: {\n start: { prompt: 'middle-lower area of the screen' },\n end: { prompt: 'upper-middle area of the screen' },\n },\n call,\n });\n};\n\n// ClearInput\nexport const actionClearInputParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The input field to be cleared')\n .optional(),\n});\nexport type ActionClearInputParam = {\n locate?: LocateResultElement;\n};\n\nexport const defineActionClearInput = (\n call: (param: ActionClearInputParam) => Promise<void>,\n): DeviceAction<ActionClearInputParam> => {\n return defineAction<\n typeof actionClearInputParamSchema,\n ActionClearInputParam\n >({\n name: 'ClearInput',\n description: inputLocateDescription,\n interfaceAlias: 'aiClearInput',\n paramSchema: actionClearInputParamSchema,\n call,\n });\n};\n\n// CursorMove\nexport const actionCursorMoveParamSchema = z.object({\n direction: z\n .enum(['left', 'right'])\n .describe('The direction to move the cursor'),\n times: z\n .number()\n .int()\n .min(1)\n .default(1)\n .describe(\n 'The number of times to move the cursor in the specified direction',\n ),\n});\nexport type ActionCursorMoveParam = {\n direction: 'left' | 'right';\n times?: number;\n};\n\nexport const defineActionCursorMove = (\n call: (param: ActionCursorMoveParam) => Promise<void>,\n): DeviceAction<ActionCursorMoveParam> => {\n return defineAction<\n typeof actionCursorMoveParamSchema,\n ActionCursorMoveParam\n >({\n name: 'CursorMove',\n description:\n 'Move the text cursor (caret) left or right within an input field or text area. Use this to reposition the cursor without selecting text.',\n paramSchema: actionCursorMoveParamSchema,\n call,\n });\n};\n// Sleep\nexport const ActionSleepParamSchema = z.object({\n timeMs: z\n .number()\n .default(1000)\n .optional()\n .describe('Sleep duration in milliseconds, defaults to 1000ms (1 second)'),\n});\n\nexport type ActionSleepParam = {\n timeMs?: number;\n};\n\nexport const defineActionSleep = (): DeviceAction<ActionSleepParam> => {\n return defineAction<typeof ActionSleepParamSchema, ActionSleepParam>({\n name: 'Sleep',\n description:\n 'Wait for a specified duration before continuing. Defaults to 1 second (1000ms) if not specified.',\n paramSchema: ActionSleepParamSchema,\n call: async (param) => {\n const duration = param?.timeMs ?? 1000;\n getDebug('device:common-action')(`Sleeping for ${duration}ms`);\n await new Promise((resolve) => setTimeout(resolve, duration));\n },\n });\n};\n\nexport type { DeviceAction } from '../types';\nexport type {\n AndroidDeviceOpt,\n AndroidDeviceInputOpt,\n IOSDeviceOpt,\n IOSDeviceInputOpt,\n HarmonyDeviceOpt,\n HarmonyDeviceInputOpt,\n} from './device-options';\n"],"names":["AbstractInterface","defineAction","config","actionTapParamSchema","z","getMidsceneLocationSchema","defineActionTap","call","actionRightClickParamSchema","defineActionRightClick","actionDoubleClickParamSchema","defineActionDoubleClick","actionHoverParamSchema","defineActionHover","inputLocateDescription","actionInputParamSchema","val","String","defineActionInput","param","actionKeyboardPressParamSchema","defineActionKeyboardPress","actionScrollParamSchema","defineActionScroll","actionDragAndDropParamSchema","defineActionDragAndDrop","ActionLongPressParamSchema","defineActionLongPress","ActionSwipeParamSchema","normalizeMobileSwipeParam","screenSize","width","height","start","end","startPoint","endPoint","direction","Error","Math","duration","repeatCount","defineActionSwipe","actionClearInputParamSchema","defineActionClearInput","actionCursorMoveParamSchema","defineActionCursorMove","ActionSleepParamSchema","defineActionSleep","getDebug","Promise","resolve","setTimeout"],"mappings":";;;;;;;;;;;;;AAiBO,MAAeA;;QA8CpB;;AACF;AAKO,MAAMC,eAAe,CAK1BC,SAaOA;AAIF,MAAMC,uBAAuBC,EAAE,MAAM,CAAC;IAC3C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMC,kBAAkB,CAC7BC,OAEON,aAA0D;QAC/D,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaE;QACbI;IACF;AAIK,MAAMC,8BAA8BJ,EAAE,MAAM,CAAC;IAClD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMI,yBAAyB,CACpCF,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaO;QACbD;IACF;AAIK,MAAMG,+BAA+BN,EAAE,MAAM,CAAC;IACnD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMM,0BAA0B,CACrCJ,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaS;QACbH;IACF;AAIK,MAAMK,yBAAyBR,EAAE,MAAM,CAAC;IAC7C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMQ,oBAAoB,CAC/BN,OAEON,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaW;QACbL;IACF;AAIF,MAAMO,yBACJ;AACK,MAAMC,yBAAyBX,EAAE,MAAM,CAAC;IAC7C,OAAOA,EAAAA,KACC,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,MAAM;KAAG,EAC9B,SAAS,CAAC,CAACY,MAAQC,OAAOD,MAC1B,QAAQ,CACP;IAEJ,QAAQX,4BACL,QAAQ,CAACS,wBACT,QAAQ;IACX,MAAMV,CAAC,CAADA,OACC,CAAC;QAAC;QAAW;QAAS;KAAW,EACrC,OAAO,CAAC,WACR,QAAQ,CACP;AAEN;AAOO,MAAMc,oBAAoB,CAC/BX,OAEON,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAac;QACb,MAAM,CAACI;YAEL,IAAKA,AAA0B,aAA1BA,MAAM,IAAI,EACbA,MAAM,IAAI,GAAG;YAEf,OAAOZ,KAAKY;QACd;IACF;AAIK,MAAMC,iCAAiChB,EAAE,MAAM,CAAC;IACrD,QAAQC,4BACL,QAAQ,CAAC,qDACT,QAAQ;IACX,SAASD,EAAAA,MACA,GACN,QAAQ,CACP;AAEN;AAMO,MAAMiB,4BAA4B,CACvCd,OAEON,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAamB;QACbb;IACF;AAIK,MAAMe,0BAA0BlB,EAAE,MAAM,CAAC;IAC9C,YAAYA,CAAC,CAADA,OACL,CAAC;QACJ;QACA;QACA;QACA;QACA;KACD,EACA,OAAO,CAAC,gBACR,QAAQ,CACP;IAEJ,WAAWA,CAAC,CAADA,OACJ,CAAC;QAAC;QAAQ;QAAM;QAAS;KAAO,EACpC,OAAO,CAAC,QACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,GACR,QAAQ,CAAC;IACZ,QAAQC,4BACL,QAAQ,GACR,QAAQ,CACP;AAEN;AAEO,MAAMkB,qBAAqB,CAChChB,OAEON,aAAgE;QACrE,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAaqB;QACbf;IACF;AAIK,MAAMiB,+BAA+BpB,EAAE,MAAM,CAAC;IACnD,MAAMC,4BAA4B,QAAQ,CAAC;IAC3C,IAAIA,4BAA4B,QAAQ,CAAC;AAC3C;AAMO,MAAMoB,0BAA0B,CACrClB,OAEON,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAauB;QACb,QAAQ;YACN,MAAM;gBAAE,QAAQ;YAA6B;YAC7C,IAAI;gBAAE,QAAQ;YAAuB;QACvC;QACAjB;IACF;AAGK,MAAMmB,6BAA6BtB,EAAE,MAAM,CAAC;IACjD,QAAQC,4BAA4B,QAAQ,CAC1C;IAEF,UAAUD,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMuB,wBAAwB,CACnCpB,OAEON,aAAsE;QAC3E,MAAM;QACN,aAAa;QACb,aAAayB;QACbnB;IACF;AAGK,MAAMqB,yBAAyBxB,EAAE,MAAM,CAAC;IAC7C,OAAOC,4BACJ,QAAQ,GACR,QAAQ,CACP;IAEJ,WAAWD,CAAC,CAADA,OACJ,CAAC;QAAC;QAAM;QAAQ;QAAQ;KAAQ,EACpC,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,CAAC;IACZ,KAAKC,4BACF,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUD,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,CAAC;IACZ,QAAQA,EAAAA,MACC,GACN,QAAQ,GACR,QAAQ,CACP;AAEN;AAWO,SAASyB,0BACdV,KAAuB,EACvBW,UAA6C;IAO7C,MAAM,EAAEC,KAAK,EAAEC,MAAM,EAAE,GAAGF;IAC1B,MAAM,EAAEG,KAAK,EAAEC,GAAG,EAAE,GAAGf;IAEvB,MAAMgB,aAAaF,QACf;QAAE,GAAGA,MAAM,MAAM,CAAC,EAAE;QAAE,GAAGA,MAAM,MAAM,CAAC,EAAE;IAAC,IACzC;QAAE,GAAGF,QAAQ;QAAG,GAAGC,SAAS;IAAE;IAElC,IAAII;IAEJ,IAAIF,KACFE,WAAW;QAAE,GAAGF,IAAI,MAAM,CAAC,EAAE;QAAE,GAAGA,IAAI,MAAM,CAAC,EAAE;IAAC;SAC3C,IAAIf,MAAM,QAAQ,EAAE;QACzB,MAAMkB,YAAYlB,MAAM,SAAS;QACjC,IAAI,CAACkB,WACH,MAAM,IAAIC,MAAM;QAElBF,WAAW;YACT,GACED,WAAW,CAAC,GACXE,CAAAA,AAAc,YAAdA,YACGlB,MAAM,QAAQ,GACdkB,AAAc,WAAdA,YACE,CAAClB,MAAM,QAAQ,GACf;YACR,GACEgB,WAAW,CAAC,GACXE,CAAAA,AAAc,WAAdA,YACGlB,MAAM,QAAQ,GACdkB,AAAc,SAAdA,YACE,CAAClB,MAAM,QAAQ,GACf;QACV;IACF,OACE,MAAM,IAAImB,MACR;IAIJF,SAAS,CAAC,GAAGG,KAAK,GAAG,CAAC,GAAGA,KAAK,GAAG,CAACH,SAAS,CAAC,EAAEL;IAC9CK,SAAS,CAAC,GAAGG,KAAK,GAAG,CAAC,GAAGA,KAAK,GAAG,CAACH,SAAS,CAAC,EAAEJ;IAE9C,MAAMQ,WAAWrB,MAAM,QAAQ,IAAI;IAEnC,IAAIsB,cAAc,AAAwB,YAAxB,OAAOtB,MAAM,MAAM,GAAgBA,MAAM,MAAM,GAAG;IACpE,IAAIsB,AAAgB,MAAhBA,aACFA,cAAc;IAGhB,OAAO;QAAEN;QAAYC;QAAUI;QAAUC;IAAY;AACvD;AAEO,MAAMC,oBAAoB,CAC/BnC,OAEON,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAa2B;QACb,QAAQ;YACN,OAAO;gBAAE,QAAQ;YAAkC;YACnD,KAAK;gBAAE,QAAQ;YAAkC;QACnD;QACArB;IACF;AAIK,MAAMoC,8BAA8BvC,EAAE,MAAM,CAAC;IAClD,QAAQC,4BACL,QAAQ,CAAC,iCACT,QAAQ;AACb;AAKO,MAAMuC,yBAAyB,CACpCrC,OAEON,aAGL;QACA,MAAM;QACN,aAAaa;QACb,gBAAgB;QAChB,aAAa6B;QACbpC;IACF;AAIK,MAAMsC,8BAA8BzC,EAAE,MAAM,CAAC;IAClD,WAAWA,CAAC,CAADA,OACJ,CAAC;QAAC;QAAQ;KAAQ,EACtB,QAAQ,CAAC;IACZ,OAAOA,EAAAA,MACE,GACN,GAAG,GACH,GAAG,CAAC,GACJ,OAAO,CAAC,GACR,QAAQ,CACP;AAEN;AAMO,MAAM0C,yBAAyB,CACpCvC,OAEON,aAGL;QACA,MAAM;QACN,aACE;QACF,aAAa4C;QACbtC;IACF;AAGK,MAAMwC,yBAAyB3C,EAAE,MAAM,CAAC;IAC7C,QAAQA,EAAAA,MACC,GACN,OAAO,CAAC,MACR,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAM4C,oBAAoB,IACxB/C,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAa8C;QACb,MAAM,OAAO5B;YACX,MAAMqB,WAAWrB,OAAO,UAAU;YAClC8B,SAAS,wBAAwB,CAAC,aAAa,EAAET,SAAS,EAAE,CAAC;YAC7D,MAAM,IAAIU,QAAQ,CAACC,UAAYC,WAAWD,SAASX;QACrD;IACF"}
1
+ {"version":3,"file":"device/index.mjs","sources":["../../../src/device/index.ts"],"sourcesContent":["import { getMidsceneLocationSchema } from '@/common';\nimport type {\n ActionScrollParam,\n DeviceAction,\n LocateResultElement,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport type { ElementNode } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';\nimport { z } from 'zod';\nimport type { ElementCacheFeature, Rect, Size, UIContext } from '../types';\n\nexport interface FileChooserHandler {\n accept(files: string[]): Promise<void>;\n}\n\nexport abstract class AbstractInterface {\n abstract interfaceType: string;\n\n abstract screenshotBase64(): Promise<string>;\n abstract size(): Promise<Size>;\n abstract actionSpace(): DeviceAction[];\n\n abstract cacheFeatureForPoint?(\n center: [number, number],\n options?: {\n targetDescription?: string;\n modelConfig?: IModelConfig;\n },\n ): Promise<ElementCacheFeature>;\n abstract rectMatchesCacheFeature?(\n feature: ElementCacheFeature,\n ): Promise<Rect>;\n\n abstract destroy?(): Promise<void>;\n\n abstract describe?(): string;\n abstract beforeInvokeAction?(actionName: string, param: any): Promise<void>;\n abstract afterInvokeAction?(actionName: string, param: any): Promise<void>;\n\n // for web only\n registerFileChooserListener?(\n handler: (chooser: FileChooserHandler) => Promise<void>,\n ): Promise<{ dispose: () => void; getError: () => Error | undefined }>;\n\n // @deprecated do NOT extend this method\n abstract getElementsNodeTree?: () => Promise<ElementNode>;\n\n // @deprecated do NOT extend this method\n abstract url?: () => string | Promise<string>;\n\n // @deprecated do NOT extend this method\n abstract evaluateJavaScript?<T = any>(script: string): Promise<T>;\n\n /**\n * Get the current time from the device.\n * Returns the device's current timestamp in milliseconds.\n * This is useful when the system time and device time are not synchronized.\n */\n getTimestamp?(): Promise<number>;\n\n /** URL of native MJPEG stream for real-time screen preview (e.g. WDA MJPEG server) */\n mjpegStreamUrl?: string;\n}\n\n// Generic function to define actions with proper type inference\n// TRuntime allows specifying a different type for the runtime parameter (after location resolution)\n// TReturn allows specifying the return type of the action\nexport const defineAction = <\n TSchema extends z.ZodType | undefined = undefined,\n TRuntime = TSchema extends z.ZodType ? z.infer<TSchema> : undefined,\n TReturn = any,\n>(\n config: {\n name: string;\n description: string;\n interfaceAlias?: string;\n paramSchema?: TSchema;\n call: (param: TRuntime) => Promise<TReturn> | TReturn;\n } & Partial<\n Omit<\n DeviceAction<TRuntime, TReturn>,\n 'name' | 'description' | 'interfaceAlias' | 'paramSchema' | 'call'\n >\n >,\n): DeviceAction<TRuntime, TReturn> => {\n return config as any; // Type assertion needed because schema validation type differs from runtime type\n};\n\n// Tap\nexport const actionTapParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be tapped'),\n});\nexport type ActionTapParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionTap = (\n call: (param: ActionTapParam) => Promise<void>,\n): DeviceAction<ActionTapParam> => {\n return defineAction<typeof actionTapParamSchema, ActionTapParam>({\n name: 'Tap',\n description: 'Tap the element',\n interfaceAlias: 'aiTap',\n paramSchema: actionTapParamSchema,\n sample: {\n locate: { prompt: 'the \"Submit\" button' },\n },\n call,\n });\n};\n\n// RightClick\nexport const actionRightClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be right clicked',\n ),\n});\nexport type ActionRightClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionRightClick = (\n call: (param: ActionRightClickParam) => Promise<void>,\n): DeviceAction<ActionRightClickParam> => {\n return defineAction<\n typeof actionRightClickParamSchema,\n ActionRightClickParam\n >({\n name: 'RightClick',\n description: 'Right click the element',\n interfaceAlias: 'aiRightClick',\n paramSchema: actionRightClickParamSchema,\n sample: {\n locate: { prompt: 'the file icon on the desktop' },\n },\n call,\n });\n};\n\n// DoubleClick\nexport const actionDoubleClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be double clicked',\n ),\n});\nexport type ActionDoubleClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionDoubleClick = (\n call: (param: ActionDoubleClickParam) => Promise<void>,\n): DeviceAction<ActionDoubleClickParam> => {\n return defineAction<\n typeof actionDoubleClickParamSchema,\n ActionDoubleClickParam\n >({\n name: 'DoubleClick',\n description: 'Double click the element',\n interfaceAlias: 'aiDoubleClick',\n paramSchema: actionDoubleClickParamSchema,\n sample: {\n locate: { prompt: 'the folder icon' },\n },\n call,\n });\n};\n\n// Hover\nexport const actionHoverParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be hovered'),\n});\nexport type ActionHoverParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionHover = (\n call: (param: ActionHoverParam) => Promise<void>,\n): DeviceAction<ActionHoverParam> => {\n return defineAction<typeof actionHoverParamSchema, ActionHoverParam>({\n name: 'Hover',\n description: 'Move the mouse to the element',\n interfaceAlias: 'aiHover',\n paramSchema: actionHoverParamSchema,\n sample: {\n locate: { prompt: 'the navigation menu item \"Products\"' },\n },\n call,\n });\n};\n\n// Input\nconst inputLocateDescription =\n 'the position of the placeholder or text content in the target input field. If there is no content, locate the center of the input field.';\nexport const actionInputParamSchema = z.object({\n value: z\n .union([z.string(), z.number()])\n .transform((val) => String(val))\n .describe(\n 'The text to input. Provide the final content for replace/append modes, or an empty string when using clear mode to remove existing text.',\n ),\n locate: getMidsceneLocationSchema()\n .describe(inputLocateDescription)\n .optional(),\n mode: z\n .enum(['replace', 'clear', 'typeOnly'])\n .default('replace')\n .describe(\n 'Input mode: \"replace\" (default) - clear the field and input the value; \"typeOnly\" - type the value directly without clearing the field first; \"clear\" - clear the field without inputting new text.',\n ),\n});\nexport type ActionInputParam = {\n value: string;\n locate?: LocateResultElement;\n mode?: 'replace' | 'clear' | 'typeOnly' | 'append';\n};\n\nexport const defineActionInput = (\n call: (param: ActionInputParam) => Promise<void>,\n): DeviceAction<ActionInputParam> => {\n return defineAction<typeof actionInputParamSchema, ActionInputParam>({\n name: 'Input',\n description: 'Input the value into the element',\n interfaceAlias: 'aiInput',\n paramSchema: actionInputParamSchema,\n sample: {\n value: 'test@example.com',\n locate: { prompt: 'the email input field' },\n },\n call: (param) => {\n // backward compat: convert deprecated 'append' to 'typeOnly'\n if ((param.mode as string) === 'append') {\n param.mode = 'typeOnly';\n }\n return call(param);\n },\n });\n};\n\n// KeyboardPress\nexport const actionKeyboardPressParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The element to be clicked before pressing the key')\n .optional(),\n keyName: z\n .string()\n .describe(\n \"The key to be pressed. Use '+' for key combinations, e.g., 'Control+A', 'Shift+Enter'\",\n ),\n});\nexport type ActionKeyboardPressParam = {\n locate?: LocateResultElement;\n keyName: string;\n};\n\nexport const defineActionKeyboardPress = (\n call: (param: ActionKeyboardPressParam) => Promise<void>,\n): DeviceAction<ActionKeyboardPressParam> => {\n return defineAction<\n typeof actionKeyboardPressParamSchema,\n ActionKeyboardPressParam\n >({\n name: 'KeyboardPress',\n description:\n 'Press a key or key combination, like \"Enter\", \"Tab\", \"Escape\", or \"Control+A\", \"Shift+Enter\". Do not use this to type text.',\n interfaceAlias: 'aiKeyboardPress',\n paramSchema: actionKeyboardPressParamSchema,\n sample: {\n keyName: 'Enter',\n },\n call,\n });\n};\n\n// Scroll\nexport const actionScrollParamSchema = z.object({\n scrollType: z\n .enum([\n 'singleAction',\n 'scrollToBottom',\n 'scrollToTop',\n 'scrollToRight',\n 'scrollToLeft',\n ])\n .default('singleAction')\n .describe(\n 'The scroll behavior: \"singleAction\" for a single scroll action, \"scrollToBottom\" for scrolling all the way to the bottom by rapidly scrolling 5-10 times (skipping intermediate content until reaching the bottom), \"scrollToTop\" for scrolling all the way to the top by rapidly scrolling 5-10 times (skipping intermediate content until reaching the top), \"scrollToRight\" for scrolling all the way to the right by rapidly scrolling multiple times, \"scrollToLeft\" for scrolling all the way to the left by rapidly scrolling multiple times',\n ),\n direction: z\n .enum(['down', 'up', 'right', 'left'])\n .default('down')\n .describe(\n 'The direction to scroll. Only effective when scrollType is \"singleAction\".',\n ),\n distance: z\n .number()\n .nullable()\n .optional()\n .describe('The distance in pixels to scroll'),\n locate: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Describe the target element to be scrolled on, like \"the table\" or \"the list\" or \"the content area\" or \"the scrollable area\". Do NOT provide a general intent like \"scroll to find some element\"',\n ),\n});\n\nexport const defineActionScroll = (\n call: (param: ActionScrollParam) => Promise<void>,\n): DeviceAction<ActionScrollParam> => {\n return defineAction<typeof actionScrollParamSchema, ActionScrollParam>({\n name: 'Scroll',\n description:\n 'Scroll the page or a scrollable element to browse content. This is the preferred way to scroll on all platforms, including mobile. Supports scrollToBottom/scrollToTop for boundary navigation. Default: direction `down`, scrollType `singleAction`, distance `null`.',\n interfaceAlias: 'aiScroll',\n paramSchema: actionScrollParamSchema,\n sample: {\n direction: 'down',\n scrollType: 'singleAction',\n locate: { prompt: 'the center of the product list area' },\n },\n call,\n });\n};\n\n// DragAndDrop\nexport const actionDragAndDropParamSchema = z.object({\n from: getMidsceneLocationSchema().describe('The position to be dragged'),\n to: getMidsceneLocationSchema().describe('The position to be dropped'),\n});\nexport type ActionDragAndDropParam = {\n from: LocateResultElement;\n to: LocateResultElement;\n};\n\nexport const defineActionDragAndDrop = (\n call: (param: ActionDragAndDropParam) => Promise<void>,\n): DeviceAction<ActionDragAndDropParam> => {\n return defineAction<\n typeof actionDragAndDropParamSchema,\n ActionDragAndDropParam\n >({\n name: 'DragAndDrop',\n description:\n 'Pick up a specific UI element and move it to a new position (e.g., reorder a card, move a file into a folder, sort list items). The element itself moves with your finger/mouse.',\n interfaceAlias: 'aiDragAndDrop',\n paramSchema: actionDragAndDropParamSchema,\n sample: {\n from: { prompt: 'the \"report.pdf\" file icon' },\n to: { prompt: 'the upload drop zone' },\n },\n call,\n });\n};\n\nexport const ActionLongPressParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be long pressed',\n ),\n duration: z\n .number()\n .default(500)\n .optional()\n .describe('Long press duration in milliseconds'),\n});\n\nexport type ActionLongPressParam = {\n locate: LocateResultElement;\n duration?: number;\n};\nexport const defineActionLongPress = (\n call: (param: ActionLongPressParam) => Promise<void>,\n): DeviceAction<ActionLongPressParam> => {\n return defineAction<typeof ActionLongPressParamSchema, ActionLongPressParam>({\n name: 'LongPress',\n description: 'Long press the element',\n paramSchema: ActionLongPressParamSchema,\n sample: {\n locate: { prompt: 'the message bubble' },\n },\n call,\n });\n};\n\nexport const ActionSwipeParamSchema = z.object({\n start: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Starting point of the swipe gesture, if not specified, the center of the page will be used',\n ),\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .optional()\n .describe(\n 'The direction to swipe (required when using distance). The direction means the direction of the finger swipe.',\n ),\n distance: z\n .number()\n .optional()\n .describe('The distance in pixels to swipe (mutually exclusive with end)'),\n end: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Ending point of the swipe gesture (mutually exclusive with distance)',\n ),\n duration: z\n .number()\n .default(300)\n .describe('Duration of the swipe gesture in milliseconds'),\n repeat: z\n .number()\n .optional()\n .describe(\n 'The number of times to repeat the swipe gesture. 1 for default, 0 for infinite (e.g. endless swipe until the end of the page)',\n ),\n});\n\nexport type ActionSwipeParam = {\n start?: LocateResultElement;\n direction?: 'up' | 'down' | 'left' | 'right';\n distance?: number;\n end?: LocateResultElement;\n duration?: number;\n repeat?: number;\n};\n\nexport function normalizeMobileSwipeParam(\n param: ActionSwipeParam,\n screenSize: { width: number; height: number },\n): {\n startPoint: { x: number; y: number };\n endPoint: { x: number; y: number };\n duration: number;\n repeatCount: number;\n} {\n const { width, height } = screenSize;\n const { start, end } = param;\n\n const startPoint = start\n ? { x: start.center[0], y: start.center[1] }\n : { x: width / 2, y: height / 2 };\n\n let endPoint: { x: number; y: number };\n\n if (end) {\n endPoint = { x: end.center[0], y: end.center[1] };\n } else if (param.distance) {\n const direction = param.direction;\n if (!direction) {\n throw new Error('direction is required for swipe gesture');\n }\n endPoint = {\n x:\n startPoint.x +\n (direction === 'right'\n ? param.distance\n : direction === 'left'\n ? -param.distance\n : 0),\n y:\n startPoint.y +\n (direction === 'down'\n ? param.distance\n : direction === 'up'\n ? -param.distance\n : 0),\n };\n } else {\n throw new Error(\n 'Either end or distance must be specified for swipe gesture',\n );\n }\n\n endPoint.x = Math.max(0, Math.min(endPoint.x, width));\n endPoint.y = Math.max(0, Math.min(endPoint.y, height));\n\n const duration = param.duration ?? 300;\n\n let repeatCount = typeof param.repeat === 'number' ? param.repeat : 1;\n if (repeatCount === 0) {\n repeatCount = 10;\n }\n\n return { startPoint, endPoint, duration, repeatCount };\n}\n\nexport const defineActionSwipe = (\n call: (param: ActionSwipeParam) => Promise<void>,\n): DeviceAction<ActionSwipeParam> => {\n return defineAction<typeof ActionSwipeParamSchema, ActionSwipeParam>({\n name: 'Swipe',\n description:\n 'Perform a touch gesture for interactions beyond regular scrolling (e.g., flip pages in a carousel, dismiss a notification, swipe-to-delete a list item). For regular content scrolling, use Scroll instead. Use \"distance\" + \"direction\" for relative movement, or \"end\" for precise endpoint.',\n paramSchema: ActionSwipeParamSchema,\n sample: {\n start: { prompt: 'center of the notification' },\n end: { prompt: 'upper edge of the screen' },\n },\n call,\n });\n};\n\n// ClearInput\nexport const actionClearInputParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The input field to be cleared')\n .optional(),\n});\nexport type ActionClearInputParam = {\n locate?: LocateResultElement;\n};\n\nexport const defineActionClearInput = (\n call: (param: ActionClearInputParam) => Promise<void>,\n): DeviceAction<ActionClearInputParam> => {\n return defineAction<\n typeof actionClearInputParamSchema,\n ActionClearInputParam\n >({\n name: 'ClearInput',\n description: inputLocateDescription,\n interfaceAlias: 'aiClearInput',\n paramSchema: actionClearInputParamSchema,\n sample: {\n locate: { prompt: 'the search input field' },\n },\n call,\n });\n};\n\n// CursorMove\nexport const actionCursorMoveParamSchema = z.object({\n direction: z\n .enum(['left', 'right'])\n .describe('The direction to move the cursor'),\n times: z\n .number()\n .int()\n .min(1)\n .default(1)\n .describe(\n 'The number of times to move the cursor in the specified direction',\n ),\n});\nexport type ActionCursorMoveParam = {\n direction: 'left' | 'right';\n times?: number;\n};\n\nexport const defineActionCursorMove = (\n call: (param: ActionCursorMoveParam) => Promise<void>,\n): DeviceAction<ActionCursorMoveParam> => {\n return defineAction<\n typeof actionCursorMoveParamSchema,\n ActionCursorMoveParam\n >({\n name: 'CursorMove',\n description:\n 'Move the text cursor (caret) left or right within an input field or text area. Use this to reposition the cursor without selecting text.',\n paramSchema: actionCursorMoveParamSchema,\n sample: {\n direction: 'left',\n times: 3,\n },\n call,\n });\n};\n// Sleep\nexport const ActionSleepParamSchema = z.object({\n timeMs: z\n .number()\n .default(1000)\n .optional()\n .describe('Sleep duration in milliseconds, defaults to 1000ms (1 second)'),\n});\n\nexport type ActionSleepParam = {\n timeMs?: number;\n};\n\nexport const defineActionSleep = (): DeviceAction<ActionSleepParam> => {\n return defineAction<typeof ActionSleepParamSchema, ActionSleepParam>({\n name: 'Sleep',\n description:\n 'Wait for a specified duration before continuing. Defaults to 1 second (1000ms) if not specified.',\n paramSchema: ActionSleepParamSchema,\n sample: {\n timeMs: 2000,\n },\n call: async (param) => {\n const duration = param?.timeMs ?? 1000;\n getDebug('device:common-action')(`Sleeping for ${duration}ms`);\n await new Promise((resolve) => setTimeout(resolve, duration));\n },\n });\n};\n\nexport type { DeviceAction } from '../types';\nexport type {\n AndroidDeviceOpt,\n AndroidDeviceInputOpt,\n IOSDeviceOpt,\n IOSDeviceInputOpt,\n HarmonyDeviceOpt,\n HarmonyDeviceInputOpt,\n} from './device-options';\n"],"names":["AbstractInterface","defineAction","config","actionTapParamSchema","z","getMidsceneLocationSchema","defineActionTap","call","actionRightClickParamSchema","defineActionRightClick","actionDoubleClickParamSchema","defineActionDoubleClick","actionHoverParamSchema","defineActionHover","inputLocateDescription","actionInputParamSchema","val","String","defineActionInput","param","actionKeyboardPressParamSchema","defineActionKeyboardPress","actionScrollParamSchema","defineActionScroll","actionDragAndDropParamSchema","defineActionDragAndDrop","ActionLongPressParamSchema","defineActionLongPress","ActionSwipeParamSchema","normalizeMobileSwipeParam","screenSize","width","height","start","end","startPoint","endPoint","direction","Error","Math","duration","repeatCount","defineActionSwipe","actionClearInputParamSchema","defineActionClearInput","actionCursorMoveParamSchema","defineActionCursorMove","ActionSleepParamSchema","defineActionSleep","getDebug","Promise","resolve","setTimeout"],"mappings":";;;;;;;;;;;;;AAiBO,MAAeA;;QA8CpB;;AACF;AAKO,MAAMC,eAAe,CAK1BC,SAaOA;AAIF,MAAMC,uBAAuBC,EAAE,MAAM,CAAC;IAC3C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMC,kBAAkB,CAC7BC,OAEON,aAA0D;QAC/D,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaE;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAsB;QAC1C;QACAI;IACF;AAIK,MAAMC,8BAA8BJ,EAAE,MAAM,CAAC;IAClD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMI,yBAAyB,CACpCF,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaO;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAA+B;QACnD;QACAD;IACF;AAIK,MAAMG,+BAA+BN,EAAE,MAAM,CAAC;IACnD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMM,0BAA0B,CACrCJ,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaS;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAkB;QACtC;QACAH;IACF;AAIK,MAAMK,yBAAyBR,EAAE,MAAM,CAAC;IAC7C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMQ,oBAAoB,CAC/BN,OAEON,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaW;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAsC;QAC1D;QACAL;IACF;AAIF,MAAMO,yBACJ;AACK,MAAMC,yBAAyBX,EAAE,MAAM,CAAC;IAC7C,OAAOA,EAAAA,KACC,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,MAAM;KAAG,EAC9B,SAAS,CAAC,CAACY,MAAQC,OAAOD,MAC1B,QAAQ,CACP;IAEJ,QAAQX,4BACL,QAAQ,CAACS,wBACT,QAAQ;IACX,MAAMV,CAAC,CAADA,OACC,CAAC;QAAC;QAAW;QAAS;KAAW,EACrC,OAAO,CAAC,WACR,QAAQ,CACP;AAEN;AAOO,MAAMc,oBAAoB,CAC/BX,OAEON,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAac;QACb,QAAQ;YACN,OAAO;YACP,QAAQ;gBAAE,QAAQ;YAAwB;QAC5C;QACA,MAAM,CAACI;YAEL,IAAKA,AAA0B,aAA1BA,MAAM,IAAI,EACbA,MAAM,IAAI,GAAG;YAEf,OAAOZ,KAAKY;QACd;IACF;AAIK,MAAMC,iCAAiChB,EAAE,MAAM,CAAC;IACrD,QAAQC,4BACL,QAAQ,CAAC,qDACT,QAAQ;IACX,SAASD,EAAAA,MACA,GACN,QAAQ,CACP;AAEN;AAMO,MAAMiB,4BAA4B,CACvCd,OAEON,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAamB;QACb,QAAQ;YACN,SAAS;QACX;QACAb;IACF;AAIK,MAAMe,0BAA0BlB,EAAE,MAAM,CAAC;IAC9C,YAAYA,CAAC,CAADA,OACL,CAAC;QACJ;QACA;QACA;QACA;QACA;KACD,EACA,OAAO,CAAC,gBACR,QAAQ,CACP;IAEJ,WAAWA,CAAC,CAADA,OACJ,CAAC;QAAC;QAAQ;QAAM;QAAS;KAAO,EACpC,OAAO,CAAC,QACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,GACR,QAAQ,CAAC;IACZ,QAAQC,4BACL,QAAQ,GACR,QAAQ,CACP;AAEN;AAEO,MAAMkB,qBAAqB,CAChChB,OAEON,aAAgE;QACrE,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAaqB;QACb,QAAQ;YACN,WAAW;YACX,YAAY;YACZ,QAAQ;gBAAE,QAAQ;YAAsC;QAC1D;QACAf;IACF;AAIK,MAAMiB,+BAA+BpB,EAAE,MAAM,CAAC;IACnD,MAAMC,4BAA4B,QAAQ,CAAC;IAC3C,IAAIA,4BAA4B,QAAQ,CAAC;AAC3C;AAMO,MAAMoB,0BAA0B,CACrClB,OAEON,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAauB;QACb,QAAQ;YACN,MAAM;gBAAE,QAAQ;YAA6B;YAC7C,IAAI;gBAAE,QAAQ;YAAuB;QACvC;QACAjB;IACF;AAGK,MAAMmB,6BAA6BtB,EAAE,MAAM,CAAC;IACjD,QAAQC,4BAA4B,QAAQ,CAC1C;IAEF,UAAUD,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMuB,wBAAwB,CACnCpB,OAEON,aAAsE;QAC3E,MAAM;QACN,aAAa;QACb,aAAayB;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAqB;QACzC;QACAnB;IACF;AAGK,MAAMqB,yBAAyBxB,EAAE,MAAM,CAAC;IAC7C,OAAOC,4BACJ,QAAQ,GACR,QAAQ,CACP;IAEJ,WAAWD,CAAC,CAADA,OACJ,CAAC;QAAC;QAAM;QAAQ;QAAQ;KAAQ,EACpC,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,CAAC;IACZ,KAAKC,4BACF,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUD,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,CAAC;IACZ,QAAQA,EAAAA,MACC,GACN,QAAQ,GACR,QAAQ,CACP;AAEN;AAWO,SAASyB,0BACdV,KAAuB,EACvBW,UAA6C;IAO7C,MAAM,EAAEC,KAAK,EAAEC,MAAM,EAAE,GAAGF;IAC1B,MAAM,EAAEG,KAAK,EAAEC,GAAG,EAAE,GAAGf;IAEvB,MAAMgB,aAAaF,QACf;QAAE,GAAGA,MAAM,MAAM,CAAC,EAAE;QAAE,GAAGA,MAAM,MAAM,CAAC,EAAE;IAAC,IACzC;QAAE,GAAGF,QAAQ;QAAG,GAAGC,SAAS;IAAE;IAElC,IAAII;IAEJ,IAAIF,KACFE,WAAW;QAAE,GAAGF,IAAI,MAAM,CAAC,EAAE;QAAE,GAAGA,IAAI,MAAM,CAAC,EAAE;IAAC;SAC3C,IAAIf,MAAM,QAAQ,EAAE;QACzB,MAAMkB,YAAYlB,MAAM,SAAS;QACjC,IAAI,CAACkB,WACH,MAAM,IAAIC,MAAM;QAElBF,WAAW;YACT,GACED,WAAW,CAAC,GACXE,CAAAA,AAAc,YAAdA,YACGlB,MAAM,QAAQ,GACdkB,AAAc,WAAdA,YACE,CAAClB,MAAM,QAAQ,GACf;YACR,GACEgB,WAAW,CAAC,GACXE,CAAAA,AAAc,WAAdA,YACGlB,MAAM,QAAQ,GACdkB,AAAc,SAAdA,YACE,CAAClB,MAAM,QAAQ,GACf;QACV;IACF,OACE,MAAM,IAAImB,MACR;IAIJF,SAAS,CAAC,GAAGG,KAAK,GAAG,CAAC,GAAGA,KAAK,GAAG,CAACH,SAAS,CAAC,EAAEL;IAC9CK,SAAS,CAAC,GAAGG,KAAK,GAAG,CAAC,GAAGA,KAAK,GAAG,CAACH,SAAS,CAAC,EAAEJ;IAE9C,MAAMQ,WAAWrB,MAAM,QAAQ,IAAI;IAEnC,IAAIsB,cAAc,AAAwB,YAAxB,OAAOtB,MAAM,MAAM,GAAgBA,MAAM,MAAM,GAAG;IACpE,IAAIsB,AAAgB,MAAhBA,aACFA,cAAc;IAGhB,OAAO;QAAEN;QAAYC;QAAUI;QAAUC;IAAY;AACvD;AAEO,MAAMC,oBAAoB,CAC/BnC,OAEON,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAa2B;QACb,QAAQ;YACN,OAAO;gBAAE,QAAQ;YAA6B;YAC9C,KAAK;gBAAE,QAAQ;YAA2B;QAC5C;QACArB;IACF;AAIK,MAAMoC,8BAA8BvC,EAAE,MAAM,CAAC;IAClD,QAAQC,4BACL,QAAQ,CAAC,iCACT,QAAQ;AACb;AAKO,MAAMuC,yBAAyB,CACpCrC,OAEON,aAGL;QACA,MAAM;QACN,aAAaa;QACb,gBAAgB;QAChB,aAAa6B;QACb,QAAQ;YACN,QAAQ;gBAAE,QAAQ;YAAyB;QAC7C;QACApC;IACF;AAIK,MAAMsC,8BAA8BzC,EAAE,MAAM,CAAC;IAClD,WAAWA,CAAC,CAADA,OACJ,CAAC;QAAC;QAAQ;KAAQ,EACtB,QAAQ,CAAC;IACZ,OAAOA,EAAAA,MACE,GACN,GAAG,GACH,GAAG,CAAC,GACJ,OAAO,CAAC,GACR,QAAQ,CACP;AAEN;AAMO,MAAM0C,yBAAyB,CACpCvC,OAEON,aAGL;QACA,MAAM;QACN,aACE;QACF,aAAa4C;QACb,QAAQ;YACN,WAAW;YACX,OAAO;QACT;QACAtC;IACF;AAGK,MAAMwC,yBAAyB3C,EAAE,MAAM,CAAC;IAC7C,QAAQA,EAAAA,MACC,GACN,OAAO,CAAC,MACR,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAM4C,oBAAoB,IACxB/C,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAa8C;QACb,QAAQ;YACN,QAAQ;QACV;QACA,MAAM,OAAO5B;YACX,MAAMqB,WAAWrB,OAAO,UAAU;YAClC8B,SAAS,wBAAwB,CAAC,aAAa,EAAET,SAAS,EAAE,CAAC;YAC7D,MAAM,IAAIU,QAAQ,CAACC,UAAYC,WAAWD,SAASX;QACrD;IACF"}
package/dist/es/types.mjs CHANGED
@@ -87,7 +87,8 @@ class GroupedActionDump {
87
87
  serializeWithInlineScreenshots(indents) {
88
88
  const processValue = (obj)=>{
89
89
  if (obj instanceof ScreenshotItem) return {
90
- base64: obj.base64
90
+ base64: obj.base64,
91
+ capturedAt: obj.capturedAt
91
92
  };
92
93
  if (Array.isArray(obj)) return obj.map(processValue);
93
94
  if (obj && 'object' == typeof obj) {
@@ -1 +1 @@
1
- {"version":3,"file":"types.mjs","sources":["../../src/types.ts"],"sourcesContent":["/* eslint-disable @typescript-eslint/no-explicit-any */\n\nimport {\n existsSync,\n mkdirSync,\n readFileSync,\n rmSync,\n writeFileSync,\n} from 'node:fs';\nimport { join } from 'node:path';\nimport type { NodeType } from '@midscene/shared/constants';\nimport type { CreateOpenAIClientFn, TModelConfig } from '@midscene/shared/env';\nimport type {\n BaseElement,\n LocateResultElement,\n Rect,\n Size,\n} from '@midscene/shared/types';\nimport type { z } from 'zod';\nimport type { TUserPrompt } from './common';\nimport { restoreImageReferences } from './dump/image-restoration';\nimport { ScreenshotItem } from './screenshot-item';\nimport type {\n DetailedLocateParam,\n MidsceneYamlFlowItem,\n ServiceExtractOption,\n} from './yaml';\n\nexport type {\n ElementTreeNode,\n BaseElement,\n Rect,\n Size,\n Point,\n} from '@midscene/shared/types';\nexport * from './yaml';\n\nexport type AIUsageInfo = Record<string, any> & {\n prompt_tokens: number | undefined;\n completion_tokens: number | undefined;\n total_tokens: number | undefined;\n cached_input: number | undefined;\n time_cost: number | undefined;\n model_name: string | undefined;\n model_description: string | undefined;\n intent: string | undefined;\n request_id: string | undefined;\n};\n\nexport type { LocateResultElement };\n\nexport type AISingleElementResponseByPosition = {\n position?: {\n x: number;\n y: number;\n };\n bbox?: [number, number, number, number];\n reason: string;\n text: string;\n};\n\nexport interface AIElementCoordinatesResponse {\n bbox: [number, number, number, number];\n errors?: string[];\n}\n\nexport type AIElementResponse = AIElementCoordinatesResponse;\n\nexport interface AIDataExtractionResponse<DataDemand> {\n data: DataDemand;\n errors?: string[];\n thought?: string;\n}\n\nexport interface AISectionLocatorResponse {\n bbox: [number, number, number, number];\n references_bbox?: [number, number, number, number][];\n error?: string;\n}\n\nexport interface AIAssertionResponse {\n pass: boolean;\n thought: string;\n}\n\nexport interface AIDescribeElementResponse {\n description: string;\n error?: string;\n}\n\nexport interface LocatorValidatorOption {\n centerDistanceThreshold?: number;\n}\n\nexport interface LocateValidatorResult {\n pass: boolean;\n rect: Rect;\n center: [number, number];\n centerDistance?: number;\n}\n\nexport interface AgentDescribeElementAtPointResult {\n prompt: string;\n deepLocate: boolean;\n verifyResult?: LocateValidatorResult;\n}\n\n/**\n * context\n */\n\nexport abstract class UIContext {\n /**\n * screenshot of the current UI state. which size is shotSize(be shrunk by screenshotShrinkFactor),\n */\n abstract screenshot: ScreenshotItem;\n\n /**\n * screenshot size after shrinking\n */\n abstract shotSize: Size;\n\n /**\n * The ratio for converting shrunk screenshot coordinates to logical coordinates.\n *\n * Example:\n * - Physical screen width: 3000px, dpr=6\n * - Logical width: 500px\n * - User-defined screenshotShrinkFactor: 2\n * - Actual shrunk screenshot width: 3000 / 2 = 1500px\n * - shrunkShotToLogicalRatio: dpr / screenshotShrinkFactor = 6 / 2 = 3\n * - To map back to logical coordinates: 1500 / shrunkShotToLogicalRatio = 500px\n */\n abstract shrunkShotToLogicalRatio: number;\n\n abstract _isFrozen?: boolean;\n\n // @deprecated - backward compatibility for aiLocate\n abstract deprecatedDpr?: number;\n}\n\nexport type EnsureObject<T> = { [K in keyof T]: any };\n\nexport type ServiceAction = 'locate' | 'extract' | 'assert' | 'describe';\n\nexport type ServiceExtractParam = string | Record<string, string>;\n\nexport type ElementCacheFeature = Record<string, unknown>;\n\nexport interface LocateResult {\n element: LocateResultElement | null;\n rect?: Rect;\n}\n\nexport type ThinkingLevel = 'off' | 'medium' | 'high';\n\nexport type DeepThinkOption = 'unset' | true | false;\n\nexport interface ServiceTaskInfo {\n durationMs: number;\n formatResponse?: string;\n rawResponse?: string;\n usage?: AIUsageInfo;\n searchArea?: Rect;\n searchAreaRawResponse?: string;\n searchAreaUsage?: AIUsageInfo;\n reasoning_content?: string;\n}\n\nexport interface DumpMeta {\n logTime: number;\n}\n\nexport interface ReportDumpWithAttributes {\n dumpString: string;\n attributes?: Record<string, any>;\n}\n\nexport interface ServiceDump extends DumpMeta {\n type: 'locate' | 'extract' | 'assert';\n logId: string;\n userQuery: {\n element?: TUserPrompt;\n dataDemand?: ServiceExtractParam;\n assertion?: TUserPrompt;\n };\n matchedElement: LocateResultElement[];\n matchedRect?: Rect;\n deepLocate?: boolean;\n data: any;\n assertionPass?: boolean;\n assertionThought?: string;\n taskInfo: ServiceTaskInfo;\n error?: string;\n output?: any;\n}\n\nexport type PartialServiceDumpFromSDK = Omit<\n ServiceDump,\n 'logTime' | 'logId' | 'model_name'\n>;\n\nexport interface ServiceResultBase {\n dump: ServiceDump;\n}\n\nexport type LocateResultWithDump = LocateResult & ServiceResultBase;\n\nexport interface ServiceExtractResult<T> extends ServiceResultBase {\n data: T;\n thought?: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}\n\nexport class ServiceError extends Error {\n dump: ServiceDump;\n\n constructor(message: string, dump: ServiceDump) {\n super(message);\n this.name = 'ServiceError';\n this.dump = dump;\n }\n}\n\n// intermediate variables to optimize the return value by AI\nexport interface LiteUISection {\n name: string;\n description: string;\n sectionCharacteristics: string;\n textIds: string[];\n}\n\nexport type ElementById = (id: string) => BaseElement | null;\n\nexport type ServiceAssertionResponse = AIAssertionResponse & {\n usage?: AIUsageInfo;\n};\n\n/**\n * agent\n */\n\nexport type OnTaskStartTip = (tip: string) => Promise<void> | void;\n\nexport interface AgentWaitForOpt extends ServiceExtractOption {\n checkIntervalMs?: number;\n timeoutMs?: number;\n}\n\nexport interface AgentAssertOpt {\n keepRawResponse?: boolean;\n}\n\n/**\n * planning\n *\n */\n\nexport interface PlanningLocateParam extends DetailedLocateParam {\n bbox?: [number, number, number, number];\n}\n\nexport interface PlanningAction<ParamType = any> {\n thought?: string;\n log?: string; // a brief preamble to the user explaining what you’re about to do\n type: string;\n param: ParamType;\n}\n\nexport type SubGoalStatus = 'pending' | 'running' | 'finished';\n\nexport interface SubGoal {\n index: number;\n status: SubGoalStatus;\n description: string;\n logs?: string[];\n}\n\nexport interface RawResponsePlanningAIResponse {\n action: PlanningAction;\n thought?: string;\n log: string;\n memory?: string;\n error?: string;\n finalizeMessage?: string;\n finalizeSuccess?: boolean;\n updateSubGoals?: SubGoal[];\n markFinishedIndexes?: number[];\n}\n\nexport interface PlanningAIResponse\n extends Omit<RawResponsePlanningAIResponse, 'action'> {\n actions?: PlanningAction[];\n usage?: AIUsageInfo;\n rawResponse?: string;\n yamlFlow?: MidsceneYamlFlowItem[];\n yamlString?: string;\n error?: string;\n reasoning_content?: string;\n shouldContinuePlanning: boolean;\n output?: string; // Output message from <complete> tag (same as finalizeMessage)\n}\n\nexport interface PlanningActionParamSleep {\n timeMs: number;\n}\n\nexport interface PlanningActionParamError {\n thought: string;\n}\n\nexport type PlanningActionParamWaitFor = AgentWaitForOpt & {};\n\nexport interface LongPressParam {\n duration?: number;\n}\n\nexport interface PullParam {\n direction: 'up' | 'down';\n distance?: number;\n duration?: number;\n}\n/**\n * misc\n */\n\nexport interface Color {\n name: string;\n hex: string;\n}\n\nexport interface BaseAgentParserOpt {\n selector?: string;\n}\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PuppeteerParserOpt extends BaseAgentParserOpt {}\n\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PlaywrightParserOpt extends BaseAgentParserOpt {}\n\n/*\naction\n*/\nexport interface ExecutionTaskProgressOptions {\n onTaskStart?: (task: ExecutionTask) => Promise<void> | void;\n}\n\nexport interface ExecutionRecorderItem {\n type: 'screenshot';\n ts: number;\n screenshot?: ScreenshotItem;\n timing?: string;\n}\n\nexport type ExecutionTaskType = 'Planning' | 'Insight' | 'Action Space' | 'Log';\n\nexport interface ExecutorContext {\n task: ExecutionTask;\n element?: LocateResultElement | null;\n uiContext?: UIContext;\n}\n\nexport interface ExecutionTaskApply<\n Type extends ExecutionTaskType = any,\n TaskParam = any,\n TaskOutput = any,\n TaskLog = any,\n> {\n type: Type;\n subType?: string;\n param?: TaskParam;\n thought?: string;\n uiContext?: UIContext;\n executor: (\n param: TaskParam,\n context: ExecutorContext,\n ) => // biome-ignore lint/suspicious/noConfusingVoidType: void is intentionally allowed as some executors may not return a value\n | Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void>\n | undefined\n | void;\n}\n\nexport interface ExecutionTaskHitBy {\n from: string;\n context: Record<string, any>;\n}\n\nexport interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {\n output?: TaskOutput;\n log?: TaskLog;\n recorder?: ExecutionRecorderItem[];\n hitBy?: ExecutionTaskHitBy;\n}\n\nexport type ExecutionTask<\n E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<\n any,\n any,\n any\n >,\n> = E &\n ExecutionTaskReturn<\n E extends ExecutionTaskApply<any, any, infer TaskOutput, any>\n ? TaskOutput\n : unknown,\n E extends ExecutionTaskApply<any, any, any, infer TaskLog>\n ? TaskLog\n : unknown\n > & {\n taskId: string;\n status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';\n error?: Error;\n errorMessage?: string;\n errorStack?: string;\n timing?: {\n start: number;\n getUiContextStart?: number;\n getUiContextEnd?: number;\n callAiStart?: number;\n callAiEnd?: number;\n beforeInvokeActionHookStart?: number;\n beforeInvokeActionHookEnd?: number;\n callActionStart?: number;\n callActionEnd?: number;\n afterInvokeActionHookStart?: number;\n afterInvokeActionHookEnd?: number;\n captureAfterCallingSnapshotStart?: number;\n captureAfterCallingSnapshotEnd?: number;\n end?: number;\n cost?: number;\n };\n usage?: AIUsageInfo;\n searchAreaUsage?: AIUsageInfo;\n reasoning_content?: string;\n };\n\nexport interface IExecutionDump extends DumpMeta {\n name: string;\n description?: string;\n tasks: ExecutionTask[];\n aiActContext?: string;\n}\n\n/**\n * Replacer function for JSON serialization that handles Page, Browser objects and ScreenshotItem\n */\nfunction replacerForDumpSerialization(_key: string, value: any): any {\n if (value && value.constructor?.name === 'Page') {\n return '[Page object]';\n }\n if (value && value.constructor?.name === 'Browser') {\n return '[Browser object]';\n }\n // Handle ScreenshotItem serialization\n if (value && typeof value.toSerializable === 'function') {\n return value.toSerializable();\n }\n return value;\n}\n\n/**\n * Reviver function for JSON deserialization that handles ScreenshotItem formats.\n *\n * BEHAVIOR:\n * - For { $screenshot: \"id\" } format: Left as-is (plain object)\n * Consumer must use imageMap to restore base64 data\n * - For { base64: \"...\" } format: Creates ScreenshotItem from base64 data\n *\n * @param key - JSON key being processed\n * @param value - JSON value being processed\n * @returns Restored value\n */\nfunction reviverForDumpDeserialization(key: string, value: any): any {\n // Only process screenshot fields\n if (key !== 'screenshot' || typeof value !== 'object' || value === null) {\n return value;\n }\n\n // Handle serialized format: { $screenshot: \"id\" }\n // Leave as plain object — consumer uses imageMap to restore\n if (ScreenshotItem.isSerialized(value)) {\n return value;\n }\n\n // Handle inline base64 format: { base64: \"...\" }\n if ('base64' in value && typeof value.base64 === 'string') {\n return value;\n }\n\n return value;\n}\n\n/**\n * ExecutionDump class for serializing and deserializing execution dumps\n */\nexport class ExecutionDump implements IExecutionDump {\n logTime: number;\n name: string;\n description?: string;\n tasks: ExecutionTask[];\n aiActContext?: string;\n\n constructor(data: IExecutionDump) {\n this.logTime = data.logTime;\n this.name = data.name;\n this.description = data.description;\n this.tasks = data.tasks;\n this.aiActContext = data.aiActContext;\n }\n\n /**\n * Serialize the ExecutionDump to a JSON string\n */\n serialize(indents?: number): string {\n return JSON.stringify(this.toJSON(), replacerForDumpSerialization, indents);\n }\n\n /**\n * Convert to a plain object for JSON serialization\n */\n toJSON(): IExecutionDump {\n return {\n logTime: this.logTime,\n name: this.name,\n description: this.description,\n tasks: this.tasks.map((task) => ({\n ...task,\n recorder: task.recorder || [],\n })),\n aiActContext: this.aiActContext,\n };\n }\n\n /**\n * Create an ExecutionDump instance from a serialized JSON string\n */\n static fromSerializedString(serialized: string): ExecutionDump {\n const parsed = JSON.parse(\n serialized,\n reviverForDumpDeserialization,\n ) as IExecutionDump;\n return new ExecutionDump(parsed);\n }\n\n /**\n * Create an ExecutionDump instance from a plain object\n */\n static fromJSON(data: IExecutionDump): ExecutionDump {\n return new ExecutionDump(data);\n }\n\n /**\n * Collect all ScreenshotItem instances from tasks.\n * Scans through uiContext and recorder items to find screenshots.\n *\n * @returns Array of ScreenshotItem instances\n */\n collectScreenshots(): ScreenshotItem[] {\n const screenshots: ScreenshotItem[] = [];\n\n for (const task of this.tasks) {\n // Collect uiContext.screenshot if present\n if (task.uiContext?.screenshot instanceof ScreenshotItem) {\n screenshots.push(task.uiContext.screenshot);\n }\n\n // Collect recorder screenshots\n if (task.recorder) {\n for (const record of task.recorder) {\n if (record.screenshot instanceof ScreenshotItem) {\n screenshots.push(record.screenshot);\n }\n }\n }\n }\n\n return screenshots;\n }\n}\n\n/*\ntask - service-locate\n*/\nexport type ExecutionTaskInsightLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskInsightLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport type ExecutionTaskInsightDump = ServiceDump;\n\nexport type ExecutionTaskInsightLocateApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightLocateParam,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskInsightDump\n>;\n\nexport type ExecutionTaskInsightLocate =\n ExecutionTask<ExecutionTaskInsightLocateApply>;\n\n/*\ntask - service-query\n*/\nexport interface ExecutionTaskInsightQueryParam {\n dataDemand: ServiceExtractParam;\n}\n\nexport interface ExecutionTaskInsightQueryOutput {\n data: any;\n}\n\nexport type ExecutionTaskInsightQueryApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightQueryParam,\n any,\n ExecutionTaskInsightDump\n>;\n\nexport type ExecutionTaskInsightQuery =\n ExecutionTask<ExecutionTaskInsightQueryApply>;\n\n/*\ntask - assertion\n*/\nexport interface ExecutionTaskInsightAssertionParam {\n assertion: string;\n}\n\nexport type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightAssertionParam,\n ServiceAssertionResponse,\n ExecutionTaskInsightDump\n>;\n\nexport type ExecutionTaskInsightAssertion =\n ExecutionTask<ExecutionTaskInsightAssertionApply>;\n\n/*\ntask - action (i.e. interact) \n*/\nexport type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<\n 'Action Space',\n ActionParam,\n void,\n void\n>;\n\nexport type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;\n\n/*\ntask - Log\n*/\n\nexport type ExecutionTaskLogApply<\n LogParam = {\n content: string;\n },\n> = ExecutionTaskApply<'Log', LogParam, void, void>;\n\nexport type ExecutionTaskLog = ExecutionTask<ExecutionTaskLogApply>;\n\n/*\ntask - planning\n*/\n\nexport type ExecutionTaskPlanningApply = ExecutionTaskApply<\n 'Planning',\n {\n userInstruction: string;\n aiActContext?: string;\n },\n PlanningAIResponse\n>;\n\nexport type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;\n\n/*\ntask - planning-locate\n*/\nexport type ExecutionTaskPlanningLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskPlanningLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport type ExecutionTaskPlanningDump = ServiceDump;\n\nexport type ExecutionTaskPlanningLocateApply = ExecutionTaskApply<\n 'Planning',\n ExecutionTaskPlanningLocateParam,\n ExecutionTaskPlanningLocateOutput,\n ExecutionTaskPlanningDump\n>;\n\nexport type ExecutionTaskPlanningLocate =\n ExecutionTask<ExecutionTaskPlanningLocateApply>;\n\n/*\nGrouped dump\n*/\nexport interface IGroupedActionDump {\n sdkVersion: string;\n groupName: string;\n groupDescription?: string;\n modelBriefs: string[];\n executions: IExecutionDump[];\n deviceType?: string;\n}\n\n/**\n * GroupedActionDump class for serializing and deserializing grouped action dumps\n */\nexport class GroupedActionDump implements IGroupedActionDump {\n sdkVersion: string;\n groupName: string;\n groupDescription?: string;\n modelBriefs: string[];\n executions: ExecutionDump[];\n deviceType?: string;\n\n constructor(data: IGroupedActionDump) {\n this.sdkVersion = data.sdkVersion;\n this.groupName = data.groupName;\n this.groupDescription = data.groupDescription;\n this.modelBriefs = data.modelBriefs;\n this.executions = data.executions.map((exec) =>\n exec instanceof ExecutionDump ? exec : ExecutionDump.fromJSON(exec),\n );\n this.deviceType = data.deviceType;\n }\n\n /**\n * Serialize the GroupedActionDump to a JSON string\n * Uses compact { $screenshot: id } format\n */\n serialize(indents?: number): string {\n return JSON.stringify(this.toJSON(), replacerForDumpSerialization, indents);\n }\n\n /**\n * Serialize the GroupedActionDump with inline screenshots to a JSON string.\n * Each ScreenshotItem is replaced with { base64: \"...\" }.\n */\n serializeWithInlineScreenshots(indents?: number): string {\n const processValue = (obj: unknown): unknown => {\n if (obj instanceof ScreenshotItem) {\n return { base64: obj.base64 };\n }\n if (Array.isArray(obj)) {\n return obj.map(processValue);\n }\n if (obj && typeof obj === 'object') {\n const entries = Object.entries(obj).map(([key, value]) => [\n key,\n processValue(value),\n ]);\n return Object.fromEntries(entries);\n }\n return obj;\n };\n\n const data = processValue(this.toJSON());\n return JSON.stringify(data, null, indents);\n }\n\n /**\n * Convert to a plain object for JSON serialization\n */\n toJSON(): IGroupedActionDump {\n return {\n sdkVersion: this.sdkVersion,\n groupName: this.groupName,\n groupDescription: this.groupDescription,\n modelBriefs: this.modelBriefs,\n executions: this.executions.map((exec) => exec.toJSON()),\n deviceType: this.deviceType,\n };\n }\n\n /**\n * Create a GroupedActionDump instance from a serialized JSON string\n */\n static fromSerializedString(serialized: string): GroupedActionDump {\n const parsed = JSON.parse(\n serialized,\n reviverForDumpDeserialization,\n ) as IGroupedActionDump;\n return new GroupedActionDump(parsed);\n }\n\n /**\n * Create a GroupedActionDump instance from a plain object\n */\n static fromJSON(data: IGroupedActionDump): GroupedActionDump {\n return new GroupedActionDump(data);\n }\n\n /**\n * Collect all ScreenshotItem instances from all executions.\n *\n * @returns Array of all ScreenshotItem instances across all executions\n */\n collectAllScreenshots(): ScreenshotItem[] {\n const screenshots: ScreenshotItem[] = [];\n for (const execution of this.executions) {\n screenshots.push(...execution.collectScreenshots());\n }\n return screenshots;\n }\n\n /**\n * Serialize the dump to files with screenshots as separate PNG files.\n * Creates:\n * - {basePath} - dump JSON with { $screenshot: id } references\n * - {basePath}.screenshots/ - PNG files\n * - {basePath}.screenshots.json - ID to path mapping\n *\n * @param basePath - Base path for the dump file\n */\n serializeToFiles(basePath: string): void {\n const screenshotsDir = `${basePath}.screenshots`;\n if (!existsSync(screenshotsDir)) {\n mkdirSync(screenshotsDir, { recursive: true });\n }\n\n // Write screenshots to separate files\n const screenshotMap: Record<string, string> = {};\n const screenshots = this.collectAllScreenshots();\n\n for (const screenshot of screenshots) {\n if (screenshot.hasBase64()) {\n const imagePath = join(\n screenshotsDir,\n `${screenshot.id}.${screenshot.extension}`,\n );\n const rawBase64 = screenshot.rawBase64;\n writeFileSync(imagePath, Buffer.from(rawBase64, 'base64'));\n screenshotMap[screenshot.id] = imagePath;\n }\n }\n\n // Write screenshot map file\n writeFileSync(\n `${basePath}.screenshots.json`,\n JSON.stringify(screenshotMap),\n 'utf-8',\n );\n\n // Write dump JSON with references\n writeFileSync(basePath, this.serialize(), 'utf-8');\n }\n\n /**\n * Read dump from files and return JSON string with inline screenshots.\n * Reads the dump JSON and screenshot files, then inlines the base64 data.\n *\n * @param basePath - Base path for the dump file\n * @returns JSON string with inline screenshots ({ base64: \"...\" } format)\n */\n static fromFilesAsInlineJson(basePath: string): string {\n const dumpString = readFileSync(basePath, 'utf-8');\n const screenshotsMapPath = `${basePath}.screenshots.json`;\n\n if (!existsSync(screenshotsMapPath)) {\n return dumpString;\n }\n\n // Read screenshot map and build imageMap from files\n const screenshotMap: Record<string, string> = JSON.parse(\n readFileSync(screenshotsMapPath, 'utf-8'),\n );\n\n const imageMap: Record<string, string> = {};\n for (const [id, filePath] of Object.entries(screenshotMap)) {\n if (existsSync(filePath)) {\n const data = readFileSync(filePath);\n const mime =\n filePath.endsWith('.jpeg') || filePath.endsWith('.jpg')\n ? 'jpeg'\n : 'png';\n imageMap[id] = `data:image/${mime};base64,${data.toString('base64')}`;\n }\n }\n\n // Restore image references\n const dumpData = JSON.parse(dumpString);\n const processedData = restoreImageReferences(\n dumpData,\n (id) => imageMap[id] ?? '',\n );\n return JSON.stringify(processedData);\n }\n\n /**\n * Clean up all files associated with a serialized dump.\n *\n * @param basePath - Base path for the dump file\n */\n static cleanupFiles(basePath: string): void {\n const filesToClean = [\n basePath,\n `${basePath}.screenshots.json`,\n `${basePath}.screenshots`,\n ];\n\n for (const filePath of filesToClean) {\n try {\n rmSync(filePath, { force: true, recursive: true });\n } catch {\n // Ignore errors - file may already be deleted\n }\n }\n }\n\n /**\n * Get all file paths associated with a serialized dump.\n *\n * @param basePath - Base path for the dump file\n * @returns Array of all associated file paths\n */\n static getFilePaths(basePath: string): string[] {\n return [\n basePath,\n `${basePath}.screenshots.json`,\n `${basePath}.screenshots`,\n ];\n }\n}\n\nexport type InterfaceType =\n | 'puppeteer'\n | 'playwright'\n | 'static'\n | 'chrome-extension-proxy'\n | 'android'\n | string;\n\nexport interface StreamingCodeGenerationOptions {\n /** Whether to enable streaming output */\n stream?: boolean;\n /** Callback function to handle streaming chunks */\n onChunk?: StreamingCallback;\n /** Callback function to handle streaming completion */\n onComplete?: (finalCode: string) => void;\n /** Callback function to handle streaming errors */\n onError?: (error: Error) => void;\n}\n\nexport type StreamingCallback = (chunk: CodeGenerationChunk) => void;\n\nexport interface CodeGenerationChunk {\n /** The incremental content chunk */\n content: string;\n /** The reasoning content */\n reasoning_content: string;\n /** The accumulated content so far */\n accumulated: string;\n /** Whether this is the final chunk */\n isComplete: boolean;\n /** Token usage information if available */\n usage?: AIUsageInfo;\n}\n\nexport interface StreamingAIResponse {\n /** The final accumulated content */\n content: string;\n /** Token usage information */\n usage?: AIUsageInfo;\n /** Whether the response was streamed */\n isStreamed: boolean;\n}\n\nexport interface DeviceAction<TParam = any, TReturn = any> {\n name: string;\n description?: string;\n interfaceAlias?: string;\n paramSchema?: z.ZodType<TParam>;\n call: (param: TParam, context: ExecutorContext) => Promise<TReturn> | TReturn;\n delayAfterRunner?: number;\n /**\n * An example param object for this action.\n * Locate fields with { prompt } will automatically get bbox injected when needed.\n */\n sample?: Record<string, any>;\n}\n\n/**\n * Type utilities for extracting types from DeviceAction definitions\n */\n\n/**\n * Extract parameter type from a DeviceAction\n */\nexport type ActionParam<Action extends DeviceAction<any, any>> =\n Action extends DeviceAction<infer P, any> ? P : never;\n\n/**\n * Extract return type from a DeviceAction\n */\nexport type ActionReturn<Action extends DeviceAction<any, any>> =\n Action extends DeviceAction<any, infer R> ? R : never;\n\n/**\n * Web-specific types\n */\nexport interface WebElementInfo extends BaseElement {\n id: string;\n attributes: {\n nodeType: NodeType;\n [key: string]: string;\n };\n}\n\n/**\n * Agent\n */\n\nexport type CacheConfig = {\n strategy?: 'read-only' | 'read-write' | 'write-only';\n id: string;\n};\n\nexport type Cache =\n | false // No read, no write\n | true // Will throw error at runtime - deprecated\n | CacheConfig; // Object configuration (requires explicit id)\n\nexport interface AgentOpt {\n testId?: string;\n // @deprecated\n cacheId?: string; // Keep backward compatibility, but marked as deprecated\n groupName?: string;\n groupDescription?: string;\n /* if auto generate report, default true */\n generateReport?: boolean;\n /* if auto print report msg, default true */\n autoPrintReportMsg?: boolean;\n\n /**\n * Use directory-based report format with separate image files.\n *\n * When enabled:\n * - Screenshots are saved as PNG files in a `screenshots/` subdirectory\n * - Report is generated as `index.html` with relative image paths\n * - Reduces memory usage and report file size\n *\n * IMPORTANT: 'html-and-external-assets' reports must be served via HTTP server\n * (e.g., `npx serve ./report-dir`). The file:// protocol will not\n * work due to browser CORS restrictions.\n *\n * @default 'single-html'\n */\n outputFormat?: 'single-html' | 'html-and-external-assets';\n\n onTaskStartTip?: OnTaskStartTip;\n aiActContext?: string;\n aiActionContext?: string;\n /* custom report file name */\n reportFileName?: string;\n modelConfig?: TModelConfig;\n cache?: Cache;\n /**\n * Maximum number of replanning cycles for aiAct.\n * Defaults to 20 (40 for `vlm-ui-tars`) when not provided.\n * If omitted, the agent will also read `MIDSCENE_REPLANNING_CYCLE_LIMIT` for backward compatibility.\n */\n replanningCycleLimit?: number;\n\n /**\n * Wait time in milliseconds after each action execution.\n * This allows the UI to settle and stabilize before the next action.\n * Defaults to 300ms when not provided.\n */\n waitAfterAction?: number;\n\n /**\n * When set to true, Midscene will use the target device's time (Android/iOS)\n * instead of the system time. Useful when the device time differs from the\n * host machine. Default: false\n */\n useDeviceTimestamp?: boolean;\n\n /**\n * Custom screenshot shrink factor to reduce AI token usage.\n * When set, the screenshot will be scaled down by this factor from the physical resolution.\n *\n * Example:\n * - Physical screen width: 3000px, dpr=6\n * - Logical width: 500px\n * - screenshotShrinkFactor: 2\n * - Actual shrunk screenshot width: 3000 / 2 = 1500px\n * - AI analyzes the 1500px screenshot\n * - Coordinates are transformed back to logical (500px) before actions execute\n *\n * Benefits:\n * - Reduces token usage for high-resolution screenshots\n * - Maintains accuracy by scaling coordinates appropriately\n *\n * Must be >= 1 (shrinking only, enlarging is not supported).\n *\n * @default 1 (no shrinking, uses original physical screenshot)\n */\n screenshotShrinkFactor?: number;\n\n /**\n * Custom OpenAI client factory function\n *\n * If provided, this function will be called to create OpenAI client instances\n * for each AI call, allowing you to:\n * - Wrap clients with observability tools (langsmith, langfuse)\n * - Use custom OpenAI-compatible clients\n * - Apply different configurations based on intent\n *\n * @param config - Resolved model configuration\n * @returns OpenAI client instance (original or wrapped)\n *\n * @example\n * ```typescript\n * createOpenAIClient: async (openai, opts) => {\n * // Wrap with langsmith for planning tasks\n * if (opts.baseURL?.includes('planning')) {\n * return wrapOpenAI(openai, { metadata: { task: 'planning' } });\n * }\n *\n * return openai;\n * }\n * ```\n */\n createOpenAIClient?: CreateOpenAIClientFn;\n}\n\nexport type TestStatus =\n | 'passed'\n | 'failed'\n | 'timedOut'\n | 'skipped'\n | 'interrupted';\n\nexport interface ReportFileWithAttributes {\n reportFilePath: string;\n reportAttributes: {\n testDuration: number;\n testStatus: TestStatus;\n testTitle: string;\n testId: string;\n testDescription: string;\n };\n}\n"],"names":["UIContext","ServiceError","Error","message","dump","replacerForDumpSerialization","_key","value","reviverForDumpDeserialization","key","ScreenshotItem","ExecutionDump","indents","JSON","task","serialized","parsed","data","screenshots","record","GroupedActionDump","processValue","obj","Array","entries","Object","exec","execution","basePath","screenshotsDir","existsSync","mkdirSync","screenshotMap","screenshot","imagePath","join","rawBase64","writeFileSync","Buffer","dumpString","readFileSync","screenshotsMapPath","imageMap","id","filePath","mime","dumpData","processedData","restoreImageReferences","filesToClean","rmSync"],"mappings":";;;;;AAAqD;;;;;;;;;;AA+G9C,MAAeA;AA4BtB;AA4EO,MAAMC,qBAAqBC;IAGhC,YAAYC,OAAe,EAAEC,IAAiB,CAAE;QAC9C,KAAK,CAACD,UAHR;QAIE,IAAI,CAAC,IAAI,GAAG;QACZ,IAAI,CAAC,IAAI,GAAGC;IACd;AACF;AAgOA,SAASC,6BAA6BC,IAAY,EAAEC,KAAU;IAC5D,IAAIA,SAASA,MAAM,WAAW,EAAE,SAAS,QACvC,OAAO;IAET,IAAIA,SAASA,MAAM,WAAW,EAAE,SAAS,WACvC,OAAO;IAGT,IAAIA,SAAS,AAAgC,cAAhC,OAAOA,MAAM,cAAc,EACtC,OAAOA,MAAM,cAAc;IAE7B,OAAOA;AACT;AAcA,SAASC,8BAA8BC,GAAW,EAAEF,KAAU;IAE5D,IAAIE,AAAQ,iBAARA,OAAwB,AAAiB,YAAjB,OAAOF,SAAsBA,AAAU,SAAVA,OACvD,OAAOA;IAKT,IAAIG,eAAe,YAAY,CAACH,QAC9B,OAAOA;IAIL,YAAYA,SAAgBA,MAAM,MAAM;IAI5C,OAAOA;AACT;AAKO,MAAMI;IAkBX,UAAUC,OAAgB,EAAU;QAClC,OAAOC,KAAK,SAAS,CAAC,IAAI,CAAC,MAAM,IAAIR,8BAA8BO;IACrE;IAKA,SAAyB;QACvB,OAAO;YACL,SAAS,IAAI,CAAC,OAAO;YACrB,MAAM,IAAI,CAAC,IAAI;YACf,aAAa,IAAI,CAAC,WAAW;YAC7B,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAACE,OAAU;oBAC/B,GAAGA,IAAI;oBACP,UAAUA,KAAK,QAAQ,IAAI,EAAE;gBAC/B;YACA,cAAc,IAAI,CAAC,YAAY;QACjC;IACF;IAKA,OAAO,qBAAqBC,UAAkB,EAAiB;QAC7D,MAAMC,SAASH,KAAK,KAAK,CACvBE,YACAP;QAEF,OAAO,IAAIG,cAAcK;IAC3B;IAKA,OAAO,SAASC,IAAoB,EAAiB;QACnD,OAAO,IAAIN,cAAcM;IAC3B;IAQA,qBAAuC;QACrC,MAAMC,cAAgC,EAAE;QAExC,KAAK,MAAMJ,QAAQ,IAAI,CAAC,KAAK,CAAE;YAE7B,IAAIA,KAAK,SAAS,EAAE,sBAAsBJ,gBACxCQ,YAAY,IAAI,CAACJ,KAAK,SAAS,CAAC,UAAU;YAI5C,IAAIA,KAAK,QAAQ,EACf;gBAAA,KAAK,MAAMK,UAAUL,KAAK,QAAQ,CAChC,IAAIK,OAAO,UAAU,YAAYT,gBAC/BQ,YAAY,IAAI,CAACC,OAAO,UAAU;YAEtC;QAEJ;QAEA,OAAOD;IACT;IA3EA,YAAYD,IAAoB,CAAE;QANlC;QACA;QACA;QACA;QACA;QAGE,IAAI,CAAC,OAAO,GAAGA,KAAK,OAAO;QAC3B,IAAI,CAAC,IAAI,GAAGA,KAAK,IAAI;QACrB,IAAI,CAAC,WAAW,GAAGA,KAAK,WAAW;QACnC,IAAI,CAAC,KAAK,GAAGA,KAAK,KAAK;QACvB,IAAI,CAAC,YAAY,GAAGA,KAAK,YAAY;IACvC;AAsEF;AAwIO,MAAMG;IAuBX,UAAUR,OAAgB,EAAU;QAClC,OAAOC,KAAK,SAAS,CAAC,IAAI,CAAC,MAAM,IAAIR,8BAA8BO;IACrE;IAMA,+BAA+BA,OAAgB,EAAU;QACvD,MAAMS,eAAe,CAACC;YACpB,IAAIA,eAAeZ,gBACjB,OAAO;gBAAE,QAAQY,IAAI,MAAM;YAAC;YAE9B,IAAIC,MAAM,OAAO,CAACD,MAChB,OAAOA,IAAI,GAAG,CAACD;YAEjB,IAAIC,OAAO,AAAe,YAAf,OAAOA,KAAkB;gBAClC,MAAME,UAAUC,OAAO,OAAO,CAACH,KAAK,GAAG,CAAC,CAAC,CAACb,KAAKF,MAAM,GAAK;wBACxDE;wBACAY,aAAad;qBACd;gBACD,OAAOkB,OAAO,WAAW,CAACD;YAC5B;YACA,OAAOF;QACT;QAEA,MAAML,OAAOI,aAAa,IAAI,CAAC,MAAM;QACrC,OAAOR,KAAK,SAAS,CAACI,MAAM,MAAML;IACpC;IAKA,SAA6B;QAC3B,OAAO;YACL,YAAY,IAAI,CAAC,UAAU;YAC3B,WAAW,IAAI,CAAC,SAAS;YACzB,kBAAkB,IAAI,CAAC,gBAAgB;YACvC,aAAa,IAAI,CAAC,WAAW;YAC7B,YAAY,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAACc,OAASA,KAAK,MAAM;YACrD,YAAY,IAAI,CAAC,UAAU;QAC7B;IACF;IAKA,OAAO,qBAAqBX,UAAkB,EAAqB;QACjE,MAAMC,SAASH,KAAK,KAAK,CACvBE,YACAP;QAEF,OAAO,IAAIY,kBAAkBJ;IAC/B;IAKA,OAAO,SAASC,IAAwB,EAAqB;QAC3D,OAAO,IAAIG,kBAAkBH;IAC/B;IAOA,wBAA0C;QACxC,MAAMC,cAAgC,EAAE;QACxC,KAAK,MAAMS,aAAa,IAAI,CAAC,UAAU,CACrCT,YAAY,IAAI,IAAIS,UAAU,kBAAkB;QAElD,OAAOT;IACT;IAWA,iBAAiBU,QAAgB,EAAQ;QACvC,MAAMC,iBAAiB,GAAGD,SAAS,YAAY,CAAC;QAChD,IAAI,CAACE,WAAWD,iBACdE,UAAUF,gBAAgB;YAAE,WAAW;QAAK;QAI9C,MAAMG,gBAAwC,CAAC;QAC/C,MAAMd,cAAc,IAAI,CAAC,qBAAqB;QAE9C,KAAK,MAAMe,cAAcf,YACvB,IAAIe,WAAW,SAAS,IAAI;YAC1B,MAAMC,YAAYC,KAChBN,gBACA,GAAGI,WAAW,EAAE,CAAC,CAAC,EAAEA,WAAW,SAAS,EAAE;YAE5C,MAAMG,YAAYH,WAAW,SAAS;YACtCI,cAAcH,WAAWI,OAAO,IAAI,CAACF,WAAW;YAChDJ,aAAa,CAACC,WAAW,EAAE,CAAC,GAAGC;QACjC;QAIFG,cACE,GAAGT,SAAS,iBAAiB,CAAC,EAC9Bf,KAAK,SAAS,CAACmB,gBACf;QAIFK,cAAcT,UAAU,IAAI,CAAC,SAAS,IAAI;IAC5C;IASA,OAAO,sBAAsBA,QAAgB,EAAU;QACrD,MAAMW,aAAaC,aAAaZ,UAAU;QAC1C,MAAMa,qBAAqB,GAAGb,SAAS,iBAAiB,CAAC;QAEzD,IAAI,CAACE,WAAWW,qBACd,OAAOF;QAIT,MAAMP,gBAAwCnB,KAAK,KAAK,CACtD2B,aAAaC,oBAAoB;QAGnC,MAAMC,WAAmC,CAAC;QAC1C,KAAK,MAAM,CAACC,IAAIC,SAAS,IAAInB,OAAO,OAAO,CAACO,eAC1C,IAAIF,WAAWc,WAAW;YACxB,MAAM3B,OAAOuB,aAAaI;YAC1B,MAAMC,OACJD,SAAS,QAAQ,CAAC,YAAYA,SAAS,QAAQ,CAAC,UAC5C,SACA;YACNF,QAAQ,CAACC,GAAG,GAAG,CAAC,WAAW,EAAEE,KAAK,QAAQ,EAAE5B,KAAK,QAAQ,CAAC,WAAW;QACvE;QAIF,MAAM6B,WAAWjC,KAAK,KAAK,CAAC0B;QAC5B,MAAMQ,gBAAgBC,uBACpBF,UACA,CAACH,KAAOD,QAAQ,CAACC,GAAG,IAAI;QAE1B,OAAO9B,KAAK,SAAS,CAACkC;IACxB;IAOA,OAAO,aAAanB,QAAgB,EAAQ;QAC1C,MAAMqB,eAAe;YACnBrB;YACA,GAAGA,SAAS,iBAAiB,CAAC;YAC9B,GAAGA,SAAS,YAAY,CAAC;SAC1B;QAED,KAAK,MAAMgB,YAAYK,aACrB,IAAI;YACFC,OAAON,UAAU;gBAAE,OAAO;gBAAM,WAAW;YAAK;QAClD,EAAE,OAAM,CAER;IAEJ;IAQA,OAAO,aAAahB,QAAgB,EAAY;QAC9C,OAAO;YACLA;YACA,GAAGA,SAAS,iBAAiB,CAAC;YAC9B,GAAGA,SAAS,YAAY,CAAC;SAC1B;IACH;IA9MA,YAAYX,IAAwB,CAAE;QAPtC;QACA;QACA;QACA;QACA;QACA;QAGE,IAAI,CAAC,UAAU,GAAGA,KAAK,UAAU;QACjC,IAAI,CAAC,SAAS,GAAGA,KAAK,SAAS;QAC/B,IAAI,CAAC,gBAAgB,GAAGA,KAAK,gBAAgB;QAC7C,IAAI,CAAC,WAAW,GAAGA,KAAK,WAAW;QACnC,IAAI,CAAC,UAAU,GAAGA,KAAK,UAAU,CAAC,GAAG,CAAC,CAACS,OACrCA,gBAAgBf,gBAAgBe,OAAOf,cAAc,QAAQ,CAACe;QAEhE,IAAI,CAAC,UAAU,GAAGT,KAAK,UAAU;IACnC;AAsMF"}
1
+ {"version":3,"file":"types.mjs","sources":["../../src/types.ts"],"sourcesContent":["/* eslint-disable @typescript-eslint/no-explicit-any */\n\nimport {\n existsSync,\n mkdirSync,\n readFileSync,\n rmSync,\n writeFileSync,\n} from 'node:fs';\nimport { join } from 'node:path';\nimport type { NodeType } from '@midscene/shared/constants';\nimport type { CreateOpenAIClientFn, TModelConfig } from '@midscene/shared/env';\nimport type {\n BaseElement,\n LocateResultElement,\n Rect,\n Size,\n} from '@midscene/shared/types';\nimport type { z } from 'zod';\nimport type { TUserPrompt } from './common';\nimport { restoreImageReferences } from './dump/image-restoration';\nimport { ScreenshotItem } from './screenshot-item';\nimport type {\n DetailedLocateParam,\n MidsceneYamlFlowItem,\n ServiceExtractOption,\n} from './yaml';\n\nexport type {\n ElementTreeNode,\n BaseElement,\n Rect,\n Size,\n Point,\n} from '@midscene/shared/types';\nexport * from './yaml';\n\nexport type AIUsageInfo = Record<string, any> & {\n prompt_tokens: number | undefined;\n completion_tokens: number | undefined;\n total_tokens: number | undefined;\n cached_input: number | undefined;\n time_cost: number | undefined;\n model_name: string | undefined;\n model_description: string | undefined;\n intent: string | undefined;\n request_id: string | undefined;\n};\n\nexport type { LocateResultElement };\n\nexport type AISingleElementResponseByPosition = {\n position?: {\n x: number;\n y: number;\n };\n bbox?: [number, number, number, number];\n reason: string;\n text: string;\n};\n\nexport interface AIElementCoordinatesResponse {\n bbox: [number, number, number, number];\n errors?: string[];\n}\n\nexport type AIElementResponse = AIElementCoordinatesResponse;\n\nexport interface AIDataExtractionResponse<DataDemand> {\n data: DataDemand;\n errors?: string[];\n thought?: string;\n}\n\nexport interface AISectionLocatorResponse {\n bbox: [number, number, number, number];\n references_bbox?: [number, number, number, number][];\n error?: string;\n}\n\nexport interface AIAssertionResponse {\n pass: boolean;\n thought: string;\n}\n\nexport interface AIDescribeElementResponse {\n description: string;\n error?: string;\n}\n\nexport interface LocatorValidatorOption {\n centerDistanceThreshold?: number;\n}\n\nexport interface LocateValidatorResult {\n pass: boolean;\n rect: Rect;\n center: [number, number];\n centerDistance?: number;\n}\n\nexport interface AgentDescribeElementAtPointResult {\n prompt: string;\n deepLocate: boolean;\n verifyResult?: LocateValidatorResult;\n}\n\n/**\n * context\n */\n\nexport abstract class UIContext {\n /**\n * screenshot of the current UI state. which size is shotSize(be shrunk by screenshotShrinkFactor),\n */\n abstract screenshot: ScreenshotItem;\n\n /**\n * screenshot size after shrinking\n */\n abstract shotSize: Size;\n\n /**\n * The ratio for converting shrunk screenshot coordinates to logical coordinates.\n *\n * Example:\n * - Physical screen width: 3000px, dpr=6\n * - Logical width: 500px\n * - User-defined screenshotShrinkFactor: 2\n * - Actual shrunk screenshot width: 3000 / 2 = 1500px\n * - shrunkShotToLogicalRatio: dpr / screenshotShrinkFactor = 6 / 2 = 3\n * - To map back to logical coordinates: 1500 / shrunkShotToLogicalRatio = 500px\n */\n abstract shrunkShotToLogicalRatio: number;\n\n abstract _isFrozen?: boolean;\n\n // @deprecated - backward compatibility for aiLocate\n abstract deprecatedDpr?: number;\n}\n\nexport type EnsureObject<T> = { [K in keyof T]: any };\n\nexport type ServiceAction = 'locate' | 'extract' | 'assert' | 'describe';\n\nexport type ServiceExtractParam = string | Record<string, string>;\n\nexport type ElementCacheFeature = Record<string, unknown>;\n\nexport interface LocateResult {\n element: LocateResultElement | null;\n rect?: Rect;\n}\n\nexport type ThinkingLevel = 'off' | 'medium' | 'high';\n\nexport type DeepThinkOption = 'unset' | true | false;\n\nexport interface ServiceTaskInfo {\n durationMs: number;\n formatResponse?: string;\n rawResponse?: string;\n usage?: AIUsageInfo;\n searchArea?: Rect;\n searchAreaRawResponse?: string;\n searchAreaUsage?: AIUsageInfo;\n reasoning_content?: string;\n}\n\nexport interface DumpMeta {\n logTime: number;\n}\n\nexport interface ReportDumpWithAttributes {\n dumpString: string;\n attributes?: Record<string, any>;\n}\n\nexport interface ServiceDump extends DumpMeta {\n type: 'locate' | 'extract' | 'assert';\n logId: string;\n userQuery: {\n element?: TUserPrompt;\n dataDemand?: ServiceExtractParam;\n assertion?: TUserPrompt;\n };\n matchedElement: LocateResultElement[];\n matchedRect?: Rect;\n deepLocate?: boolean;\n data: any;\n assertionPass?: boolean;\n assertionThought?: string;\n taskInfo: ServiceTaskInfo;\n error?: string;\n output?: any;\n}\n\nexport type PartialServiceDumpFromSDK = Omit<\n ServiceDump,\n 'logTime' | 'logId' | 'model_name'\n>;\n\nexport interface ServiceResultBase {\n dump: ServiceDump;\n}\n\nexport type LocateResultWithDump = LocateResult & ServiceResultBase;\n\nexport interface ServiceExtractResult<T> extends ServiceResultBase {\n data: T;\n thought?: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}\n\nexport class ServiceError extends Error {\n dump: ServiceDump;\n\n constructor(message: string, dump: ServiceDump) {\n super(message);\n this.name = 'ServiceError';\n this.dump = dump;\n }\n}\n\n// intermediate variables to optimize the return value by AI\nexport interface LiteUISection {\n name: string;\n description: string;\n sectionCharacteristics: string;\n textIds: string[];\n}\n\nexport type ElementById = (id: string) => BaseElement | null;\n\nexport type ServiceAssertionResponse = AIAssertionResponse & {\n usage?: AIUsageInfo;\n};\n\n/**\n * agent\n */\n\nexport type OnTaskStartTip = (tip: string) => Promise<void> | void;\n\nexport interface AgentWaitForOpt extends ServiceExtractOption {\n checkIntervalMs?: number;\n timeoutMs?: number;\n}\n\nexport interface AgentAssertOpt {\n keepRawResponse?: boolean;\n}\n\n/**\n * planning\n *\n */\n\nexport interface PlanningLocateParam extends DetailedLocateParam {\n bbox?: [number, number, number, number];\n}\n\nexport interface PlanningAction<ParamType = any> {\n thought?: string;\n log?: string; // a brief preamble to the user explaining what you’re about to do\n type: string;\n param: ParamType;\n}\n\nexport type SubGoalStatus = 'pending' | 'running' | 'finished';\n\nexport interface SubGoal {\n index: number;\n status: SubGoalStatus;\n description: string;\n logs?: string[];\n}\n\nexport interface RawResponsePlanningAIResponse {\n action: PlanningAction;\n thought?: string;\n log: string;\n memory?: string;\n error?: string;\n finalizeMessage?: string;\n finalizeSuccess?: boolean;\n updateSubGoals?: SubGoal[];\n markFinishedIndexes?: number[];\n}\n\nexport interface PlanningAIResponse\n extends Omit<RawResponsePlanningAIResponse, 'action'> {\n actions?: PlanningAction[];\n usage?: AIUsageInfo;\n rawResponse?: string;\n yamlFlow?: MidsceneYamlFlowItem[];\n yamlString?: string;\n error?: string;\n reasoning_content?: string;\n shouldContinuePlanning: boolean;\n output?: string; // Output message from <complete> tag (same as finalizeMessage)\n}\n\nexport interface PlanningActionParamSleep {\n timeMs: number;\n}\n\nexport interface PlanningActionParamError {\n thought: string;\n}\n\nexport type PlanningActionParamWaitFor = AgentWaitForOpt & {};\n\nexport interface LongPressParam {\n duration?: number;\n}\n\nexport interface PullParam {\n direction: 'up' | 'down';\n distance?: number;\n duration?: number;\n}\n/**\n * misc\n */\n\nexport interface Color {\n name: string;\n hex: string;\n}\n\nexport interface BaseAgentParserOpt {\n selector?: string;\n}\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PuppeteerParserOpt extends BaseAgentParserOpt {}\n\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PlaywrightParserOpt extends BaseAgentParserOpt {}\n\n/*\naction\n*/\nexport interface ExecutionTaskProgressOptions {\n onTaskStart?: (task: ExecutionTask) => Promise<void> | void;\n}\n\nexport interface ExecutionRecorderItem {\n type: 'screenshot';\n ts: number;\n screenshot?: ScreenshotItem;\n timing?: string;\n}\n\nexport type ExecutionTaskType = 'Planning' | 'Insight' | 'Action Space' | 'Log';\n\nexport interface ExecutorContext {\n task: ExecutionTask;\n element?: LocateResultElement | null;\n uiContext?: UIContext;\n}\n\nexport interface ExecutionTaskApply<\n Type extends ExecutionTaskType = any,\n TaskParam = any,\n TaskOutput = any,\n TaskLog = any,\n> {\n type: Type;\n subType?: string;\n param?: TaskParam;\n thought?: string;\n uiContext?: UIContext;\n executor: (\n param: TaskParam,\n context: ExecutorContext,\n ) => // biome-ignore lint/suspicious/noConfusingVoidType: void is intentionally allowed as some executors may not return a value\n | Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void>\n | undefined\n | void;\n}\n\nexport interface ExecutionTaskHitBy {\n from: string;\n context: Record<string, any>;\n}\n\nexport interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {\n output?: TaskOutput;\n log?: TaskLog;\n recorder?: ExecutionRecorderItem[];\n hitBy?: ExecutionTaskHitBy;\n}\n\nexport type ExecutionTask<\n E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<\n any,\n any,\n any\n >,\n> = E &\n ExecutionTaskReturn<\n E extends ExecutionTaskApply<any, any, infer TaskOutput, any>\n ? TaskOutput\n : unknown,\n E extends ExecutionTaskApply<any, any, any, infer TaskLog>\n ? TaskLog\n : unknown\n > & {\n taskId: string;\n status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';\n error?: Error;\n errorMessage?: string;\n errorStack?: string;\n timing?: {\n start: number;\n getUiContextStart?: number;\n getUiContextEnd?: number;\n callAiStart?: number;\n callAiEnd?: number;\n beforeInvokeActionHookStart?: number;\n beforeInvokeActionHookEnd?: number;\n callActionStart?: number;\n callActionEnd?: number;\n afterInvokeActionHookStart?: number;\n afterInvokeActionHookEnd?: number;\n captureAfterCallingSnapshotStart?: number;\n captureAfterCallingSnapshotEnd?: number;\n end?: number;\n cost?: number;\n };\n usage?: AIUsageInfo;\n searchAreaUsage?: AIUsageInfo;\n reasoning_content?: string;\n };\n\nexport interface IExecutionDump extends DumpMeta {\n name: string;\n description?: string;\n tasks: ExecutionTask[];\n aiActContext?: string;\n}\n\n/**\n * Replacer function for JSON serialization that handles Page, Browser objects and ScreenshotItem\n */\nfunction replacerForDumpSerialization(_key: string, value: any): any {\n if (value && value.constructor?.name === 'Page') {\n return '[Page object]';\n }\n if (value && value.constructor?.name === 'Browser') {\n return '[Browser object]';\n }\n // Handle ScreenshotItem serialization\n if (value && typeof value.toSerializable === 'function') {\n return value.toSerializable();\n }\n return value;\n}\n\n/**\n * Reviver function for JSON deserialization that handles ScreenshotItem formats.\n *\n * BEHAVIOR:\n * - For { $screenshot: \"id\" } format: Left as-is (plain object)\n * Consumer must use imageMap to restore base64 data\n * - For { base64: \"...\" } format: Creates ScreenshotItem from base64 data\n *\n * @param key - JSON key being processed\n * @param value - JSON value being processed\n * @returns Restored value\n */\nfunction reviverForDumpDeserialization(key: string, value: any): any {\n // Only process screenshot fields\n if (key !== 'screenshot' || typeof value !== 'object' || value === null) {\n return value;\n }\n\n // Handle serialized format: { $screenshot: \"id\" }\n // Leave as plain object — consumer uses imageMap to restore\n if (ScreenshotItem.isSerialized(value)) {\n return value;\n }\n\n // Handle inline base64 format: { base64: \"...\" }\n if ('base64' in value && typeof value.base64 === 'string') {\n return value;\n }\n\n return value;\n}\n\n/**\n * ExecutionDump class for serializing and deserializing execution dumps\n */\nexport class ExecutionDump implements IExecutionDump {\n logTime: number;\n name: string;\n description?: string;\n tasks: ExecutionTask[];\n aiActContext?: string;\n\n constructor(data: IExecutionDump) {\n this.logTime = data.logTime;\n this.name = data.name;\n this.description = data.description;\n this.tasks = data.tasks;\n this.aiActContext = data.aiActContext;\n }\n\n /**\n * Serialize the ExecutionDump to a JSON string\n */\n serialize(indents?: number): string {\n return JSON.stringify(this.toJSON(), replacerForDumpSerialization, indents);\n }\n\n /**\n * Convert to a plain object for JSON serialization\n */\n toJSON(): IExecutionDump {\n return {\n logTime: this.logTime,\n name: this.name,\n description: this.description,\n tasks: this.tasks.map((task) => ({\n ...task,\n recorder: task.recorder || [],\n })),\n aiActContext: this.aiActContext,\n };\n }\n\n /**\n * Create an ExecutionDump instance from a serialized JSON string\n */\n static fromSerializedString(serialized: string): ExecutionDump {\n const parsed = JSON.parse(\n serialized,\n reviverForDumpDeserialization,\n ) as IExecutionDump;\n return new ExecutionDump(parsed);\n }\n\n /**\n * Create an ExecutionDump instance from a plain object\n */\n static fromJSON(data: IExecutionDump): ExecutionDump {\n return new ExecutionDump(data);\n }\n\n /**\n * Collect all ScreenshotItem instances from tasks.\n * Scans through uiContext and recorder items to find screenshots.\n *\n * @returns Array of ScreenshotItem instances\n */\n collectScreenshots(): ScreenshotItem[] {\n const screenshots: ScreenshotItem[] = [];\n\n for (const task of this.tasks) {\n // Collect uiContext.screenshot if present\n if (task.uiContext?.screenshot instanceof ScreenshotItem) {\n screenshots.push(task.uiContext.screenshot);\n }\n\n // Collect recorder screenshots\n if (task.recorder) {\n for (const record of task.recorder) {\n if (record.screenshot instanceof ScreenshotItem) {\n screenshots.push(record.screenshot);\n }\n }\n }\n }\n\n return screenshots;\n }\n}\n\n/*\ntask - service-locate\n*/\nexport type ExecutionTaskInsightLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskInsightLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport type ExecutionTaskInsightDump = ServiceDump;\n\nexport type ExecutionTaskInsightLocateApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightLocateParam,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskInsightDump\n>;\n\nexport type ExecutionTaskInsightLocate =\n ExecutionTask<ExecutionTaskInsightLocateApply>;\n\n/*\ntask - service-query\n*/\nexport interface ExecutionTaskInsightQueryParam {\n dataDemand: ServiceExtractParam;\n}\n\nexport interface ExecutionTaskInsightQueryOutput {\n data: any;\n}\n\nexport type ExecutionTaskInsightQueryApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightQueryParam,\n any,\n ExecutionTaskInsightDump\n>;\n\nexport type ExecutionTaskInsightQuery =\n ExecutionTask<ExecutionTaskInsightQueryApply>;\n\n/*\ntask - assertion\n*/\nexport interface ExecutionTaskInsightAssertionParam {\n assertion: string;\n}\n\nexport type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightAssertionParam,\n ServiceAssertionResponse,\n ExecutionTaskInsightDump\n>;\n\nexport type ExecutionTaskInsightAssertion =\n ExecutionTask<ExecutionTaskInsightAssertionApply>;\n\n/*\ntask - action (i.e. interact) \n*/\nexport type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<\n 'Action Space',\n ActionParam,\n void,\n void\n>;\n\nexport type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;\n\n/*\ntask - Log\n*/\n\nexport type ExecutionTaskLogApply<\n LogParam = {\n content: string;\n },\n> = ExecutionTaskApply<'Log', LogParam, void, void>;\n\nexport type ExecutionTaskLog = ExecutionTask<ExecutionTaskLogApply>;\n\n/*\ntask - planning\n*/\n\nexport type ExecutionTaskPlanningApply = ExecutionTaskApply<\n 'Planning',\n {\n userInstruction: string;\n aiActContext?: string;\n },\n PlanningAIResponse\n>;\n\nexport type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;\n\n/*\ntask - planning-locate\n*/\nexport type ExecutionTaskPlanningLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskPlanningLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport type ExecutionTaskPlanningDump = ServiceDump;\n\nexport type ExecutionTaskPlanningLocateApply = ExecutionTaskApply<\n 'Planning',\n ExecutionTaskPlanningLocateParam,\n ExecutionTaskPlanningLocateOutput,\n ExecutionTaskPlanningDump\n>;\n\nexport type ExecutionTaskPlanningLocate =\n ExecutionTask<ExecutionTaskPlanningLocateApply>;\n\n/*\nGrouped dump\n*/\nexport interface IGroupedActionDump {\n sdkVersion: string;\n groupName: string;\n groupDescription?: string;\n modelBriefs: string[];\n executions: IExecutionDump[];\n deviceType?: string;\n}\n\n/**\n * GroupedActionDump class for serializing and deserializing grouped action dumps\n */\nexport class GroupedActionDump implements IGroupedActionDump {\n sdkVersion: string;\n groupName: string;\n groupDescription?: string;\n modelBriefs: string[];\n executions: ExecutionDump[];\n deviceType?: string;\n\n constructor(data: IGroupedActionDump) {\n this.sdkVersion = data.sdkVersion;\n this.groupName = data.groupName;\n this.groupDescription = data.groupDescription;\n this.modelBriefs = data.modelBriefs;\n this.executions = data.executions.map((exec) =>\n exec instanceof ExecutionDump ? exec : ExecutionDump.fromJSON(exec),\n );\n this.deviceType = data.deviceType;\n }\n\n /**\n * Serialize the GroupedActionDump to a JSON string\n * Uses compact { $screenshot: id } format\n */\n serialize(indents?: number): string {\n return JSON.stringify(this.toJSON(), replacerForDumpSerialization, indents);\n }\n\n /**\n * Serialize the GroupedActionDump with inline screenshots to a JSON string.\n * Each ScreenshotItem is replaced with { base64: \"...\", capturedAt }.\n */\n serializeWithInlineScreenshots(indents?: number): string {\n const processValue = (obj: unknown): unknown => {\n if (obj instanceof ScreenshotItem) {\n return { base64: obj.base64, capturedAt: obj.capturedAt };\n }\n if (Array.isArray(obj)) {\n return obj.map(processValue);\n }\n if (obj && typeof obj === 'object') {\n const entries = Object.entries(obj).map(([key, value]) => [\n key,\n processValue(value),\n ]);\n return Object.fromEntries(entries);\n }\n return obj;\n };\n\n const data = processValue(this.toJSON());\n return JSON.stringify(data, null, indents);\n }\n\n /**\n * Convert to a plain object for JSON serialization\n */\n toJSON(): IGroupedActionDump {\n return {\n sdkVersion: this.sdkVersion,\n groupName: this.groupName,\n groupDescription: this.groupDescription,\n modelBriefs: this.modelBriefs,\n executions: this.executions.map((exec) => exec.toJSON()),\n deviceType: this.deviceType,\n };\n }\n\n /**\n * Create a GroupedActionDump instance from a serialized JSON string\n */\n static fromSerializedString(serialized: string): GroupedActionDump {\n const parsed = JSON.parse(\n serialized,\n reviverForDumpDeserialization,\n ) as IGroupedActionDump;\n return new GroupedActionDump(parsed);\n }\n\n /**\n * Create a GroupedActionDump instance from a plain object\n */\n static fromJSON(data: IGroupedActionDump): GroupedActionDump {\n return new GroupedActionDump(data);\n }\n\n /**\n * Collect all ScreenshotItem instances from all executions.\n *\n * @returns Array of all ScreenshotItem instances across all executions\n */\n collectAllScreenshots(): ScreenshotItem[] {\n const screenshots: ScreenshotItem[] = [];\n for (const execution of this.executions) {\n screenshots.push(...execution.collectScreenshots());\n }\n return screenshots;\n }\n\n /**\n * Serialize the dump to files with screenshots as separate PNG files.\n * Creates:\n * - {basePath} - dump JSON with { $screenshot: id } references\n * - {basePath}.screenshots/ - PNG files\n * - {basePath}.screenshots.json - ID to path mapping\n *\n * @param basePath - Base path for the dump file\n */\n serializeToFiles(basePath: string): void {\n const screenshotsDir = `${basePath}.screenshots`;\n if (!existsSync(screenshotsDir)) {\n mkdirSync(screenshotsDir, { recursive: true });\n }\n\n // Write screenshots to separate files\n const screenshotMap: Record<string, string> = {};\n const screenshots = this.collectAllScreenshots();\n\n for (const screenshot of screenshots) {\n if (screenshot.hasBase64()) {\n const imagePath = join(\n screenshotsDir,\n `${screenshot.id}.${screenshot.extension}`,\n );\n const rawBase64 = screenshot.rawBase64;\n writeFileSync(imagePath, Buffer.from(rawBase64, 'base64'));\n screenshotMap[screenshot.id] = imagePath;\n }\n }\n\n // Write screenshot map file\n writeFileSync(\n `${basePath}.screenshots.json`,\n JSON.stringify(screenshotMap),\n 'utf-8',\n );\n\n // Write dump JSON with references\n writeFileSync(basePath, this.serialize(), 'utf-8');\n }\n\n /**\n * Read dump from files and return JSON string with inline screenshots.\n * Reads the dump JSON and screenshot files, then inlines the base64 data.\n *\n * @param basePath - Base path for the dump file\n * @returns JSON string with inline screenshots ({ base64: \"...\" } format)\n */\n static fromFilesAsInlineJson(basePath: string): string {\n const dumpString = readFileSync(basePath, 'utf-8');\n const screenshotsMapPath = `${basePath}.screenshots.json`;\n\n if (!existsSync(screenshotsMapPath)) {\n return dumpString;\n }\n\n // Read screenshot map and build imageMap from files\n const screenshotMap: Record<string, string> = JSON.parse(\n readFileSync(screenshotsMapPath, 'utf-8'),\n );\n\n const imageMap: Record<string, string> = {};\n for (const [id, filePath] of Object.entries(screenshotMap)) {\n if (existsSync(filePath)) {\n const data = readFileSync(filePath);\n const mime =\n filePath.endsWith('.jpeg') || filePath.endsWith('.jpg')\n ? 'jpeg'\n : 'png';\n imageMap[id] = `data:image/${mime};base64,${data.toString('base64')}`;\n }\n }\n\n // Restore image references\n const dumpData = JSON.parse(dumpString);\n const processedData = restoreImageReferences(\n dumpData,\n (id) => imageMap[id] ?? '',\n );\n return JSON.stringify(processedData);\n }\n\n /**\n * Clean up all files associated with a serialized dump.\n *\n * @param basePath - Base path for the dump file\n */\n static cleanupFiles(basePath: string): void {\n const filesToClean = [\n basePath,\n `${basePath}.screenshots.json`,\n `${basePath}.screenshots`,\n ];\n\n for (const filePath of filesToClean) {\n try {\n rmSync(filePath, { force: true, recursive: true });\n } catch {\n // Ignore errors - file may already be deleted\n }\n }\n }\n\n /**\n * Get all file paths associated with a serialized dump.\n *\n * @param basePath - Base path for the dump file\n * @returns Array of all associated file paths\n */\n static getFilePaths(basePath: string): string[] {\n return [\n basePath,\n `${basePath}.screenshots.json`,\n `${basePath}.screenshots`,\n ];\n }\n}\n\nexport type InterfaceType =\n | 'puppeteer'\n | 'playwright'\n | 'static'\n | 'chrome-extension-proxy'\n | 'android'\n | string;\n\nexport interface StreamingCodeGenerationOptions {\n /** Whether to enable streaming output */\n stream?: boolean;\n /** Callback function to handle streaming chunks */\n onChunk?: StreamingCallback;\n /** Callback function to handle streaming completion */\n onComplete?: (finalCode: string) => void;\n /** Callback function to handle streaming errors */\n onError?: (error: Error) => void;\n}\n\nexport type StreamingCallback = (chunk: CodeGenerationChunk) => void;\n\nexport interface CodeGenerationChunk {\n /** The incremental content chunk */\n content: string;\n /** The reasoning content */\n reasoning_content: string;\n /** The accumulated content so far */\n accumulated: string;\n /** Whether this is the final chunk */\n isComplete: boolean;\n /** Token usage information if available */\n usage?: AIUsageInfo;\n}\n\nexport interface StreamingAIResponse {\n /** The final accumulated content */\n content: string;\n /** Token usage information */\n usage?: AIUsageInfo;\n /** Whether the response was streamed */\n isStreamed: boolean;\n}\n\nexport interface DeviceAction<TParam = any, TReturn = any> {\n name: string;\n description?: string;\n interfaceAlias?: string;\n paramSchema?: z.ZodType<TParam>;\n call: (param: TParam, context: ExecutorContext) => Promise<TReturn> | TReturn;\n delayAfterRunner?: number;\n /**\n * An example param object for this action.\n * Locate fields with { prompt } will automatically get bbox injected when needed.\n */\n sample?: { [K in keyof TParam]?: any };\n}\n\n/**\n * Type utilities for extracting types from DeviceAction definitions\n */\n\n/**\n * Extract parameter type from a DeviceAction\n */\nexport type ActionParam<Action extends DeviceAction<any, any>> =\n Action extends DeviceAction<infer P, any> ? P : never;\n\n/**\n * Extract return type from a DeviceAction\n */\nexport type ActionReturn<Action extends DeviceAction<any, any>> =\n Action extends DeviceAction<any, infer R> ? R : never;\n\n/**\n * Web-specific types\n */\nexport interface WebElementInfo extends BaseElement {\n id: string;\n attributes: {\n nodeType: NodeType;\n [key: string]: string;\n };\n}\n\n/**\n * Agent\n */\n\nexport type CacheConfig = {\n strategy?: 'read-only' | 'read-write' | 'write-only';\n id: string;\n};\n\nexport type Cache =\n | false // No read, no write\n | true // Will throw error at runtime - deprecated\n | CacheConfig; // Object configuration (requires explicit id)\n\nexport interface AgentOpt {\n testId?: string;\n // @deprecated\n cacheId?: string; // Keep backward compatibility, but marked as deprecated\n groupName?: string;\n groupDescription?: string;\n /* if auto generate report, default true */\n generateReport?: boolean;\n /* if auto print report msg, default true */\n autoPrintReportMsg?: boolean;\n\n /**\n * Use directory-based report format with separate image files.\n *\n * When enabled:\n * - Screenshots are saved as PNG files in a `screenshots/` subdirectory\n * - Report is generated as `index.html` with relative image paths\n * - Reduces memory usage and report file size\n *\n * IMPORTANT: 'html-and-external-assets' reports must be served via HTTP server\n * (e.g., `npx serve ./report-dir`). The file:// protocol will not\n * work due to browser CORS restrictions.\n *\n * @default 'single-html'\n */\n outputFormat?: 'single-html' | 'html-and-external-assets';\n\n onTaskStartTip?: OnTaskStartTip;\n aiActContext?: string;\n aiActionContext?: string;\n /* custom report file name */\n reportFileName?: string;\n modelConfig?: TModelConfig;\n cache?: Cache;\n /**\n * Maximum number of replanning cycles for aiAct.\n * Defaults to 20 (40 for `vlm-ui-tars`) when not provided.\n * If omitted, the agent will also read `MIDSCENE_REPLANNING_CYCLE_LIMIT` for backward compatibility.\n */\n replanningCycleLimit?: number;\n\n /**\n * Wait time in milliseconds after each action execution.\n * This allows the UI to settle and stabilize before the next action.\n * Defaults to 300ms when not provided.\n */\n waitAfterAction?: number;\n\n /**\n * When set to true, Midscene will use the target device's time (Android/iOS)\n * instead of the system time. Useful when the device time differs from the\n * host machine. Default: false\n */\n useDeviceTimestamp?: boolean;\n\n /**\n * Custom screenshot shrink factor to reduce AI token usage.\n * When set, the screenshot will be scaled down by this factor from the physical resolution.\n *\n * Example:\n * - Physical screen width: 3000px, dpr=6\n * - Logical width: 500px\n * - screenshotShrinkFactor: 2\n * - Actual shrunk screenshot width: 3000 / 2 = 1500px\n * - AI analyzes the 1500px screenshot\n * - Coordinates are transformed back to logical (500px) before actions execute\n *\n * Benefits:\n * - Reduces token usage for high-resolution screenshots\n * - Maintains accuracy by scaling coordinates appropriately\n *\n * Must be >= 1 (shrinking only, enlarging is not supported).\n *\n * @default 1 (no shrinking, uses original physical screenshot)\n */\n screenshotShrinkFactor?: number;\n\n /**\n * Custom OpenAI client factory function\n *\n * If provided, this function will be called to create OpenAI client instances\n * for each AI call, allowing you to:\n * - Wrap clients with observability tools (langsmith, langfuse)\n * - Use custom OpenAI-compatible clients\n * - Apply different configurations based on intent\n *\n * @param config - Resolved model configuration\n * @returns OpenAI client instance (original or wrapped)\n *\n * @example\n * ```typescript\n * createOpenAIClient: async (openai, opts) => {\n * // Wrap with langsmith for planning tasks\n * if (opts.baseURL?.includes('planning')) {\n * return wrapOpenAI(openai, { metadata: { task: 'planning' } });\n * }\n *\n * return openai;\n * }\n * ```\n */\n createOpenAIClient?: CreateOpenAIClientFn;\n}\n\nexport type TestStatus =\n | 'passed'\n | 'failed'\n | 'timedOut'\n | 'skipped'\n | 'interrupted';\n\nexport interface ReportFileWithAttributes {\n reportFilePath: string;\n reportAttributes: {\n testDuration: number;\n testStatus: TestStatus;\n testTitle: string;\n testId: string;\n testDescription: string;\n };\n}\n"],"names":["UIContext","ServiceError","Error","message","dump","replacerForDumpSerialization","_key","value","reviverForDumpDeserialization","key","ScreenshotItem","ExecutionDump","indents","JSON","task","serialized","parsed","data","screenshots","record","GroupedActionDump","processValue","obj","Array","entries","Object","exec","execution","basePath","screenshotsDir","existsSync","mkdirSync","screenshotMap","screenshot","imagePath","join","rawBase64","writeFileSync","Buffer","dumpString","readFileSync","screenshotsMapPath","imageMap","id","filePath","mime","dumpData","processedData","restoreImageReferences","filesToClean","rmSync"],"mappings":";;;;;AAAqD;;;;;;;;;;AA+G9C,MAAeA;AA4BtB;AA4EO,MAAMC,qBAAqBC;IAGhC,YAAYC,OAAe,EAAEC,IAAiB,CAAE;QAC9C,KAAK,CAACD,UAHR;QAIE,IAAI,CAAC,IAAI,GAAG;QACZ,IAAI,CAAC,IAAI,GAAGC;IACd;AACF;AAgOA,SAASC,6BAA6BC,IAAY,EAAEC,KAAU;IAC5D,IAAIA,SAASA,MAAM,WAAW,EAAE,SAAS,QACvC,OAAO;IAET,IAAIA,SAASA,MAAM,WAAW,EAAE,SAAS,WACvC,OAAO;IAGT,IAAIA,SAAS,AAAgC,cAAhC,OAAOA,MAAM,cAAc,EACtC,OAAOA,MAAM,cAAc;IAE7B,OAAOA;AACT;AAcA,SAASC,8BAA8BC,GAAW,EAAEF,KAAU;IAE5D,IAAIE,AAAQ,iBAARA,OAAwB,AAAiB,YAAjB,OAAOF,SAAsBA,AAAU,SAAVA,OACvD,OAAOA;IAKT,IAAIG,eAAe,YAAY,CAACH,QAC9B,OAAOA;IAIL,YAAYA,SAAgBA,MAAM,MAAM;IAI5C,OAAOA;AACT;AAKO,MAAMI;IAkBX,UAAUC,OAAgB,EAAU;QAClC,OAAOC,KAAK,SAAS,CAAC,IAAI,CAAC,MAAM,IAAIR,8BAA8BO;IACrE;IAKA,SAAyB;QACvB,OAAO;YACL,SAAS,IAAI,CAAC,OAAO;YACrB,MAAM,IAAI,CAAC,IAAI;YACf,aAAa,IAAI,CAAC,WAAW;YAC7B,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAACE,OAAU;oBAC/B,GAAGA,IAAI;oBACP,UAAUA,KAAK,QAAQ,IAAI,EAAE;gBAC/B;YACA,cAAc,IAAI,CAAC,YAAY;QACjC;IACF;IAKA,OAAO,qBAAqBC,UAAkB,EAAiB;QAC7D,MAAMC,SAASH,KAAK,KAAK,CACvBE,YACAP;QAEF,OAAO,IAAIG,cAAcK;IAC3B;IAKA,OAAO,SAASC,IAAoB,EAAiB;QACnD,OAAO,IAAIN,cAAcM;IAC3B;IAQA,qBAAuC;QACrC,MAAMC,cAAgC,EAAE;QAExC,KAAK,MAAMJ,QAAQ,IAAI,CAAC,KAAK,CAAE;YAE7B,IAAIA,KAAK,SAAS,EAAE,sBAAsBJ,gBACxCQ,YAAY,IAAI,CAACJ,KAAK,SAAS,CAAC,UAAU;YAI5C,IAAIA,KAAK,QAAQ,EACf;gBAAA,KAAK,MAAMK,UAAUL,KAAK,QAAQ,CAChC,IAAIK,OAAO,UAAU,YAAYT,gBAC/BQ,YAAY,IAAI,CAACC,OAAO,UAAU;YAEtC;QAEJ;QAEA,OAAOD;IACT;IA3EA,YAAYD,IAAoB,CAAE;QANlC;QACA;QACA;QACA;QACA;QAGE,IAAI,CAAC,OAAO,GAAGA,KAAK,OAAO;QAC3B,IAAI,CAAC,IAAI,GAAGA,KAAK,IAAI;QACrB,IAAI,CAAC,WAAW,GAAGA,KAAK,WAAW;QACnC,IAAI,CAAC,KAAK,GAAGA,KAAK,KAAK;QACvB,IAAI,CAAC,YAAY,GAAGA,KAAK,YAAY;IACvC;AAsEF;AAwIO,MAAMG;IAuBX,UAAUR,OAAgB,EAAU;QAClC,OAAOC,KAAK,SAAS,CAAC,IAAI,CAAC,MAAM,IAAIR,8BAA8BO;IACrE;IAMA,+BAA+BA,OAAgB,EAAU;QACvD,MAAMS,eAAe,CAACC;YACpB,IAAIA,eAAeZ,gBACjB,OAAO;gBAAE,QAAQY,IAAI,MAAM;gBAAE,YAAYA,IAAI,UAAU;YAAC;YAE1D,IAAIC,MAAM,OAAO,CAACD,MAChB,OAAOA,IAAI,GAAG,CAACD;YAEjB,IAAIC,OAAO,AAAe,YAAf,OAAOA,KAAkB;gBAClC,MAAME,UAAUC,OAAO,OAAO,CAACH,KAAK,GAAG,CAAC,CAAC,CAACb,KAAKF,MAAM,GAAK;wBACxDE;wBACAY,aAAad;qBACd;gBACD,OAAOkB,OAAO,WAAW,CAACD;YAC5B;YACA,OAAOF;QACT;QAEA,MAAML,OAAOI,aAAa,IAAI,CAAC,MAAM;QACrC,OAAOR,KAAK,SAAS,CAACI,MAAM,MAAML;IACpC;IAKA,SAA6B;QAC3B,OAAO;YACL,YAAY,IAAI,CAAC,UAAU;YAC3B,WAAW,IAAI,CAAC,SAAS;YACzB,kBAAkB,IAAI,CAAC,gBAAgB;YACvC,aAAa,IAAI,CAAC,WAAW;YAC7B,YAAY,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAACc,OAASA,KAAK,MAAM;YACrD,YAAY,IAAI,CAAC,UAAU;QAC7B;IACF;IAKA,OAAO,qBAAqBX,UAAkB,EAAqB;QACjE,MAAMC,SAASH,KAAK,KAAK,CACvBE,YACAP;QAEF,OAAO,IAAIY,kBAAkBJ;IAC/B;IAKA,OAAO,SAASC,IAAwB,EAAqB;QAC3D,OAAO,IAAIG,kBAAkBH;IAC/B;IAOA,wBAA0C;QACxC,MAAMC,cAAgC,EAAE;QACxC,KAAK,MAAMS,aAAa,IAAI,CAAC,UAAU,CACrCT,YAAY,IAAI,IAAIS,UAAU,kBAAkB;QAElD,OAAOT;IACT;IAWA,iBAAiBU,QAAgB,EAAQ;QACvC,MAAMC,iBAAiB,GAAGD,SAAS,YAAY,CAAC;QAChD,IAAI,CAACE,WAAWD,iBACdE,UAAUF,gBAAgB;YAAE,WAAW;QAAK;QAI9C,MAAMG,gBAAwC,CAAC;QAC/C,MAAMd,cAAc,IAAI,CAAC,qBAAqB;QAE9C,KAAK,MAAMe,cAAcf,YACvB,IAAIe,WAAW,SAAS,IAAI;YAC1B,MAAMC,YAAYC,KAChBN,gBACA,GAAGI,WAAW,EAAE,CAAC,CAAC,EAAEA,WAAW,SAAS,EAAE;YAE5C,MAAMG,YAAYH,WAAW,SAAS;YACtCI,cAAcH,WAAWI,OAAO,IAAI,CAACF,WAAW;YAChDJ,aAAa,CAACC,WAAW,EAAE,CAAC,GAAGC;QACjC;QAIFG,cACE,GAAGT,SAAS,iBAAiB,CAAC,EAC9Bf,KAAK,SAAS,CAACmB,gBACf;QAIFK,cAAcT,UAAU,IAAI,CAAC,SAAS,IAAI;IAC5C;IASA,OAAO,sBAAsBA,QAAgB,EAAU;QACrD,MAAMW,aAAaC,aAAaZ,UAAU;QAC1C,MAAMa,qBAAqB,GAAGb,SAAS,iBAAiB,CAAC;QAEzD,IAAI,CAACE,WAAWW,qBACd,OAAOF;QAIT,MAAMP,gBAAwCnB,KAAK,KAAK,CACtD2B,aAAaC,oBAAoB;QAGnC,MAAMC,WAAmC,CAAC;QAC1C,KAAK,MAAM,CAACC,IAAIC,SAAS,IAAInB,OAAO,OAAO,CAACO,eAC1C,IAAIF,WAAWc,WAAW;YACxB,MAAM3B,OAAOuB,aAAaI;YAC1B,MAAMC,OACJD,SAAS,QAAQ,CAAC,YAAYA,SAAS,QAAQ,CAAC,UAC5C,SACA;YACNF,QAAQ,CAACC,GAAG,GAAG,CAAC,WAAW,EAAEE,KAAK,QAAQ,EAAE5B,KAAK,QAAQ,CAAC,WAAW;QACvE;QAIF,MAAM6B,WAAWjC,KAAK,KAAK,CAAC0B;QAC5B,MAAMQ,gBAAgBC,uBACpBF,UACA,CAACH,KAAOD,QAAQ,CAACC,GAAG,IAAI;QAE1B,OAAO9B,KAAK,SAAS,CAACkC;IACxB;IAOA,OAAO,aAAanB,QAAgB,EAAQ;QAC1C,MAAMqB,eAAe;YACnBrB;YACA,GAAGA,SAAS,iBAAiB,CAAC;YAC9B,GAAGA,SAAS,YAAY,CAAC;SAC1B;QAED,KAAK,MAAMgB,YAAYK,aACrB,IAAI;YACFC,OAAON,UAAU;gBAAE,OAAO;gBAAM,WAAW;YAAK;QAClD,EAAE,OAAM,CAER;IAEJ;IAQA,OAAO,aAAahB,QAAgB,EAAY;QAC9C,OAAO;YACLA;YACA,GAAGA,SAAS,iBAAiB,CAAC;YAC9B,GAAGA,SAAS,YAAY,CAAC;SAC1B;IACH;IA9MA,YAAYX,IAAwB,CAAE;QAPtC;QACA;QACA;QACA;QACA;QACA;QAGE,IAAI,CAAC,UAAU,GAAGA,KAAK,UAAU;QACjC,IAAI,CAAC,SAAS,GAAGA,KAAK,SAAS;QAC/B,IAAI,CAAC,gBAAgB,GAAGA,KAAK,gBAAgB;QAC7C,IAAI,CAAC,WAAW,GAAGA,KAAK,WAAW;QACnC,IAAI,CAAC,UAAU,GAAGA,KAAK,UAAU,CAAC,GAAG,CAAC,CAACS,OACrCA,gBAAgBf,gBAAgBe,OAAOf,cAAc,QAAQ,CAACe;QAEhE,IAAI,CAAC,UAAU,GAAGT,KAAK,UAAU;IACnC;AAsMF"}