@midscene/ios 1.8.1-beta-20260513084557.0 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/es/bin.mjs CHANGED
@@ -7,8 +7,8 @@ import { MIDSCENE_IOS_DEVICE_CLASS_OVERRIDE } from "@midscene/shared/env";
7
7
  import { getDebug } from "@midscene/shared/logger";
8
8
  import { mergeAndNormalizeAppNameMapping, normalizeForComparison } from "@midscene/shared/utils";
9
9
  import node_assert from "node:assert";
10
- import { getMidsceneLocationSchema, z } from "@midscene/core";
11
- import { defineAction, defineActionClearInput, defineActionCursorMove, defineActionDoubleClick, defineActionDragAndDrop, defineActionKeyboardPress, defineActionLongPress, defineActionPinch, defineActionScroll, defineActionSwipe, defineActionTap, normalizeMobileSwipeParam, normalizePinchParam } from "@midscene/core/device";
10
+ import { z } from "@midscene/core";
11
+ import { createDefaultMobileActions, defineAction } from "@midscene/core/device";
12
12
  import { sleep } from "@midscene/core/utils";
13
13
  import { createImgBase64ByFormat } from "@midscene/shared/img";
14
14
  import { WDAManager, WebDriverClient } from "@midscene/webdriver";
@@ -650,16 +650,6 @@ function _define_property(obj, key, value) {
650
650
  return obj;
651
651
  }
652
652
  const debugDevice = getDebug('ios:device');
653
- const iosInputParamSchema = z.object({
654
- value: z.string().describe('The text to input. Provide the final content for replace/append modes, or an empty string when using clear mode to remove existing text.'),
655
- autoDismissKeyboard: z.boolean().optional().describe('Whether to dismiss the keyboard after input. Defaults to true if not specified. Set to false to keep the keyboard visible after input.'),
656
- mode: z.preprocess((val)=>'append' === val ? 'typeOnly' : val, z["enum"]([
657
- 'replace',
658
- 'clear',
659
- 'typeOnly'
660
- ]).default('replace').optional().describe('Input mode: "replace" (default) - clear the field and input the value; "typeOnly" - type the value directly without clearing the field first; "clear" - clear the field without inputting new text.')),
661
- locate: getMidsceneLocationSchema().describe('The input field to be filled').optional()
662
- });
663
653
  const WDA_HTTP_METHODS = [
664
654
  'GET',
665
655
  'POST',
@@ -668,96 +658,39 @@ const WDA_HTTP_METHODS = [
668
658
  ];
669
659
  const DEFAULT_WDA_MJPEG_PORT = 9100;
670
660
  class IOSDevice {
661
+ async tapPoint(point) {
662
+ debugDevice(`tap at coordinates (${point.x}, ${point.y})`);
663
+ await this.wdaBackend.tap(Math.round(point.x), Math.round(point.y));
664
+ }
665
+ async doubleTapPoint(point) {
666
+ await this.wdaBackend.doubleTap(Math.round(point.x), Math.round(point.y));
667
+ }
668
+ async longPressPoint(point, duration = 1000) {
669
+ await this.wdaBackend.longPress(Math.round(point.x), Math.round(point.y), duration);
670
+ }
671
+ async swipePoint(start, end, duration = 500) {
672
+ await this.wdaBackend.swipe(Math.round(start.x), Math.round(start.y), Math.round(end.x), Math.round(end.y), duration);
673
+ }
674
+ async clearInputAt(point) {
675
+ if (point) {
676
+ await this.tapPoint(point);
677
+ await sleep(100);
678
+ }
679
+ debugDevice('Attempting to clear input with WebDriver Clear API');
680
+ const cleared = await this.wdaBackend.clearActiveElement();
681
+ cleared ? debugDevice('Successfully cleared input with WebDriver Clear API') : debugDevice('WebDriver Clear API returned false (no active element or clear failed)');
682
+ }
671
683
  actionSpace() {
684
+ const mobileActionContext = {
685
+ input: this.inputPrimitives,
686
+ size: ()=>this.size(),
687
+ sleep: async (timeMs)=>{
688
+ await sleep(timeMs);
689
+ },
690
+ getDefaultAutoDismissKeyboard: ()=>this.options?.autoDismissKeyboard
691
+ };
672
692
  const defaultActions = [
673
- defineActionTap(async (param)=>{
674
- const element = param.locate;
675
- node_assert(element, 'Element not found, cannot tap');
676
- await this.mouseClick(element.center[0], element.center[1]);
677
- }),
678
- defineActionDoubleClick(async (param)=>{
679
- const element = param.locate;
680
- node_assert(element, 'Element not found, cannot double click');
681
- await this.doubleTap(element.center[0], element.center[1]);
682
- }),
683
- defineAction({
684
- name: 'Input',
685
- description: 'Input text into the input field',
686
- interfaceAlias: 'aiInput',
687
- paramSchema: iosInputParamSchema,
688
- sample: {
689
- value: 'test@example.com',
690
- locate: {
691
- prompt: 'the email input field'
692
- }
693
- },
694
- call: async (param)=>{
695
- const element = param.locate;
696
- if ('typeOnly' !== param.mode) await this.clearInput(element);
697
- if ('clear' === param.mode) return;
698
- if (!param || !param.value) return;
699
- const autoDismissKeyboard = param.autoDismissKeyboard ?? this.options?.autoDismissKeyboard;
700
- await this.typeText(param.value, {
701
- autoDismissKeyboard
702
- });
703
- }
704
- }),
705
- defineActionScroll(async (param)=>{
706
- const element = param.locate;
707
- const startingPoint = element ? {
708
- left: element.center[0],
709
- top: element.center[1]
710
- } : void 0;
711
- const scrollToEventName = param?.scrollType;
712
- if ('scrollToTop' === scrollToEventName) await this.scrollUntilTop(startingPoint);
713
- else if ('scrollToBottom' === scrollToEventName) await this.scrollUntilBottom(startingPoint);
714
- else if ('scrollToRight' === scrollToEventName) await this.scrollUntilRight(startingPoint);
715
- else if ('scrollToLeft' === scrollToEventName) await this.scrollUntilLeft(startingPoint);
716
- else if ('singleAction' !== scrollToEventName && scrollToEventName) throw new Error(`Unknown scroll event type: ${scrollToEventName}, param: ${JSON.stringify(param)}`);
717
- else {
718
- if (param?.direction !== 'down' && param && param.direction) if ('up' === param.direction) await this.scrollUp(param.distance || void 0, startingPoint);
719
- else if ('left' === param.direction) await this.scrollLeft(param.distance || void 0, startingPoint);
720
- else if ('right' === param.direction) await this.scrollRight(param.distance || void 0, startingPoint);
721
- else throw new Error(`Unknown scroll direction: ${param.direction}`);
722
- else await this.scrollDown(param?.distance || void 0, startingPoint);
723
- await sleep(500);
724
- }
725
- }),
726
- defineActionDragAndDrop(async (param)=>{
727
- const from = param.from;
728
- const to = param.to;
729
- node_assert(from, 'missing "from" param for drag and drop');
730
- node_assert(to, 'missing "to" param for drag and drop');
731
- await this.swipe(from.center[0], from.center[1], to.center[0], to.center[1], 1000);
732
- }),
733
- defineActionSwipe(async (param)=>{
734
- const { startPoint, endPoint, duration, repeatCount } = normalizeMobileSwipeParam(param, await this.size());
735
- for(let i = 0; i < repeatCount; i++)await this.swipe(startPoint.x, startPoint.y, endPoint.x, endPoint.y, duration);
736
- }),
737
- defineActionKeyboardPress(async (param)=>{
738
- await this.pressKey(param.keyName);
739
- }),
740
- defineActionCursorMove(async (param)=>{
741
- const arrowKey = 'left' === param.direction ? 'ArrowLeft' : 'ArrowRight';
742
- const times = param.times ?? 1;
743
- for(let i = 0; i < times; i++){
744
- await this.pressKey(arrowKey);
745
- await sleep(100);
746
- }
747
- }),
748
- defineActionLongPress(async (param)=>{
749
- const element = param.locate;
750
- node_assert(element, 'LongPress requires an element to be located');
751
- const [x, y] = element.center;
752
- await this.longPress(x, y, param?.duration);
753
- }),
754
- defineActionPinch(async (param)=>{
755
- const { centerX, centerY, startDistance, endDistance, duration } = normalizePinchParam(param, await this.size());
756
- await this.wdaBackend.pinch(centerX, centerY, startDistance, endDistance, duration);
757
- }),
758
- defineActionClearInput(async (param)=>{
759
- await this.clearInput(param.locate);
760
- })
693
+ ...createDefaultMobileActions(mobileActionContext)
761
694
  ];
762
695
  const platformSpecificActions = Object.values(createPlatformActions(this));
763
696
  const customActions = this.customActions || [];
@@ -767,6 +700,27 @@ class IOSDevice {
767
700
  ...customActions
768
701
  ];
769
702
  }
703
+ async performActionScroll(param) {
704
+ const element = param.locate;
705
+ const startingPoint = element ? {
706
+ left: element.center[0],
707
+ top: element.center[1]
708
+ } : void 0;
709
+ const scrollToEventName = param?.scrollType;
710
+ if ('scrollToTop' === scrollToEventName) await this.scrollUntilTop(startingPoint);
711
+ else if ('scrollToBottom' === scrollToEventName) await this.scrollUntilBottom(startingPoint);
712
+ else if ('scrollToRight' === scrollToEventName) await this.scrollUntilRight(startingPoint);
713
+ else if ('scrollToLeft' === scrollToEventName) await this.scrollUntilLeft(startingPoint);
714
+ else if ('singleAction' !== scrollToEventName && scrollToEventName) throw new Error(`Unknown scroll event type: ${scrollToEventName}, param: ${JSON.stringify(param)}`);
715
+ else {
716
+ if (param?.direction !== 'down' && param && param.direction) if ('up' === param.direction) await this.scrollUp(param.distance || void 0, startingPoint);
717
+ else if ('left' === param.direction) await this.scrollLeft(param.distance || void 0, startingPoint);
718
+ else if ('right' === param.direction) await this.scrollRight(param.distance || void 0, startingPoint);
719
+ else throw new Error(`Unknown scroll direction: ${param.direction}`);
720
+ else await this.scrollDown(param?.distance || void 0, startingPoint);
721
+ await sleep(500);
722
+ }
723
+ }
770
724
  describe() {
771
725
  return this.description || `Device ID: ${this.deviceId}`;
772
726
  }
@@ -878,35 +832,31 @@ ScreenSize: ${size.width}x${size.height} (DPR: ${size.scale})
878
832
  }
879
833
  }
880
834
  async clearInput(element) {
881
- if (element) {
882
- await this.tap(element.center[0], element.center[1]);
883
- await sleep(100);
884
- }
885
- debugDevice('Attempting to clear input with WebDriver Clear API');
886
- const cleared = await this.wdaBackend.clearActiveElement();
887
- cleared ? debugDevice('Successfully cleared input with WebDriver Clear API') : debugDevice('WebDriver Clear API returned false (no active element or clear failed)');
835
+ await this.clearInputAt(element ? {
836
+ x: element.center[0],
837
+ y: element.center[1]
838
+ } : void 0);
888
839
  }
889
840
  async url() {
890
841
  return '';
891
842
  }
892
843
  async tap(x, y) {
893
- await this.wdaBackend.tap(Math.round(x), Math.round(y));
894
- }
895
- async mouseClick(x, y) {
896
- debugDevice(`mouseClick at coordinates (${x}, ${y})`);
897
- await this.tap(x, y);
898
- }
899
- async doubleTap(x, y) {
900
- await this.wdaBackend.doubleTap(Math.round(x), Math.round(y));
901
- }
902
- async tripleTap(x, y) {
903
- await this.wdaBackend.tripleTap(Math.round(x), Math.round(y));
904
- }
905
- async longPress(x, y, duration = 1000) {
906
- await this.wdaBackend.longPress(Math.round(x), Math.round(y), duration);
844
+ await this.tapPoint({
845
+ x,
846
+ y
847
+ });
907
848
  }
908
849
  async swipe(fromX, fromY, toX, toY, duration = 500) {
909
- await this.wdaBackend.swipe(Math.round(fromX), Math.round(fromY), Math.round(toX), Math.round(toY), duration);
850
+ await this.swipeCoordinates(fromX, fromY, toX, toY, duration);
851
+ }
852
+ async swipeCoordinates(fromX, fromY, toX, toY, duration = 500) {
853
+ await this.swipePoint({
854
+ x: fromX,
855
+ y: fromY
856
+ }, {
857
+ x: toX,
858
+ y: toY
859
+ }, duration);
910
860
  }
911
861
  async typeText(text, options) {
912
862
  if (!text) return;
@@ -935,7 +885,7 @@ ScreenSize: ${size.width}x${size.height} (DPR: ${size.scale})
935
885
  y: Math.round(height / 2)
936
886
  };
937
887
  const scrollDistance = Math.round(distance || height / 3);
938
- await this.swipe(start.x, start.y, start.x, start.y + scrollDistance);
888
+ await this.swipeCoordinates(start.x, start.y, start.x, start.y + scrollDistance);
939
889
  }
940
890
  async scrollDown(distance, startPoint) {
941
891
  const { width, height } = await this.size();
@@ -947,7 +897,7 @@ ScreenSize: ${size.width}x${size.height} (DPR: ${size.scale})
947
897
  y: Math.round(height / 2)
948
898
  };
949
899
  const scrollDistance = Math.round(distance || height / 3);
950
- await this.swipe(start.x, start.y, start.x, start.y - scrollDistance);
900
+ await this.swipeCoordinates(start.x, start.y, start.x, start.y - scrollDistance);
951
901
  }
952
902
  async scrollLeft(distance, startPoint) {
953
903
  const { width, height } = await this.size();
@@ -959,7 +909,7 @@ ScreenSize: ${size.width}x${size.height} (DPR: ${size.scale})
959
909
  y: Math.round(height / 2)
960
910
  };
961
911
  const scrollDistance = Math.round(distance || 0.7 * width);
962
- await this.swipe(start.x, start.y, start.x + scrollDistance, start.y);
912
+ await this.swipeCoordinates(start.x, start.y, start.x + scrollDistance, start.y);
963
913
  }
964
914
  async scrollRight(distance, startPoint) {
965
915
  const { width, height } = await this.size();
@@ -971,7 +921,7 @@ ScreenSize: ${size.width}x${size.height} (DPR: ${size.scale})
971
921
  y: Math.round(height / 2)
972
922
  };
973
923
  const scrollDistance = Math.round(distance || 0.7 * width);
974
- await this.swipe(start.x, start.y, start.x - scrollDistance, start.y);
924
+ await this.swipeCoordinates(start.x, start.y, start.x - scrollDistance, start.y);
975
925
  }
976
926
  async scrollUntilTop(startPoint) {
977
927
  debugDevice('Using screenshot-based scroll detection for better reliability');
@@ -1067,16 +1017,16 @@ ScreenSize: ${size.width}x${size.height} (DPR: ${size.scale})
1067
1017
  debugDevice(`Performing scroll: ${direction}, distance: ${scrollDistance}`);
1068
1018
  switch(direction){
1069
1019
  case 'up':
1070
- await this.swipe(start.x, start.y, start.x, start.y + scrollDistance, 300);
1020
+ await this.swipeCoordinates(start.x, start.y, start.x, start.y + scrollDistance, 300);
1071
1021
  break;
1072
1022
  case 'down':
1073
- await this.swipe(start.x, start.y, start.x, start.y - scrollDistance, 300);
1023
+ await this.swipeCoordinates(start.x, start.y, start.x, start.y - scrollDistance, 300);
1074
1024
  break;
1075
1025
  case 'left':
1076
- await this.swipe(start.x, start.y, start.x + scrollDistance, start.y, 300);
1026
+ await this.swipeCoordinates(start.x, start.y, start.x + scrollDistance, start.y, 300);
1077
1027
  break;
1078
1028
  case 'right':
1079
- await this.swipe(start.x, start.y, start.x - scrollDistance, start.y, 300);
1029
+ await this.swipeCoordinates(start.x, start.y, start.x - scrollDistance, start.y, 300);
1080
1030
  break;
1081
1031
  }
1082
1032
  debugDevice('Waiting for scroll and inertia to complete...');
@@ -1133,7 +1083,7 @@ ScreenSize: ${size.width}x${size.height} (DPR: ${size.scale})
1133
1083
  const centerX = Math.round(windowSize.width / 2);
1134
1084
  const startY = Math.round(0.9 * windowSize.height);
1135
1085
  const endY = Math.round(0.5 * windowSize.height);
1136
- await this.swipe(centerX, startY, centerX, endY, 300);
1086
+ await this.swipeCoordinates(centerX, startY, centerX, endY, 300);
1137
1087
  debugDevice('Dismissed keyboard with swipe up gesture from bottom of screen');
1138
1088
  await sleep(500);
1139
1089
  return true;
@@ -1210,6 +1160,45 @@ ScreenSize: ${size.width}x${size.height} (DPR: ${size.scale})
1210
1160
  _define_property(this, "interfaceType", 'ios');
1211
1161
  _define_property(this, "uri", void 0);
1212
1162
  _define_property(this, "options", void 0);
1163
+ _define_property(this, "inputPrimitives", {
1164
+ pointer: {
1165
+ tap: (point)=>this.tapPoint(point),
1166
+ doubleClick: (point)=>this.doubleTapPoint(point),
1167
+ longPress: (point, opts)=>this.longPressPoint(point, opts?.duration),
1168
+ dragAndDrop: (from, to)=>this.swipePoint(from, to, 1000)
1169
+ },
1170
+ keyboard: {
1171
+ keyboardPress: (keyName)=>this.pressKey(keyName),
1172
+ typeText: async (value, opts)=>{
1173
+ const target = opts?.target;
1174
+ if (target && opts?.replace !== false) await this.clearInput(target);
1175
+ else if (target) await this.tapPoint({
1176
+ x: target.center[0],
1177
+ y: target.center[1]
1178
+ });
1179
+ if (opts?.focusOnly) return;
1180
+ await this.typeText(value, opts);
1181
+ },
1182
+ clearInput: (target)=>this.clearInput(target),
1183
+ cursorMove: async (direction, times = 1)=>{
1184
+ const arrowKey = 'left' === direction ? 'ArrowLeft' : 'ArrowRight';
1185
+ for(let i = 0; i < times; i++)await this.pressKey(arrowKey);
1186
+ }
1187
+ },
1188
+ touch: {
1189
+ swipe: async (start, end, opts)=>{
1190
+ const duration = opts?.duration ?? 300;
1191
+ const repeat = opts?.repeat ?? 1;
1192
+ for(let i = 0; i < repeat; i++)await this.swipePoint(start, end, duration);
1193
+ },
1194
+ pinch: async (center, opts)=>{
1195
+ await this.wdaBackend.pinch(Math.round(center.x), Math.round(center.y), opts.startDistance, opts.endDistance, opts.duration);
1196
+ }
1197
+ },
1198
+ scroll: {
1199
+ scroll: (param)=>this.performActionScroll(param)
1200
+ }
1201
+ });
1213
1202
  this.deviceId = 'pending-connection';
1214
1203
  this.options = options;
1215
1204
  this.customActions = options?.customActions;