@midscene/android 1.3.5-beta-20260204095321.0 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/es/index.mjs CHANGED
@@ -555,137 +555,6 @@ class ScrcpyDeviceAdapter {
555
555
  this.initFailed = false;
556
556
  }
557
557
  }
558
- const debug = (0, logger_.getDebug)('android:ui-hierarchy');
559
- const DUMP_PATH = '/sdcard/midscene_dump.xml';
560
- function shortClassName(className) {
561
- return className.split('.').pop() || className;
562
- }
563
- function extractAttr(tag, attrName) {
564
- const regex = new RegExp(`${attrName}="([^"]*)"`, '');
565
- const match = tag.match(regex);
566
- return match ? match[1] : '';
567
- }
568
- async function dumpAccessibilityTreeXml(adb) {
569
- debug('dumping accessibility tree via uiautomator');
570
- const dumpCmd = `uiautomator dump ${DUMP_PATH}`;
571
- const dumpResult = await adb.shell(dumpCmd);
572
- debug('uiautomator dump result:', dumpResult);
573
- const xml = await adb.shell(`cat ${DUMP_PATH}`);
574
- adb.shell(`rm ${DUMP_PATH}`).catch(()=>{});
575
- return xml || '';
576
- }
577
- const MAX_TEXT_LENGTH = 200;
578
- function createEmptyFormatNode() {
579
- return {
580
- className: '',
581
- text: '',
582
- resourceId: '',
583
- contentDesc: '',
584
- clickable: false,
585
- selected: false,
586
- checked: false,
587
- scrollable: false,
588
- children: []
589
- };
590
- }
591
- function parseXmlToFormatTree(xml) {
592
- const root = createEmptyFormatNode();
593
- const stack = [
594
- root
595
- ];
596
- const tagRegex = /<node\s+([^>]*?)\/\s*>|<node\s+([^>]*?)>|<\/node>/g;
597
- let match;
598
- while(null !== (match = tagRegex.exec(xml))){
599
- const fullMatch = match[0];
600
- if (fullMatch.startsWith('</node')) {
601
- if (stack.length > 1) stack.pop();
602
- continue;
603
- }
604
- const attrString = match[1] || match[2] || '';
605
- const isSelfClosing = !!match[1];
606
- const text = extractAttr(attrString, 'text');
607
- const resourceId = extractAttr(attrString, 'resource-id');
608
- const className = extractAttr(attrString, 'class');
609
- const contentDesc = extractAttr(attrString, 'content-desc');
610
- const clickable = 'true' === extractAttr(attrString, 'clickable');
611
- const selected = 'true' === extractAttr(attrString, 'selected');
612
- const checked = 'true' === extractAttr(attrString, 'checked');
613
- const scrollable = 'true' === extractAttr(attrString, 'scrollable');
614
- if (isSelfClosing && !text && !contentDesc && !clickable && !scrollable) continue;
615
- const node = {
616
- className: shortClassName(className),
617
- text,
618
- resourceId,
619
- contentDesc,
620
- clickable,
621
- selected,
622
- checked,
623
- scrollable,
624
- children: []
625
- };
626
- const parent = stack[stack.length - 1];
627
- parent.children.push(node);
628
- if (!isSelfClosing) stack.push(node);
629
- }
630
- return root;
631
- }
632
- function shouldCollapseNode(node) {
633
- if (node.text) return false;
634
- if (node.contentDesc) return false;
635
- if (node.clickable) return false;
636
- if (node.scrollable) return false;
637
- if (node.resourceId && node.children.length >= 2) return false;
638
- return true;
639
- }
640
- function collapseWrappers(node) {
641
- node.children = node.children.flatMap((child)=>{
642
- collapseWrappers(child);
643
- if (shouldCollapseNode(child)) return child.children;
644
- return [
645
- child
646
- ];
647
- });
648
- }
649
- function escapeXmlAttr(str) {
650
- return str.replace(/&/g, '&amp;').replace(/"/g, '&quot;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
651
- }
652
- function truncateText(text, maxLength) {
653
- if (text.length <= maxLength) return text;
654
- return `${text.slice(0, maxLength)}...`;
655
- }
656
- function formatNodeToXml(node, indent) {
657
- const indentStr = ' '.repeat(indent);
658
- const tag = escapeXmlAttr(node.className) || 'node';
659
- const attrs = [];
660
- if (node.text) attrs.push(`text="${escapeXmlAttr(truncateText(node.text, MAX_TEXT_LENGTH))}"`);
661
- if (node.resourceId) attrs.push(`resource-id="${escapeXmlAttr(node.resourceId)}"`);
662
- if (node.contentDesc) attrs.push(`content-desc="${escapeXmlAttr(truncateText(node.contentDesc, MAX_TEXT_LENGTH))}"`);
663
- if (node.clickable) attrs.push('clickable="true"');
664
- if (node.selected) attrs.push('selected="true"');
665
- if (node.checked) attrs.push('checked="true"');
666
- if (node.scrollable) attrs.push('scrollable="true"');
667
- const attrStr = attrs.length > 0 ? ` ${attrs.join(' ')}` : '';
668
- if (0 === node.children.length) return `${indentStr}<${tag}${attrStr} />`;
669
- const childrenStr = node.children.map((child)=>formatNodeToXml(child, indent + 1)).join('\n');
670
- return `${indentStr}<${tag}${attrStr}>\n${childrenStr}\n${indentStr}</${tag}>`;
671
- }
672
- function formatTreeToXml(root) {
673
- if (0 === root.children.length) return '';
674
- return root.children.map((child)=>formatNodeToXml(child, 0)).join('\n');
675
- }
676
- async function dumpAndFormatAccessibilityTree(adb) {
677
- const xml = await dumpAccessibilityTreeXml(adb);
678
- if (xml.length < 10) {
679
- debug('uiautomator dump returned empty or too short XML');
680
- return '';
681
- }
682
- debug(`formatting uiautomator XML (${xml.length} chars)`);
683
- const root = parseXmlToFormatTree(xml);
684
- collapseWrappers(root);
685
- const result = formatTreeToXml(root);
686
- debug(`formatted accessibility tree (${result.length} chars)`);
687
- return result;
688
- }
689
558
  function device_define_property(obj, key, value) {
690
559
  if (key in obj) Object.defineProperty(obj, key, {
691
560
  value: value,
@@ -930,9 +799,6 @@ ${Object.keys(size).filter((key)=>size[key]).map((key)=>` ${key} size: ${size[k
930
799
  setAppNameMapping(mapping) {
931
800
  this.appNameMapping = mapping;
932
801
  }
933
- setDomIncluded(value) {
934
- this.domIncluded = value;
935
- }
936
802
  resolvePackageName(appName) {
937
803
  const normalizedAppName = normalizeForComparison(appName);
938
804
  return this.appNameMapping[normalizedAppName];
@@ -970,16 +836,11 @@ ${Object.keys(size).filter((key)=>size[key]).map((key)=>` ${key} size: ${size[k
970
836
  async getElementsInfo() {
971
837
  return [];
972
838
  }
973
- async getExtraPlanningContext() {
974
- if (!this.domIncluded) return '';
975
- try {
976
- const adb = await this.getAdb();
977
- const domDescription = await dumpAndFormatAccessibilityTree(adb);
978
- if (domDescription) return `\nPage structure data in the below XML format. You can extract accurate textual content and relevant UI state annotations from it: \n<PageElementsTree>\n${domDescription}\n</PageElementsTree>`;
979
- } catch (e) {
980
- debugDevice('getExtraPlanningContext failed: %O', e);
981
- }
982
- return '';
839
+ async getElementsNodeTree() {
840
+ return {
841
+ node: null,
842
+ children: []
843
+ };
983
844
  }
984
845
  async getScreenSize() {
985
846
  const shouldCache = !(this.options?.alwaysRefreshScreenInfo ?? false);
@@ -1697,7 +1558,6 @@ ${Object.keys(size).filter((key)=>size[key]).map((key)=>` ${key} size: ${size[k
1697
1558
  device_define_property(this, "interfaceType", 'android');
1698
1559
  device_define_property(this, "uri", void 0);
1699
1560
  device_define_property(this, "options", void 0);
1700
- device_define_property(this, "domIncluded", void 0);
1701
1561
  node_assert(deviceId, 'deviceId is required for AndroidDevice');
1702
1562
  this.deviceId = deviceId;
1703
1563
  this.options = options;
@@ -1898,7 +1758,6 @@ class AndroidAgent extends Agent {
1898
1758
  super(device, opts), agent_define_property(this, "back", void 0), agent_define_property(this, "home", void 0), agent_define_property(this, "recentApps", void 0), agent_define_property(this, "appNameMapping", void 0);
1899
1759
  this.appNameMapping = mergeAndNormalizeAppNameMapping(defaultAppNameMapping, opts?.appNameMapping);
1900
1760
  device.setAppNameMapping(this.appNameMapping);
1901
- if (opts?.domIncluded) device.setDomIncluded(opts.domIncluded);
1902
1761
  this.back = this.createActionWrapper('AndroidBackButton');
1903
1762
  this.home = this.createActionWrapper('AndroidHomeButton');
1904
1763
  this.recentApps = this.createActionWrapper('AndroidRecentAppsButton');
@@ -1915,4 +1774,4 @@ async function agentFromAdbDevice(deviceId, opts) {
1915
1774
  await device.connect();
1916
1775
  return new AndroidAgent(device, opts);
1917
1776
  }
1918
- export { AndroidAgent, AndroidDevice, agentFromAdbDevice, collapseWrappers, dumpAndFormatAccessibilityTree, formatTreeToXml, getConnectedDevices, overrideAIConfig, parseXmlToFormatTree };
1777
+ export { AndroidAgent, AndroidDevice, agentFromAdbDevice, getConnectedDevices, overrideAIConfig };
@@ -652,137 +652,6 @@ class ScrcpyDeviceAdapter {
652
652
  this.initFailed = false;
653
653
  }
654
654
  }
655
- const debug = (0, logger_.getDebug)('android:ui-hierarchy');
656
- const DUMP_PATH = '/sdcard/midscene_dump.xml';
657
- function shortClassName(className) {
658
- return className.split('.').pop() || className;
659
- }
660
- function extractAttr(tag, attrName) {
661
- const regex = new RegExp(`${attrName}="([^"]*)"`, '');
662
- const match = tag.match(regex);
663
- return match ? match[1] : '';
664
- }
665
- async function dumpAccessibilityTreeXml(adb) {
666
- debug('dumping accessibility tree via uiautomator');
667
- const dumpCmd = `uiautomator dump ${DUMP_PATH}`;
668
- const dumpResult = await adb.shell(dumpCmd);
669
- debug('uiautomator dump result:', dumpResult);
670
- const xml = await adb.shell(`cat ${DUMP_PATH}`);
671
- adb.shell(`rm ${DUMP_PATH}`).catch(()=>{});
672
- return xml || '';
673
- }
674
- const MAX_TEXT_LENGTH = 200;
675
- function createEmptyFormatNode() {
676
- return {
677
- className: '',
678
- text: '',
679
- resourceId: '',
680
- contentDesc: '',
681
- clickable: false,
682
- selected: false,
683
- checked: false,
684
- scrollable: false,
685
- children: []
686
- };
687
- }
688
- function parseXmlToFormatTree(xml) {
689
- const root = createEmptyFormatNode();
690
- const stack = [
691
- root
692
- ];
693
- const tagRegex = /<node\s+([^>]*?)\/\s*>|<node\s+([^>]*?)>|<\/node>/g;
694
- let match;
695
- while(null !== (match = tagRegex.exec(xml))){
696
- const fullMatch = match[0];
697
- if (fullMatch.startsWith('</node')) {
698
- if (stack.length > 1) stack.pop();
699
- continue;
700
- }
701
- const attrString = match[1] || match[2] || '';
702
- const isSelfClosing = !!match[1];
703
- const text = extractAttr(attrString, 'text');
704
- const resourceId = extractAttr(attrString, 'resource-id');
705
- const className = extractAttr(attrString, 'class');
706
- const contentDesc = extractAttr(attrString, 'content-desc');
707
- const clickable = 'true' === extractAttr(attrString, 'clickable');
708
- const selected = 'true' === extractAttr(attrString, 'selected');
709
- const checked = 'true' === extractAttr(attrString, 'checked');
710
- const scrollable = 'true' === extractAttr(attrString, 'scrollable');
711
- if (isSelfClosing && !text && !contentDesc && !clickable && !scrollable) continue;
712
- const node = {
713
- className: shortClassName(className),
714
- text,
715
- resourceId,
716
- contentDesc,
717
- clickable,
718
- selected,
719
- checked,
720
- scrollable,
721
- children: []
722
- };
723
- const parent = stack[stack.length - 1];
724
- parent.children.push(node);
725
- if (!isSelfClosing) stack.push(node);
726
- }
727
- return root;
728
- }
729
- function shouldCollapseNode(node) {
730
- if (node.text) return false;
731
- if (node.contentDesc) return false;
732
- if (node.clickable) return false;
733
- if (node.scrollable) return false;
734
- if (node.resourceId && node.children.length >= 2) return false;
735
- return true;
736
- }
737
- function collapseWrappers(node) {
738
- node.children = node.children.flatMap((child)=>{
739
- collapseWrappers(child);
740
- if (shouldCollapseNode(child)) return child.children;
741
- return [
742
- child
743
- ];
744
- });
745
- }
746
- function escapeXmlAttr(str) {
747
- return str.replace(/&/g, '&amp;').replace(/"/g, '&quot;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
748
- }
749
- function truncateText(text, maxLength) {
750
- if (text.length <= maxLength) return text;
751
- return `${text.slice(0, maxLength)}...`;
752
- }
753
- function formatNodeToXml(node, indent) {
754
- const indentStr = ' '.repeat(indent);
755
- const tag = escapeXmlAttr(node.className) || 'node';
756
- const attrs = [];
757
- if (node.text) attrs.push(`text="${escapeXmlAttr(truncateText(node.text, MAX_TEXT_LENGTH))}"`);
758
- if (node.resourceId) attrs.push(`resource-id="${escapeXmlAttr(node.resourceId)}"`);
759
- if (node.contentDesc) attrs.push(`content-desc="${escapeXmlAttr(truncateText(node.contentDesc, MAX_TEXT_LENGTH))}"`);
760
- if (node.clickable) attrs.push('clickable="true"');
761
- if (node.selected) attrs.push('selected="true"');
762
- if (node.checked) attrs.push('checked="true"');
763
- if (node.scrollable) attrs.push('scrollable="true"');
764
- const attrStr = attrs.length > 0 ? ` ${attrs.join(' ')}` : '';
765
- if (0 === node.children.length) return `${indentStr}<${tag}${attrStr} />`;
766
- const childrenStr = node.children.map((child)=>formatNodeToXml(child, indent + 1)).join('\n');
767
- return `${indentStr}<${tag}${attrStr}>\n${childrenStr}\n${indentStr}</${tag}>`;
768
- }
769
- function formatTreeToXml(root) {
770
- if (0 === root.children.length) return '';
771
- return root.children.map((child)=>formatNodeToXml(child, 0)).join('\n');
772
- }
773
- async function dumpAndFormatAccessibilityTree(adb) {
774
- const xml = await dumpAccessibilityTreeXml(adb);
775
- if (xml.length < 10) {
776
- debug('uiautomator dump returned empty or too short XML');
777
- return '';
778
- }
779
- debug(`formatting uiautomator XML (${xml.length} chars)`);
780
- const root = parseXmlToFormatTree(xml);
781
- collapseWrappers(root);
782
- const result = formatTreeToXml(root);
783
- debug(`formatted accessibility tree (${result.length} chars)`);
784
- return result;
785
- }
786
655
  function device_define_property(obj, key, value) {
787
656
  if (key in obj) Object.defineProperty(obj, key, {
788
657
  value: value,
@@ -1027,9 +896,6 @@ ${Object.keys(size).filter((key)=>size[key]).map((key)=>` ${key} size: ${size[k
1027
896
  setAppNameMapping(mapping) {
1028
897
  this.appNameMapping = mapping;
1029
898
  }
1030
- setDomIncluded(value) {
1031
- this.domIncluded = value;
1032
- }
1033
899
  resolvePackageName(appName) {
1034
900
  const normalizedAppName = normalizeForComparison(appName);
1035
901
  return this.appNameMapping[normalizedAppName];
@@ -1067,16 +933,11 @@ ${Object.keys(size).filter((key)=>size[key]).map((key)=>` ${key} size: ${size[k
1067
933
  async getElementsInfo() {
1068
934
  return [];
1069
935
  }
1070
- async getExtraPlanningContext() {
1071
- if (!this.domIncluded) return '';
1072
- try {
1073
- const adb = await this.getAdb();
1074
- const domDescription = await dumpAndFormatAccessibilityTree(adb);
1075
- if (domDescription) return `\nPage structure data in the below XML format. You can extract accurate textual content and relevant UI state annotations from it: \n<PageElementsTree>\n${domDescription}\n</PageElementsTree>`;
1076
- } catch (e) {
1077
- debugDevice('getExtraPlanningContext failed: %O', e);
1078
- }
1079
- return '';
936
+ async getElementsNodeTree() {
937
+ return {
938
+ node: null,
939
+ children: []
940
+ };
1080
941
  }
1081
942
  async getScreenSize() {
1082
943
  const shouldCache = !(this.options?.alwaysRefreshScreenInfo ?? false);
@@ -1794,7 +1655,6 @@ ${Object.keys(size).filter((key)=>size[key]).map((key)=>` ${key} size: ${size[k
1794
1655
  device_define_property(this, "interfaceType", 'android');
1795
1656
  device_define_property(this, "uri", void 0);
1796
1657
  device_define_property(this, "options", void 0);
1797
- device_define_property(this, "domIncluded", void 0);
1798
1658
  node_assert(deviceId, 'deviceId is required for AndroidDevice');
1799
1659
  this.deviceId = deviceId;
1800
1660
  this.options = options;
@@ -1899,7 +1759,6 @@ class AndroidAgent extends Agent {
1899
1759
  super(device, opts), agent_define_property(this, "back", void 0), agent_define_property(this, "home", void 0), agent_define_property(this, "recentApps", void 0), agent_define_property(this, "appNameMapping", void 0);
1900
1760
  this.appNameMapping = mergeAndNormalizeAppNameMapping(defaultAppNameMapping, opts?.appNameMapping);
1901
1761
  device.setAppNameMapping(this.appNameMapping);
1902
- if (opts?.domIncluded) device.setDomIncluded(opts.domIncluded);
1903
1762
  this.back = this.createActionWrapper('AndroidBackButton');
1904
1763
  this.home = this.createActionWrapper('AndroidHomeButton');
1905
1764
  this.recentApps = this.createActionWrapper('AndroidRecentAppsButton');
@@ -1916,7 +1775,7 @@ async function agentFromAdbDevice(deviceId, opts) {
1916
1775
  await device.connect();
1917
1776
  return new AndroidAgent(device, opts);
1918
1777
  }
1919
- const mcp_tools_debug = (0, logger_.getDebug)('mcp:android-tools');
1778
+ const debug = (0, logger_.getDebug)('mcp:android-tools');
1920
1779
  class AndroidMidsceneTools extends BaseMidsceneTools {
1921
1780
  createTemporaryDevice() {
1922
1781
  return new AndroidDevice('temp-for-action-space', {});
@@ -1926,12 +1785,12 @@ class AndroidMidsceneTools extends BaseMidsceneTools {
1926
1785
  try {
1927
1786
  await this.agent.destroy?.();
1928
1787
  } catch (error) {
1929
- mcp_tools_debug('Failed to destroy agent during cleanup:', error);
1788
+ debug('Failed to destroy agent during cleanup:', error);
1930
1789
  }
1931
1790
  this.agent = void 0;
1932
1791
  }
1933
1792
  if (this.agent) return this.agent;
1934
- mcp_tools_debug('Creating Android agent with deviceId:', deviceId || 'auto-detect');
1793
+ debug('Creating Android agent with deviceId:', deviceId || 'auto-detect');
1935
1794
  const agent = await agentFromAdbDevice(deviceId, {
1936
1795
  autoDismissKeyboard: false
1937
1796
  });
package/dist/lib/index.js CHANGED
@@ -444,15 +444,11 @@ var __webpack_exports__ = {};
444
444
  (()=>{
445
445
  __webpack_require__.r(__webpack_exports__);
446
446
  __webpack_require__.d(__webpack_exports__, {
447
+ getConnectedDevices: ()=>getConnectedDevices,
448
+ overrideAIConfig: ()=>env_namespaceObject.overrideAIConfig,
447
449
  AndroidAgent: ()=>AndroidAgent,
448
450
  agentFromAdbDevice: ()=>agentFromAdbDevice,
449
- dumpAndFormatAccessibilityTree: ()=>dumpAndFormatAccessibilityTree,
450
- formatTreeToXml: ()=>formatTreeToXml,
451
- AndroidDevice: ()=>AndroidDevice,
452
- collapseWrappers: ()=>collapseWrappers,
453
- overrideAIConfig: ()=>env_namespaceObject.overrideAIConfig,
454
- parseXmlToFormatTree: ()=>parseXmlToFormatTree,
455
- getConnectedDevices: ()=>getConnectedDevices
451
+ AndroidDevice: ()=>AndroidDevice
456
452
  });
457
453
  const external_node_assert_namespaceObject = require("node:assert");
458
454
  var external_node_assert_default = /*#__PURE__*/ __webpack_require__.n(external_node_assert_namespaceObject);
@@ -591,137 +587,6 @@ var __webpack_exports__ = {};
591
587
  this.initFailed = false;
592
588
  }
593
589
  }
594
- const debug = (0, logger_.getDebug)('android:ui-hierarchy');
595
- const DUMP_PATH = '/sdcard/midscene_dump.xml';
596
- function shortClassName(className) {
597
- return className.split('.').pop() || className;
598
- }
599
- function extractAttr(tag, attrName) {
600
- const regex = new RegExp(`${attrName}="([^"]*)"`, '');
601
- const match = tag.match(regex);
602
- return match ? match[1] : '';
603
- }
604
- async function dumpAccessibilityTreeXml(adb) {
605
- debug('dumping accessibility tree via uiautomator');
606
- const dumpCmd = `uiautomator dump ${DUMP_PATH}`;
607
- const dumpResult = await adb.shell(dumpCmd);
608
- debug('uiautomator dump result:', dumpResult);
609
- const xml = await adb.shell(`cat ${DUMP_PATH}`);
610
- adb.shell(`rm ${DUMP_PATH}`).catch(()=>{});
611
- return xml || '';
612
- }
613
- const MAX_TEXT_LENGTH = 200;
614
- function createEmptyFormatNode() {
615
- return {
616
- className: '',
617
- text: '',
618
- resourceId: '',
619
- contentDesc: '',
620
- clickable: false,
621
- selected: false,
622
- checked: false,
623
- scrollable: false,
624
- children: []
625
- };
626
- }
627
- function parseXmlToFormatTree(xml) {
628
- const root = createEmptyFormatNode();
629
- const stack = [
630
- root
631
- ];
632
- const tagRegex = /<node\s+([^>]*?)\/\s*>|<node\s+([^>]*?)>|<\/node>/g;
633
- let match;
634
- while(null !== (match = tagRegex.exec(xml))){
635
- const fullMatch = match[0];
636
- if (fullMatch.startsWith('</node')) {
637
- if (stack.length > 1) stack.pop();
638
- continue;
639
- }
640
- const attrString = match[1] || match[2] || '';
641
- const isSelfClosing = !!match[1];
642
- const text = extractAttr(attrString, 'text');
643
- const resourceId = extractAttr(attrString, 'resource-id');
644
- const className = extractAttr(attrString, 'class');
645
- const contentDesc = extractAttr(attrString, 'content-desc');
646
- const clickable = 'true' === extractAttr(attrString, 'clickable');
647
- const selected = 'true' === extractAttr(attrString, 'selected');
648
- const checked = 'true' === extractAttr(attrString, 'checked');
649
- const scrollable = 'true' === extractAttr(attrString, 'scrollable');
650
- if (isSelfClosing && !text && !contentDesc && !clickable && !scrollable) continue;
651
- const node = {
652
- className: shortClassName(className),
653
- text,
654
- resourceId,
655
- contentDesc,
656
- clickable,
657
- selected,
658
- checked,
659
- scrollable,
660
- children: []
661
- };
662
- const parent = stack[stack.length - 1];
663
- parent.children.push(node);
664
- if (!isSelfClosing) stack.push(node);
665
- }
666
- return root;
667
- }
668
- function shouldCollapseNode(node) {
669
- if (node.text) return false;
670
- if (node.contentDesc) return false;
671
- if (node.clickable) return false;
672
- if (node.scrollable) return false;
673
- if (node.resourceId && node.children.length >= 2) return false;
674
- return true;
675
- }
676
- function collapseWrappers(node) {
677
- node.children = node.children.flatMap((child)=>{
678
- collapseWrappers(child);
679
- if (shouldCollapseNode(child)) return child.children;
680
- return [
681
- child
682
- ];
683
- });
684
- }
685
- function escapeXmlAttr(str) {
686
- return str.replace(/&/g, '&amp;').replace(/"/g, '&quot;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
687
- }
688
- function truncateText(text, maxLength) {
689
- if (text.length <= maxLength) return text;
690
- return `${text.slice(0, maxLength)}...`;
691
- }
692
- function formatNodeToXml(node, indent) {
693
- const indentStr = ' '.repeat(indent);
694
- const tag = escapeXmlAttr(node.className) || 'node';
695
- const attrs = [];
696
- if (node.text) attrs.push(`text="${escapeXmlAttr(truncateText(node.text, MAX_TEXT_LENGTH))}"`);
697
- if (node.resourceId) attrs.push(`resource-id="${escapeXmlAttr(node.resourceId)}"`);
698
- if (node.contentDesc) attrs.push(`content-desc="${escapeXmlAttr(truncateText(node.contentDesc, MAX_TEXT_LENGTH))}"`);
699
- if (node.clickable) attrs.push('clickable="true"');
700
- if (node.selected) attrs.push('selected="true"');
701
- if (node.checked) attrs.push('checked="true"');
702
- if (node.scrollable) attrs.push('scrollable="true"');
703
- const attrStr = attrs.length > 0 ? ` ${attrs.join(' ')}` : '';
704
- if (0 === node.children.length) return `${indentStr}<${tag}${attrStr} />`;
705
- const childrenStr = node.children.map((child)=>formatNodeToXml(child, indent + 1)).join('\n');
706
- return `${indentStr}<${tag}${attrStr}>\n${childrenStr}\n${indentStr}</${tag}>`;
707
- }
708
- function formatTreeToXml(root) {
709
- if (0 === root.children.length) return '';
710
- return root.children.map((child)=>formatNodeToXml(child, 0)).join('\n');
711
- }
712
- async function dumpAndFormatAccessibilityTree(adb) {
713
- const xml = await dumpAccessibilityTreeXml(adb);
714
- if (xml.length < 10) {
715
- debug('uiautomator dump returned empty or too short XML');
716
- return '';
717
- }
718
- debug(`formatting uiautomator XML (${xml.length} chars)`);
719
- const root = parseXmlToFormatTree(xml);
720
- collapseWrappers(root);
721
- const result = formatTreeToXml(root);
722
- debug(`formatted accessibility tree (${result.length} chars)`);
723
- return result;
724
- }
725
590
  function device_define_property(obj, key, value) {
726
591
  if (key in obj) Object.defineProperty(obj, key, {
727
592
  value: value,
@@ -966,9 +831,6 @@ ${Object.keys(size).filter((key)=>size[key]).map((key)=>` ${key} size: ${size[k
966
831
  setAppNameMapping(mapping) {
967
832
  this.appNameMapping = mapping;
968
833
  }
969
- setDomIncluded(value) {
970
- this.domIncluded = value;
971
- }
972
834
  resolvePackageName(appName) {
973
835
  const normalizedAppName = (0, shared_utils_namespaceObject.normalizeForComparison)(appName);
974
836
  return this.appNameMapping[normalizedAppName];
@@ -1006,16 +868,11 @@ ${Object.keys(size).filter((key)=>size[key]).map((key)=>` ${key} size: ${size[k
1006
868
  async getElementsInfo() {
1007
869
  return [];
1008
870
  }
1009
- async getExtraPlanningContext() {
1010
- if (!this.domIncluded) return '';
1011
- try {
1012
- const adb = await this.getAdb();
1013
- const domDescription = await dumpAndFormatAccessibilityTree(adb);
1014
- if (domDescription) return `\nPage structure data in the below XML format. You can extract accurate textual content and relevant UI state annotations from it: \n<PageElementsTree>\n${domDescription}\n</PageElementsTree>`;
1015
- } catch (e) {
1016
- debugDevice('getExtraPlanningContext failed: %O', e);
1017
- }
1018
- return '';
871
+ async getElementsNodeTree() {
872
+ return {
873
+ node: null,
874
+ children: []
875
+ };
1019
876
  }
1020
877
  async getScreenSize() {
1021
878
  const shouldCache = !(this.options?.alwaysRefreshScreenInfo ?? false);
@@ -1733,7 +1590,6 @@ ${Object.keys(size).filter((key)=>size[key]).map((key)=>` ${key} size: ${size[k
1733
1590
  device_define_property(this, "interfaceType", 'android');
1734
1591
  device_define_property(this, "uri", void 0);
1735
1592
  device_define_property(this, "options", void 0);
1736
- device_define_property(this, "domIncluded", void 0);
1737
1593
  external_node_assert_default()(deviceId, 'deviceId is required for AndroidDevice');
1738
1594
  this.deviceId = deviceId;
1739
1595
  this.options = options;
@@ -1935,7 +1791,6 @@ ${Object.keys(size).filter((key)=>size[key]).map((key)=>` ${key} size: ${size[k
1935
1791
  super(device, opts), agent_define_property(this, "back", void 0), agent_define_property(this, "home", void 0), agent_define_property(this, "recentApps", void 0), agent_define_property(this, "appNameMapping", void 0);
1936
1792
  this.appNameMapping = (0, shared_utils_namespaceObject.mergeAndNormalizeAppNameMapping)(defaultAppNameMapping, opts?.appNameMapping);
1937
1793
  device.setAppNameMapping(this.appNameMapping);
1938
- if (opts?.domIncluded) device.setDomIncluded(opts.domIncluded);
1939
1794
  this.back = this.createActionWrapper('AndroidBackButton');
1940
1795
  this.home = this.createActionWrapper('AndroidHomeButton');
1941
1796
  this.recentApps = this.createActionWrapper('AndroidRecentAppsButton');
@@ -1956,22 +1811,14 @@ ${Object.keys(size).filter((key)=>size[key]).map((key)=>` ${key} size: ${size[k
1956
1811
  exports.AndroidAgent = __webpack_exports__.AndroidAgent;
1957
1812
  exports.AndroidDevice = __webpack_exports__.AndroidDevice;
1958
1813
  exports.agentFromAdbDevice = __webpack_exports__.agentFromAdbDevice;
1959
- exports.collapseWrappers = __webpack_exports__.collapseWrappers;
1960
- exports.dumpAndFormatAccessibilityTree = __webpack_exports__.dumpAndFormatAccessibilityTree;
1961
- exports.formatTreeToXml = __webpack_exports__.formatTreeToXml;
1962
1814
  exports.getConnectedDevices = __webpack_exports__.getConnectedDevices;
1963
1815
  exports.overrideAIConfig = __webpack_exports__.overrideAIConfig;
1964
- exports.parseXmlToFormatTree = __webpack_exports__.parseXmlToFormatTree;
1965
1816
  for(var __rspack_i in __webpack_exports__)if (-1 === [
1966
1817
  "AndroidAgent",
1967
1818
  "AndroidDevice",
1968
1819
  "agentFromAdbDevice",
1969
- "collapseWrappers",
1970
- "dumpAndFormatAccessibilityTree",
1971
- "formatTreeToXml",
1972
1820
  "getConnectedDevices",
1973
- "overrideAIConfig",
1974
- "parseXmlToFormatTree"
1821
+ "overrideAIConfig"
1975
1822
  ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
1976
1823
  Object.defineProperty(exports, '__esModule', {
1977
1824
  value: true
@@ -683,137 +683,6 @@ var __webpack_exports__ = {};
683
683
  this.initFailed = false;
684
684
  }
685
685
  }
686
- const debug = (0, logger_.getDebug)('android:ui-hierarchy');
687
- const DUMP_PATH = '/sdcard/midscene_dump.xml';
688
- function shortClassName(className) {
689
- return className.split('.').pop() || className;
690
- }
691
- function extractAttr(tag, attrName) {
692
- const regex = new RegExp(`${attrName}="([^"]*)"`, '');
693
- const match = tag.match(regex);
694
- return match ? match[1] : '';
695
- }
696
- async function dumpAccessibilityTreeXml(adb) {
697
- debug('dumping accessibility tree via uiautomator');
698
- const dumpCmd = `uiautomator dump ${DUMP_PATH}`;
699
- const dumpResult = await adb.shell(dumpCmd);
700
- debug('uiautomator dump result:', dumpResult);
701
- const xml = await adb.shell(`cat ${DUMP_PATH}`);
702
- adb.shell(`rm ${DUMP_PATH}`).catch(()=>{});
703
- return xml || '';
704
- }
705
- const MAX_TEXT_LENGTH = 200;
706
- function createEmptyFormatNode() {
707
- return {
708
- className: '',
709
- text: '',
710
- resourceId: '',
711
- contentDesc: '',
712
- clickable: false,
713
- selected: false,
714
- checked: false,
715
- scrollable: false,
716
- children: []
717
- };
718
- }
719
- function parseXmlToFormatTree(xml) {
720
- const root = createEmptyFormatNode();
721
- const stack = [
722
- root
723
- ];
724
- const tagRegex = /<node\s+([^>]*?)\/\s*>|<node\s+([^>]*?)>|<\/node>/g;
725
- let match;
726
- while(null !== (match = tagRegex.exec(xml))){
727
- const fullMatch = match[0];
728
- if (fullMatch.startsWith('</node')) {
729
- if (stack.length > 1) stack.pop();
730
- continue;
731
- }
732
- const attrString = match[1] || match[2] || '';
733
- const isSelfClosing = !!match[1];
734
- const text = extractAttr(attrString, 'text');
735
- const resourceId = extractAttr(attrString, 'resource-id');
736
- const className = extractAttr(attrString, 'class');
737
- const contentDesc = extractAttr(attrString, 'content-desc');
738
- const clickable = 'true' === extractAttr(attrString, 'clickable');
739
- const selected = 'true' === extractAttr(attrString, 'selected');
740
- const checked = 'true' === extractAttr(attrString, 'checked');
741
- const scrollable = 'true' === extractAttr(attrString, 'scrollable');
742
- if (isSelfClosing && !text && !contentDesc && !clickable && !scrollable) continue;
743
- const node = {
744
- className: shortClassName(className),
745
- text,
746
- resourceId,
747
- contentDesc,
748
- clickable,
749
- selected,
750
- checked,
751
- scrollable,
752
- children: []
753
- };
754
- const parent = stack[stack.length - 1];
755
- parent.children.push(node);
756
- if (!isSelfClosing) stack.push(node);
757
- }
758
- return root;
759
- }
760
- function shouldCollapseNode(node) {
761
- if (node.text) return false;
762
- if (node.contentDesc) return false;
763
- if (node.clickable) return false;
764
- if (node.scrollable) return false;
765
- if (node.resourceId && node.children.length >= 2) return false;
766
- return true;
767
- }
768
- function collapseWrappers(node) {
769
- node.children = node.children.flatMap((child)=>{
770
- collapseWrappers(child);
771
- if (shouldCollapseNode(child)) return child.children;
772
- return [
773
- child
774
- ];
775
- });
776
- }
777
- function escapeXmlAttr(str) {
778
- return str.replace(/&/g, '&amp;').replace(/"/g, '&quot;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
779
- }
780
- function truncateText(text, maxLength) {
781
- if (text.length <= maxLength) return text;
782
- return `${text.slice(0, maxLength)}...`;
783
- }
784
- function formatNodeToXml(node, indent) {
785
- const indentStr = ' '.repeat(indent);
786
- const tag = escapeXmlAttr(node.className) || 'node';
787
- const attrs = [];
788
- if (node.text) attrs.push(`text="${escapeXmlAttr(truncateText(node.text, MAX_TEXT_LENGTH))}"`);
789
- if (node.resourceId) attrs.push(`resource-id="${escapeXmlAttr(node.resourceId)}"`);
790
- if (node.contentDesc) attrs.push(`content-desc="${escapeXmlAttr(truncateText(node.contentDesc, MAX_TEXT_LENGTH))}"`);
791
- if (node.clickable) attrs.push('clickable="true"');
792
- if (node.selected) attrs.push('selected="true"');
793
- if (node.checked) attrs.push('checked="true"');
794
- if (node.scrollable) attrs.push('scrollable="true"');
795
- const attrStr = attrs.length > 0 ? ` ${attrs.join(' ')}` : '';
796
- if (0 === node.children.length) return `${indentStr}<${tag}${attrStr} />`;
797
- const childrenStr = node.children.map((child)=>formatNodeToXml(child, indent + 1)).join('\n');
798
- return `${indentStr}<${tag}${attrStr}>\n${childrenStr}\n${indentStr}</${tag}>`;
799
- }
800
- function formatTreeToXml(root) {
801
- if (0 === root.children.length) return '';
802
- return root.children.map((child)=>formatNodeToXml(child, 0)).join('\n');
803
- }
804
- async function dumpAndFormatAccessibilityTree(adb) {
805
- const xml = await dumpAccessibilityTreeXml(adb);
806
- if (xml.length < 10) {
807
- debug('uiautomator dump returned empty or too short XML');
808
- return '';
809
- }
810
- debug(`formatting uiautomator XML (${xml.length} chars)`);
811
- const root = parseXmlToFormatTree(xml);
812
- collapseWrappers(root);
813
- const result = formatTreeToXml(root);
814
- debug(`formatted accessibility tree (${result.length} chars)`);
815
- return result;
816
- }
817
686
  function device_define_property(obj, key, value) {
818
687
  if (key in obj) Object.defineProperty(obj, key, {
819
688
  value: value,
@@ -1058,9 +927,6 @@ ${Object.keys(size).filter((key)=>size[key]).map((key)=>` ${key} size: ${size[k
1058
927
  setAppNameMapping(mapping) {
1059
928
  this.appNameMapping = mapping;
1060
929
  }
1061
- setDomIncluded(value) {
1062
- this.domIncluded = value;
1063
- }
1064
930
  resolvePackageName(appName) {
1065
931
  const normalizedAppName = (0, utils_namespaceObject.normalizeForComparison)(appName);
1066
932
  return this.appNameMapping[normalizedAppName];
@@ -1098,16 +964,11 @@ ${Object.keys(size).filter((key)=>size[key]).map((key)=>` ${key} size: ${size[k
1098
964
  async getElementsInfo() {
1099
965
  return [];
1100
966
  }
1101
- async getExtraPlanningContext() {
1102
- if (!this.domIncluded) return '';
1103
- try {
1104
- const adb = await this.getAdb();
1105
- const domDescription = await dumpAndFormatAccessibilityTree(adb);
1106
- if (domDescription) return `\nPage structure data in the below XML format. You can extract accurate textual content and relevant UI state annotations from it: \n<PageElementsTree>\n${domDescription}\n</PageElementsTree>`;
1107
- } catch (e) {
1108
- debugDevice('getExtraPlanningContext failed: %O', e);
1109
- }
1110
- return '';
967
+ async getElementsNodeTree() {
968
+ return {
969
+ node: null,
970
+ children: []
971
+ };
1111
972
  }
1112
973
  async getScreenSize() {
1113
974
  const shouldCache = !(this.options?.alwaysRefreshScreenInfo ?? false);
@@ -1825,7 +1686,6 @@ ${Object.keys(size).filter((key)=>size[key]).map((key)=>` ${key} size: ${size[k
1825
1686
  device_define_property(this, "interfaceType", 'android');
1826
1687
  device_define_property(this, "uri", void 0);
1827
1688
  device_define_property(this, "options", void 0);
1828
- device_define_property(this, "domIncluded", void 0);
1829
1689
  external_node_assert_default()(deviceId, 'deviceId is required for AndroidDevice');
1830
1690
  this.deviceId = deviceId;
1831
1691
  this.options = options;
@@ -1930,7 +1790,6 @@ ${Object.keys(size).filter((key)=>size[key]).map((key)=>` ${key} size: ${size[k
1930
1790
  super(device, opts), agent_define_property(this, "back", void 0), agent_define_property(this, "home", void 0), agent_define_property(this, "recentApps", void 0), agent_define_property(this, "appNameMapping", void 0);
1931
1791
  this.appNameMapping = (0, utils_namespaceObject.mergeAndNormalizeAppNameMapping)(defaultAppNameMapping, opts?.appNameMapping);
1932
1792
  device.setAppNameMapping(this.appNameMapping);
1933
- if (opts?.domIncluded) device.setDomIncluded(opts.domIncluded);
1934
1793
  this.back = this.createActionWrapper('AndroidBackButton');
1935
1794
  this.home = this.createActionWrapper('AndroidHomeButton');
1936
1795
  this.recentApps = this.createActionWrapper('AndroidRecentAppsButton');
@@ -1947,7 +1806,7 @@ ${Object.keys(size).filter((key)=>size[key]).map((key)=>` ${key} size: ${size[k
1947
1806
  await device.connect();
1948
1807
  return new AndroidAgent(device, opts);
1949
1808
  }
1950
- const mcp_tools_debug = (0, logger_.getDebug)('mcp:android-tools');
1809
+ const debug = (0, logger_.getDebug)('mcp:android-tools');
1951
1810
  class AndroidMidsceneTools extends mcp_namespaceObject.BaseMidsceneTools {
1952
1811
  createTemporaryDevice() {
1953
1812
  return new AndroidDevice('temp-for-action-space', {});
@@ -1957,12 +1816,12 @@ ${Object.keys(size).filter((key)=>size[key]).map((key)=>` ${key} size: ${size[k
1957
1816
  try {
1958
1817
  await this.agent.destroy?.();
1959
1818
  } catch (error) {
1960
- mcp_tools_debug('Failed to destroy agent during cleanup:', error);
1819
+ debug('Failed to destroy agent during cleanup:', error);
1961
1820
  }
1962
1821
  this.agent = void 0;
1963
1822
  }
1964
1823
  if (this.agent) return this.agent;
1965
- mcp_tools_debug('Creating Android agent with deviceId:', deviceId || 'auto-detect');
1824
+ debug('Creating Android agent with deviceId:', deviceId || 'auto-detect');
1966
1825
  const agent = await agentFromAdbDevice(deviceId, {
1967
1826
  autoDismissKeyboard: false
1968
1827
  });
@@ -55,11 +55,6 @@ export declare type AndroidAgentOpt = AgentOpt & {
55
55
  * User-provided mappings will take precedence over default mappings
56
56
  */
57
57
  appNameMapping?: Record<string, string>;
58
- /**
59
- * Include accessibility tree (DOM) as reference in AI planning context.
60
- * When true, outputs a filtered and collapsed format optimized for token usage.
61
- */
62
- domIncluded?: boolean;
63
58
  };
64
59
 
65
60
  export declare class AndroidDevice implements AbstractInterface {
@@ -99,8 +94,6 @@ export declare class AndroidDevice implements AbstractInterface {
99
94
  * Set the app name to package name mapping
100
95
  */
101
96
  setAppNameMapping(mapping: Record<string, string>): void;
102
- private domIncluded;
103
- setDomIncluded(value: boolean): void;
104
97
  /**
105
98
  * Resolve app name to package name using the mapping
106
99
  * Comparison is case-insensitive and ignores spaces, dashes, and underscores.
@@ -111,7 +104,7 @@ export declare class AndroidDevice implements AbstractInterface {
111
104
  launch(uri: string): Promise<AndroidDevice>;
112
105
  execYadb(keyboardContent: string): Promise<void>;
113
106
  getElementsInfo(): Promise<ElementInfo[]>;
114
- getExtraPlanningContext(): Promise<string>;
107
+ getElementsNodeTree(): Promise<any>;
115
108
  getScreenSize(): Promise<{
116
109
  override: string;
117
110
  physical: string;
@@ -197,52 +190,16 @@ export declare class AndroidDevice implements AbstractInterface {
197
190
  hideKeyboard(options?: AndroidDeviceInputOpt, timeoutMs?: number): Promise<boolean>;
198
191
  }
199
192
 
200
- /**
201
- * Collapse pure wrapper containers by promoting their children to the parent.
202
- */
203
- export declare function collapseWrappers(node: FormatNode): void;
204
-
205
193
  declare type DeviceActionAndroidBackButton = DeviceAction<undefined, void>;
206
194
 
207
195
  declare type DeviceActionAndroidHomeButton = DeviceAction<undefined, void>;
208
196
 
209
197
  declare type DeviceActionAndroidRecentAppsButton = DeviceAction<undefined, void>;
210
198
 
211
- /**
212
- * Dump the Android accessibility tree, parse, collapse wrappers,
213
- * and format as clean XML without bounds for AI planning context.
214
- */
215
- export declare function dumpAndFormatAccessibilityTree(adb: ADB): Promise<string>;
216
-
217
- /**
218
- * Lightweight node for the formatting pipeline.
219
- * Only carries semantic attributes — no bounds/rect.
220
- */
221
- export declare interface FormatNode {
222
- className: string;
223
- text: string;
224
- resourceId: string;
225
- contentDesc: string;
226
- clickable: boolean;
227
- selected: boolean;
228
- checked: boolean;
229
- scrollable: boolean;
230
- children: FormatNode[];
231
- }
232
-
233
- /** Format the full tree to XML string, skipping the virtual root node */
234
- export declare function formatTreeToXml(root: FormatNode): string;
235
-
236
199
  export declare function getConnectedDevices(): Promise<Device[]>;
237
200
 
238
201
  export { overrideAIConfig }
239
202
 
240
- /**
241
- * Parse UIAutomator XML into a lightweight tree for formatting.
242
- * Does not retain bounds/rect — only semantic attributes.
243
- */
244
- export declare function parseXmlToFormatTree(xml: string): FormatNode;
245
-
246
203
  /**
247
204
  * Helper type to convert DeviceAction to wrapped method signature
248
205
  */
@@ -57,11 +57,6 @@ declare type AndroidAgentOpt = AgentOpt & {
57
57
  * User-provided mappings will take precedence over default mappings
58
58
  */
59
59
  appNameMapping?: Record<string, string>;
60
- /**
61
- * Include accessibility tree (DOM) as reference in AI planning context.
62
- * When true, outputs a filtered and collapsed format optimized for token usage.
63
- */
64
- domIncluded?: boolean;
65
60
  };
66
61
 
67
62
  declare class AndroidDevice implements AbstractInterface {
@@ -101,8 +96,6 @@ declare class AndroidDevice implements AbstractInterface {
101
96
  * Set the app name to package name mapping
102
97
  */
103
98
  setAppNameMapping(mapping: Record<string, string>): void;
104
- private domIncluded;
105
- setDomIncluded(value: boolean): void;
106
99
  /**
107
100
  * Resolve app name to package name using the mapping
108
101
  * Comparison is case-insensitive and ignores spaces, dashes, and underscores.
@@ -113,7 +106,7 @@ declare class AndroidDevice implements AbstractInterface {
113
106
  launch(uri: string): Promise<AndroidDevice>;
114
107
  execYadb(keyboardContent: string): Promise<void>;
115
108
  getElementsInfo(): Promise<ElementInfo[]>;
116
- getExtraPlanningContext(): Promise<string>;
109
+ getElementsNodeTree(): Promise<any>;
117
110
  getScreenSize(): Promise<{
118
111
  override: string;
119
112
  physical: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@midscene/android",
3
- "version": "1.3.5-beta-20260204095321.0",
3
+ "version": "1.3.5",
4
4
  "description": "Android automation library for Midscene",
5
5
  "keywords": [
6
6
  "Android UI automation",
@@ -38,8 +38,8 @@
38
38
  "@yume-chan/stream-extra": "^1.0.0",
39
39
  "appium-adb": "12.12.1",
40
40
  "sharp": "^0.34.3",
41
- "@midscene/core": "1.3.5-beta-20260204095321.0",
42
- "@midscene/shared": "1.3.5-beta-20260204095321.0"
41
+ "@midscene/shared": "1.3.5",
42
+ "@midscene/core": "1.3.5"
43
43
  },
44
44
  "optionalDependencies": {
45
45
  "@ffmpeg-installer/ffmpeg": "^1.1.0"
@@ -53,7 +53,7 @@
53
53
  "tsx": "^4.19.2",
54
54
  "vitest": "3.0.5",
55
55
  "zod": "3.24.3",
56
- "@midscene/playground": "1.3.5-beta-20260204095321.0"
56
+ "@midscene/playground": "1.3.5"
57
57
  },
58
58
  "license": "MIT",
59
59
  "scripts": {