react-native-ai-debugger 1.0.47 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -3,9 +3,10 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
3
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
4
  import { z } from "zod";
5
5
  import { getGuideOverview, getGuideByTopic, getAvailableTopics } from "./core/guides.js";
6
+ import { tap } from "./core/tap.js";
6
7
  import { logBuffer, networkBuffer, bundleErrorBuffer, connectedApps, getActiveSimulatorUdid, scanMetroPorts, fetchDevices, selectMainDevice, connectToDevice, getConnectedApps, executeInApp, listDebugGlobals, inspectGlobal, reloadApp,
7
8
  // React Component Inspection
8
- getComponentTree, getScreenLayout, inspectComponent, findComponents, pressElement, inspectAtPoint, toggleElementInspector, isInspectorActive, getInspectorSelection, getFirstConnectedApp, getLogs, searchLogs, getLogSummary, getNetworkRequests, searchNetworkRequests, getNetworkStats, formatRequestDetails,
9
+ getComponentTree, getScreenLayout, inspectComponent, findComponents, inspectAtPoint, toggleElementInspector, isInspectorActive, getInspectorSelection, getFirstConnectedApp, getLogs, searchLogs, getLogSummary, getNetworkRequests, searchNetworkRequests, getNetworkStats, formatRequestDetails,
9
10
  // Connection state
10
11
  getAllConnectionStates, getAllConnectionMetadata, getRecentGaps, formatDuration,
11
12
  // Context health tracking
@@ -23,13 +24,13 @@ listAndroidDevices, androidScreenshot, androidInstallApp, androidLaunchApp, andr
23
24
  // Android UI Input (Phase 2)
24
25
  ANDROID_KEY_EVENTS, androidTap, androidLongPress, androidSwipe, androidInputText, androidKeyEvent, androidGetScreenSize, androidGetDensity, androidGetStatusBarHeight,
25
26
  // Android Accessibility (UI Hierarchy)
26
- androidDescribeAll, androidDescribePoint, androidTapElement,
27
+ androidDescribeAll, androidDescribePoint,
27
28
  // Android Element Finding (no screenshots)
28
29
  androidFindElement, androidWaitForElement,
29
30
  // iOS
30
31
  listIOSSimulators, iosScreenshot, iosInstallApp, iosLaunchApp, iosOpenUrl, iosTerminateApp, iosBootSimulator,
31
32
  // iOS IDB-based UI tools
32
- iosTap, iosTapElement, iosSwipe, iosInputText, iosButton, iosKeyEvent, iosKeySequence, iosDescribeAll, iosDescribePoint, IOS_BUTTON_TYPES,
33
+ iosTap, iosSwipe, iosInputText, iosButton, iosKeyEvent, iosKeySequence, iosDescribeAll, iosDescribePoint, IOS_BUTTON_TYPES,
33
34
  // iOS Element Finding (no screenshots)
34
35
  iosFindElement, iosWaitForElement,
35
36
  // Debug HTTP Server
@@ -43,7 +44,7 @@ const server = new McpServer({
43
44
  name: "react-native-ai-debugger",
44
45
  version: "1.0.0"
45
46
  }, {
46
- instructions: "React Native debugging MCP server. Call get_usage_guide to learn recommended workflows for all tools. Quick start: scan_metro → get_logs / search_logs (console debugging) → ios_screenshot → get_inspector_selection(x, y) (identify components)."
47
+ instructions: "React Native debugging MCP server. Call get_usage_guide to learn recommended workflows for all tools. Quick start: scan_metro → get_logs / search_logs (console debugging) → ios_screenshot → tap(text=\"Submit\") or tap(x, y) (interact with UI)."
47
48
  });
48
49
  // ============================================================================
49
50
  // Telemetry Wrapper
@@ -1012,49 +1013,61 @@ registerToolWithTelemetry("find_components", {
1012
1013
  ]
1013
1014
  };
1014
1015
  });
1015
- // Tool: Press element via React Fiber tree
1016
- registerToolWithTelemetry("press_element", {
1017
- description: "Press a UI element by finding it in the React fiber tree and calling its onPress handler directly. " +
1018
- "Bypasses the accessibility layer — works on elements without accessibilityLabel. " +
1019
- "Matches by text content, testID, or component name. At least one search criterion must be provided. " +
1020
- "Works only in __DEV__ mode. Use get_component_tree or find_components first to understand the UI structure. " +
1021
- "LIMITATION: text param only supports ASCII — for non-Latin text (Cyrillic, CJK, etc.), use testID or component param instead, or fall back to ocr_screenshot + tap.",
1016
+ // Tool: Unified tap — tries fiber, accessibility, OCR, coordinate strategies
1017
+ registerToolWithTelemetry("tap", {
1018
+ description: "Tap a UI element. Automatically tries multiple strategies: fiber tree (React), accessibility tree (native), and OCR (visual). " +
1019
+ "Auto-detects platform (iOS/Android). For coordinates, accepts pixels from screenshot and converts internally.\n\n" +
1020
+ "Examples:\n" +
1021
+ "- tap(text=\"Submit\") — finds and taps element with matching text\n" +
1022
+ "- tap(testID=\"login-btn\") — finds by testID\n" +
1023
+ "- tap(component=\"HamburgerIcon\") — finds by React component name\n" +
1024
+ "- tap(x=300, y=600) — taps at pixel coordinates from screenshot\n" +
1025
+ "- tap(text=\"Menu\", strategy=\"ocr\") — forces OCR strategy only",
1022
1026
  inputSchema: {
1023
1027
  text: z
1024
1028
  .string()
1025
1029
  .optional()
1026
- .describe("Case-insensitive partial match on the element's text content (e.g., 'Submit', 'Log in'). ASCII only — non-Latin characters (Cyrillic, CJK, etc.) cause Hermes parse errors. Use testID or component for localized UIs."),
1027
- testID: z.string().optional().describe("Exact match on the element's testID prop"),
1030
+ .describe("Visible text to match (case-insensitive substring). ASCII only for fiber strategy; OCR handles non-ASCII."),
1031
+ testID: z
1032
+ .string()
1033
+ .optional()
1034
+ .describe("Exact match on the element's testID prop."),
1028
1035
  component: z
1029
1036
  .string()
1030
1037
  .optional()
1031
- .describe("Case-insensitive partial match on the component's displayName or name (e.g., 'Button', 'MenuItem')"),
1038
+ .describe("Component name match (case-insensitive substring, e.g. 'Button', 'MenuItem')."),
1032
1039
  index: z.coerce
1033
1040
  .number()
1034
1041
  .optional()
1035
- .default(0)
1036
- .describe("Zero-based index when multiple elements match (default: 0). If unsure, omit to press the first match.")
1037
- }
1038
- }, async ({ text, testID, component, index }) => {
1039
- const result = await pressElement({ text, testID, component, index });
1040
- if (!result.success) {
1041
- return {
1042
- content: [
1043
- {
1044
- type: "text",
1045
- text: `Error: ${result.error}`
1046
- }
1047
- ],
1048
- isError: true
1049
- };
1050
- }
1042
+ .describe("Zero-based index when multiple elements match (default: 0)."),
1043
+ x: z.coerce
1044
+ .number()
1045
+ .optional()
1046
+ .describe("X coordinate in pixels (from screenshot). Must provide both x and y."),
1047
+ y: z.coerce
1048
+ .number()
1049
+ .optional()
1050
+ .describe("Y coordinate in pixels (from screenshot). Must provide both x and y."),
1051
+ strategy: z
1052
+ .enum(["auto", "fiber", "accessibility", "ocr", "coordinate"])
1053
+ .optional()
1054
+ .default("auto")
1055
+ .describe('"auto" (default) tries fiber -> accessibility -> OCR. Set explicitly to skip strategies you know will fail.'),
1056
+ },
1057
+ }, async (args) => {
1058
+ const result = await tap({
1059
+ text: args.text,
1060
+ testID: args.testID,
1061
+ component: args.component,
1062
+ index: args.index,
1063
+ x: args.x,
1064
+ y: args.y,
1065
+ strategy: args.strategy,
1066
+ });
1067
+ const text = JSON.stringify(result, null, 2);
1051
1068
  return {
1052
- content: [
1053
- {
1054
- type: "text",
1055
- text: result.result || "Element pressed successfully."
1056
- }
1057
- ]
1069
+ content: [{ type: "text", text }],
1070
+ isError: !result.success,
1058
1071
  };
1059
1072
  });
1060
1073
  // Tool: Toggle Element Inspector programmatically
@@ -1731,6 +1744,15 @@ registerToolWithTelemetry("android_screenshot", {
1731
1744
  // Build info text with coordinate conversion guidance
1732
1745
  const pixelWidth = result.originalWidth || 0;
1733
1746
  const pixelHeight = result.originalHeight || 0;
1747
+ // Store screenshot metadata for coordinate conversion
1748
+ const firstApp = connectedApps.values().next().value;
1749
+ if (firstApp) {
1750
+ firstApp.lastScreenshot = {
1751
+ originalWidth: pixelWidth,
1752
+ originalHeight: pixelHeight,
1753
+ scaleFactor: result.scaleFactor || 1,
1754
+ };
1755
+ }
1734
1756
  let infoText = `Screenshot captured (${pixelWidth}x${pixelHeight} pixels)`;
1735
1757
  // Get status bar height for coordinate guidance
1736
1758
  let statusBarPixels = 63; // Default fallback
@@ -1866,29 +1888,6 @@ registerToolWithTelemetry("android_list_packages", {
1866
1888
  // ============================================================================
1867
1889
  // Android UI Input Tools (Phase 2)
1868
1890
  // ============================================================================
1869
- // Tool: Android tap
1870
- registerToolWithTelemetry("android_tap", {
1871
- description: "Tap at specific coordinates on an Android device/emulator screen. WORKFLOW: Use ocr_screenshot first to get tap coordinates, then use this tool with the returned tapX/tapY values.",
1872
- inputSchema: {
1873
- x: z.coerce.number().describe("X coordinate in pixels"),
1874
- y: z.coerce.number().describe("Y coordinate in pixels"),
1875
- deviceId: z
1876
- .string()
1877
- .optional()
1878
- .describe("Optional device ID. Uses first available device if not specified.")
1879
- }
1880
- }, async ({ x, y, deviceId }) => {
1881
- const result = await androidTap(x, y, deviceId);
1882
- return {
1883
- content: [
1884
- {
1885
- type: "text",
1886
- text: result.success ? result.result : `Error: ${result.error}`
1887
- }
1888
- ],
1889
- isError: !result.success
1890
- };
1891
- });
1892
1891
  // Tool: Android long press
1893
1892
  registerToolWithTelemetry("android_long_press", {
1894
1893
  description: "Long press at specific coordinates on an Android device/emulator screen",
@@ -2063,47 +2062,6 @@ server.registerTool("android_describe_point", {
2063
2062
  isError: !result.success
2064
2063
  };
2065
2064
  });
2066
- // Tool: Android tap element
2067
- server.registerTool("android_tap_element", {
2068
- description: "Tap an element by its text, content-description, or resource-id using uiautomator. TIP: Consider using ocr_screenshot first - it returns ready-to-use tap coordinates for all visible text and works more reliably across different apps.",
2069
- inputSchema: {
2070
- text: z.string().optional().describe("Exact text match for the element"),
2071
- textContains: z.string().optional().describe("Partial text match (case-insensitive)"),
2072
- contentDesc: z.string().optional().describe("Exact content-description match"),
2073
- contentDescContains: z.string().optional().describe("Partial content-description match (case-insensitive)"),
2074
- resourceId: z
2075
- .string()
2076
- .optional()
2077
- .describe("Resource ID match (e.g., 'com.app:id/button' or just 'button')"),
2078
- index: z
2079
- .number()
2080
- .optional()
2081
- .describe("If multiple elements match, tap the nth one (0-indexed, default: 0)"),
2082
- deviceId: z
2083
- .string()
2084
- .optional()
2085
- .describe("Optional device ID. Uses first available device if not specified.")
2086
- }
2087
- }, async ({ text, textContains, contentDesc, contentDescContains, resourceId, index, deviceId }) => {
2088
- const result = await androidTapElement({
2089
- text,
2090
- textContains,
2091
- contentDesc,
2092
- contentDescContains,
2093
- resourceId,
2094
- index,
2095
- deviceId
2096
- });
2097
- return {
2098
- content: [
2099
- {
2100
- type: "text",
2101
- text: result.success ? result.result : `Error: ${result.error}`
2102
- }
2103
- ],
2104
- isError: !result.success
2105
- };
2106
- });
2107
2065
  // Tool: Android find element (no screenshot needed)
2108
2066
  server.registerTool("android_find_element", {
2109
2067
  description: "Find a UI element on Android screen by text, content description, or resource ID. Returns element details including tap coordinates. Use this to check if an element exists without tapping it. Workflow: 1) wait_for_element, 2) find_element, 3) tap with returned coordinates. Prefer this over screenshots for button taps.",
@@ -2287,6 +2245,15 @@ registerToolWithTelemetry("ios_screenshot", {
2287
2245
  // Build info text with coordinate guidance for iOS
2288
2246
  const pixelWidth = result.originalWidth || 0;
2289
2247
  const pixelHeight = result.originalHeight || 0;
2248
+ // Store screenshot metadata for coordinate conversion
2249
+ const firstApp = connectedApps.values().next().value;
2250
+ if (firstApp) {
2251
+ firstApp.lastScreenshot = {
2252
+ originalWidth: pixelWidth,
2253
+ originalHeight: pixelHeight,
2254
+ scaleFactor: result.scaleFactor || 1,
2255
+ };
2256
+ }
2290
2257
  // Try to get actual screen dimensions and safe area from accessibility tree
2291
2258
  let pointWidth = 0;
2292
2259
  let pointHeight = 0;
@@ -2548,55 +2515,6 @@ registerToolWithTelemetry("ios_boot_simulator", {
2548
2515
  // iOS IDB-Based UI Tools (require Facebook IDB)
2549
2516
  // Install with: brew install idb-companion
2550
2517
  // ============================================================================
2551
- // Tool: iOS tap
2552
- server.registerTool("ios_tap", {
2553
- description: "Tap at specific coordinates on an iOS simulator screen. WORKFLOW: Use ocr_screenshot first to get tap coordinates, then use this tool with the returned tapX/tapY values. Requires IDB (brew install idb-companion).",
2554
- inputSchema: {
2555
- x: z.coerce.number().describe("X coordinate in pixels"),
2556
- y: z.coerce.number().describe("Y coordinate in pixels"),
2557
- duration: z.number().optional().describe("Optional tap duration in seconds (for long press)"),
2558
- udid: z.string().optional().describe("Optional simulator UDID. Uses booted simulator if not specified.")
2559
- }
2560
- }, async ({ x, y, duration, udid }) => {
2561
- const result = await iosTap(x, y, { duration, udid });
2562
- return {
2563
- content: [
2564
- {
2565
- type: "text",
2566
- text: result.success ? result.result : `Error: ${result.error}`
2567
- }
2568
- ],
2569
- isError: !result.success
2570
- };
2571
- });
2572
- // Tool: iOS tap element by label
2573
- server.registerTool("ios_tap_element", {
2574
- description: "Tap an element by its accessibility label. Requires IDB (brew install idb-companion). TIP: Consider using ocr_screenshot first - it returns ready-to-use tap coordinates for all visible text and works without requiring accessibility labels.",
2575
- inputSchema: {
2576
- label: z.string().optional().describe("Exact accessibility label to match (e.g., 'Home', 'Settings')"),
2577
- labelContains: z
2578
- .string()
2579
- .optional()
2580
- .describe("Partial label match, case-insensitive (e.g., 'Circular' matches 'Circulars, 3, 12 total')"),
2581
- index: z
2582
- .number()
2583
- .optional()
2584
- .describe("If multiple elements match, tap the nth one (0-indexed, default: 0)"),
2585
- duration: z.number().optional().describe("Optional tap duration in seconds (for long press)"),
2586
- udid: z.string().optional().describe("Optional simulator UDID. Uses booted simulator if not specified.")
2587
- }
2588
- }, async ({ label, labelContains, index, duration, udid }) => {
2589
- const result = await iosTapElement({ label, labelContains, index, duration, udid });
2590
- return {
2591
- content: [
2592
- {
2593
- type: "text",
2594
- text: result.success ? result.result : `Error: ${result.error}`
2595
- }
2596
- ],
2597
- isError: !result.success
2598
- };
2599
- });
2600
2518
  // Tool: iOS swipe
2601
2519
  server.registerTool("ios_swipe", {
2602
2520
  description: "Swipe gesture on an iOS simulator screen. Requires IDB to be installed (brew install idb-companion).",