npm - @midscene/shared - Versions diffs - 1.0.1-beta-20251208031823.0 → 1.0.1-beta-20251208033501.0 - Mend

@midscene/shared 1.0.1-beta-20251208031823.0 → 1.0.1-beta-20251208033501.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

package/dist/es/mcp/base-server.mjs +250 -0
package/dist/es/mcp/base-tools.mjs +84 -0
package/dist/es/mcp/index.mjs +4 -0
package/dist/es/mcp/tool-generator.mjs +215 -0
package/dist/es/mcp/types.mjs +3 -0
package/dist/es/node/fs.mjs +1 -1
package/dist/lib/baseDB.js +2 -2
package/dist/lib/build/copy-static.js +2 -2
package/dist/lib/build/rspack-config.js +2 -2
package/dist/lib/common.js +2 -2
package/dist/lib/constants/example-code.js +2 -2
package/dist/lib/constants/index.js +2 -2
package/dist/lib/env/basic.js +2 -2
package/dist/lib/env/constants.js +2 -2
package/dist/lib/env/global-config-manager.js +2 -2
package/dist/lib/env/helper.js +2 -2
package/dist/lib/env/index.js +6 -6
package/dist/lib/env/init-debug.js +2 -2
package/dist/lib/env/model-config-manager.js +2 -2
package/dist/lib/env/parse-model-config.js +2 -2
package/dist/lib/env/types.js +2 -2
package/dist/lib/env/utils.js +2 -2
package/dist/lib/extractor/constants.js +2 -2
package/dist/lib/extractor/debug.js +1 -1
package/dist/lib/extractor/dom-util.js +2 -2
package/dist/lib/extractor/index.js +2 -2
package/dist/lib/extractor/locator.js +2 -2
package/dist/lib/extractor/tree.js +2 -2
package/dist/lib/extractor/util.js +2 -2
package/dist/lib/extractor/web-extractor.js +2 -2
package/dist/lib/img/box-select.js +2 -2
package/dist/lib/img/draw-box.js +2 -2
package/dist/lib/img/get-jimp.js +2 -2
package/dist/lib/img/get-photon.js +2 -2
package/dist/lib/img/get-sharp.js +2 -2
package/dist/lib/img/index.js +2 -2
package/dist/lib/img/info.js +2 -2
package/dist/lib/img/transform.js +2 -2
package/dist/lib/index.js +2 -2
package/dist/lib/logger.js +2 -2
package/dist/lib/mcp/base-server.js +290 -0
package/dist/lib/mcp/base-tools.js +118 -0
package/dist/lib/mcp/index.js +79 -0
package/dist/lib/mcp/tool-generator.js +252 -0
package/dist/lib/mcp/types.js +40 -0
package/dist/lib/node/fs.js +3 -3
package/dist/lib/node/index.js +2 -2
package/dist/lib/polyfills/async-hooks.js +2 -2
package/dist/lib/polyfills/index.js +2 -2
package/dist/lib/types/index.js +2 -2
package/dist/lib/us-keyboard-layout.js +2 -2
package/dist/lib/utils.js +2 -2
package/dist/types/mcp/base-server.d.ts +77 -0
package/dist/types/mcp/base-tools.d.ts +51 -0
package/dist/types/mcp/index.d.ts +4 -0
package/dist/types/mcp/tool-generator.d.ts +11 -0
package/dist/types/mcp/types.d.ts +98 -0
package/package.json +17 -3
package/src/mcp/base-server.ts +432 -0
package/src/mcp/base-tools.ts +190 -0
package/src/mcp/index.ts +4 -0
package/src/mcp/tool-generator.ts +311 -0
package/src/mcp/types.ts +106 -0

package/src/mcp/base-tools.ts ADDED Viewed

@@ -0,0 +1,190 @@
+import { parseBase64 } from '@midscene/shared/img';
+import { getDebug } from '@midscene/shared/logger';
+import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+import {
+  generateCommonTools,
+  generateToolsFromActionSpace,
+} from './tool-generator';
+import type {
+  ActionSpaceItem,
+  BaseAgent,
+  BaseDevice,
+  IMidsceneTools,
+  ToolDefinition,
+} from './types';
+const debug = getDebug('mcp:base-tools'); // module-level debug logger obtained from getDebug with the 'mcp:base-tools' topic
+/**
+ * Shared base for platform-specific MCP tool managers.
+ *
+ * `initTools()` collects tool definitions (platform tools, tools generated
+ * from the action space, and common helper tools); `attachToServer()` then
+ * registers them on an MCP server. Subclasses supply the agent and a
+ * temporary device that is used to read the action space.
+ */
+export abstract class BaseMidsceneTools implements IMidsceneTools {
+  protected mcpServer?: McpServer;
+  protected agent?: BaseAgent;
+  protected toolDefinitions: ToolDefinition[] = [];
+  /**
+   * Ensure agent is initialized and ready for use.
+   * Must be implemented by subclasses to create platform-specific agent.
+   * @param initParam Optional initialization parameter (platform-specific, e.g., URL, device ID)
+   * @returns Promise resolving to initialized agent instance
+   * @throws Error if agent initialization fails
+   */
+  protected abstract ensureAgent(initParam?: string): Promise<BaseAgent>;
+  /**
+   * Optional: prepare platform-specific tools (e.g., device connection).
+   * The default implementation contributes no tools.
+   */
+  protected preparePlatformTools(): ToolDefinition[] {
+    return [];
+  }
+  /**
+   * Must be implemented by subclasses to create a temporary device instance
+   * This allows getting real actionSpace without connecting to device
+   */
+  protected abstract createTemporaryDevice(): BaseDevice;
+  /**
+   * Initialize all tools by querying actionSpace
+   * Uses two-layer fallback strategy:
+   * 1. Try to get actionSpace from connected agent (if available)
+   * 2. Otherwise create a temporary device instance to read actionSpace
+   *
+   * Any previously prepared tool definitions are discarded first.
+   * @throws Whatever the temporary device throws if the fallback itself fails
+   */
+  public async initTools(): Promise<void> {
+    this.toolDefinitions = [];
+    // 1. Add platform-specific tools first (device connection, etc.)
+    // These don't require an agent and should always be available
+    const platformTools = this.preparePlatformTools();
+    this.toolDefinitions.push(...platformTools);
+    // 2. Try to get agent and its action space (two-layer fallback)
+    let actionSpace: ActionSpaceItem[];
+    try {
+      // Layer 1: Try to use connected agent
+      const agent = await this.ensureAgent();
+      actionSpace = await agent.getActionSpace();
+      debug(
+        'Action space from connected agent:',
+        actionSpace.map((a) => a.name).join(', '),
+      );
+    } catch (error) {
+      // Layer 2: Create temporary device instance to read actionSpace
+      // This is expected behavior for bridge mode without URL or unconnected devices
+      const errorMessage =
+        error instanceof Error ? error.message : String(error);
+      if (
+        errorMessage.includes('requires a URL') ||
+        errorMessage.includes('web_connect')
+      ) {
+        debug(
+          'Bridge mode detected - agent will be initialized on first web_connect call',
+        );
+      } else {
+        debug(
+          'Agent not available yet, using temporary device for action space',
+        );
+      }
+      const tempDevice = this.createTemporaryDevice();
+      try {
+        actionSpace = tempDevice.actionSpace();
+        debug(
+          'Action space from temporary device:',
+          actionSpace.map((a) => a.name).join(', '),
+        );
+      } finally {
+        // Release the temporary instance even when reading the action space
+        // failed; destroy is optional on the device, hence the optional call
+        await tempDevice.destroy?.();
+      }
+    }
+    // 3. Generate tools from action space (core innovation)
+    const actionTools = generateToolsFromActionSpace(actionSpace, () =>
+      this.ensureAgent(),
+    );
+    // 4. Add common tools (screenshot, waitFor)
+    const commonTools = generateCommonTools(() => this.ensureAgent());
+    this.toolDefinitions.push(...actionTools, ...commonTools);
+    debug('Total tools prepared:', this.toolDefinitions.length);
+  }
+  /**
+   * Attach to MCP server and register all tools
+   *
+   * Definitions flagged `autoDestroy` are registered through the auto-destroy
+   * wrapper; all others are registered with their handler as-is.
+   * @param server MCP server that receives the tool registrations
+   */
+  public attachToServer(server: McpServer): void {
+    this.mcpServer = server;
+    if (this.toolDefinitions.length === 0) {
+      debug('Warning: No tools to register. Tools may be initialized lazily.');
+    }
+    for (const toolDef of this.toolDefinitions) {
+      if (toolDef.autoDestroy) {
+        this.toolWithAutoDestroy(
+          toolDef.name,
+          toolDef.description,
+          toolDef.schema,
+          toolDef.handler,
+        );
+      } else {
+        this.mcpServer.tool(
+          toolDef.name,
+          toolDef.description,
+          toolDef.schema,
+          toolDef.handler,
+        );
+      }
+    }
+    debug('Registered', this.toolDefinitions.length, 'tools');
+  }
+  /**
+   * Wrapper for auto-destroy behavior
+   *
+   * Registers a handler that, after every invocation (success or failure),
+   * destroys the current agent and clears the reference. Setting the
+   * MIDSCENE_MCP_DISABLE_AGENT_AUTO_DESTROY environment variable to any
+   * non-empty value skips that cleanup.
+   * @throws Error if no MCP server has been attached yet
+   */
+  private toolWithAutoDestroy(
+    name: string,
+    description: string,
+    schema: any,
+    handler: (...args: any[]) => Promise<any>,
+  ): void {
+    if (!this.mcpServer) {
+      throw new Error('MCP server not attached');
+    }
+    this.mcpServer.tool(name, description, schema, async (...args: any[]) => {
+      try {
+        return await handler(...args);
+      } finally {
+        if (!process.env.MIDSCENE_MCP_DISABLE_AGENT_AUTO_DESTROY) {
+          try {
+            await this.agent?.destroy?.();
+          } catch (error) {
+            // Cleanup is best-effort: never let it replace the handler's result
+            debug('Failed to destroy agent during cleanup:', error);
+          }
+          this.agent = undefined;
+        }
+      }
+    });
+  }
+  /**
+   * Cleanup method - destroy agent and release resources
+   *
+   * The agent reference is cleared before destruction starts so that a
+   * destroyed (or partially destroyed) agent is not kept around for reuse,
+   * matching what the auto-destroy wrapper does.
+   * @throws Whatever the agent's destroy() rejects with
+   */
+  public async closeBrowser(): Promise<void> {
+    const agent = this.agent;
+    this.agent = undefined;
+    await agent?.destroy?.();
+  }
+  /**
+   * Helper: Convert base64 screenshot to image content array
+   * @param screenshot Screenshot string accepted by parseBase64
+   * @returns Single-element array holding an MCP image content item
+   */
+  protected buildScreenshotContent(screenshot: string) {
+    const { mimeType, body } = parseBase64(screenshot);
+    return [
+      {
+        type: 'image' as const,
+        data: body,
+        mimeType,
+      },
+    ];
+  }
+}

package/src/mcp/index.ts ADDED Viewed

@@ -0,0 +1,4 @@
+export * from './base-server';
+export * from './base-tools';
+export * from './tool-generator';
+export * from './types';

package/src/mcp/tool-generator.ts ADDED Viewed

@@ -0,0 +1,311 @@
+import { parseBase64 } from '@midscene/shared/img';
+import { z } from 'zod';
+import type {
+  ActionSpaceItem,
+  BaseAgent,
+  ToolDefinition,
+  ToolResult,
+} from './types';
+/**
+ * Extract a readable message from a value of unknown type that was thrown.
+ *
+ * - `Error` instances yield their `message`.
+ * - Other objects yield their string `message` property when present,
+ *   otherwise their JSON form (instead of the useless "[object Object]").
+ * - Everything else is converted with `String()`.
+ * @param error The caught value
+ * @returns A message suitable for logs and tool results
+ */
+function getErrorMessage(error: unknown): string {
+  if (error instanceof Error) {
+    return error.message;
+  }
+  if (typeof error === 'object' && error !== null) {
+    const { message } = error as { message?: unknown };
+    if (typeof message === 'string') {
+      return message;
+    }
+    try {
+      // JSON.stringify can return undefined at runtime (e.g. toJSON() returning undefined)
+      const json: string | undefined = JSON.stringify(error);
+      if (json !== undefined) {
+        return json;
+      }
+    } catch {
+      // Circular or otherwise non-serializable object: fall through
+    }
+    // Never throws, unlike String() on objects without a prototype
+    return Object.prototype.toString.call(error);
+  }
+  return String(error);
+}
+/**
+ * Type guard: reports whether the given Zod type is a ZodOptional wrapper,
+ * judged by the `typeName` recorded in its `_def`.
+ */
+function isZodOptional(
+  value: z.ZodTypeAny,
+): value is z.ZodOptional<z.ZodTypeAny> {
+  if (!('_def' in value)) {
+    return false;
+  }
+  return value._def?.typeName === 'ZodOptional';
+}
+/**
+ * Type guard: reports whether the given Zod type is a ZodObject, i.e. its
+ * `_def.typeName` is 'ZodObject' and it exposes a `shape`.
+ */
+function isZodObject(value: z.ZodTypeAny): value is z.ZodObject<z.ZodRawShape> {
+  if (!('_def' in value)) {
+    return false;
+  }
+  if (value._def?.typeName !== 'ZodObject') {
+    return false;
+  }
+  return 'shape' in value;
+}
+/**
+ * Look through one ZodOptional layer.
+ * @returns The wrapped type and `isOptional: true` for a ZodOptional,
+ *          otherwise the value itself and `isOptional: false`
+ */
+function unwrapOptional(value: z.ZodTypeAny): {
+  innerValue: z.ZodTypeAny;
+  isOptional: boolean;
+} {
+  return isZodOptional(value)
+    ? { innerValue: value._def.innerType, isOptional: true }
+    : { innerValue: value, isOptional: false };
+}
+/**
+ * Detect the locate-field pattern: a Zod object schema whose shape has a
+ * 'prompt' key. Anything that is not a Zod object is never a locate field.
+ */
+function isLocateField(value: z.ZodTypeAny): boolean {
+  return isZodObject(value) ? 'prompt' in value.shape : false;
+}
+/**
+ * Transform a locate field schema to make its 'prompt' field optional
+ *
+ * The result is derived from the original object via `.extend()` rather than
+ * rebuilt from its shape, so settings stored on the original schema (such as
+ * its description) carry over. Unknown keys are passed through.
+ * @param value Object schema that has a 'prompt' field
+ * @param wrapInOptional Whether the returned schema itself should be optional
+ * @returns The relaxed schema
+ */
+function makePromptOptional(
+  value: z.ZodObject<z.ZodRawShape>,
+  wrapInOptional: boolean,
+): z.ZodTypeAny {
+  const relaxed: z.ZodTypeAny = value
+    .extend({ prompt: value.shape.prompt.optional() })
+    .passthrough();
+  return wrapInOptional ? relaxed.optional() : relaxed;
+}
+/**
+ * Map one schema entry: when the field (looking through an optional wrapper)
+ * is an object schema with a 'prompt' key, return it with 'prompt' made
+ * optional; every other field is returned untouched.
+ */
+function transformSchemaField(
+  key: string,
+  value: z.ZodTypeAny,
+): [string, z.ZodTypeAny] {
+  const unwrapped = unwrapOptional(value);
+  const candidate = unwrapped.innerValue;
+  if (!isZodObject(candidate) || !('prompt' in candidate.shape)) {
+    return [key, value];
+  }
+  return [key, makePromptOptional(candidate, unwrapped.isOptional)];
+}
+/**
+ * Extract and transform schema from action's paramSchema
+ *
+ * @param paramSchema The action's parameter schema, if it has one
+ * @returns A map of field name to Zod type: empty when there is no schema,
+ *          otherwise the object's fields with locate prompts made optional.
+ *          An object schema wrapped in ZodOptional is looked through so its
+ *          fields are still exposed.
+ */
+function extractActionSchema(
+  paramSchema: z.ZodTypeAny | undefined,
+): Record<string, z.ZodTypeAny> {
+  if (!paramSchema) {
+    return {};
+  }
+  // An optional object schema still describes named fields
+  const { innerValue } = unwrapOptional(paramSchema);
+  if (!isZodObject(innerValue)) {
+    // NOTE(review): a non-object schema is handed back unchanged even though
+    // it is not a field map - confirm what callers expect in this case
+    return paramSchema as unknown as Record<string, z.ZodTypeAny>;
+  }
+  return Object.fromEntries(
+    Object.entries(innerValue.shape).map(([key, value]) =>
+      transformSchemaField(key, value as z.ZodTypeAny),
+    ),
+  );
+}
+/**
+ * Serialize args to human-readable description for AI action
+ *
+ * Entries are rendered as `key: <JSON>` for objects and `key: "<value>"` for
+ * everything else, joined with ", ". Entries whose value is `undefined` are
+ * left out so that omitted optional arguments do not show up in the
+ * instruction as the literal text "undefined".
+ * @param args Tool arguments
+ * @returns The description, or an empty string when there is nothing to describe
+ */
+function serializeArgsToDescription(args: Record<string, unknown>): string {
+  try {
+    return Object.entries(args)
+      .filter(([, value]) => value !== undefined)
+      .map(([key, value]) => {
+        if (typeof value === 'object' && value !== null) {
+          try {
+            return `${key}: ${JSON.stringify(value)}`;
+          } catch {
+            // Circular reference or non-serializable object
+            return `${key}: [object]`;
+          }
+        }
+        // String() also copes with symbols, which a template literal would reject
+        return `${key}: "${String(value)}"`;
+      })
+      .join(', ');
+  } catch (error: unknown) {
+    const errorMessage = getErrorMessage(error);
+    console.error('Error serializing args:', errorMessage);
+    return `[args serialization failed: ${errorMessage}]`;
+  }
+}
+/**
+ * Compose the natural-language instruction for an action: the action name,
+ * followed by the serialized arguments when there are any.
+ */
+function buildActionInstruction(
+  actionName: string,
+  args: Record<string, unknown>,
+): string {
+  const described = serializeArgsToDescription(args);
+  if (!described) {
+    return `Use the action "${actionName}"`;
+  }
+  return `Use the action "${actionName}" with ${described}`;
+}
+/**
+ * Build the success result for an action: a completion text, plus the
+ * current screenshot as an image item when the agent can provide one.
+ * A failure while taking or parsing the screenshot is logged and reported
+ * inside the text instead of being thrown.
+ */
+async function captureScreenshotResult(
+  agent: BaseAgent,
+  actionName: string,
+): Promise<ToolResult> {
+  try {
+    const shot = await agent.page?.screenshotBase64();
+    const content: ToolResult['content'] = [
+      { type: 'text', text: `Action "${actionName}" completed.` },
+    ];
+    if (shot) {
+      const parsed = parseBase64(shot);
+      content.push({
+        type: 'image',
+        data: parsed.body,
+        mimeType: parsed.mimeType,
+      });
+    }
+    return { content };
+  } catch (error: unknown) {
+    const reason = getErrorMessage(error);
+    console.error('Error capturing screenshot:', reason);
+    return {
+      content: [
+        {
+          type: 'text',
+          text: `Action "${actionName}" completed (screenshot unavailable: ${reason})`,
+        },
+      ],
+    };
+  }
+}
+/**
+ * Wrap a message in a tool result that is flagged as an error
+ * (single text content item, `isError: true`).
+ */
+function createErrorResult(message: string): ToolResult {
+  const failure: ToolResult = {
+    content: [{ type: 'text', text: message }],
+    isError: true,
+  };
+  return failure;
+}
+/**
+ * Converts DeviceAction from actionSpace into MCP ToolDefinition
+ * This is the core logic that removes need for hardcoded tool definitions
+ *
+ * Each generated handler obtains the agent, asks it to perform the action via
+ * `aiAction` with an instruction built from the arguments, and answers with a
+ * completion text plus screenshot. Failures are returned as error results,
+ * never thrown. An agent without `aiAction` also yields an error result,
+ * because in that case nothing was executed.
+ * @param actionSpace Actions to expose as tools
+ * @param getAgent Provider of the agent, invoked on every tool call
+ * @returns One auto-destroy tool definition per action
+ */
+export function generateToolsFromActionSpace(
+  actionSpace: ActionSpaceItem[],
+  getAgent: () => Promise<BaseAgent>,
+): ToolDefinition[] {
+  return actionSpace.map((action) => {
+    const schema = extractActionSchema(action.paramSchema as z.ZodTypeAny);
+    return {
+      name: action.name,
+      description: action.description || `Execute ${action.name} action`,
+      schema,
+      handler: async (args: Record<string, unknown>) => {
+        try {
+          const agent = await getAgent();
+          if (!agent.aiAction) {
+            // Do not report "completed" for an action that never ran
+            return createErrorResult(
+              `Failed to execute action "${action.name}": agent does not support aiAction`,
+            );
+          }
+          const instruction = buildActionInstruction(action.name, args);
+          try {
+            await agent.aiAction(instruction);
+          } catch (error: unknown) {
+            const errorMessage = getErrorMessage(error);
+            console.error(
+              `Error executing action "${action.name}":`,
+              errorMessage,
+            );
+            return createErrorResult(
+              `Failed to execute action "${action.name}": ${errorMessage}`,
+            );
+          }
+          return await captureScreenshotResult(agent, action.name);
+        } catch (error: unknown) {
+          const errorMessage = getErrorMessage(error);
+          console.error(`Error in handler for "${action.name}":`, errorMessage);
+          return createErrorResult(
+            `Failed to get agent or execute action "${action.name}": ${errorMessage}`,
+          );
+        }
+      },
+      autoDestroy: true,
+    };
+  });
+}
+/**
+ * Generate common tools (screenshot, waitFor)
+ * SIMPLIFIED: Only keep essential helper tools, removed assert
+ *
+ * - `take_screenshot` returns the current screenshot as an image item, or an
+ *   error result when none is available.
+ * - `wait_for` delegates to the agent's `aiWaitFor`; an agent without
+ *   `aiWaitFor` yields an error result, since the condition was never checked.
+ * Both handlers report failures as error results instead of throwing.
+ * @param getAgent Provider of the agent, invoked on every tool call
+ * @returns The two auto-destroy tool definitions
+ */
+export function generateCommonTools(
+  getAgent: () => Promise<BaseAgent>,
+): ToolDefinition[] {
+  return [
+    {
+      name: 'take_screenshot',
+      description: 'Capture screenshot of current page/screen',
+      schema: {},
+      handler: async (): Promise<ToolResult> => {
+        try {
+          const agent = await getAgent();
+          const screenshot = await agent.page?.screenshotBase64();
+          if (!screenshot) {
+            return createErrorResult('Screenshot not available');
+          }
+          const { mimeType, body } = parseBase64(screenshot);
+          return {
+            content: [{ type: 'image', data: body, mimeType }],
+          };
+        } catch (error: unknown) {
+          const errorMessage = getErrorMessage(error);
+          console.error('Error taking screenshot:', errorMessage);
+          return createErrorResult(
+            `Failed to capture screenshot: ${errorMessage}`,
+          );
+        }
+      },
+      autoDestroy: true,
+    },
+    {
+      name: 'wait_for',
+      description: 'Wait until condition becomes true',
+      schema: {
+        assertion: z.string().describe('Condition to wait for'),
+        timeoutMs: z.number().optional().default(15000),
+        checkIntervalMs: z.number().optional().default(3000),
+      },
+      handler: async (args): Promise<ToolResult> => {
+        try {
+          const agent = await getAgent();
+          const { assertion, timeoutMs, checkIntervalMs } = args as {
+            assertion: string;
+            timeoutMs?: number;
+            checkIntervalMs?: number;
+          };
+          if (!agent.aiWaitFor) {
+            // Do not claim the condition was met when it was never checked
+            return createErrorResult(
+              'Wait condition failed: agent does not support aiWaitFor',
+            );
+          }
+          await agent.aiWaitFor(assertion, { timeoutMs, checkIntervalMs });
+          return {
+            content: [{ type: 'text', text: `Condition met: "${assertion}"` }],
+          };
+        } catch (error: unknown) {
+          const errorMessage = getErrorMessage(error);
+          console.error('Error in wait_for:', errorMessage);
+          return createErrorResult(`Wait condition failed: ${errorMessage}`);
+        }
+      },
+      autoDestroy: true,
+    },
+  ];
+}

package/src/mcp/types.ts ADDED Viewed

@@ -0,0 +1,106 @@
+import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+import type { z } from 'zod';
+// Avoid circular dependency: don't import from @midscene/core
+// Instead, use generic types that will be provided by implementation
+/**
+ * Default timeout constants for app loading verification
+ * Both values are in milliseconds: the overall timeout and the interval
+ * between checks.
+ */
+export const defaultAppLoadingTimeoutMs = 10000;
+export const defaultAppLoadingCheckIntervalMs = 2000;
+/**
+ * Content item types for tool results (MCP compatible)
+ *
+ * A union discriminated on `type`:
+ * - `text`: a message in `text`
+ * - `image` / `audio`: a payload string in `data` together with its `mimeType`
+ * - `resource`: a `resource` object with a `uri` and either `text` or `blob`
+ */
+export type ToolResultContent =
+  | { type: 'text'; text: string }
+  | { type: 'image'; data: string; mimeType: string }
+  | { type: 'audio'; data: string; mimeType: string }
+  | {
+      type: 'resource';
+      resource:
+        | { text: string; uri: string; mimeType?: string }
+        | { uri: string; blob: string; mimeType?: string };
+    };
+/**
+ * Result type for tool execution (MCP compatible)
+ *
+ * Carries the content items produced by a tool handler. The index signature
+ * admits additional properties beyond the ones declared here.
+ */
+export interface ToolResult {
+  [x: string]: unknown; // any further properties are allowed
+  content: ToolResultContent[]; // items returned to the caller, in order
+  isError?: boolean; // optional flag marking the result as a failure
+  _meta?: Record<string, unknown>; // optional free-form metadata
+}
+/**
+ * Tool handler function type
+ * Takes parsed arguments and returns a tool result
+ *
+ * `T` is the shape of the arguments object and defaults to a generic
+ * string-keyed record. The handler is asynchronous.
+ */
+export type ToolHandler<T = Record<string, unknown>> = (
+  args: T,
+) => Promise<ToolResult>;
+/**
+ * Tool schema type using Zod
+ * A map from parameter name to the Zod type describing that parameter.
+ */
+export type ToolSchema = Record<string, z.ZodTypeAny>;
+/**
+ * Tool definition for MCP server
+ *
+ * Bundles everything needed to register one tool: its name, description,
+ * parameter schema and handler. `T` is the handler's argument shape.
+ */
+export interface ToolDefinition<T = Record<string, unknown>> {
+  name: string; // tool name used at registration
+  description: string; // human-readable description of the tool
+  schema: ToolSchema; // parameter schema; an empty object means no parameters
+  handler: ToolHandler<T>; // function invoked with the parsed arguments
+  autoDestroy?: boolean; // optional flag; NOTE(review): presumably requests agent cleanup after each call - confirm against the registering code
+}
+/**
+ * Action space item definition
+ *
+ * Describes one action. Only `name` is required; the index signature lets
+ * implementations attach further properties, which are typed as `unknown`
+ * and therefore have to be narrowed or cast by the reader.
+ */
+export interface ActionSpaceItem {
+  name: string; // action name
+  description?: string; // optional human-readable description
+  args?: Record<string, unknown>; // optional arguments record
+  [key: string]: unknown; // any additional, implementation-specific properties
+}
+/**
+ * Base agent interface
+ * Represents a platform-specific agent (Android, iOS, Web)
+ *
+ * Only `getActionSpace` is mandatory. `destroy`, `page`, `aiAction` and
+ * `aiWaitFor` are optional, so callers must check for their presence before
+ * using them.
+ */
+export interface BaseAgent {
+  getActionSpace(): Promise<ActionSpaceItem[]>; // resolves to the actions this agent offers
+  destroy?(): Promise<void>; // optional asynchronous teardown
+  page?: {
+    screenshotBase64(): Promise<string>; // resolves to a screenshot as a string (base64 per the method name)
+  };
+  aiAction?: (
+    description: string,
+    params?: Record<string, unknown>,
+  ) => Promise<void>; // optional: perform an action described in natural language
+  aiWaitFor?: (
+    assertion: string,
+    options: Record<string, unknown>,
+  ) => Promise<void>; // optional: wait for the given assertion, configured through options
+}
+/**
+ * Base device interface for temporary device instances
+ *
+ * Exposes the device's action space synchronously, plus an optional
+ * asynchronous teardown.
+ */
+export interface BaseDevice {
+  actionSpace(): ActionSpaceItem[]; // returns the actions the device supports
+  destroy?(): Promise<void>; // optional asynchronous teardown
+}
+/**
+ * Interface for platform-specific MCP tools manager
+ *
+ * A manager prepares its tools with `initTools` and registers them on a
+ * server with `attachToServer`; `closeBrowser` is an optional cleanup hook.
+ */
+export interface IMidsceneTools {
+  attachToServer(server: McpServer): void; // register tools on the given MCP server
+  initTools(): Promise<void>; // asynchronously prepare the tool definitions
+  closeBrowser?(): Promise<void>; // optional asynchronous cleanup
+}