@midscene/shared 1.9.1 → 1.9.2-beta-20260605084246.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/cli/cli-runner.mjs +5 -2
- package/dist/es/env/parse-model-config.mjs +1 -1
- package/dist/es/mcp/base-server.mjs +14 -1
- package/dist/es/mcp/base-tools.mjs +7 -2
- package/dist/es/mcp/index.mjs +1 -0
- package/dist/es/mcp/tool-defaults.mjs +54 -0
- package/dist/es/mcp/tool-generator.mjs +44 -7
- package/dist/lib/cli/cli-runner.js +5 -2
- package/dist/lib/env/parse-model-config.js +1 -1
- package/dist/lib/mcp/base-server.js +14 -1
- package/dist/lib/mcp/base-tools.js +7 -2
- package/dist/lib/mcp/index.js +21 -14
- package/dist/lib/mcp/tool-defaults.js +97 -0
- package/dist/lib/mcp/tool-generator.js +44 -7
- package/dist/types/mcp/base-server.d.ts +14 -1
- package/dist/types/mcp/base-tools.d.ts +14 -0
- package/dist/types/mcp/index.d.ts +1 -0
- package/dist/types/mcp/tool-defaults.d.ts +64 -0
- package/dist/types/mcp/tool-generator.d.ts +3 -2
- package/dist/types/mcp/types.d.ts +2 -0
- package/package.json +1 -1
- package/src/cli/cli-runner.ts +13 -2
- package/src/mcp/base-server.ts +30 -1
- package/src/mcp/base-tools.ts +20 -0
- package/src/mcp/index.ts +1 -0
- package/src/mcp/tool-defaults.ts +120 -0
- package/src/mcp/tool-generator.ts +100 -3
- package/src/mcp/types.ts +2 -0
|
@@ -7,6 +7,7 @@ import {
|
|
|
7
7
|
unwrapZodField,
|
|
8
8
|
} from '../zod-schema-utils';
|
|
9
9
|
import { getErrorMessage } from './error-formatter';
|
|
10
|
+
import type { ToolDefaults } from './tool-defaults';
|
|
10
11
|
import type {
|
|
11
12
|
ActionSpaceItem,
|
|
12
13
|
BaseAgent,
|
|
@@ -290,6 +291,62 @@ function normalizeActionArgs(
|
|
|
290
291
|
);
|
|
291
292
|
}
|
|
292
293
|
|
|
294
|
+
/**
 * Fill the gaps in a locate object from a map of default values.
 *
 * A default is applied only when the caller left that key `undefined`; any
 * other explicit value, including `false` and `null`, is kept as-is.
 * Because `deepThink` is the deprecated spelling of `deepLocate`, a caller
 * that supplied `deepThink` is treated as having already chosen `deepLocate`,
 * so the `deepLocate` default is skipped in that case.
 *
 * @param locate - Locate options supplied by the caller; never mutated.
 * @param defaults - Fallback values keyed by option name.
 * @returns `locate` itself when no default applied, otherwise a shallow copy
 *   of `locate` with the missing keys appended in `defaults` order.
 */
function mergeLocateDefaults(
  locate: Record<string, unknown>,
  defaults: Record<string, unknown>,
): Record<string, unknown> {
  // Decided against the caller's original object, never the partially merged copy.
  const callerAlreadySet = (name: string): boolean =>
    locate[name] !== undefined ||
    (name === 'deepLocate' && locate.deepThink !== undefined);

  const missing = Object.entries(defaults).filter(
    ([name]) => !callerAlreadySet(name),
  );

  // Nothing to add: hand back the same reference instead of cloning.
  if (missing.length === 0) {
    return locate;
  }

  const result: Record<string, unknown> = { ...locate };
  for (const [name, fallback] of missing) {
    result[name] = fallback;
  }
  return result;
}
|
|
316
|
+
|
|
317
|
+
/**
 * Push locate defaults into each locator-shaped argument of an action.
 *
 * An argument receives defaults only when the action's param schema declares
 * a field with that name, `isMidsceneLocatorField` accepts that field, and
 * the supplied value passes `isRecord`. Every other argument is passed
 * through unchanged. The defaults are treated as an opaque key/value map, so
 * this function does not special-case any individual default key.
 *
 * @param args - Arguments for the action call.
 * @param paramSchema - The action's parameter schema, if it has one.
 * @param locateDefaults - Default values to merge into each locator argument.
 * @returns `args` itself when there is no schema, no defaults, or no object
 *   shape can be read from the schema; otherwise a new object with the same
 *   keys and merged locator values.
 */
function applyLocateDefaults(
  args: Record<string, unknown>,
  paramSchema: z.ZodTypeAny | undefined,
  locateDefaults: Record<string, unknown>,
): Record<string, unknown> {
  if (!paramSchema) {
    return args;
  }
  if (Object.keys(locateDefaults).length === 0) {
    return args;
  }

  const fields = getZodObjectShape(paramSchema);
  if (!fields) {
    return args;
  }

  const entries: Array<[string, unknown]> = [];
  for (const [name, raw] of Object.entries(args)) {
    const declared = fields[name] as z.ZodTypeAny | undefined;
    if (declared && isMidsceneLocatorField(declared) && isRecord(raw)) {
      // Locator argument: fill in whatever the caller left unset.
      entries.push([name, mergeLocateDefaults(raw, locateDefaults)]);
    } else {
      entries.push([name, raw]);
    }
  }
  return Object.fromEntries(entries);
}
|
|
349
|
+
|
|
293
350
|
/**
|
|
294
351
|
* Serialize args to human-readable description for AI action
|
|
295
352
|
*/
|
|
@@ -490,6 +547,7 @@ export function generateToolsFromActionSpace(
|
|
|
490
547
|
) => args,
|
|
491
548
|
initArgSchema: ToolSchema = {},
|
|
492
549
|
initArgCliMetadata?: ToolCliMetadata,
|
|
550
|
+
toolDefaults: ToolDefaults = {},
|
|
493
551
|
): ToolDefinition[] {
|
|
494
552
|
return actionSpace.map((action) => {
|
|
495
553
|
const schema = {
|
|
@@ -505,10 +563,17 @@ export function generateToolsFromActionSpace(
|
|
|
505
563
|
handler: async (args: Record<string, unknown>) => {
|
|
506
564
|
try {
|
|
507
565
|
const agent = await getAgent(args);
|
|
508
|
-
|
|
566
|
+
let normalizedArgs = normalizeActionArgs(
|
|
509
567
|
sanitizeArgs(args),
|
|
510
568
|
action.paramSchema,
|
|
511
569
|
);
|
|
570
|
+
if (toolDefaults.locate) {
|
|
571
|
+
normalizedArgs = applyLocateDefaults(
|
|
572
|
+
normalizedArgs,
|
|
573
|
+
action.paramSchema,
|
|
574
|
+
toolDefaults.locate,
|
|
575
|
+
);
|
|
576
|
+
}
|
|
512
577
|
let actionResult: unknown;
|
|
513
578
|
|
|
514
579
|
try {
|
|
@@ -553,6 +618,7 @@ export function generateCommonTools(
|
|
|
553
618
|
getAgent: (args?: Record<string, unknown>) => Promise<BaseAgent>,
|
|
554
619
|
initArgSchema: ToolSchema = {},
|
|
555
620
|
initArgCliMetadata?: ToolCliMetadata,
|
|
621
|
+
toolDefaults: ToolDefaults = {},
|
|
556
622
|
): ToolDefinition[] {
|
|
557
623
|
return [
|
|
558
624
|
{
|
|
@@ -597,6 +663,18 @@ export function generateCommonTools(
|
|
|
597
663
|
.describe(
|
|
598
664
|
'Natural language description of the action to perform, e.g. "press Command+Space, type Safari, press Enter"',
|
|
599
665
|
),
|
|
666
|
+
deepLocate: z
|
|
667
|
+
.boolean()
|
|
668
|
+
.optional()
|
|
669
|
+
.describe(
|
|
670
|
+
'Use deep locate for every element this action targets. Improves precision for small or ambiguous targets at the cost of speed. Defaults to the server --deep-locate setting.',
|
|
671
|
+
),
|
|
672
|
+
deepThink: z
|
|
673
|
+
.boolean()
|
|
674
|
+
.optional()
|
|
675
|
+
.describe(
|
|
676
|
+
'Plan this action with deep thinking (richer context and sub-goal decomposition). Helps with complex multi-step instructions at the cost of speed. Defaults to the server --deep-think setting.',
|
|
677
|
+
),
|
|
600
678
|
...initArgSchema,
|
|
601
679
|
},
|
|
602
680
|
cli: mergeToolCliMetadata(undefined, initArgCliMetadata),
|
|
@@ -609,7 +687,19 @@ export function generateCommonTools(
|
|
|
609
687
|
if (!agent.aiAction) {
|
|
610
688
|
return createErrorResult('act is not supported by this agent');
|
|
611
689
|
}
|
|
612
|
-
|
|
690
|
+
// Start from the act defaults (deepThink off), overlay the server
|
|
691
|
+
// tool defaults, then let explicit per-call args win.
|
|
692
|
+
const actOptions: Record<string, unknown> = {
|
|
693
|
+
deepThink: false,
|
|
694
|
+
...toolDefaults.act,
|
|
695
|
+
};
|
|
696
|
+
if (args.deepLocate !== undefined) {
|
|
697
|
+
actOptions.deepLocate = args.deepLocate;
|
|
698
|
+
}
|
|
699
|
+
if (args.deepThink !== undefined) {
|
|
700
|
+
actOptions.deepThink = args.deepThink;
|
|
701
|
+
}
|
|
702
|
+
const result = await agent.aiAction(prompt, actOptions);
|
|
613
703
|
return await captureScreenshotResult(agent, 'act', result);
|
|
614
704
|
} catch (error: unknown) {
|
|
615
705
|
const errorMessage = getErrorMessage(error);
|
|
@@ -628,6 +718,12 @@ export function generateCommonTools(
|
|
|
628
718
|
.describe(
|
|
629
719
|
'Natural language assertion to verify, e.g. "there is a login button visible"',
|
|
630
720
|
),
|
|
721
|
+
message: z
|
|
722
|
+
.string()
|
|
723
|
+
.optional()
|
|
724
|
+
.describe(
|
|
725
|
+
'Custom error message to throw when the assertion fails, e.g. "the login button should be visible".',
|
|
726
|
+
),
|
|
631
727
|
...promptInputExtraSchema,
|
|
632
728
|
...initArgSchema,
|
|
633
729
|
},
|
|
@@ -636,6 +732,7 @@ export function generateCommonTools(
|
|
|
636
732
|
args: Record<string, unknown> = {},
|
|
637
733
|
): Promise<ToolResult> => {
|
|
638
734
|
const prompt = args.prompt as string;
|
|
735
|
+
const message = args.message as string | undefined;
|
|
639
736
|
try {
|
|
640
737
|
const agent = await getAgent(args);
|
|
641
738
|
if (!agent.aiAssert) {
|
|
@@ -647,7 +744,7 @@ export function generateCommonTools(
|
|
|
647
744
|
imageName: args.imageName,
|
|
648
745
|
convertHttpImage2Base64: args.convertHttpImage2Base64,
|
|
649
746
|
});
|
|
650
|
-
await agent.aiAssert(userPrompt);
|
|
747
|
+
await agent.aiAssert(userPrompt, message);
|
|
651
748
|
return {
|
|
652
749
|
content: [{ type: 'text', text: 'Assertion passed.' }],
|
|
653
750
|
};
|
package/src/mcp/types.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
2
|
import type { z } from 'zod';
|
|
3
|
+
import type { ToolDefaults } from './tool-defaults';
|
|
3
4
|
|
|
4
5
|
// Avoid circular dependency: don't import from @midscene/core
|
|
5
6
|
// Instead, use generic types that will be provided by implementation
|
|
@@ -148,4 +149,5 @@ export interface IMidsceneTools {
|
|
|
148
149
|
attachToServer(server: McpServer): void;
|
|
149
150
|
initTools(): Promise<void>;
|
|
150
151
|
destroy?(): Promise<void>;
|
|
152
|
+
setToolDefaults?(toolDefaults: ToolDefaults): void;
|
|
151
153
|
}
|