@midscene/shared 1.9.7 → 1.9.8-beta-20260618014851.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent-tools/agent-behavior-init-args.mjs +44 -0
- package/dist/es/{mcp → agent-tools}/base-tools.mjs +1 -8
- package/dist/es/{mcp → agent-tools}/chrome-path.mjs +14 -3
- package/dist/es/{mcp → agent-tools}/index.mjs +1 -3
- package/dist/es/{mcp → agent-tools}/tool-generator.mjs +6 -5
- package/dist/es/cli/cli-runner.mjs +1 -1
- package/dist/es/env/parse-model-config.mjs +1 -1
- package/dist/es/env/types.mjs +3 -5
- package/dist/es/img/index.mjs +2 -2
- package/dist/es/img/transform.mjs +18 -1
- package/dist/es/utils.mjs +2 -6
- package/dist/lib/agent-tools/agent-behavior-init-args.js +87 -0
- package/dist/lib/{mcp → agent-tools}/base-tools.js +1 -8
- package/dist/lib/{mcp → agent-tools}/chrome-path.js +13 -2
- package/dist/lib/{mcp → agent-tools}/index.js +10 -24
- package/dist/lib/{mcp → agent-tools}/tool-generator.js +6 -5
- package/dist/lib/cli/cli-runner.js +1 -1
- package/dist/lib/env/parse-model-config.js +1 -1
- package/dist/lib/env/types.js +5 -10
- package/dist/lib/img/index.js +3 -0
- package/dist/lib/img/transform.js +20 -0
- package/dist/lib/utils.js +8 -15
- package/dist/types/agent-tools/agent-behavior-init-args.d.ts +17 -0
- package/dist/types/{mcp → agent-tools}/base-tools.d.ts +7 -13
- package/dist/types/{mcp → agent-tools}/index.d.ts +1 -3
- package/dist/types/{mcp → agent-tools}/init-arg-utils.d.ts +3 -3
- package/dist/types/{mcp → agent-tools}/tool-defaults.d.ts +5 -6
- package/dist/types/{mcp → agent-tools}/tool-generator.d.ts +1 -1
- package/dist/types/{mcp → agent-tools}/types.d.ts +20 -13
- package/dist/types/cli/cli-args.d.ts +1 -1
- package/dist/types/cli/cli-runner.d.ts +2 -2
- package/dist/types/env/types.d.ts +8 -6
- package/dist/types/img/index.d.ts +1 -1
- package/dist/types/img/transform.d.ts +4 -0
- package/dist/types/key-alias-utils.d.ts +2 -2
- package/dist/types/utils.d.ts +0 -1
- package/package.json +15 -8
- package/src/agent-tools/agent-behavior-init-args.ts +109 -0
- package/src/{mcp → agent-tools}/base-tools.ts +8 -33
- package/src/{mcp → agent-tools}/chrome-path.ts +20 -3
- package/src/{mcp → agent-tools}/index.ts +1 -3
- package/src/{mcp → agent-tools}/init-arg-utils.ts +3 -3
- package/src/{mcp → agent-tools}/tool-defaults.ts +5 -6
- package/src/{mcp → agent-tools}/tool-generator.ts +14 -7
- package/src/{mcp → agent-tools}/types.ts +22 -10
- package/src/cli/cli-args.ts +1 -1
- package/src/cli/cli-runner.ts +4 -4
- package/src/env/types.ts +5 -5
- package/src/img/index.ts +2 -0
- package/src/img/transform.ts +45 -0
- package/src/key-alias-utils.ts +2 -2
- package/src/utils.ts +1 -10
- package/dist/es/mcp/base-server.mjs +0 -295
- package/dist/es/mcp/inject-report-html-plugin.mjs +0 -53
- package/dist/es/mcp/launcher-helper.mjs +0 -52
- package/dist/lib/mcp/base-server.js +0 -345
- package/dist/lib/mcp/inject-report-html-plugin.js +0 -98
- package/dist/lib/mcp/launcher-helper.js +0 -86
- package/dist/types/mcp/base-server.d.ts +0 -106
- package/dist/types/mcp/inject-report-html-plugin.d.ts +0 -18
- package/dist/types/mcp/launcher-helper.d.ts +0 -94
- package/src/mcp/base-server.ts +0 -529
- package/src/mcp/inject-report-html-plugin.ts +0 -119
- package/src/mcp/launcher-helper.ts +0 -200
- /package/dist/es/{mcp → agent-tools}/cli-report-session.mjs +0 -0
- /package/dist/es/{mcp → agent-tools}/error-formatter.mjs +0 -0
- /package/dist/es/{mcp → agent-tools}/init-arg-utils.mjs +0 -0
- /package/dist/es/{mcp → agent-tools}/tool-defaults.mjs +0 -0
- /package/dist/es/{mcp → agent-tools}/types.mjs +0 -0
- /package/dist/es/{mcp → agent-tools}/user-prompt.mjs +0 -0
- /package/dist/lib/{mcp → agent-tools}/cli-report-session.js +0 -0
- /package/dist/lib/{mcp → agent-tools}/error-formatter.js +0 -0
- /package/dist/lib/{mcp → agent-tools}/init-arg-utils.js +0 -0
- /package/dist/lib/{mcp → agent-tools}/tool-defaults.js +0 -0
- /package/dist/lib/{mcp → agent-tools}/types.js +0 -0
- /package/dist/lib/{mcp → agent-tools}/user-prompt.js +0 -0
- /package/dist/types/{mcp → agent-tools}/chrome-path.d.ts +0 -0
- /package/dist/types/{mcp → agent-tools}/cli-report-session.d.ts +0 -0
- /package/dist/types/{mcp → agent-tools}/error-formatter.d.ts +0 -0
- /package/dist/types/{mcp → agent-tools}/user-prompt.d.ts +0 -0
- /package/src/{mcp → agent-tools}/cli-report-session.ts +0 -0
- /package/src/{mcp → agent-tools}/error-formatter.ts +0 -0
- /package/src/{mcp → agent-tools}/user-prompt.ts +0 -0
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
|
|
3
|
+
export interface AgentBehaviorInitArgs {
|
|
4
|
+
aiActContext?: string;
|
|
5
|
+
aiActionContext?: string;
|
|
6
|
+
replanningCycleLimit?: number;
|
|
7
|
+
waitAfterAction?: number;
|
|
8
|
+
screenshotShrinkFactor?: number;
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
type ExposedAgentBehaviorInitArgKey = Exclude<
|
|
12
|
+
keyof AgentBehaviorInitArgs,
|
|
13
|
+
'aiActionContext'
|
|
14
|
+
>;
|
|
15
|
+
|
|
16
|
+
export const agentBehaviorInitArgShape = {
|
|
17
|
+
aiActContext: z
|
|
18
|
+
.string()
|
|
19
|
+
.optional()
|
|
20
|
+
.describe(
|
|
21
|
+
'Background knowledge passed to aiAct. Default: no extra context.',
|
|
22
|
+
),
|
|
23
|
+
replanningCycleLimit: z
|
|
24
|
+
.number()
|
|
25
|
+
.int()
|
|
26
|
+
.nonnegative()
|
|
27
|
+
.optional()
|
|
28
|
+
.describe(
|
|
29
|
+
'Maximum number of replanning cycles for aiAct. Default: model adapter default.',
|
|
30
|
+
),
|
|
31
|
+
waitAfterAction: z
|
|
32
|
+
.number()
|
|
33
|
+
.nonnegative()
|
|
34
|
+
.optional()
|
|
35
|
+
.describe(
|
|
36
|
+
'Wait time in milliseconds after each action execution. Default: 300ms.',
|
|
37
|
+
),
|
|
38
|
+
screenshotShrinkFactor: z
|
|
39
|
+
.number()
|
|
40
|
+
.min(1)
|
|
41
|
+
.optional()
|
|
42
|
+
.describe(
|
|
43
|
+
'Screenshot shrink factor before sending images to AI. Default: 1; high values may reduce recognition quality, especially on mobile.',
|
|
44
|
+
),
|
|
45
|
+
} satisfies Record<ExposedAgentBehaviorInitArgKey, z.ZodTypeAny>;
|
|
46
|
+
|
|
47
|
+
export function extractAgentBehaviorInitArgs(
|
|
48
|
+
extracted: Partial<AgentBehaviorInitArgs> | undefined,
|
|
49
|
+
): AgentBehaviorInitArgs | undefined {
|
|
50
|
+
if (!extracted) {
|
|
51
|
+
return undefined;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
const agentOptions: AgentBehaviorInitArgs = {
|
|
55
|
+
...(typeof extracted.aiActContext === 'string'
|
|
56
|
+
? { aiActContext: extracted.aiActContext }
|
|
57
|
+
: {}),
|
|
58
|
+
...(typeof extracted.aiActionContext === 'string'
|
|
59
|
+
? { aiActionContext: extracted.aiActionContext }
|
|
60
|
+
: {}),
|
|
61
|
+
...(typeof extracted.replanningCycleLimit === 'number'
|
|
62
|
+
? { replanningCycleLimit: extracted.replanningCycleLimit }
|
|
63
|
+
: {}),
|
|
64
|
+
...(typeof extracted.waitAfterAction === 'number'
|
|
65
|
+
? { waitAfterAction: extracted.waitAfterAction }
|
|
66
|
+
: {}),
|
|
67
|
+
...(typeof extracted.screenshotShrinkFactor === 'number'
|
|
68
|
+
? { screenshotShrinkFactor: extracted.screenshotShrinkFactor }
|
|
69
|
+
: {}),
|
|
70
|
+
};
|
|
71
|
+
|
|
72
|
+
return Object.keys(agentOptions).length > 0 ? agentOptions : undefined;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
function stableJsonValue(value: unknown): unknown {
|
|
76
|
+
if (Array.isArray(value)) {
|
|
77
|
+
return value.map(stableJsonValue);
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
if (value && typeof value === 'object') {
|
|
81
|
+
return Object.fromEntries(
|
|
82
|
+
Object.entries(value as Record<string, unknown>)
|
|
83
|
+
.sort(([left], [right]) => left.localeCompare(right))
|
|
84
|
+
.map(([key, nestedValue]) => [key, stableJsonValue(nestedValue)]),
|
|
85
|
+
);
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
return value;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
export function getAgentInitArgsSignature(
|
|
92
|
+
initArgs: object | undefined,
|
|
93
|
+
): string | undefined {
|
|
94
|
+
if (!initArgs || Object.keys(initArgs).length === 0) {
|
|
95
|
+
return undefined;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
return JSON.stringify(stableJsonValue(initArgs));
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
export function shouldRebuildAgentForInitArgs(
|
|
102
|
+
currentSignature: string | undefined,
|
|
103
|
+
nextSignature: string | undefined,
|
|
104
|
+
): boolean {
|
|
105
|
+
return (
|
|
106
|
+
currentSignature !== nextSignature &&
|
|
107
|
+
(currentSignature !== undefined || nextSignature !== undefined)
|
|
108
|
+
);
|
|
109
|
+
}
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { parseBase64 } from '@midscene/shared/img';
|
|
2
2
|
import { getDebug } from '@midscene/shared/logger';
|
|
3
|
-
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
4
3
|
import type { z } from 'zod';
|
|
5
4
|
import { camelToKebab, getKeyAliases } from '../key-alias-utils';
|
|
6
5
|
import {
|
|
@@ -29,7 +28,7 @@ import type {
|
|
|
29
28
|
ToolSchema,
|
|
30
29
|
} from './types';
|
|
31
30
|
|
|
32
|
-
const debug = getDebug('mcp:base-tools');
|
|
31
|
+
const debug = getDebug('agent-tools:base-tools');
|
|
33
32
|
|
|
34
33
|
/**
|
|
35
34
|
* Declarative description of a platform's agent init args.
|
|
@@ -39,11 +38,11 @@ const debug = getDebug('mcp:base-tools');
|
|
|
39
38
|
export interface InitArgSpec<TInitParam> {
|
|
40
39
|
/** Arg namespace, e.g. `android`, `ios`. */
|
|
41
40
|
namespace: string;
|
|
42
|
-
/** Zod shape describing the init args. Field names drive the
|
|
41
|
+
/** Zod shape describing the init args. Field names drive the tool schema. */
|
|
43
42
|
shape: Record<string, z.ZodTypeAny>;
|
|
44
43
|
/**
|
|
45
44
|
* Optional CLI presentation hints. These affect `--help` output for
|
|
46
|
-
* single-platform CLIs but do not alter
|
|
45
|
+
* single-platform CLIs but do not alter YAML protocol keys.
|
|
47
46
|
*/
|
|
48
47
|
cli?: {
|
|
49
48
|
/** Prefer bare `--device-id`-style options in platform CLI help output. */
|
|
@@ -61,7 +60,7 @@ export interface InitArgSpec<TInitParam> {
|
|
|
61
60
|
}
|
|
62
61
|
|
|
63
62
|
/**
|
|
64
|
-
* Base class for platform-specific
|
|
63
|
+
* Base class for platform-specific Midscene tools.
|
|
65
64
|
* @typeParam TAgent - Platform-specific agent type.
|
|
66
65
|
* @typeParam TInitParam - Platform-specific init parameter consumed by
|
|
67
66
|
* `ensureAgent`. Defaults to `undefined` for platforms that take no args.
|
|
@@ -71,20 +70,19 @@ export abstract class BaseMidsceneTools<
|
|
|
71
70
|
TInitParam = unknown,
|
|
72
71
|
> implements IMidsceneTools
|
|
73
72
|
{
|
|
74
|
-
protected mcpServer?: McpServer;
|
|
75
73
|
protected agent?: TAgent;
|
|
76
74
|
protected toolDefinitions: ToolDefinition[] = [];
|
|
77
75
|
|
|
78
76
|
/**
|
|
79
77
|
* Default options injected into every generated tool call (e.g. forced deep
|
|
80
|
-
* locate / deep think). Set from
|
|
78
|
+
* locate / deep think). Set from startup/CLI behavior flags before
|
|
81
79
|
* `initTools()` so they are baked into the generated tool handlers.
|
|
82
80
|
* See https://github.com/web-infra-dev/midscene/issues/2446.
|
|
83
81
|
*/
|
|
84
82
|
protected toolDefaults: ToolDefaults = {};
|
|
85
83
|
|
|
86
84
|
/**
|
|
87
|
-
* Declarative init-arg spec. Subclasses that accept CLI
|
|
85
|
+
* Declarative init-arg spec. Subclasses that accept CLI init args should
|
|
88
86
|
* set this once and get `extractAgentInitParam` / `sanitizeToolArgs` /
|
|
89
87
|
* `getAgentInitArgSchema` auto-implemented.
|
|
90
88
|
*
|
|
@@ -108,7 +106,7 @@ export abstract class BaseMidsceneTools<
|
|
|
108
106
|
}
|
|
109
107
|
|
|
110
108
|
/**
|
|
111
|
-
* Extract a platform-specific agent init parameter from CLI
|
|
109
|
+
* Extract a platform-specific agent init parameter from CLI tool args.
|
|
112
110
|
*/
|
|
113
111
|
protected extractAgentInitParam(
|
|
114
112
|
args: Record<string, unknown>,
|
|
@@ -161,7 +159,7 @@ export abstract class BaseMidsceneTools<
|
|
|
161
159
|
* show ergonomic bare flags while the underlying schema stays namespaced.
|
|
162
160
|
* When `preferBareKeys` is enabled, single-platform CLIs only accept the
|
|
163
161
|
* bare spellings; namespaced dotted spellings remain available through the
|
|
164
|
-
*
|
|
162
|
+
* YAML schema instead of the platform CLI surface.
|
|
165
163
|
*/
|
|
166
164
|
protected getAgentInitArgCliMetadata(): ToolCliMetadata | undefined {
|
|
167
165
|
if (!this.initArgSpec?.cli) {
|
|
@@ -272,7 +270,6 @@ export abstract class BaseMidsceneTools<
|
|
|
272
270
|
this.toolDefinitions.push(...platformTools);
|
|
273
271
|
|
|
274
272
|
// 2. Get action space: use pre-set agent if available, otherwise temp device.
|
|
275
|
-
// When called via mcpKitForAgent(), agent is set before initTools().
|
|
276
273
|
// For CLI usage, agent is deferred to the first real command.
|
|
277
274
|
let actionSpace: ActionSpaceItem[];
|
|
278
275
|
if (this.agent) {
|
|
@@ -313,28 +310,6 @@ export abstract class BaseMidsceneTools<
|
|
|
313
310
|
debug('Total tools prepared:', this.toolDefinitions.length);
|
|
314
311
|
}
|
|
315
312
|
|
|
316
|
-
/**
|
|
317
|
-
* Attach to MCP server and register all tools
|
|
318
|
-
*/
|
|
319
|
-
public attachToServer(server: McpServer): void {
|
|
320
|
-
this.mcpServer = server;
|
|
321
|
-
|
|
322
|
-
if (this.toolDefinitions.length === 0) {
|
|
323
|
-
debug('Warning: No tools to register. Tools may be initialized lazily.');
|
|
324
|
-
}
|
|
325
|
-
|
|
326
|
-
for (const toolDef of this.toolDefinitions) {
|
|
327
|
-
this.mcpServer.tool(
|
|
328
|
-
toolDef.name,
|
|
329
|
-
toolDef.description,
|
|
330
|
-
toolDef.schema,
|
|
331
|
-
toolDef.handler,
|
|
332
|
-
);
|
|
333
|
-
}
|
|
334
|
-
|
|
335
|
-
debug('Registered', this.toolDefinitions.length, 'tools');
|
|
336
|
-
}
|
|
337
|
-
|
|
338
313
|
/**
|
|
339
314
|
* Cleanup method - destroy agent and release resources
|
|
340
315
|
*/
|
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
import { existsSync } from 'node:fs';
|
|
2
|
-
import {
|
|
2
|
+
import {
|
|
3
|
+
MIDSCENE_CHROME_PATH,
|
|
4
|
+
MIDSCENE_MCP_CHROME_PATH,
|
|
5
|
+
globalConfigManager,
|
|
6
|
+
} from '../env';
|
|
7
|
+
import { getDebug } from '../logger';
|
|
8
|
+
|
|
9
|
+
const warnChromePath = getDebug('agent-tools:chrome-path', { console: true });
|
|
10
|
+
let hasWarnedLegacyChromePath = false;
|
|
3
11
|
|
|
4
12
|
export function getSystemChromePath(): string | undefined {
|
|
5
13
|
const platform = process.platform;
|
|
@@ -33,9 +41,18 @@ export function getSystemChromePath(): string | undefined {
|
|
|
33
41
|
}
|
|
34
42
|
|
|
35
43
|
export function resolveChromePath(): string {
|
|
36
|
-
const envPath = globalConfigManager.getEnvConfigValue(
|
|
44
|
+
const primaryEnvPath =
|
|
45
|
+
globalConfigManager.getEnvConfigValue(MIDSCENE_CHROME_PATH);
|
|
46
|
+
const legacyEnvPath = globalConfigManager.getEnvConfigValue(
|
|
37
47
|
MIDSCENE_MCP_CHROME_PATH,
|
|
38
48
|
);
|
|
49
|
+
const envPath = primaryEnvPath || legacyEnvPath;
|
|
50
|
+
if (!primaryEnvPath && legacyEnvPath && !hasWarnedLegacyChromePath) {
|
|
51
|
+
warnChromePath(
|
|
52
|
+
'MIDSCENE_MCP_CHROME_PATH is deprecated. Use MIDSCENE_CHROME_PATH instead.',
|
|
53
|
+
);
|
|
54
|
+
hasWarnedLegacyChromePath = true;
|
|
55
|
+
}
|
|
39
56
|
if (envPath && envPath !== 'auto' && existsSync(envPath)) {
|
|
40
57
|
return envPath;
|
|
41
58
|
}
|
|
@@ -43,6 +60,6 @@ export function resolveChromePath(): string {
|
|
|
43
60
|
if (systemPath) return systemPath;
|
|
44
61
|
|
|
45
62
|
throw new Error(
|
|
46
|
-
'Chrome not found. Install Google Chrome or set MIDSCENE_MCP_CHROME_PATH environment variable.',
|
|
63
|
+
'Chrome not found. Install Google Chrome or set MIDSCENE_CHROME_PATH environment variable.',
|
|
47
64
|
);
|
|
48
65
|
}
|
|
@@ -1,10 +1,8 @@
|
|
|
1
|
-
export * from './base-server';
|
|
2
1
|
export * from './base-tools';
|
|
3
2
|
export * from './tool-defaults';
|
|
3
|
+
export * from './agent-behavior-init-args';
|
|
4
4
|
export * from './init-arg-utils';
|
|
5
5
|
export * from './error-formatter';
|
|
6
6
|
export * from './tool-generator';
|
|
7
7
|
export * from './types';
|
|
8
|
-
export * from './inject-report-html-plugin';
|
|
9
|
-
export * from './launcher-helper';
|
|
10
8
|
export * from './chrome-path';
|
|
@@ -88,11 +88,11 @@ export function sanitizeNamespacedArgs(
|
|
|
88
88
|
}
|
|
89
89
|
|
|
90
90
|
/**
|
|
91
|
-
* Build a flat
|
|
91
|
+
* Build a flat tool schema whose keys are dotted `"<namespace>.<field>"`.
|
|
92
92
|
*
|
|
93
93
|
* We intentionally stay flat (rather than `{ namespace: z.object({...}) }`) so
|
|
94
|
-
* that CLI (`--android.device-id`)
|
|
95
|
-
*
|
|
94
|
+
* that CLI (`--android.device-id`) and `--help` output share the same spelling.
|
|
95
|
+
* `readNamespacedArg` understands all three input shapes:
|
|
96
96
|
* nested namespace object, dotted flat key, and bare key fallback.
|
|
97
97
|
*/
|
|
98
98
|
export function createNamespacedInitArgSchema(
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Unified, declarative mechanism for "force a default option on every tool
|
|
3
|
-
* call" behaviors exposed by
|
|
3
|
+
* call" behaviors exposed by device and Agent Skill CLIs.
|
|
4
4
|
*
|
|
5
5
|
* Adding a new behavior flag (e.g. `--deep-search`) is a one-line change to
|
|
6
6
|
* {@link TOOL_BEHAVIOR_FLAGS}: declare which default-option "bag" it fills.
|
|
7
|
-
* The tool generator,
|
|
7
|
+
* The tool generator, tools managers and CLI parsing are all generic
|
|
8
8
|
* over {@link ToolDefaults} and never need to learn about individual flags.
|
|
9
9
|
*
|
|
10
10
|
* See https://github.com/web-infra-dev/midscene/issues/2446.
|
|
@@ -91,13 +91,12 @@ export function resolveToolDefaults(
|
|
|
91
91
|
*
|
|
92
92
|
* Behavior flags (e.g. `--deep-locate`) are global: they may appear anywhere
|
|
93
93
|
* in argv and are not tied to a specific sub-command. They are recognized by
|
|
94
|
-
* exact kebab-case match
|
|
95
|
-
* — and removed so a strict per-command parser never sees them. Every other
|
|
94
|
+
* exact kebab-case match and removed so a strict per-command parser never sees them. Every other
|
|
96
95
|
* token is returned untouched and in order for that per-command parser.
|
|
97
96
|
*
|
|
98
97
|
* This is the single place that knows how a behavior flag looks on the command
|
|
99
|
-
* line;
|
|
100
|
-
*
|
|
98
|
+
* line; the device / Agent Skill CLI resolves defaults from
|
|
99
|
+
* {@link TOOL_BEHAVIOR_FLAGS} through here / {@link resolveToolDefaults}.
|
|
101
100
|
*/
|
|
102
101
|
export function stripBehaviorFlags(argv: readonly string[]): {
|
|
103
102
|
rawArgs: string[];
|
|
@@ -21,10 +21,10 @@ import { composeUserPrompt, promptInputExtraSchema } from './user-prompt';
|
|
|
21
21
|
export { composeUserPrompt };
|
|
22
22
|
|
|
23
23
|
/**
|
|
24
|
-
* Generate
|
|
24
|
+
* Generate tool description from ActionSpaceItem.
|
|
25
25
|
* Format: "actionName action, description. Parameters: param1 (type) - desc; param2 (type) - desc"
|
|
26
26
|
*/
|
|
27
|
-
function
|
|
27
|
+
function describeActionForTool(action: ActionSpaceItem): string {
|
|
28
28
|
const actionDesc = action.description || `Execute ${action.name} action`;
|
|
29
29
|
|
|
30
30
|
if (!action.paramSchema) {
|
|
@@ -129,6 +129,7 @@ function isRecord(value: unknown): value is Record<string, unknown> {
|
|
|
129
129
|
function makePromptOptional(
|
|
130
130
|
shape: Record<string, z.ZodTypeAny>,
|
|
131
131
|
wrapInOptional: boolean,
|
|
132
|
+
description?: string | null,
|
|
132
133
|
): z.ZodTypeAny {
|
|
133
134
|
const newShape = { ...shape };
|
|
134
135
|
newShape.prompt = shape.prompt.optional();
|
|
@@ -137,6 +138,9 @@ function makePromptOptional(
|
|
|
137
138
|
if (wrapInOptional) {
|
|
138
139
|
newSchema = newSchema.optional();
|
|
139
140
|
}
|
|
141
|
+
if (description) {
|
|
142
|
+
newSchema = newSchema.describe(description);
|
|
143
|
+
}
|
|
140
144
|
return newSchema;
|
|
141
145
|
}
|
|
142
146
|
|
|
@@ -151,7 +155,10 @@ function transformSchemaField(
|
|
|
151
155
|
const shape = getZodObjectShape(innerValue);
|
|
152
156
|
|
|
153
157
|
if (shape && isMidsceneLocatorField(innerValue)) {
|
|
154
|
-
return [
|
|
158
|
+
return [
|
|
159
|
+
key,
|
|
160
|
+
makePromptOptional(shape, isOptional, getZodDescription(value)),
|
|
161
|
+
];
|
|
155
162
|
}
|
|
156
163
|
return [key, value];
|
|
157
164
|
}
|
|
@@ -159,7 +166,7 @@ function transformSchemaField(
|
|
|
159
166
|
/**
|
|
160
167
|
* Extract and transform schema from action's paramSchema.
|
|
161
168
|
*
|
|
162
|
-
* CLI
|
|
169
|
+
* CLI tools expose parameters as named fields, so the only schema
|
|
163
170
|
* shapes we can surface are ZodObject (any number of fields) or undefined
|
|
164
171
|
* (the action takes no parameters). A primitive schema like `z.string()`
|
|
165
172
|
* silently degraded to leaking the ZodString instance's prototype methods
|
|
@@ -181,7 +188,7 @@ function extractActionSchema(
|
|
|
181
188
|
(paramSchema as unknown as { _def?: { typeName?: string } })?._def
|
|
182
189
|
?.typeName ?? 'unknown';
|
|
183
190
|
throw new Error(
|
|
184
|
-
`Action "${actionName}" declared a non-object paramSchema (${typeName}). CLI
|
|
191
|
+
`Action "${actionName}" declared a non-object paramSchema (${typeName}). CLI tool schemas must be a ZodObject (e.g. z.object({ uri: z.string() })) or undefined. Wrap primitive fields in an object schema.`,
|
|
185
192
|
);
|
|
186
193
|
}
|
|
187
194
|
|
|
@@ -536,7 +543,7 @@ function mergeToolCliMetadata(
|
|
|
536
543
|
}
|
|
537
544
|
|
|
538
545
|
/**
|
|
539
|
-
* Converts DeviceAction from actionSpace into
|
|
546
|
+
* Converts DeviceAction from actionSpace into ToolDefinition.
|
|
540
547
|
* This is the core logic that removes need for hardcoded tool definitions
|
|
541
548
|
*/
|
|
542
549
|
export function generateToolsFromActionSpace(
|
|
@@ -557,7 +564,7 @@ export function generateToolsFromActionSpace(
|
|
|
557
564
|
|
|
558
565
|
return {
|
|
559
566
|
name: action.name,
|
|
560
|
-
description:
|
|
567
|
+
description: describeActionForTool(action),
|
|
561
568
|
schema,
|
|
562
569
|
cli: initArgCliMetadata,
|
|
563
570
|
handler: async (args: Record<string, unknown>) => {
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
1
|
import type { z } from 'zod';
|
|
3
2
|
import type { ToolDefaults } from './tool-defaults';
|
|
4
3
|
|
|
@@ -12,7 +11,7 @@ export const defaultAppLoadingTimeoutMs = 10000;
|
|
|
12
11
|
export const defaultAppLoadingCheckIntervalMs = 2000;
|
|
13
12
|
|
|
14
13
|
/**
|
|
15
|
-
* Content item types for tool results
|
|
14
|
+
* Content item types for tool results.
|
|
16
15
|
*/
|
|
17
16
|
export type ToolResultContent =
|
|
18
17
|
| { type: 'text'; text: string }
|
|
@@ -26,7 +25,7 @@ export type ToolResultContent =
|
|
|
26
25
|
};
|
|
27
26
|
|
|
28
27
|
/**
|
|
29
|
-
* Result type for tool execution
|
|
28
|
+
* Result type for tool execution.
|
|
30
29
|
*/
|
|
31
30
|
export interface ToolResult {
|
|
32
31
|
[x: string]: unknown;
|
|
@@ -58,7 +57,7 @@ export interface ToolCliMetadata {
|
|
|
58
57
|
}
|
|
59
58
|
|
|
60
59
|
/**
|
|
61
|
-
* Tool definition for
|
|
60
|
+
* Tool definition for Midscene CLI and Skill surfaces.
|
|
62
61
|
*/
|
|
63
62
|
export interface ToolDefinition<T = Record<string, unknown>> {
|
|
64
63
|
name: string;
|
|
@@ -68,9 +67,6 @@ export interface ToolDefinition<T = Record<string, unknown>> {
|
|
|
68
67
|
cli?: ToolCliMetadata;
|
|
69
68
|
}
|
|
70
69
|
|
|
71
|
-
/**
|
|
72
|
-
* Tool type for mcpKitForAgent return value
|
|
73
|
-
*/
|
|
74
70
|
export type Tool = ToolDefinition;
|
|
75
71
|
|
|
76
72
|
/**
|
|
@@ -100,6 +96,23 @@ export type UserPromptLike =
|
|
|
100
96
|
convertHttpImage2Base64?: boolean;
|
|
101
97
|
};
|
|
102
98
|
|
|
99
|
+
export interface RecordToReportScreenshot {
|
|
100
|
+
/**
|
|
101
|
+
* PNG/JPEG data URI, or raw PNG base64 body.
|
|
102
|
+
*/
|
|
103
|
+
base64: string;
|
|
104
|
+
description?: string;
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
export interface RecordToReportOptions {
|
|
108
|
+
content?: string;
|
|
109
|
+
/**
|
|
110
|
+
* @deprecated Use `screenshots: [{ base64 }]` instead.
|
|
111
|
+
*/
|
|
112
|
+
screenshotBase64?: string;
|
|
113
|
+
screenshots?: RecordToReportScreenshot[];
|
|
114
|
+
}
|
|
115
|
+
|
|
103
116
|
/**
|
|
104
117
|
* Base agent interface
|
|
105
118
|
* Represents a platform-specific agent (Android, iOS, Web)
|
|
@@ -113,7 +126,7 @@ export interface BaseAgent {
|
|
|
113
126
|
};
|
|
114
127
|
recordToReport?: (
|
|
115
128
|
title?: string,
|
|
116
|
-
opt?:
|
|
129
|
+
opt?: RecordToReportOptions,
|
|
117
130
|
) => Promise<void>;
|
|
118
131
|
callActionInActionSpace?: (
|
|
119
132
|
actionName: string,
|
|
@@ -143,10 +156,9 @@ export interface BaseDevice {
|
|
|
143
156
|
}
|
|
144
157
|
|
|
145
158
|
/**
|
|
146
|
-
* Interface for platform-specific
|
|
159
|
+
* Interface for platform-specific tools manager.
|
|
147
160
|
*/
|
|
148
161
|
export interface IMidsceneTools {
|
|
149
|
-
attachToServer(server: McpServer): void;
|
|
150
162
|
initTools(): Promise<void>;
|
|
151
163
|
destroy?(): Promise<void>;
|
|
152
164
|
setToolDefaults?(toolDefaults: ToolDefaults): void;
|
package/src/cli/cli-args.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
+
import type { ToolCliOption, ToolDefinition } from '../agent-tools/types';
|
|
2
3
|
import { getKeyAliases } from '../key-alias-utils';
|
|
3
|
-
import type { ToolCliOption, ToolDefinition } from '../mcp/types';
|
|
4
4
|
import { CLIError } from './cli-error';
|
|
5
5
|
|
|
6
6
|
export function parseValue(raw: string): unknown {
|
package/src/cli/cli-runner.ts
CHANGED
|
@@ -2,14 +2,14 @@ import { existsSync, writeFileSync } from 'node:fs';
|
|
|
2
2
|
import { tmpdir } from 'node:os';
|
|
3
3
|
import { join } from 'node:path';
|
|
4
4
|
import dotenv from 'dotenv';
|
|
5
|
-
import {
|
|
6
|
-
import
|
|
7
|
-
import { stripBehaviorFlags } from '../mcp/tool-defaults';
|
|
5
|
+
import type { BaseMidsceneTools } from '../agent-tools/base-tools';
|
|
6
|
+
import { stripBehaviorFlags } from '../agent-tools/tool-defaults';
|
|
8
7
|
import type {
|
|
9
8
|
ToolDefinition,
|
|
10
9
|
ToolResult,
|
|
11
10
|
ToolResultContent,
|
|
12
|
-
} from '../mcp/types';
|
|
11
|
+
} from '../agent-tools/types';
|
|
12
|
+
import { getDebug } from '../logger';
|
|
13
13
|
import {
|
|
14
14
|
canonicalizeCliArgKeys,
|
|
15
15
|
formatCliValidationError,
|
package/src/env/types.ts
CHANGED
|
@@ -8,10 +8,11 @@ export const MIDSCENE_DEBUG_MODEL_RESPONSE = 'MIDSCENE_DEBUG_MODEL_RESPONSE';
|
|
|
8
8
|
export const MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG =
|
|
9
9
|
'MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG';
|
|
10
10
|
export const MIDSCENE_DEBUG_MODE = 'MIDSCENE_DEBUG_MODE';
|
|
11
|
-
export const
|
|
12
|
-
|
|
11
|
+
export const MIDSCENE_CHROME_PATH = 'MIDSCENE_CHROME_PATH';
|
|
12
|
+
/**
|
|
13
|
+
* @deprecated Use MIDSCENE_CHROME_PATH instead. This is kept for backward compatibility.
|
|
14
|
+
*/
|
|
13
15
|
export const MIDSCENE_MCP_CHROME_PATH = 'MIDSCENE_MCP_CHROME_PATH';
|
|
14
|
-
export const MIDSCENE_MCP_ANDROID_MODE = 'MIDSCENE_MCP_ANDROID_MODE';
|
|
15
16
|
export const DOCKER_CONTAINER = 'DOCKER_CONTAINER';
|
|
16
17
|
|
|
17
18
|
// Observability
|
|
@@ -166,8 +167,6 @@ export const BASIC_ENV_KEYS = [
|
|
|
166
167
|
|
|
167
168
|
export const BOOLEAN_ENV_KEYS = [
|
|
168
169
|
MIDSCENE_CACHE,
|
|
169
|
-
MIDSCENE_MCP_USE_PUPPETEER_MODE,
|
|
170
|
-
MIDSCENE_MCP_ANDROID_MODE,
|
|
171
170
|
MIDSCENE_LANGSMITH_DEBUG,
|
|
172
171
|
MIDSCENE_LANGFUSE_DEBUG,
|
|
173
172
|
MIDSCENE_REPORT_QUIET,
|
|
@@ -188,6 +187,7 @@ export const STRING_ENV_KEYS = [
|
|
|
188
187
|
MIDSCENE_REPORT_TAG_NAME,
|
|
189
188
|
MIDSCENE_PREFERRED_LANGUAGE,
|
|
190
189
|
MATCH_BY_POSITION,
|
|
190
|
+
MIDSCENE_CHROME_PATH,
|
|
191
191
|
MIDSCENE_MCP_CHROME_PATH,
|
|
192
192
|
DOCKER_CONTAINER,
|
|
193
193
|
] as const;
|
package/src/img/index.ts
CHANGED
package/src/img/transform.ts
CHANGED
|
@@ -157,6 +157,9 @@ export async function resizeAndConvertImgBuffer(
|
|
|
157
157
|
export const normalizeBase64Body = (body: string) => body.replace(/\s/g, '');
|
|
158
158
|
|
|
159
159
|
const base64ImageDataUrlPattern = /^data:image\/[a-zA-Z0-9.+-]+;base64,/i;
|
|
160
|
+
const supportedScreenshotDataUriPattern =
|
|
161
|
+
/^data:image\/(png|jpe?g);base64,([\s\S]*)$/i;
|
|
162
|
+
const rawBase64BodyPattern = /^[A-Za-z0-9+/=\s]+$/;
|
|
160
163
|
|
|
161
164
|
export const inferBase64ImageFormat = (base64Body: string) => {
|
|
162
165
|
if (base64Body.startsWith('iVBORw0KGgo')) {
|
|
@@ -207,6 +210,48 @@ export const createImgBase64ByFormat = (format: string, body: string) => {
|
|
|
207
210
|
return `data:image/${format};base64,${normalizeBase64Body(body)}`;
|
|
208
211
|
};
|
|
209
212
|
|
|
213
|
+
export interface NormalizeScreenshotBase64Options {
|
|
214
|
+
label?: string;
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
export const normalizeScreenshotBase64 = (
|
|
218
|
+
base64: string,
|
|
219
|
+
options?: NormalizeScreenshotBase64Options,
|
|
220
|
+
) => {
|
|
221
|
+
const label = options?.label ?? 'screenshot base64';
|
|
222
|
+
const trimmedBase64 = base64.trim();
|
|
223
|
+
if (!trimmedBase64) {
|
|
224
|
+
throw new Error(`${label} cannot be empty`);
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
const dataUriMatch = trimmedBase64.match(supportedScreenshotDataUriPattern);
|
|
228
|
+
if (dataUriMatch) {
|
|
229
|
+
const imageFormat =
|
|
230
|
+
dataUriMatch[1].toLowerCase() === 'jpg'
|
|
231
|
+
? 'jpeg'
|
|
232
|
+
: dataUriMatch[1].toLowerCase();
|
|
233
|
+
const body = dataUriMatch[2];
|
|
234
|
+
if (!normalizeBase64Body(body)) {
|
|
235
|
+
throw new Error(`${label} cannot be empty`);
|
|
236
|
+
}
|
|
237
|
+
return createImgBase64ByFormat(imageFormat, body);
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
if (trimmedBase64.startsWith('data:')) {
|
|
241
|
+
throw new Error(
|
|
242
|
+
`${label} must be a PNG/JPEG data URI or raw PNG base64 string`,
|
|
243
|
+
);
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
if (!rawBase64BodyPattern.test(trimmedBase64)) {
|
|
247
|
+
throw new Error(
|
|
248
|
+
`${label} must be a PNG/JPEG data URI or raw PNG base64 string`,
|
|
249
|
+
);
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
return createImgBase64ByFormat('png', trimmedBase64);
|
|
253
|
+
};
|
|
254
|
+
|
|
210
255
|
export const normalizeBase64Image = (base64: string) => {
|
|
211
256
|
const trimmedBase64 = base64.trim();
|
|
212
257
|
if (base64ImageDataUrlPattern.test(trimmedBase64)) {
|
package/src/key-alias-utils.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Internal-only helpers for CLI
|
|
2
|
+
* Internal-only helpers for CLI argument key aliasing.
|
|
3
3
|
* Not re-exported from the package entry point — keep consumers within
|
|
4
|
-
* `cli
|
|
4
|
+
* `cli/`.
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
7
|
export function kebabToCamel(str: string): string {
|
package/src/utils.ts
CHANGED
|
@@ -64,17 +64,8 @@ export function assert(condition: any, message?: string): asserts condition {
|
|
|
64
64
|
}
|
|
65
65
|
}
|
|
66
66
|
|
|
67
|
-
let isMcp = false;
|
|
68
|
-
|
|
69
|
-
export function setIsMcp(value: boolean) {
|
|
70
|
-
isMcp = value;
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
//mcp need use obj format to console msg: https://github.com/modelcontextprotocol/typescript-sdk/issues/244
|
|
74
67
|
export function logMsg(...message: Parameters<typeof console.log>) {
|
|
75
|
-
|
|
76
|
-
console.log(...message);
|
|
77
|
-
}
|
|
68
|
+
console.log(...message);
|
|
78
69
|
}
|
|
79
70
|
|
|
80
71
|
export async function repeat(
|