@midscene/shared 1.9.7 → 1.9.8-beta-20260618091332.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,7 @@ import { assert } from "../utils.mjs";
5
5
  import { maskConfig, parseJson } from "./helper.mjs";
6
6
  import { initDebugConfig } from "./init-debug.mjs";
7
7
  const MODEL_CONFIG_DOC_URL = 'https://midscenejs.com/model-common-config.html';
8
- const getCurrentVersion = ()=>"1.9.7";
8
+ const getCurrentVersion = ()=>"1.9.8-beta-20260618091332.0";
9
9
  const getInvalidModelFamilyMessage = (modelFamily)=>`Invalid MIDSCENE_MODEL_FAMILY value: ${modelFamily}. Current version v${getCurrentVersion()} accepts the following model families: ${MODEL_FAMILY_VALUES.join(', ')}. You can also visit ${MODEL_CONFIG_DOC_URL} for the latest configuration information.`;
10
10
  const KEYS_MAP = {
11
11
  insight: INSIGHT_MODEL_CONFIG_KEYS,
@@ -1,4 +1,4 @@
1
1
  import { imageInfoOfBase64, isValidImageBuffer, isValidJPEGImageBuffer, isValidPNGImageBuffer, validateScreenshotBuffer } from "./info.mjs";
2
- import { createImgBase64ByFormat, cropByRect, httpImg2Base64, inferBase64ImageFormat, localImg2Base64, normalizeBase64Image, paddingToMatchBlockByBase64, parseBase64, preProcessImageUrl, resizeAndConvertImgBuffer, resizeImgBase64, saveBase64Image, scaleImage, zoomForGPT4o } from "./transform.mjs";
2
+ import { createImgBase64ByFormat, cropByRect, httpImg2Base64, inferBase64ImageFormat, localImg2Base64, normalizeBase64Image, normalizeScreenshotBase64, paddingToMatchBlockByBase64, parseBase64, preProcessImageUrl, resizeAndConvertImgBuffer, resizeImgBase64, saveBase64Image, scaleImage, zoomForGPT4o } from "./transform.mjs";
3
3
  import { annotateRects, compositeElementInfoImg, compositePointMarkerImg, processImageElementInfo } from "./box-select.mjs";
4
- export { annotateRects, compositeElementInfoImg, compositePointMarkerImg, createImgBase64ByFormat, cropByRect, httpImg2Base64, imageInfoOfBase64, inferBase64ImageFormat, isValidImageBuffer, isValidJPEGImageBuffer, isValidPNGImageBuffer, localImg2Base64, normalizeBase64Image, paddingToMatchBlockByBase64, parseBase64, preProcessImageUrl, processImageElementInfo, resizeAndConvertImgBuffer, resizeImgBase64, saveBase64Image, scaleImage, validateScreenshotBuffer, zoomForGPT4o };
4
+ export { annotateRects, compositeElementInfoImg, compositePointMarkerImg, createImgBase64ByFormat, cropByRect, httpImg2Base64, imageInfoOfBase64, inferBase64ImageFormat, isValidImageBuffer, isValidJPEGImageBuffer, isValidPNGImageBuffer, localImg2Base64, normalizeBase64Image, normalizeScreenshotBase64, paddingToMatchBlockByBase64, parseBase64, preProcessImageUrl, processImageElementInfo, resizeAndConvertImgBuffer, resizeImgBase64, saveBase64Image, scaleImage, validateScreenshotBuffer, zoomForGPT4o };
@@ -71,6 +71,8 @@ async function resizeAndConvertImgBuffer(inputFormat, inputData, newSize) {
71
71
  }
72
72
  const normalizeBase64Body = (body)=>body.replace(/\s/g, '');
73
73
  const base64ImageDataUrlPattern = /^data:image\/[a-zA-Z0-9.+-]+;base64,/i;
74
+ const supportedScreenshotDataUriPattern = /^data:image\/(png|jpe?g);base64,([\s\S]*)$/i;
75
+ const rawBase64BodyPattern = /^[A-Za-z0-9+/=\s]+$/;
74
76
  const inferBase64ImageFormat = (base64Body)=>{
75
77
  if (base64Body.startsWith('iVBORw0KGgo')) return 'png';
76
78
  return 'jpeg';
@@ -83,6 +85,21 @@ function detectImageMimeTypeFromBuffer(buffer) {
83
85
  if (buffer.length >= 2 && 0x42 === buffer[0] && 0x4d === buffer[1]) return 'image/bmp';
84
86
  }
85
87
  const createImgBase64ByFormat = (format, body)=>`data:image/${format};base64,${normalizeBase64Body(body)}`;
88
/**
 * Normalizes a screenshot string into a `data:image/<format>;base64,<body>` URI.
 *
 * Accepted inputs:
 * - a PNG or JPEG data URI (`image/jpg` is rewritten to `image/jpeg`);
 * - a raw base64 body without a `data:` prefix, which is labelled as PNG.
 *
 * Whitespace inside the body is stripped by `createImgBase64ByFormat`.
 *
 * @param base64 Screenshot string to normalize.
 * @param options Optional settings; `label` names the value in error messages
 *   and defaults to 'screenshot base64'.
 * @returns The normalized data URI.
 * @throws Error when the input (or the data URI body) is empty, when it is a
 *   data URI of an unsupported kind, or when the body contains characters
 *   outside the base64 alphabet.
 */
const normalizeScreenshotBase64 = (base64, options)=>{
    const label = options?.label ?? 'screenshot base64';
    const invalidFormatMessage = `${label} must be a PNG/JPEG data URI or raw PNG base64 string`;
    const trimmedBase64 = base64.trim();
    if (!trimmedBase64) throw new Error(`${label} cannot be empty`);
    const dataUriMatch = trimmedBase64.match(supportedScreenshotDataUriPattern);
    if (dataUriMatch) {
        const declaredFormat = dataUriMatch[1].toLowerCase();
        const imageFormat = 'jpg' === declaredFormat ? 'jpeg' : declaredFormat;
        const body = dataUriMatch[2];
        if (!normalizeBase64Body(body)) throw new Error(`${label} cannot be empty`);
        // The raw-body path below rejects characters outside the base64
        // alphabet; hold data URI bodies to the same rule so arbitrary text
        // cannot pass through behind a valid-looking prefix.
        if (!rawBase64BodyPattern.test(body)) throw new Error(invalidFormatMessage);
        return createImgBase64ByFormat(imageFormat, body);
    }
    // Any other `data:` URI, or a raw string with non-base64 characters, is rejected.
    if (trimmedBase64.startsWith('data:') || !rawBase64BodyPattern.test(trimmedBase64)) throw new Error(invalidFormatMessage);
    return createImgBase64ByFormat('png', trimmedBase64);
};
86
103
  const normalizeBase64Image = (base64)=>{
87
104
  const trimmedBase64 = base64.trim();
88
105
  if (base64ImageDataUrlPattern.test(trimmedBase64)) return trimmedBase64;
@@ -293,4 +310,4 @@ async function scaleImage(imageBase64, scale) {
293
310
  imageBase64: base64
294
311
  };
295
312
  }
296
- export { createImgBase64ByFormat, cropByRect, httpImg2Base64, inferBase64ImageFormat, localImg2Base64, normalizeBase64Body, normalizeBase64Image, paddingToMatchBlock, paddingToMatchBlockByBase64, parseBase64, photonFromBase64, photonToBase64, preProcessImageUrl, resizeAndConvertImgBuffer, resizeImgBase64, saveBase64Image, scaleImage, zoomForGPT4o };
313
+ export { createImgBase64ByFormat, cropByRect, httpImg2Base64, inferBase64ImageFormat, localImg2Base64, normalizeBase64Body, normalizeBase64Image, normalizeScreenshotBase64, paddingToMatchBlock, paddingToMatchBlockByBase64, parseBase64, photonFromBase64, photonToBase64, preProcessImageUrl, resizeAndConvertImgBuffer, resizeImgBase64, saveBase64Image, scaleImage, zoomForGPT4o };
@@ -0,0 +1,44 @@
1
+ import { z } from "zod";
2
/**
 * Zod field definitions for the agent behavior init args.
 * Every field is optional and carries a human-readable description.
 */
const agentBehaviorInitArgShape = {
    // Free-form string.
    aiActContext: z
        .string()
        .optional()
        .describe('Background knowledge passed to aiAct. Default: no extra context.'),
    // Integer >= 0.
    replanningCycleLimit: z
        .number()
        .int()
        .nonnegative()
        .optional()
        .describe('Maximum number of replanning cycles for aiAct. Default: model adapter default.'),
    // Number >= 0.
    waitAfterAction: z
        .number()
        .nonnegative()
        .optional()
        .describe('Wait time in milliseconds after each action execution. Default: 300ms.'),
    // Number >= 1.
    screenshotShrinkFactor: z
        .number()
        .min(1)
        .optional()
        .describe('Screenshot shrink factor before sending images to AI. Default: 1; high values may reduce recognition quality, especially on mobile.')
};
8
/**
 * Picks the agent behavior options out of an already-extracted args object.
 *
 * A field is copied only when its runtime type matches the expected one
 * (string for the two context fields, number for the rest); anything else is
 * ignored. `aiActionContext` is copied as well as `aiActContext`.
 * NOTE(review): `aiActionContext` looks like a legacy alias of `aiActContext` - confirm.
 *
 * @param extracted Object to read from; may be undefined.
 * @returns An object holding the recognised fields, or undefined when
 *   `extracted` is falsy or none of the fields qualified.
 */
function extractAgentBehaviorInitArgs(extracted) {
    if (!extracted) return;
    // Listed in the order the keys must appear on the result object.
    const acceptedFields = [
        ['aiActContext', 'string'],
        ['aiActionContext', 'string'],
        ['replanningCycleLimit', 'number'],
        ['waitAfterAction', 'number'],
        ['screenshotShrinkFactor', 'number']
    ];
    const agentOptions = {};
    for (const [field, expectedType] of acceptedFields) {
        const candidate = extracted[field];
        if (typeof candidate === expectedType) agentOptions[field] = candidate;
    }
    if (0 === Object.keys(agentOptions).length) return;
    return agentOptions;
}
29
/**
 * Returns a copy of `value` whose object keys are in a deterministic order at
 * every nesting level, so that `JSON.stringify` output does not depend on
 * property insertion order.
 *
 * Arrays keep their element order (each element is processed recursively);
 * non-object values are returned unchanged.
 *
 * @param value Any JSON-like value.
 * @returns The re-ordered copy, or `value` itself for primitives and null.
 */
function stableJsonValue(value) {
    if (Array.isArray(value)) return value.map((item)=>stableJsonValue(item));
    if (value && 'object' == typeof value) {
        // Compare keys by UTF-16 code units, not localeCompare: locale collation
        // varies with the runtime's locale and can report distinct keys as equal
        // (canonically equivalent strings), which would leave their relative
        // order dependent on insertion order.
        const orderedEntries = Object.entries(value).sort(([left], [right])=>left < right ? -1 : left > right ? 1 : 0);
        return Object.fromEntries(orderedEntries.map(([key, nestedValue])=>[
                key,
                stableJsonValue(nestedValue)
            ]));
    }
    return value;
}
37
/**
 * Builds a string signature for a set of agent init args.
 *
 * @param initArgs Init args object; may be undefined.
 * @returns The JSON serialisation of `stableJsonValue(initArgs)`, or undefined
 *   when `initArgs` is falsy or has no own enumerable keys.
 */
function getAgentInitArgsSignature(initArgs) {
    const hasArgs = Boolean(initArgs) && Object.keys(initArgs).length > 0;
    return hasArgs ? JSON.stringify(stableJsonValue(initArgs)) : void 0;
}
41
/**
 * Decides whether the agent must be rebuilt because its init args changed.
 *
 * @param currentSignature Signature the current agent was built with, or undefined.
 * @param nextSignature Signature of the requested init args, or undefined.
 * @returns true when the two signatures differ; false when they are strictly
 *   equal, including when both are undefined.
 */
function shouldRebuildAgentForInitArgs(currentSignature, nextSignature) {
    // Two undefined signatures are strictly equal, so a plain inequality check
    // already covers the "neither side has init args" case.
    return currentSignature !== nextSignature;
}
44
+ export { agentBehaviorInitArgShape, extractAgentBehaviorInitArgs, getAgentInitArgsSignature, shouldRebuildAgentForInitArgs };
@@ -1,6 +1,7 @@
1
1
  export * from "./base-server.mjs";
2
2
  export * from "./base-tools.mjs";
3
3
  export * from "./tool-defaults.mjs";
4
+ export * from "./agent-behavior-init-args.mjs";
4
5
  export * from "./init-arg-utils.mjs";
5
6
  export * from "./error-formatter.mjs";
6
7
  export * from "./tool-generator.mjs";
@@ -48,13 +48,14 @@ function getZodObjectShape(value) {
48
48
  function isRecord(value) {
49
49
  return 'object' == typeof value && null !== value && !Array.isArray(value);
50
50
  }
51
- function makePromptOptional(shape, wrapInOptional) {
51
/**
 * Rebuilds an object schema from `shape` with its `prompt` field made optional.
 *
 * The rebuilt schema is a passthrough object. It is additionally wrapped in
 * `.optional()` when `wrapInOptional` is truthy, and given `description` via
 * `.describe()` when one is provided.
 *
 * @param shape Zod object shape; must contain a `prompt` schema.
 * @param wrapInOptional Whether the resulting schema itself is optional.
 * @param description Optional description to attach to the resulting schema.
 * @returns The rebuilt schema.
 */
function makePromptOptional(shape, wrapInOptional, description) {
    const relaxedObject = z.object({
        ...shape,
        prompt: shape.prompt.optional()
    }).passthrough();
    const maybeOptional = wrapInOptional ? relaxedObject.optional() : relaxedObject;
    // Describe last so the description sits on the outermost schema.
    return description ? maybeOptional.describe(description) : maybeOptional;
}
60
61
  function transformSchemaField(key, value) {
@@ -62,7 +63,7 @@ function transformSchemaField(key, value) {
62
63
  const shape = getZodObjectShape(innerValue);
63
64
  if (shape && isMidsceneLocatorField(innerValue)) return [
64
65
  key,
65
- makePromptOptional(shape, isOptional)
66
+ makePromptOptional(shape, isOptional, getZodDescription(value))
66
67
  ];
67
68
  return [
68
69
  key,
@@ -37,7 +37,7 @@ const external_utils_js_namespaceObject = require("../utils.js");
37
37
  const external_helper_js_namespaceObject = require("./helper.js");
38
38
  const external_init_debug_js_namespaceObject = require("./init-debug.js");
39
39
  const MODEL_CONFIG_DOC_URL = 'https://midscenejs.com/model-common-config.html';
40
- const getCurrentVersion = ()=>"1.9.7";
40
+ const getCurrentVersion = ()=>"1.9.8-beta-20260618091332.0";
41
41
  const getInvalidModelFamilyMessage = (modelFamily)=>`Invalid MIDSCENE_MODEL_FAMILY value: ${modelFamily}. Current version v${getCurrentVersion()} accepts the following model families: ${external_types_js_namespaceObject.MODEL_FAMILY_VALUES.join(', ')}. You can also visit ${MODEL_CONFIG_DOC_URL} for the latest configuration information.`;
42
42
  const KEYS_MAP = {
43
43
  insight: external_constants_js_namespaceObject.INSIGHT_MODEL_CONFIG_KEYS,
@@ -33,6 +33,7 @@ __webpack_require__.d(__webpack_exports__, {
33
33
  saveBase64Image: ()=>external_transform_js_namespaceObject.saveBase64Image,
34
34
  isValidImageBuffer: ()=>external_info_js_namespaceObject.isValidImageBuffer,
35
35
  validateScreenshotBuffer: ()=>external_info_js_namespaceObject.validateScreenshotBuffer,
36
+ normalizeScreenshotBase64: ()=>external_transform_js_namespaceObject.normalizeScreenshotBase64,
36
37
  preProcessImageUrl: ()=>external_transform_js_namespaceObject.preProcessImageUrl,
37
38
  annotateRects: ()=>external_box_select_js_namespaceObject.annotateRects,
38
39
  compositeElementInfoImg: ()=>external_box_select_js_namespaceObject.compositeElementInfoImg,
@@ -64,6 +65,7 @@ exports.isValidJPEGImageBuffer = __webpack_exports__.isValidJPEGImageBuffer;
64
65
  exports.isValidPNGImageBuffer = __webpack_exports__.isValidPNGImageBuffer;
65
66
  exports.localImg2Base64 = __webpack_exports__.localImg2Base64;
66
67
  exports.normalizeBase64Image = __webpack_exports__.normalizeBase64Image;
68
+ exports.normalizeScreenshotBase64 = __webpack_exports__.normalizeScreenshotBase64;
67
69
  exports.paddingToMatchBlockByBase64 = __webpack_exports__.paddingToMatchBlockByBase64;
68
70
  exports.parseBase64 = __webpack_exports__.parseBase64;
69
71
  exports.preProcessImageUrl = __webpack_exports__.preProcessImageUrl;
@@ -88,6 +90,7 @@ for(var __rspack_i in __webpack_exports__)if (-1 === [
88
90
  "isValidPNGImageBuffer",
89
91
  "localImg2Base64",
90
92
  "normalizeBase64Image",
93
+ "normalizeScreenshotBase64",
91
94
  "paddingToMatchBlockByBase64",
92
95
  "parseBase64",
93
96
  "preProcessImageUrl",
@@ -42,6 +42,7 @@ __webpack_require__.d(__webpack_exports__, {
42
42
  resizeAndConvertImgBuffer: ()=>resizeAndConvertImgBuffer,
43
43
  resizeImgBase64: ()=>resizeImgBase64,
44
44
  saveBase64Image: ()=>saveBase64Image,
45
+ normalizeScreenshotBase64: ()=>normalizeScreenshotBase64,
45
46
  preProcessImageUrl: ()=>preProcessImageUrl,
46
47
  cropByRect: ()=>cropByRect,
47
48
  scaleImage: ()=>scaleImage,
@@ -129,6 +130,8 @@ async function resizeAndConvertImgBuffer(inputFormat, inputData, newSize) {
129
130
  }
130
131
  const normalizeBase64Body = (body)=>body.replace(/\s/g, '');
131
132
  const base64ImageDataUrlPattern = /^data:image\/[a-zA-Z0-9.+-]+;base64,/i;
133
+ const supportedScreenshotDataUriPattern = /^data:image\/(png|jpe?g);base64,([\s\S]*)$/i;
134
+ const rawBase64BodyPattern = /^[A-Za-z0-9+/=\s]+$/;
132
135
  const inferBase64ImageFormat = (base64Body)=>{
133
136
  if (base64Body.startsWith('iVBORw0KGgo')) return 'png';
134
137
  return 'jpeg';
@@ -141,6 +144,21 @@ function detectImageMimeTypeFromBuffer(buffer) {
141
144
  if (buffer.length >= 2 && 0x42 === buffer[0] && 0x4d === buffer[1]) return 'image/bmp';
142
145
  }
143
146
  const createImgBase64ByFormat = (format, body)=>`data:image/${format};base64,${normalizeBase64Body(body)}`;
147
/**
 * Normalizes a screenshot string into a `data:image/<format>;base64,<body>` URI.
 *
 * Accepted inputs:
 * - a PNG or JPEG data URI (`image/jpg` is rewritten to `image/jpeg`);
 * - a raw base64 body without a `data:` prefix, which is labelled as PNG.
 *
 * Whitespace inside the body is stripped by `createImgBase64ByFormat`.
 *
 * @param base64 Screenshot string to normalize.
 * @param options Optional settings; `label` names the value in error messages
 *   and defaults to 'screenshot base64'.
 * @returns The normalized data URI.
 * @throws Error when the input (or the data URI body) is empty, when it is a
 *   data URI of an unsupported kind, or when the body contains characters
 *   outside the base64 alphabet.
 */
const normalizeScreenshotBase64 = (base64, options)=>{
    const label = options?.label ?? 'screenshot base64';
    const invalidFormatMessage = `${label} must be a PNG/JPEG data URI or raw PNG base64 string`;
    const trimmedBase64 = base64.trim();
    if (!trimmedBase64) throw new Error(`${label} cannot be empty`);
    const dataUriMatch = trimmedBase64.match(supportedScreenshotDataUriPattern);
    if (dataUriMatch) {
        const declaredFormat = dataUriMatch[1].toLowerCase();
        const imageFormat = 'jpg' === declaredFormat ? 'jpeg' : declaredFormat;
        const body = dataUriMatch[2];
        if (!normalizeBase64Body(body)) throw new Error(`${label} cannot be empty`);
        // The raw-body path below rejects characters outside the base64
        // alphabet; hold data URI bodies to the same rule so arbitrary text
        // cannot pass through behind a valid-looking prefix.
        if (!rawBase64BodyPattern.test(body)) throw new Error(invalidFormatMessage);
        return createImgBase64ByFormat(imageFormat, body);
    }
    // Any other `data:` URI, or a raw string with non-base64 characters, is rejected.
    if (trimmedBase64.startsWith('data:') || !rawBase64BodyPattern.test(trimmedBase64)) throw new Error(invalidFormatMessage);
    return createImgBase64ByFormat('png', trimmedBase64);
};
144
162
  const normalizeBase64Image = (base64)=>{
145
163
  const trimmedBase64 = base64.trim();
146
164
  if (base64ImageDataUrlPattern.test(trimmedBase64)) return trimmedBase64;
@@ -358,6 +376,7 @@ exports.inferBase64ImageFormat = __webpack_exports__.inferBase64ImageFormat;
358
376
  exports.localImg2Base64 = __webpack_exports__.localImg2Base64;
359
377
  exports.normalizeBase64Body = __webpack_exports__.normalizeBase64Body;
360
378
  exports.normalizeBase64Image = __webpack_exports__.normalizeBase64Image;
379
+ exports.normalizeScreenshotBase64 = __webpack_exports__.normalizeScreenshotBase64;
361
380
  exports.paddingToMatchBlock = __webpack_exports__.paddingToMatchBlock;
362
381
  exports.paddingToMatchBlockByBase64 = __webpack_exports__.paddingToMatchBlockByBase64;
363
382
  exports.parseBase64 = __webpack_exports__.parseBase64;
@@ -377,6 +396,7 @@ for(var __rspack_i in __webpack_exports__)if (-1 === [
377
396
  "localImg2Base64",
378
397
  "normalizeBase64Body",
379
398
  "normalizeBase64Image",
399
+ "normalizeScreenshotBase64",
380
400
  "paddingToMatchBlock",
381
401
  "paddingToMatchBlockByBase64",
382
402
  "parseBase64",
@@ -0,0 +1,87 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
3
+ (()=>{
4
+ __webpack_require__.d = (exports1, definition)=>{
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
+ enumerable: true,
7
+ get: definition[key]
8
+ });
9
+ };
10
+ })();
11
+ (()=>{
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
+ })();
14
+ (()=>{
15
+ __webpack_require__.r = (exports1)=>{
16
+ if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
+ value: 'Module'
18
+ });
19
+ Object.defineProperty(exports1, '__esModule', {
20
+ value: true
21
+ });
22
+ };
23
+ })();
24
+ var __webpack_exports__ = {};
25
+ __webpack_require__.r(__webpack_exports__);
26
+ __webpack_require__.d(__webpack_exports__, {
27
+ extractAgentBehaviorInitArgs: ()=>extractAgentBehaviorInitArgs,
28
+ agentBehaviorInitArgShape: ()=>agentBehaviorInitArgShape,
29
+ getAgentInitArgsSignature: ()=>getAgentInitArgsSignature,
30
+ shouldRebuildAgentForInitArgs: ()=>shouldRebuildAgentForInitArgs
31
+ });
32
+ const external_zod_namespaceObject = require("zod");
33
/**
 * Zod field definitions for the agent behavior init args.
 * Every field is optional and carries a human-readable description.
 */
const agentBehaviorInitArgShape = {
    // Free-form string.
    aiActContext: external_zod_namespaceObject.z
        .string()
        .optional()
        .describe('Background knowledge passed to aiAct. Default: no extra context.'),
    // Integer >= 0.
    replanningCycleLimit: external_zod_namespaceObject.z
        .number()
        .int()
        .nonnegative()
        .optional()
        .describe('Maximum number of replanning cycles for aiAct. Default: model adapter default.'),
    // Number >= 0.
    waitAfterAction: external_zod_namespaceObject.z
        .number()
        .nonnegative()
        .optional()
        .describe('Wait time in milliseconds after each action execution. Default: 300ms.'),
    // Number >= 1.
    screenshotShrinkFactor: external_zod_namespaceObject.z
        .number()
        .min(1)
        .optional()
        .describe('Screenshot shrink factor before sending images to AI. Default: 1; high values may reduce recognition quality, especially on mobile.')
};
39
/**
 * Picks the agent behavior options out of an already-extracted args object.
 *
 * A field is copied only when its runtime type matches the expected one
 * (string for the two context fields, number for the rest); anything else is
 * ignored. `aiActionContext` is copied as well as `aiActContext`.
 * NOTE(review): `aiActionContext` looks like a legacy alias of `aiActContext` - confirm.
 *
 * @param extracted Object to read from; may be undefined.
 * @returns An object holding the recognised fields, or undefined when
 *   `extracted` is falsy or none of the fields qualified.
 */
function extractAgentBehaviorInitArgs(extracted) {
    if (!extracted) return;
    // Listed in the order the keys must appear on the result object.
    const acceptedFields = [
        ['aiActContext', 'string'],
        ['aiActionContext', 'string'],
        ['replanningCycleLimit', 'number'],
        ['waitAfterAction', 'number'],
        ['screenshotShrinkFactor', 'number']
    ];
    const agentOptions = {};
    for (const [field, expectedType] of acceptedFields) {
        const candidate = extracted[field];
        if (typeof candidate === expectedType) agentOptions[field] = candidate;
    }
    if (0 === Object.keys(agentOptions).length) return;
    return agentOptions;
}
60
/**
 * Returns a copy of `value` whose object keys are in a deterministic order at
 * every nesting level, so that `JSON.stringify` output does not depend on
 * property insertion order.
 *
 * Arrays keep their element order (each element is processed recursively);
 * non-object values are returned unchanged.
 *
 * @param value Any JSON-like value.
 * @returns The re-ordered copy, or `value` itself for primitives and null.
 */
function stableJsonValue(value) {
    if (Array.isArray(value)) return value.map((item)=>stableJsonValue(item));
    if (value && 'object' == typeof value) {
        // Compare keys by UTF-16 code units, not localeCompare: locale collation
        // varies with the runtime's locale and can report distinct keys as equal
        // (canonically equivalent strings), which would leave their relative
        // order dependent on insertion order.
        const orderedEntries = Object.entries(value).sort(([left], [right])=>left < right ? -1 : left > right ? 1 : 0);
        return Object.fromEntries(orderedEntries.map(([key, nestedValue])=>[
                key,
                stableJsonValue(nestedValue)
            ]));
    }
    return value;
}
68
/**
 * Builds a string signature for a set of agent init args.
 *
 * @param initArgs Init args object; may be undefined.
 * @returns The JSON serialisation of `stableJsonValue(initArgs)`, or undefined
 *   when `initArgs` is falsy or has no own enumerable keys.
 */
function getAgentInitArgsSignature(initArgs) {
    const hasArgs = Boolean(initArgs) && Object.keys(initArgs).length > 0;
    return hasArgs ? JSON.stringify(stableJsonValue(initArgs)) : void 0;
}
72
/**
 * Decides whether the agent must be rebuilt because its init args changed.
 *
 * @param currentSignature Signature the current agent was built with, or undefined.
 * @param nextSignature Signature of the requested init args, or undefined.
 * @returns true when the two signatures differ; false when they are strictly
 *   equal, including when both are undefined.
 */
function shouldRebuildAgentForInitArgs(currentSignature, nextSignature) {
    // Two undefined signatures are strictly equal, so a plain inequality check
    // already covers the "neither side has init args" case.
    return currentSignature !== nextSignature;
}
75
+ exports.agentBehaviorInitArgShape = __webpack_exports__.agentBehaviorInitArgShape;
76
+ exports.extractAgentBehaviorInitArgs = __webpack_exports__.extractAgentBehaviorInitArgs;
77
+ exports.getAgentInitArgsSignature = __webpack_exports__.getAgentInitArgsSignature;
78
+ exports.shouldRebuildAgentForInitArgs = __webpack_exports__.shouldRebuildAgentForInitArgs;
79
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
80
+ "agentBehaviorInitArgShape",
81
+ "extractAgentBehaviorInitArgs",
82
+ "getAgentInitArgsSignature",
83
+ "shouldRebuildAgentForInitArgs"
84
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
85
+ Object.defineProperty(exports, '__esModule', {
86
+ value: true
87
+ });
@@ -1,5 +1,8 @@
1
1
  "use strict";
2
2
  var __webpack_modules__ = {
3
+ "./agent-behavior-init-args" (module) {
4
+ module.exports = require("./agent-behavior-init-args.js");
5
+ },
3
6
  "./base-server" (module) {
4
7
  module.exports = require("./base-server.js");
5
8
  },
@@ -86,33 +89,37 @@ var __webpack_exports__ = {};
86
89
  var __rspack_reexport = {};
87
90
  for(const __rspack_import_key in _tool_defaults__rspack_import_2)if ("default" !== __rspack_import_key) __rspack_reexport[__rspack_import_key] = ()=>_tool_defaults__rspack_import_2[__rspack_import_key];
88
91
  __webpack_require__.d(__webpack_exports__, __rspack_reexport);
89
- var _init_arg_utils__rspack_import_3 = __webpack_require__("./init-arg-utils");
92
+ var _agent_behavior_init_args__rspack_import_3 = __webpack_require__("./agent-behavior-init-args");
93
+ var __rspack_reexport = {};
94
+ for(const __rspack_import_key in _agent_behavior_init_args__rspack_import_3)if ("default" !== __rspack_import_key) __rspack_reexport[__rspack_import_key] = ()=>_agent_behavior_init_args__rspack_import_3[__rspack_import_key];
95
+ __webpack_require__.d(__webpack_exports__, __rspack_reexport);
96
+ var _init_arg_utils__rspack_import_4 = __webpack_require__("./init-arg-utils");
90
97
  var __rspack_reexport = {};
91
- for(const __rspack_import_key in _init_arg_utils__rspack_import_3)if ("default" !== __rspack_import_key) __rspack_reexport[__rspack_import_key] = ()=>_init_arg_utils__rspack_import_3[__rspack_import_key];
98
+ for(const __rspack_import_key in _init_arg_utils__rspack_import_4)if ("default" !== __rspack_import_key) __rspack_reexport[__rspack_import_key] = ()=>_init_arg_utils__rspack_import_4[__rspack_import_key];
92
99
  __webpack_require__.d(__webpack_exports__, __rspack_reexport);
93
- var _error_formatter__rspack_import_4 = __webpack_require__("./error-formatter");
100
+ var _error_formatter__rspack_import_5 = __webpack_require__("./error-formatter");
94
101
  var __rspack_reexport = {};
95
- for(const __rspack_import_key in _error_formatter__rspack_import_4)if ("default" !== __rspack_import_key) __rspack_reexport[__rspack_import_key] = ()=>_error_formatter__rspack_import_4[__rspack_import_key];
102
+ for(const __rspack_import_key in _error_formatter__rspack_import_5)if ("default" !== __rspack_import_key) __rspack_reexport[__rspack_import_key] = ()=>_error_formatter__rspack_import_5[__rspack_import_key];
96
103
  __webpack_require__.d(__webpack_exports__, __rspack_reexport);
97
- var _tool_generator__rspack_import_5 = __webpack_require__("./tool-generator");
104
+ var _tool_generator__rspack_import_6 = __webpack_require__("./tool-generator");
98
105
  var __rspack_reexport = {};
99
- for(const __rspack_import_key in _tool_generator__rspack_import_5)if ("default" !== __rspack_import_key) __rspack_reexport[__rspack_import_key] = ()=>_tool_generator__rspack_import_5[__rspack_import_key];
106
+ for(const __rspack_import_key in _tool_generator__rspack_import_6)if ("default" !== __rspack_import_key) __rspack_reexport[__rspack_import_key] = ()=>_tool_generator__rspack_import_6[__rspack_import_key];
100
107
  __webpack_require__.d(__webpack_exports__, __rspack_reexport);
101
- var _types__rspack_import_6 = __webpack_require__("./types");
108
+ var _types__rspack_import_7 = __webpack_require__("./types");
102
109
  var __rspack_reexport = {};
103
- for(const __rspack_import_key in _types__rspack_import_6)if ("default" !== __rspack_import_key) __rspack_reexport[__rspack_import_key] = ()=>_types__rspack_import_6[__rspack_import_key];
110
+ for(const __rspack_import_key in _types__rspack_import_7)if ("default" !== __rspack_import_key) __rspack_reexport[__rspack_import_key] = ()=>_types__rspack_import_7[__rspack_import_key];
104
111
  __webpack_require__.d(__webpack_exports__, __rspack_reexport);
105
- var _inject_report_html_plugin__rspack_import_7 = __webpack_require__("./inject-report-html-plugin");
112
+ var _inject_report_html_plugin__rspack_import_8 = __webpack_require__("./inject-report-html-plugin");
106
113
  var __rspack_reexport = {};
107
- for(const __rspack_import_key in _inject_report_html_plugin__rspack_import_7)if ("default" !== __rspack_import_key) __rspack_reexport[__rspack_import_key] = ()=>_inject_report_html_plugin__rspack_import_7[__rspack_import_key];
114
+ for(const __rspack_import_key in _inject_report_html_plugin__rspack_import_8)if ("default" !== __rspack_import_key) __rspack_reexport[__rspack_import_key] = ()=>_inject_report_html_plugin__rspack_import_8[__rspack_import_key];
108
115
  __webpack_require__.d(__webpack_exports__, __rspack_reexport);
109
- var _launcher_helper__rspack_import_8 = __webpack_require__("./launcher-helper");
116
+ var _launcher_helper__rspack_import_9 = __webpack_require__("./launcher-helper");
110
117
  var __rspack_reexport = {};
111
- for(const __rspack_import_key in _launcher_helper__rspack_import_8)if ("default" !== __rspack_import_key) __rspack_reexport[__rspack_import_key] = ()=>_launcher_helper__rspack_import_8[__rspack_import_key];
118
+ for(const __rspack_import_key in _launcher_helper__rspack_import_9)if ("default" !== __rspack_import_key) __rspack_reexport[__rspack_import_key] = ()=>_launcher_helper__rspack_import_9[__rspack_import_key];
112
119
  __webpack_require__.d(__webpack_exports__, __rspack_reexport);
113
- var _chrome_path__rspack_import_9 = __webpack_require__("./chrome-path");
120
+ var _chrome_path__rspack_import_10 = __webpack_require__("./chrome-path");
114
121
  var __rspack_reexport = {};
115
- for(const __rspack_import_key in _chrome_path__rspack_import_9)if ("default" !== __rspack_import_key) __rspack_reexport[__rspack_import_key] = ()=>_chrome_path__rspack_import_9[__rspack_import_key];
122
+ for(const __rspack_import_key in _chrome_path__rspack_import_10)if ("default" !== __rspack_import_key) __rspack_reexport[__rspack_import_key] = ()=>_chrome_path__rspack_import_10[__rspack_import_key];
116
123
  __webpack_require__.d(__webpack_exports__, __rspack_reexport);
117
124
  })();
118
125
  for(var __rspack_i in __webpack_exports__)exports[__rspack_i] = __webpack_exports__[__rspack_i];
@@ -78,13 +78,14 @@ function getZodObjectShape(value) {
78
78
  function isRecord(value) {
79
79
  return 'object' == typeof value && null !== value && !Array.isArray(value);
80
80
  }
81
- function makePromptOptional(shape, wrapInOptional) {
81
/**
 * Rebuilds an object schema from `shape` with its `prompt` field made optional.
 *
 * The rebuilt schema is a passthrough object. It is additionally wrapped in
 * `.optional()` when `wrapInOptional` is truthy, and given `description` via
 * `.describe()` when one is provided.
 *
 * @param shape Zod object shape; must contain a `prompt` schema.
 * @param wrapInOptional Whether the resulting schema itself is optional.
 * @param description Optional description to attach to the resulting schema.
 * @returns The rebuilt schema.
 */
function makePromptOptional(shape, wrapInOptional, description) {
    const relaxedObject = external_zod_namespaceObject.z.object({
        ...shape,
        prompt: shape.prompt.optional()
    }).passthrough();
    const maybeOptional = wrapInOptional ? relaxedObject.optional() : relaxedObject;
    // Describe last so the description sits on the outermost schema.
    return description ? maybeOptional.describe(description) : maybeOptional;
}
90
91
  function transformSchemaField(key, value) {
@@ -92,7 +93,7 @@ function transformSchemaField(key, value) {
92
93
  const shape = getZodObjectShape(innerValue);
93
94
  if (shape && (0, external_zod_schema_utils_js_namespaceObject.isMidsceneLocatorField)(innerValue)) return [
94
95
  key,
95
- makePromptOptional(shape, isOptional)
96
+ makePromptOptional(shape, isOptional, (0, external_zod_schema_utils_js_namespaceObject.getZodDescription)(value))
96
97
  ];
97
98
  return [
98
99
  key,
@@ -1,3 +1,3 @@
1
1
  export { imageInfoOfBase64, isValidPNGImageBuffer, isValidJPEGImageBuffer, isValidImageBuffer, validateScreenshotBuffer, type ValidateScreenshotBufferOptions, } from './info';
2
- export { resizeAndConvertImgBuffer, resizeImgBase64, zoomForGPT4o, saveBase64Image, paddingToMatchBlockByBase64, cropByRect, scaleImage, localImg2Base64, httpImg2Base64, preProcessImageUrl, parseBase64, createImgBase64ByFormat, inferBase64ImageFormat, normalizeBase64Image, } from './transform';
2
+ export { resizeAndConvertImgBuffer, resizeImgBase64, zoomForGPT4o, saveBase64Image, paddingToMatchBlockByBase64, cropByRect, scaleImage, localImg2Base64, httpImg2Base64, preProcessImageUrl, parseBase64, createImgBase64ByFormat, inferBase64ImageFormat, normalizeBase64Image, normalizeScreenshotBase64, type NormalizeScreenshotBase64Options, } from './transform';
3
3
  export { processImageElementInfo, compositeElementInfoImg, compositePointMarkerImg, annotateRects, } from './box-select';
@@ -29,6 +29,10 @@ export declare function resizeAndConvertImgBuffer(inputFormat: string, inputData
29
29
  export declare const normalizeBase64Body: (body: string) => string;
30
30
  export declare const inferBase64ImageFormat: (base64Body: string) => "jpeg" | "png";
31
31
  export declare const createImgBase64ByFormat: (format: string, body: string) => string;
32
+ export interface NormalizeScreenshotBase64Options {
33
+ label?: string;
34
+ }
35
+ export declare const normalizeScreenshotBase64: (base64: string, options?: NormalizeScreenshotBase64Options) => string;
32
36
  export declare const normalizeBase64Image: (base64: string) => string;
33
37
  export declare function resizeImgBase64(inputBase64: string, newSize: {
34
38
  width: number;
@@ -0,0 +1,17 @@
1
+ import { z } from 'zod';
2
+ export interface AgentBehaviorInitArgs {
3
+ aiActContext?: string;
4
+ aiActionContext?: string;
5
+ replanningCycleLimit?: number;
6
+ waitAfterAction?: number;
7
+ screenshotShrinkFactor?: number;
8
+ }
9
+ export declare const agentBehaviorInitArgShape: {
10
+ aiActContext: z.ZodOptional<z.ZodString>;
11
+ replanningCycleLimit: z.ZodOptional<z.ZodNumber>;
12
+ waitAfterAction: z.ZodOptional<z.ZodNumber>;
13
+ screenshotShrinkFactor: z.ZodOptional<z.ZodNumber>;
14
+ };
15
+ export declare function extractAgentBehaviorInitArgs(extracted: Partial<AgentBehaviorInitArgs> | undefined): AgentBehaviorInitArgs | undefined;
16
+ export declare function getAgentInitArgsSignature(initArgs: object | undefined): string | undefined;
17
+ export declare function shouldRebuildAgentForInitArgs(currentSignature: string | undefined, nextSignature: string | undefined): boolean;
@@ -1,6 +1,7 @@
1
1
  export * from './base-server';
2
2
  export * from './base-tools';
3
3
  export * from './tool-defaults';
4
+ export * from './agent-behavior-init-args';
4
5
  export * from './init-arg-utils';
5
6
  export * from './error-formatter';
6
7
  export * from './tool-generator';
@@ -97,6 +97,21 @@ export type UserPromptLike = string | {
97
97
  }>;
98
98
  convertHttpImage2Base64?: boolean;
99
99
  };
100
+ export interface RecordToReportScreenshot {
101
+ /**
102
+ * PNG/JPEG data URI, or raw PNG base64 body.
103
+ */
104
+ base64: string;
105
+ description?: string;
106
+ }
107
+ export interface RecordToReportOptions {
108
+ content?: string;
109
+ /**
110
+ * @deprecated Use `screenshots: [{ base64 }]` instead.
111
+ */
112
+ screenshotBase64?: string;
113
+ screenshots?: RecordToReportScreenshot[];
114
+ }
100
115
  /**
101
116
  * Base agent interface
102
117
  * Represents a platform-specific agent (Android, iOS, Web)
@@ -108,10 +123,7 @@ export interface BaseAgent {
108
123
  page?: {
109
124
  screenshotBase64(): Promise<string>;
110
125
  };
111
- recordToReport?: (title?: string, opt?: {
112
- content?: string;
113
- screenshotBase64?: string;
114
- }) => Promise<void>;
126
+ recordToReport?: (title?: string, opt?: RecordToReportOptions) => Promise<void>;
115
127
  callActionInActionSpace?: (actionName: string, params?: unknown) => Promise<unknown>;
116
128
  aiAction?: (description: string, params?: Record<string, unknown>) => Promise<unknown>;
117
129
  aiWaitFor?: (assertion: string, options: Record<string, unknown>) => Promise<unknown>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@midscene/shared",
3
- "version": "1.9.7",
3
+ "version": "1.9.8-beta-20260618091332.0",
4
4
  "repository": "https://github.com/web-infra-dev/midscene",
5
5
  "homepage": "https://midscenejs.com/",
6
6
  "types": "./dist/types/index.d.ts",
package/src/img/index.ts CHANGED
@@ -21,6 +21,8 @@ export {
21
21
  createImgBase64ByFormat,
22
22
  inferBase64ImageFormat,
23
23
  normalizeBase64Image,
24
+ normalizeScreenshotBase64,
25
+ type NormalizeScreenshotBase64Options,
24
26
  } from './transform';
25
27
  export {
26
28
  processImageElementInfo,
@@ -157,6 +157,9 @@ export async function resizeAndConvertImgBuffer(
157
157
  export const normalizeBase64Body = (body: string) => body.replace(/\s/g, '');
158
158
 
159
159
  const base64ImageDataUrlPattern = /^data:image\/[a-zA-Z0-9.+-]+;base64,/i;
160
+ const supportedScreenshotDataUriPattern =
161
+ /^data:image\/(png|jpe?g);base64,([\s\S]*)$/i;
162
+ const rawBase64BodyPattern = /^[A-Za-z0-9+/=\s]+$/;
160
163
 
161
164
  export const inferBase64ImageFormat = (base64Body: string) => {
162
165
  if (base64Body.startsWith('iVBORw0KGgo')) {
@@ -207,6 +210,48 @@ export const createImgBase64ByFormat = (format: string, body: string) => {
207
210
  return `data:image/${format};base64,${normalizeBase64Body(body)}`;
208
211
  };
209
212
 
213
/**
 * Options accepted by {@link normalizeScreenshotBase64}.
 */
export interface NormalizeScreenshotBase64Options {
  /**
   * Subject placed at the start of thrown error messages.
   * Falls back to `'screenshot base64'` when omitted.
   */
  label?: string;
}

/**
 * Validates a screenshot payload and returns it as an image data URI.
 *
 * Accepted inputs (after trimming surrounding whitespace):
 * - a `data:image/png`, `data:image/jpeg` or `data:image/jpg` base64 data URI
 *   (`jpg` is rewritten to `jpeg`);
 * - a raw base64 body, which is always labelled as PNG.
 *
 * @param base64 - Data URI or raw base64 body.
 * @param options - Optional error-message label.
 * @returns The result of `createImgBase64ByFormat` for the detected format.
 * @throws Error when the input (or the data URI body) is empty, when it is a
 *   `data:` URI of an unsupported kind, or when a raw body contains characters
 *   outside the base64 alphabet, `=` and whitespace.
 */
export const normalizeScreenshotBase64 = (
  base64: string,
  options?: NormalizeScreenshotBase64Options,
) => {
  const label = options?.label ?? 'screenshot base64';
  const emptyMessage = `${label} cannot be empty`;
  const unsupportedMessage = `${label} must be a PNG/JPEG data URI or raw PNG base64 string`;

  const input = base64.trim();
  if (input.length === 0) {
    throw new Error(emptyMessage);
  }

  const matched = supportedScreenshotDataUriPattern.exec(input);
  if (matched !== null) {
    const declaredFormat = matched[1].toLowerCase();
    const payload = matched[2];
    // A data URI whose body is only whitespace is treated like empty input.
    if (normalizeBase64Body(payload) === '') {
      throw new Error(emptyMessage);
    }
    return createImgBase64ByFormat(
      declaredFormat === 'jpg' ? 'jpeg' : declaredFormat,
      payload,
    );
  }

  // Any other `data:` URI, or a raw body with non-base64 characters, is rejected
  // with the same message.
  if (input.startsWith('data:') || !rawBase64BodyPattern.test(input)) {
    throw new Error(unsupportedMessage);
  }

  return createImgBase64ByFormat('png', input);
};
254
+
210
255
  export const normalizeBase64Image = (base64: string) => {
211
256
  const trimmedBase64 = base64.trim();
212
257
  if (base64ImageDataUrlPattern.test(trimmedBase64)) {
@@ -0,0 +1,109 @@
1
+ import { z } from 'zod';
2
+
3
/**
 * Agent behaviour settings that can be supplied as init arguments.
 * Every field is optional; the per-field notes mirror the descriptions used by
 * `agentBehaviorInitArgShape`.
 */
export interface AgentBehaviorInitArgs {
  /** Background knowledge passed to aiAct. */
  aiActContext?: string;
  /**
   * Not part of the exposed schema shape, but still copied through by
   * `extractAgentBehaviorInitArgs`.
   * NOTE(review): looks like a legacy alias of `aiActContext` - confirm.
   */
  aiActionContext?: string;
  /** Maximum number of replanning cycles for aiAct. */
  replanningCycleLimit?: number;
  /** Wait time in milliseconds after each action execution. */
  waitAfterAction?: number;
  /** Screenshot shrink factor applied before sending images to AI. */
  screenshotShrinkFactor?: number;
}
10
+
11
/**
 * Keys of `AgentBehaviorInitArgs` that receive a schema entry.
 * `aiActionContext` is intentionally left out of the exposed shape.
 */
type ExposedAgentBehaviorInitArgKey = Exclude<
  keyof AgentBehaviorInitArgs,
  'aiActionContext'
>;

/**
 * Zod field definitions for the exposed agent behaviour init arguments.
 * All fields are optional; `satisfies` keeps the key set in sync with
 * `ExposedAgentBehaviorInitArgKey` without widening the inferred types.
 */
export const agentBehaviorInitArgShape = {
  // Free-form text.
  aiActContext: z.string().optional().describe(
    'Background knowledge passed to aiAct. Default: no extra context.',
  ),
  // Non-negative integer.
  replanningCycleLimit: z.number().int().nonnegative().optional().describe(
    'Maximum number of replanning cycles for aiAct. Default: model adapter default.',
  ),
  // Non-negative number of milliseconds.
  waitAfterAction: z.number().nonnegative().optional().describe(
    'Wait time in milliseconds after each action execution. Default: 300ms.',
  ),
  // Number greater than or equal to 1.
  screenshotShrinkFactor: z.number().min(1).optional().describe(
    'Screenshot shrink factor before sending images to AI. Default: 1; high values may reduce recognition quality, especially on mobile.',
  ),
} satisfies Record<ExposedAgentBehaviorInitArgKey, z.ZodTypeAny>;
46
+
47
/**
 * Picks the well-typed agent behaviour fields out of a loosely trusted object.
 *
 * String fields are kept only when they are strings; numeric fields are kept
 * only when they are numbers other than `NaN` (`typeof NaN === 'number'`, so a
 * plain `typeof` check would forward a meaningless value as configuration).
 *
 * @param extracted - Candidate init arguments, or `undefined`.
 * @returns A new object holding only the accepted fields, or `undefined` when
 *   the input is missing or no field was accepted.
 */
export function extractAgentBehaviorInitArgs(
  extracted: Partial<AgentBehaviorInitArgs> | undefined,
): AgentBehaviorInitArgs | undefined {
  if (!extracted) {
    return undefined;
  }

  const isUsableNumber = (candidate: unknown): candidate is number =>
    typeof candidate === 'number' && !Number.isNaN(candidate);

  const agentOptions: AgentBehaviorInitArgs = {};

  if (typeof extracted.aiActContext === 'string') {
    agentOptions.aiActContext = extracted.aiActContext;
  }
  if (typeof extracted.aiActionContext === 'string') {
    agentOptions.aiActionContext = extracted.aiActionContext;
  }
  if (isUsableNumber(extracted.replanningCycleLimit)) {
    agentOptions.replanningCycleLimit = extracted.replanningCycleLimit;
  }
  if (isUsableNumber(extracted.waitAfterAction)) {
    agentOptions.waitAfterAction = extracted.waitAfterAction;
  }
  if (isUsableNumber(extracted.screenshotShrinkFactor)) {
    agentOptions.screenshotShrinkFactor = extracted.screenshotShrinkFactor;
  }

  // An object with no accepted field is reported as "nothing configured".
  return Object.keys(agentOptions).length > 0 ? agentOptions : undefined;
}
74
+
75
/**
 * Recursively rebuilds a JSON-like value so that object keys appear in a
 * deterministic order, making `JSON.stringify` output independent of the
 * order in which properties were inserted.
 *
 * Keys are ordered by UTF-16 code unit comparison rather than
 * `localeCompare`: locale collation varies between environments and reports
 * distinct but canonically equivalent keys (for example `"\u00e9"` and
 * `"e\u0301"`) as equal, which would leave their order dependent on insertion
 * order.
 *
 * @param value - Any value; arrays and objects are walked, everything else is
 *   returned unchanged.
 * @returns A structurally equal value with sorted object keys.
 */
function stableJsonValue(value: unknown): unknown {
  if (Array.isArray(value)) {
    return value.map((item) => stableJsonValue(item));
  }

  if (value && typeof value === 'object') {
    const orderedEntries = Object.entries(value as Record<string, unknown>)
      // Keys of one object are unique, so the comparator never needs a tie-break.
      .sort(([left], [right]) => (left < right ? -1 : left > right ? 1 : 0))
      .map(([key, nestedValue]) => [key, stableJsonValue(nestedValue)] as const);
    return Object.fromEntries(orderedEntries);
  }

  return value;
}
90
+
91
/**
 * Produces a comparable string signature for a set of agent init arguments.
 *
 * @param initArgs - The init arguments object, if any.
 * @returns `undefined` when `initArgs` is missing or has no own enumerable
 *   keys; otherwise the JSON text of `stableJsonValue(initArgs)`.
 */
export function getAgentInitArgsSignature(
  initArgs: object | undefined,
): string | undefined {
  const hasNoArgs = !initArgs || Object.keys(initArgs).length === 0;
  return hasNoArgs ? undefined : JSON.stringify(stableJsonValue(initArgs));
}
100
+
101
/**
 * Decides whether an agent must be rebuilt because its init-argument
 * signature changed.
 *
 * Two signatures that differ can never both be `undefined`, so a separate
 * "at least one is defined" check adds nothing; the inequality alone is the
 * full condition.
 *
 * @param currentSignature - Signature the existing agent was built with.
 * @param nextSignature - Signature of the newly requested init arguments.
 * @returns `true` when the signatures differ, `false` when they are equal
 *   (including when both are `undefined`).
 */
export function shouldRebuildAgentForInitArgs(
  currentSignature: string | undefined,
  nextSignature: string | undefined,
): boolean {
  return currentSignature !== nextSignature;
}
package/src/mcp/index.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  export * from './base-server';
2
2
  export * from './base-tools';
3
3
  export * from './tool-defaults';
4
+ export * from './agent-behavior-init-args';
4
5
  export * from './init-arg-utils';
5
6
  export * from './error-formatter';
6
7
  export * from './tool-generator';
@@ -129,6 +129,7 @@ function isRecord(value: unknown): value is Record<string, unknown> {
129
129
  function makePromptOptional(
130
130
  shape: Record<string, z.ZodTypeAny>,
131
131
  wrapInOptional: boolean,
132
+ description?: string | null,
132
133
  ): z.ZodTypeAny {
133
134
  const newShape = { ...shape };
134
135
  newShape.prompt = shape.prompt.optional();
@@ -137,6 +138,9 @@ function makePromptOptional(
137
138
  if (wrapInOptional) {
138
139
  newSchema = newSchema.optional();
139
140
  }
141
+ if (description) {
142
+ newSchema = newSchema.describe(description);
143
+ }
140
144
  return newSchema;
141
145
  }
142
146
 
@@ -151,7 +155,10 @@ function transformSchemaField(
151
155
  const shape = getZodObjectShape(innerValue);
152
156
 
153
157
  if (shape && isMidsceneLocatorField(innerValue)) {
154
- return [key, makePromptOptional(shape, isOptional)];
158
+ return [
159
+ key,
160
+ makePromptOptional(shape, isOptional, getZodDescription(value)),
161
+ ];
155
162
  }
156
163
  return [key, value];
157
164
  }
package/src/mcp/types.ts CHANGED
@@ -100,6 +100,23 @@ export type UserPromptLike =
100
100
  convertHttpImage2Base64?: boolean;
101
101
  };
102
102
 
103
/**
 * A single screenshot entry that can be attached through
 * `RecordToReportOptions.screenshots`.
 */
export interface RecordToReportScreenshot {
  /**
   * PNG/JPEG data URI, or raw PNG base64 body.
   */
  base64: string;
  /** Optional text accompanying this screenshot. */
  description?: string;
}
110
+
111
/**
 * Second argument of `BaseAgent.recordToReport`; every field is optional.
 */
export interface RecordToReportOptions {
  /** Text content for the report entry. */
  content?: string;
  /**
   * @deprecated Use `screenshots: [{ base64 }]` instead.
   */
  screenshotBase64?: string;
  /** Screenshots to attach, each with its own optional description. */
  screenshots?: RecordToReportScreenshot[];
}
119
+
103
120
  /**
104
121
  * Base agent interface
105
122
  * Represents a platform-specific agent (Android, iOS, Web)
@@ -113,7 +130,7 @@ export interface BaseAgent {
113
130
  };
114
131
  recordToReport?: (
115
132
  title?: string,
116
- opt?: { content?: string; screenshotBase64?: string },
133
+ opt?: RecordToReportOptions,
117
134
  ) => Promise<void>;
118
135
  callActionInActionSpace?: (
119
136
  actionName: string,