@midscene/core 1.9.6 → 1.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/dist/es/agent/agent.mjs +40 -8
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/tasks.mjs +3 -3
  4. package/dist/es/agent/tasks.mjs.map +1 -1
  5. package/dist/es/agent/utils.mjs +18 -3
  6. package/dist/es/agent/utils.mjs.map +1 -1
  7. package/dist/es/ai-model/prompt/describe.mjs +10 -2
  8. package/dist/es/ai-model/prompt/describe.mjs.map +1 -1
  9. package/dist/es/ai-model/prompt/markdown-generator.mjs +150 -40
  10. package/dist/es/ai-model/prompt/markdown-generator.mjs.map +1 -1
  11. package/dist/es/ai-model/prompt/recorder-generation-common.mjs +74 -14
  12. package/dist/es/ai-model/prompt/recorder-generation-common.mjs.map +1 -1
  13. package/dist/es/ai-model/prompt/recorder-metadata-generator.mjs +3 -5
  14. package/dist/es/ai-model/prompt/recorder-metadata-generator.mjs.map +1 -1
  15. package/dist/es/ai-model/prompt/recorder-ui-describer.mjs +10 -6
  16. package/dist/es/ai-model/prompt/recorder-ui-describer.mjs.map +1 -1
  17. package/dist/es/ai-model/prompt/yaml-generator.mjs +2 -2
  18. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
  19. package/dist/es/ai-model/service-caller/index.mjs +33 -3
  20. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  21. package/dist/es/device/index.mjs.map +1 -1
  22. package/dist/es/recorder-ui-describer.mjs +33 -84
  23. package/dist/es/recorder-ui-describer.mjs.map +1 -1
  24. package/dist/es/service/index.mjs +11 -3
  25. package/dist/es/service/index.mjs.map +1 -1
  26. package/dist/es/service/utils.mjs +50 -1
  27. package/dist/es/service/utils.mjs.map +1 -1
  28. package/dist/es/types.mjs.map +1 -1
  29. package/dist/es/utils.mjs +2 -2
  30. package/dist/lib/agent/agent.js +39 -7
  31. package/dist/lib/agent/agent.js.map +1 -1
  32. package/dist/lib/agent/tasks.js +3 -3
  33. package/dist/lib/agent/tasks.js.map +1 -1
  34. package/dist/lib/agent/utils.js +20 -2
  35. package/dist/lib/agent/utils.js.map +1 -1
  36. package/dist/lib/ai-model/prompt/describe.js +10 -2
  37. package/dist/lib/ai-model/prompt/describe.js.map +1 -1
  38. package/dist/lib/ai-model/prompt/markdown-generator.js +150 -40
  39. package/dist/lib/ai-model/prompt/markdown-generator.js.map +1 -1
  40. package/dist/lib/ai-model/prompt/recorder-generation-common.js +75 -12
  41. package/dist/lib/ai-model/prompt/recorder-generation-common.js.map +1 -1
  42. package/dist/lib/ai-model/prompt/recorder-metadata-generator.js +2 -4
  43. package/dist/lib/ai-model/prompt/recorder-metadata-generator.js.map +1 -1
  44. package/dist/lib/ai-model/prompt/recorder-ui-describer.js +10 -6
  45. package/dist/lib/ai-model/prompt/recorder-ui-describer.js.map +1 -1
  46. package/dist/lib/ai-model/prompt/yaml-generator.js +2 -2
  47. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
  48. package/dist/lib/ai-model/service-caller/index.js +33 -3
  49. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  50. package/dist/lib/device/index.js.map +1 -1
  51. package/dist/lib/recorder-ui-describer.js +33 -84
  52. package/dist/lib/recorder-ui-describer.js.map +1 -1
  53. package/dist/lib/service/index.js +10 -2
  54. package/dist/lib/service/index.js.map +1 -1
  55. package/dist/lib/service/utils.js +53 -1
  56. package/dist/lib/service/utils.js.map +1 -1
  57. package/dist/lib/types.js.map +1 -1
  58. package/dist/lib/utils.js +2 -2
  59. package/dist/types/agent/agent.d.ts +17 -6
  60. package/dist/types/agent/index.d.ts +1 -1
  61. package/dist/types/agent/tasks.d.ts +4 -2
  62. package/dist/types/agent/utils.d.ts +4 -1
  63. package/dist/types/ai-model/prompt/recorder-generation-common.d.ts +11 -7
  64. package/dist/types/ai-model/prompt/recorder-ui-describer.d.ts +1 -1
  65. package/dist/types/device/index.d.ts +6 -0
  66. package/dist/types/service/index.d.ts +1 -0
  67. package/dist/types/service/utils.d.ts +2 -0
  68. package/dist/types/types.d.ts +1 -0
  69. package/package.json +2 -2
@@ -1,11 +1,26 @@
1
1
  import type { TUserPrompt } from '../ai-model/index';
2
2
  import Service from '../service/index';
3
- import { type ActionParam, type ActionReturn, type AgentAssertOpt, type AgentDescribeElementAtPointResult, type AgentOpt, type AgentWaitForOpt, type DeepThinkOption, type DeviceAction, ExecutionDump, type LocateOption, type LocateResultElement, type LocateValidatorResult, type LocatorValidatorOption, type OnTaskStartTip, ReportActionDump, type ScrollParam, type ServiceAction, type ServiceExtractOption, type ServiceExtractParam, type UIContext } from '../types';
3
+ import { type ActionParam, type ActionReturn, type AgentAssertOpt, type AgentDescribeElementAtPointResult, type AgentOpt, type AgentWaitForOpt, type DeepThinkOption, type DeviceAction, ExecutionDump, type LocateOption, type LocateResultElement, type LocateValidatorResult, type LocatorValidatorOption, type OnTaskStartTip, ReportActionDump, type ScrollParam, type ServiceAction, type ServiceExtractOption, type ServiceExtractParam, type Size, type UIContext } from '../types';
4
4
  import type { AbstractInterface } from '../device';
5
5
  import type { TaskRunner } from '../task-runner';
6
6
  import { ModelConfigManager } from '@midscene/shared/env';
7
7
  import { TaskCache } from './task-cache';
8
8
  import { TaskExecutor } from './tasks';
9
+ export type DescribeElementCoordinateSpace = 'screenshot' | 'logical';
10
+ export type DescribeElementAtPointOptions = {
11
+ verifyPrompt?: boolean;
12
+ retryLimit?: number;
13
+ deepLocate?: boolean;
14
+ screenshotBase64?: string;
15
+ screenshotSize?: Size;
16
+ coordinateSpace?: DescribeElementCoordinateSpace;
17
+ logicalSize?: Size;
18
+ onProgress?: (progress: {
19
+ prompt?: string;
20
+ deepLocate?: boolean;
21
+ verifyResult?: LocateValidatorResult;
22
+ }) => void;
23
+ } & LocatorValidatorOption;
9
24
  export type AiActOptions = {
10
25
  cacheable?: boolean;
11
26
  fileChooserAccept?: string | string[];
@@ -142,11 +157,7 @@ export declare class Agent<InterfaceType extends AbstractInterface = AbstractInt
142
157
  aiNumber(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<number>;
143
158
  aiString(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<string>;
144
159
  aiAsk(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<string>;
145
- describeElementAtPoint(center: [number, number], opt?: {
146
- verifyPrompt?: boolean;
147
- retryLimit?: number;
148
- deepLocate?: boolean;
149
- } & LocatorValidatorOption): Promise<AgentDescribeElementAtPointResult>;
160
+ describeElementAtPoint(center: [number, number], opt?: DescribeElementAtPointOptions): Promise<AgentDescribeElementAtPointResult>;
150
161
  verifyLocator(prompt: string, locateOpt: LocateOption | undefined, expectCenter: [number, number], verifyLocateOption?: LocatorValidatorOption): Promise<LocateValidatorResult>;
151
162
  /**
152
163
  * Locate an element and return both its center point and an approximate rect.
@@ -6,4 +6,4 @@ export { type LocateCache, type PlanningCache, TaskCache } from './task-cache';
6
6
  export { cacheFileExt } from './task-cache';
7
7
  export { TaskExecutor } from './tasks';
8
8
  export type { AgentOpt } from '../types';
9
- export type { AiActOptions } from './agent';
9
+ export type { AiActOptions, DescribeElementAtPointOptions, DescribeElementCoordinateSpace, } from './agent';
@@ -4,7 +4,7 @@ import type { AbstractInterface } from '../device';
4
4
  import type Service from '../service';
5
5
  import type { TaskRunner } from '../task-runner';
6
6
  import { TaskExecutionError } from '../task-runner';
7
- import type { DeviceAction, ExecutionTaskApply, ExecutionTaskProgressOptions, MidsceneYamlFlowItem, PlanningAction, PlanningActionParamWaitFor, ServiceExtractOption, ServiceExtractParam } from '../types';
7
+ import type { DeviceAction, ExecutionTaskApply, ExecutionTaskProgressOptions, MidsceneYamlFlowItem, PlanningAction, PlanningActionParamWaitFor, ServiceExtractOption, ServiceExtractParam, UIContext } from '../types';
8
8
  import type { TaskCache } from './task-cache';
9
9
  export { locatePlanForLocate } from './task-builder';
10
10
  import { type TaskTitleType } from './ui-utils';
@@ -75,7 +75,9 @@ export declare class TaskExecutor {
75
75
  loadYamlFlowAsPlanning(userInstruction: TUserPrompt, yamlString: string, reportOptions?: ActionReportOptions): Promise<{
76
76
  runner: TaskRunner;
77
77
  }>;
78
- runPlans(title: string, plans: PlanningAction[], planningModel: ModelRuntime, defaultModel: ModelRuntime): Promise<ExecutionResult>;
78
+ runPlans(title: string, plans: PlanningAction[], planningModel: ModelRuntime, defaultModel: ModelRuntime, options?: {
79
+ uiContext?: UIContext;
80
+ }): Promise<ExecutionResult>;
79
81
  action(userPrompt: TUserPrompt, planningModel: ModelRuntime, defaultModel: ModelRuntime, includeLocateInPlanning: boolean, aiActContext?: string, cacheable?: boolean, replanningCycleLimitOverride?: number, imagesIncludeCount?: number, deepThink?: boolean, fileChooserAccept?: string[], deepLocate?: boolean, abortSignal?: AbortSignal, reportOptions?: ActionReportOptions): Promise<ExecutionResult<{
80
82
  yamlFlow?: MidsceneYamlFlowItem[];
81
83
  output?: string;
@@ -1,11 +1,14 @@
1
1
  import type { TMultimodalPrompt, TUserPrompt } from '../common';
2
2
  import type { AbstractInterface } from '../device';
3
- import type { ElementCacheFeature, LocateResultElement, PixelBbox, PlanningLocateParam, PlanningLocateParamWithLocatedPixelBbox, Rect, UIContext } from '../types';
3
+ import type { ElementCacheFeature, LocateResultElement, PixelBbox, PlanningLocateParam, PlanningLocateParamWithLocatedPixelBbox, Rect, Size, UIContext } from '../types';
4
4
  import type { TaskCache } from './task-cache';
5
5
  export declare function commonContextParser(interfaceInstance: AbstractInterface, _opt: {
6
6
  uploadServerUrl?: string;
7
7
  screenshotShrinkFactor?: number;
8
8
  }): Promise<UIContext>;
9
+ export declare function createScreenshotBoundUIContext(screenshotBase64: string, opt: {
10
+ screenshotSize?: Size;
11
+ }): Promise<UIContext>;
9
12
  export declare function getReportFileName(tag?: string): string;
10
13
  export declare function printReportMsg(filepath: string): void;
11
14
  export declare function isPixelBbox(value: unknown): value is PixelBbox;
@@ -1,4 +1,4 @@
1
- import { type MidsceneRecorderEvent, type MidsceneRecorderMarkdownScreenshotAsset, type MidsceneRecorderTarget } from '@midscene/shared/recorder';
1
+ import { type MidsceneRecorderEvent, type MidsceneRecorderMarkdownScreenshotAsset, type MidsceneRecorderSemantic, type MidsceneRecorderTarget } from '@midscene/shared/recorder';
2
2
  export interface EventCounts {
3
3
  navigation: number;
4
4
  click: number;
@@ -16,16 +16,19 @@ export interface ProcessedEvent {
16
16
  timestamp: number;
17
17
  source?: string;
18
18
  actionType?: string;
19
- descriptionSource?: string;
20
- descriptionError?: string;
21
19
  url?: string;
22
20
  title?: string;
23
- elementDescription?: string;
24
- replayInstruction?: string;
25
- actionSummary?: string;
26
- semanticConfidence?: string;
21
+ semantic?: MidsceneRecorderSemantic;
27
22
  description?: string;
28
23
  value?: string;
24
+ typedText?: string;
25
+ inputIndex?: number;
26
+ isSequentialInput?: boolean;
27
+ hasNeighborInput?: boolean;
28
+ previousInputDescription?: string;
29
+ previousActionDescription?: string;
30
+ nextActionDescription?: string;
31
+ neighborInputValues?: string[];
29
32
  pageInfo?: any;
30
33
  elementRect?: any;
31
34
  screenshotPath?: string;
@@ -70,6 +73,7 @@ export interface FilteredEvents {
70
73
  inputEvents: ChromeRecordedEvent[];
71
74
  scrollEvents: ChromeRecordedEvent[];
72
75
  }
76
+ export declare function compactRecorderSemanticForGeneration(semantic?: MidsceneRecorderSemantic): MidsceneRecorderSemantic | undefined;
73
77
  export declare const validateEvents: (events: ChromeRecordedEvent[]) => void;
74
78
  export declare const getScreenshotsForLLM: (events: ChromeRecordedEvent[], maxScreenshots?: number) => string[];
75
79
  export declare const filterEventsByType: (events: ChromeRecordedEvent[]) => FilteredEvents;
@@ -1 +1 @@
1
- export declare const RECORDER_UI_DESCRIBER_SYSTEM_PROMPT = "You convert Studio preview recorder UI events into semantic replay instructions.\n\nThe recorder works from screenshots and mapped real-device coordinates only. Infer stable UI intent from the highlighted BEFORE screenshot. The AFTER screenshot is contextual evidence for state changes and scroll destinations.\n\nOutput JSON only:\n{\n \"elementDescription\": \"short stable target/region description\",\n \"replayInstruction\": \"one executable natural-language replay step\",\n \"actionSummary\": \"short timeline summary\",\n \"scrollDestinationDescription\": \"for scroll only: concrete newly visible destination content or goal\",\n \"confidence\": \"high\" | \"medium\" | \"low\",\n \"error\"?: \"only if no useful visual description can be inferred\"\n}\n\nRules:\n- Do NOT output coordinates as the main description.\n- Do NOT mention \"near coordinates\", \"nearby element\", \"near point\", \"red marker\", highlighted box, highlighted element, or screenshot.\n- Prefer stable target descriptions in this order: exact visible text > label/placeholder > role + stable section/context > icon purpose > visual position.\n- Keep quoted UI text in the original UI language, for example \"\u4F7F\u7528\u6587\u6863\" or \"\u5F00\u59CB\u4F7F\u7528\".\n- Apply the platform guidance from the user event:\n - Web: button, input, link, menu item, tab, dialog, aria-label, placeholder, form section.\n - Mobile: tab, list item, text field, icon button, navigation bar, bottom bar, sheet, card, screen section.\n - Desktop/computer: menu item, toolbar button, dialog field, sidebar item, window control, file row, application region.\n- Pointer action rules:\n - Preserve event.actionType semantics. Tap, DoubleClick, LongPress, and RightClick must not all become Click.\n - Tap replayInstruction format: Tap on the element described as \"<elementDescription>\".\n - DoubleClick replayInstruction format: Double click on the element described as \"<elementDescription>\".\n - LongPress replayInstruction format: Long press the element described as \"<elementDescription>\".\n - RightClick replayInstruction format: Right click on the element described as \"<elementDescription>\".\n - Click replayInstruction format: Click on the element described as \"<elementDescription>\".\n- Input-specific rules:\n - The highlighted BEFORE screenshot marks the field that receives the text.\n - The screenshot after the action may show the typed value; use it only to confirm the field, never as the field description.\n - elementDescription must identify the field itself, for example \"\u5E74\u9F84 input in the basic form\" or \"search input in the top navigation\".\n - Never use \"AI is analyzing element\", the typed value, or a generic \"input field\" as elementDescription.\n - Input replayInstruction format: Input \"<value>\" into the element described as \"<elementDescription>\".\n- Scroll target quality bar:\n - elementDescription describes the scrollable page, panel, list, table, or section.\n - scrollDestinationDescription is required and describes what the scroll is trying to reveal or reach, using newly visible headings, section titles, list items, or stable content from the AFTER screenshot.\n - Prefer descriptions like \"\u96C6\u6210\u5230 Playwright - Midscene - Vision-Driven UI Automation page, scrolling toward the API reference section\" or \"Android API documentation page, scrolling to the installation steps section\".\n - Do NOT write generic phrases like \"more content\", \"the page\", \"current screen\", or \"main scrollable area\".\n- Scroll replayInstruction format: Scroll the page/region with description \"<elementDescription>\" by value \"<recorded value>\" until \"<scrollDestinationDescription>\" is visible.\n- Scroll actionSummary format: Scroll <elementDescription> toward <scrollDestinationDescription>.\n- Drag/Swipe rules:\n - Drag replayInstruction format: Drag through the area described as \"<elementDescription>\".\n - Swipe replayInstruction format: Swipe through the area described as \"<elementDescription>\".\n - Describe start/end regions or the dragged UI control; do not describe only the gesture path.\n- KeyboardPress replayInstruction format: Press \"<value>\" on the element described as \"<elementDescription>\".\n- If uncertain, provide the best concrete visible text/role/context description. Set confidence to \"low\"; do not fall back to coordinates.";
1
+ export declare const RECORDER_UI_DESCRIBER_SYSTEM_PROMPT = "You convert Studio preview recorder UI events into semantic replay instructions.\n\nThe recorder works from screenshots and mapped real-device coordinates only. Infer stable UI intent from the highlighted BEFORE screenshot. The AFTER screenshot is contextual evidence for state changes and scroll destinations.\n\nOutput JSON only:\n{\n \"elementDescription\": \"short stable target/region description\",\n \"replayInstruction\": \"one executable natural-language replay step\",\n \"actionSummary\": \"short timeline summary\",\n \"scrollDestinationDescription\": \"for scroll only: concrete newly visible destination content or goal\",\n \"confidence\": \"high\" | \"medium\" | \"low\",\n \"error\"?: \"only if no useful visual description can be inferred\"\n}\n\nRules:\n- Do NOT output coordinates as the main description.\n- Do NOT mention \"near coordinates\", \"nearby element\", \"near point\", \"red marker\", highlighted box, highlighted element, or screenshot.\n- Prefer stable target descriptions in this order: exact stable control text > stable label > role + stable section/context > icon purpose > visual position.\n- Treat placeholder or hint text that can change by user, time, data, or context as dynamic. Do not use dynamic hint values as the primary target description; prefer role + stable region + intent.\n- For repeated collections, treat item identity text as dynamic unless the user is clearly verifying that exact item. This includes any list/grid/table/feed/menu or repeated record surface. Do not output descriptions like \"<role> titled/named '<content>'\"; prefer stable role + region + selection intent.\n- Keep quoted UI text in the original UI language when it is a stable control label.\n- Apply the platform guidance from the user event:\n - Web: button, input, link, menu item, tab, dialog, aria-label, placeholder, form section.\n - Mobile: tab, list item, text field, icon button, navigation bar, bottom bar, sheet, card, screen section.\n - Desktop/computer: menu item, toolbar button, dialog field, sidebar item, window control, file row, application region.\n- Pointer action rules:\n - Preserve event.actionType semantics. Tap, DoubleClick, LongPress, and RightClick must not all become Click.\n - Tap replayInstruction format: Tap on the element described as \"<elementDescription>\".\n - DoubleClick replayInstruction format: Double click on the element described as \"<elementDescription>\".\n - LongPress replayInstruction format: Long press the element described as \"<elementDescription>\".\n - RightClick replayInstruction format: Right click on the element described as \"<elementDescription>\".\n - Click replayInstruction format: Click on the element described as \"<elementDescription>\".\n- Input-specific rules:\n - The highlighted BEFORE screenshot marks the field that receives the text.\n - The screenshot after the action may show the typed value; use it only to confirm the field, never as the field description.\n - elementDescription must identify the field itself by stable field role, nearby label, region, section, or sequence intent.\n - Never use \"AI is analyzing element\", the typed value, the page title alone, or a generic \"input field\" as elementDescription.\n - For consecutive input events, distinguish fields by stable role, section, order, current focus, filled/empty state, and neighboring actions instead of reusing the same generic field description.\n - Input replayInstruction format: Input \"<value>\" into the element described as \"<elementDescription>\".\n- Scroll target quality bar:\n - elementDescription describes the scrollable page, panel, list, table, or section at the highlighted scroll point.\n - When multiple scrollable regions are visible, preserve the specific region where the scroll happened, such as left/right/top/bottom panel, navigation area, content pane, dialog body, table, list, or menu. Do not generalize a panel/list scroll into the whole page.\n - scrollDestinationDescription is required and describes what the scroll is trying to reveal or reach, using newly visible headings, section titles, list items, or stable content from the AFTER screenshot.\n - Prefer descriptions like \"Playwright integration documentation page, scrolling toward the API reference section\" or \"Android API documentation page, scrolling to the installation steps section\".\n - Do NOT write generic phrases like \"more content\", \"the page\", \"current screen\", or \"main scrollable area\".\n- Scroll replayInstruction format: Scroll the page/region with description \"<elementDescription>\" by value \"<recorded value>\" until \"<scrollDestinationDescription>\" is visible.\n- Scroll actionSummary format: Scroll <elementDescription> toward <scrollDestinationDescription>.\n- Drag/Swipe rules:\n - Drag replayInstruction format: Drag through the area described as \"<elementDescription>\".\n - Swipe replayInstruction format: Swipe through the area described as \"<elementDescription>\".\n - Describe start/end regions or the dragged UI control; do not describe only the gesture path.\n- KeyboardPress replayInstruction format: Press \"<value>\" on the element described as \"<elementDescription>\".\n- If uncertain, provide the best concrete visible text/role/context description. Set confidence to \"low\"; do not fall back to coordinates.";
@@ -153,6 +153,12 @@ export declare abstract class AbstractInterface {
153
153
  * is active.
154
154
  */
155
155
  flushPendingVisualUpdate?(): Promise<void>;
156
+ /**
157
+ * Optional non-blocking variant of `flushPendingVisualUpdate`. Keyboard-
158
+ * heavy preview interactions can schedule a coalesced refresh here without
159
+ * stalling the input hot path.
160
+ */
161
+ schedulePendingVisualUpdate?(): void;
156
162
  /**
157
163
  * Optional navigation state probe for browser-like interfaces, used to drive
158
164
  * loading indicators in playground UIs. Returning `undefined` means the
@@ -20,6 +20,7 @@ export default class Service {
20
20
  extract<T>(dataDemand: ServiceExtractParam, modelRuntime: ModelRuntime, opt?: ServiceExtractOption, pageDescription?: string, multimodalPrompt?: TMultimodalPrompt, context?: UIContext): Promise<ServiceExtractResult<T>>;
21
21
  describe(target: Rect | [number, number], modelRuntime: ModelRuntime, opt?: {
22
22
  deepLocate?: boolean;
23
+ context?: UIContext;
23
24
  }): Promise<Pick<AIDescribeElementResponse, 'description'>>;
24
25
  }
25
26
  export {};
@@ -1,2 +1,4 @@
1
+ import type { AIDescribeElementResponse } from '../types';
1
2
  import type { PartialServiceDumpFromSDK, ServiceDump } from '../types';
2
3
  export declare function createServiceDump(data: PartialServiceDumpFromSDK): ServiceDump;
4
+ export declare function recoverDescribeResponseFromParseError(error: unknown): Pick<AIDescribeElementResponse, 'description'> | undefined;
@@ -79,6 +79,7 @@ export interface LocateValidatorResult {
79
79
  rect: Rect;
80
80
  center: [number, number];
81
81
  centerDistance?: number;
82
+ includedInRect?: boolean;
82
83
  }
83
84
  export interface AgentDescribeElementAtPointResult {
84
85
  prompt: string;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
4
- "version": "1.9.6",
4
+ "version": "1.9.7",
5
5
  "repository": "https://github.com/web-infra-dev/midscene",
6
6
  "homepage": "https://midscenejs.com/",
7
7
  "main": "./dist/lib/index.js",
@@ -107,7 +107,7 @@
107
107
  "semver": "7.5.2",
108
108
  "undici": "^6.0.0",
109
109
  "zod": "^3.25.1",
110
- "@midscene/shared": "1.9.6"
110
+ "@midscene/shared": "1.9.7"
111
111
  },
112
112
  "devDependencies": {
113
113
  "@rslib/core": "^0.18.3",