@midscene/core 1.1.1-beta-20260108085624.0 → 1.2.1-beta-20260108154312.0

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/dist/es/agent/agent.mjs +13 -14
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/tasks.mjs +14 -21
  4. package/dist/es/agent/tasks.mjs.map +1 -1
  5. package/dist/es/agent/utils.mjs +1 -1
  6. package/dist/es/ai-model/llm-planning.mjs +12 -3
  7. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  8. package/dist/es/ai-model/prompt/llm-planning.mjs +2 -7
  9. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  10. package/dist/es/ai-model/ui-tars-planning.mjs +2 -2
  11. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
  12. package/dist/es/common.mjs +14 -5
  13. package/dist/es/common.mjs.map +1 -1
  14. package/dist/es/device/index.mjs +28 -3
  15. package/dist/es/device/index.mjs.map +1 -1
  16. package/dist/es/types.mjs.map +1 -1
  17. package/dist/es/utils.mjs +2 -2
  18. package/dist/lib/agent/agent.js +12 -13
  19. package/dist/lib/agent/agent.js.map +1 -1
  20. package/dist/lib/agent/tasks.js +14 -21
  21. package/dist/lib/agent/tasks.js.map +1 -1
  22. package/dist/lib/agent/utils.js +1 -1
  23. package/dist/lib/ai-model/llm-planning.js +11 -2
  24. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  25. package/dist/lib/ai-model/prompt/llm-planning.js +2 -7
  26. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  27. package/dist/lib/ai-model/ui-tars-planning.js +2 -2
  28. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
  29. package/dist/lib/common.js +20 -5
  30. package/dist/lib/common.js.map +1 -1
  31. package/dist/lib/device/index.js +52 -15
  32. package/dist/lib/device/index.js.map +1 -1
  33. package/dist/lib/types.js.map +1 -1
  34. package/dist/lib/utils.js +2 -2
  35. package/dist/types/agent/agent.d.ts +4 -15
  36. package/dist/types/agent/tasks.d.ts +2 -1
  37. package/dist/types/common.d.ts +8 -1
  38. package/dist/types/device/index.d.ts +22 -0
  39. package/dist/types/types.d.ts +1 -2
  40. package/package.json +2 -2
@@ -47,6 +47,7 @@ export declare class Agent<InterfaceType extends AbstractInterface = AbstractInt
47
47
  */
48
48
  private screenshotScalePromise?;
49
49
  private executionDumpIndexByRunner;
50
+ private fullActionSpace;
50
51
  get page(): InterfaceType;
51
52
  /**
52
53
  * Ensures VL model warning is shown once when needed
@@ -107,19 +108,11 @@ export declare class Agent<InterfaceType extends AbstractInterface = AbstractInt
107
108
  * @deprecated Use aiScroll(locatePrompt, opt) instead where opt contains the scroll parameters
108
109
  */
109
110
  aiScroll(scrollParam: ScrollParam, locatePrompt?: TUserPrompt, opt?: LocateOption): Promise<any>;
110
- aiAct(taskPrompt: string, opt?: AiActOptions): Promise<{
111
- result: Record<string, any>;
112
- } | {
113
- yamlFlow?: import("../yaml").MidsceneYamlFlowItem[];
114
- } | undefined>;
111
+ aiAct(taskPrompt: string, opt?: AiActOptions): Promise<string | undefined>;
115
112
  /**
116
113
  * @deprecated Use {@link Agent.aiAct} instead.
117
114
  */
118
- aiAction(taskPrompt: string, opt?: AiActOptions): Promise<{
119
- result: Record<string, any>;
120
- } | {
121
- yamlFlow?: import("../yaml").MidsceneYamlFlowItem[];
122
- } | undefined>;
115
+ aiAction(taskPrompt: string, opt?: AiActOptions): Promise<string | undefined>;
123
116
  aiQuery<ReturnType = any>(demand: ServiceExtractParam, opt?: ServiceExtractOption): Promise<ReturnType>;
124
117
  aiBoolean(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<boolean>;
125
118
  aiNumber(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<number>;
@@ -140,11 +133,7 @@ export declare class Agent<InterfaceType extends AbstractInterface = AbstractInt
140
133
  message: string | undefined;
141
134
  } | undefined>;
142
135
  aiWaitFor(assertion: TUserPrompt, opt?: AgentWaitForOpt): Promise<void>;
143
- ai(...args: Parameters<typeof this.aiAct>): Promise<{
144
- result: Record<string, any>;
145
- } | {
146
- yamlFlow?: import("../yaml").MidsceneYamlFlowItem[];
147
- } | undefined>;
136
+ ai(...args: Parameters<typeof this.aiAct>): Promise<string | undefined>;
148
137
  runYaml(yamlScriptContent: string): Promise<{
149
138
  result: Record<string, any>;
150
139
  }>;
@@ -1,4 +1,4 @@
1
- import type { TMultimodalPrompt, TUserPrompt } from '../common';
1
+ import { type TMultimodalPrompt, type TUserPrompt } from '../common';
2
2
  import type { AbstractInterface } from '../device';
3
3
  import type Service from '../service';
4
4
  import type { TaskRunner } from '../task-runner';
@@ -48,6 +48,7 @@ export declare class TaskExecutor {
48
48
  runPlans(title: string, plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig): Promise<ExecutionResult>;
49
49
  action(userPrompt: string, modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, includeBboxInPlanning: boolean, aiActContext?: string, cacheable?: boolean, replanningCycleLimitOverride?: number, imagesIncludeCount?: number, deepThink?: DeepThinkOption, fileChooserAccept?: string[]): Promise<ExecutionResult<{
50
50
  yamlFlow?: MidsceneYamlFlowItem[];
51
+ output?: string;
51
52
  } | undefined>>;
52
53
  private runAction;
53
54
  private createTypeQueryTask;
@@ -20,7 +20,7 @@ export declare function mergeRects(rects: Rect[]): {
20
20
  };
21
21
  export declare function expandSearchArea(rect: Rect, screenSize: Size, vlMode: TVlModeTypes | undefined): Rect;
22
22
  export declare function markupImageForLLM(screenshotBase64: string, tree: ElementTreeNode<BaseElement>, size: Size): Promise<string>;
23
- export declare function buildYamlFlowFromPlans(plans: PlanningAction[], actionSpace: DeviceAction<any>[], sleep?: number): MidsceneYamlFlowItem[];
23
+ export declare function buildYamlFlowFromPlans(plans: PlanningAction[], actionSpace: DeviceAction<any>[]): MidsceneYamlFlowItem[];
24
24
  export declare const PointSchema: z.ZodObject<{
25
25
  left: z.ZodNumber;
26
26
  top: z.ZodNumber;
@@ -558,4 +558,11 @@ export declare const loadActionParam: (jsonObject: Record<string, any>, zodSchem
558
558
  * so they are intentionally excluded from Zod parsing and use existing validation logic.
559
559
  */
560
560
  export declare const parseActionParam: (rawParam: Record<string, any> | undefined, zodSchema?: z.ZodType<any>) => Record<string, any> | undefined;
561
+ export declare const finalizeActionName = "Finalize";
562
+ /**
563
+ * Get a readable time string for the current time
564
+ * @param format - Optional format string. Supports: YYYY, MM, DD, HH, mm, ss. Default: 'YYYY-MM-DD HH:mm:ss'
565
+ * @returns A formatted time string with format label
566
+ */
567
+ export declare const getReadableTimeString: (format?: string) => string;
561
568
  export {};
@@ -2177,5 +2177,27 @@ export type ActionAssertParam = {
2177
2177
  result: boolean;
2178
2178
  };
2179
2179
  export declare const defineActionAssert: () => DeviceAction<ActionAssertParam>;
2180
+ export declare const ActionSleepParamSchema: z.ZodObject<{
2181
+ millisecond: z.ZodOptional<z.ZodDefault<z.ZodNumber>>;
2182
+ }, "strip", z.ZodTypeAny, {
2183
+ millisecond?: number | undefined;
2184
+ }, {
2185
+ millisecond?: number | undefined;
2186
+ }>;
2187
+ export type ActionSleepParam = {
2188
+ millisecond?: number;
2189
+ };
2190
+ export declare const defineActionSleep: () => DeviceAction<ActionSleepParam>;
2191
+ export declare const actionFinalizeParamSchema: z.ZodObject<{
2192
+ message: z.ZodOptional<z.ZodString>;
2193
+ }, "strip", z.ZodTypeAny, {
2194
+ message?: string | undefined;
2195
+ }, {
2196
+ message?: string | undefined;
2197
+ }>;
2198
+ export type ActionFinalizeParam = {
2199
+ message?: string;
2200
+ };
2201
+ export declare const defineActionFinalize: () => DeviceAction<ActionFinalizeParam>;
2180
2202
  export type { DeviceAction } from '../types';
2181
2203
  export type { AndroidDeviceOpt, AndroidDeviceInputOpt, IOSDeviceOpt, IOSDeviceInputOpt, } from './device-options';
@@ -177,9 +177,7 @@ export interface PlanningAction<ParamType = any> {
177
177
  }
178
178
  export interface RawResponsePlanningAIResponse {
179
179
  action: PlanningAction;
180
- more_actions_needed_by_instruction: boolean;
181
180
  log: string;
182
- sleep?: number;
183
181
  error?: string;
184
182
  }
185
183
  export interface PlanningAIResponse extends Omit<RawResponsePlanningAIResponse, 'action'> {
@@ -190,6 +188,7 @@ export interface PlanningAIResponse extends Omit<RawResponsePlanningAIResponse,
190
188
  yamlString?: string;
191
189
  error?: string;
192
190
  reasoning_content?: string;
191
+ shouldContinuePlanning: boolean;
193
192
  }
194
193
  export interface PlanningActionParamSleep {
195
194
  timeMs: number;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
4
- "version": "1.1.1-beta-20260108085624.0",
4
+ "version": "1.2.1-beta-20260108154312.0",
5
5
  "repository": "https://github.com/web-infra-dev/midscene",
6
6
  "homepage": "https://midscenejs.com/",
7
7
  "main": "./dist/lib/index.js",
@@ -89,7 +89,7 @@
89
89
  "semver": "7.5.2",
90
90
  "undici": "^6.0.0",
91
91
  "zod": "3.24.3",
92
- "@midscene/shared": "1.1.1-beta-20260108085624.0"
92
+ "@midscene/shared": "1.2.1-beta-20260108154312.0"
93
93
  },
94
94
  "devDependencies": {
95
95
  "@rslib/core": "^0.18.3",