@midscene/core 1.2.1-beta-20260113073450.0 → 1.2.1-beta-20260115021413.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/dist/es/agent/agent.mjs +14 -13
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/tasks.mjs +29 -15
  4. package/dist/es/agent/tasks.mjs.map +1 -1
  5. package/dist/es/agent/utils.mjs +1 -1
  6. package/dist/es/ai-model/llm-planning.mjs +3 -12
  7. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  8. package/dist/es/ai-model/prompt/llm-planning.mjs +8 -40
  9. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  10. package/dist/es/ai-model/service-caller/index.mjs +8 -0
  11. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  12. package/dist/es/ai-model/ui-tars-planning.mjs +1 -1
  13. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
  14. package/dist/es/common.mjs +8 -16
  15. package/dist/es/common.mjs.map +1 -1
  16. package/dist/es/device/index.mjs +3 -28
  17. package/dist/es/device/index.mjs.map +1 -1
  18. package/dist/es/types.mjs.map +1 -1
  19. package/dist/es/utils.mjs +2 -2
  20. package/dist/lib/agent/agent.js +13 -12
  21. package/dist/lib/agent/agent.js.map +1 -1
  22. package/dist/lib/agent/tasks.js +29 -15
  23. package/dist/lib/agent/tasks.js.map +1 -1
  24. package/dist/lib/agent/utils.js +1 -1
  25. package/dist/lib/ai-model/llm-planning.js +2 -11
  26. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  27. package/dist/lib/ai-model/prompt/llm-planning.js +8 -40
  28. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  29. package/dist/lib/ai-model/service-caller/index.js +8 -0
  30. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  31. package/dist/lib/ai-model/ui-tars-planning.js +1 -1
  32. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
  33. package/dist/lib/common.js +7 -21
  34. package/dist/lib/common.js.map +1 -1
  35. package/dist/lib/device/index.js +15 -52
  36. package/dist/lib/device/index.js.map +1 -1
  37. package/dist/lib/types.js.map +1 -1
  38. package/dist/lib/utils.js +2 -2
  39. package/dist/types/agent/agent.d.ts +15 -4
  40. package/dist/types/agent/tasks.d.ts +4 -4
  41. package/dist/types/common.d.ts +1 -8
  42. package/dist/types/device/index.d.ts +0 -22
  43. package/dist/types/types.d.ts +2 -2
  44. package/package.json +2 -2
@@ -47,7 +47,6 @@ export declare class Agent<InterfaceType extends AbstractInterface = AbstractInt
47
47
  */
48
48
  private screenshotScalePromise?;
49
49
  private executionDumpIndexByRunner;
50
- private fullActionSpace;
51
50
  get page(): InterfaceType;
52
51
  /**
53
52
  * Ensures VL model warning is shown once when needed
@@ -108,11 +107,19 @@ export declare class Agent<InterfaceType extends AbstractInterface = AbstractInt
108
107
  * @deprecated Use aiScroll(locatePrompt, opt) instead where opt contains the scroll parameters
109
108
  */
110
109
  aiScroll(scrollParam: ScrollParam, locatePrompt?: TUserPrompt, opt?: LocateOption): Promise<any>;
111
- aiAct(taskPrompt: string, opt?: AiActOptions): Promise<string | undefined>;
110
+ aiAct(taskPrompt: string, opt?: AiActOptions): Promise<{
111
+ result: Record<string, any>;
112
+ } | {
113
+ yamlFlow?: import("../yaml").MidsceneYamlFlowItem[];
114
+ } | undefined>;
112
115
  /**
113
116
  * @deprecated Use {@link Agent.aiAct} instead.
114
117
  */
115
- aiAction(taskPrompt: string, opt?: AiActOptions): Promise<string | undefined>;
118
+ aiAction(taskPrompt: string, opt?: AiActOptions): Promise<{
119
+ result: Record<string, any>;
120
+ } | {
121
+ yamlFlow?: import("../yaml").MidsceneYamlFlowItem[];
122
+ } | undefined>;
116
123
  aiQuery<ReturnType = any>(demand: ServiceExtractParam, opt?: ServiceExtractOption): Promise<ReturnType>;
117
124
  aiBoolean(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<boolean>;
118
125
  aiNumber(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<number>;
@@ -133,7 +140,11 @@ export declare class Agent<InterfaceType extends AbstractInterface = AbstractInt
133
140
  message: string | undefined;
134
141
  } | undefined>;
135
142
  aiWaitFor(assertion: TUserPrompt, opt?: AgentWaitForOpt): Promise<void>;
136
- ai(...args: Parameters<typeof this.aiAct>): Promise<string | undefined>;
143
+ ai(...args: Parameters<typeof this.aiAct>): Promise<{
144
+ result: Record<string, any>;
145
+ } | {
146
+ yamlFlow?: import("../yaml").MidsceneYamlFlowItem[];
147
+ } | undefined>;
137
148
  runYaml(yamlScriptContent: string): Promise<{
138
149
  result: Record<string, any>;
139
150
  }>;
@@ -1,9 +1,9 @@
1
- import { type TMultimodalPrompt, type TUserPrompt } from '../common';
1
+ import type { TMultimodalPrompt, TUserPrompt } from '../common';
2
2
  import type { AbstractInterface } from '../device';
3
3
  import type Service from '../service';
4
4
  import type { TaskRunner } from '../task-runner';
5
5
  import { TaskExecutionError } from '../task-runner';
6
- import type { DeepThinkOption, DeviceAction, ExecutionTaskApply, ExecutionTaskProgressOptions, MidsceneYamlFlowItem, PlanningAction, PlanningActionParamWaitFor, ServiceExtractOption, ServiceExtractParam } from '../types';
6
+ import type { DeepThinkOption, DeviceAction, ExecutionTaskApply, ExecutionTaskProgressOptions, MidsceneYamlFlowItem, PlanningAction, PlanningActionParamSleep, PlanningActionParamWaitFor, ServiceExtractOption, ServiceExtractParam } from '../types';
7
7
  import type { IModelConfig } from '@midscene/shared/env';
8
8
  import type { TaskCache } from './task-cache';
9
9
  export { locatePlanForLocate } from './task-builder';
@@ -48,12 +48,12 @@ export declare class TaskExecutor {
48
48
  runPlans(title: string, plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig): Promise<ExecutionResult>;
49
49
  action(userPrompt: string, modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, includeBboxInPlanning: boolean, aiActContext?: string, cacheable?: boolean, replanningCycleLimitOverride?: number, imagesIncludeCount?: number, deepThink?: DeepThinkOption, fileChooserAccept?: string[]): Promise<ExecutionResult<{
50
50
  yamlFlow?: MidsceneYamlFlowItem[];
51
- output?: string;
52
51
  } | undefined>>;
53
52
  private runAction;
54
53
  private createTypeQueryTask;
55
54
  createTypeQueryExecution<T>(type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert', demand: ServiceExtractParam, modelConfig: IModelConfig, opt?: ServiceExtractOption, multimodalPrompt?: TMultimodalPrompt): Promise<ExecutionResult<T>>;
56
- taskForSleep(timeMs: number, _modelConfig: IModelConfig): Promise<import("../types").ExecutionTaskActionApply<import("../types").PlanningActionParamSleep>>;
55
+ private sleepPlan;
56
+ taskForSleep(timeMs: number, _modelConfig: IModelConfig): Promise<import("../types").ExecutionTaskActionApply<PlanningActionParamSleep>>;
57
57
  waitFor(assertion: TUserPrompt, opt: PlanningActionParamWaitFor, modelConfig: IModelConfig): Promise<ExecutionResult<void>>;
58
58
  }
59
59
  export declare function withFileChooser<T>(interfaceInstance: AbstractInterface, fileChooserAccept: string[] | undefined, action: () => Promise<T>): Promise<T>;
@@ -20,7 +20,7 @@ export declare function mergeRects(rects: Rect[]): {
20
20
  };
21
21
  export declare function expandSearchArea(rect: Rect, screenSize: Size, vlMode: TVlModeTypes | undefined): Rect;
22
22
  export declare function markupImageForLLM(screenshotBase64: string, tree: ElementTreeNode<BaseElement>, size: Size): Promise<string>;
23
- export declare function buildYamlFlowFromPlans(plans: PlanningAction[], actionSpace: DeviceAction<any>[]): MidsceneYamlFlowItem[];
23
+ export declare function buildYamlFlowFromPlans(plans: PlanningAction[], actionSpace: DeviceAction<any>[], sleep?: number): MidsceneYamlFlowItem[];
24
24
  export declare const PointSchema: z.ZodObject<{
25
25
  left: z.ZodNumber;
26
26
  top: z.ZodNumber;
@@ -558,11 +558,4 @@ export declare const loadActionParam: (jsonObject: Record<string, any>, zodSchem
558
558
  * so they are intentionally excluded from Zod parsing and use existing validation logic.
559
559
  */
560
560
  export declare const parseActionParam: (rawParam: Record<string, any> | undefined, zodSchema?: z.ZodType<any>) => Record<string, any> | undefined;
561
- export declare const finalizeActionName = "Finalize";
562
- /**
563
- * Get a readable time string for the current time
564
- * @param format - Optional format string. Supports: YYYY, MM, DD, HH, mm, ss. Default: 'YYYY-MM-DD HH:mm:ss'
565
- * @returns A formatted time string with format label
566
- */
567
- export declare const getReadableTimeString: (format?: string) => string;
568
561
  export {};
@@ -2177,27 +2177,5 @@ export type ActionAssertParam = {
2177
2177
  result: boolean;
2178
2178
  };
2179
2179
  export declare const defineActionAssert: () => DeviceAction<ActionAssertParam>;
2180
- export declare const ActionSleepParamSchema: z.ZodObject<{
2181
- millisecond: z.ZodOptional<z.ZodDefault<z.ZodNumber>>;
2182
- }, "strip", z.ZodTypeAny, {
2183
- millisecond?: number | undefined;
2184
- }, {
2185
- millisecond?: number | undefined;
2186
- }>;
2187
- export type ActionSleepParam = {
2188
- millisecond?: number;
2189
- };
2190
- export declare const defineActionSleep: () => DeviceAction<ActionSleepParam>;
2191
- export declare const actionFinalizeParamSchema: z.ZodObject<{
2192
- message: z.ZodOptional<z.ZodString>;
2193
- }, "strip", z.ZodTypeAny, {
2194
- message?: string | undefined;
2195
- }, {
2196
- message?: string | undefined;
2197
- }>;
2198
- export type ActionFinalizeParam = {
2199
- message?: string;
2200
- };
2201
- export declare const defineActionFinalize: () => DeviceAction<ActionFinalizeParam>;
2202
2180
  export type { DeviceAction } from '../types';
2203
2181
  export type { AndroidDeviceOpt, AndroidDeviceInputOpt, IOSDeviceOpt, IOSDeviceInputOpt, } from './device-options';
@@ -176,8 +176,9 @@ export interface PlanningAction<ParamType = any> {
176
176
  }
177
177
  export interface RawResponsePlanningAIResponse {
178
178
  action: PlanningAction;
179
+ more_actions_needed_by_instruction: boolean;
179
180
  log: string;
180
- note?: string;
181
+ sleep?: number;
181
182
  error?: string;
182
183
  }
183
184
  export interface PlanningAIResponse extends Omit<RawResponsePlanningAIResponse, 'action'> {
@@ -188,7 +189,6 @@ export interface PlanningAIResponse extends Omit<RawResponsePlanningAIResponse,
188
189
  yamlString?: string;
189
190
  error?: string;
190
191
  reasoning_content?: string;
191
- shouldContinuePlanning: boolean;
192
192
  }
193
193
  export interface PlanningActionParamSleep {
194
194
  timeMs: number;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
4
- "version": "1.2.1-beta-20260113073450.0",
4
+ "version": "1.2.1-beta-20260115021413.0",
5
5
  "repository": "https://github.com/web-infra-dev/midscene",
6
6
  "homepage": "https://midscenejs.com/",
7
7
  "main": "./dist/lib/index.js",
@@ -89,7 +89,7 @@
89
89
  "semver": "7.5.2",
90
90
  "undici": "^6.0.0",
91
91
  "zod": "3.24.3",
92
- "@midscene/shared": "1.2.1-beta-20260113073450.0"
92
+ "@midscene/shared": "1.2.1-beta-20260115021413.0"
93
93
  },
94
94
  "devDependencies": {
95
95
  "@rslib/core": "^0.18.3",