@midscene/core 1.2.2 → 1.2.3-beta-20260120082504.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/dist/es/agent/agent.mjs +3 -3
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/task-builder.mjs +4 -2
  4. package/dist/es/agent/task-builder.mjs.map +1 -1
  5. package/dist/es/agent/tasks.mjs +9 -5
  6. package/dist/es/agent/tasks.mjs.map +1 -1
  7. package/dist/es/agent/utils.mjs +1 -1
  8. package/dist/es/ai-model/inspect.mjs +7 -6
  9. package/dist/es/ai-model/inspect.mjs.map +1 -1
  10. package/dist/es/ai-model/llm-planning.mjs +60 -6
  11. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  12. package/dist/es/ai-model/prompt/extraction.mjs +51 -53
  13. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
  14. package/dist/es/ai-model/prompt/llm-planning.mjs +64 -49
  15. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  16. package/dist/es/ai-model/prompt/util.mjs +6 -1
  17. package/dist/es/ai-model/prompt/util.mjs.map +1 -1
  18. package/dist/es/device/index.mjs +2 -14
  19. package/dist/es/device/index.mjs.map +1 -1
  20. package/dist/es/types.mjs.map +1 -1
  21. package/dist/es/utils.mjs +2 -2
  22. package/dist/lib/agent/agent.js +2 -2
  23. package/dist/lib/agent/agent.js.map +1 -1
  24. package/dist/lib/agent/task-builder.js +4 -2
  25. package/dist/lib/agent/task-builder.js.map +1 -1
  26. package/dist/lib/agent/tasks.js +9 -5
  27. package/dist/lib/agent/tasks.js.map +1 -1
  28. package/dist/lib/agent/utils.js +1 -1
  29. package/dist/lib/ai-model/inspect.js +5 -4
  30. package/dist/lib/ai-model/inspect.js.map +1 -1
  31. package/dist/lib/ai-model/llm-planning.js +60 -3
  32. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  33. package/dist/lib/ai-model/prompt/extraction.js +53 -55
  34. package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
  35. package/dist/lib/ai-model/prompt/llm-planning.js +64 -49
  36. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  37. package/dist/lib/ai-model/prompt/util.js +8 -0
  38. package/dist/lib/ai-model/prompt/util.js.map +1 -1
  39. package/dist/lib/device/index.js +16 -34
  40. package/dist/lib/device/index.js.map +1 -1
  41. package/dist/lib/types.js.map +1 -1
  42. package/dist/lib/utils.js +2 -2
  43. package/dist/types/agent/task-builder.d.ts +3 -1
  44. package/dist/types/agent/tasks.d.ts +2 -0
  45. package/dist/types/ai-model/inspect.d.ts +2 -2
  46. package/dist/types/ai-model/llm-planning.d.ts +6 -2
  47. package/dist/types/ai-model/prompt/extraction.d.ts +5 -2
  48. package/dist/types/ai-model/prompt/util.d.ts +7 -0
  49. package/dist/types/device/index.d.ts +0 -11
  50. package/dist/types/types.d.ts +8 -0
  51. package/dist/types/yaml.d.ts +1 -5
  52. package/package.json +2 -2
@@ -9,6 +9,7 @@ interface TaskBuilderDeps {
9
9
  service: Service;
10
10
  taskCache?: TaskCache;
11
11
  actionSpace: DeviceAction[];
12
+ waitAfterAction?: number;
12
13
  }
13
14
  interface BuildOptions {
14
15
  cacheable?: boolean;
@@ -19,7 +20,8 @@ export declare class TaskBuilder {
19
20
  private readonly service;
20
21
  private readonly taskCache?;
21
22
  private readonly actionSpace;
22
- constructor({ interfaceInstance, service, taskCache, actionSpace, }: TaskBuilderDeps);
23
+ private readonly waitAfterAction?;
24
+ constructor({ interfaceInstance, service, taskCache, actionSpace, waitAfterAction, }: TaskBuilderDeps);
23
25
  build(plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, options?: BuildOptions): Promise<{
24
26
  tasks: ExecutionTaskApply[];
25
27
  }>;
@@ -26,11 +26,13 @@ export declare class TaskExecutor {
26
26
  onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];
27
27
  private readonly hooks?;
28
28
  replanningCycleLimit?: number;
29
+ waitAfterAction?: number;
29
30
  get page(): AbstractInterface;
30
31
  constructor(interfaceInstance: AbstractInterface, service: Service, opts: {
31
32
  taskCache?: TaskCache;
32
33
  onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];
33
34
  replanningCycleLimit?: number;
35
+ waitAfterAction?: number;
34
36
  hooks?: TaskExecutorHooks;
35
37
  actionSpace: DeviceAction[];
36
38
  });
@@ -1,4 +1,4 @@
1
- import type { AIDataExtractionResponse, AIElementResponse, AIUsageInfo, Rect, ServiceExtractOption, UIContext } from '../types';
1
+ import type { AIElementResponse, AIUsageInfo, Rect, ServiceExtractOption, UIContext } from '../types';
2
2
  import type { IModelConfig } from '@midscene/shared/env';
3
3
  import type { LocateResultElement } from '@midscene/shared/types';
4
4
  import type { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources/index';
@@ -43,7 +43,7 @@ export declare function AiExtractElementInfo<T>(options: {
43
43
  extractOption?: ServiceExtractOption;
44
44
  modelConfig: IModelConfig;
45
45
  }): Promise<{
46
- parseResult: AIDataExtractionResponse<T>;
46
+ parseResult: import("../types").AIDataExtractionResponse<T>;
47
47
  usage: AIUsageInfo | undefined;
48
48
  reasoning_content: string | undefined;
49
49
  }>;
@@ -1,6 +1,10 @@
1
- import type { DeepThinkOption, DeviceAction, InterfaceType, PlanningAIResponse, UIContext } from '../types';
2
- import type { IModelConfig } from '@midscene/shared/env';
1
+ import type { DeepThinkOption, DeviceAction, InterfaceType, PlanningAIResponse, RawResponsePlanningAIResponse, UIContext } from '../types';
2
+ import type { IModelConfig, TModelFamily } from '@midscene/shared/env';
3
3
  import type { ConversationHistory } from './conversation-history';
4
+ /**
5
+ * Parse XML response from LLM and convert to RawResponsePlanningAIResponse
6
+ */
7
+ export declare function parseXMLPlanningResponse(xmlString: string, modelFamily: TModelFamily | undefined): RawResponsePlanningAIResponse;
4
8
  export declare function plan(userInstruction: string, opts: {
5
9
  context: UIContext;
6
10
  interfaceType: InterfaceType;
@@ -1,4 +1,7 @@
1
- import type { ResponseFormatJSONSchema } from 'openai/resources/index';
1
+ import type { AIDataExtractionResponse } from '../../types';
2
+ /**
3
+ * Parse XML response from LLM and convert to AIDataExtractionResponse
4
+ */
5
+ export declare function parseXMLExtractionResponse<T>(xmlString: string): AIDataExtractionResponse<T>;
2
6
  export declare function systemPromptToExtract(): string;
3
7
  export declare const extractDataQueryPrompt: (pageDescription: string, dataQuery: string | Record<string, string>) => string;
4
- export declare const extractDataSchema: ResponseFormatJSONSchema;
@@ -1,4 +1,11 @@
1
1
  import type { BaseElement, Size, UIContext } from '../../types';
2
+ /**
3
+ * Extract content from an XML tag in a string
4
+ * @param xmlString - The XML string to parse
5
+ * @param tagName - The name of the tag to extract (case-insensitive)
6
+ * @returns The trimmed content of the tag, or undefined if not found
7
+ */
8
+ export declare function extractXMLTag(xmlString: string, tagName: string): string | undefined;
2
9
  export declare function describeSize(size: Size): string;
3
10
  export declare function describeElement(elements: (Pick<BaseElement, 'rect' | 'content'> & {
4
11
  id: string;
@@ -2228,16 +2228,5 @@ export type ActionSleepParam = {
2228
2228
  millisecond?: number;
2229
2229
  };
2230
2230
  export declare const defineActionSleep: () => DeviceAction<ActionSleepParam>;
2231
- export declare const actionFinalizeParamSchema: z.ZodObject<{
2232
- message: z.ZodOptional<z.ZodString>;
2233
- }, "strip", z.ZodTypeAny, {
2234
- message?: string | undefined;
2235
- }, {
2236
- message?: string | undefined;
2237
- }>;
2238
- export type ActionFinalizeParam = {
2239
- message?: string;
2240
- };
2241
- export declare const defineActionFinalize: () => DeviceAction<ActionFinalizeParam>;
2242
2231
  export type { DeviceAction } from '../types';
2243
2232
  export type { AndroidDeviceOpt, AndroidDeviceInputOpt, IOSDeviceOpt, IOSDeviceInputOpt, } from './device-options';
@@ -174,6 +174,8 @@ export interface RawResponsePlanningAIResponse {
174
174
  log: string;
175
175
  note?: string;
176
176
  error?: string;
177
+ finalizeMessage?: string;
178
+ finalizeSuccess?: boolean;
177
179
  }
178
180
  export interface PlanningAIResponse extends Omit<RawResponsePlanningAIResponse, 'action'> {
179
181
  actions?: PlanningAction[];
@@ -456,6 +458,12 @@ export interface AgentOpt {
456
458
  * If omitted, the agent will also read `MIDSCENE_REPLANNING_CYCLE_LIMIT` for backward compatibility.
457
459
  */
458
460
  replanningCycleLimit?: number;
461
+ /**
462
+ * Wait time in milliseconds after each action execution.
463
+ * This allows the UI to settle and stabilize before the next action.
464
+ * Defaults to 300ms when not provided.
465
+ */
466
+ waitAfterAction?: number;
459
467
  /**
460
468
  * Custom OpenAI client factory function
461
469
  *
@@ -1,6 +1,6 @@
1
1
  import type { TUserPrompt } from './common';
2
2
  import type { AndroidDeviceOpt, IOSDeviceOpt } from './device';
3
- import type { AgentOpt, LocateResultElement, Rect } from './types';
3
+ import type { AgentOpt, LocateResultElement } from './types';
4
4
  import type { UIContext } from './types';
5
5
  export interface LocateOption {
6
6
  prompt?: TUserPrompt;
@@ -15,10 +15,6 @@ export interface ServiceExtractOption {
15
15
  screenshotIncluded?: boolean;
16
16
  [key: string]: unknown;
17
17
  }
18
- export interface ReferenceImage {
19
- base64: string;
20
- rect?: Rect;
21
- }
22
18
  export interface DetailedLocateParam extends LocateOption {
23
19
  prompt: TUserPrompt;
24
20
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
4
- "version": "1.2.2",
4
+ "version": "1.2.3-beta-20260120082504.0",
5
5
  "repository": "https://github.com/web-infra-dev/midscene",
6
6
  "homepage": "https://midscenejs.com/",
7
7
  "main": "./dist/lib/index.js",
@@ -89,7 +89,7 @@
89
89
  "semver": "7.5.2",
90
90
  "undici": "^6.0.0",
91
91
  "zod": "3.24.3",
92
- "@midscene/shared": "1.2.2"
92
+ "@midscene/shared": "1.2.3-beta-20260120082504.0"
93
93
  },
94
94
  "devDependencies": {
95
95
  "@rslib/core": "^0.18.3",