@midscene/core 1.2.2 → 1.2.3-beta-20260120082504.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +3 -3
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/task-builder.mjs +4 -2
- package/dist/es/agent/task-builder.mjs.map +1 -1
- package/dist/es/agent/tasks.mjs +9 -5
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +1 -1
- package/dist/es/ai-model/inspect.mjs +7 -6
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/llm-planning.mjs +60 -6
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/extraction.mjs +51 -53
- package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-planning.mjs +64 -49
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/util.mjs +6 -1
- package/dist/es/ai-model/prompt/util.mjs.map +1 -1
- package/dist/es/device/index.mjs +2 -14
- package/dist/es/device/index.mjs.map +1 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/lib/agent/agent.js +2 -2
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/task-builder.js +4 -2
- package/dist/lib/agent/task-builder.js.map +1 -1
- package/dist/lib/agent/tasks.js +9 -5
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/utils.js +1 -1
- package/dist/lib/ai-model/inspect.js +5 -4
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/llm-planning.js +60 -3
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/extraction.js +53 -55
- package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-planning.js +64 -49
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/util.js +8 -0
- package/dist/lib/ai-model/prompt/util.js.map +1 -1
- package/dist/lib/device/index.js +16 -34
- package/dist/lib/device/index.js.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/agent/task-builder.d.ts +3 -1
- package/dist/types/agent/tasks.d.ts +2 -0
- package/dist/types/ai-model/inspect.d.ts +2 -2
- package/dist/types/ai-model/llm-planning.d.ts +6 -2
- package/dist/types/ai-model/prompt/extraction.d.ts +5 -2
- package/dist/types/ai-model/prompt/util.d.ts +7 -0
- package/dist/types/device/index.d.ts +0 -11
- package/dist/types/types.d.ts +8 -0
- package/dist/types/yaml.d.ts +1 -5
- package/package.json +2 -2
|
@@ -9,6 +9,7 @@ interface TaskBuilderDeps {
|
|
|
9
9
|
service: Service;
|
|
10
10
|
taskCache?: TaskCache;
|
|
11
11
|
actionSpace: DeviceAction[];
|
|
12
|
+
waitAfterAction?: number;
|
|
12
13
|
}
|
|
13
14
|
interface BuildOptions {
|
|
14
15
|
cacheable?: boolean;
|
|
@@ -19,7 +20,8 @@ export declare class TaskBuilder {
|
|
|
19
20
|
private readonly service;
|
|
20
21
|
private readonly taskCache?;
|
|
21
22
|
private readonly actionSpace;
|
|
22
|
-
constructor({ interfaceInstance, service, taskCache, actionSpace, }: TaskBuilderDeps);
|
|
23
|
+
private readonly waitAfterAction?;
|
|
24
|
+
constructor({ interfaceInstance, service, taskCache, actionSpace, waitAfterAction, }: TaskBuilderDeps);
|
|
23
25
|
build(plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, options?: BuildOptions): Promise<{
|
|
24
26
|
tasks: ExecutionTaskApply[];
|
|
25
27
|
}>;
|
|
@@ -26,11 +26,13 @@ export declare class TaskExecutor {
|
|
|
26
26
|
onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];
|
|
27
27
|
private readonly hooks?;
|
|
28
28
|
replanningCycleLimit?: number;
|
|
29
|
+
waitAfterAction?: number;
|
|
29
30
|
get page(): AbstractInterface;
|
|
30
31
|
constructor(interfaceInstance: AbstractInterface, service: Service, opts: {
|
|
31
32
|
taskCache?: TaskCache;
|
|
32
33
|
onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];
|
|
33
34
|
replanningCycleLimit?: number;
|
|
35
|
+
waitAfterAction?: number;
|
|
34
36
|
hooks?: TaskExecutorHooks;
|
|
35
37
|
actionSpace: DeviceAction[];
|
|
36
38
|
});
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
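-
import type { AIDataExtractionResponse, AIElementResponse, AIUsageInfo, Rect, ServiceExtractOption, UIContext } from '../types';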
|
|
1
|
+
import type { AIElementResponse, AIUsageInfo, Rect, ServiceExtractOption, UIContext } from '../types';
|
|
2
2
|
import type { IModelConfig } from '@midscene/shared/env';
|
|
3
3
|
import type { LocateResultElement } from '@midscene/shared/types';
|
|
4
4
|
import type { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources/index';
|
|
@@ -43,7 +43,7 @@ export declare function AiExtractElementInfo<T>(options: {
|
|
|
43
43
|
extractOption?: ServiceExtractOption;
|
|
44
44
|
modelConfig: IModelConfig;
|
|
45
45
|
}): Promise<{
|
|
46
|
-
parseResult: AIDataExtractionResponse<T>;
|
|
46
|
+
parseResult: import("../types").AIDataExtractionResponse<T>;
|
|
47
47
|
usage: AIUsageInfo | undefined;
|
|
48
48
|
reasoning_content: string | undefined;
|
|
49
49
|
}>;
|
|
@@ -1,6 +1,10 @@
|
|
|
1
|
-
import type { DeepThinkOption, DeviceAction, InterfaceType, PlanningAIResponse, UIContext } from '../types';
|
|
2
|
-
import type { IModelConfig } from '@midscene/shared/env';
|
|
1
|
+
import type { DeepThinkOption, DeviceAction, InterfaceType, PlanningAIResponse, RawResponsePlanningAIResponse, UIContext } from '../types';
|
|
2
|
+
import type { IModelConfig, TModelFamily } from '@midscene/shared/env';
|
|
3
3
|
import type { ConversationHistory } from './conversation-history';
|
|
4
|
+
/**
|
|
5
|
+
* Parse XML response from LLM and convert to RawResponsePlanningAIResponse
|
|
6
|
+
*/
|
|
7
|
+
export declare function parseXMLPlanningResponse(xmlString: string, modelFamily: TModelFamily | undefined): RawResponsePlanningAIResponse;
|
|
4
8
|
export declare function plan(userInstruction: string, opts: {
|
|
5
9
|
context: UIContext;
|
|
6
10
|
interfaceType: InterfaceType;
|
|
@@ -1,4 +1,7 @@
|
|
|
1
|
-
import type { ResponseFormatJSONSchema } from 'openai/resources/index';
|
|
1
|
+
import type { AIDataExtractionResponse } from '../../types';
|
|
2
|
+
/**
|
|
3
|
+
* Parse XML response from LLM and convert to AIDataExtractionResponse
|
|
4
|
+
*/
|
|
5
|
+
export declare function parseXMLExtractionResponse<T>(xmlString: string): AIDataExtractionResponse<T>;
|
|
2
6
|
export declare function systemPromptToExtract(): string;
|
|
3
7
|
export declare const extractDataQueryPrompt: (pageDescription: string, dataQuery: string | Record<string, string>) => string;
|
|
4
|
-
export declare const extractDataSchema: ResponseFormatJSONSchema;
|
|
@@ -1,4 +1,11 @@
|
|
|
1
1
|
import type { BaseElement, Size, UIContext } from '../../types';
|
|
2
|
+
/**
|
|
3
|
+
* Extract content from an XML tag in a string
|
|
4
|
+
* @param xmlString - The XML string to parse
|
|
5
|
+
* @param tagName - The name of the tag to extract (case-insensitive)
|
|
6
|
+
* @returns The trimmed content of the tag, or undefined if not found
|
|
7
|
+
*/
|
|
8
|
+
export declare function extractXMLTag(xmlString: string, tagName: string): string | undefined;
|
|
2
9
|
export declare function describeSize(size: Size): string;
|
|
3
10
|
export declare function describeElement(elements: (Pick<BaseElement, 'rect' | 'content'> & {
|
|
4
11
|
id: string;
|
|
@@ -2228,16 +2228,5 @@ export type ActionSleepParam = {
|
|
|
2228
2228
|
millisecond?: number;
|
|
2229
2229
|
};
|
|
2230
2230
|
export declare const defineActionSleep: () => DeviceAction<ActionSleepParam>;
|
|
2231
|
-
export declare const actionFinalizeParamSchema: z.ZodObject<{
|
|
2232
|
-
message: z.ZodOptional<z.ZodString>;
|
|
2233
|
-
}, "strip", z.ZodTypeAny, {
|
|
2234
|
-
message?: string | undefined;
|
|
2235
|
-
}, {
|
|
2236
|
-
message?: string | undefined;
|
|
2237
|
-
}>;
|
|
2238
|
-
export type ActionFinalizeParam = {
|
|
2239
|
-
message?: string;
|
|
2240
|
-
};
|
|
2241
|
-
export declare const defineActionFinalize: () => DeviceAction<ActionFinalizeParam>;
|
|
2242
2231
|
export type { DeviceAction } from '../types';
|
|
2243
2232
|
export type { AndroidDeviceOpt, AndroidDeviceInputOpt, IOSDeviceOpt, IOSDeviceInputOpt, } from './device-options';
|
package/dist/types/types.d.ts
CHANGED
|
@@ -174,6 +174,8 @@ export interface RawResponsePlanningAIResponse {
|
|
|
174
174
|
log: string;
|
|
175
175
|
note?: string;
|
|
176
176
|
error?: string;
|
|
177
|
+
finalizeMessage?: string;
|
|
178
|
+
finalizeSuccess?: boolean;
|
|
177
179
|
}
|
|
178
180
|
export interface PlanningAIResponse extends Omit<RawResponsePlanningAIResponse, 'action'> {
|
|
179
181
|
actions?: PlanningAction[];
|
|
@@ -456,6 +458,12 @@ export interface AgentOpt {
|
|
|
456
458
|
* If omitted, the agent will also read `MIDSCENE_REPLANNING_CYCLE_LIMIT` for backward compatibility.
|
|
457
459
|
*/
|
|
458
460
|
replanningCycleLimit?: number;
|
|
461
|
+
/**
|
|
462
|
+
* Wait time in milliseconds after each action execution.
|
|
463
|
+
* This allows the UI to settle and stabilize before the next action.
|
|
464
|
+
* Defaults to 300ms when not provided.
|
|
465
|
+
*/
|
|
466
|
+
waitAfterAction?: number;
|
|
459
467
|
/**
|
|
460
468
|
* Custom OpenAI client factory function
|
|
461
469
|
*
|
package/dist/types/yaml.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { TUserPrompt } from './common';
|
|
2
2
|
import type { AndroidDeviceOpt, IOSDeviceOpt } from './device';
|
|
3
|
-
import type { AgentOpt, LocateResultElement, Rect } from './types';
|
|
3
|
+
import type { AgentOpt, LocateResultElement } from './types';
|
|
4
4
|
import type { UIContext } from './types';
|
|
5
5
|
export interface LocateOption {
|
|
6
6
|
prompt?: TUserPrompt;
|
|
@@ -15,10 +15,6 @@ export interface ServiceExtractOption {
|
|
|
15
15
|
screenshotIncluded?: boolean;
|
|
16
16
|
[key: string]: unknown;
|
|
17
17
|
}
|
|
18
|
-
export interface ReferenceImage {
|
|
19
|
-
base64: string;
|
|
20
|
-
rect?: Rect;
|
|
21
|
-
}
|
|
22
18
|
export interface DetailedLocateParam extends LocateOption {
|
|
23
19
|
prompt: TUserPrompt;
|
|
24
20
|
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@midscene/core",
|
|
3
3
|
"description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
|
|
4
|
-
"version": "1.2.2",
|
|
4
|
+
"version": "1.2.3-beta-20260120082504.0",
|
|
5
5
|
"repository": "https://github.com/web-infra-dev/midscene",
|
|
6
6
|
"homepage": "https://midscenejs.com/",
|
|
7
7
|
"main": "./dist/lib/index.js",
|
|
@@ -89,7 +89,7 @@
|
|
|
89
89
|
"semver": "7.5.2",
|
|
90
90
|
"undici": "^6.0.0",
|
|
91
91
|
"zod": "3.24.3",
|
|
92
|
-
"@midscene/shared": "1.2.2"
|
|
92
|
+
"@midscene/shared": "1.2.3-beta-20260120082504.0"
|
|
93
93
|
},
|
|
94
94
|
"devDependencies": {
|
|
95
95
|
"@rslib/core": "^0.18.3",
|