@midscene/core 0.30.10 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +233 -144
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/execution-session.mjs +41 -0
- package/dist/es/agent/execution-session.mjs.map +1 -0
- package/dist/es/agent/index.mjs +3 -3
- package/dist/es/agent/task-builder.mjs +319 -0
- package/dist/es/agent/task-builder.mjs.map +1 -0
- package/dist/es/agent/task-cache.mjs +4 -4
- package/dist/es/agent/task-cache.mjs.map +1 -1
- package/dist/es/agent/tasks.mjs +197 -504
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/ui-utils.mjs +54 -35
- package/dist/es/agent/ui-utils.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +16 -58
- package/dist/es/agent/utils.mjs.map +1 -1
- package/dist/es/ai-model/conversation-history.mjs +25 -13
- package/dist/es/ai-model/conversation-history.mjs.map +1 -1
- package/dist/es/ai-model/index.mjs +4 -4
- package/dist/es/ai-model/inspect.mjs +45 -54
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/llm-planning.mjs +47 -65
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/assertion.mjs.map +1 -1
- package/dist/es/ai-model/prompt/common.mjs.map +1 -1
- package/dist/es/ai-model/prompt/describe.mjs.map +1 -1
- package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-locator.mjs +11 -235
- package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-planning.mjs +76 -322
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-section-locator.mjs +15 -14
- package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
- package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs +2 -2
- package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/util.mjs +3 -88
- package/dist/es/ai-model/prompt/util.mjs.map +1 -1
- package/dist/es/ai-model/prompt/yaml-generator.mjs +10 -10
- package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +182 -274
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/ai-model/ui-tars-planning.mjs +69 -8
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
- package/dist/es/{ai-model/common.mjs → common.mjs} +18 -30
- package/dist/es/common.mjs.map +1 -0
- package/dist/es/device/device-options.mjs +0 -0
- package/dist/es/device/index.mjs +29 -12
- package/dist/es/device/index.mjs.map +1 -1
- package/dist/es/index.mjs +5 -4
- package/dist/es/index.mjs.map +1 -1
- package/dist/es/report.mjs.map +1 -1
- package/dist/es/{insight → service}/index.mjs +38 -51
- package/dist/es/service/index.mjs.map +1 -0
- package/dist/es/{insight → service}/utils.mjs +3 -3
- package/dist/es/service/utils.mjs.map +1 -0
- package/dist/es/task-runner.mjs +264 -0
- package/dist/es/task-runner.mjs.map +1 -0
- package/dist/es/tree.mjs +13 -2
- package/dist/es/tree.mjs.map +1 -0
- package/dist/es/types.mjs +18 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +6 -7
- package/dist/es/utils.mjs.map +1 -1
- package/dist/es/yaml/builder.mjs.map +1 -1
- package/dist/es/yaml/player.mjs +121 -98
- package/dist/es/yaml/player.mjs.map +1 -1
- package/dist/es/yaml/utils.mjs +1 -1
- package/dist/es/yaml/utils.mjs.map +1 -1
- package/dist/lib/agent/agent.js +231 -142
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/common.js +1 -1
- package/dist/lib/agent/execution-session.js +75 -0
- package/dist/lib/agent/execution-session.js.map +1 -0
- package/dist/lib/agent/index.js +14 -14
- package/dist/lib/agent/index.js.map +1 -1
- package/dist/lib/agent/task-builder.js +356 -0
- package/dist/lib/agent/task-builder.js.map +1 -0
- package/dist/lib/agent/task-cache.js +8 -8
- package/dist/lib/agent/task-cache.js.map +1 -1
- package/dist/lib/agent/tasks.js +202 -506
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/ui-utils.js +58 -36
- package/dist/lib/agent/ui-utils.js.map +1 -1
- package/dist/lib/agent/utils.js +26 -68
- package/dist/lib/agent/utils.js.map +1 -1
- package/dist/lib/ai-model/conversation-history.js +27 -15
- package/dist/lib/ai-model/conversation-history.js.map +1 -1
- package/dist/lib/ai-model/index.js +27 -27
- package/dist/lib/ai-model/index.js.map +1 -1
- package/dist/lib/ai-model/inspect.js +51 -57
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/llm-planning.js +49 -67
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/assertion.js +2 -2
- package/dist/lib/ai-model/prompt/assertion.js.map +1 -1
- package/dist/lib/ai-model/prompt/common.js +2 -2
- package/dist/lib/ai-model/prompt/common.js.map +1 -1
- package/dist/lib/ai-model/prompt/describe.js +2 -2
- package/dist/lib/ai-model/prompt/describe.js.map +1 -1
- package/dist/lib/ai-model/prompt/extraction.js +2 -2
- package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-locator.js +14 -241
- package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-planning.js +79 -328
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-section-locator.js +17 -16
- package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
- package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js +11 -11
- package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
- package/dist/lib/ai-model/prompt/ui-tars-locator.js +2 -2
- package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/ui-tars-planning.js +2 -2
- package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/util.js +7 -95
- package/dist/lib/ai-model/prompt/util.js.map +1 -1
- package/dist/lib/ai-model/prompt/yaml-generator.js +18 -18
- package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +288 -401
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/ai-model/ui-tars-planning.js +71 -10
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
- package/dist/lib/{ai-model/common.js → common.js} +40 -55
- package/dist/lib/common.js.map +1 -0
- package/dist/lib/device/device-options.js +20 -0
- package/dist/lib/device/device-options.js.map +1 -0
- package/dist/lib/device/index.js +63 -40
- package/dist/lib/device/index.js.map +1 -1
- package/dist/lib/image/index.js +5 -5
- package/dist/lib/image/index.js.map +1 -1
- package/dist/lib/index.js +24 -20
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/report.js +2 -2
- package/dist/lib/report.js.map +1 -1
- package/dist/lib/{insight → service}/index.js +41 -54
- package/dist/lib/service/index.js.map +1 -0
- package/dist/lib/{insight → service}/utils.js +7 -7
- package/dist/lib/service/utils.js.map +1 -0
- package/dist/lib/task-runner.js +301 -0
- package/dist/lib/task-runner.js.map +1 -0
- package/dist/lib/tree.js +13 -4
- package/dist/lib/tree.js.map +1 -1
- package/dist/lib/types.js +31 -12
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +16 -17
- package/dist/lib/utils.js.map +1 -1
- package/dist/lib/yaml/builder.js +2 -2
- package/dist/lib/yaml/builder.js.map +1 -1
- package/dist/lib/yaml/index.js +16 -22
- package/dist/lib/yaml/index.js.map +1 -1
- package/dist/lib/yaml/player.js +123 -100
- package/dist/lib/yaml/player.js.map +1 -1
- package/dist/lib/yaml/utils.js +6 -6
- package/dist/lib/yaml/utils.js.map +1 -1
- package/dist/lib/yaml.js +1 -1
- package/dist/lib/yaml.js.map +1 -1
- package/dist/types/agent/agent.d.ts +62 -17
- package/dist/types/agent/execution-session.d.ts +36 -0
- package/dist/types/agent/index.d.ts +3 -2
- package/dist/types/agent/task-builder.d.ts +35 -0
- package/dist/types/agent/tasks.d.ts +32 -23
- package/dist/types/agent/ui-utils.d.ts +9 -2
- package/dist/types/agent/utils.d.ts +9 -35
- package/dist/types/ai-model/conversation-history.d.ts +8 -4
- package/dist/types/ai-model/index.d.ts +5 -5
- package/dist/types/ai-model/inspect.d.ts +20 -12
- package/dist/types/ai-model/llm-planning.d.ts +3 -1
- package/dist/types/ai-model/prompt/llm-locator.d.ts +1 -6
- package/dist/types/ai-model/prompt/llm-planning.d.ts +2 -3
- package/dist/types/ai-model/prompt/llm-section-locator.d.ts +1 -3
- package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
- package/dist/types/ai-model/prompt/util.d.ts +2 -34
- package/dist/types/ai-model/service-caller/index.d.ts +2 -3
- package/dist/types/ai-model/ui-tars-planning.d.ts +15 -2
- package/dist/types/{ai-model/common.d.ts → common.d.ts} +6 -6
- package/dist/types/device/device-options.d.ts +57 -0
- package/dist/types/device/index.d.ts +55 -39
- package/dist/types/index.d.ts +7 -6
- package/dist/types/service/index.d.ts +26 -0
- package/dist/types/service/utils.d.ts +2 -0
- package/dist/types/task-runner.d.ts +49 -0
- package/dist/types/tree.d.ts +4 -1
- package/dist/types/types.d.ts +103 -66
- package/dist/types/yaml/utils.d.ts +1 -1
- package/dist/types/yaml.d.ts +68 -43
- package/package.json +9 -12
- package/dist/es/ai-model/action-executor.mjs +0 -129
- package/dist/es/ai-model/action-executor.mjs.map +0 -1
- package/dist/es/ai-model/common.mjs.map +0 -1
- package/dist/es/insight/index.mjs.map +0 -1
- package/dist/es/insight/utils.mjs.map +0 -1
- package/dist/lib/ai-model/action-executor.js +0 -163
- package/dist/lib/ai-model/action-executor.js.map +0 -1
- package/dist/lib/ai-model/common.js.map +0 -1
- package/dist/lib/insight/index.js.map +0 -1
- package/dist/lib/insight/utils.js.map +0 -1
- package/dist/types/ai-model/action-executor.d.ts +0 -19
- package/dist/types/insight/index.d.ts +0 -31
- package/dist/types/insight/utils.d.ts +0 -2
package/dist/types/types.d.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import type { NodeType } from '@midscene/shared/constants';
|
|
2
|
-
import type {
|
|
3
|
-
import type { BaseElement,
|
|
2
|
+
import type { CreateOpenAIClientFn, TModelConfig } from '@midscene/shared/env';
|
|
3
|
+
import type { BaseElement, LocateResultElement, Rect, Size } from '@midscene/shared/types';
|
|
4
4
|
import type { z } from 'zod';
|
|
5
|
-
import type { TUserPrompt } from './
|
|
5
|
+
import type { TUserPrompt } from './common';
|
|
6
6
|
import type { DetailedLocateParam, MidsceneYamlFlowItem } from './yaml';
|
|
7
7
|
export type { ElementTreeNode, BaseElement, Rect, Size, Point, } from '@midscene/shared/types';
|
|
8
8
|
export * from './yaml';
|
|
@@ -10,11 +10,13 @@ export type AIUsageInfo = Record<string, any> & {
|
|
|
10
10
|
prompt_tokens: number | undefined;
|
|
11
11
|
completion_tokens: number | undefined;
|
|
12
12
|
total_tokens: number | undefined;
|
|
13
|
+
cached_input: number | undefined;
|
|
13
14
|
time_cost: number | undefined;
|
|
14
15
|
model_name: string | undefined;
|
|
15
16
|
model_description: string | undefined;
|
|
16
17
|
intent: string | undefined;
|
|
17
18
|
};
|
|
19
|
+
export type { LocateResultElement };
|
|
18
20
|
/**
|
|
19
21
|
* openai
|
|
20
22
|
*
|
|
@@ -39,23 +41,11 @@ export type AISingleElementResponseByPosition = {
|
|
|
39
41
|
text: string;
|
|
40
42
|
};
|
|
41
43
|
export type AISingleElementResponse = AISingleElementResponseById;
|
|
42
|
-
export interface AIElementLocatorResponse {
|
|
43
|
-
elements: {
|
|
44
|
-
id: string;
|
|
45
|
-
reason?: string;
|
|
46
|
-
text?: string;
|
|
47
|
-
xpaths?: string[];
|
|
48
|
-
}[];
|
|
49
|
-
bbox?: [number, number, number, number];
|
|
50
|
-
isOrderSensitive?: boolean;
|
|
51
|
-
errors?: string[];
|
|
52
|
-
}
|
|
53
44
|
export interface AIElementCoordinatesResponse {
|
|
54
45
|
bbox: [number, number, number, number];
|
|
55
|
-
isOrderSensitive?: boolean;
|
|
56
46
|
errors?: string[];
|
|
57
47
|
}
|
|
58
|
-
export type AIElementResponse =
|
|
48
|
+
export type AIElementResponse = AIElementCoordinatesResponse;
|
|
59
49
|
export interface AIDataExtractionResponse<DataDemand> {
|
|
60
50
|
data: DataDemand;
|
|
61
51
|
errors?: string[];
|
|
@@ -91,35 +81,23 @@ export interface AgentDescribeElementAtPointResult {
|
|
|
91
81
|
/**
|
|
92
82
|
* context
|
|
93
83
|
*/
|
|
94
|
-
export declare abstract class UIContext
|
|
84
|
+
export declare abstract class UIContext {
|
|
95
85
|
abstract screenshotBase64: string;
|
|
96
|
-
abstract tree: ElementTreeNode<ElementType>;
|
|
97
86
|
abstract size: Size;
|
|
98
87
|
abstract _isFrozen?: boolean;
|
|
99
88
|
}
|
|
100
89
|
export type EnsureObject<T> = {
|
|
101
90
|
[K in keyof T]: any;
|
|
102
91
|
};
|
|
103
|
-
export type
|
|
104
|
-
export type
|
|
92
|
+
export type ServiceAction = 'locate' | 'extract' | 'assert' | 'describe';
|
|
93
|
+
export type ServiceExtractParam = string | Record<string, string>;
|
|
105
94
|
export type ElementCacheFeature = Record<string, unknown>;
|
|
106
|
-
export type LocateResultElement = {
|
|
107
|
-
center: [number, number];
|
|
108
|
-
rect: Rect;
|
|
109
|
-
id: string;
|
|
110
|
-
indexId?: number;
|
|
111
|
-
xpaths: string[];
|
|
112
|
-
attributes: {
|
|
113
|
-
nodeType: NodeType;
|
|
114
|
-
[key: string]: string;
|
|
115
|
-
};
|
|
116
|
-
isOrderSensitive?: boolean;
|
|
117
|
-
};
|
|
118
95
|
export interface LocateResult {
|
|
119
96
|
element: LocateResultElement | null;
|
|
120
97
|
rect?: Rect;
|
|
121
98
|
}
|
|
122
|
-
export
|
|
99
|
+
export type ThinkingLevel = 'off' | 'medium' | 'high';
|
|
100
|
+
export interface ServiceTaskInfo {
|
|
123
101
|
durationMs: number;
|
|
124
102
|
formatResponse?: string;
|
|
125
103
|
rawResponse?: string;
|
|
@@ -135,26 +113,38 @@ export interface ReportDumpWithAttributes {
|
|
|
135
113
|
dumpString: string;
|
|
136
114
|
attributes?: Record<string, any>;
|
|
137
115
|
}
|
|
138
|
-
export interface
|
|
116
|
+
export interface ServiceDump extends DumpMeta {
|
|
139
117
|
type: 'locate' | 'extract' | 'assert';
|
|
140
118
|
logId: string;
|
|
141
119
|
userQuery: {
|
|
142
120
|
element?: TUserPrompt;
|
|
143
|
-
dataDemand?:
|
|
121
|
+
dataDemand?: ServiceExtractParam;
|
|
144
122
|
assertion?: TUserPrompt;
|
|
145
123
|
};
|
|
146
|
-
matchedElement:
|
|
124
|
+
matchedElement: LocateResultElement[];
|
|
147
125
|
matchedRect?: Rect;
|
|
148
126
|
deepThink?: boolean;
|
|
149
127
|
data: any;
|
|
150
128
|
assertionPass?: boolean;
|
|
151
129
|
assertionThought?: string;
|
|
152
|
-
taskInfo:
|
|
130
|
+
taskInfo: ServiceTaskInfo;
|
|
153
131
|
error?: string;
|
|
154
132
|
output?: any;
|
|
155
133
|
}
|
|
156
|
-
export type
|
|
157
|
-
export
|
|
134
|
+
export type PartialServiceDumpFromSDK = Omit<ServiceDump, 'logTime' | 'logId' | 'model_name'>;
|
|
135
|
+
export interface ServiceResultBase {
|
|
136
|
+
dump: ServiceDump;
|
|
137
|
+
}
|
|
138
|
+
export type LocateResultWithDump = LocateResult & ServiceResultBase;
|
|
139
|
+
export interface ServiceExtractResult<T> extends ServiceResultBase {
|
|
140
|
+
data: T;
|
|
141
|
+
thought?: string;
|
|
142
|
+
usage?: AIUsageInfo;
|
|
143
|
+
}
|
|
144
|
+
export declare class ServiceError extends Error {
|
|
145
|
+
dump: ServiceDump;
|
|
146
|
+
constructor(message: string, dump: ServiceDump);
|
|
147
|
+
}
|
|
158
148
|
export interface LiteUISection {
|
|
159
149
|
name: string;
|
|
160
150
|
description: string;
|
|
@@ -162,7 +152,7 @@ export interface LiteUISection {
|
|
|
162
152
|
textIds: string[];
|
|
163
153
|
}
|
|
164
154
|
export type ElementById = (id: string) => BaseElement | null;
|
|
165
|
-
export type
|
|
155
|
+
export type ServiceAssertionResponse = AIAssertionResponse & {
|
|
166
156
|
usage?: AIUsageInfo;
|
|
167
157
|
};
|
|
168
158
|
/**
|
|
@@ -172,6 +162,7 @@ export type OnTaskStartTip = (tip: string) => Promise<void> | void;
|
|
|
172
162
|
export interface AgentWaitForOpt {
|
|
173
163
|
checkIntervalMs?: number;
|
|
174
164
|
timeoutMs?: number;
|
|
165
|
+
[key: string]: unknown;
|
|
175
166
|
}
|
|
176
167
|
export interface AgentAssertOpt {
|
|
177
168
|
keepRawResponse?: boolean;
|
|
@@ -181,33 +172,27 @@ export interface AgentAssertOpt {
|
|
|
181
172
|
*
|
|
182
173
|
*/
|
|
183
174
|
export interface PlanningLocateParam extends DetailedLocateParam {
|
|
184
|
-
id?: string;
|
|
185
175
|
bbox?: [number, number, number, number];
|
|
186
176
|
}
|
|
187
177
|
export interface PlanningAction<ParamType = any> {
|
|
188
178
|
thought?: string;
|
|
189
179
|
type: string;
|
|
190
180
|
param: ParamType;
|
|
191
|
-
locate?: PlanningLocateParam | null;
|
|
192
181
|
}
|
|
193
|
-
export interface
|
|
194
|
-
action
|
|
195
|
-
actions?: PlanningAction[];
|
|
182
|
+
export interface RawResponsePlanningAIResponse {
|
|
183
|
+
action: PlanningAction;
|
|
196
184
|
more_actions_needed_by_instruction: boolean;
|
|
197
185
|
log: string;
|
|
198
186
|
sleep?: number;
|
|
199
187
|
error?: string;
|
|
188
|
+
}
|
|
189
|
+
export interface PlanningAIResponse extends Omit<RawResponsePlanningAIResponse, 'action'> {
|
|
190
|
+
actions?: PlanningAction[];
|
|
200
191
|
usage?: AIUsageInfo;
|
|
201
192
|
rawResponse?: string;
|
|
202
193
|
yamlFlow?: MidsceneYamlFlowItem[];
|
|
203
194
|
yamlString?: string;
|
|
204
|
-
|
|
205
|
-
export type PlanningActionParamTap = null;
|
|
206
|
-
export type PlanningActionParamHover = null;
|
|
207
|
-
export type PlanningActionParamRightClick = null;
|
|
208
|
-
export interface PlanningActionParamInputOrKeyPress {
|
|
209
|
-
value: string;
|
|
210
|
-
autoDismissKeyboard?: boolean;
|
|
195
|
+
error?: string;
|
|
211
196
|
}
|
|
212
197
|
export interface PlanningActionParamSleep {
|
|
213
198
|
timeMs: number;
|
|
@@ -216,10 +201,10 @@ export interface PlanningActionParamError {
|
|
|
216
201
|
thought: string;
|
|
217
202
|
}
|
|
218
203
|
export type PlanningActionParamWaitFor = AgentWaitForOpt & {};
|
|
219
|
-
export interface
|
|
204
|
+
export interface LongPressParam {
|
|
220
205
|
duration?: number;
|
|
221
206
|
}
|
|
222
|
-
export interface
|
|
207
|
+
export interface PullParam {
|
|
223
208
|
direction: 'up' | 'down';
|
|
224
209
|
distance?: number;
|
|
225
210
|
duration?: number;
|
|
@@ -247,17 +232,18 @@ export interface ExecutionRecorderItem {
|
|
|
247
232
|
screenshot?: string;
|
|
248
233
|
timing?: string;
|
|
249
234
|
}
|
|
250
|
-
export type ExecutionTaskType = 'Planning' | 'Insight' | 'Action
|
|
235
|
+
export type ExecutionTaskType = 'Planning' | 'Insight' | 'Action Space' | 'Log';
|
|
251
236
|
export interface ExecutorContext {
|
|
252
237
|
task: ExecutionTask;
|
|
253
238
|
element?: LocateResultElement | null;
|
|
239
|
+
uiContext?: UIContext;
|
|
254
240
|
}
|
|
255
241
|
export interface ExecutionTaskApply<Type extends ExecutionTaskType = any, TaskParam = any, TaskOutput = any, TaskLog = any> {
|
|
256
242
|
type: Type;
|
|
257
243
|
subType?: string;
|
|
244
|
+
subTask?: boolean;
|
|
258
245
|
param?: TaskParam;
|
|
259
246
|
thought?: string;
|
|
260
|
-
locate?: PlanningLocateParam | null;
|
|
261
247
|
uiContext?: UIContext;
|
|
262
248
|
executor: (param: TaskParam, context: ExecutorContext) => Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void> | undefined | void;
|
|
263
249
|
}
|
|
@@ -288,17 +274,17 @@ export interface ExecutionDump extends DumpMeta {
|
|
|
288
274
|
name: string;
|
|
289
275
|
description?: string;
|
|
290
276
|
tasks: ExecutionTask[];
|
|
291
|
-
|
|
277
|
+
aiActContext?: string;
|
|
292
278
|
}
|
|
293
279
|
export type ExecutionTaskInsightLocateParam = PlanningLocateParam;
|
|
294
280
|
export interface ExecutionTaskInsightLocateOutput {
|
|
295
281
|
element: LocateResultElement | null;
|
|
296
282
|
}
|
|
297
|
-
export type ExecutionTaskInsightDump =
|
|
283
|
+
export type ExecutionTaskInsightDump = ServiceDump;
|
|
298
284
|
export type ExecutionTaskInsightLocateApply = ExecutionTaskApply<'Insight', ExecutionTaskInsightLocateParam, ExecutionTaskInsightLocateOutput, ExecutionTaskInsightDump>;
|
|
299
285
|
export type ExecutionTaskInsightLocate = ExecutionTask<ExecutionTaskInsightLocateApply>;
|
|
300
286
|
export interface ExecutionTaskInsightQueryParam {
|
|
301
|
-
dataDemand:
|
|
287
|
+
dataDemand: ServiceExtractParam;
|
|
302
288
|
}
|
|
303
289
|
export interface ExecutionTaskInsightQueryOutput {
|
|
304
290
|
data: any;
|
|
@@ -308,9 +294,9 @@ export type ExecutionTaskInsightQuery = ExecutionTask<ExecutionTaskInsightQueryA
|
|
|
308
294
|
export interface ExecutionTaskInsightAssertionParam {
|
|
309
295
|
assertion: string;
|
|
310
296
|
}
|
|
311
|
-
export type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<'Insight', ExecutionTaskInsightAssertionParam,
|
|
297
|
+
export type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<'Insight', ExecutionTaskInsightAssertionParam, ServiceAssertionResponse, ExecutionTaskInsightDump>;
|
|
312
298
|
export type ExecutionTaskInsightAssertion = ExecutionTask<ExecutionTaskInsightAssertionApply>;
|
|
313
|
-
export type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<'Action', ActionParam, void, void>;
|
|
299
|
+
export type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<'Action Space', ActionParam, void, void>;
|
|
314
300
|
export type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;
|
|
315
301
|
export type ExecutionTaskLogApply<LogParam = {
|
|
316
302
|
content: string;
|
|
@@ -318,8 +304,16 @@ export type ExecutionTaskLogApply<LogParam = {
|
|
|
318
304
|
export type ExecutionTaskLog = ExecutionTask<ExecutionTaskLogApply>;
|
|
319
305
|
export type ExecutionTaskPlanningApply = ExecutionTaskApply<'Planning', {
|
|
320
306
|
userInstruction: string;
|
|
307
|
+
aiActContext?: string;
|
|
321
308
|
}, PlanningAIResponse>;
|
|
322
309
|
export type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;
|
|
310
|
+
export type ExecutionTaskPlanningLocateParam = PlanningLocateParam;
|
|
311
|
+
export interface ExecutionTaskPlanningLocateOutput {
|
|
312
|
+
element: LocateResultElement | null;
|
|
313
|
+
}
|
|
314
|
+
export type ExecutionTaskPlanningDump = ServiceDump;
|
|
315
|
+
export type ExecutionTaskPlanningLocateApply = ExecutionTaskApply<'Planning', ExecutionTaskPlanningLocateParam, ExecutionTaskPlanningLocateOutput, ExecutionTaskPlanningDump>;
|
|
316
|
+
export type ExecutionTaskPlanningLocate = ExecutionTask<ExecutionTaskPlanningLocateApply>;
|
|
323
317
|
export interface GroupedActionDump {
|
|
324
318
|
sdkVersion: string;
|
|
325
319
|
groupName: string;
|
|
@@ -359,13 +353,25 @@ export interface StreamingAIResponse {
|
|
|
359
353
|
/** Whether the response was streamed */
|
|
360
354
|
isStreamed: boolean;
|
|
361
355
|
}
|
|
362
|
-
export interface DeviceAction<
|
|
356
|
+
export interface DeviceAction<TParam = any, TReturn = any> {
|
|
363
357
|
name: string;
|
|
364
358
|
description?: string;
|
|
365
359
|
interfaceAlias?: string;
|
|
366
|
-
paramSchema?: z.ZodType<
|
|
367
|
-
call: (param:
|
|
360
|
+
paramSchema?: z.ZodType<TParam>;
|
|
361
|
+
call: (param: TParam, context: ExecutorContext) => Promise<TReturn> | TReturn;
|
|
362
|
+
delayAfterRunner?: number;
|
|
368
363
|
}
|
|
364
|
+
/**
|
|
365
|
+
* Type utilities for extracting types from DeviceAction definitions
|
|
366
|
+
*/
|
|
367
|
+
/**
|
|
368
|
+
* Extract parameter type from a DeviceAction
|
|
369
|
+
*/
|
|
370
|
+
export type ActionParam<Action extends DeviceAction<any, any>> = Action extends DeviceAction<infer P, any> ? P : never;
|
|
371
|
+
/**
|
|
372
|
+
* Extract return type from a DeviceAction
|
|
373
|
+
*/
|
|
374
|
+
export type ActionReturn<Action extends DeviceAction<any, any>> = Action extends DeviceAction<any, infer R> ? R : never;
|
|
369
375
|
/**
|
|
370
376
|
* Web-specific types
|
|
371
377
|
*/
|
|
@@ -376,7 +382,7 @@ export interface WebElementInfo extends BaseElement {
|
|
|
376
382
|
[key: string]: string;
|
|
377
383
|
};
|
|
378
384
|
}
|
|
379
|
-
export type WebUIContext = UIContext
|
|
385
|
+
export type WebUIContext = UIContext;
|
|
380
386
|
/**
|
|
381
387
|
* Agent
|
|
382
388
|
*/
|
|
@@ -393,11 +399,42 @@ export interface AgentOpt {
|
|
|
393
399
|
generateReport?: boolean;
|
|
394
400
|
autoPrintReportMsg?: boolean;
|
|
395
401
|
onTaskStartTip?: OnTaskStartTip;
|
|
402
|
+
aiActContext?: string;
|
|
396
403
|
aiActionContext?: string;
|
|
397
404
|
reportFileName?: string;
|
|
398
|
-
modelConfig?:
|
|
405
|
+
modelConfig?: TModelConfig;
|
|
399
406
|
cache?: Cache;
|
|
407
|
+
/**
|
|
408
|
+
* Maximum number of replanning cycles for aiAct.
|
|
409
|
+
* Defaults to 20 (40 for `vlm-ui-tars`) when not provided.
|
|
410
|
+
* If omitted, the agent will also read `MIDSCENE_REPLANNING_CYCLE_LIMIT` for backward compatibility.
|
|
411
|
+
*/
|
|
400
412
|
replanningCycleLimit?: number;
|
|
413
|
+
/**
|
|
414
|
+
* Custom OpenAI client factory function
|
|
415
|
+
*
|
|
416
|
+
* If provided, this function will be called to create OpenAI client instances
|
|
417
|
+
* for each AI call, allowing you to:
|
|
418
|
+
* - Wrap clients with observability tools (langsmith, langfuse)
|
|
419
|
+
* - Use custom OpenAI-compatible clients
|
|
420
|
+
* - Apply different configurations based on intent
|
|
421
|
+
*
|
|
422
|
+
* @param config - Resolved model configuration
|
|
423
|
+
* @returns OpenAI client instance (original or wrapped)
|
|
424
|
+
*
|
|
425
|
+
* @example
|
|
426
|
+
* ```typescript
|
|
427
|
+
* createOpenAIClient: async (openai, opts) => {
|
|
428
|
+
* // Wrap with langsmith for planning tasks
|
|
429
|
+
* if (opts.baseURL?.includes('planning')) {
|
|
430
|
+
* return wrapOpenAI(openai, { metadata: { task: 'planning' } });
|
|
431
|
+
* }
|
|
432
|
+
*
|
|
433
|
+
* return openai;
|
|
434
|
+
* }
|
|
435
|
+
* ```
|
|
436
|
+
*/
|
|
437
|
+
createOpenAIClient?: CreateOpenAIClientFn;
|
|
401
438
|
}
|
|
402
439
|
export type TestStatus = 'passed' | 'failed' | 'timedOut' | 'skipped' | 'interrupted';
|
|
403
440
|
export interface ReportFileWithAttributes {
|
package/dist/types/yaml/utils.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { TUserPrompt } from '../
|
|
1
|
+
import type { TUserPrompt } from '../common';
|
|
2
2
|
import type { DetailedLocateParam, LocateOption, MidsceneYamlScript } from '../types';
|
|
3
3
|
export declare function interpolateEnvVars(content: string): string;
|
|
4
4
|
export declare function parseYamlScript(content: string, filePath?: string): MidsceneYamlScript;
|
package/dist/types/yaml.d.ts
CHANGED
|
@@ -1,17 +1,18 @@
|
|
|
1
|
-
import type { TUserPrompt } from './
|
|
2
|
-
import type {
|
|
3
|
-
import type {
|
|
1
|
+
import type { TUserPrompt } from './common';
|
|
2
|
+
import type { AndroidDeviceOpt, IOSDeviceOpt } from './device';
|
|
3
|
+
import type { AgentOpt, LocateResultElement, Rect } from './types';
|
|
4
|
+
import type { UIContext } from './types';
|
|
4
5
|
export interface LocateOption {
|
|
5
6
|
prompt?: TUserPrompt;
|
|
6
7
|
deepThink?: boolean;
|
|
7
8
|
cacheable?: boolean;
|
|
8
9
|
xpath?: string;
|
|
9
|
-
uiContext?: UIContext
|
|
10
|
+
uiContext?: UIContext;
|
|
10
11
|
}
|
|
11
|
-
export interface
|
|
12
|
+
export interface ServiceExtractOption {
|
|
12
13
|
domIncluded?: boolean | 'visible-only';
|
|
13
14
|
screenshotIncluded?: boolean;
|
|
14
|
-
|
|
15
|
+
[key: string]: unknown;
|
|
15
16
|
}
|
|
16
17
|
export interface ReferenceImage {
|
|
17
18
|
base64: string;
|
|
@@ -21,11 +22,13 @@ export interface DetailedLocateParam extends LocateOption {
|
|
|
21
22
|
prompt: TUserPrompt;
|
|
22
23
|
referenceImage?: ReferenceImage;
|
|
23
24
|
}
|
|
24
|
-
export
|
|
25
|
-
direction
|
|
26
|
-
scrollType
|
|
27
|
-
distance?:
|
|
28
|
-
|
|
25
|
+
export type ActionScrollParam = {
|
|
26
|
+
direction?: 'down' | 'up' | 'right' | 'left';
|
|
27
|
+
scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft';
|
|
28
|
+
distance?: number | null;
|
|
29
|
+
locate?: LocateResultElement;
|
|
30
|
+
};
|
|
31
|
+
export type ScrollParam = Omit<ActionScrollParam, 'locate'>;
|
|
29
32
|
export interface MidsceneYamlScript {
|
|
30
33
|
target?: MidsceneYamlScriptWebEnv;
|
|
31
34
|
web?: MidsceneYamlScriptWebEnv;
|
|
@@ -41,7 +44,30 @@ export interface MidsceneYamlTask {
|
|
|
41
44
|
flow: MidsceneYamlFlowItem[];
|
|
42
45
|
continueOnError?: boolean;
|
|
43
46
|
}
|
|
44
|
-
|
|
47
|
+
/**
|
|
48
|
+
* Agent configuration options that can be specified in YAML scripts.
|
|
49
|
+
*
|
|
50
|
+
* This type includes serializable fields from AgentOpt, excluding non-serializable
|
|
51
|
+
* fields like functions and complex objects. All fields are optional.
|
|
52
|
+
*
|
|
53
|
+
* @remarks
|
|
54
|
+
* - testId priority: CLI parameter > YAML agent.testId > filename
|
|
55
|
+
* - These settings apply to all platforms (Web, Android, iOS, Generic Interface)
|
|
56
|
+
* - modelConfig is configured through environment variables, not in YAML
|
|
57
|
+
*
|
|
58
|
+
* @example
|
|
59
|
+
* ```yaml
|
|
60
|
+
* agent:
|
|
61
|
+
* testId: "checkout-test"
|
|
62
|
+
* groupName: "E2E Test Suite"
|
|
63
|
+
* generateReport: true
|
|
64
|
+
* replanningCycleLimit: 30
|
|
65
|
+
* cache:
|
|
66
|
+
* id: "checkout-cache"
|
|
67
|
+
* strategy: "read-write"
|
|
68
|
+
* ```
|
|
69
|
+
*/
|
|
70
|
+
export type MidsceneYamlScriptAgentOpt = Pick<AgentOpt, 'testId' | 'groupName' | 'groupDescription' | 'generateReport' | 'autoPrintReportMsg' | 'reportFileName' | 'replanningCycleLimit' | 'aiActContext' | 'aiActionContext' | 'cache'>;
|
|
45
71
|
export interface MidsceneYamlScriptConfig {
|
|
46
72
|
output?: string;
|
|
47
73
|
unstableLogContent?: boolean | string;
|
|
@@ -65,58 +91,56 @@ export interface MidsceneYamlScriptWebEnv extends MidsceneYamlScriptConfig, Mids
|
|
|
65
91
|
};
|
|
66
92
|
cookie?: string;
|
|
67
93
|
forceSameTabNavigation?: boolean;
|
|
94
|
+
/**
|
|
95
|
+
* Custom Chrome launch arguments (Puppeteer only, not supported in bridge mode).
|
|
96
|
+
*
|
|
97
|
+
* Allows passing custom command-line arguments to Chrome/Chromium when launching the browser.
|
|
98
|
+
* This is useful for testing scenarios that require specific browser configurations.
|
|
99
|
+
*
|
|
100
|
+
* ⚠️ Security Warning: Some arguments (e.g., --no-sandbox, --disable-web-security) may
|
|
101
|
+
* reduce browser security. Use only in controlled testing environments.
|
|
102
|
+
*
|
|
103
|
+
* @example
|
|
104
|
+
* ```yaml
|
|
105
|
+
* web:
|
|
106
|
+
* url: https://example.com
|
|
107
|
+
* chromeArgs:
|
|
108
|
+
* - '--disable-features=ThirdPartyCookiePhaseout'
|
|
109
|
+
* - '--disable-features=SameSiteByDefaultCookies'
|
|
110
|
+
* - '--window-size=1920,1080'
|
|
111
|
+
* ```
|
|
112
|
+
*/
|
|
113
|
+
chromeArgs?: string[];
|
|
68
114
|
bridgeMode?: false | 'newTabWithUrl' | 'currentTab';
|
|
69
115
|
closeNewTabsAfterDisconnect?: boolean;
|
|
70
116
|
}
|
|
71
|
-
export interface MidsceneYamlScriptAndroidEnv extends MidsceneYamlScriptConfig {
|
|
117
|
+
export interface MidsceneYamlScriptAndroidEnv extends MidsceneYamlScriptConfig, Omit<AndroidDeviceOpt, 'customActions'> {
|
|
72
118
|
deviceId?: string;
|
|
73
119
|
launch?: string;
|
|
74
120
|
}
|
|
75
|
-
export interface MidsceneYamlScriptIOSEnv extends MidsceneYamlScriptConfig {
|
|
76
|
-
wdaPort?: number;
|
|
77
|
-
wdaHost?: string;
|
|
78
|
-
autoDismissKeyboard?: boolean;
|
|
121
|
+
export interface MidsceneYamlScriptIOSEnv extends MidsceneYamlScriptConfig, Omit<IOSDeviceOpt, 'customActions'> {
|
|
79
122
|
launch?: string;
|
|
80
123
|
}
|
|
81
124
|
export type MidsceneYamlScriptEnv = MidsceneYamlScriptWebEnv | MidsceneYamlScriptAndroidEnv | MidsceneYamlScriptIOSEnv;
|
|
82
125
|
export interface MidsceneYamlFlowItemAIAction {
|
|
83
|
-
ai?: string;
|
|
84
126
|
aiAction?: string;
|
|
127
|
+
ai?: string;
|
|
128
|
+
aiAct?: string;
|
|
85
129
|
aiActionProgressTips?: string[];
|
|
86
130
|
cacheable?: boolean;
|
|
131
|
+
_deepThink?: boolean;
|
|
132
|
+
[key: string]: unknown;
|
|
87
133
|
}
|
|
88
134
|
export interface MidsceneYamlFlowItemAIAssert {
|
|
89
135
|
aiAssert: string;
|
|
90
136
|
errorMessage?: string;
|
|
91
137
|
name?: string;
|
|
92
|
-
|
|
93
|
-
export interface MidsceneYamlFlowItemAIQuery extends InsightExtractOption {
|
|
94
|
-
aiQuery: string;
|
|
95
|
-
name?: string;
|
|
96
|
-
}
|
|
97
|
-
export interface MidsceneYamlFlowItemAINumber extends InsightExtractOption {
|
|
98
|
-
aiNumber: string;
|
|
99
|
-
name?: string;
|
|
100
|
-
}
|
|
101
|
-
export interface MidsceneYamlFlowItemAIString extends InsightExtractOption {
|
|
102
|
-
aiString: string;
|
|
103
|
-
name?: string;
|
|
104
|
-
}
|
|
105
|
-
export interface MidsceneYamlFlowItemAIAsk extends InsightExtractOption {
|
|
106
|
-
aiAsk: string;
|
|
107
|
-
name?: string;
|
|
108
|
-
}
|
|
109
|
-
export interface MidsceneYamlFlowItemAIBoolean extends InsightExtractOption {
|
|
110
|
-
aiBoolean: string;
|
|
111
|
-
name?: string;
|
|
112
|
-
}
|
|
113
|
-
export interface MidsceneYamlFlowItemAILocate extends LocateOption {
|
|
114
|
-
aiLocate: string;
|
|
115
|
-
name?: string;
|
|
138
|
+
[key: string]: unknown;
|
|
116
139
|
}
|
|
117
140
|
export interface MidsceneYamlFlowItemAIWaitFor {
|
|
118
141
|
aiWaitFor: string;
|
|
119
142
|
timeout?: number;
|
|
143
|
+
[key: string]: unknown;
|
|
120
144
|
}
|
|
121
145
|
export interface MidsceneYamlFlowItemEvaluateJavaScript {
|
|
122
146
|
javascript: string;
|
|
@@ -127,9 +151,10 @@ export interface MidsceneYamlFlowItemSleep {
|
|
|
127
151
|
}
|
|
128
152
|
export interface MidsceneYamlFlowItemLogScreenshot {
|
|
129
153
|
logScreenshot?: string;
|
|
154
|
+
recordToReport?: string;
|
|
130
155
|
content?: string;
|
|
131
156
|
}
|
|
132
|
-
export type MidsceneYamlFlowItem = MidsceneYamlFlowItemAIAction | MidsceneYamlFlowItemAIAssert |
|
|
157
|
+
export type MidsceneYamlFlowItem = MidsceneYamlFlowItemAIAction | MidsceneYamlFlowItemAIAssert | MidsceneYamlFlowItemAIWaitFor | MidsceneYamlFlowItemEvaluateJavaScript | MidsceneYamlFlowItemSleep | MidsceneYamlFlowItemLogScreenshot;
|
|
133
158
|
export interface FreeFn {
|
|
134
159
|
name: string;
|
|
135
160
|
fn: () => void;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@midscene/core",
|
|
3
3
|
"description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
|
|
4
|
-
"version": "0.
|
|
4
|
+
"version": "1.0.0",
|
|
5
5
|
"repository": "https://github.com/web-infra-dev/midscene",
|
|
6
6
|
"homepage": "https://midscenejs.com/",
|
|
7
7
|
"main": "./dist/lib/index.js",
|
|
@@ -79,23 +79,20 @@
|
|
|
79
79
|
}
|
|
80
80
|
},
|
|
81
81
|
"dependencies": {
|
|
82
|
-
"@anthropic-ai/sdk": "0.33.1",
|
|
83
|
-
"@azure/identity": "4.5.0",
|
|
84
82
|
"@ui-tars/action-parser": "1.2.3",
|
|
83
|
+
"dayjs": "^1.11.11",
|
|
85
84
|
"dotenv": "^16.4.5",
|
|
86
|
-
"
|
|
85
|
+
"fetch-socks": "^1.3.0",
|
|
86
|
+
"openai": "6.3.0",
|
|
87
|
+
"undici": "^6.0.0",
|
|
87
88
|
"jsonrepair": "3.12.0",
|
|
88
|
-
"langsmith": "0.3.7",
|
|
89
|
-
"openai": "4.81.0",
|
|
90
|
-
"socks-proxy-agent": "8.0.4",
|
|
91
|
-
"zod": "3.24.3",
|
|
92
89
|
"semver": "7.5.2",
|
|
93
90
|
"js-yaml": "4.1.0",
|
|
94
|
-
"
|
|
95
|
-
"@midscene/shared": "0.
|
|
91
|
+
"zod": "3.24.3",
|
|
92
|
+
"@midscene/shared": "1.0.0"
|
|
96
93
|
},
|
|
97
94
|
"devDependencies": {
|
|
98
|
-
"@rslib/core": "^0.
|
|
95
|
+
"@rslib/core": "^0.18.3",
|
|
99
96
|
"@types/node": "^18.0.0",
|
|
100
97
|
"@types/node-fetch": "2.6.11",
|
|
101
98
|
"@types/js-yaml": "4.0.9",
|
|
@@ -114,7 +111,7 @@
|
|
|
114
111
|
"scripts": {
|
|
115
112
|
"dev": "npm run build:watch",
|
|
116
113
|
"build": "rslib build",
|
|
117
|
-
"build:watch": "USE_DEV_REPORT=1 rslib build --watch",
|
|
114
|
+
"build:watch": "USE_DEV_REPORT=1 rslib build --watch --no-clean",
|
|
118
115
|
"test": "vitest --run",
|
|
119
116
|
"test:u": "vitest --run -u",
|
|
120
117
|
"test:ai": "AITEST=true npm run test",
|