@midscene/core 1.0.1-beta-20251024063839.0 → 1.0.1-beta-20251024064637.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +2 -3
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/index.mjs +2 -2
- package/dist/es/agent/task-builder.mjs +11 -7
- package/dist/es/agent/task-builder.mjs.map +1 -1
- package/dist/es/agent/tasks.mjs +8 -1
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/ui-utils.mjs +10 -10
- package/dist/es/agent/ui-utils.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +6 -50
- package/dist/es/agent/utils.mjs.map +1 -1
- package/dist/es/ai-model/common.mjs.map +1 -1
- package/dist/es/ai-model/index.mjs +2 -2
- package/dist/es/ai-model/inspect.mjs +12 -31
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/prompt/util.mjs +3 -88
- package/dist/es/ai-model/prompt/util.mjs.map +1 -1
- package/dist/es/device/index.mjs.map +1 -1
- package/dist/es/insight/index.mjs +6 -15
- package/dist/es/insight/index.mjs.map +1 -1
- package/dist/es/tree.mjs +13 -2
- package/dist/es/tree.mjs.map +1 -0
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/lib/agent/agent.js +1 -2
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/index.js +0 -3
- package/dist/lib/agent/task-builder.js +11 -7
- package/dist/lib/agent/task-builder.js.map +1 -1
- package/dist/lib/agent/tasks.js +8 -1
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/ui-utils.js +10 -10
- package/dist/lib/agent/ui-utils.js.map +1 -1
- package/dist/lib/agent/utils.js +5 -52
- package/dist/lib/agent/utils.js.map +1 -1
- package/dist/lib/ai-model/common.js.map +1 -1
- package/dist/lib/ai-model/index.js +11 -14
- package/dist/lib/ai-model/inspect.js +11 -30
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/prompt/util.js +5 -93
- package/dist/lib/ai-model/prompt/util.js.map +1 -1
- package/dist/lib/device/index.js.map +1 -1
- package/dist/lib/insight/index.js +6 -15
- package/dist/lib/insight/index.js.map +1 -1
- package/dist/lib/tree.js +10 -1
- package/dist/lib/tree.js.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/agent/agent.d.ts +1 -1
- package/dist/types/agent/index.d.ts +1 -1
- package/dist/types/agent/utils.d.ts +2 -33
- package/dist/types/ai-model/index.d.ts +1 -1
- package/dist/types/ai-model/inspect.d.ts +12 -10
- package/dist/types/ai-model/prompt/util.d.ts +2 -34
- package/dist/types/device/index.d.ts +4 -5
- package/dist/types/insight/index.d.ts +6 -6
- package/dist/types/tree.d.ts +4 -1
- package/dist/types/types.d.ts +6 -30
- package/dist/types/yaml.d.ts +7 -5
- package/package.json +3 -3
- package/dist/es/device/device-options.mjs +0 -0
- package/dist/lib/device/device-options.js +0 -20
- package/dist/lib/device/device-options.js.map +0 -1
- package/dist/types/device/device-options.d.ts +0 -57
|
@@ -118,7 +118,7 @@ export declare class Agent<InterfaceType extends AbstractInterface = AbstractInt
|
|
|
118
118
|
deepThink?: boolean;
|
|
119
119
|
} & LocatorValidatorOption): Promise<AgentDescribeElementAtPointResult>;
|
|
120
120
|
verifyLocator(prompt: string, locateOpt: LocateOption | undefined, expectCenter: [number, number], verifyLocateOption?: LocatorValidatorOption): Promise<LocateValidatorResult>;
|
|
121
|
-
aiLocate(prompt: TUserPrompt, opt?: LocateOption): Promise<Pick<LocateResultElement, "
|
|
121
|
+
aiLocate(prompt: TUserPrompt, opt?: LocateOption): Promise<Pick<LocateResultElement, "center" | "rect"> & {
|
|
122
122
|
dpr?: number;
|
|
123
123
|
}>;
|
|
124
124
|
aiAssert(assertion: TUserPrompt, msg?: string, opt?: AgentAssertOpt & InsightExtractOption): Promise<{
|
|
@@ -5,5 +5,5 @@ export { locateParamStr, paramStr, taskTitleStr, typeStr } from './ui-utils';
|
|
|
5
5
|
export { type LocateCache, type PlanningCache, TaskCache } from './task-cache';
|
|
6
6
|
export { cacheFileExt } from './task-cache';
|
|
7
7
|
export { TaskExecutor } from './tasks';
|
|
8
|
-
export { getCurrentExecutionFile
|
|
8
|
+
export { getCurrentExecutionFile } from './utils';
|
|
9
9
|
export type { AgentOpt } from '../types';
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { TMultimodalPrompt, TUserPrompt } from '../ai-model/common';
|
|
2
2
|
import type { AbstractInterface } from '../device';
|
|
3
|
-
import type {
|
|
3
|
+
import type { ElementCacheFeature, LocateResultElement, PlanningLocateParam, UIContext } from '../types';
|
|
4
4
|
import type { TaskCache } from './task-cache';
|
|
5
5
|
export declare function commonContextParser(interfaceInstance: AbstractInterface, _opt: {
|
|
6
6
|
uploadServerUrl?: string;
|
|
@@ -13,42 +13,11 @@ export declare function printReportMsg(filepath: string): void;
|
|
|
13
13
|
*/
|
|
14
14
|
export declare function getCurrentExecutionFile(trace?: string): string | false;
|
|
15
15
|
export declare function generateCacheId(fileName?: string): string;
|
|
16
|
-
export declare function matchElementFromPlan(planLocateParam: PlanningLocateParam
|
|
16
|
+
export declare function matchElementFromPlan(planLocateParam: PlanningLocateParam): LocateResultElement | undefined;
|
|
17
17
|
export declare function matchElementFromCache(context: {
|
|
18
18
|
taskCache?: TaskCache;
|
|
19
19
|
interfaceInstance: AbstractInterface;
|
|
20
20
|
}, cacheEntry: ElementCacheFeature | undefined, cachePrompt: TUserPrompt, cacheable: boolean | undefined): Promise<LocateResultElement | undefined>;
|
|
21
|
-
export declare function trimContextByViewport(execution: ExecutionDump): {
|
|
22
|
-
tasks: {
|
|
23
|
-
type: any;
|
|
24
|
-
subType?: string;
|
|
25
|
-
subTask?: boolean;
|
|
26
|
-
param?: any;
|
|
27
|
-
thought?: string;
|
|
28
|
-
locate?: PlanningLocateParam | null;
|
|
29
|
-
uiContext?: UIContext;
|
|
30
|
-
executor: (param: any, context: ExecutorContext) => void | Promise<void | import("../types").ExecutionTaskReturn<any, any> | undefined> | undefined;
|
|
31
|
-
output?: any;
|
|
32
|
-
log?: any;
|
|
33
|
-
recorder?: import("../types").ExecutionRecorderItem[];
|
|
34
|
-
hitBy?: import("../types").ExecutionTaskHitBy;
|
|
35
|
-
status: "pending" | "running" | "finished" | "failed" | "cancelled";
|
|
36
|
-
error?: Error;
|
|
37
|
-
errorMessage?: string;
|
|
38
|
-
errorStack?: string;
|
|
39
|
-
timing?: {
|
|
40
|
-
start: number;
|
|
41
|
-
end?: number;
|
|
42
|
-
cost?: number;
|
|
43
|
-
};
|
|
44
|
-
usage?: import("../types").AIUsageInfo;
|
|
45
|
-
searchAreaUsage?: import("../types").AIUsageInfo;
|
|
46
|
-
}[];
|
|
47
|
-
name: string;
|
|
48
|
-
description?: string;
|
|
49
|
-
aiActionContext?: string;
|
|
50
|
-
logTime: number;
|
|
51
|
-
};
|
|
52
21
|
export declare const getMidsceneVersion: () => string;
|
|
53
22
|
export declare const parsePrompt: (prompt: TUserPrompt) => {
|
|
54
23
|
textPrompt: string;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
export { callAIWithStringResponse, callAIWithObjectResponse, callAI, } from './service-caller/index';
|
|
2
2
|
export { systemPromptToLocateElement } from './prompt/llm-locator';
|
|
3
|
-
export { describeUserPage
|
|
3
|
+
export { describeUserPage } from './prompt/util';
|
|
4
4
|
export { generatePlaywrightTest, generatePlaywrightTestStream, } from './prompt/playwright-generator';
|
|
5
5
|
export { generateYamlTest, generateYamlTestStream, } from './prompt/yaml-generator';
|
|
6
6
|
export type { ChatCompletionMessageParam } from 'openai/resources/index';
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
import type { AIDataExtractionResponse,
|
|
1
|
+
import type { AIDataExtractionResponse, AIElementResponse, AIUsageInfo, InsightExtractOption, Rect, ReferenceImage, UIContext } from '../types';
|
|
2
2
|
import type { IModelConfig } from '@midscene/shared/env';
|
|
3
|
+
import type { LocateResultElement } from '@midscene/shared/types';
|
|
3
4
|
import type { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources/index';
|
|
4
5
|
import type { TMultimodalPrompt, TUserPrompt } from './common';
|
|
5
6
|
import { callAIWithObjectResponse } from './service-caller/index';
|
|
@@ -7,23 +8,24 @@ export type AIArgs = [
|
|
|
7
8
|
ChatCompletionSystemMessageParam,
|
|
8
9
|
...ChatCompletionUserMessageParam[]
|
|
9
10
|
];
|
|
10
|
-
export declare function AiLocateElement
|
|
11
|
-
context: UIContext
|
|
11
|
+
export declare function AiLocateElement(options: {
|
|
12
|
+
context: UIContext;
|
|
12
13
|
targetElementDescription: TUserPrompt;
|
|
13
14
|
referenceImage?: ReferenceImage;
|
|
14
15
|
callAIFn: typeof callAIWithObjectResponse<AIElementResponse | [number, number]>;
|
|
15
16
|
searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;
|
|
16
17
|
modelConfig: IModelConfig;
|
|
17
18
|
}): Promise<{
|
|
18
|
-
parseResult:
|
|
19
|
+
parseResult: {
|
|
20
|
+
elements: LocateResultElement[];
|
|
21
|
+
errors?: string[];
|
|
22
|
+
};
|
|
19
23
|
rect?: Rect;
|
|
20
24
|
rawResponse: string;
|
|
21
|
-
elementById: ElementById;
|
|
22
25
|
usage?: AIUsageInfo;
|
|
23
|
-
isOrderSensitive?: boolean;
|
|
24
26
|
}>;
|
|
25
27
|
export declare function AiLocateSection(options: {
|
|
26
|
-
context: UIContext
|
|
28
|
+
context: UIContext;
|
|
27
29
|
sectionDescription: TUserPrompt;
|
|
28
30
|
modelConfig: IModelConfig;
|
|
29
31
|
}): Promise<{
|
|
@@ -33,14 +35,14 @@ export declare function AiLocateSection(options: {
|
|
|
33
35
|
rawResponse: string;
|
|
34
36
|
usage?: AIUsageInfo;
|
|
35
37
|
}>;
|
|
36
|
-
export declare function AiExtractElementInfo<T
|
|
38
|
+
export declare function AiExtractElementInfo<T>(options: {
|
|
37
39
|
dataQuery: string | Record<string, string>;
|
|
38
40
|
multimodalPrompt?: TMultimodalPrompt;
|
|
39
|
-
context: UIContext
|
|
41
|
+
context: UIContext;
|
|
42
|
+
pageDescription?: string;
|
|
40
43
|
extractOption?: InsightExtractOption;
|
|
41
44
|
modelConfig: IModelConfig;
|
|
42
45
|
}): Promise<{
|
|
43
46
|
parseResult: AIDataExtractionResponse<T>;
|
|
44
|
-
elementById: (idOrIndexId: string) => ElementType;
|
|
45
47
|
usage: AIUsageInfo | undefined;
|
|
46
48
|
}>;
|
|
@@ -1,17 +1,9 @@
|
|
|
1
|
-
import type { BaseElement,
|
|
2
|
-
import type { TVlModeTypes } from '@midscene/shared/env';
|
|
1
|
+
import type { BaseElement, Size, UIContext } from '../../types';
|
|
3
2
|
export declare function describeSize(size: Size): string;
|
|
4
3
|
export declare function describeElement(elements: (Pick<BaseElement, 'rect' | 'content'> & {
|
|
5
4
|
id: string;
|
|
6
5
|
})[]): string;
|
|
7
6
|
export declare const distanceThreshold = 16;
|
|
8
|
-
export declare function elementByPositionWithElementInfo(treeRoot: ElementTreeNode<BaseElement>, position: {
|
|
9
|
-
x: number;
|
|
10
|
-
y: number;
|
|
11
|
-
}, options?: {
|
|
12
|
-
requireStrictDistance?: boolean;
|
|
13
|
-
filterPositionElements?: boolean;
|
|
14
|
-
}): BaseElement | undefined;
|
|
15
7
|
export declare function distance(point1: {
|
|
16
8
|
x: number;
|
|
17
9
|
y: number;
|
|
@@ -20,28 +12,4 @@ export declare function distance(point1: {
|
|
|
20
12
|
y: number;
|
|
21
13
|
}): number;
|
|
22
14
|
export declare const samplePageDescription = "\nAnd the page is described as follows:\n====================\nThe size of the page: 1280 x 720\nSome of the elements are marked with a rectangle in the screenshot corresponding to the markerId, some are not.\n\nDescription of all the elements in screenshot:\n<div id=\"969f1637\" markerId=\"1\" left=\"100\" top=\"100\" width=\"100\" height=\"100\"> // The markerId indicated by the rectangle label in the screenshot\n <h4 id=\"b211ecb2\" markerId=\"5\" left=\"150\" top=\"150\" width=\"90\" height=\"60\">\n The username is accepted\n </h4>\n ...many more\n</div>\n====================\n";
|
|
23
|
-
export declare function describeUserPage
|
|
24
|
-
truncateTextLength?: number;
|
|
25
|
-
filterNonTextContent?: boolean;
|
|
26
|
-
domIncluded?: boolean | 'visible-only';
|
|
27
|
-
visibleOnly?: boolean;
|
|
28
|
-
vlMode: TVlModeTypes | undefined;
|
|
29
|
-
}): Promise<{
|
|
30
|
-
description: string;
|
|
31
|
-
elementById(idOrIndexId: string): ElementType;
|
|
32
|
-
elementByPosition(position: {
|
|
33
|
-
x: number;
|
|
34
|
-
y: number;
|
|
35
|
-
}, size: {
|
|
36
|
-
width: number;
|
|
37
|
-
height: number;
|
|
38
|
-
}): BaseElement | undefined;
|
|
39
|
-
insertElementByPosition(position: {
|
|
40
|
-
x: number;
|
|
41
|
-
y: number;
|
|
42
|
-
}): ElementType;
|
|
43
|
-
size: {
|
|
44
|
-
width: number;
|
|
45
|
-
height: number;
|
|
46
|
-
};
|
|
47
|
-
}>;
|
|
15
|
+
export declare function describeUserPage(context: UIContext): Promise<string>;
|
|
@@ -1106,7 +1106,7 @@ export declare const actionScrollParamSchema: z.ZodObject<{
|
|
|
1106
1106
|
xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
|
|
1107
1107
|
}, z.ZodTypeAny, "passthrough">>>;
|
|
1108
1108
|
}, "strip", z.ZodTypeAny, {
|
|
1109
|
-
direction: "
|
|
1109
|
+
direction: "down" | "up" | "right" | "left";
|
|
1110
1110
|
scrollType: "once" | "untilBottom" | "untilTop" | "untilRight" | "untilLeft";
|
|
1111
1111
|
locate?: z.objectOutputType<{
|
|
1112
1112
|
prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
|
|
@@ -1182,7 +1182,7 @@ export declare const actionScrollParamSchema: z.ZodObject<{
|
|
|
1182
1182
|
cacheable: z.ZodOptional<z.ZodBoolean>;
|
|
1183
1183
|
xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
|
|
1184
1184
|
}, z.ZodTypeAny, "passthrough"> | undefined;
|
|
1185
|
-
direction?: "
|
|
1185
|
+
direction?: "down" | "up" | "right" | "left" | undefined;
|
|
1186
1186
|
scrollType?: "once" | "untilBottom" | "untilTop" | "untilRight" | "untilLeft" | undefined;
|
|
1187
1187
|
distance?: number | null | undefined;
|
|
1188
1188
|
}>;
|
|
@@ -1848,7 +1848,7 @@ export declare const ActionSwipeParamSchema: z.ZodObject<{
|
|
|
1848
1848
|
}, "strip", z.ZodTypeAny, {
|
|
1849
1849
|
duration: number;
|
|
1850
1850
|
repeat?: number | undefined;
|
|
1851
|
-
direction?: "
|
|
1851
|
+
direction?: "down" | "up" | "right" | "left" | undefined;
|
|
1852
1852
|
distance?: number | undefined;
|
|
1853
1853
|
start?: z.objectOutputType<{
|
|
1854
1854
|
prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
|
|
@@ -1924,7 +1924,7 @@ export declare const ActionSwipeParamSchema: z.ZodObject<{
|
|
|
1924
1924
|
}, z.ZodTypeAny, "passthrough"> | undefined;
|
|
1925
1925
|
}, {
|
|
1926
1926
|
repeat?: number | undefined;
|
|
1927
|
-
direction?: "
|
|
1927
|
+
direction?: "down" | "up" | "right" | "left" | undefined;
|
|
1928
1928
|
distance?: number | undefined;
|
|
1929
1929
|
duration?: number | undefined;
|
|
1930
1930
|
start?: z.objectInputType<{
|
|
@@ -2156,4 +2156,3 @@ export type ActionClearInputParam = {
|
|
|
2156
2156
|
};
|
|
2157
2157
|
export declare const defineActionClearInput: (call: (param: ActionClearInputParam) => Promise<void>) => DeviceAction<ActionClearInputParam>;
|
|
2158
2158
|
export type { DeviceAction } from '../types';
|
|
2159
|
-
export type { AndroidDeviceOpt, AndroidDeviceInputOpt, IOSDeviceOpt, IOSDeviceInputOpt, } from './device-options';
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { callAIWithObjectResponse } from '../ai-model/index';
|
|
2
|
-
import type { AIDescribeElementResponse,
|
|
2
|
+
import type { AIDescribeElementResponse, DetailedLocateParam, InsightExtractOption, InsightExtractParam, InsightExtractResult, InsightTaskInfo, LocateResultWithDump, Rect, UIContext } from '../types';
|
|
3
3
|
import { type IModelConfig } from '@midscene/shared/env';
|
|
4
4
|
import type { TMultimodalPrompt } from '../ai-model/common';
|
|
5
5
|
export interface LocateOpts {
|
|
6
|
-
context?: UIContext
|
|
6
|
+
context?: UIContext;
|
|
7
7
|
}
|
|
8
8
|
export type AnyValue<T> = {
|
|
9
9
|
[K in keyof T]: unknown extends T[K] ? any : T[K];
|
|
@@ -12,13 +12,13 @@ interface InsightOptions {
|
|
|
12
12
|
taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;
|
|
13
13
|
aiVendorFn?: typeof callAIWithObjectResponse;
|
|
14
14
|
}
|
|
15
|
-
export default class Insight
|
|
16
|
-
contextRetrieverFn: () => Promise<
|
|
15
|
+
export default class Insight {
|
|
16
|
+
contextRetrieverFn: () => Promise<UIContext> | UIContext;
|
|
17
17
|
aiVendorFn: Exclude<InsightOptions['aiVendorFn'], undefined>;
|
|
18
18
|
taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;
|
|
19
|
-
constructor(context:
|
|
19
|
+
constructor(context: UIContext | (() => Promise<UIContext> | UIContext), opt?: InsightOptions);
|
|
20
20
|
locate(query: DetailedLocateParam, opt: LocateOpts, modelConfig: IModelConfig): Promise<LocateResultWithDump>;
|
|
21
|
-
extract<T>(dataDemand: InsightExtractParam, modelConfig: IModelConfig, opt?: InsightExtractOption, multimodalPrompt?: TMultimodalPrompt): Promise<InsightExtractResult<T>>;
|
|
21
|
+
extract<T>(dataDemand: InsightExtractParam, modelConfig: IModelConfig, opt?: InsightExtractOption, pageDescription?: string, multimodalPrompt?: TMultimodalPrompt): Promise<InsightExtractResult<T>>;
|
|
22
22
|
describe(target: Rect | [number, number], modelConfig: IModelConfig, opt?: {
|
|
23
23
|
deepThink?: boolean;
|
|
24
24
|
}): Promise<Pick<AIDescribeElementResponse, 'description'>>;
|
package/dist/types/tree.d.ts
CHANGED
|
@@ -1 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
import type { BaseElement, ElementTreeNode } from '@midscene/shared/types';
|
|
2
|
+
import { trimAttributes, truncateText } from '@midscene/shared/extractor';
|
|
3
|
+
export { trimAttributes, truncateText };
|
|
4
|
+
export declare function descriptionOfTree<ElementType extends BaseElement = BaseElement>(tree: ElementTreeNode<ElementType>, truncateTextLength?: number, filterNonTextContent?: boolean, visibleOnly?: boolean): string;
|
package/dist/types/types.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { NodeType } from '@midscene/shared/constants';
|
|
2
2
|
import type { CreateOpenAIClientFn, TModelConfigFn } from '@midscene/shared/env';
|
|
3
|
-
import type { BaseElement,
|
|
3
|
+
import type { BaseElement, LocateResultElement, Rect, Size } from '@midscene/shared/types';
|
|
4
4
|
import type { z } from 'zod';
|
|
5
5
|
import type { TUserPrompt } from './ai-model/common';
|
|
6
6
|
import type { DetailedLocateParam, MidsceneYamlFlowItem } from './yaml';
|
|
@@ -15,6 +15,7 @@ export type AIUsageInfo = Record<string, any> & {
|
|
|
15
15
|
model_description: string | undefined;
|
|
16
16
|
intent: string | undefined;
|
|
17
17
|
};
|
|
18
|
+
export type { LocateResultElement };
|
|
18
19
|
/**
|
|
19
20
|
* openai
|
|
20
21
|
*
|
|
@@ -39,23 +40,12 @@ export type AISingleElementResponseByPosition = {
|
|
|
39
40
|
text: string;
|
|
40
41
|
};
|
|
41
42
|
export type AISingleElementResponse = AISingleElementResponseById;
|
|
42
|
-
export interface AIElementLocatorResponse {
|
|
43
|
-
elements: {
|
|
44
|
-
id: string;
|
|
45
|
-
reason?: string;
|
|
46
|
-
text?: string;
|
|
47
|
-
xpaths?: string[];
|
|
48
|
-
}[];
|
|
49
|
-
bbox?: [number, number, number, number];
|
|
50
|
-
isOrderSensitive?: boolean;
|
|
51
|
-
errors?: string[];
|
|
52
|
-
}
|
|
53
43
|
export interface AIElementCoordinatesResponse {
|
|
54
44
|
bbox: [number, number, number, number];
|
|
55
45
|
isOrderSensitive?: boolean;
|
|
56
46
|
errors?: string[];
|
|
57
47
|
}
|
|
58
|
-
export type AIElementResponse =
|
|
48
|
+
export type AIElementResponse = AIElementCoordinatesResponse;
|
|
59
49
|
export interface AIDataExtractionResponse<DataDemand> {
|
|
60
50
|
data: DataDemand;
|
|
61
51
|
errors?: string[];
|
|
@@ -91,9 +81,8 @@ export interface AgentDescribeElementAtPointResult {
|
|
|
91
81
|
/**
|
|
92
82
|
* context
|
|
93
83
|
*/
|
|
94
|
-
export declare abstract class UIContext
|
|
84
|
+
export declare abstract class UIContext {
|
|
95
85
|
abstract screenshotBase64: string;
|
|
96
|
-
abstract tree: ElementTreeNode<ElementType>;
|
|
97
86
|
abstract size: Size;
|
|
98
87
|
abstract _isFrozen?: boolean;
|
|
99
88
|
}
|
|
@@ -103,18 +92,6 @@ export type EnsureObject<T> = {
|
|
|
103
92
|
export type InsightAction = 'locate' | 'extract' | 'assert' | 'describe';
|
|
104
93
|
export type InsightExtractParam = string | Record<string, string>;
|
|
105
94
|
export type ElementCacheFeature = Record<string, unknown>;
|
|
106
|
-
export type LocateResultElement = {
|
|
107
|
-
center: [number, number];
|
|
108
|
-
rect: Rect;
|
|
109
|
-
id: string;
|
|
110
|
-
indexId?: number;
|
|
111
|
-
xpaths: string[];
|
|
112
|
-
attributes: {
|
|
113
|
-
nodeType: NodeType;
|
|
114
|
-
[key: string]: string;
|
|
115
|
-
};
|
|
116
|
-
isOrderSensitive?: boolean;
|
|
117
|
-
};
|
|
118
95
|
export interface LocateResult {
|
|
119
96
|
element: LocateResultElement | null;
|
|
120
97
|
rect?: Rect;
|
|
@@ -143,7 +120,7 @@ export interface InsightDump extends DumpMeta {
|
|
|
143
120
|
dataDemand?: InsightExtractParam;
|
|
144
121
|
assertion?: TUserPrompt;
|
|
145
122
|
};
|
|
146
|
-
matchedElement:
|
|
123
|
+
matchedElement: LocateResultElement[];
|
|
147
124
|
matchedRect?: Rect;
|
|
148
125
|
deepThink?: boolean;
|
|
149
126
|
data: any;
|
|
@@ -193,7 +170,6 @@ export interface AgentAssertOpt {
|
|
|
193
170
|
*
|
|
194
171
|
*/
|
|
195
172
|
export interface PlanningLocateParam extends DetailedLocateParam {
|
|
196
|
-
id?: string;
|
|
197
173
|
bbox?: [number, number, number, number];
|
|
198
174
|
}
|
|
199
175
|
export interface PlanningAction<ParamType = any> {
|
|
@@ -385,7 +361,7 @@ export interface WebElementInfo extends BaseElement {
|
|
|
385
361
|
[key: string]: string;
|
|
386
362
|
};
|
|
387
363
|
}
|
|
388
|
-
export type WebUIContext = UIContext
|
|
364
|
+
export type WebUIContext = UIContext;
|
|
389
365
|
/**
|
|
390
366
|
* Agent
|
|
391
367
|
*/
|
package/dist/types/yaml.d.ts
CHANGED
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
import type { TUserPrompt } from './ai-model/common';
|
|
2
|
-
import type { AndroidDeviceOpt, IOSDeviceOpt } from './device';
|
|
3
2
|
import type { AgentOpt, Rect } from './types';
|
|
4
|
-
import type {
|
|
3
|
+
import type { UIContext } from './types';
|
|
5
4
|
export interface LocateOption {
|
|
6
5
|
prompt?: TUserPrompt;
|
|
7
6
|
deepThink?: boolean;
|
|
8
7
|
cacheable?: boolean;
|
|
9
8
|
xpath?: string;
|
|
10
|
-
uiContext?: UIContext
|
|
9
|
+
uiContext?: UIContext;
|
|
11
10
|
}
|
|
12
11
|
export interface InsightExtractOption {
|
|
13
12
|
domIncluded?: boolean | 'visible-only';
|
|
@@ -69,11 +68,14 @@ export interface MidsceneYamlScriptWebEnv extends MidsceneYamlScriptConfig, Mids
|
|
|
69
68
|
bridgeMode?: false | 'newTabWithUrl' | 'currentTab';
|
|
70
69
|
closeNewTabsAfterDisconnect?: boolean;
|
|
71
70
|
}
|
|
72
|
-
export interface MidsceneYamlScriptAndroidEnv extends MidsceneYamlScriptConfig
|
|
71
|
+
export interface MidsceneYamlScriptAndroidEnv extends MidsceneYamlScriptConfig {
|
|
73
72
|
deviceId?: string;
|
|
74
73
|
launch?: string;
|
|
75
74
|
}
|
|
76
|
-
export interface MidsceneYamlScriptIOSEnv extends MidsceneYamlScriptConfig
|
|
75
|
+
export interface MidsceneYamlScriptIOSEnv extends MidsceneYamlScriptConfig {
|
|
76
|
+
wdaPort?: number;
|
|
77
|
+
wdaHost?: string;
|
|
78
|
+
autoDismissKeyboard?: boolean;
|
|
77
79
|
launch?: string;
|
|
78
80
|
}
|
|
79
81
|
export type MidsceneYamlScriptEnv = MidsceneYamlScriptWebEnv | MidsceneYamlScriptAndroidEnv | MidsceneYamlScriptIOSEnv;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@midscene/core",
|
|
3
3
|
"description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
|
|
4
|
-
"version": "1.0.1-beta-
|
|
4
|
+
"version": "1.0.1-beta-20251024064637.0",
|
|
5
5
|
"repository": "https://github.com/web-infra-dev/midscene",
|
|
6
6
|
"homepage": "https://midscenejs.com/",
|
|
7
7
|
"main": "./dist/lib/index.js",
|
|
@@ -89,8 +89,8 @@
|
|
|
89
89
|
"zod": "3.24.3",
|
|
90
90
|
"semver": "7.5.2",
|
|
91
91
|
"js-yaml": "4.1.0",
|
|
92
|
-
"@midscene/recorder": "1.0.1-beta-
|
|
93
|
-
"@midscene/shared": "1.0.1-beta-
|
|
92
|
+
"@midscene/recorder": "1.0.1-beta-20251024064637.0",
|
|
93
|
+
"@midscene/shared": "1.0.1-beta-20251024064637.0"
|
|
94
94
|
},
|
|
95
95
|
"devDependencies": {
|
|
96
96
|
"@rslib/core": "^0.11.2",
|
|
File without changes
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __webpack_require__ = {};
|
|
3
|
-
(()=>{
|
|
4
|
-
__webpack_require__.r = (exports1)=>{
|
|
5
|
-
if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
|
|
6
|
-
value: 'Module'
|
|
7
|
-
});
|
|
8
|
-
Object.defineProperty(exports1, '__esModule', {
|
|
9
|
-
value: true
|
|
10
|
-
});
|
|
11
|
-
};
|
|
12
|
-
})();
|
|
13
|
-
var __webpack_exports__ = {};
|
|
14
|
-
__webpack_require__.r(__webpack_exports__);
|
|
15
|
-
for(var __webpack_i__ in __webpack_exports__)exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
|
|
16
|
-
Object.defineProperty(exports, '__esModule', {
|
|
17
|
-
value: true
|
|
18
|
-
});
|
|
19
|
-
|
|
20
|
-
//# sourceMappingURL=device-options.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"device/device-options.js","sources":["webpack://@midscene/core/webpack/runtime/make_namespace_object"],"sourcesContent":["// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};"],"names":["__webpack_require__","Symbol","Object"],"mappings":";;;IACAA,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOC,UAA0BA,OAAO,WAAW,EACrDC,OAAO,cAAc,CAAC,UAASD,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEC,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D"}
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
import type { DeviceAction } from '../types';
|
|
2
|
-
/**
|
|
3
|
-
* Android device input options
|
|
4
|
-
*/
|
|
5
|
-
export type AndroidDeviceInputOpt = {
|
|
6
|
-
/** Automatically dismiss the keyboard after input is completed */
|
|
7
|
-
autoDismissKeyboard?: boolean;
|
|
8
|
-
/** Strategy for dismissing the keyboard: 'esc-first' tries ESC before BACK, 'back-first' tries BACK before ESC */
|
|
9
|
-
keyboardDismissStrategy?: 'esc-first' | 'back-first';
|
|
10
|
-
};
|
|
11
|
-
/**
|
|
12
|
-
* Android device options
|
|
13
|
-
*/
|
|
14
|
-
export type AndroidDeviceOpt = {
|
|
15
|
-
/** Path to the ADB executable */
|
|
16
|
-
androidAdbPath?: string;
|
|
17
|
-
/** Remote ADB host address */
|
|
18
|
-
remoteAdbHost?: string;
|
|
19
|
-
/** Remote ADB port */
|
|
20
|
-
remoteAdbPort?: number;
|
|
21
|
-
/** Input method editor strategy: 'always-yadb' always uses yadb, 'yadb-for-non-ascii' uses yadb only for non-ASCII characters */
|
|
22
|
-
imeStrategy?: 'always-yadb' | 'yadb-for-non-ascii';
|
|
23
|
-
/** Display ID to use for this device */
|
|
24
|
-
displayId?: number;
|
|
25
|
-
/** Use physical display ID for screenshot operations */
|
|
26
|
-
usePhysicalDisplayIdForScreenshot?: boolean;
|
|
27
|
-
/** Use physical display ID when looking up display information */
|
|
28
|
-
usePhysicalDisplayIdForDisplayLookup?: boolean;
|
|
29
|
-
/** Custom device actions to register */
|
|
30
|
-
customActions?: DeviceAction<any>[];
|
|
31
|
-
/** Screenshot resize scale factor */
|
|
32
|
-
screenshotResizeScale?: number;
|
|
33
|
-
/** Always fetch screen info on each call; if false, cache the first result */
|
|
34
|
-
alwaysRefreshScreenInfo?: boolean;
|
|
35
|
-
} & AndroidDeviceInputOpt;
|
|
36
|
-
/**
|
|
37
|
-
* iOS device input options
|
|
38
|
-
*/
|
|
39
|
-
export type IOSDeviceInputOpt = {
|
|
40
|
-
/** Automatically dismiss the keyboard after input is completed */
|
|
41
|
-
autoDismissKeyboard?: boolean;
|
|
42
|
-
};
|
|
43
|
-
/**
|
|
44
|
-
* iOS device options
|
|
45
|
-
*/
|
|
46
|
-
export type IOSDeviceOpt = {
|
|
47
|
-
/** Device ID (UDID) to connect to */
|
|
48
|
-
deviceId?: string;
|
|
49
|
-
/** Custom device actions to register */
|
|
50
|
-
customActions?: DeviceAction<any>[];
|
|
51
|
-
/** WebDriverAgent port (default: 8100) */
|
|
52
|
-
wdaPort?: number;
|
|
53
|
-
/** WebDriverAgent host (default: 'localhost') */
|
|
54
|
-
wdaHost?: string;
|
|
55
|
-
/** Whether to use WebDriverAgent */
|
|
56
|
-
useWDA?: boolean;
|
|
57
|
-
} & IOSDeviceInputOpt;
|