@midscene/core 0.26.7-beta-20250818035341.0 → 0.26.7-beta-20250818081955.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/ai-model/action-executor.mjs +0 -8
- package/dist/es/ai-model/action-executor.mjs.map +1 -1
- package/dist/es/ai-model/common.mjs +16 -37
- package/dist/es/ai-model/common.mjs.map +1 -1
- package/dist/es/ai-model/index.mjs +4 -4
- package/dist/es/ai-model/inspect.mjs +2 -51
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/llm-planning.mjs +1 -1
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/assertion.mjs +1 -25
- package/dist/es/ai-model/prompt/assertion.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +6 -3
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/index.mjs +2 -2
- package/dist/es/index.mjs.map +1 -1
- package/dist/es/insight/index.mjs +1 -36
- package/dist/es/insight/index.mjs.map +1 -1
- package/dist/es/insight/utils.mjs +1 -3
- package/dist/es/insight/utils.mjs.map +1 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/lib/ai-model/action-executor.js +0 -8
- package/dist/lib/ai-model/action-executor.js.map +1 -1
- package/dist/lib/ai-model/common.js +18 -36
- package/dist/lib/ai-model/common.js.map +1 -1
- package/dist/lib/ai-model/index.js +7 -4
- package/dist/lib/ai-model/inspect.js +2 -54
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/llm-planning.js +1 -1
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/assertion.js +2 -29
- package/dist/lib/ai-model/prompt/assertion.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +6 -3
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/index.js +0 -3
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/insight/index.js +0 -35
- package/dist/lib/insight/index.js.map +1 -1
- package/dist/lib/insight/utils.js +1 -3
- package/dist/lib/insight/utils.js.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/ai-model/common.d.ts +3 -2
- package/dist/types/ai-model/index.d.ts +3 -2
- package/dist/types/ai-model/inspect.d.ts +1 -8
- package/dist/types/ai-model/prompt/assertion.d.ts +0 -3
- package/dist/types/index.d.ts +1 -1
- package/dist/types/insight/index.d.ts +1 -2
- package/dist/types/types.d.ts +4 -2
- package/dist/types/yaml.d.ts +6 -6
- package/package.json +3 -3
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { AIUsageInfo, BaseElement, ElementTreeNode, MidsceneYamlFlowItem, PlanningAction, Rect, Size } from '../types';
|
|
1
|
+
import type { AIUsageInfo, BaseElement, DeviceAction, ElementTreeNode, MidsceneYamlFlowItem, PlanningAction, Rect, Size } from '../types';
|
|
2
2
|
import type { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources/index';
|
|
3
3
|
import type { PlanningLocateParam } from '../types';
|
|
4
4
|
export type AIArgs = [
|
|
@@ -12,6 +12,7 @@ export declare enum AIActionType {
|
|
|
12
12
|
PLAN = 3,
|
|
13
13
|
DESCRIBE_ELEMENT = 4
|
|
14
14
|
}
|
|
15
|
+
export declare const actionSpaceTypePrefix = "action_space_";
|
|
15
16
|
export declare function callAiFn<T>(msgs: AIArgs, AIActionTypeValue: AIActionType): Promise<{
|
|
16
17
|
content: T;
|
|
17
18
|
usage?: AIUsageInfo;
|
|
@@ -31,4 +32,4 @@ export declare function mergeRects(rects: Rect[]): {
|
|
|
31
32
|
};
|
|
32
33
|
export declare function expandSearchArea(rect: Rect, screenSize: Size): Rect;
|
|
33
34
|
export declare function markupImageForLLM(screenshotBase64: string, tree: ElementTreeNode<BaseElement>, size: Size): Promise<string>;
|
|
34
|
-
export declare function buildYamlFlowFromPlans(plans: PlanningAction[], sleep?: number): MidsceneYamlFlowItem[];
|
|
35
|
+
export declare function buildYamlFlowFromPlans(plans: PlanningAction[], actionSpace: DeviceAction[], sleep?: number): MidsceneYamlFlowItem[];
|
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
export { callAiFnWithStringResponse, callToGetJSONObject, call as callAi, } from './service-caller/index';
|
|
1
|
+
export { callAiFnWithStringResponse, callToGetJSONObject, call as callAi, getModelName, } from './service-caller/index';
|
|
2
2
|
export { systemPromptToLocateElement } from './prompt/llm-locator';
|
|
3
3
|
export { describeUserPage, elementByPositionWithElementInfo, } from './prompt/util';
|
|
4
4
|
export { generatePlaywrightTest, generatePlaywrightTestStream, } from './prompt/playwright-generator';
|
|
5
5
|
export { generateYamlTest, generateYamlTestStream, } from './prompt/yaml-generator';
|
|
6
6
|
export type { ChatCompletionMessageParam } from 'openai/resources/index';
|
|
7
|
-
export { AiLocateElement, AiExtractElementInfo, AiAssert, AiLocateSection, } from './inspect';
|
|
7
|
+
export { AiLocateElement, AiExtractElementInfo, AiLocateSection, } from './inspect';
|
|
8
8
|
export { plan } from './llm-planning';
|
|
9
9
|
export { callAiFn, adaptBboxToRect, } from './common';
|
|
10
10
|
export { vlmPlanning, resizeImageForUiTars } from './ui-tars-planning';
|
|
11
11
|
export { AIActionType, type AIArgs } from './common';
|
|
12
|
+
export { actionSpaceTypePrefix } from './common';
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
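import type { AIAssertionResponse, AIDataExtractionResponse, AIElementLocatorResponse, AIElementResponse, AISectionLocatorResponse, AIUsageInfo, BaseElement, ElementById, InsightExtractOption, Rect, ReferenceImage, TMultimodalPrompt, TUserPrompt, UIContext } from '../types';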
|
|
1
|
+
import type { AIDataExtractionResponse, AIElementLocatorResponse, AIElementResponse, AISectionLocatorResponse, AIUsageInfo, BaseElement, ElementById, InsightExtractOption, Rect, ReferenceImage, TMultimodalPrompt, TUserPrompt, UIContext } from '../types';
|
|
2
2
|
import type { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources/index';
|
|
3
3
|
import { callAiFn } from './common';
|
|
4
4
|
export type AIArgs = [
|
|
@@ -40,10 +40,3 @@ export declare function AiExtractElementInfo<T, ElementType extends BaseElement
|
|
|
40
40
|
elementById: (idOrIndexId: string) => ElementType;
|
|
41
41
|
usage: AIUsageInfo | undefined;
|
|
42
42
|
}>;
|
|
43
|
-
export declare function AiAssert<ElementType extends BaseElement = BaseElement>(options: {
|
|
44
|
-
assertion: TUserPrompt;
|
|
45
|
-
context: UIContext<ElementType>;
|
|
46
|
-
}): Promise<{
|
|
47
|
-
content: AIAssertionResponse;
|
|
48
|
-
usage: AIUsageInfo | undefined;
|
|
49
|
-
}>;
|
package/dist/types/index.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { Executor } from './ai-model/action-executor';
|
|
2
2
|
import Insight from './insight/index';
|
|
3
3
|
import { getVersion } from './utils';
|
|
4
|
-
export { plan, describeUserPage, AiLocateElement, AiAssert, } from './ai-model/index';
|
|
4
|
+
export { plan, describeUserPage, AiLocateElement, } from './ai-model/index';
|
|
5
5
|
export { getAIConfig, MIDSCENE_MODEL_NAME } from '@midscene/shared/env';
|
|
6
6
|
export type * from './types';
|
|
7
7
|
export default Insight;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { callAiFn } from '../ai-model/common';
|
|
2
|
-
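import type { AIDescribeElementResponse, AIElementResponse, AIUsageInfo, BaseElement, DetailedLocateParam, DumpSubscriber, InsightAction, InsightAssertionResponse, InsightExtractOption, InsightExtractParam, InsightOptions, InsightTaskInfo, LocateResult, Rect, TMultimodalPrompt, TUserPrompt, UIContext } from '../types';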
|
|
2
|
+
import type { AIDescribeElementResponse, AIElementResponse, AIUsageInfo, BaseElement, DetailedLocateParam, DumpSubscriber, InsightAction, InsightExtractOption, InsightExtractParam, InsightOptions, InsightTaskInfo, LocateResult, Rect, TMultimodalPrompt, UIContext } from '../types';
|
|
3
3
|
export interface LocateOpts {
|
|
4
4
|
context?: UIContext<BaseElement>;
|
|
5
5
|
callAI?: typeof callAiFn<AIElementResponse>;
|
|
@@ -19,7 +19,6 @@ export default class Insight<ElementType extends BaseElement = BaseElement, Cont
|
|
|
19
19
|
thought?: string;
|
|
20
20
|
usage?: AIUsageInfo;
|
|
21
21
|
}>;
|
|
22
|
-
assert(assertion: TUserPrompt): Promise<InsightAssertionResponse>;
|
|
23
22
|
describe(target: Rect | [number, number], opt?: {
|
|
24
23
|
deepThink?: boolean;
|
|
25
24
|
}): Promise<Pick<AIDescribeElementResponse, 'description'>>;
|
package/dist/types/types.d.ts
CHANGED
|
@@ -9,6 +9,7 @@ export type AIUsageInfo = Record<string, any> & {
|
|
|
9
9
|
completion_tokens: number | undefined;
|
|
10
10
|
total_tokens: number | undefined;
|
|
11
11
|
time_cost: number | undefined;
|
|
12
|
+
model_name: string | undefined;
|
|
12
13
|
};
|
|
13
14
|
/**
|
|
14
15
|
* openai
|
|
@@ -132,8 +133,6 @@ export interface InsightTaskInfo {
|
|
|
132
133
|
export interface DumpMeta {
|
|
133
134
|
sdkVersion: string;
|
|
134
135
|
logTime: number;
|
|
135
|
-
model_name: string;
|
|
136
|
-
model_description?: string;
|
|
137
136
|
}
|
|
138
137
|
export interface ReportDumpWithAttributes {
|
|
139
138
|
dumpString: string;
|
|
@@ -334,6 +333,8 @@ export type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;
|
|
|
334
333
|
export interface GroupedActionDump {
|
|
335
334
|
groupName: string;
|
|
336
335
|
groupDescription?: string;
|
|
336
|
+
modelName: string;
|
|
337
|
+
modelDescription: string;
|
|
337
338
|
executions: ExecutionDump[];
|
|
338
339
|
}
|
|
339
340
|
export type PageType = 'puppeteer' | 'playwright' | 'static' | 'chrome-extension-proxy' | 'android';
|
|
@@ -390,6 +391,7 @@ export type TUserPrompt = string | ({
|
|
|
390
391
|
} & Partial<TMultimodalPrompt>);
|
|
391
392
|
export interface DeviceAction<ParamType = any> {
|
|
392
393
|
name: string;
|
|
394
|
+
interfaceAlias?: string;
|
|
393
395
|
description?: string;
|
|
394
396
|
paramSchema?: string;
|
|
395
397
|
paramDescription?: string;
|
package/dist/types/yaml.d.ts
CHANGED
|
@@ -10,6 +10,7 @@ export interface InsightExtractOption {
|
|
|
10
10
|
domIncluded?: boolean | 'visible-only';
|
|
11
11
|
screenshotIncluded?: boolean;
|
|
12
12
|
returnThought?: boolean;
|
|
13
|
+
isWaitForAssert?: boolean;
|
|
13
14
|
}
|
|
14
15
|
export interface ReferenceImage {
|
|
15
16
|
base64: string;
|
|
@@ -110,16 +111,15 @@ export interface MidsceneYamlFlowItemAIHover extends LocateOption {
|
|
|
110
111
|
aiHover: TUserPrompt;
|
|
111
112
|
}
|
|
112
113
|
export interface MidsceneYamlFlowItemAIInput extends LocateOption {
|
|
113
|
-
aiInput:
|
|
114
|
-
|
|
114
|
+
aiInput: TUserPrompt | undefined;
|
|
115
|
+
value: string;
|
|
115
116
|
}
|
|
116
117
|
export interface MidsceneYamlFlowItemAIKeyboardPress extends LocateOption {
|
|
117
|
-
aiKeyboardPress:
|
|
118
|
-
|
|
118
|
+
aiKeyboardPress: TUserPrompt | undefined;
|
|
119
|
+
key: string;
|
|
119
120
|
}
|
|
120
121
|
export interface MidsceneYamlFlowItemAIScroll extends LocateOption, ScrollParam {
|
|
121
|
-
aiScroll:
|
|
122
|
-
locate?: TUserPrompt;
|
|
122
|
+
aiScroll: TUserPrompt | undefined;
|
|
123
123
|
}
|
|
124
124
|
export interface MidsceneYamlFlowItemEvaluateJavaScript {
|
|
125
125
|
javascript: string;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@midscene/core",
|
|
3
3
|
"description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
|
|
4
|
-
"version": "0.26.7-beta-20250818035341.0",
|
|
4
|
+
"version": "0.26.7-beta-20250818081955.0",
|
|
5
5
|
"repository": "https://github.com/web-infra-dev/midscene",
|
|
6
6
|
"homepage": "https://midscenejs.com/",
|
|
7
7
|
"main": "./dist/lib/index.js",
|
|
@@ -60,8 +60,8 @@
|
|
|
60
60
|
"langsmith": "0.3.7",
|
|
61
61
|
"openai": "4.81.0",
|
|
62
62
|
"socks-proxy-agent": "8.0.4",
|
|
63
|
-
"@midscene/recorder": "0.26.7-beta-20250818035341.0",
|
|
64
|
-
"@midscene/shared": "0.26.7-beta-20250818035341.0"
|
|
63
|
+
"@midscene/recorder": "0.26.7-beta-20250818081955.0",
|
|
64
|
+
"@midscene/shared": "0.26.7-beta-20250818081955.0"
|
|
65
65
|
},
|
|
66
66
|
"devDependencies": {
|
|
67
67
|
"@microsoft/api-extractor": "^7.52.10",
|