@midscene/core 0.26.5-beta-20250814095614.0 → 0.26.5-beta-20250814125155.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/ai-model/action-executor.mjs +139 -0
- package/dist/es/ai-model/action-executor.mjs.map +1 -0
- package/dist/es/ai-model/common.mjs +219 -0
- package/dist/es/ai-model/common.mjs.map +1 -0
- package/dist/es/ai-model/index.mjs +10 -0
- package/dist/es/ai-model/inspect.mjs +317 -0
- package/dist/es/ai-model/inspect.mjs.map +1 -0
- package/dist/es/ai-model/llm-planning.mjs +85 -0
- package/dist/es/ai-model/llm-planning.mjs.map +1 -0
- package/dist/es/ai-model/prompt/assertion.mjs +55 -0
- package/dist/es/ai-model/prompt/assertion.mjs.map +1 -0
- package/dist/es/ai-model/prompt/common.mjs +7 -0
- package/dist/es/ai-model/prompt/common.mjs.map +1 -0
- package/dist/es/ai-model/prompt/describe.mjs +44 -0
- package/dist/es/ai-model/prompt/describe.mjs.map +1 -0
- package/dist/es/ai-model/prompt/extraction.mjs +137 -0
- package/dist/es/ai-model/prompt/extraction.mjs.map +1 -0
- package/dist/es/ai-model/prompt/llm-locator.mjs +275 -0
- package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -0
- package/dist/es/ai-model/prompt/llm-planning.mjs +359 -0
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -0
- package/dist/es/ai-model/prompt/llm-section-locator.mjs +47 -0
- package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs +117 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -0
- package/dist/es/ai-model/prompt/ui-tars-locator.mjs +34 -0
- package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +1 -0
- package/dist/es/ai-model/prompt/ui-tars-planning.mjs +36 -0
- package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -0
- package/dist/es/ai-model/prompt/util.mjs +123 -0
- package/dist/es/ai-model/prompt/util.mjs.map +1 -0
- package/dist/es/ai-model/prompt/yaml-generator.mjs +219 -0
- package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -0
- package/dist/es/ai-model/service-caller/index.mjs +413 -0
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -0
- package/dist/es/ai-model/ui-tars-planning.mjs +235 -0
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -0
- package/dist/es/image/index.mjs +2 -0
- package/dist/es/index.mjs +7 -2360
- package/dist/es/index.mjs.map +1 -1
- package/dist/es/insight/index.mjs +261 -0
- package/dist/es/insight/index.mjs.map +1 -0
- package/dist/es/insight/utils.mjs +19 -0
- package/dist/es/insight/utils.mjs.map +1 -0
- package/dist/es/types.mjs +11 -0
- package/dist/es/types.mjs.map +1 -0
- package/dist/es/utils.mjs +2 -2
- package/dist/es/yaml.mjs +0 -0
- package/dist/lib/ai-model/action-executor.js +173 -0
- package/dist/lib/ai-model/action-executor.js.map +1 -0
- package/dist/lib/ai-model/common.js +289 -0
- package/dist/lib/ai-model/common.js.map +1 -0
- package/dist/lib/ai-model/index.js +103 -0
- package/dist/lib/ai-model/index.js.map +1 -0
- package/dist/lib/ai-model/inspect.js +360 -0
- package/dist/lib/ai-model/inspect.js.map +1 -0
- package/dist/lib/ai-model/llm-planning.js +119 -0
- package/dist/lib/ai-model/llm-planning.js.map +1 -0
- package/dist/lib/ai-model/prompt/assertion.js +92 -0
- package/dist/lib/ai-model/prompt/assertion.js.map +1 -0
- package/dist/lib/ai-model/prompt/common.js +41 -0
- package/dist/lib/ai-model/prompt/common.js.map +1 -0
- package/dist/lib/ai-model/prompt/describe.js +78 -0
- package/dist/lib/ai-model/prompt/describe.js.map +1 -0
- package/dist/lib/ai-model/prompt/extraction.js +177 -0
- package/dist/lib/ai-model/prompt/extraction.js.map +1 -0
- package/dist/lib/ai-model/prompt/llm-locator.js +315 -0
- package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -0
- package/dist/lib/ai-model/prompt/llm-planning.js +415 -0
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -0
- package/dist/lib/ai-model/prompt/llm-section-locator.js +84 -0
- package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js +178 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -0
- package/dist/lib/ai-model/prompt/ui-tars-locator.js +68 -0
- package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +1 -0
- package/dist/lib/ai-model/prompt/ui-tars-planning.js +73 -0
- package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -0
- package/dist/lib/ai-model/prompt/util.js +175 -0
- package/dist/lib/ai-model/prompt/util.js.map +1 -0
- package/dist/lib/ai-model/prompt/yaml-generator.js +280 -0
- package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -0
- package/dist/lib/ai-model/service-caller/index.js +496 -0
- package/dist/lib/ai-model/service-caller/index.js.map +1 -0
- package/dist/lib/ai-model/ui-tars-planning.js +272 -0
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -0
- package/dist/lib/image/index.js +56 -0
- package/dist/lib/image/index.js.map +1 -0
- package/dist/lib/index.js +21 -2393
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/insight/index.js +295 -0
- package/dist/lib/insight/index.js.map +1 -0
- package/dist/lib/insight/utils.js +53 -0
- package/dist/lib/insight/utils.js.map +1 -0
- package/dist/lib/types.js +82 -0
- package/dist/lib/types.js.map +1 -0
- package/dist/lib/utils.js +2 -2
- package/dist/lib/yaml.js +20 -0
- package/dist/lib/yaml.js.map +1 -0
- package/dist/types/ai-model/action-executor.d.ts +19 -0
- package/dist/types/ai-model/common.d.ts +34 -0
- package/dist/types/ai-model/index.d.ts +11 -0
- package/dist/types/ai-model/inspect.d.ts +49 -0
- package/dist/types/ai-model/llm-planning.d.ts +10 -0
- package/dist/types/ai-model/prompt/assertion.d.ts +5 -0
- package/dist/types/ai-model/prompt/common.d.ts +2 -0
- package/dist/types/ai-model/prompt/describe.d.ts +1 -0
- package/dist/types/ai-model/prompt/extraction.d.ts +4 -0
- package/dist/types/ai-model/prompt/llm-locator.d.ts +9 -0
- package/dist/types/ai-model/prompt/llm-planning.d.ts +15 -0
- package/dist/types/ai-model/prompt/llm-section-locator.d.ts +6 -0
- package/dist/types/ai-model/prompt/playwright-generator.d.ts +25 -0
- package/dist/types/ai-model/prompt/ui-tars-locator.d.ts +1 -0
- package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
- package/dist/types/ai-model/prompt/util.d.ts +45 -0
- package/dist/types/ai-model/prompt/yaml-generator.d.ts +99 -0
- package/dist/types/ai-model/service-caller/index.d.ts +26 -0
- package/dist/types/ai-model/ui-tars-planning.d.ts +76 -0
- package/dist/types/image/index.d.ts +1 -0
- package/dist/types/index.d.ts +9 -1289
- package/dist/types/insight/index.d.ts +26 -0
- package/dist/types/insight/utils.d.ts +2 -0
- package/dist/types/tree.d.ts +1 -11
- package/dist/types/types.d.ts +399 -0
- package/dist/types/utils.d.ts +27 -47
- package/dist/types/yaml.d.ts +172 -0
- package/package.json +6 -6
- package/dist/es/ai-model.mjs +0 -2502
- package/dist/es/ai-model.mjs.map +0 -1
- package/dist/lib/ai-model.js +0 -2622
- package/dist/lib/ai-model.js.map +0 -1
- package/dist/types/ai-model.d.ts +0 -596
package/dist/lib/yaml.js
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __webpack_require__ = {};
|
|
3
|
+
(()=>{
|
|
4
|
+
__webpack_require__.r = (exports1)=>{
|
|
5
|
+
if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
|
|
6
|
+
value: 'Module'
|
|
7
|
+
});
|
|
8
|
+
Object.defineProperty(exports1, '__esModule', {
|
|
9
|
+
value: true
|
|
10
|
+
});
|
|
11
|
+
};
|
|
12
|
+
})();
|
|
13
|
+
var __webpack_exports__ = {};
|
|
14
|
+
__webpack_require__.r(__webpack_exports__);
|
|
15
|
+
for(var __webpack_i__ in __webpack_exports__)exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
|
|
16
|
+
Object.defineProperty(exports, '__esModule', {
|
|
17
|
+
value: true
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
//# sourceMappingURL=yaml.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"yaml.js","sources":["webpack://@midscene/core/webpack/runtime/make_namespace_object"],"sourcesContent":["// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};"],"names":["__webpack_require__","Symbol","Object"],"mappings":";;;IACAA,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOC,UAA0BA,OAAO,WAAW,EACrDC,OAAO,cAAc,CAAC,UAASD,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEC,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { ExecutionDump, ExecutionTask, ExecutionTaskApply, ExecutionTaskProgressOptions } from '../types';
|
|
2
|
+
export declare class Executor {
|
|
3
|
+
name: string;
|
|
4
|
+
tasks: ExecutionTask[];
|
|
5
|
+
status: 'init' | 'pending' | 'running' | 'completed' | 'error';
|
|
6
|
+
onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];
|
|
7
|
+
constructor(name: string, options?: ExecutionTaskProgressOptions & {
|
|
8
|
+
tasks?: ExecutionTaskApply[];
|
|
9
|
+
});
|
|
10
|
+
private markTaskAsPending;
|
|
11
|
+
append(task: ExecutionTaskApply[] | ExecutionTaskApply): Promise<void>;
|
|
12
|
+
flush(): Promise<{
|
|
13
|
+
output: any;
|
|
14
|
+
thought?: string;
|
|
15
|
+
} | undefined>;
|
|
16
|
+
isInErrorState(): boolean;
|
|
17
|
+
latestErrorTask(): ExecutionTask | null;
|
|
18
|
+
dump(): ExecutionDump;
|
|
19
|
+
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import type { AIUsageInfo, BaseElement, ElementTreeNode, MidsceneYamlFlowItem, PlanningAction, Rect, Size } from '../types';
|
|
2
|
+
import type { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources/index';
|
|
3
|
+
import type { PlanningLocateParam } from '../types';
|
|
4
|
+
export type AIArgs = [
|
|
5
|
+
ChatCompletionSystemMessageParam,
|
|
6
|
+
...ChatCompletionUserMessageParam[]
|
|
7
|
+
];
|
|
8
|
+
export declare enum AIActionType {
|
|
9
|
+
ASSERT = 0,
|
|
10
|
+
INSPECT_ELEMENT = 1,
|
|
11
|
+
EXTRACT_DATA = 2,
|
|
12
|
+
PLAN = 3,
|
|
13
|
+
DESCRIBE_ELEMENT = 4
|
|
14
|
+
}
|
|
15
|
+
export declare function callAiFn<T>(msgs: AIArgs, AIActionTypeValue: AIActionType): Promise<{
|
|
16
|
+
content: T;
|
|
17
|
+
usage?: AIUsageInfo;
|
|
18
|
+
}>;
|
|
19
|
+
export declare function fillBboxParam(locate: PlanningLocateParam, width: number, height: number): PlanningLocateParam;
|
|
20
|
+
export declare function adaptQwenBbox(bbox: number[]): [number, number, number, number];
|
|
21
|
+
export declare function adaptDoubaoBbox(bbox: string[] | number[] | string, width: number, height: number): [number, number, number, number];
|
|
22
|
+
export declare function adaptBbox(bbox: number[], width: number, height: number): [number, number, number, number];
|
|
23
|
+
export declare function adaptGeminiBbox(bbox: number[], width: number, height: number): [number, number, number, number];
|
|
24
|
+
export declare function adaptBboxToRect(bbox: number[], width: number, height: number, offsetX?: number, offsetY?: number): Rect;
|
|
25
|
+
export declare function warnGPT4oSizeLimit(size: Size): void;
|
|
26
|
+
export declare function mergeRects(rects: Rect[]): {
|
|
27
|
+
left: number;
|
|
28
|
+
top: number;
|
|
29
|
+
width: number;
|
|
30
|
+
height: number;
|
|
31
|
+
};
|
|
32
|
+
export declare function expandSearchArea(rect: Rect, screenSize: Size): Rect;
|
|
33
|
+
export declare function markupImageForLLM(screenshotBase64: string, tree: ElementTreeNode<BaseElement>, size: Size): Promise<string>;
|
|
34
|
+
export declare function buildYamlFlowFromPlans(plans: PlanningAction[], sleep?: number): MidsceneYamlFlowItem[];
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
export { callAiFnWithStringResponse, callToGetJSONObject, call as callAi, } from './service-caller/index';
|
|
2
|
+
export { systemPromptToLocateElement } from './prompt/llm-locator';
|
|
3
|
+
export { describeUserPage, elementByPositionWithElementInfo, } from './prompt/util';
|
|
4
|
+
export { generatePlaywrightTest, generatePlaywrightTestStream, } from './prompt/playwright-generator';
|
|
5
|
+
export { generateYamlTest, generateYamlTestStream, } from './prompt/yaml-generator';
|
|
6
|
+
export type { ChatCompletionMessageParam } from 'openai/resources/index';
|
|
7
|
+
export { AiLocateElement, AiExtractElementInfo, AiAssert, AiLocateSection, } from './inspect';
|
|
8
|
+
export { plan } from './llm-planning';
|
|
9
|
+
export { callAiFn, adaptBboxToRect, } from './common';
|
|
10
|
+
export { vlmPlanning, resizeImageForUiTars } from './ui-tars-planning';
|
|
11
|
+
export { AIActionType, type AIArgs } from './common';
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import type { AIAssertionResponse, AIDataExtractionResponse, AIElementLocatorResponse, AIElementResponse, AISectionLocatorResponse, AIUsageInfo, BaseElement, ElementById, InsightExtractOption, Rect, ReferenceImage, TMultimodalPrompt, TUserPrompt, UIContext } from '../types';
|
|
2
|
+
import type { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources/index';
|
|
3
|
+
import { callAiFn } from './common';
|
|
4
|
+
export type AIArgs = [
|
|
5
|
+
ChatCompletionSystemMessageParam,
|
|
6
|
+
...ChatCompletionUserMessageParam[]
|
|
7
|
+
];
|
|
8
|
+
export declare function AiLocateElement<ElementType extends BaseElement = BaseElement>(options: {
|
|
9
|
+
context: UIContext<ElementType>;
|
|
10
|
+
targetElementDescription: TUserPrompt;
|
|
11
|
+
referenceImage?: ReferenceImage;
|
|
12
|
+
callAI?: typeof callAiFn<AIElementResponse | [number, number]>;
|
|
13
|
+
searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;
|
|
14
|
+
}): Promise<{
|
|
15
|
+
parseResult: AIElementLocatorResponse;
|
|
16
|
+
rect?: Rect;
|
|
17
|
+
rawResponse: string;
|
|
18
|
+
elementById: ElementById;
|
|
19
|
+
usage?: AIUsageInfo;
|
|
20
|
+
isOrderSensitive?: boolean;
|
|
21
|
+
}>;
|
|
22
|
+
export declare function AiLocateSection(options: {
|
|
23
|
+
context: UIContext<BaseElement>;
|
|
24
|
+
sectionDescription: TUserPrompt;
|
|
25
|
+
callAI?: typeof callAiFn<AISectionLocatorResponse>;
|
|
26
|
+
}): Promise<{
|
|
27
|
+
rect?: Rect;
|
|
28
|
+
imageBase64?: string;
|
|
29
|
+
error?: string;
|
|
30
|
+
rawResponse: string;
|
|
31
|
+
usage?: AIUsageInfo;
|
|
32
|
+
}>;
|
|
33
|
+
export declare function AiExtractElementInfo<T, ElementType extends BaseElement = BaseElement>(options: {
|
|
34
|
+
dataQuery: string | Record<string, string>;
|
|
35
|
+
multimodalPrompt?: TMultimodalPrompt;
|
|
36
|
+
context: UIContext<ElementType>;
|
|
37
|
+
extractOption?: InsightExtractOption;
|
|
38
|
+
}): Promise<{
|
|
39
|
+
parseResult: AIDataExtractionResponse<T>;
|
|
40
|
+
elementById: (idOrIndexId: string) => ElementType;
|
|
41
|
+
usage: AIUsageInfo | undefined;
|
|
42
|
+
}>;
|
|
43
|
+
export declare function AiAssert<ElementType extends BaseElement = BaseElement>(options: {
|
|
44
|
+
assertion: TUserPrompt;
|
|
45
|
+
context: UIContext<ElementType>;
|
|
46
|
+
}): Promise<{
|
|
47
|
+
content: AIAssertionResponse;
|
|
48
|
+
usage: AIUsageInfo | undefined;
|
|
49
|
+
}>;
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { DeviceAction, PageType, PlanningAIResponse, UIContext } from '../types';
|
|
2
|
+
import { callAiFn } from './common';
|
|
3
|
+
export declare function plan(userInstruction: string, opts: {
|
|
4
|
+
context: UIContext;
|
|
5
|
+
pageType: PageType;
|
|
6
|
+
actionSpace: DeviceAction[];
|
|
7
|
+
callAI?: typeof callAiFn<PlanningAIResponse>;
|
|
8
|
+
log?: string;
|
|
9
|
+
actionContext?: string;
|
|
10
|
+
}): Promise<PlanningAIResponse>;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const elementDescriberInstruction: () => string;
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { ResponseFormatJSONSchema } from 'openai/resources/index';
|
|
2
|
+
export declare function systemPromptToExtract(): string;
|
|
3
|
+
export declare const extractDataQueryPrompt: (pageDescription: string, dataQuery: string | Record<string, string>) => Promise<string>;
|
|
4
|
+
export declare const extractDataSchema: ResponseFormatJSONSchema;
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { PromptTemplate } from '@langchain/core/prompts';
|
|
2
|
+
import type { vlLocateMode } from '@midscene/shared/env';
|
|
3
|
+
import type { ResponseFormatJSONSchema } from 'openai/resources/index';
|
|
4
|
+
export declare function systemPromptToLocateElement(vlMode: ReturnType<typeof vlLocateMode>): string;
|
|
5
|
+
export declare const locatorSchema: ResponseFormatJSONSchema;
|
|
6
|
+
export declare const findElementPrompt: PromptTemplate<{
|
|
7
|
+
pageDescription: any;
|
|
8
|
+
targetElementDescription: any;
|
|
9
|
+
}, any>;
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { DeviceAction } from '../../types';
|
|
2
|
+
import { PromptTemplate } from '@langchain/core/prompts';
|
|
3
|
+
import type { vlLocateMode } from '@midscene/shared/env';
|
|
4
|
+
import type { ResponseFormatJSONSchema } from 'openai/resources/index';
|
|
5
|
+
export declare const descriptionForAction: (action: DeviceAction, locatorScheme: string) => string;
|
|
6
|
+
export declare function systemPromptToTaskPlanning({ actionSpace, vlMode, }: {
|
|
7
|
+
actionSpace: DeviceAction[];
|
|
8
|
+
vlMode: ReturnType<typeof vlLocateMode>;
|
|
9
|
+
}): Promise<string>;
|
|
10
|
+
export declare const planSchema: ResponseFormatJSONSchema;
|
|
11
|
+
export declare const generateTaskBackgroundContext: (userInstruction: string, log?: string, userActionContext?: string) => string;
|
|
12
|
+
export declare const automationUserPrompt: (vlMode: ReturnType<typeof vlLocateMode>) => PromptTemplate<{
|
|
13
|
+
pageDescription: any;
|
|
14
|
+
taskBackgroundContext: any;
|
|
15
|
+
}, any>;
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { PromptTemplate } from '@langchain/core/prompts';
|
|
2
|
+
import type { vlLocateMode } from '@midscene/shared/env';
|
|
3
|
+
export declare function systemPromptToLocateSection(vlMode: ReturnType<typeof vlLocateMode>): string;
|
|
4
|
+
export declare const sectionLocatorInstruction: PromptTemplate<{
|
|
5
|
+
sectionDescription: any;
|
|
6
|
+
}, any>;
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { StreamingAIResponse, StreamingCodeGenerationOptions } from '../../types';
|
|
2
|
+
import { type ChromeRecordedEvent, type EventCounts, type EventSummary, type InputDescription, type ProcessedEvent, createEventCounts, createMessageContent, extractInputDescriptions, filterEventsByType, getScreenshotsForLLM, prepareEventSummary, processEventsForLLM, validateEvents } from './yaml-generator';
|
|
3
|
+
export interface PlaywrightGenerationOptions {
|
|
4
|
+
testName?: string;
|
|
5
|
+
includeScreenshots?: boolean;
|
|
6
|
+
includeTimestamps?: boolean;
|
|
7
|
+
maxScreenshots?: number;
|
|
8
|
+
description?: string;
|
|
9
|
+
viewportSize?: {
|
|
10
|
+
width: number;
|
|
11
|
+
height: number;
|
|
12
|
+
};
|
|
13
|
+
waitForNetworkIdle?: boolean;
|
|
14
|
+
waitForNetworkIdleTimeout?: number;
|
|
15
|
+
}
|
|
16
|
+
export type { ChromeRecordedEvent, EventCounts, InputDescription, ProcessedEvent, EventSummary, };
|
|
17
|
+
export { getScreenshotsForLLM, filterEventsByType, createEventCounts, extractInputDescriptions, processEventsForLLM, prepareEventSummary, createMessageContent, validateEvents, };
|
|
18
|
+
/**
|
|
19
|
+
* Generates Playwright test code from recorded events
|
|
20
|
+
*/
|
|
21
|
+
export declare const generatePlaywrightTest: (events: ChromeRecordedEvent[], options?: PlaywrightGenerationOptions) => Promise<string>;
|
|
22
|
+
/**
|
|
23
|
+
* Generates Playwright test code from recorded events with streaming support
|
|
24
|
+
*/
|
|
25
|
+
export declare const generatePlaywrightTestStream: (events: ChromeRecordedEvent[], options?: PlaywrightGenerationOptions & StreamingCodeGenerationOptions) => Promise<StreamingAIResponse>;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function systemPromptToLocateElementPosition(): string;
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import type { BaseElement, ElementTreeNode, Size, UIContext } from '../../types';
|
|
2
|
+
export declare function describeSize(size: Size): string;
|
|
3
|
+
export declare function describeElement(elements: (Pick<BaseElement, 'rect' | 'content'> & {
|
|
4
|
+
id: string;
|
|
5
|
+
})[]): string;
|
|
6
|
+
export declare const distanceThreshold = 16;
|
|
7
|
+
export declare function elementByPositionWithElementInfo(treeRoot: ElementTreeNode<BaseElement>, position: {
|
|
8
|
+
x: number;
|
|
9
|
+
y: number;
|
|
10
|
+
}, options?: {
|
|
11
|
+
requireStrictDistance?: boolean;
|
|
12
|
+
filterPositionElements?: boolean;
|
|
13
|
+
}): BaseElement | undefined;
|
|
14
|
+
export declare function distance(point1: {
|
|
15
|
+
x: number;
|
|
16
|
+
y: number;
|
|
17
|
+
}, point2: {
|
|
18
|
+
x: number;
|
|
19
|
+
y: number;
|
|
20
|
+
}): number;
|
|
21
|
+
export declare const samplePageDescription = "\nAnd the page is described as follows:\n====================\nThe size of the page: 1280 x 720\nSome of the elements are marked with a rectangle in the screenshot corresponding to the markerId, some are not.\n\nDescription of all the elements in screenshot:\n<div id=\"969f1637\" markerId=\"1\" left=\"100\" top=\"100\" width=\"100\" height=\"100\"> // The markerId indicated by the rectangle label in the screenshot\n <h4 id=\"b211ecb2\" markerId=\"5\" left=\"150\" top=\"150\" width=\"90\" height=\"60\">\n The username is accepted\n </h4>\n ...many more\n</div>\n====================\n";
|
|
22
|
+
export declare function describeUserPage<ElementType extends BaseElement = BaseElement>(context: Omit<UIContext<ElementType>, 'describer'>, opt?: {
|
|
23
|
+
truncateTextLength?: number;
|
|
24
|
+
filterNonTextContent?: boolean;
|
|
25
|
+
domIncluded?: boolean | 'visible-only';
|
|
26
|
+
visibleOnly?: boolean;
|
|
27
|
+
}): Promise<{
|
|
28
|
+
description: string;
|
|
29
|
+
elementById(idOrIndexId: string): ElementType;
|
|
30
|
+
elementByPosition(position: {
|
|
31
|
+
x: number;
|
|
32
|
+
y: number;
|
|
33
|
+
}, size: {
|
|
34
|
+
width: number;
|
|
35
|
+
height: number;
|
|
36
|
+
}): BaseElement | undefined;
|
|
37
|
+
insertElementByPosition(position: {
|
|
38
|
+
x: number;
|
|
39
|
+
y: number;
|
|
40
|
+
}): ElementType;
|
|
41
|
+
size: {
|
|
42
|
+
width: number;
|
|
43
|
+
height: number;
|
|
44
|
+
};
|
|
45
|
+
}>;
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import type { StreamingAIResponse, StreamingCodeGenerationOptions } from '../../types';
|
|
2
|
+
export interface EventCounts {
|
|
3
|
+
navigation: number;
|
|
4
|
+
click: number;
|
|
5
|
+
input: number;
|
|
6
|
+
scroll: number;
|
|
7
|
+
total: number;
|
|
8
|
+
}
|
|
9
|
+
export interface InputDescription {
|
|
10
|
+
description: string;
|
|
11
|
+
value: string;
|
|
12
|
+
}
|
|
13
|
+
export interface ProcessedEvent {
|
|
14
|
+
type: string;
|
|
15
|
+
timestamp: number;
|
|
16
|
+
url?: string;
|
|
17
|
+
title?: string;
|
|
18
|
+
elementDescription?: string;
|
|
19
|
+
value?: string;
|
|
20
|
+
pageInfo?: any;
|
|
21
|
+
elementRect?: any;
|
|
22
|
+
}
|
|
23
|
+
export interface EventSummary {
|
|
24
|
+
testName: string;
|
|
25
|
+
startUrl: string;
|
|
26
|
+
eventCounts: EventCounts;
|
|
27
|
+
urls: string[];
|
|
28
|
+
clickDescriptions: string[];
|
|
29
|
+
inputDescriptions: InputDescription[];
|
|
30
|
+
events: ProcessedEvent[];
|
|
31
|
+
}
|
|
32
|
+
export interface ChromeRecordedEvent {
|
|
33
|
+
type: string;
|
|
34
|
+
timestamp: number;
|
|
35
|
+
url?: string;
|
|
36
|
+
title?: string;
|
|
37
|
+
elementDescription?: string;
|
|
38
|
+
value?: string;
|
|
39
|
+
pageInfo?: any;
|
|
40
|
+
elementRect?: any;
|
|
41
|
+
screenshotBefore?: string;
|
|
42
|
+
screenshotAfter?: string;
|
|
43
|
+
screenshotWithBox?: string;
|
|
44
|
+
}
|
|
45
|
+
export interface YamlGenerationOptions {
|
|
46
|
+
testName?: string;
|
|
47
|
+
includeTimestamps?: boolean;
|
|
48
|
+
maxScreenshots?: number;
|
|
49
|
+
description?: string;
|
|
50
|
+
}
|
|
51
|
+
export interface FilteredEvents {
|
|
52
|
+
navigationEvents: ChromeRecordedEvent[];
|
|
53
|
+
clickEvents: ChromeRecordedEvent[];
|
|
54
|
+
inputEvents: ChromeRecordedEvent[];
|
|
55
|
+
scrollEvents: ChromeRecordedEvent[];
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Get screenshots from events for LLM context
|
|
59
|
+
*/
|
|
60
|
+
export declare const getScreenshotsForLLM: (events: ChromeRecordedEvent[], maxScreenshots?: number) => string[];
|
|
61
|
+
/**
|
|
62
|
+
* Filter events by type for easier processing
|
|
63
|
+
*/
|
|
64
|
+
export declare const filterEventsByType: (events: ChromeRecordedEvent[]) => FilteredEvents;
|
|
65
|
+
/**
|
|
66
|
+
* Create event counts summary
|
|
67
|
+
*/
|
|
68
|
+
export declare const createEventCounts: (filteredEvents: FilteredEvents, totalEvents: number) => EventCounts;
|
|
69
|
+
/**
|
|
70
|
+
* Extract input descriptions from input events
|
|
71
|
+
*/
|
|
72
|
+
export declare const extractInputDescriptions: (inputEvents: ChromeRecordedEvent[]) => InputDescription[];
|
|
73
|
+
/**
|
|
74
|
+
* Process events for LLM consumption
|
|
75
|
+
*/
|
|
76
|
+
export declare const processEventsForLLM: (events: ChromeRecordedEvent[]) => ProcessedEvent[];
|
|
77
|
+
/**
|
|
78
|
+
* Prepare comprehensive event summary for LLM
|
|
79
|
+
*/
|
|
80
|
+
export declare const prepareEventSummary: (events: ChromeRecordedEvent[], options?: {
|
|
81
|
+
testName?: string;
|
|
82
|
+
maxScreenshots?: number;
|
|
83
|
+
}) => EventSummary;
|
|
84
|
+
/**
|
|
85
|
+
* Create message content for LLM with optional screenshots
|
|
86
|
+
*/
|
|
87
|
+
export declare const createMessageContent: (promptText: string, screenshots?: string[], includeScreenshots?: boolean) => any[];
|
|
88
|
+
/**
|
|
89
|
+
* Validate events before processing
|
|
90
|
+
*/
|
|
91
|
+
export declare const validateEvents: (events: ChromeRecordedEvent[]) => void;
|
|
92
|
+
/**
|
|
93
|
+
* Generates YAML test configuration from recorded events using AI
|
|
94
|
+
*/
|
|
95
|
+
export declare const generateYamlTest: (events: ChromeRecordedEvent[], options?: YamlGenerationOptions) => Promise<string>;
|
|
96
|
+
/**
|
|
97
|
+
* Generates YAML test configuration from recorded events using AI with streaming support
|
|
98
|
+
*/
|
|
99
|
+
export declare const generateYamlTestStream: (events: ChromeRecordedEvent[], options?: YamlGenerationOptions & StreamingCodeGenerationOptions) => Promise<StreamingAIResponse>;
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { type AIUsageInfo } from '../../types';
|
|
2
|
+
import type { StreamingCallback } from '../../types';
|
|
3
|
+
import OpenAI from 'openai';
|
|
4
|
+
import type { ChatCompletionMessageParam } from 'openai/resources/index';
|
|
5
|
+
import { AIActionType, type AIArgs } from '../common';
|
|
6
|
+
export declare function checkAIConfig(): boolean;
|
|
7
|
+
export declare function getModelName(): string;
|
|
8
|
+
export declare function call(messages: ChatCompletionMessageParam[], AIActionTypeValue: AIActionType, responseFormat?: OpenAI.ChatCompletionCreateParams['response_format'] | OpenAI.ResponseFormatJSONObject, options?: {
|
|
9
|
+
stream?: boolean;
|
|
10
|
+
onChunk?: StreamingCallback;
|
|
11
|
+
}): Promise<{
|
|
12
|
+
content: string;
|
|
13
|
+
usage?: AIUsageInfo;
|
|
14
|
+
isStreamed: boolean;
|
|
15
|
+
}>;
|
|
16
|
+
export declare function callToGetJSONObject<T>(messages: ChatCompletionMessageParam[], AIActionTypeValue: AIActionType): Promise<{
|
|
17
|
+
content: T;
|
|
18
|
+
usage?: AIUsageInfo;
|
|
19
|
+
}>;
|
|
20
|
+
export declare function callAiFnWithStringResponse<T>(msgs: AIArgs, AIActionTypeValue: AIActionType): Promise<{
|
|
21
|
+
content: string;
|
|
22
|
+
usage?: AIUsageInfo;
|
|
23
|
+
}>;
|
|
24
|
+
export declare function extractJSONFromCodeBlock(response: string): string;
|
|
25
|
+
export declare function preprocessDoubaoBboxJson(input: string): string;
|
|
26
|
+
export declare function safeParseJson(input: string): any;
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import type { AIUsageInfo, MidsceneYamlFlowItem, PlanningAction, Size } from '../types';
|
|
2
|
+
import { actionParser } from '@ui-tars/action-parser';
|
|
3
|
+
import type { ChatCompletionMessageParam } from 'openai/resources/index';
|
|
4
|
+
type ActionType = 'click' | 'drag' | 'type' | 'hotkey' | 'finished' | 'scroll' | 'wait' | 'androidBackButton' | 'androidHomeButton' | 'androidRecentAppsButton' | 'androidLongPress' | 'androidPull';
|
|
5
|
+
export declare function vlmPlanning(options: {
|
|
6
|
+
userInstruction: string;
|
|
7
|
+
conversationHistory: ChatCompletionMessageParam[];
|
|
8
|
+
size: {
|
|
9
|
+
width: number;
|
|
10
|
+
height: number;
|
|
11
|
+
};
|
|
12
|
+
}): Promise<{
|
|
13
|
+
actions: PlanningAction<any>[];
|
|
14
|
+
actionsFromModel: ReturnType<typeof actionParser>['parsed'];
|
|
15
|
+
action_summary: string;
|
|
16
|
+
yamlFlow?: MidsceneYamlFlowItem[];
|
|
17
|
+
usage?: AIUsageInfo;
|
|
18
|
+
rawResponse?: string;
|
|
19
|
+
}>;
|
|
20
|
+
interface BaseAction {
|
|
21
|
+
action_type: ActionType;
|
|
22
|
+
action_inputs: Record<string, any>;
|
|
23
|
+
reflection: string | null;
|
|
24
|
+
thought: string | null;
|
|
25
|
+
}
|
|
26
|
+
interface ClickAction extends BaseAction {
|
|
27
|
+
action_type: 'click';
|
|
28
|
+
action_inputs: {
|
|
29
|
+
start_box: string;
|
|
30
|
+
};
|
|
31
|
+
}
|
|
32
|
+
interface DragAction extends BaseAction {
|
|
33
|
+
action_type: 'drag';
|
|
34
|
+
action_inputs: {
|
|
35
|
+
start_box: string;
|
|
36
|
+
end_box: string;
|
|
37
|
+
};
|
|
38
|
+
}
|
|
39
|
+
interface WaitAction extends BaseAction {
|
|
40
|
+
action_type: 'wait';
|
|
41
|
+
action_inputs: {
|
|
42
|
+
time: string;
|
|
43
|
+
};
|
|
44
|
+
}
|
|
45
|
+
interface TypeAction extends BaseAction {
|
|
46
|
+
action_type: 'type';
|
|
47
|
+
action_inputs: {
|
|
48
|
+
content: string;
|
|
49
|
+
};
|
|
50
|
+
}
|
|
51
|
+
interface HotkeyAction extends BaseAction {
|
|
52
|
+
action_type: 'hotkey';
|
|
53
|
+
action_inputs: {
|
|
54
|
+
key: string;
|
|
55
|
+
};
|
|
56
|
+
}
|
|
57
|
+
interface ScrollAction extends BaseAction {
|
|
58
|
+
action_type: 'scroll';
|
|
59
|
+
action_inputs: {
|
|
60
|
+
direction: 'up' | 'down';
|
|
61
|
+
};
|
|
62
|
+
}
|
|
63
|
+
interface FinishedAction extends BaseAction {
|
|
64
|
+
action_type: 'finished';
|
|
65
|
+
action_inputs: Record<string, never>;
|
|
66
|
+
}
|
|
67
|
+
interface AndroidLongPressAction extends BaseAction {
|
|
68
|
+
action_type: 'androidLongPress';
|
|
69
|
+
action_inputs: {
|
|
70
|
+
start_coords: [number, number];
|
|
71
|
+
duration?: number;
|
|
72
|
+
};
|
|
73
|
+
}
|
|
74
|
+
export type Action = ClickAction | DragAction | TypeAction | HotkeyAction | ScrollAction | FinishedAction | WaitAction | AndroidLongPressAction;
|
|
75
|
+
export declare function resizeImageForUiTars(imageBase64: string, size: Size): Promise<string>;
|
|
76
|
+
export {};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { imageInfo, imageInfoOfBase64, localImg2Base64, httpImg2Base64, resizeImg, saveBase64Image, zoomForGPT4o, } from '@midscene/shared/img';
|