@midscene/core 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintignore +3 -0
- package/.eslintrc.js +9 -0
- package/CONTRIBUTING.md +5 -0
- package/LICENSE +21 -0
- package/demo_data/demo.actions.json +160 -0
- package/demo_data/demo.insight.json +3571 -0
- package/demo_data/index.d.ts +1 -0
- package/demo_data/index.js +6 -0
- package/dist/es/ai-model.js +429 -0
- package/dist/es/image.js +261 -0
- package/dist/es/index.js +1083 -0
- package/dist/es/utils.js +96 -0
- package/dist/lib/ai-model.js +467 -0
- package/dist/lib/image.js +307 -0
- package/dist/lib/index.js +1124 -0
- package/dist/lib/utils.js +141 -0
- package/dist/types/ai-model.d.ts +32 -0
- package/dist/types/image.d.ts +119 -0
- package/dist/types/index.d.ts +43 -0
- package/dist/types/types-1f7912d5.d.ts +219 -0
- package/dist/types/util-3a13ce3d.d.ts +21 -0
- package/dist/types/utils.d.ts +20 -0
- package/modern.config.ts +18 -0
- package/package.json +85 -0
- package/third-party-licenses.txt +415 -0
- package/tsconfig.json +22 -0
- package/vitest.config.ts +20 -0
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __create = Object.create;
|
|
3
|
+
var __defProp = Object.defineProperty;
|
|
4
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
7
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
+
var __export = (target, all) => {
|
|
9
|
+
for (var name in all)
|
|
10
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
11
|
+
};
|
|
12
|
+
var __copyProps = (to, from, except, desc) => {
|
|
13
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
14
|
+
for (let key of __getOwnPropNames(from))
|
|
15
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
16
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
17
|
+
}
|
|
18
|
+
return to;
|
|
19
|
+
};
|
|
20
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
21
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
22
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
23
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
24
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
25
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
26
|
+
mod
|
|
27
|
+
));
|
|
28
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
|
+
|
|
30
|
+
// src/utils.ts
|
|
31
|
+
var utils_exports = {};
|
|
32
|
+
__export(utils_exports, {
|
|
33
|
+
commonScreenshotParam: () => commonScreenshotParam,
|
|
34
|
+
getDumpDir: () => getDumpDir,
|
|
35
|
+
getPkgInfo: () => getPkgInfo,
|
|
36
|
+
getTmpDir: () => getTmpDir,
|
|
37
|
+
getTmpFile: () => getTmpFile,
|
|
38
|
+
groupedActionDumpFileExt: () => groupedActionDumpFileExt,
|
|
39
|
+
insightDumpFileExt: () => insightDumpFileExt,
|
|
40
|
+
overlapped: () => overlapped,
|
|
41
|
+
setDumpDir: () => setDumpDir,
|
|
42
|
+
sleep: () => sleep,
|
|
43
|
+
writeDumpFile: () => writeDumpFile
|
|
44
|
+
});
|
|
45
|
+
module.exports = __toCommonJS(utils_exports);
|
|
46
|
+
var import_os = require("os");
|
|
47
|
+
var import_path = require("path");
|
|
48
|
+
var import_fs = require("fs");
|
|
49
|
+
var import_crypto = require("crypto");
|
|
50
|
+
var import_assert = __toESM(require("assert"));
|
|
51
|
+
var pkg;
|
|
52
|
+
function getPkgInfo() {
|
|
53
|
+
if (pkg) {
|
|
54
|
+
return pkg;
|
|
55
|
+
}
|
|
56
|
+
let pkgJsonFile = "";
|
|
57
|
+
if ((0, import_fs.existsSync)((0, import_path.join)(__dirname, "../package.json"))) {
|
|
58
|
+
pkgJsonFile = (0, import_path.join)(__dirname, "../package.json");
|
|
59
|
+
} else if ((0, import_fs.existsSync)((0, import_path.join)(__dirname, "../../../package.json"))) {
|
|
60
|
+
pkgJsonFile = (0, import_path.join)(__dirname, "../../../package.json");
|
|
61
|
+
}
|
|
62
|
+
if (pkgJsonFile) {
|
|
63
|
+
const { name, version } = JSON.parse((0, import_fs.readFileSync)(pkgJsonFile, "utf-8"));
|
|
64
|
+
pkg = { name, version };
|
|
65
|
+
return pkg;
|
|
66
|
+
} else {
|
|
67
|
+
return {
|
|
68
|
+
name: "midscene-unknown-page-name",
|
|
69
|
+
version: "0.0.0"
|
|
70
|
+
};
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
// Directory dump files are written to; defaults to "./midscene_run/" under the current working directory.
var logDir = (0, import_path.join)(process.cwd(), "./midscene_run/");
// True once writeDumpFile has prepared the current logDir (created it and listed it in .gitignore).
var logEnvReady = false;
// File-name suffixes for the two kinds of dump files.
var insightDumpFileExt = "insight-dump.json";
var groupedActionDumpFileExt = "web-dump.json";
/**
 * Returns the directory dump files are currently written to.
 *
 * @returns {string} The current dump directory.
 */
function getDumpDir() {
  return logDir;
}
/**
 * Redirects all subsequent dump files to `dir`.
 *
 * @param {string} dir - The new dump directory.
 */
function setDumpDir(dir) {
  logDir = dir;
  // Re-arm the one-time setup in writeDumpFile. Without this, a directory set
  // after the first dump is never created or git-ignored and the next write
  // fails with ENOENT.
  logEnvReady = false;
}
|
|
83
|
+
/**
 * Writes a dump file into the dump directory and mirrors it to `latest.<fileExt>`.
 *
 * The dump directory is (re)created on every call, so a directory that was
 * changed or removed after the first dump no longer makes the write fail.
 * On the first call, the directory name is also appended to the `.gitignore`
 * next to the dump directory unless that file already mentions it.
 *
 * @param {string} fileName - Base name of the dump file, without extension.
 * @param {string} fileExt - Extension appended after a dot, e.g. "web-dump.json".
 * @param {string} fileContent - Content to write.
 * @returns {string} Path of the file that was written.
 * @throws {AssertionError} If no dump directory is set.
 */
function writeDumpFile(fileName, fileExt, fileContent) {
  (0, import_assert.default)(logDir, "logDir should be set before writing dump file");
  // With `recursive: true` this is a no-op when the directory already exists.
  (0, import_fs.mkdirSync)(logDir, { recursive: true });
  if (!logEnvReady) {
    const gitIgnorePath = (0, import_path.join)(logDir, "../.gitignore");
    const gitIgnoreContent = (0, import_fs.existsSync)(gitIgnorePath) ? (0, import_fs.readFileSync)(gitIgnorePath, "utf-8") : "";
    const logDirName = (0, import_path.basename)(logDir);
    if (!gitIgnoreContent.includes(`${logDirName}/`)) {
      // Keep the existing content and append the dump directory entry after it.
      (0, import_fs.writeFileSync)(
        gitIgnorePath,
        `${gitIgnoreContent}\n# MidScene.js dump files\n${logDirName}/\n`,
        "utf-8"
      );
    }
    logEnvReady = true;
  }
  const filePath = (0, import_path.join)(getDumpDir(), `${fileName}.${fileExt}`);
  (0, import_fs.writeFileSync)(filePath, fileContent);
  // Keep a stable "latest" copy alongside the uniquely named dump.
  (0, import_fs.copyFileSync)(filePath, (0, import_path.join)(getDumpDir(), `latest.${fileExt}`));
  return filePath;
}
|
|
112
|
+
/**
 * Returns a temp directory named after this package inside the OS temp
 * directory, creating it (and any missing parents) when needed.
 *
 * @returns {string} Path of the temp directory.
 */
function getTmpDir() {
  const systemTmp = (0, import_os.tmpdir)();
  const packageName = getPkgInfo().name;
  const packageTmpDir = (0, import_path.join)(systemTmp, packageName);
  (0, import_fs.mkdirSync)(packageTmpDir, { recursive: true });
  return packageTmpDir;
}
|
|
117
|
+
/**
 * Builds a path for a new, uniquely named file inside the package temp directory.
 * Only the path is produced; the file itself is not created.
 *
 * @param {string} fileExt - Extension appended after a dot.
 * @returns {string} Path of the form `<tmp dir>/<uuid>.<fileExt>`.
 */
function getTmpFile(fileExt) {
  const uniqueId = (0, import_crypto.randomUUID)();
  return (0, import_path.join)(getTmpDir(), `${uniqueId}.${fileExt}`);
}
|
|
121
|
+
/**
 * Tells whether two rectangles intersect. Rectangles that only touch along an
 * edge or at a corner do not count as overlapping.
 *
 * @param container - Rectangle with `left`, `top`, `width` and `height`.
 * @param target - Rectangle with `left`, `top`, `width` and `height`.
 * @returns {boolean} True when the rectangles share some area.
 */
function overlapped(container, target) {
  const intersectsHorizontally =
    container.left < target.left + target.width &&
    container.left + container.width > target.left;
  const intersectsVertically =
    container.top < target.top + target.height &&
    container.top + container.height > target.top;
  return intersectsHorizontally && intersectsVertically;
}
|
|
124
|
+
/**
 * Waits for the given number of milliseconds.
 *
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise} Promise that resolves, with no value, once the timer fires.
 */
async function sleep(ms) {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
127
|
+
var commonScreenshotParam = { type: "jpeg", quality: 75 };
|
|
128
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
129
|
+
0 && (module.exports = {
|
|
130
|
+
commonScreenshotParam,
|
|
131
|
+
getDumpDir,
|
|
132
|
+
getPkgInfo,
|
|
133
|
+
getTmpDir,
|
|
134
|
+
getTmpFile,
|
|
135
|
+
groupedActionDumpFileExt,
|
|
136
|
+
insightDumpFileExt,
|
|
137
|
+
overlapped,
|
|
138
|
+
setDumpDir,
|
|
139
|
+
sleep,
|
|
140
|
+
writeDumpFile
|
|
141
|
+
});
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { c as callToGetJSONObject } from './util-3a13ce3d.js';
|
|
2
|
+
export { d as describeUserPage } from './util-3a13ce3d.js';
|
|
3
|
+
export { ChatCompletionMessageParam } from 'openai/resources';
|
|
4
|
+
import { B as BaseElement, U as UIContext, e as AIElementParseResponse, f as AISectionParseResponse } from './types-1f7912d5.js';
|
|
5
|
+
|
|
6
|
+
declare function systemPromptToFindElement(description: string, multi?: boolean): string;
|
|
7
|
+
|
|
8
|
+
declare function AiInspectElement<ElementType extends BaseElement = BaseElement>(options: {
|
|
9
|
+
context: UIContext<ElementType>;
|
|
10
|
+
multi: boolean;
|
|
11
|
+
findElementDescription: string;
|
|
12
|
+
callAI?: typeof callToGetJSONObject;
|
|
13
|
+
}): Promise<{
|
|
14
|
+
parseResult: AIElementParseResponse;
|
|
15
|
+
elementById: (id: string) => ElementType;
|
|
16
|
+
systemPrompt: string;
|
|
17
|
+
}>;
|
|
18
|
+
declare function AiExtractElementInfo<T, ElementType extends BaseElement = BaseElement>(options: {
|
|
19
|
+
dataQuery: string | Record<string, string>;
|
|
20
|
+
sectionConstraints: {
|
|
21
|
+
name: string;
|
|
22
|
+
description: string;
|
|
23
|
+
}[];
|
|
24
|
+
context: UIContext<ElementType>;
|
|
25
|
+
callAI?: typeof callToGetJSONObject;
|
|
26
|
+
}): Promise<{
|
|
27
|
+
parseResult: AISectionParseResponse<T>;
|
|
28
|
+
elementById: (id: string) => ElementType;
|
|
29
|
+
systemPrompt: string;
|
|
30
|
+
}>;
|
|
31
|
+
|
|
32
|
+
export { AiExtractElementInfo, AiInspectElement, callToGetJSONObject, systemPromptToFindElement };
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import { Buffer } from 'buffer';
|
|
2
|
+
import { S as Size, R as Rect, g as UISection, U as UIContext, t as Color } from './types-1f7912d5.js';
|
|
3
|
+
import { Buffer as Buffer$1 } from 'node:buffer';
|
|
4
|
+
import 'openai/resources';
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Retrieves the dimensions of an image asynchronously
|
|
8
|
+
*
|
|
9
|
+
* @param image - The image data, which can be a string path or a buffer
|
|
10
|
+
* @returns A Promise that resolves to an object containing the width and height of the image
|
|
11
|
+
* @throws Error if the image data is invalid
|
|
12
|
+
*/
|
|
13
|
+
declare function imageInfo(image: string | Buffer): Promise<Size>;
|
|
14
|
+
/**
|
|
15
|
+
* Retrieves the dimensions of an image from a base64-encoded string
|
|
16
|
+
*
|
|
17
|
+
* @param imageBase64 - The base64-encoded image data
|
|
18
|
+
* @returns A Promise that resolves to an object containing the width and height of the image
|
|
19
|
+
* @throws Error if the image data is invalid
|
|
20
|
+
*/
|
|
21
|
+
declare function imageInfoOfBase64(imageBase64: string): Promise<Size>;
|
|
22
|
+
/**
|
|
23
|
+
* Encodes an image file to a base64 encoded string
|
|
24
|
+
*
|
|
25
|
+
* @param image The path of the image file
|
|
26
|
+
* @param withHeader Determine whether to return data including the file header information, the default is true
|
|
27
|
+
*
|
|
28
|
+
* @returns The base64 encoded string of the image file, which may or may not include header information depending on the withHeader parameter
|
|
29
|
+
*
|
|
30
|
+
* @throws When the image type is not supported, an error will be thrown
|
|
31
|
+
*/
|
|
32
|
+
declare function base64Encoded(image: string, withHeader?: boolean): string;
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Saves a Base64-encoded image to a file
|
|
36
|
+
*
|
|
37
|
+
* @param options - An object containing the Base64-encoded image data and the output file path
|
|
38
|
+
* @param options.base64Data - The Base64-encoded image data
|
|
39
|
+
* @param options.outputPath - The path where the image will be saved
|
|
40
|
+
* @throws Error if there is an error during the saving process
|
|
41
|
+
*/
|
|
42
|
+
declare function saveBase64Image(options: {
|
|
43
|
+
base64Data: string;
|
|
44
|
+
outputPath: string;
|
|
45
|
+
}): Promise<void>;
|
|
46
|
+
/**
|
|
47
|
+
* Transforms an image path into a base64-encoded string
|
|
48
|
+
* @param inputPath - The path of the image file to be encoded
|
|
49
|
+
* @returns A Promise that resolves to a base64-encoded string representing the image file
|
|
50
|
+
*/
|
|
51
|
+
declare function transformImgPathToBase64(inputPath: string): Promise<string>;
|
|
52
|
+
/**
|
|
53
|
+
* Resizes an image from a base64-encoded string
|
|
54
|
+
*
|
|
55
|
+
* @param base64Data - A base64-encoded string representing the image
|
|
56
|
+
* @returns A Promise that resolves to a base64-encoded string representing the resized image
|
|
57
|
+
* @throws An error if the width or height cannot be determined from the metadata
|
|
58
|
+
*/
|
|
59
|
+
declare function resizeImg(base64Data: string): Promise<string>;
|
|
60
|
+
/**
|
|
61
|
+
* Calculates new dimensions for an image while maintaining its aspect ratio.
|
|
62
|
+
*
|
|
63
|
+
* This function is designed to resize an image to fit within a specified maximum width and height
|
|
64
|
+
* while maintaining the original aspect ratio. If the original width or height exceeds the maximum
|
|
65
|
+
* dimensions, the image will be scaled down to fit.
|
|
66
|
+
*
|
|
67
|
+
* @param {number} originalWidth - The original width of the image.
|
|
68
|
+
* @param {number} originalHeight - The original height of the image.
|
|
69
|
+
* @returns {Object} An object containing the new width and height.
|
|
70
|
+
* @throws {Error} Throws an error if the width or height is not a positive number.
|
|
71
|
+
*/
|
|
72
|
+
declare function calculateNewDimensions(originalWidth: number, originalHeight: number): {
|
|
73
|
+
width: number;
|
|
74
|
+
height: number;
|
|
75
|
+
};
|
|
76
|
+
/**
|
|
77
|
+
* Trims an image and returns the trimming information, including the offset from the left and top edges, and the trimmed width and height
|
|
78
|
+
*
|
|
79
|
+
* @param image - The image to be trimmed. This can be a file path or a Buffer object containing the image data
|
|
80
|
+
* @returns A Promise that resolves to an object containing the trimming information. If the image does not need to be trimmed, this object will be null
|
|
81
|
+
*/
|
|
82
|
+
declare function trimImage(image: string | Buffer$1): Promise<{
|
|
83
|
+
trimOffsetLeft: number;
|
|
84
|
+
trimOffsetTop: number;
|
|
85
|
+
width: number;
|
|
86
|
+
height: number;
|
|
87
|
+
} | null>;
|
|
88
|
+
/**
|
|
89
|
+
* Aligns an image's coordinate system based on trimming information
|
|
90
|
+
*
|
|
91
|
+
* This function takes an image and a center rectangle as input. It first extracts the center
|
|
92
|
+
* rectangle from the image using the Sharp library and converts it to a buffer. Then, it calls
|
|
93
|
+
* the trimImage function to obtain the trimming information of the buffer image. If there is no
|
|
94
|
+
* trimming information, the original center rectangle is returned. If there is trimming information,
|
|
95
|
+
* a new rectangle is created based on the trimming information, with its top-left corner
|
|
96
|
+
* positioned at the negative offset of the trimming from the original center rectangle's top-left
|
|
97
|
+
* corner, and its width and height set to the trimmed image's dimensions.
|
|
98
|
+
*
|
|
99
|
+
* @param image The image file path or buffer to be processed
|
|
100
|
+
* @param center The center rectangle of the image, which is used to extract and align
|
|
101
|
+
* @returns A Promise that resolves to a rectangle object representing the aligned coordinates
|
|
102
|
+
* @throws Error if there is an error during image processing
|
|
103
|
+
*/
|
|
104
|
+
declare function alignCoordByTrim(image: string | Buffer$1, center: Rect): Promise<Rect>;
|
|
105
|
+
|
|
106
|
+
/**
|
|
107
|
+
* Composes a section diagram based on the given sections and context
|
|
108
|
+
* It creates an SVG representation of the sections and converts it to a PNG image file
|
|
109
|
+
*
|
|
110
|
+
* @param sections - An array of UISection objects representing the sections to be included in the diagram
|
|
111
|
+
* @param context - The UIContext object containing the size information for the diagram
|
|
112
|
+
* @returns {Promise<{ file: string; sectionNameColorMap: Record<string, Color>; }>}
|
|
113
|
+
*/
|
|
114
|
+
declare function composeSectionDiagram(sections: UISection[], context: UIContext): Promise<{
|
|
115
|
+
file: string;
|
|
116
|
+
sectionNameColorMap: Record<string, Color>;
|
|
117
|
+
}>;
|
|
118
|
+
|
|
119
|
+
export { alignCoordByTrim, base64Encoded, calculateNewDimensions, composeSectionDiagram, imageInfo, imageInfoOfBase64, resizeImg, saveBase64Image, transformImgPathToBase64, trimImage };
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { c as callToGetJSONObject, r as retrieveElement, a as retrieveSection } from './util-3a13ce3d.js';
|
|
2
|
+
import { B as BaseElement, U as UIContext, D as DumpSubscriber, I as InsightTaskInfo, a as InsightOptions, E as ExecutionTask, b as ExecutionTaskApply, c as ExecutionDump, P as PlanningAction } from './types-1f7912d5.js';
|
|
3
|
+
export { e as AIElementParseResponse, A as AIResponseFormat, f as AISectionParseResponse, u as BaseAgentParserOpt, i as BasicSectionQuery, C as CallAIFn, t as Color, k as DumpMeta, n as ElementById, h as EnsureObject, X as ExectuionTaskPlanningParam, x as ExecutionRecorderItem, W as ExecutionTaskAction, V as ExecutionTaskActionApply, M as ExecutionTaskInsightLocate, K as ExecutionTaskInsightLocateApply, J as ExecutionTaskInsightLocateLog, H as ExecutionTaskInsightLocateOutput, G as ExecutionTaskInsightLocateParam, T as ExecutionTaskInsightQuery, Q as ExecutionTaskInsightQueryApply, O as ExecutionTaskInsightQueryOutput, N as ExecutionTaskInsightQueryParam, Z as ExecutionTaskPlanning, Y as ExecutionTaskPlanningApply, F as ExecutionTaskReturn, y as ExecutionTaskType, z as ExecutorContext, _ as GroupedActionDump, l as InsightDump, j as InsightExtractParam, L as LiteUISection, m as PartialInsightDumpFromSDK, o as PlanningAIResponse, q as PlanningActionParamHover, r as PlanningActionParamInputOrKeyPress, s as PlanningActionParamScroll, p as PlanningActionParamTap, w as PlaywrightParserOpt, d as Point, v as PuppeteerParserOpt, R as Rect, S as Size, g as UISection } from './types-1f7912d5.js';
|
|
4
|
+
export { setDumpDir } from './utils.js';
|
|
5
|
+
import 'openai/resources';
|
|
6
|
+
|
|
7
|
+
declare class Insight<ElementType extends BaseElement = BaseElement> {
|
|
8
|
+
contextRetrieverFn: () => Promise<UIContext<ElementType>> | UIContext<ElementType>;
|
|
9
|
+
aiVendorFn: typeof callToGetJSONObject;
|
|
10
|
+
onceDumpUpdatedFn?: DumpSubscriber;
|
|
11
|
+
taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;
|
|
12
|
+
constructor(context: UIContext<ElementType> | (() => Promise<UIContext<ElementType>> | UIContext<ElementType>), opt?: InsightOptions);
|
|
13
|
+
locate(queryPrompt: string): Promise<ElementType | null>;
|
|
14
|
+
locate(queryPrompt: string, opt: {
|
|
15
|
+
multi: true;
|
|
16
|
+
}): Promise<ElementType[]>;
|
|
17
|
+
extract<T = any>(input: string): Promise<T>;
|
|
18
|
+
extract<T extends Record<string, string>>(input: T): Promise<Record<keyof T, any>>;
|
|
19
|
+
extract<T extends object>(input: Record<keyof T, string>): Promise<T>;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
declare class Executor {
|
|
23
|
+
name: string;
|
|
24
|
+
description?: string;
|
|
25
|
+
tasks: ExecutionTask[];
|
|
26
|
+
status: 'init' | 'pending' | 'running' | 'completed' | 'error';
|
|
27
|
+
errorMsg?: string;
|
|
28
|
+
dumpFileName?: string;
|
|
29
|
+
constructor(name: string, description?: string, tasks?: ExecutionTaskApply[]);
|
|
30
|
+
private markTaskAsPending;
|
|
31
|
+
append(task: ExecutionTaskApply[] | ExecutionTaskApply): Promise<void>;
|
|
32
|
+
flush(): Promise<void>;
|
|
33
|
+
dump(): ExecutionDump;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
declare const getElement: typeof retrieveElement;
|
|
37
|
+
declare const getSection: typeof retrieveSection;
|
|
38
|
+
|
|
39
|
+
declare function plan(context: UIContext, userPrompt: string): Promise<{
|
|
40
|
+
plans: PlanningAction[];
|
|
41
|
+
}>;
|
|
42
|
+
|
|
43
|
+
export { BaseElement, DumpSubscriber, ExecutionDump, ExecutionTask, ExecutionTaskApply, Executor, InsightOptions, InsightTaskInfo, PlanningAction, UIContext, Insight as default, getElement, getSection, plan };
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
import { ChatCompletionMessageParam } from 'openai/resources';
|
|
2
|
+
|
|
3
|
+
interface Point {
|
|
4
|
+
left: number;
|
|
5
|
+
top: number;
|
|
6
|
+
}
|
|
7
|
+
interface Size {
|
|
8
|
+
width: number;
|
|
9
|
+
height: number;
|
|
10
|
+
}
|
|
11
|
+
type Rect = Point & Size;
|
|
12
|
+
declare enum NodeType {
|
|
13
|
+
'INPUT' = "INPUT Node",
|
|
14
|
+
'BUTTON' = "BUTTON Node",
|
|
15
|
+
'IMG' = "IMG Node",
|
|
16
|
+
'TEXT' = "TEXT Node"
|
|
17
|
+
}
|
|
18
|
+
declare abstract class BaseElement {
|
|
19
|
+
abstract id: string;
|
|
20
|
+
abstract attributes: {
|
|
21
|
+
['nodeType']: NodeType;
|
|
22
|
+
[key: string]: string;
|
|
23
|
+
};
|
|
24
|
+
abstract content: string;
|
|
25
|
+
abstract rect: Rect;
|
|
26
|
+
abstract center: [number, number];
|
|
27
|
+
abstract locator?: string;
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* openai
|
|
31
|
+
*
|
|
32
|
+
*/
|
|
33
|
+
declare enum AIResponseFormat {
|
|
34
|
+
JSON = "json_object",
|
|
35
|
+
TEXT = "text"
|
|
36
|
+
}
|
|
37
|
+
interface AIElementParseResponse {
|
|
38
|
+
elements: {
|
|
39
|
+
id: string;
|
|
40
|
+
reason: string;
|
|
41
|
+
text: string;
|
|
42
|
+
}[];
|
|
43
|
+
errors?: string[];
|
|
44
|
+
}
|
|
45
|
+
interface AISectionParseResponse<DataShape> {
|
|
46
|
+
data: DataShape;
|
|
47
|
+
sections?: LiteUISection[];
|
|
48
|
+
errors?: string[];
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* context
|
|
52
|
+
*/
|
|
53
|
+
declare abstract class UIContext<ElementType extends BaseElement = BaseElement> {
|
|
54
|
+
abstract screenshotBase64: string;
|
|
55
|
+
abstract content: ElementType[];
|
|
56
|
+
abstract size: Size;
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* insight
|
|
60
|
+
*/
|
|
61
|
+
type CallAIFn = <T>(messages: ChatCompletionMessageParam[]) => Promise<T>;
|
|
62
|
+
interface InsightOptions {
|
|
63
|
+
taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;
|
|
64
|
+
aiVendorFn?: CallAIFn;
|
|
65
|
+
}
|
|
66
|
+
interface UISection {
|
|
67
|
+
name: string;
|
|
68
|
+
description: string;
|
|
69
|
+
sectionCharacteristics: string;
|
|
70
|
+
rect: Rect;
|
|
71
|
+
content: BaseElement[];
|
|
72
|
+
}
|
|
73
|
+
type EnsureObject<T> = {
|
|
74
|
+
[K in keyof T]: any;
|
|
75
|
+
};
|
|
76
|
+
interface BasicSectionQuery {
|
|
77
|
+
name?: string;
|
|
78
|
+
description?: string;
|
|
79
|
+
}
|
|
80
|
+
type InsightExtractParam = string | Record<string, string>;
|
|
81
|
+
interface InsightTaskInfo {
|
|
82
|
+
durationMs: number;
|
|
83
|
+
systemPrompt?: string;
|
|
84
|
+
rawResponse?: string;
|
|
85
|
+
}
|
|
86
|
+
interface DumpMeta {
|
|
87
|
+
sdkVersion: string;
|
|
88
|
+
logTime: number;
|
|
89
|
+
}
|
|
90
|
+
interface InsightDump extends DumpMeta {
|
|
91
|
+
type: 'locate' | 'extract';
|
|
92
|
+
logId: string;
|
|
93
|
+
context: UIContext;
|
|
94
|
+
userQuery: {
|
|
95
|
+
element?: string;
|
|
96
|
+
dataDemand?: InsightExtractParam;
|
|
97
|
+
sections?: Record<string, string>;
|
|
98
|
+
};
|
|
99
|
+
matchedSection: UISection[];
|
|
100
|
+
matchedElement: BaseElement[];
|
|
101
|
+
data: any;
|
|
102
|
+
taskInfo: InsightTaskInfo;
|
|
103
|
+
error?: string;
|
|
104
|
+
}
|
|
105
|
+
type PartialInsightDumpFromSDK = Omit<InsightDump, 'sdkVersion' | 'logTime' | 'logId'>;
|
|
106
|
+
type DumpSubscriber = (dump: InsightDump) => Promise<void> | void;
|
|
107
|
+
interface LiteUISection {
|
|
108
|
+
name: string;
|
|
109
|
+
description: string;
|
|
110
|
+
sectionCharacteristics: string;
|
|
111
|
+
textIds: string[];
|
|
112
|
+
}
|
|
113
|
+
type ElementById = (id: string) => BaseElement | null;
|
|
114
|
+
/**
|
|
115
|
+
* planning
|
|
116
|
+
*
|
|
117
|
+
*/
|
|
118
|
+
interface PlanningAction<ParamType = any> {
|
|
119
|
+
thought: string;
|
|
120
|
+
type: 'Locate' | 'Tap' | 'Hover' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error';
|
|
121
|
+
param: ParamType;
|
|
122
|
+
}
|
|
123
|
+
interface PlanningAIResponse {
|
|
124
|
+
queryLanguage: string;
|
|
125
|
+
actions: PlanningAction[];
|
|
126
|
+
error?: string;
|
|
127
|
+
}
|
|
128
|
+
type PlanningActionParamTap = null;
|
|
129
|
+
type PlanningActionParamHover = null;
|
|
130
|
+
interface PlanningActionParamInputOrKeyPress {
|
|
131
|
+
value: string;
|
|
132
|
+
}
|
|
133
|
+
interface PlanningActionParamScroll {
|
|
134
|
+
scrollType: 'ScrollUntilBottom' | 'ScrollUntilTop' | 'ScrollDown' | 'ScrollUp';
|
|
135
|
+
}
|
|
136
|
+
/**
|
|
137
|
+
* misc
|
|
138
|
+
*/
|
|
139
|
+
interface Color {
|
|
140
|
+
name: string;
|
|
141
|
+
hex: string;
|
|
142
|
+
}
|
|
143
|
+
interface BaseAgentParserOpt {
|
|
144
|
+
selector: string;
|
|
145
|
+
}
|
|
146
|
+
interface PuppeteerParserOpt extends BaseAgentParserOpt {
|
|
147
|
+
}
|
|
148
|
+
interface PlaywrightParserOpt extends BaseAgentParserOpt {
|
|
149
|
+
}
|
|
150
|
+
interface ExecutionRecorderItem {
|
|
151
|
+
type: 'screenshot';
|
|
152
|
+
ts: number;
|
|
153
|
+
screenshot?: string;
|
|
154
|
+
timing?: string;
|
|
155
|
+
}
|
|
156
|
+
type ExecutionTaskType = 'Planning' | 'Insight' | 'Action' | 'Assertion';
|
|
157
|
+
interface ExecutorContext {
|
|
158
|
+
task: ExecutionTask;
|
|
159
|
+
element?: BaseElement | null;
|
|
160
|
+
}
|
|
161
|
+
interface ExecutionTaskApply<Type extends ExecutionTaskType = any, TaskParam = any, TaskOutput = any, TaskLog = any> {
|
|
162
|
+
type: Type;
|
|
163
|
+
subType?: string;
|
|
164
|
+
param?: TaskParam;
|
|
165
|
+
executor: (param: TaskParam, context: ExecutorContext) => Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | void> | void;
|
|
166
|
+
}
|
|
167
|
+
interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {
|
|
168
|
+
output?: TaskOutput;
|
|
169
|
+
log?: TaskLog;
|
|
170
|
+
recorder?: ExecutionRecorderItem[];
|
|
171
|
+
}
|
|
172
|
+
type ExecutionTask<E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<any, any, any>> = E & ExecutionTaskReturn<E extends ExecutionTaskApply<any, any, infer TaskOutput, any> ? TaskOutput : unknown, E extends ExecutionTaskApply<any, any, any, infer TaskLog> ? TaskLog : unknown> & {
|
|
173
|
+
status: 'pending' | 'running' | 'success' | 'fail' | 'cancelled';
|
|
174
|
+
error?: string;
|
|
175
|
+
timing?: {
|
|
176
|
+
start: number;
|
|
177
|
+
end?: number;
|
|
178
|
+
cost?: number;
|
|
179
|
+
};
|
|
180
|
+
};
|
|
181
|
+
interface ExecutionDump extends DumpMeta {
|
|
182
|
+
name: string;
|
|
183
|
+
description?: string;
|
|
184
|
+
tasks: ExecutionTask[];
|
|
185
|
+
}
|
|
186
|
+
interface ExecutionTaskInsightLocateParam {
|
|
187
|
+
prompt: string;
|
|
188
|
+
}
|
|
189
|
+
interface ExecutionTaskInsightLocateOutput {
|
|
190
|
+
element: BaseElement | null;
|
|
191
|
+
}
|
|
192
|
+
interface ExecutionTaskInsightLocateLog {
|
|
193
|
+
dump?: InsightDump;
|
|
194
|
+
}
|
|
195
|
+
type ExecutionTaskInsightLocateApply = ExecutionTaskApply<'Insight', ExecutionTaskInsightLocateParam, ExecutionTaskInsightLocateOutput, ExecutionTaskInsightLocateLog>;
|
|
196
|
+
type ExecutionTaskInsightLocate = ExecutionTask<ExecutionTaskInsightLocateApply>;
|
|
197
|
+
interface ExecutionTaskInsightQueryParam {
|
|
198
|
+
dataDemand: InsightExtractParam;
|
|
199
|
+
}
|
|
200
|
+
interface ExecutionTaskInsightQueryOutput {
|
|
201
|
+
data: any;
|
|
202
|
+
}
|
|
203
|
+
type ExecutionTaskInsightQueryApply = ExecutionTaskApply<'Insight', ExecutionTaskInsightQueryParam>;
|
|
204
|
+
type ExecutionTaskInsightQuery = ExecutionTask<ExecutionTaskInsightQueryApply>;
|
|
205
|
+
type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<'Action', ActionParam, void, void>;
|
|
206
|
+
type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;
|
|
207
|
+
type ExectuionTaskPlanningParam = PlanningAIResponse;
|
|
208
|
+
type ExecutionTaskPlanningApply = ExecutionTaskApply<'Planning', {
|
|
209
|
+
userPrompt: string;
|
|
210
|
+
}, {
|
|
211
|
+
plans: PlanningAction[];
|
|
212
|
+
}>;
|
|
213
|
+
type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;
|
|
214
|
+
interface GroupedActionDump {
|
|
215
|
+
groupName: string;
|
|
216
|
+
executions: ExecutionDump[];
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
export { AIResponseFormat as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type ExecutionTaskReturn as F, type ExecutionTaskInsightLocateParam as G, type ExecutionTaskInsightLocateOutput as H, type InsightTaskInfo as I, type ExecutionTaskInsightLocateLog as J, type ExecutionTaskInsightLocateApply as K, type LiteUISection as L, type ExecutionTaskInsightLocate as M, type ExecutionTaskInsightQueryParam as N, type ExecutionTaskInsightQueryOutput as O, type PlanningAction as P, type ExecutionTaskInsightQueryApply as Q, type Rect as R, type Size as S, type ExecutionTaskInsightQuery as T, UIContext as U, type ExecutionTaskActionApply as V, type ExecutionTaskAction as W, type ExectuionTaskPlanningParam as X, type ExecutionTaskPlanningApply as Y, type ExecutionTaskPlanning as Z, type GroupedActionDump as _, type InsightOptions as a, type ExecutionTaskApply as b, type ExecutionDump as c, type Point as d, type AIElementParseResponse as e, type AISectionParseResponse as f, type UISection as g, type EnsureObject as h, type BasicSectionQuery as i, type InsightExtractParam as j, type DumpMeta as k, type InsightDump as l, type PartialInsightDumpFromSDK as m, type ElementById as n, type PlanningAIResponse as o, type PlanningActionParamTap as p, type PlanningActionParamHover as q, type PlanningActionParamInputOrKeyPress as r, type PlanningActionParamScroll as s, type Color as t, type BaseAgentParserOpt as u, type PuppeteerParserOpt as v, type PlaywrightParserOpt as w, type ExecutionRecorderItem as x, type ExecutionTaskType as y, type ExecutorContext as z };
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { ChatCompletionMessageParam } from 'openai/resources';
|
|
2
|
+
import { B as BaseElement, U as UIContext } from './types-1f7912d5.js';
|
|
3
|
+
|
|
4
|
+
declare function callToGetJSONObject<T>(messages: ChatCompletionMessageParam[]): Promise<T>;
|
|
5
|
+
|
|
6
|
+
declare function describeUserPage<ElementType extends BaseElement = BaseElement>(context: Omit<UIContext<ElementType>, 'describer'>): Promise<{
|
|
7
|
+
description: string;
|
|
8
|
+
elementById(id: string): ElementType;
|
|
9
|
+
}>;
|
|
10
|
+
/**
|
|
11
|
+
* elements
|
|
12
|
+
*/
|
|
13
|
+
declare function retrieveElement(prompt: string, opt?: {
|
|
14
|
+
multi: boolean;
|
|
15
|
+
}): string;
|
|
16
|
+
/**
|
|
17
|
+
* sections
|
|
18
|
+
*/
|
|
19
|
+
declare function retrieveSection(prompt: string): string;
|
|
20
|
+
|
|
21
|
+
export { retrieveSection as a, callToGetJSONObject as c, describeUserPage as d, retrieveElement as r };
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { R as Rect } from './types-1f7912d5.js';
|
|
2
|
+
import 'openai/resources';
|
|
3
|
+
|
|
4
|
+
interface PkgInfo {
|
|
5
|
+
name: string;
|
|
6
|
+
version: string;
|
|
7
|
+
}
|
|
8
|
+
declare function getPkgInfo(): PkgInfo;
|
|
9
|
+
declare const insightDumpFileExt = "insight-dump.json";
|
|
10
|
+
declare const groupedActionDumpFileExt = "web-dump.json";
|
|
11
|
+
declare function getDumpDir(): string;
|
|
12
|
+
declare function setDumpDir(dir: string): void;
|
|
13
|
+
declare function writeDumpFile(fileName: string, fileExt: string, fileContent: string): string;
|
|
14
|
+
declare function getTmpDir(): string;
|
|
15
|
+
declare function getTmpFile(fileExt: string): string;
|
|
16
|
+
declare function overlapped(container: Rect, target: Rect): boolean;
|
|
17
|
+
declare function sleep(ms: number): Promise<unknown>;
|
|
18
|
+
declare const commonScreenshotParam: any;
|
|
19
|
+
|
|
20
|
+
export { commonScreenshotParam, getDumpDir, getPkgInfo, getTmpDir, getTmpFile, groupedActionDumpFileExt, insightDumpFileExt, overlapped, setDumpDir, sleep, writeDumpFile };
|
package/modern.config.ts
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { moduleTools, defineConfig } from '@modern-js/module-tools';
|
|
2
|
+
|
|
3
|
+
export default defineConfig({
|
|
4
|
+
plugins: [moduleTools()],
|
|
5
|
+
buildPreset: 'npm-library',
|
|
6
|
+
buildConfig: {
|
|
7
|
+
platform: 'node',
|
|
8
|
+
input: {
|
|
9
|
+
index: 'src/index.ts',
|
|
10
|
+
utils: 'src/utils.ts',
|
|
11
|
+
image: 'src/image/index.ts',
|
|
12
|
+
'ai-model': 'src/ai-model/index.ts',
|
|
13
|
+
},
|
|
14
|
+
// input: ['src/utils.ts', 'src/index.ts', 'src/image/index.ts'],
|
|
15
|
+
externals: ['langsmith/wrappers', 'buffer'],
|
|
16
|
+
target: 'es2017'
|
|
17
|
+
},
|
|
18
|
+
});
|