@midscene/core 0.28.2-beta-20250910072710.0 → 0.28.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +17 -27
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/tasks.mjs +31 -43
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +3 -4
- package/dist/es/agent/utils.mjs.map +1 -1
- package/dist/es/ai-model/common.mjs +26 -16
- package/dist/es/ai-model/common.mjs.map +1 -1
- package/dist/es/ai-model/index.mjs +3 -3
- package/dist/es/ai-model/inspect.mjs +34 -26
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/llm-planning.mjs +15 -14
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/common.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/playwright-generator.mjs +12 -6
- package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/util.mjs +3 -3
- package/dist/es/ai-model/prompt/util.mjs.map +1 -1
- package/dist/es/ai-model/prompt/yaml-generator.mjs +12 -6
- package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +23 -28
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/ai-model/ui-tars-planning.mjs +10 -10
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
- package/dist/es/insight/index.mjs +26 -20
- package/dist/es/insight/index.mjs.map +1 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +5 -4
- package/dist/es/utils.mjs.map +1 -1
- package/dist/lib/agent/agent.js +16 -26
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/tasks.js +31 -43
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/utils.js +3 -4
- package/dist/lib/agent/utils.js.map +1 -1
- package/dist/lib/ai-model/common.js +31 -18
- package/dist/lib/ai-model/common.js.map +1 -1
- package/dist/lib/ai-model/index.js +15 -12
- package/dist/lib/ai-model/inspect.js +32 -24
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/llm-planning.js +14 -13
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/common.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/playwright-generator.js +11 -5
- package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
- package/dist/lib/ai-model/prompt/util.js +3 -3
- package/dist/lib/ai-model/prompt/util.js.map +1 -1
- package/dist/lib/ai-model/prompt/yaml-generator.js +11 -5
- package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +31 -36
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/ai-model/ui-tars-planning.js +8 -8
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
- package/dist/lib/insight/index.js +23 -17
- package/dist/lib/insight/index.js.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +4 -3
- package/dist/lib/utils.js.map +1 -1
- package/dist/types/agent/agent.d.ts +0 -2
- package/dist/types/agent/tasks.d.ts +7 -8
- package/dist/types/agent/utils.d.ts +1 -3
- package/dist/types/ai-model/common.d.ts +11 -7
- package/dist/types/ai-model/index.d.ts +2 -2
- package/dist/types/ai-model/inspect.d.ts +6 -7
- package/dist/types/ai-model/llm-planning.d.ts +2 -2
- package/dist/types/ai-model/prompt/common.d.ts +2 -2
- package/dist/types/ai-model/prompt/llm-locator.d.ts +2 -2
- package/dist/types/ai-model/prompt/llm-planning.d.ts +3 -3
- package/dist/types/ai-model/prompt/llm-section-locator.d.ts +2 -2
- package/dist/types/ai-model/prompt/playwright-generator.d.ts +2 -3
- package/dist/types/ai-model/prompt/util.d.ts +2 -3
- package/dist/types/ai-model/prompt/yaml-generator.d.ts +2 -3
- package/dist/types/ai-model/service-caller/index.d.ts +5 -5
- package/dist/types/ai-model/ui-tars-planning.d.ts +3 -3
- package/dist/types/device/index.d.ts +2 -2
- package/dist/types/insight/index.d.ts +7 -12
- package/dist/types/types.d.ts +9 -0
- package/dist/types/utils.d.ts +1 -2
- package/package.json +3 -3
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { AIActionType, expandSearchArea } from "../ai-model/common.mjs";
|
|
2
|
-
import { AiExtractElementInfo, AiLocateElement,
|
|
1
|
+
import { AIActionType, callAiFn, expandSearchArea } from "../ai-model/common.mjs";
|
|
2
|
+
import { AiExtractElementInfo, AiLocateElement, callToGetJSONObject } from "../ai-model/index.mjs";
|
|
3
3
|
import { AiLocateSection } from "../ai-model/inspect.mjs";
|
|
4
4
|
import { elementDescriberInstruction } from "../ai-model/prompt/describe.mjs";
|
|
5
|
-
import { MIDSCENE_FORCE_DEEP_THINK, globalConfigManager } from "@midscene/shared/env";
|
|
5
|
+
import { MIDSCENE_FORCE_DEEP_THINK, getIsUseQwenVl, globalConfigManager, vlLocateMode } from "@midscene/shared/env";
|
|
6
6
|
import { compositeElementInfoImg, cropByRect } from "@midscene/shared/img";
|
|
7
7
|
import { getDebug } from "@midscene/shared/logger";
|
|
8
8
|
import { assert } from "@midscene/shared/utils";
|
|
@@ -19,8 +19,9 @@ function _define_property(obj, key, value) {
|
|
|
19
19
|
}
|
|
20
20
|
const debug = getDebug('ai:insight');
|
|
21
21
|
class Insight {
|
|
22
|
-
async locate(query, opt
|
|
22
|
+
async locate(query, opt) {
|
|
23
23
|
var _parseResult_errors;
|
|
24
|
+
const { callAI } = opt || {};
|
|
24
25
|
const queryPrompt = 'string' == typeof query ? query : query.prompt;
|
|
25
26
|
assert(queryPrompt, 'query is required for locate');
|
|
26
27
|
const dumpSubscriber = this.onceDumpUpdatedFn;
|
|
@@ -30,8 +31,10 @@ class Insight {
|
|
|
30
31
|
if (globalDeepThinkSwitch) debug('globalDeepThinkSwitch', globalDeepThinkSwitch);
|
|
31
32
|
let searchAreaPrompt;
|
|
32
33
|
if (query.deepThink || globalDeepThinkSwitch) searchAreaPrompt = query.prompt;
|
|
33
|
-
const
|
|
34
|
-
|
|
34
|
+
const modelPreferences = {
|
|
35
|
+
intent: 'grounding'
|
|
36
|
+
};
|
|
37
|
+
if (searchAreaPrompt && !vlLocateMode(modelPreferences)) {
|
|
35
38
|
console.warn('The "deepThink" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/choose-a-model');
|
|
36
39
|
searchAreaPrompt = void 0;
|
|
37
40
|
}
|
|
@@ -43,8 +46,7 @@ class Insight {
|
|
|
43
46
|
if (searchAreaPrompt) {
|
|
44
47
|
searchAreaResponse = await AiLocateSection({
|
|
45
48
|
context,
|
|
46
|
-
sectionDescription: searchAreaPrompt
|
|
47
|
-
modelConfig
|
|
49
|
+
sectionDescription: searchAreaPrompt
|
|
48
50
|
});
|
|
49
51
|
assert(searchAreaResponse.rect, `cannot find search area for "${searchAreaPrompt}"${searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''}`);
|
|
50
52
|
searchAreaRawResponse = searchAreaResponse.rawResponse;
|
|
@@ -53,11 +55,10 @@ class Insight {
|
|
|
53
55
|
}
|
|
54
56
|
const startTime = Date.now();
|
|
55
57
|
const { parseResult, rect, elementById, rawResponse, usage, isOrderSensitive } = await AiLocateElement({
|
|
56
|
-
|
|
58
|
+
callAI: callAI || this.aiVendorFn,
|
|
57
59
|
context,
|
|
58
60
|
targetElementDescription: queryPrompt,
|
|
59
|
-
searchConfig: searchAreaResponse
|
|
60
|
-
modelConfig
|
|
61
|
+
searchConfig: searchAreaResponse
|
|
61
62
|
});
|
|
62
63
|
const timeCost = Date.now() - startTime;
|
|
63
64
|
const taskInfo = {
|
|
@@ -115,11 +116,14 @@ class Insight {
|
|
|
115
116
|
rect
|
|
116
117
|
};
|
|
117
118
|
}
|
|
118
|
-
async extract(dataDemand,
|
|
119
|
+
async extract(dataDemand, opt, multimodalPrompt) {
|
|
119
120
|
var _parseResult_errors;
|
|
120
121
|
assert('object' == typeof dataDemand || 'string' == typeof dataDemand, `dataDemand should be object or string, but get ${typeof dataDemand}`);
|
|
121
122
|
const dumpSubscriber = this.onceDumpUpdatedFn;
|
|
122
123
|
this.onceDumpUpdatedFn = void 0;
|
|
124
|
+
const modelPreferences = {
|
|
125
|
+
intent: 'VQA'
|
|
126
|
+
};
|
|
123
127
|
const context = await this.contextRetrieverFn('extract');
|
|
124
128
|
const startTime = Date.now();
|
|
125
129
|
const { parseResult, usage } = await AiExtractElementInfo({
|
|
@@ -127,7 +131,7 @@ class Insight {
|
|
|
127
131
|
dataQuery: dataDemand,
|
|
128
132
|
multimodalPrompt,
|
|
129
133
|
extractOption: opt,
|
|
130
|
-
|
|
134
|
+
modelPreferences
|
|
131
135
|
});
|
|
132
136
|
const timeCost = Date.now() - startTime;
|
|
133
137
|
const taskInfo = {
|
|
@@ -159,12 +163,14 @@ class Insight {
|
|
|
159
163
|
usage
|
|
160
164
|
};
|
|
161
165
|
}
|
|
162
|
-
async describe(target,
|
|
166
|
+
async describe(target, opt) {
|
|
163
167
|
assert(target, 'target is required for insight.describe');
|
|
164
168
|
const context = await this.contextRetrieverFn('describe');
|
|
165
169
|
const { screenshotBase64, size } = context;
|
|
166
170
|
assert(screenshotBase64, 'screenshot is required for insight.describe');
|
|
167
|
-
const
|
|
171
|
+
const modelPreferences = {
|
|
172
|
+
intent: 'grounding'
|
|
173
|
+
};
|
|
168
174
|
const systemPrompt = elementDescriberInstruction();
|
|
169
175
|
const defaultRectSize = 30;
|
|
170
176
|
const targetRect = Array.isArray(target) ? {
|
|
@@ -184,9 +190,9 @@ class Insight {
|
|
|
184
190
|
borderThickness: 3
|
|
185
191
|
});
|
|
186
192
|
if (null == opt ? void 0 : opt.deepThink) {
|
|
187
|
-
const searchArea = expandSearchArea(targetRect, context.size,
|
|
193
|
+
const searchArea = expandSearchArea(targetRect, context.size, modelPreferences);
|
|
188
194
|
debug('describe: set searchArea', searchArea);
|
|
189
|
-
imagePayload = await cropByRect(imagePayload, searchArea,
|
|
195
|
+
imagePayload = await cropByRect(imagePayload, searchArea, getIsUseQwenVl(modelPreferences));
|
|
190
196
|
}
|
|
191
197
|
const msgs = [
|
|
192
198
|
{
|
|
@@ -206,8 +212,8 @@ class Insight {
|
|
|
206
212
|
]
|
|
207
213
|
}
|
|
208
214
|
];
|
|
209
|
-
const callAIFn = this.aiVendorFn;
|
|
210
|
-
const res = await callAIFn(msgs, AIActionType.DESCRIBE_ELEMENT,
|
|
215
|
+
const callAIFn = this.aiVendorFn || callToGetJSONObject;
|
|
216
|
+
const res = await callAIFn(msgs, AIActionType.DESCRIBE_ELEMENT, modelPreferences);
|
|
211
217
|
const { content } = res;
|
|
212
218
|
assert(!content.error, `describe failed: ${content.error}`);
|
|
213
219
|
assert(content.description, 'failed to describe the element');
|
|
@@ -215,7 +221,7 @@ class Insight {
|
|
|
215
221
|
}
|
|
216
222
|
constructor(context, opt){
|
|
217
223
|
_define_property(this, "contextRetrieverFn", void 0);
|
|
218
|
-
_define_property(this, "aiVendorFn",
|
|
224
|
+
_define_property(this, "aiVendorFn", callAiFn);
|
|
219
225
|
_define_property(this, "onceDumpUpdatedFn", void 0);
|
|
220
226
|
_define_property(this, "taskInfo", void 0);
|
|
221
227
|
assert(context, 'context is required for Insight');
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"insight/index.mjs","sources":["webpack://@midscene/core/./src/insight/index.ts"],"sourcesContent":["import { AIActionType, type AIArgs, expandSearchArea } from '@/ai-model/common';\nimport {\n AiExtractElementInfo,\n AiLocateElement,\n callAIWithObjectResponse,\n} from '@/ai-model/index';\nimport { AiLocateSection } from '@/ai-model/inspect';\nimport { elementDescriberInstruction } from '@/ai-model/prompt/describe';\nimport type {\n AIDescribeElementResponse,\n AIUsageInfo,\n BaseElement,\n DetailedLocateParam,\n DumpSubscriber,\n InsightAction,\n InsightExtractOption,\n InsightExtractParam,\n InsightTaskInfo,\n LocateResult,\n PartialInsightDumpFromSDK,\n Rect,\n UIContext,\n} from '@/types';\nimport {\n type IModelConfig,\n MIDSCENE_FORCE_DEEP_THINK,\n globalConfigManager,\n} from '@midscene/shared/env';\nimport { compositeElementInfoImg, cropByRect } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TMultimodalPrompt } from '../ai-model/common';\nimport { emitInsightDump } from './utils';\n\nexport interface LocateOpts {\n context?: UIContext<BaseElement>;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\ninterface InsightOptions {\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n aiVendorFn?: typeof callAIWithObjectResponse;\n}\n\nconst debug = getDebug('ai:insight');\nexport default class Insight<\n ElementType extends BaseElement = BaseElement,\n ContextType extends UIContext<ElementType> = UIContext<ElementType>,\n> {\n contextRetrieverFn: (\n action: InsightAction,\n ) => Promise<ContextType> | ContextType;\n\n aiVendorFn: Exclude<InsightOptions['aiVendorFn'], undefined> =\n callAIWithObjectResponse;\n\n onceDumpUpdatedFn?: DumpSubscriber;\n\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n\n constructor(\n context:\n | ContextType\n | ((action: InsightAction) => Promise<ContextType> | ContextType),\n opt?: InsightOptions,\n ) {\n assert(context, 'context is required for Insight');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n // just for unit test, aiVendorFn is callAIWithObjectResponse by default\n if (typeof opt?.aiVendorFn !== 'undefined') {\n this.aiVendorFn = opt.aiVendorFn;\n }\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: DetailedLocateParam,\n opt: LocateOpts,\n modelConfig: IModelConfig,\n ): Promise<LocateResult> {\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(queryPrompt, 'query is required for locate');\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const globalDeepThinkSwitch = globalConfigManager.getEnvConfigInBoolean(\n MIDSCENE_FORCE_DEEP_THINK,\n );\n if (globalDeepThinkSwitch) {\n debug('globalDeepThinkSwitch', globalDeepThinkSwitch);\n }\n let searchAreaPrompt;\n if (query.deepThink || globalDeepThinkSwitch) {\n searchAreaPrompt = query.prompt;\n }\n\n const { vlMode } = modelConfig;\n\n if (searchAreaPrompt && !vlMode) {\n console.warn(\n 'The \"deepThink\" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/choose-a-model',\n );\n searchAreaPrompt = undefined;\n }\n\n const context = opt?.context || (await this.contextRetrieverFn('locate'));\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n modelConfig,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const {\n parseResult,\n rect,\n elementById,\n rawResponse,\n usage,\n isOrderSensitive,\n } = await AiLocateElement({\n callAIFn: this.aiVendorFn,\n context,\n targetElementDescription: queryPrompt,\n searchConfig: searchAreaResponse,\n modelConfig,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI model failed to locate: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepThink: !!searchArea,\n error: errorLog,\n };\n\n const elements: BaseElement[] = [];\n (parseResult.elements || []).forEach((item) => {\n if ('id' in item) {\n const element = elementById(item?.id);\n\n if (!element) {\n console.warn(\n `locate: cannot find element id=${item.id}. Maybe an unstable response from AI model`,\n );\n return;\n }\n elements.push(element);\n }\n });\n\n emitInsightDump(\n {\n ...dumpData,\n matchedElement: elements,\n },\n dumpSubscriber,\n );\n\n if (errorLog) {\n throw new Error(errorLog);\n }\n\n assert(\n elements.length <= 1,\n `locate: multiple elements found, length = ${elements.length}`,\n );\n\n if (elements.length === 1) {\n return {\n element: {\n id: elements[0]!.id,\n indexId: elements[0]!.indexId,\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n xpaths: elements[0]!.xpaths || [],\n attributes: elements[0]!.attributes,\n isOrderSensitive,\n },\n rect,\n };\n }\n return {\n element: null,\n rect,\n };\n }\n\n async extract<T>(\n dataDemand: InsightExtractParam,\n modelConfig: IModelConfig,\n opt?: InsightExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<{\n data: T;\n thought?: string;\n usage?: AIUsageInfo;\n }> {\n assert(\n typeof dataDemand === 'object' || typeof dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('extract');\n\n const startTime = Date.now();\n\n const { parseResult, usage } = await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n multimodalPrompt,\n extractOption: opt,\n modelConfig,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(parseResult),\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data, thought } = parseResult || {};\n\n // 4\n emitInsightDump(\n {\n ...dumpData,\n data,\n },\n dumpSubscriber,\n );\n\n if (errorLog && !data && !opt?.doNotThrowError) {\n throw new Error(errorLog);\n }\n\n return {\n data,\n thought,\n usage,\n };\n }\n\n async describe(\n target: Rect | [number, number],\n modelConfig: IModelConfig,\n opt?: {\n deepThink?: boolean;\n },\n ): Promise<Pick<AIDescribeElementResponse, 'description'>> {\n assert(target, 'target is required for insight.describe');\n const context = await this.contextRetrieverFn('describe');\n const { screenshotBase64, size } = context;\n assert(screenshotBase64, 'screenshot is required for insight.describe');\n // The result of the \"describe\" function will be used for positioning, so essentially it is a form of grounding.\n const { vlMode } = modelConfig;\n const systemPrompt = elementDescriberInstruction();\n\n // Convert [x,y] center point to Rect if needed\n const defaultRectSize = 30;\n const targetRect: Rect = Array.isArray(target)\n ? {\n left: Math.floor(target[0] - defaultRectSize / 2),\n top: Math.floor(target[1] - defaultRectSize / 2),\n width: defaultRectSize,\n height: defaultRectSize,\n }\n : target;\n\n let imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n size,\n elementsPositionInfo: [\n {\n rect: targetRect,\n },\n ],\n borderThickness: 3,\n });\n\n if (opt?.deepThink) {\n const searchArea = expandSearchArea(targetRect, context.size, vlMode);\n debug('describe: set searchArea', searchArea);\n imagePayload = await cropByRect(\n imagePayload,\n searchArea,\n vlMode === 'qwen-vl',\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n },\n ];\n\n const callAIFn = this\n .aiVendorFn as typeof callAIWithObjectResponse<AIDescribeElementResponse>;\n\n const res = await callAIFn(\n msgs,\n AIActionType.DESCRIBE_ELEMENT,\n modelConfig,\n );\n\n const { content } = res;\n assert(!content.error, `describe failed: ${content.error}`);\n assert(content.description, 'failed to describe the element');\n return content;\n }\n}\n"],"names":["debug","getDebug","Insight","query","opt","modelConfig","_parseResult_errors","queryPrompt","assert","dumpSubscriber","undefined","globalDeepThinkSwitch","globalConfigManager","MIDSCENE_FORCE_DEEP_THINK","searchAreaPrompt","vlMode","console","context","searchArea","searchAreaRawResponse","searchAreaUsage","searchAreaResponse","AiLocateSection","startTime","Date","parseResult","rect","elementById","rawResponse","usage","isOrderSensitive","AiLocateElement","timeCost","taskInfo","JSON","errorLog","dumpData","elements","item","element","emitInsightDump","Error","dataDemand","multimodalPrompt","AiExtractElementInfo","data","thought","target","screenshotBase64","size","systemPrompt","elementDescriberInstruction","defaultRectSize","targetRect","Array","Math","imagePayload","compositeElementInfoImg","expandSearchArea","cropByRect","msgs","callAIFn","res","AIActionType","content","callAIWithObjectResponse","Promise"],"mappings":";;;;;;;;;;;;;;;;;;;AA+CA,MAAMA,QAAQC,SAAS;AACR,MAAMC;IAqCnB,MAAM,OACJC,KAA0B,EAC1BC,GAAe,EACfC,WAAyB,EACF;YAkFnBC;QAjFJ,MAAMC,cAAc,AAAiB,YAAjB,OAAOJ,QAAqBA,QAAQA,MAAM,MAAM;QACpEK,OAAOD,aAAa;QACpB,MAAME,iBAAiB,IAAI,CAAC,iBAAiB;QAC7C,IAAI,CAAC,iBAAiB,GAAGC;QAEzBF,OAAO,AAAiB,YAAjB,OAAOL,OAAoB;QAElC,MAAMQ,wBAAwBC,oBAAoB,qBAAqB,CACrEC;QAEF,IAAIF,uBACFX,MAAM,yBAAyBW;QAEjC,IAAIG;QACJ,IAAIX,MAAM,SAAS,IAAIQ,uBACrBG,mBAAmBX,MAAM,MAAM;QAGjC,MAAM,EAAEY,MAAM,EAAE,GAAGV;QAEnB,IAAIS,oBAAoB,CAACC,QAAQ;YAC/BC,QAAQ,IAAI,CACV;YAEFF,mBAAmBJ;QACrB;QAEA,MAAMO,UAAUb,AAAAA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,OAAO,AAAD,KAAM,MAAM,IAAI,CAAC,kBAAkB,CAAC;QAE/D,IAAIc;QACJ,IAAIC;QACJ,IAAIC;QACJ,IAAIC;QAGJ,IAAIP,kBAAkB;YACpBO,qBAAqB,MAAMC,gBAAgB;gBACzCL;gBACA,oBAAoBH;gBACpBT;YACF;YACAG,OACEa,mBAAmB,IAAI,EACvB,CAAC,6BAA6B,EAAEP,iBAAiB,CAAC,EAChDO,mBAAmB,KAAK,GAAG,CAAC,EAAE,EAAEA,mBAAmB,KAAK,EAAE,GAAG,IAC7D;YAEJF,wBAAwBE,mBAAmB,WAAW;YACtDD,kBAAkBC,mBAAmB,KAAK;YAC1CH,aAAaG,mBAAmB,IAAI;QACtC;QAEA,MAAME,YAAYC,KAAK,GAAG;QAC1B,MAAM,EACJC,WAAW,EACXC,IAAI,EACJC,WAAW,EACXC,WAAW,EACXC,KAAK,EACLC,gBAAgB,EACjB,GAAG,MAAMC,gBAAgB;YACxB,UAAU,IAAI,CAAC,UAAU;YACzBd;YACA,0BAA0BV;YAC1B,cAAcc;YACdhB;QACF;QAEA,MAAM2B,WAAWR,KAAK,GAAG,KAAKD;QAC9B,MAAMU,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaE,KAAK,SAAS,CAACN;YAC5B,gBAAgBM,KAAK,SAAS,CAACT;YAC/BI;YACAX;YACAC;YACAC;QACF;QAEA,IAAIe;QACJ,IAAI,QAAA7B,CAAAA,sBAAAA,YAAY,MAAM,AAAD,IAAjBA,KAAAA,IAAAA,oBAAoB,MAAM,EAC5B6B,WAAW,CAAC,6BAA6B,EAAEV,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAG5E,MAAMW,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACT,SAAS7B;YACX;YACA,gBAAgB,EAAE;YAClB,aAAamB;YACb,MAAM;YACNO;YACA,WAAW,CAAC,CAACf;YACb,OAAOiB;QACT;QAEA,MAAME,WAA0B,EAAE;QACjCZ,CAAAA,YAAY,QAAQ,IAAI,EAAC,EAAG,OAAO,CAAC,CAACa;YACpC,IAAI,QAAQA,MAAM;gBAChB,MAAMC,UAAUZ,YAAYW,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,EAAE;gBAEpC,IAAI,CAACC,SAAS,YACZvB,QAAQ,IAAI,CACV,CAAC,+BAA+B,EAAEsB,KAAK,EAAE,CAAC,0CAA0C,CAAC;gBAIzFD,SAAS,IAAI,CAACE;YAChB;QACF;QAEAC,gBACE;YACE,GAAGJ,QAAQ;YACX,gBAAgBC;QAClB,GACA5B;QAGF,IAAI0B,UACF,MAAM,IAAIM,MAAMN;QAGlB3B,OACE6B,SAAS,MAAM,IAAI,GACnB,CAAC,0CAA0C,EAAEA,SAAS,MAAM,EAAE;QAGhE,IAAIA,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACL,SAAS;gBACP,IAAIA,QAAQ,CAAC,EAAE,CAAE,EAAE;gBACnB,SAASA,QAAQ,CAAC,EAAE,CAAE,OAAO;gBAC7B,QAAQA,QAAQ,CAAC,EAAE,CAAE,MAAM;gBAC3B,MAAMA,QAAQ,CAAC,EAAE,CAAE,IAAI;gBACvB,QAAQA,QAAQ,CAAC,EAAE,CAAE,MAAM,IAAI,EAAE;gBACjC,YAAYA,QAAQ,CAAC,EAAE,CAAE,UAAU;gBACnCP;YACF;YACAJ;QACF;QAEF,OAAO;YACL,SAAS;YACTA;QACF;IACF;IAEA,MAAM,QACJgB,UAA+B,EAC/BrC,WAAyB,EACzBD,GAA0B,EAC1BuC,gBAAoC,EAKnC;YA4BGrC;QA3BJE,OACE,AAAsB,YAAtB,OAAOkC,cAA2B,AAAsB,YAAtB,OAAOA,YACzC,CAAC,+CAA+C,EAAE,OAAOA,YAAY;QAEvE,MAAMjC,iBAAiB,IAAI,CAAC,iBAAiB;QAC7C,IAAI,CAAC,iBAAiB,GAAGC;QAEzB,MAAMO,UAAU,MAAM,IAAI,CAAC,kBAAkB,CAAC;QAE9C,MAAMM,YAAYC,KAAK,GAAG;QAE1B,MAAM,EAAEC,WAAW,EAAEI,KAAK,EAAE,GAAG,MAAMe,qBAAwB;YAC3D3B;YACA,WAAWyB;YACXC;YACA,eAAevC;YACfC;QACF;QAEA,MAAM2B,WAAWR,KAAK,GAAG,KAAKD;QAC9B,MAAMU,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaE,KAAK,SAAS,CAACT;QAC9B;QAEA,IAAIU;QACJ,IAAI,QAAA7B,CAAAA,sBAAAA,YAAY,MAAM,AAAD,IAAjBA,KAAAA,IAAAA,oBAAoB,MAAM,EAC5B6B,WAAW,CAAC,qBAAqB,EAAEV,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAGpE,MAAMW,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACTM;YACF;YACA,gBAAgB,EAAE;YAClB,MAAM;YACNT;YACA,OAAOE;QACT;QAEA,MAAM,EAAEU,IAAI,EAAEC,OAAO,EAAE,GAAGrB,eAAe,CAAC;QAG1Ce,gBACE;YACE,GAAGJ,QAAQ;YACXS;QACF,GACApC;QAGF,IAAI0B,YAAY,CAACU,QAAQ,CAACzC,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,eAAe,AAAD,GAC3C,MAAM,IAAIqC,MAAMN;QAGlB,OAAO;YACLU;YACAC;YACAjB;QACF;IACF;IAEA,MAAM,SACJkB,MAA+B,EAC/B1C,WAAyB,EACzBD,GAEC,EACwD;QACzDI,OAAOuC,QAAQ;QACf,MAAM9B,UAAU,MAAM,IAAI,CAAC,kBAAkB,CAAC;QAC9C,MAAM,EAAE+B,gBAAgB,EAAEC,IAAI,EAAE,GAAGhC;QACnCT,OAAOwC,kBAAkB;QAEzB,MAAM,EAAEjC,MAAM,EAAE,GAAGV;QACnB,MAAM6C,eAAeC;QAGrB,MAAMC,kBAAkB;QACxB,MAAMC,aAAmBC,MAAM,OAAO,CAACP,UACnC;YACE,MAAMQ,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC/C,KAAKG,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC9C,OAAOA;YACP,QAAQA;QACV,IACAL;QAEJ,IAAIS,eAAe,MAAMC,wBAAwB;YAC/C,gBAAgBT;YAChBC;YACA,sBAAsB;gBACpB;oBACE,MAAMI;gBACR;aACD;YACD,iBAAiB;QACnB;QAEA,IAAIjD,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,SAAS,EAAE;YAClB,MAAMc,aAAawC,iBAAiBL,YAAYpC,QAAQ,IAAI,EAAEF;YAC9Df,MAAM,4BAA4BkB;YAClCsC,eAAe,MAAMG,WACnBH,cACAtC,YACAH,AAAW,cAAXA;QAEJ;QAEA,MAAM6C,OAAe;YACnB;gBAAE,MAAM;gBAAU,SAASV;YAAa;YACxC;gBACE,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKM;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;SACD;QAED,MAAMK,WAAW,IAAI,CAClB,UAAU;QAEb,MAAMC,MAAM,MAAMD,SAChBD,MACAG,aAAa,gBAAgB,EAC7B1D;QAGF,MAAM,EAAE2D,OAAO,EAAE,GAAGF;QACpBtD,OAAO,CAACwD,QAAQ,KAAK,EAAE,CAAC,iBAAiB,EAAEA,QAAQ,KAAK,EAAE;QAC1DxD,OAAOwD,QAAQ,WAAW,EAAE;QAC5B,OAAOA;IACT;IAvUA,YACE/C,OAEmE,EACnEb,GAAoB,CACpB;QAhBF;QAIA,qCACE6D;QAEF;QAEA;QAQEzD,OAAOS,SAAS;QAChB,IAAI,AAAmB,cAAnB,OAAOA,SACT,IAAI,CAAC,kBAAkB,GAAGA;aAE1B,IAAI,CAAC,kBAAkB,GAAG,IAAMiD,QAAQ,OAAO,CAACjD;QAIlD,IAAI,AAA2B,WAApBb,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,UAAU,AAAD,GACvB,IAAI,CAAC,UAAU,GAAGA,IAAI,UAAU;QAElC,IAAI,AAAyB,WAAlBA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,QAAQ,AAAD,GACrB,IAAI,CAAC,QAAQ,GAAGA,IAAI,QAAQ;IAEhC;AAoTF"}
|
|
1
|
+
{"version":3,"file":"insight/index.mjs","sources":["webpack://@midscene/core/./src/insight/index.ts"],"sourcesContent":["import {\n AIActionType,\n type AIArgs,\n callAiFn,\n expandSearchArea,\n} from '@/ai-model/common';\nimport {\n AiExtractElementInfo,\n AiLocateElement,\n callToGetJSONObject,\n} from '@/ai-model/index';\nimport { AiLocateSection } from '@/ai-model/inspect';\nimport { elementDescriberInstruction } from '@/ai-model/prompt/describe';\nimport type {\n AIDescribeElementResponse,\n AIElementResponse,\n AIUsageInfo,\n BaseElement,\n DetailedLocateParam,\n DumpSubscriber,\n InsightAction,\n InsightExtractOption,\n InsightExtractParam,\n InsightOptions,\n InsightTaskInfo,\n LocateResult,\n PartialInsightDumpFromSDK,\n Rect,\n UIContext,\n} from '@/types';\nimport {\n type IModelPreferences,\n MIDSCENE_FORCE_DEEP_THINK,\n getIsUseQwenVl,\n globalConfigManager,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { compositeElementInfoImg, cropByRect } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TMultimodalPrompt } from '../ai-model/common';\nimport { emitInsightDump } from './utils';\n\nexport interface LocateOpts {\n context?: UIContext<BaseElement>;\n callAI?: typeof callAiFn<AIElementResponse>;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\nconst debug = getDebug('ai:insight');\nexport default class Insight<\n ElementType extends BaseElement = BaseElement,\n ContextType extends UIContext<ElementType> = UIContext<ElementType>,\n> {\n contextRetrieverFn: (\n action: InsightAction,\n ) => Promise<ContextType> | ContextType;\n\n aiVendorFn: (...args: Array<any>) => Promise<any> = callAiFn;\n\n onceDumpUpdatedFn?: DumpSubscriber;\n\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n\n constructor(\n context:\n | ContextType\n | ((action: InsightAction) => Promise<ContextType> | ContextType),\n opt?: InsightOptions,\n ) {\n assert(context, 'context is required for Insight');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n if (typeof opt?.aiVendorFn !== 'undefined') {\n this.aiVendorFn = opt.aiVendorFn;\n }\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: DetailedLocateParam,\n opt?: LocateOpts,\n ): Promise<LocateResult> {\n const { callAI } = opt || {};\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(queryPrompt, 'query is required for locate');\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const globalDeepThinkSwitch = globalConfigManager.getEnvConfigInBoolean(\n MIDSCENE_FORCE_DEEP_THINK,\n );\n if (globalDeepThinkSwitch) {\n debug('globalDeepThinkSwitch', globalDeepThinkSwitch);\n }\n let searchAreaPrompt;\n if (query.deepThink || globalDeepThinkSwitch) {\n searchAreaPrompt = query.prompt;\n }\n const modelPreferences: IModelPreferences = {\n intent: 'grounding',\n };\n\n if (searchAreaPrompt && !vlLocateMode(modelPreferences)) {\n console.warn(\n 'The \"deepThink\" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/choose-a-model',\n );\n searchAreaPrompt = undefined;\n }\n\n const context = opt?.context || (await this.contextRetrieverFn('locate'));\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const {\n parseResult,\n rect,\n elementById,\n rawResponse,\n usage,\n isOrderSensitive,\n } = await AiLocateElement({\n callAI: callAI || this.aiVendorFn,\n context,\n targetElementDescription: queryPrompt,\n searchConfig: searchAreaResponse,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI model failed to locate: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepThink: !!searchArea,\n error: errorLog,\n };\n\n const elements: BaseElement[] = [];\n (parseResult.elements || []).forEach((item) => {\n if ('id' in item) {\n const element = elementById(item?.id);\n\n if (!element) {\n console.warn(\n `locate: cannot find element id=${item.id}. Maybe an unstable response from AI model`,\n );\n return;\n }\n elements.push(element);\n }\n });\n\n emitInsightDump(\n {\n ...dumpData,\n matchedElement: elements,\n },\n dumpSubscriber,\n );\n\n if (errorLog) {\n throw new Error(errorLog);\n }\n\n assert(\n elements.length <= 1,\n `locate: multiple elements found, length = ${elements.length}`,\n );\n\n if (elements.length === 1) {\n return {\n element: {\n id: elements[0]!.id,\n indexId: elements[0]!.indexId,\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n xpaths: elements[0]!.xpaths || [],\n attributes: elements[0]!.attributes,\n isOrderSensitive,\n },\n rect,\n };\n }\n return {\n element: null,\n rect,\n };\n }\n\n async extract<T>(\n dataDemand: InsightExtractParam,\n opt?: InsightExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<{\n data: T;\n thought?: string;\n usage?: AIUsageInfo;\n }> {\n assert(\n typeof dataDemand === 'object' || typeof dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const modelPreferences: IModelPreferences = {\n intent: 'VQA',\n };\n\n const context = await this.contextRetrieverFn('extract');\n\n const startTime = Date.now();\n const { parseResult, usage } = await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n multimodalPrompt,\n extractOption: opt,\n modelPreferences,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(parseResult),\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data, thought } = parseResult || {};\n\n // 4\n emitInsightDump(\n {\n ...dumpData,\n data,\n },\n dumpSubscriber,\n );\n\n if (errorLog && !data && !opt?.doNotThrowError) {\n throw new Error(errorLog);\n }\n\n return {\n data,\n thought,\n usage,\n };\n }\n\n async describe(\n target: Rect | [number, number],\n opt?: {\n deepThink?: boolean;\n },\n ): Promise<Pick<AIDescribeElementResponse, 'description'>> {\n assert(target, 'target is required for insight.describe');\n const context = await this.contextRetrieverFn('describe');\n const { screenshotBase64, size } = context;\n assert(screenshotBase64, 'screenshot is required for insight.describe');\n // The result of the \"describe\" function will be used for positioning, so essentially it is a form of grounding.\n const modelPreferences: IModelPreferences = { intent: 'grounding' };\n const systemPrompt = elementDescriberInstruction();\n\n // Convert [x,y] center point to Rect if needed\n const defaultRectSize = 30;\n const targetRect: Rect = Array.isArray(target)\n ? {\n left: Math.floor(target[0] - defaultRectSize / 2),\n top: Math.floor(target[1] - defaultRectSize / 2),\n width: defaultRectSize,\n height: defaultRectSize,\n }\n : target;\n\n let imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n size,\n elementsPositionInfo: [\n {\n rect: targetRect,\n },\n ],\n borderThickness: 3,\n });\n\n if (opt?.deepThink) {\n const searchArea = expandSearchArea(\n targetRect,\n context.size,\n modelPreferences,\n );\n debug('describe: set searchArea', searchArea);\n imagePayload = await cropByRect(\n imagePayload,\n searchArea,\n getIsUseQwenVl(modelPreferences),\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n },\n ];\n\n const callAIFn =\n this.aiVendorFn || callToGetJSONObject<AIDescribeElementResponse>;\n\n const res = await callAIFn(\n msgs,\n AIActionType.DESCRIBE_ELEMENT,\n modelPreferences,\n );\n\n const { content } = res;\n assert(!content.error, `describe failed: ${content.error}`);\n assert(content.description, 'failed to describe the element');\n return content;\n }\n}\n"],"names":["debug","getDebug","Insight","query","opt","_parseResult_errors","callAI","queryPrompt","assert","dumpSubscriber","undefined","globalDeepThinkSwitch","globalConfigManager","MIDSCENE_FORCE_DEEP_THINK","searchAreaPrompt","modelPreferences","vlLocateMode","console","context","searchArea","searchAreaRawResponse","searchAreaUsage","searchAreaResponse","AiLocateSection","startTime","Date","parseResult","rect","elementById","rawResponse","usage","isOrderSensitive","AiLocateElement","timeCost","taskInfo","JSON","errorLog","dumpData","elements","item","element","emitInsightDump","Error","dataDemand","multimodalPrompt","AiExtractElementInfo","data","thought","target","screenshotBase64","size","systemPrompt","elementDescriberInstruction","defaultRectSize","targetRect","Array","Math","imagePayload","compositeElementInfoImg","expandSearchArea","cropByRect","getIsUseQwenVl","msgs","callAIFn","callToGetJSONObject","res","AIActionType","content","callAiFn","Promise"],"mappings":";;;;;;;;;;;;;;;;;;;AAoDA,MAAMA,QAAQC,SAAS;AACR,MAAMC;IAmCnB,MAAM,OACJC,KAA0B,EAC1BC,GAAgB,EACO;YAkFnBC;QAjFJ,MAAM,EAAEC,MAAM,EAAE,GAAGF,OAAO,CAAC;QAC3B,MAAMG,cAAc,AAAiB,YAAjB,OAAOJ,QAAqBA,QAAQA,MAAM,MAAM;QACpEK,OAAOD,aAAa;QACpB,MAAME,iBAAiB,IAAI,CAAC,iBAAiB;QAC7C,IAAI,CAAC,iBAAiB,GAAGC;QAEzBF,OAAO,AAAiB,YAAjB,OAAOL,OAAoB;QAElC,MAAMQ,wBAAwBC,oBAAoB,qBAAqB,CACrEC;QAEF,IAAIF,uBACFX,MAAM,yBAAyBW;QAEjC,IAAIG;QACJ,IAAIX,MAAM,SAAS,IAAIQ,uBACrBG,mBAAmBX,MAAM,MAAM;QAEjC,MAAMY,mBAAsC;YAC1C,QAAQ;QACV;QAEA,IAAID,oBAAoB,CAACE,aAAaD,mBAAmB;YACvDE,QAAQ,IAAI,CACV;YAEFH,mBAAmBJ;QACrB;QAEA,MAAMQ,UAAUd,AAAAA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,OAAO,AAAD,KAAM,MAAM,IAAI,CAAC,kBAAkB,CAAC;QAE/D,IAAIe;QACJ,IAAIC;QACJ,IAAIC;QACJ,IAAIC;QAGJ,IAAIR,kBAAkB;YACpBQ,qBAAqB,MAAMC,gBAAgB;gBACzCL;gBACA,oBAAoBJ;YACtB;YACAN,OACEc,mBAAmB,IAAI,EACvB,CAAC,6BAA6B,EAAER,iBAAiB,CAAC,EAChDQ,mBAAmB,KAAK,GAAG,CAAC,EAAE,EAAEA,mBAAmB,KAAK,EAAE,GAAG,IAC7D;YAEJF,wBAAwBE,mBAAmB,WAAW;YACtDD,kBAAkBC,mBAAmB,KAAK;YAC1CH,aAAaG,mBAAmB,IAAI;QACtC;QAEA,MAAME,YAAYC,KAAK,GAAG;QAC1B,MAAM,EACJC,WAAW,EACXC,IAAI,EACJC,WAAW,EACXC,WAAW,EACXC,KAAK,EACLC,gBAAgB,EACjB,GAAG,MAAMC,gBAAgB;YACxB,QAAQ1B,UAAU,IAAI,CAAC,UAAU;YACjCY;YACA,0BAA0BX;YAC1B,cAAce;QAChB;QAEA,MAAMW,WAAWR,KAAK,GAAG,KAAKD;QAC9B,MAAMU,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaE,KAAK,SAAS,CAACN;YAC5B,gBAAgBM,KAAK,SAAS,CAACT;YAC/BI;YACAX;YACAC;YACAC;QACF;QAEA,IAAIe;QACJ,IAAI,QAAA/B,CAAAA,sBAAAA,YAAY,MAAM,AAAD,IAAjBA,KAAAA,IAAAA,oBAAoB,MAAM,EAC5B+B,WAAW,CAAC,6BAA6B,EAAEV,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAG5E,MAAMW,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACT,SAAS9B;YACX;YACA,gBAAgB,EAAE;YAClB,aAAaoB;YACb,MAAM;YACNO;YACA,WAAW,CAAC,CAACf;YACb,OAAOiB;QACT;QAEA,MAAME,WAA0B,EAAE;QACjCZ,CAAAA,YAAY,QAAQ,IAAI,EAAC,EAAG,OAAO,CAAC,CAACa;YACpC,IAAI,QAAQA,MAAM;gBAChB,MAAMC,UAAUZ,YAAYW,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,EAAE;gBAEpC,IAAI,CAACC,SAAS,YACZvB,QAAQ,IAAI,CACV,CAAC,+BAA+B,EAAEsB,KAAK,EAAE,CAAC,0CAA0C,CAAC;gBAIzFD,SAAS,IAAI,CAACE;YAChB;QACF;QAEAC,gBACE;YACE,GAAGJ,QAAQ;YACX,gBAAgBC;QAClB,GACA7B;QAGF,IAAI2B,UACF,MAAM,IAAIM,MAAMN;QAGlB5B,OACE8B,SAAS,MAAM,IAAI,GACnB,CAAC,0CAA0C,EAAEA,SAAS,MAAM,EAAE;QAGhE,IAAIA,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACL,SAAS;gBACP,IAAIA,QAAQ,CAAC,EAAE,CAAE,EAAE;gBACnB,SAASA,QAAQ,CAAC,EAAE,CAAE,OAAO;gBAC7B,QAAQA,QAAQ,CAAC,EAAE,CAAE,MAAM;gBAC3B,MAAMA,QAAQ,CAAC,EAAE,CAAE,IAAI;gBACvB,QAAQA,QAAQ,CAAC,EAAE,CAAE,MAAM,IAAI,EAAE;gBACjC,YAAYA,QAAQ,CAAC,EAAE,CAAE,UAAU;gBACnCP;YACF;YACAJ;QACF;QAEF,OAAO;YACL,SAAS;YACTA;QACF;IACF;IAEA,MAAM,QACJgB,UAA+B,EAC/BvC,GAA0B,EAC1BwC,gBAAoC,EAKnC;YA+BGvC;QA9BJG,OACE,AAAsB,YAAtB,OAAOmC,cAA2B,AAAsB,YAAtB,OAAOA,YACzC,CAAC,+CAA+C,EAAE,OAAOA,YAAY;QAEvE,MAAMlC,iBAAiB,IAAI,CAAC,iBAAiB;QAC7C,IAAI,CAAC,iBAAiB,GAAGC;QAEzB,MAAMK,mBAAsC;YAC1C,QAAQ;QACV;QAEA,MAAMG,UAAU,MAAM,IAAI,CAAC,kBAAkB,CAAC;QAE9C,MAAMM,YAAYC,KAAK,GAAG;QAC1B,MAAM,EAAEC,WAAW,EAAEI,KAAK,EAAE,GAAG,MAAMe,qBAAwB;YAC3D3B;YACA,WAAWyB;YACXC;YACA,eAAexC;YACfW;QACF;QAEA,MAAMkB,WAAWR,KAAK,GAAG,KAAKD;QAC9B,MAAMU,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaE,KAAK,SAAS,CAACT;QAC9B;QAEA,IAAIU;QACJ,IAAI,QAAA/B,CAAAA,sBAAAA,YAAY,MAAM,AAAD,IAAjBA,KAAAA,IAAAA,oBAAoB,MAAM,EAC5B+B,WAAW,CAAC,qBAAqB,EAAEV,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAGpE,MAAMW,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACTM;YACF;YACA,gBAAgB,EAAE;YAClB,MAAM;YACNT;YACA,OAAOE;QACT;QAEA,MAAM,EAAEU,IAAI,EAAEC,OAAO,EAAE,GAAGrB,eAAe,CAAC;QAG1Ce,gBACE;YACE,GAAGJ,QAAQ;YACXS;QACF,GACArC;QAGF,IAAI2B,YAAY,CAACU,QAAQ,CAAC1C,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,eAAe,AAAD,GAC3C,MAAM,IAAIsC,MAAMN;QAGlB,OAAO;YACLU;YACAC;YACAjB;QACF;IACF;IAEA,MAAM,SACJkB,MAA+B,EAC/B5C,GAEC,EACwD;QACzDI,OAAOwC,QAAQ;QACf,MAAM9B,UAAU,MAAM,IAAI,CAAC,kBAAkB,CAAC;QAC9C,MAAM,EAAE+B,gBAAgB,EAAEC,IAAI,EAAE,GAAGhC;QACnCV,OAAOyC,kBAAkB;QAEzB,MAAMlC,mBAAsC;YAAE,QAAQ;QAAY;QAClE,MAAMoC,eAAeC;QAGrB,MAAMC,kBAAkB;QACxB,MAAMC,aAAmBC,MAAM,OAAO,CAACP,UACnC;YACE,MAAMQ,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC/C,KAAKG,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC9C,OAAOA;YACP,QAAQA;QACV,IACAL;QAEJ,IAAIS,eAAe,MAAMC,wBAAwB;YAC/C,gBAAgBT;YAChBC;YACA,sBAAsB;gBACpB;oBACE,MAAMI;gBACR;aACD;YACD,iBAAiB;QACnB;QAEA,IAAIlD,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,SAAS,EAAE;YAClB,MAAMe,aAAawC,iBACjBL,YACApC,QAAQ,IAAI,EACZH;YAEFf,MAAM,4BAA4BmB;YAClCsC,eAAe,MAAMG,WACnBH,cACAtC,YACA0C,eAAe9C;QAEnB;QAEA,MAAM+C,OAAe;YACnB;gBAAE,MAAM;gBAAU,SAASX;YAAa;YACxC;gBACE,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKM;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;SACD;QAED,MAAMM,WACJ,IAAI,CAAC,UAAU,IAAIC;QAErB,MAAMC,MAAM,MAAMF,SAChBD,MACAI,aAAa,gBAAgB,EAC7BnD;QAGF,MAAM,EAAEoD,OAAO,EAAE,GAAGF;QACpBzD,OAAO,CAAC2D,QAAQ,KAAK,EAAE,CAAC,iBAAiB,EAAEA,QAAQ,KAAK,EAAE;QAC1D3D,OAAO2D,QAAQ,WAAW,EAAE;QAC5B,OAAOA;IACT;IA1UA,YACEjD,OAEmE,EACnEd,GAAoB,CACpB;QAfF;QAIA,qCAAoDgE;QAEpD;QAEA;QAQE5D,OAAOU,SAAS;QAChB,IAAI,AAAmB,cAAnB,OAAOA,SACT,IAAI,CAAC,kBAAkB,GAAGA;aAE1B,IAAI,CAAC,kBAAkB,GAAG,IAAMmD,QAAQ,OAAO,CAACnD;QAGlD,IAAI,AAA2B,WAApBd,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,UAAU,AAAD,GACvB,IAAI,CAAC,UAAU,GAAGA,IAAI,UAAU;QAElC,IAAI,AAAyB,WAAlBA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,QAAQ,AAAD,GACrB,IAAI,CAAC,QAAQ,GAAGA,IAAI,QAAQ;IAEhC;AAwTF"}
|
package/dist/es/types.mjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.mjs","sources":["webpack://@midscene/core/./src/types.ts"],"sourcesContent":["/* eslint-disable @typescript-eslint/no-explicit-any */\n\nimport type { NodeType } from '@midscene/shared/constants';\nimport type { TModelConfigFn } from '@midscene/shared/env';\nimport type {\n BaseElement,\n ElementTreeNode,\n Rect,\n Size,\n} from '@midscene/shared/types';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { z } from 'zod';\nimport type { TUserPrompt } from './ai-model/common';\nimport type { DetailedLocateParam, MidsceneYamlFlowItem } from './yaml';\n\nexport type {\n ElementTreeNode,\n BaseElement,\n Rect,\n Size,\n Point,\n} from '@midscene/shared/types';\nexport * from './yaml';\n\nexport type AIUsageInfo = Record<string, any> & {\n prompt_tokens: number | undefined;\n completion_tokens: number | undefined;\n total_tokens: number | undefined;\n time_cost: number | undefined;\n model_name: string | undefined;\n model_description: string | undefined;\n intent: string | undefined;\n};\n\n/**\n * openai\n *\n */\nexport enum AIResponseFormat {\n JSON = 'json_object',\n TEXT = 'text',\n}\n\nexport type AISingleElementResponseById = {\n id: string;\n reason?: string;\n text?: string;\n xpaths?: string[];\n};\n\nexport type AISingleElementResponseByPosition = {\n position?: {\n x: number;\n y: number;\n };\n bbox?: [number, number, number, number];\n reason: string;\n text: string;\n};\n\nexport type AISingleElementResponse = AISingleElementResponseById;\nexport interface AIElementLocatorResponse {\n elements: {\n id: string;\n reason?: string;\n text?: string;\n xpaths?: string[];\n }[];\n bbox?: [number, number, number, number];\n isOrderSensitive?: boolean;\n errors?: string[];\n}\n\nexport interface AIElementCoordinatesResponse {\n bbox: [number, number, number, number];\n isOrderSensitive?: boolean;\n errors?: string[];\n}\n\nexport type AIElementResponse =\n | AIElementLocatorResponse\n | AIElementCoordinatesResponse;\n\nexport interface AIDataExtractionResponse<DataDemand> {\n data: DataDemand;\n errors?: string[];\n thought?: string;\n}\n\nexport interface AISectionLocatorResponse {\n bbox: [number, number, number, number];\n references_bbox?: [number, number, number, number][];\n error?: string;\n}\n\nexport interface AIAssertionResponse {\n pass: boolean;\n thought: string;\n}\n\nexport interface AIDescribeElementResponse {\n description: string;\n error?: string;\n}\n\nexport interface LocatorValidatorOption {\n centerDistanceThreshold?: number;\n}\n\nexport interface LocateValidatorResult {\n pass: boolean;\n rect: Rect;\n center: [number, number];\n centerDistance?: number;\n}\n\nexport interface AgentDescribeElementAtPointResult {\n prompt: string;\n deepThink: boolean;\n verifyResult?: LocateValidatorResult;\n}\n\n/**\n * context\n */\n\nexport abstract class UIContext<ElementType extends BaseElement = BaseElement> {\n abstract screenshotBase64: string;\n\n abstract tree: ElementTreeNode<ElementType>;\n\n abstract size: Size;\n\n abstract _isFrozen?: boolean;\n}\n\nexport type EnsureObject<T> = { [K in keyof T]: any };\n\nexport type InsightAction = 'locate' | 'extract' | 'assert' | 'describe';\n\nexport type InsightExtractParam = string | Record<string, string>;\n\nexport type LocateResultElement = {\n center: [number, number];\n rect: Rect;\n id: string;\n indexId?: number;\n xpaths: string[];\n attributes: {\n nodeType: NodeType;\n [key: string]: string;\n };\n isOrderSensitive?: boolean;\n};\n\nexport interface LocateResult {\n element: LocateResultElement | null;\n rect?: Rect;\n}\n\nexport interface InsightTaskInfo {\n durationMs: number;\n formatResponse?: string;\n rawResponse?: string;\n usage?: AIUsageInfo;\n searchArea?: Rect;\n searchAreaRawResponse?: string;\n searchAreaUsage?: AIUsageInfo;\n}\n\nexport interface DumpMeta {\n sdkVersion: string;\n logTime: number;\n}\n\nexport interface ReportDumpWithAttributes {\n dumpString: string;\n attributes?: Record<string, any>;\n}\n\nexport interface InsightDump extends DumpMeta {\n type: 'locate' | 'extract' | 'assert';\n logId: string;\n userQuery: {\n element?: TUserPrompt;\n dataDemand?: InsightExtractParam;\n assertion?: TUserPrompt;\n };\n matchedElement: BaseElement[];\n matchedRect?: Rect;\n deepThink?: boolean;\n data: any;\n assertionPass?: boolean;\n assertionThought?: string;\n taskInfo: InsightTaskInfo;\n error?: string;\n output?: any;\n}\n\nexport type PartialInsightDumpFromSDK = Omit<\n InsightDump,\n 'sdkVersion' | 'logTime' | 'logId' | 'model_name'\n>;\n\nexport type DumpSubscriber = (dump: InsightDump) => Promise<void> | void;\n\n// intermediate variables to optimize the return value by AI\nexport interface LiteUISection {\n name: string;\n description: string;\n sectionCharacteristics: string;\n textIds: string[];\n}\n\nexport type ElementById = (id: string) => BaseElement | null;\n\nexport type InsightAssertionResponse = AIAssertionResponse & {\n usage?: AIUsageInfo;\n};\n\n/**\n * agent\n */\n\nexport type OnTaskStartTip = (tip: string) => Promise<void> | void;\n\nexport interface AgentWaitForOpt {\n checkIntervalMs?: number;\n timeoutMs?: number;\n}\n\nexport interface AgentAssertOpt {\n keepRawResponse?: boolean;\n}\n\n/**\n * planning\n *\n */\n\nexport interface PlanningLocateParam extends DetailedLocateParam {\n id?: string;\n bbox?: [number, number, number, number];\n}\n\nexport interface PlanningAction<ParamType = any> {\n thought?: string;\n type: string;\n param: ParamType;\n locate?: PlanningLocateParam | null;\n}\n\nexport interface PlanningAIResponse {\n action?: PlanningAction; // this is the qwen mode\n actions?: PlanningAction[];\n more_actions_needed_by_instruction: boolean;\n log: string;\n sleep?: number;\n error?: string;\n usage?: AIUsageInfo;\n rawResponse?: string;\n yamlFlow?: MidsceneYamlFlowItem[];\n yamlString?: string;\n}\n\nexport type PlanningActionParamTap = null;\nexport type PlanningActionParamHover = null;\nexport type PlanningActionParamRightClick = null;\n\nexport interface PlanningActionParamInputOrKeyPress {\n value: string;\n autoDismissKeyboard?: boolean;\n}\n\nexport interface PlanningActionParamSleep {\n timeMs: number;\n}\n\nexport interface PlanningActionParamError {\n thought: string;\n}\n\nexport type PlanningActionParamWaitFor = AgentWaitForOpt & {};\n\nexport interface AndroidLongPressParam {\n duration?: number;\n}\n\nexport interface AndroidPullParam {\n direction: 'up' | 'down';\n distance?: number;\n duration?: number;\n}\n/**\n * misc\n */\n\nexport interface Color {\n name: string;\n hex: string;\n}\n\nexport interface BaseAgentParserOpt {\n selector?: string;\n}\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PuppeteerParserOpt extends BaseAgentParserOpt {}\n\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PlaywrightParserOpt extends BaseAgentParserOpt {}\n\n/*\naction\n*/\nexport interface ExecutionTaskProgressOptions {\n onTaskStart?: (task: ExecutionTask) => Promise<void> | void;\n}\n\nexport interface ExecutionRecorderItem {\n type: 'screenshot';\n ts: number;\n screenshot?: string;\n timing?: string;\n}\n\nexport type ExecutionTaskType =\n | 'Planning'\n | 'Insight'\n | 'Action'\n | 'Assertion'\n | 'Log';\n\nexport interface ExecutorContext {\n task: ExecutionTask;\n element?: LocateResultElement | null;\n}\n\nexport interface ExecutionTaskApply<\n Type extends ExecutionTaskType = any,\n TaskParam = any,\n TaskOutput = any,\n TaskLog = any,\n> {\n type: Type;\n subType?: string;\n param?: TaskParam;\n thought?: string;\n locate?: PlanningLocateParam | null;\n uiContext?: UIContext;\n executor: (\n param: TaskParam,\n context: ExecutorContext,\n ) => // biome-ignore lint/suspicious/noConfusingVoidType: <explanation>\n | Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void>\n | undefined\n | void;\n}\n\nexport interface ExecutionTaskHitBy {\n from: string;\n context: Record<string, any>;\n}\n\nexport interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {\n output?: TaskOutput;\n log?: TaskLog;\n recorder?: ExecutionRecorderItem[];\n hitBy?: ExecutionTaskHitBy;\n}\n\nexport type ExecutionTask<\n E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<\n any,\n any,\n any\n >,\n> = E &\n ExecutionTaskReturn<\n E extends ExecutionTaskApply<any, any, infer TaskOutput, any>\n ? TaskOutput\n : unknown,\n E extends ExecutionTaskApply<any, any, any, infer TaskLog>\n ? TaskLog\n : unknown\n > & {\n status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';\n error?: Error;\n errorMessage?: string;\n errorStack?: string;\n timing?: {\n start: number;\n end?: number;\n cost?: number;\n };\n usage?: AIUsageInfo;\n };\n\nexport interface ExecutionDump extends DumpMeta {\n name: string;\n description?: string;\n tasks: ExecutionTask[];\n}\n\n/*\ntask - insight-locate\n*/\nexport type ExecutionTaskInsightLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskInsightLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport interface ExecutionTaskInsightDumpLog {\n dump?: InsightDump;\n}\n\nexport type ExecutionTaskInsightLocateApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightLocateParam,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightLocate =\n ExecutionTask<ExecutionTaskInsightLocateApply>;\n\n/*\ntask - insight-query\n*/\nexport interface ExecutionTaskInsightQueryParam {\n dataDemand: InsightExtractParam;\n}\n\nexport interface ExecutionTaskInsightQueryOutput {\n data: any;\n}\n\nexport type ExecutionTaskInsightQueryApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightQueryParam,\n any,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightQuery =\n ExecutionTask<ExecutionTaskInsightQueryApply>;\n\n/*\ntask - assertion\n*/\nexport interface ExecutionTaskInsightAssertionParam {\n assertion: string;\n}\n\nexport type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightAssertionParam,\n InsightAssertionResponse,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightAssertion =\n ExecutionTask<ExecutionTaskInsightAssertionApply>;\n\n/*\ntask - action (i.e. interact) \n*/\nexport type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<\n 'Action',\n ActionParam,\n void,\n void\n>;\n\nexport type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;\n\n/*\ntask - Log\n*/\n\nexport type ExecutionTaskLogApply<\n LogParam = {\n content: string;\n },\n> = ExecutionTaskApply<'Log', LogParam, void, void>;\n\nexport type ExecutionTaskLog = ExecutionTask<ExecutionTaskLogApply>;\n\n/*\ntask - planning\n*/\n\nexport type ExecutionTaskPlanningApply = ExecutionTaskApply<\n 'Planning',\n {\n userInstruction: string;\n log?: string;\n },\n PlanningAIResponse\n>;\n\nexport type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;\n\n/*\nGrouped dump\n*/\nexport interface GroupedActionDump {\n groupName: string;\n groupDescription?: string;\n modelBriefs: string[];\n executions: ExecutionDump[];\n}\n\nexport type InterfaceType =\n | 'puppeteer'\n | 'playwright'\n | 'static'\n | 'chrome-extension-proxy'\n | 'android'\n | string;\n\nexport interface StreamingCodeGenerationOptions {\n /** Whether to enable streaming output */\n stream?: boolean;\n /** Callback function to handle streaming chunks */\n onChunk?: StreamingCallback;\n /** Callback function to handle streaming completion */\n onComplete?: (finalCode: string) => void;\n /** Callback function to handle streaming errors */\n onError?: (error: Error) => void;\n}\n\nexport type StreamingCallback = (chunk: CodeGenerationChunk) => void;\n\nexport interface CodeGenerationChunk {\n /** The incremental content chunk */\n content: string;\n /** The reasoning content */\n reasoning_content: string;\n /** The accumulated content so far */\n accumulated: string;\n /** Whether this is the final chunk */\n isComplete: boolean;\n /** Token usage information if available */\n usage?: AIUsageInfo;\n}\n\nexport interface StreamingAIResponse {\n /** The final accumulated content */\n content: string;\n /** Token usage information */\n usage?: AIUsageInfo;\n /** Whether the response was streamed */\n isStreamed: boolean;\n}\n\nexport interface DeviceAction<T = any> {\n name: string;\n description?: string;\n interfaceAlias?: string;\n paramSchema?: z.ZodType<T>;\n call: (param: T, context: ExecutorContext) => Promise<void> | void;\n}\n\n/**\n * Web-specific types\n */\nexport interface WebElementInfo extends BaseElement {\n id: string;\n attributes: {\n nodeType: NodeType;\n [key: string]: string;\n };\n}\n\nexport type WebUIContext = UIContext<WebElementInfo>;\n\n/**\n * Agent\n */\n\nexport interface AgentOpt {\n testId?: string;\n cacheId?: string;\n groupName?: string;\n groupDescription?: string;\n /* if auto generate report, default true */\n generateReport?: boolean;\n /* if auto print report msg, default true */\n autoPrintReportMsg?: boolean;\n onTaskStartTip?: OnTaskStartTip;\n aiActionContext?: string;\n /* custom report file name */\n reportFileName?: string;\n modelConfig?: TModelConfigFn;\n}\n"],"names":["AIResponseFormat","UIContext"],"mappings":";AAsCO,IAAKA,yBAAgBA,WAAAA,GAAAA,SAAhBA,gBAAgB;;;WAAhBA;;AAwFL,MAAeC;AAQtB"}
|
|
1
|
+
{"version":3,"file":"types.mjs","sources":["webpack://@midscene/core/./src/types.ts"],"sourcesContent":["/* eslint-disable @typescript-eslint/no-explicit-any */\n\nimport type { NodeType } from '@midscene/shared/constants';\nimport type { TModelConfigFn } from '@midscene/shared/env';\nimport type {\n BaseElement,\n ElementTreeNode,\n Rect,\n Size,\n} from '@midscene/shared/types';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { z } from 'zod';\nimport type { TUserPrompt } from './ai-model/common';\nimport type { DetailedLocateParam, MidsceneYamlFlowItem } from './yaml';\n\nexport type {\n ElementTreeNode,\n BaseElement,\n Rect,\n Size,\n Point,\n} from '@midscene/shared/types';\nexport * from './yaml';\n\nexport type AIUsageInfo = Record<string, any> & {\n prompt_tokens: number | undefined;\n completion_tokens: number | undefined;\n total_tokens: number | undefined;\n time_cost: number | undefined;\n model_name: string | undefined;\n model_description: string | undefined;\n intent: string | undefined;\n};\n\n/**\n * openai\n *\n */\nexport enum AIResponseFormat {\n JSON = 'json_object',\n TEXT = 'text',\n}\n\nexport type AISingleElementResponseById = {\n id: string;\n reason?: string;\n text?: string;\n xpaths?: string[];\n};\n\nexport type AISingleElementResponseByPosition = {\n position?: {\n x: number;\n y: number;\n };\n bbox?: [number, number, number, number];\n reason: string;\n text: string;\n};\n\nexport type AISingleElementResponse = AISingleElementResponseById;\nexport interface AIElementLocatorResponse {\n elements: {\n id: string;\n reason?: string;\n text?: string;\n xpaths?: string[];\n }[];\n bbox?: [number, number, number, number];\n isOrderSensitive?: boolean;\n errors?: string[];\n}\n\nexport interface AIElementCoordinatesResponse {\n bbox: [number, number, number, number];\n isOrderSensitive?: boolean;\n errors?: string[];\n}\n\nexport type AIElementResponse =\n | AIElementLocatorResponse\n | AIElementCoordinatesResponse;\n\nexport interface AIDataExtractionResponse<DataDemand> {\n data: DataDemand;\n errors?: string[];\n thought?: string;\n}\n\nexport interface AISectionLocatorResponse {\n bbox: [number, number, number, number];\n references_bbox?: [number, number, number, number][];\n error?: string;\n}\n\nexport interface AIAssertionResponse {\n pass: boolean;\n thought: string;\n}\n\nexport interface AIDescribeElementResponse {\n description: string;\n error?: string;\n}\n\nexport interface LocatorValidatorOption {\n centerDistanceThreshold?: number;\n}\n\nexport interface LocateValidatorResult {\n pass: boolean;\n rect: Rect;\n center: [number, number];\n centerDistance?: number;\n}\n\nexport interface AgentDescribeElementAtPointResult {\n prompt: string;\n deepThink: boolean;\n verifyResult?: LocateValidatorResult;\n}\n\n/**\n * context\n */\n\nexport abstract class UIContext<ElementType extends BaseElement = BaseElement> {\n abstract screenshotBase64: string;\n\n abstract tree: ElementTreeNode<ElementType>;\n\n abstract size: Size;\n\n abstract _isFrozen?: boolean;\n}\n\n/**\n * insight\n */\n\nexport type CallAIFn = <T>(\n messages: ChatCompletionMessageParam[],\n) => Promise<T>;\n\nexport interface InsightOptions {\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n aiVendorFn?: CallAIFn;\n}\n\nexport type EnsureObject<T> = { [K in keyof T]: any };\n\nexport type InsightAction = 'locate' | 'extract' | 'assert' | 'describe';\n\nexport type InsightExtractParam = string | Record<string, string>;\n\nexport type LocateResultElement = {\n center: [number, number];\n rect: Rect;\n id: string;\n indexId?: number;\n xpaths: string[];\n attributes: {\n nodeType: NodeType;\n [key: string]: string;\n };\n isOrderSensitive?: boolean;\n};\n\nexport interface LocateResult {\n element: LocateResultElement | null;\n rect?: Rect;\n}\n\nexport interface InsightTaskInfo {\n durationMs: number;\n formatResponse?: string;\n rawResponse?: string;\n usage?: AIUsageInfo;\n searchArea?: Rect;\n searchAreaRawResponse?: string;\n searchAreaUsage?: AIUsageInfo;\n}\n\nexport interface DumpMeta {\n sdkVersion: string;\n logTime: number;\n}\n\nexport interface ReportDumpWithAttributes {\n dumpString: string;\n attributes?: Record<string, any>;\n}\n\nexport interface InsightDump extends DumpMeta {\n type: 'locate' | 'extract' | 'assert';\n logId: string;\n userQuery: {\n element?: TUserPrompt;\n dataDemand?: InsightExtractParam;\n assertion?: TUserPrompt;\n };\n matchedElement: BaseElement[];\n matchedRect?: Rect;\n deepThink?: boolean;\n data: any;\n assertionPass?: boolean;\n assertionThought?: string;\n taskInfo: InsightTaskInfo;\n error?: string;\n output?: any;\n}\n\nexport type PartialInsightDumpFromSDK = Omit<\n InsightDump,\n 'sdkVersion' | 'logTime' | 'logId' | 'model_name'\n>;\n\nexport type DumpSubscriber = (dump: InsightDump) => Promise<void> | void;\n\n// intermediate variables to optimize the return value by AI\nexport interface LiteUISection {\n name: string;\n description: string;\n sectionCharacteristics: string;\n textIds: string[];\n}\n\nexport type ElementById = (id: string) => BaseElement | null;\n\nexport type InsightAssertionResponse = AIAssertionResponse & {\n usage?: AIUsageInfo;\n};\n\n/**\n * agent\n */\n\nexport type OnTaskStartTip = (tip: string) => Promise<void> | void;\n\nexport interface AgentWaitForOpt {\n checkIntervalMs?: number;\n timeoutMs?: number;\n}\n\nexport interface AgentAssertOpt {\n keepRawResponse?: boolean;\n}\n\n/**\n * planning\n *\n */\n\nexport interface PlanningLocateParam extends DetailedLocateParam {\n id?: string;\n bbox?: [number, number, number, number];\n}\n\nexport interface PlanningAction<ParamType = any> {\n thought?: string;\n type: string;\n param: ParamType;\n locate?: PlanningLocateParam | null;\n}\n\nexport interface PlanningAIResponse {\n action?: PlanningAction; // this is the qwen mode\n actions?: PlanningAction[];\n more_actions_needed_by_instruction: boolean;\n log: string;\n sleep?: number;\n error?: string;\n usage?: AIUsageInfo;\n rawResponse?: string;\n yamlFlow?: MidsceneYamlFlowItem[];\n yamlString?: string;\n}\n\nexport type PlanningActionParamTap = null;\nexport type PlanningActionParamHover = null;\nexport type PlanningActionParamRightClick = null;\n\nexport interface PlanningActionParamInputOrKeyPress {\n value: string;\n autoDismissKeyboard?: boolean;\n}\n\nexport interface PlanningActionParamSleep {\n timeMs: number;\n}\n\nexport interface PlanningActionParamError {\n thought: string;\n}\n\nexport type PlanningActionParamWaitFor = AgentWaitForOpt & {};\n\nexport interface AndroidLongPressParam {\n duration?: number;\n}\n\nexport interface AndroidPullParam {\n direction: 'up' | 'down';\n distance?: number;\n duration?: number;\n}\n/**\n * misc\n */\n\nexport interface Color {\n name: string;\n hex: string;\n}\n\nexport interface BaseAgentParserOpt {\n selector?: string;\n}\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PuppeteerParserOpt extends BaseAgentParserOpt {}\n\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PlaywrightParserOpt extends BaseAgentParserOpt {}\n\n/*\naction\n*/\nexport interface ExecutionTaskProgressOptions {\n onTaskStart?: (task: ExecutionTask) => Promise<void> | void;\n}\n\nexport interface ExecutionRecorderItem {\n type: 'screenshot';\n ts: number;\n screenshot?: string;\n timing?: string;\n}\n\nexport type ExecutionTaskType =\n | 'Planning'\n | 'Insight'\n | 'Action'\n | 'Assertion'\n | 'Log';\n\nexport interface ExecutorContext {\n task: ExecutionTask;\n element?: LocateResultElement | null;\n}\n\nexport interface ExecutionTaskApply<\n Type extends ExecutionTaskType = any,\n TaskParam = any,\n TaskOutput = any,\n TaskLog = any,\n> {\n type: Type;\n subType?: string;\n param?: TaskParam;\n thought?: string;\n locate?: PlanningLocateParam | null;\n uiContext?: UIContext;\n executor: (\n param: TaskParam,\n context: ExecutorContext,\n ) => // biome-ignore lint/suspicious/noConfusingVoidType: <explanation>\n | Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void>\n | undefined\n | void;\n}\n\nexport interface ExecutionTaskHitBy {\n from: string;\n context: Record<string, any>;\n}\n\nexport interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {\n output?: TaskOutput;\n log?: TaskLog;\n recorder?: ExecutionRecorderItem[];\n hitBy?: ExecutionTaskHitBy;\n}\n\nexport type ExecutionTask<\n E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<\n any,\n any,\n any\n >,\n> = E &\n ExecutionTaskReturn<\n E extends ExecutionTaskApply<any, any, infer TaskOutput, any>\n ? TaskOutput\n : unknown,\n E extends ExecutionTaskApply<any, any, any, infer TaskLog>\n ? TaskLog\n : unknown\n > & {\n status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';\n error?: Error;\n errorMessage?: string;\n errorStack?: string;\n timing?: {\n start: number;\n end?: number;\n cost?: number;\n };\n usage?: AIUsageInfo;\n };\n\nexport interface ExecutionDump extends DumpMeta {\n name: string;\n description?: string;\n tasks: ExecutionTask[];\n}\n\n/*\ntask - insight-locate\n*/\nexport type ExecutionTaskInsightLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskInsightLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport interface ExecutionTaskInsightDumpLog {\n dump?: InsightDump;\n}\n\nexport type ExecutionTaskInsightLocateApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightLocateParam,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightLocate =\n ExecutionTask<ExecutionTaskInsightLocateApply>;\n\n/*\ntask - insight-query\n*/\nexport interface ExecutionTaskInsightQueryParam {\n dataDemand: InsightExtractParam;\n}\n\nexport interface ExecutionTaskInsightQueryOutput {\n data: any;\n}\n\nexport type ExecutionTaskInsightQueryApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightQueryParam,\n any,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightQuery =\n ExecutionTask<ExecutionTaskInsightQueryApply>;\n\n/*\ntask - assertion\n*/\nexport interface ExecutionTaskInsightAssertionParam {\n assertion: string;\n}\n\nexport type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightAssertionParam,\n InsightAssertionResponse,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightAssertion =\n ExecutionTask<ExecutionTaskInsightAssertionApply>;\n\n/*\ntask - action (i.e. interact) \n*/\nexport type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<\n 'Action',\n ActionParam,\n void,\n void\n>;\n\nexport type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;\n\n/*\ntask - Log\n*/\n\nexport type ExecutionTaskLogApply<\n LogParam = {\n content: string;\n },\n> = ExecutionTaskApply<'Log', LogParam, void, void>;\n\nexport type ExecutionTaskLog = ExecutionTask<ExecutionTaskLogApply>;\n\n/*\ntask - planning\n*/\n\nexport type ExecutionTaskPlanningApply = ExecutionTaskApply<\n 'Planning',\n {\n userInstruction: string;\n log?: string;\n },\n PlanningAIResponse\n>;\n\nexport type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;\n\n/*\nGrouped dump\n*/\nexport interface GroupedActionDump {\n groupName: string;\n groupDescription?: string;\n modelBriefs: string[];\n executions: ExecutionDump[];\n}\n\nexport type InterfaceType =\n | 'puppeteer'\n | 'playwright'\n | 'static'\n | 'chrome-extension-proxy'\n | 'android'\n | string;\n\nexport interface StreamingCodeGenerationOptions {\n /** Whether to enable streaming output */\n stream?: boolean;\n /** Callback function to handle streaming chunks */\n onChunk?: StreamingCallback;\n /** Callback function to handle streaming completion */\n onComplete?: (finalCode: string) => void;\n /** Callback function to handle streaming errors */\n onError?: (error: Error) => void;\n}\n\nexport type StreamingCallback = (chunk: CodeGenerationChunk) => void;\n\nexport interface CodeGenerationChunk {\n /** The incremental content chunk */\n content: string;\n /** The reasoning content */\n reasoning_content: string;\n /** The accumulated content so far */\n accumulated: string;\n /** Whether this is the final chunk */\n isComplete: boolean;\n /** Token usage information if available */\n usage?: AIUsageInfo;\n}\n\nexport interface StreamingAIResponse {\n /** The final accumulated content */\n content: string;\n /** Token usage information */\n usage?: AIUsageInfo;\n /** Whether the response was streamed */\n isStreamed: boolean;\n}\n\nexport interface DeviceAction<T = any> {\n name: string;\n description?: string;\n interfaceAlias?: string;\n paramSchema?: z.ZodType<T>;\n call: (param: T, context: ExecutorContext) => Promise<void> | void;\n}\n\n/**\n * Web-specific types\n */\nexport interface WebElementInfo extends BaseElement {\n id: string;\n attributes: {\n nodeType: NodeType;\n [key: string]: string;\n };\n}\n\nexport type WebUIContext = UIContext<WebElementInfo>;\n\n/**\n * Agent\n */\n\nexport interface AgentOpt {\n testId?: string;\n cacheId?: string;\n groupName?: string;\n groupDescription?: string;\n /* if auto generate report, default true */\n generateReport?: boolean;\n /* if auto print report msg, default true */\n autoPrintReportMsg?: boolean;\n onTaskStartTip?: OnTaskStartTip;\n aiActionContext?: string;\n /* custom report file name */\n reportFileName?: string;\n modelConfig?: TModelConfigFn;\n}\n"],"names":["AIResponseFormat","UIContext"],"mappings":";AAsCO,IAAKA,yBAAgBA,WAAAA,GAAAA,SAAhBA,gBAAgB;;;WAAhBA;;AAwFL,MAAeC;AAQtB"}
|