@midscene/core 1.1.0 → 1.1.1-beta-20260106014949.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/utils.mjs +1 -1
- package/dist/es/ai-model/index.mjs +2 -2
- package/dist/es/ai-model/inspect.mjs +5 -5
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/llm-planning.mjs +2 -2
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/playwright-generator.mjs +4 -4
- package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/yaml-generator.mjs +4 -4
- package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +6 -7
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/ai-model/ui-tars-planning.mjs +1 -2
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
- package/dist/es/common.mjs +1 -10
- package/dist/es/common.mjs.map +1 -1
- package/dist/es/service/index.mjs +2 -2
- package/dist/es/service/index.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/lib/agent/utils.js +1 -1
- package/dist/lib/ai-model/index.js +11 -14
- package/dist/lib/ai-model/inspect.js +4 -4
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/llm-planning.js +1 -1
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/playwright-generator.js +3 -3
- package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
- package/dist/lib/ai-model/prompt/yaml-generator.js +3 -3
- package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +6 -7
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/ai-model/ui-tars-planning.js +1 -2
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
- package/dist/lib/common.js +1 -13
- package/dist/lib/common.js.map +1 -1
- package/dist/lib/service/index.js +1 -1
- package/dist/lib/service/index.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/ai-model/index.d.ts +1 -1
- package/dist/types/ai-model/service-caller/index.d.ts +4 -4
- package/dist/types/common.d.ts +0 -8
- package/package.json +2 -2
package/dist/lib/agent/utils.js
CHANGED
|
@@ -148,7 +148,7 @@ async function matchElementFromCache(context, cacheEntry, cachePrompt, cacheable
|
|
|
148
148
|
return;
|
|
149
149
|
}
|
|
150
150
|
}
|
|
151
|
-
const getMidsceneVersion = ()=>"1.1.0";
|
|
151
|
+
const getMidsceneVersion = ()=>"1.1.1-beta-20260106014949.0";
|
|
152
152
|
const parsePrompt = (prompt)=>{
|
|
153
153
|
if ('string' == typeof prompt) return {
|
|
154
154
|
textPrompt: prompt,
|
|
@@ -24,35 +24,34 @@ var __webpack_require__ = {};
|
|
|
24
24
|
var __webpack_exports__ = {};
|
|
25
25
|
__webpack_require__.r(__webpack_exports__);
|
|
26
26
|
__webpack_require__.d(__webpack_exports__, {
|
|
27
|
+
resizeImageForUiTars: ()=>external_ui_tars_planning_js_namespaceObject.resizeImageForUiTars,
|
|
27
28
|
callAIWithObjectResponse: ()=>index_js_namespaceObject.callAIWithObjectResponse,
|
|
28
29
|
TUserPromptSchema: ()=>external_common_js_namespaceObject.TUserPromptSchema,
|
|
29
30
|
generatePlaywrightTest: ()=>playwright_generator_js_namespaceObject.generatePlaywrightTest,
|
|
30
31
|
describeUserPage: ()=>util_js_namespaceObject.describeUserPage,
|
|
31
32
|
plan: ()=>external_llm_planning_js_namespaceObject.plan,
|
|
33
|
+
systemPromptToLocateElement: ()=>llm_locator_js_namespaceObject.systemPromptToLocateElement,
|
|
34
|
+
callAI: ()=>index_js_namespaceObject.callAI,
|
|
32
35
|
parseActionParam: ()=>external_common_js_namespaceObject.parseActionParam,
|
|
36
|
+
adaptBboxToRect: ()=>external_common_js_namespaceObject.adaptBboxToRect,
|
|
37
|
+
AiLocateElement: ()=>external_inspect_js_namespaceObject.AiLocateElement,
|
|
33
38
|
AiExtractElementInfo: ()=>external_inspect_js_namespaceObject.AiExtractElementInfo,
|
|
34
39
|
callAIWithStringResponse: ()=>index_js_namespaceObject.callAIWithStringResponse,
|
|
40
|
+
uiTarsPlanning: ()=>external_ui_tars_planning_js_namespaceObject.uiTarsPlanning,
|
|
35
41
|
generateYamlTest: ()=>yaml_generator_js_namespaceObject.generateYamlTest,
|
|
36
42
|
SizeSchema: ()=>external_common_js_namespaceObject.SizeSchema,
|
|
43
|
+
findAllMidsceneLocatorField: ()=>external_common_js_namespaceObject.findAllMidsceneLocatorField,
|
|
44
|
+
generatePlaywrightTestStream: ()=>playwright_generator_js_namespaceObject.generatePlaywrightTestStream,
|
|
37
45
|
AiJudgeOrderSensitive: ()=>external_inspect_js_namespaceObject.AiJudgeOrderSensitive,
|
|
46
|
+
AiLocateSection: ()=>external_inspect_js_namespaceObject.AiLocateSection,
|
|
38
47
|
getMidsceneLocationSchema: ()=>external_common_js_namespaceObject.getMidsceneLocationSchema,
|
|
39
48
|
ConversationHistory: ()=>external_conversation_history_js_namespaceObject.ConversationHistory,
|
|
40
49
|
generateYamlTestStream: ()=>yaml_generator_js_namespaceObject.generateYamlTestStream,
|
|
41
50
|
loadActionParam: ()=>external_common_js_namespaceObject.loadActionParam,
|
|
42
|
-
|
|
51
|
+
TMultimodalPromptSchema: ()=>external_common_js_namespaceObject.TMultimodalPromptSchema,
|
|
43
52
|
PointSchema: ()=>external_common_js_namespaceObject.PointSchema,
|
|
44
|
-
AIActionType: ()=>external_common_js_namespaceObject.AIActionType,
|
|
45
53
|
RectSchema: ()=>external_common_js_namespaceObject.RectSchema,
|
|
46
|
-
|
|
47
|
-
systemPromptToLocateElement: ()=>llm_locator_js_namespaceObject.systemPromptToLocateElement,
|
|
48
|
-
callAI: ()=>index_js_namespaceObject.callAI,
|
|
49
|
-
adaptBboxToRect: ()=>external_common_js_namespaceObject.adaptBboxToRect,
|
|
50
|
-
AiLocateElement: ()=>external_inspect_js_namespaceObject.AiLocateElement,
|
|
51
|
-
uiTarsPlanning: ()=>external_ui_tars_planning_js_namespaceObject.uiTarsPlanning,
|
|
52
|
-
findAllMidsceneLocatorField: ()=>external_common_js_namespaceObject.findAllMidsceneLocatorField,
|
|
53
|
-
generatePlaywrightTestStream: ()=>playwright_generator_js_namespaceObject.generatePlaywrightTestStream,
|
|
54
|
-
AiLocateSection: ()=>external_inspect_js_namespaceObject.AiLocateSection,
|
|
55
|
-
TMultimodalPromptSchema: ()=>external_common_js_namespaceObject.TMultimodalPromptSchema
|
|
54
|
+
dumpActionParam: ()=>external_common_js_namespaceObject.dumpActionParam
|
|
56
55
|
});
|
|
57
56
|
const index_js_namespaceObject = require("./service-caller/index.js");
|
|
58
57
|
const llm_locator_js_namespaceObject = require("./prompt/llm-locator.js");
|
|
@@ -64,7 +63,6 @@ const external_llm_planning_js_namespaceObject = require("./llm-planning.js");
|
|
|
64
63
|
const external_common_js_namespaceObject = require("../common.js");
|
|
65
64
|
const external_ui_tars_planning_js_namespaceObject = require("./ui-tars-planning.js");
|
|
66
65
|
const external_conversation_history_js_namespaceObject = require("./conversation-history.js");
|
|
67
|
-
exports.AIActionType = __webpack_exports__.AIActionType;
|
|
68
66
|
exports.AiExtractElementInfo = __webpack_exports__.AiExtractElementInfo;
|
|
69
67
|
exports.AiJudgeOrderSensitive = __webpack_exports__.AiJudgeOrderSensitive;
|
|
70
68
|
exports.AiLocateElement = __webpack_exports__.AiLocateElement;
|
|
@@ -94,7 +92,6 @@ exports.resizeImageForUiTars = __webpack_exports__.resizeImageForUiTars;
|
|
|
94
92
|
exports.systemPromptToLocateElement = __webpack_exports__.systemPromptToLocateElement;
|
|
95
93
|
exports.uiTarsPlanning = __webpack_exports__.uiTarsPlanning;
|
|
96
94
|
for(var __rspack_i in __webpack_exports__)if (-1 === [
|
|
97
|
-
"AIActionType",
|
|
98
95
|
"AiExtractElementInfo",
|
|
99
96
|
"AiJudgeOrderSensitive",
|
|
100
97
|
"AiLocateElement",
|
|
@@ -140,7 +140,7 @@ async function AiLocateElement(options) {
|
|
|
140
140
|
});
|
|
141
141
|
msgs.push(...addOns);
|
|
142
142
|
}
|
|
143
|
-
const res = await callAIFn(msgs,
|
|
143
|
+
const res = await callAIFn(msgs, modelConfig);
|
|
144
144
|
const rawResponse = JSON.stringify(res.content);
|
|
145
145
|
let resRect;
|
|
146
146
|
let matchedElements = 'elements' in res.content ? res.content.elements : [];
|
|
@@ -212,7 +212,7 @@ async function AiLocateSection(options) {
|
|
|
212
212
|
});
|
|
213
213
|
msgs.push(...addOns);
|
|
214
214
|
}
|
|
215
|
-
const result = await (0, index_js_namespaceObject.callAIWithObjectResponse)(msgs,
|
|
215
|
+
const result = await (0, index_js_namespaceObject.callAIWithObjectResponse)(msgs, modelConfig);
|
|
216
216
|
let sectionRect;
|
|
217
217
|
const sectionBbox = result.content.bbox;
|
|
218
218
|
if (sectionBbox) {
|
|
@@ -279,7 +279,7 @@ async function AiExtractElementInfo(options) {
|
|
|
279
279
|
});
|
|
280
280
|
msgs.push(...addOns);
|
|
281
281
|
}
|
|
282
|
-
const result = await (0, index_js_namespaceObject.callAIWithObjectResponse)(msgs,
|
|
282
|
+
const result = await (0, index_js_namespaceObject.callAIWithObjectResponse)(msgs, modelConfig);
|
|
283
283
|
return {
|
|
284
284
|
parseResult: result.content,
|
|
285
285
|
usage: result.usage,
|
|
@@ -299,7 +299,7 @@ async function AiJudgeOrderSensitive(description, callAIFn, modelConfig) {
|
|
|
299
299
|
content: userPrompt
|
|
300
300
|
}
|
|
301
301
|
];
|
|
302
|
-
const result = await callAIFn(msgs,
|
|
302
|
+
const result = await callAIFn(msgs, modelConfig);
|
|
303
303
|
return {
|
|
304
304
|
isOrderSensitive: result.content.isOrderSensitive ?? false,
|
|
305
305
|
usage: result.usage
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/inspect.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/inspect.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIDataExtractionResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ReferenceImage,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { generateElementByPosition } from '@midscene/shared/extractor/dom-util';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport {\n AIActionType,\n adaptBboxToRect,\n expandSearchArea,\n mergeRects,\n} from '../common';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the reference image named '${item.name}':`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(options: {\n context: UIContext;\n targetElementDescription: TUserPrompt;\n referenceImage?: ReferenceImage;\n callAIFn: typeof callAIWithObjectResponse<\n AIElementResponse | [number, number]\n >;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: {\n elements: LocateResultElement[];\n errors?: string[];\n };\n rect?: Rect;\n rawResponse: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const { context, targetElementDescription, callAIFn, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const targetElementDescriptionText = extraTextFromUserPrompt(\n targetElementDescription,\n );\n const userInstructionPrompt = findElementPrompt(targetElementDescriptionText);\n const systemPrompt = systemPromptToLocateElement(vlMode);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.size.width;\n let imageHeight = context.size.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (vlMode === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, modelConfig);\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements = 'elements' in res.content ? res.content.elements : [];\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if (\n 'bbox' in res.content &&\n Array.isArray(res.content.bbox) &&\n res.content.bbox.length >= 1\n ) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n vlMode,\n );\n\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n\n const element: LocateResultElement = generateElementByPosition(\n rectCenter,\n targetElementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElements = [element];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements as LocateResultElement[],\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n const systemPrompt = systemPromptToLocateSection(vlMode);\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n extraTextFromUserPrompt(sectionDescription),\n );\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size, vlMode);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n const croppedResult = await cropByRect(\n screenshotBase64,\n sectionRect,\n vlMode === 'qwen2.5-vl',\n );\n imageBase64 = croppedResult.imageBase64;\n sectionRect.width = croppedResult.width;\n sectionRect.height = croppedResult.height;\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const systemPrompt = systemPromptToExtract();\n const { screenshotBase64 } = context;\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AIDataExtractionResponse<T>>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n return {\n parseResult: result.content,\n usage: result.usage,\n reasoning_content: result.reasoning_content,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n callAIFn: typeof callAIWithObjectResponse<{ isOrderSensitive: boolean }>,\n modelConfig: IModelConfig,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n const result = await callAIFn(\n msgs,\n AIActionType.INSPECT_ELEMENT, // Reuse existing action type for now\n modelConfig,\n );\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? false,\n usage: result.usage,\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAIFn","modelConfig","vlMode","screenshotBase64","assert","targetElementDescriptionText","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","paddedResult","paddingToMatchBlockByBase64","addOns","res","AIActionType","rawResponse","JSON","resRect","matchedElements","errors","Array","adaptBboxToRect","rectCenter","element","generateElementByPosition","e","msg","Error","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAIWithObjectResponse","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","croppedResult","cropByRect","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","AiJudgeOrderSensitive","description","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;ACgDA,MAAMI,eAAeC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAC9B,MAAMC,eAAeD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;IAEA,MAAMC,OAAyC,EAAE;IACjD,IAAID,kBAAkB,QAAQ,QAAQ;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQF,iBAAiB,MAAM,CAAE;YAC1C,MAAMG,SAAS,MAAMC,AAAAA,IAAAA,oBAAAA,kBAAAA,AAAAA,EACnBF,KAAK,GAAG,EACR,CAAC,CAACF,iBAAiB,uBAAuB;YAG5CC,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,mCAAmC,EAAEC,KAAK,IAAI,CAAC,EAAE,CAAC;oBAC3D;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAAgBC,OASrC;IAUC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGJ;IACrE,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7BM,IAAAA,sBAAAA,MAAAA,AAAAA,EACEL,0BACA;IAEF,MAAMM,+BAA+BjB,wBACnCW;IAEF,MAAMO,wBAAwBC,AAAAA,IAAAA,+BAAAA,iBAAAA,AAAAA,EAAkBF;IAChD,MAAMG,eAAeC,AAAAA,IAAAA,+BAAAA,2BAAAA,AAAAA,EAA4BP;IAEjD,IAAIQ,eAAeP;IACnB,IAAIQ,aAAab,QAAQ,IAAI,CAAC,KAAK;IACnC,IAAIc,cAAcd,QAAQ,IAAI,CAAC,MAAM;IACrC,IAAIe,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAIf,QAAQ,YAAY,EAAE;QACxBO,IAAAA,sBAAAA,MAAAA,AAAAA,EACEP,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFO,IAAAA,sBAAAA,MAAAA,AAAAA,EACEP,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFa,eAAeb,QAAQ,YAAY,CAAC,WAAW;QAC/Cc,aAAad,QAAQ,YAAY,CAAC,IAAI,EAAE;QACxCe,cAAcf,QAAQ,YAAY,CAAC,IAAI,EAAE;QACzCgB,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAIV,AAAW,iBAAXA,QAAyB;QAClC,MAAMa,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAMvB,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKE;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMJ;gBACR;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOP,0BAAuC;QAChD,MAAMkB,SAAS,MAAM3B,mBAAmB;YACtC,QAAQS,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAIyB;IACf;IAEA,MAAMC,MAAM,MAAMlB,SAASR,MAAM2B,mCAAAA,YAAAA,CAAAA,eAA4B,EAAElB;IAE/D,MAAMmB,cAAcC,KAAK,SAAS,CAACH,IAAI,OAAO;IAE9C,IAAII;IACJ,IAAIC,kBAAkB,cAAcL,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,QAAQ,GAAG,EAAE;IAC3E,IAAIM,SACF,YAAYN,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IACE,UAAUA,IAAI,OAAO,IACrBO,MAAM,OAAO,CAACP,IAAI,OAAO,CAAC,IAAI,KAC9BA,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,GAC3B;YACAI,UAAUI,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACRR,IAAI,OAAO,CAAC,IAAI,EAChBP,YACAC,aACAf,QAAQ,YAAY,EAAE,MAAM,MAC5BA,QAAQ,YAAY,EAAE,MAAM,KAC5BgB,oBACAC,qBACAZ;YAGFjB,aAAa,WAAWqC;YAExB,MAAMK,aAAa;gBACjB,GAAGL,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YAEA,MAAMM,UAA+BC,AAAAA,IAAAA,yBAAAA,yBAAAA,AAAAA,EACnCF,YACAtB;YAEFmB,SAAS,EAAE;YAEX,IAAII,SACFL,kBAAkB;gBAACK;aAAQ;QAE/B;IACF,EAAE,OAAOE,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACN,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEO,IAAI,CAAC,CAAC;aAFtBP,SAAS;YAACO;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMT;QACN,aAAa;YACX,UAAUC;YACV,QAAQC;QACV;QACAJ;QACA,OAAOF,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;AACF;AAEO,eAAee,gBAAgBpC,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAEoC,kBAAkB,EAAEjC,WAAW,EAAE,GAAGJ;IACrD,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7B,MAAMU,eAAe2B,AAAAA,IAAAA,uCAAAA,2BAAAA,AAAAA,EAA4BjC;IACjD,MAAMkC,gCAAgCC,AAAAA,IAAAA,uCAAAA,yBAAAA,AAAAA,EACpCjD,wBAAwB8C;IAE1B,MAAM1C,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMiC;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMjB,SAAS,MAAM3B,mBAAmB;YACtC,QAAQ4C,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACA1C,KAAK,IAAI,IAAIyB;IACf;IAEA,MAAMqB,SAAS,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnB/C,MACA2B,mCAAAA,YAAAA,CAAAA,YAAyB,EACzBlB;IAGF,IAAIuC;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAahB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACjBe,aACA3C,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAEFf,aAAa,0BAA0BuD;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9DnD,aAAa,wBAAwBwD;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAASpB,MAAM,OAAO,CAACoB,OAC/B,GAAG,CAAC,CAACA,OACGnB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACLmB,MACA/C,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAGNf,aAAa,qBAAqByD;QAGlC,MAAME,aAAaC,AAAAA,IAAAA,mCAAAA,UAAAA,AAAAA,EAAW;YAACL;eAAeE;SAAe;QAC7DzD,aAAa,iBAAiB2D;QAG9BN,cAAcQ,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiBF,YAAYhD,QAAQ,IAAI,EAAEI;QACzDf,aAAa,2BAA2BqD;IAC1C;IAEA,IAAIS,cAAc9C;IAClB,IAAIqC,aAAa;QACf,MAAMU,gBAAgB,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAC1BhD,kBACAqC,aACAtC,AAAW,iBAAXA;QAEF+C,cAAcC,cAAc,WAAW;QACvCV,YAAY,KAAK,GAAGU,cAAc,KAAK;QACvCV,YAAY,MAAM,GAAGU,cAAc,MAAM;IAC3C;IAEA,OAAO;QACL,MAAMV;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAajB,KAAK,SAAS,CAACiB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAec,qBAAwBvD,OAO7C;IACC,MAAM,EAAEwD,SAAS,EAAEvD,OAAO,EAAEwD,aAAa,EAAE/D,gBAAgB,EAAEU,WAAW,EAAE,GACxEJ;IACF,MAAMW,eAAe+C,AAAAA,IAAAA,8BAAAA,qBAAAA,AAAAA;IACrB,MAAM,EAAEpD,gBAAgB,EAAE,GAAGL;IAE7B,MAAM0D,wBAAwBC,AAAAA,IAAAA,8BAAAA,sBAAAA,AAAAA,EAC5B5D,QAAQ,eAAe,IAAI,IAC3BwD;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,eAAe,uBAAuB,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAKvD;YACL,QAAQ;QACV;IACF;IAGFuD,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMhE,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAASkD;QACX;KACD;IAED,IAAInE,kBAAkB;QACpB,MAAM0B,SAAS,MAAM3B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAC,KAAK,IAAI,IAAIyB;IACf;IAEA,MAAMqB,SAAS,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnB/C,MACA2B,mCAAAA,YAAAA,CAAAA,YAAyB,EACzBlB;IAEF,OAAO;QACL,aAAaqC,OAAO,OAAO;QAC3B,OAAOA,OAAO,KAAK;QACnB,mBAAmBA,OAAO,iBAAiB;IAC7C;AACF;AAEO,eAAeqB,sBACpBC,WAAmB,EACnB5D,QAAwE,EACxEC,WAAyB;IAKzB,MAAMO,eAAeqD,AAAAA,IAAAA,yCAAAA,iCAAAA,AAAAA;IACrB,MAAMC,aAAaC,AAAAA,IAAAA,yCAAAA,yBAAAA,AAAAA,EAA0BH;IAE7C,MAAMpE,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAASsD;QACX;KACD;IAED,MAAMxB,SAAS,MAAMtC,SACnBR,MACA2B,mCAAAA,YAAAA,CAAAA,eAA4B,EAC5BlB;IAGF,OAAO;QACL,kBAAkBqC,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
|
|
1
|
+
{"version":3,"file":"ai-model/inspect.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/inspect.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIDataExtractionResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ReferenceImage,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { generateElementByPosition } from '@midscene/shared/extractor/dom-util';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport { adaptBboxToRect, expandSearchArea, mergeRects } from '../common';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the reference image named '${item.name}':`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(options: {\n context: UIContext;\n targetElementDescription: TUserPrompt;\n referenceImage?: ReferenceImage;\n callAIFn: typeof callAIWithObjectResponse<\n AIElementResponse | [number, number]\n >;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: {\n elements: LocateResultElement[];\n errors?: string[];\n };\n rect?: Rect;\n rawResponse: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const { context, targetElementDescription, callAIFn, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const targetElementDescriptionText = extraTextFromUserPrompt(\n targetElementDescription,\n );\n const userInstructionPrompt = findElementPrompt(targetElementDescriptionText);\n const systemPrompt = systemPromptToLocateElement(vlMode);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.size.width;\n let imageHeight = context.size.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (vlMode === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const res = await callAIFn(msgs, modelConfig);\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements = 'elements' in res.content ? res.content.elements : [];\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if (\n 'bbox' in res.content &&\n Array.isArray(res.content.bbox) &&\n res.content.bbox.length >= 1\n ) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n vlMode,\n );\n\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n\n const element: LocateResultElement = generateElementByPosition(\n rectCenter,\n targetElementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElements = [element];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements as LocateResultElement[],\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n const systemPrompt = systemPromptToLocateSection(vlMode);\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n extraTextFromUserPrompt(sectionDescription),\n );\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n modelConfig,\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size, vlMode);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n const croppedResult = await cropByRect(\n screenshotBase64,\n sectionRect,\n vlMode === 'qwen2.5-vl',\n );\n imageBase64 = croppedResult.imageBase64;\n sectionRect.width = croppedResult.width;\n sectionRect.height = croppedResult.height;\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const systemPrompt = systemPromptToExtract();\n const { screenshotBase64 } = context;\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AIDataExtractionResponse<T>>(\n msgs,\n modelConfig,\n );\n return {\n parseResult: result.content,\n usage: result.usage,\n reasoning_content: result.reasoning_content,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n callAIFn: typeof callAIWithObjectResponse<{ isOrderSensitive: boolean }>,\n modelConfig: IModelConfig,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n const result = await callAIFn(msgs, modelConfig);\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? false,\n usage: result.usage,\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAIFn","modelConfig","vlMode","screenshotBase64","assert","targetElementDescriptionText","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","paddedResult","paddingToMatchBlockByBase64","addOns","res","rawResponse","JSON","resRect","matchedElements","errors","Array","adaptBboxToRect","rectCenter","element","generateElementByPosition","e","msg","Error","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAIWithObjectResponse","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","croppedResult","cropByRect","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","AiJudgeOrderSensitive","description","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;AC2CA,MAAMI,eAAeC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAC9B,MAAMC,eAAeD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;IAEA,MAAMC,OAAyC,EAAE;IACjD,IAAID,kBAAkB,QAAQ,QAAQ;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQF,iBAAiB,MAAM,CAAE;YAC1C,MAAMG,SAAS,MAAMC,AAAAA,IAAAA,oBAAAA,kBAAAA,AAAAA,EACnBF,KAAK,GAAG,EACR,CAAC,CAACF,iBAAiB,uBAAuB;YAG5CC,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,mCAAmC,EAAEC,KAAK,IAAI,CAAC,EAAE,CAAC;oBAC3D;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAAgBC,OASrC;IAUC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGJ;IACrE,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7BM,IAAAA,sBAAAA,MAAAA,AAAAA,EACEL,0BACA;IAEF,MAAMM,+BAA+BjB,wBACnCW;IAEF,MAAMO,wBAAwBC,AAAAA,IAAAA,+BAAAA,iBAAAA,AAAAA,EAAkBF;IAChD,MAAMG,eAAeC,AAAAA,IAAAA,+BAAAA,2BAAAA,AAAAA,EAA4BP;IAEjD,IAAIQ,eAAeP;IACnB,IAAIQ,aAAab,QAAQ,IAAI,CAAC,KAAK;IACnC,IAAIc,cAAcd,QAAQ,IAAI,CAAC,MAAM;IACrC,IAAIe,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAIf,QAAQ,YAAY,EAAE;QACxBO,IAAAA,sBAAAA,MAAAA,AAAAA,EACEP,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFO,IAAAA,sBAAAA,MAAAA,AAAAA,EACEP,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFa,eAAeb,QAAQ,YAAY,CAAC,WAAW;QAC/Cc,aAAad,QAAQ,YAAY,CAAC,IAAI,EAAE;QACxCe,cAAcf,QAAQ,YAAY,CAAC,IAAI,EAAE;QACzCgB,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAIV,AAAW,iBAAXA,QAAyB;QAClC,MAAMa,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAMvB,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKE;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMJ;gBACR;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOP,0BAAuC;QAChD,MAAMkB,SAAS,MAAM3B,mBAAmB;YACtC,QAAQS,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAIyB;IACf;IAEA,MAAMC,MAAM,MAAMlB,SAASR,MAAMS;IAEjC,MAAMkB,cAAcC,KAAK,SAAS,CAACF,IAAI,OAAO;IAE9C,IAAIG;IACJ,IAAIC,kBAAkB,cAAcJ,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,QAAQ,GAAG,EAAE;IAC3E,IAAIK,SACF,YAAYL,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IACE,UAAUA,IAAI,OAAO,IACrBM,MAAM,OAAO,CAACN,IAAI,OAAO,CAAC,IAAI,KAC9BA,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,GAC3B;YACAG,UAAUI,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACRP,IAAI,OAAO,CAAC,IAAI,EAChBP,YACAC,aACAf,QAAQ,YAAY,EAAE,MAAM,MAC5BA,QAAQ,YAAY,EAAE,MAAM,KAC5BgB,oBACAC,qBACAZ;YAGFjB,aAAa,WAAWoC;YAExB,MAAMK,aAAa;gBACjB,GAAGL,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YAEA,MAAMM,UAA+BC,AAAAA,IAAAA,yBAAAA,yBAAAA,AAAAA,EACnCF,YACArB;YAEFkB,SAAS,EAAE;YAEX,IAAII,SACFL,kBAAkB;gBAACK;aAAQ;QAE/B;IACF,EAAE,OAAOE,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACN,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEO,IAAI,CAAC,CAAC;aAFtBP,SAAS;YAACO;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMT;QACN,aAAa;YACX,UAAUC;YACV,QAAQC;QACV;QACAJ;QACA,OAAOD,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;AACF;AAEO,eAAec,gBAAgBnC,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAEmC,kBAAkB,EAAEhC,WAAW,EAAE,GAAGJ;IACrD,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7B,MAAMU,eAAe0B,AAAAA,IAAAA,uCAAAA,2BAAAA,AAAAA,EAA4BhC;IACjD,MAAMiC,gCAAgCC,AAAAA,IAAAA,uCAAAA,yBAAAA,AAAAA,EACpChD,wBAAwB6C;IAE1B,MAAMzC,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMgC;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMhB,SAAS,MAAM3B,mBAAmB;YACtC,QAAQ2C,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACAzC,KAAK,IAAI,IAAIyB;IACf;IAEA,MAAMoB,SAAS,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnB9C,MACAS;IAGF,IAAIsC;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAahB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACjBe,aACA1C,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAEFf,aAAa,0BAA0BsD;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9DlD,aAAa,wBAAwBuD;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAASpB,MAAM,OAAO,CAACoB,OAC/B,GAAG,CAAC,CAACA,OACGnB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACLmB,MACA9C,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAGNf,aAAa,qBAAqBwD;QAGlC,MAAME,aAAaC,AAAAA,IAAAA,mCAAAA,UAAAA,AAAAA,EAAW;YAACL;eAAeE;SAAe;QAC7DxD,aAAa,iBAAiB0D;QAG9BN,cAAcQ,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiBF,YAAY/C,QAAQ,IAAI,EAAEI;QACzDf,aAAa,2BAA2BoD;IAC1C;IAEA,IAAIS,cAAc7C;IAClB,IAAIoC,aAAa;QACf,MAAMU,gBAAgB,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAC1B/C,kBACAoC,aACArC,AAAW,iBAAXA;QAEF8C,cAAcC,cAAc,WAAW;QACvCV,YAAY,KAAK,GAAGU,cAAc,KAAK;QACvCV,YAAY,MAAM,GAAGU,cAAc,MAAM;IAC3C;IAEA,OAAO;QACL,MAAMV;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAajB,KAAK,SAAS,CAACiB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAec,qBAAwBtD,OAO7C;IACC,MAAM,EAAEuD,SAAS,EAAEtD,OAAO,EAAEuD,aAAa,EAAE9D,gBAAgB,EAAEU,WAAW,EAAE,GACxEJ;IACF,MAAMW,eAAe8C,AAAAA,IAAAA,8BAAAA,qBAAAA,AAAAA;IACrB,MAAM,EAAEnD,gBAAgB,EAAE,GAAGL;IAE7B,MAAMyD,wBAAwBC,AAAAA,IAAAA,8BAAAA,sBAAAA,AAAAA,EAC5B3D,QAAQ,eAAe,IAAI,IAC3BuD;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,eAAe,uBAAuB,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAKtD;YACL,QAAQ;QACV;IACF;IAGFsD,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAM/D,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAASiD;QACX;KACD;IAED,IAAIlE,kBAAkB;QACpB,MAAM0B,SAAS,MAAM3B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAC,KAAK,IAAI,IAAIyB;IACf;IAEA,MAAMoB,SAAS,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnB9C,MACAS;IAEF,OAAO;QACL,aAAaoC,OAAO,OAAO;QAC3B,OAAOA,OAAO,KAAK;QACnB,mBAAmBA,OAAO,iBAAiB;IAC7C;AACF;AAEO,eAAeqB,sBACpBC,WAAmB,EACnB3D,QAAwE,EACxEC,WAAyB;IAKzB,MAAMO,eAAeoD,AAAAA,IAAAA,yCAAAA,iCAAAA,AAAAA;IACrB,MAAMC,aAAaC,AAAAA,IAAAA,yCAAAA,yBAAAA,AAAAA,EAA0BH;IAE7C,MAAMnE,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAASqD;QACX;KACD;IAED,MAAMxB,SAAS,MAAMrC,SAASR,MAAMS;IAEpC,OAAO;QACL,kBAAkBoC,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
|
|
@@ -110,7 +110,7 @@ async function plan(userInstruction, opts) {
|
|
|
110
110
|
...instruction,
|
|
111
111
|
...historyLog
|
|
112
112
|
];
|
|
113
|
-
const { content: planFromAI, contentString: rawResponse, usage, reasoning_content } = await (0, index_js_namespaceObject.callAIWithObjectResponse)(msgs,
|
|
113
|
+
const { content: planFromAI, contentString: rawResponse, usage, reasoning_content } = await (0, index_js_namespaceObject.callAIWithObjectResponse)(msgs, modelConfig, {
|
|
114
114
|
deepThink: 'unset' === opts.deepThink ? void 0 : opts.deepThink
|
|
115
115
|
});
|
|
116
116
|
const actions = planFromAI.action ? [
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n DeepThinkOption,\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n AIActionType,\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nconst debug = getDebug('planning');\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n imagesIncludeCount?: number;\n deepThink?: DeepThinkOption;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { screenshotBase64, size } = context;\n\n const { vlMode } = modelConfig;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode,\n includeBbox: opts.includeBbox,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = size.width;\n let imageHeight = size.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (vlMode === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The last screenshot is attached. Please going on according to the instruction.`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'this is the latest screenshot',\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n const {\n content: planFromAI,\n contentString: rawResponse,\n usage,\n reasoning_content,\n } = await callAIWithObjectResponse<RawResponsePlanningAIResponse>(\n msgs,\n AIActionType.PLAN,\n modelConfig,\n {\n deepThink: opts.deepThink === 'unset' ? undefined : opts.deepThink,\n },\n );\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(\n actions,\n opts.actionSpace,\n planFromAI.sleep,\n ),\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && vlMode !== undefined) {\n // Always use VL mode to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n }\n });\n });\n\n if (\n actions.length === 0 &&\n returnValue.more_actions_needed_by_instruction &&\n !returnValue.sleep\n ) {\n console.warn(\n 'No actions planned for the prompt, but model said more actions are needed:',\n userInstruction,\n );\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","plan","userInstruction","opts","context","modelConfig","conversationHistory","screenshotBase64","size","vlMode","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","rightLimit","bottomLimit","paddedResult","paddingToMatchBlockByBase64","actionContext","instruction","latestFeedbackMessage","historyLog","msgs","planFromAI","rawResponse","usage","reasoning_content","callAIWithObjectResponse","AIActionType","undefined","actions","returnValue","buildYamlFlowFromPlans","assert","action","type","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam","console"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;ACiBA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAEhB,eAAeC,KACpBC,eAAuB,EACvBC,IAUC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,gBAAgB,EAAEC,IAAI,EAAE,GAAGJ;IAEnC,MAAM,EAAEK,MAAM,EAAE,GAAGJ;IAEnB,MAAMK,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAaR,KAAK,WAAW;QAC7BM;QACA,aAAaN,KAAK,WAAW;IAC/B;IAEA,IAAIS,eAAeL;IACnB,IAAIM,aAAaL,KAAK,KAAK;IAC3B,IAAIM,cAAcN,KAAK,MAAM;IAC7B,MAAMO,aAAaF;IACnB,MAAMG,cAAcF;IAGpB,IAAIL,AAAW,iBAAXA,QAAyB;QAC3B,MAAMQ,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAME,gBAAgBhB,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMiB,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEjB,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAImB;IAEJ,IAAIf,oBAAoB,sBAAsB,EAAE;QAC9Ce,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGf,oBAAoB,sBAAsB,CAAC,gFAAgF,CAAC;gBACvI;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKM;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAN,oBAAoB,mCAAmC;IACzD,OACEe,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM;YACR;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKT;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFN,oBAAoB,MAAM,CAACe;IAC3B,MAAMC,aAAahB,oBAAoB,QAAQ,CAACH,KAAK,kBAAkB;IAEvE,MAAMoB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASb;QAAa;WACrCU;WACAE;KACJ;IAED,MAAM,EACJ,SAASE,UAAU,EACnB,eAAeC,WAAW,EAC1BC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACRL,MACAM,mCAAAA,YAAAA,CAAAA,IAAiB,EACjBxB,aACA;QACE,WAAWF,AAAmB,YAAnBA,KAAK,SAAS,GAAe2B,SAAY3B,KAAK,SAAS;IACpE;IAGF,MAAM4B,UAAUP,WAAW,MAAM,GAAG;QAACA,WAAW,MAAM;KAAC,GAAG,EAAE;IAC5D,MAAMQ,cAAkC;QACtC,GAAGR,UAAU;QACbO;QACAN;QACAC;QACAC;QACA,UAAUM,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EACRF,SACA5B,KAAK,WAAW,EAChBqB,WAAW,KAAK;IAEpB;IAEAU,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOV,YAAY;IAEnBO,QAAQ,OAAO,CAAC,CAACI;QACf,MAAMC,OAAOD,OAAO,IAAI;QACxB,MAAME,sBAAsBlC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACgC,SAAWA,OAAO,IAAI,KAAKC;QAG9BrC,MAAM,+BAA+BsC;QACrC,MAAMC,eAAeD,sBACjBE,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BF,oBAAoB,WAAW,IAC3D,EAAE;QAENtC,MAAM,gBAAgBuC;QAEtBA,aAAa,OAAO,CAAC,CAACE;YACpB,MAAMC,eAAeN,OAAO,KAAK,CAACK,MAAM;YACxC,IAAIC,gBAAgBhC,AAAWqB,WAAXrB,QAElB0B,OAAO,KAAK,CAACK,MAAM,GAAGE,AAAAA,IAAAA,mCAAAA,aAAAA,AAAAA,EACpBD,cACA5B,YACAC,aACAC,YACAC,aACAP;QAGN;IACF;IAEA,IACEsB,AAAmB,MAAnBA,QAAQ,MAAM,IACdC,YAAY,kCAAkC,IAC9C,CAACA,YAAY,KAAK,EAElBW,QAAQ,IAAI,CACV,8EACAzC;IAIJI,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAMmB;YACR;SACD;IACH;IAEA,OAAOO;AACT"}
|
|
1
|
+
{"version":3,"file":"ai-model/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n DeepThinkOption,\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nconst debug = getDebug('planning');\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n imagesIncludeCount?: number;\n deepThink?: DeepThinkOption;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { screenshotBase64, size } = context;\n\n const { vlMode } = modelConfig;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode,\n includeBbox: opts.includeBbox,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = size.width;\n let imageHeight = size.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (vlMode === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The last screenshot is attached. Please going on according to the instruction.`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'this is the latest screenshot',\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n const {\n content: planFromAI,\n contentString: rawResponse,\n usage,\n reasoning_content,\n } = await callAIWithObjectResponse<RawResponsePlanningAIResponse>(\n msgs,\n modelConfig,\n {\n deepThink: opts.deepThink === 'unset' ? undefined : opts.deepThink,\n },\n );\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(\n actions,\n opts.actionSpace,\n planFromAI.sleep,\n ),\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && vlMode !== undefined) {\n // Always use VL mode to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n }\n });\n });\n\n if (\n actions.length === 0 &&\n returnValue.more_actions_needed_by_instruction &&\n !returnValue.sleep\n ) {\n console.warn(\n 'No actions planned for the prompt, but model said more actions are needed:',\n userInstruction,\n );\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","plan","userInstruction","opts","context","modelConfig","conversationHistory","screenshotBase64","size","vlMode","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","rightLimit","bottomLimit","paddedResult","paddingToMatchBlockByBase64","actionContext","instruction","latestFeedbackMessage","historyLog","msgs","planFromAI","rawResponse","usage","reasoning_content","callAIWithObjectResponse","undefined","actions","returnValue","buildYamlFlowFromPlans","assert","action","type","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam","console"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;ACgBA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAEhB,eAAeC,KACpBC,eAAuB,EACvBC,IAUC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,gBAAgB,EAAEC,IAAI,EAAE,GAAGJ;IAEnC,MAAM,EAAEK,MAAM,EAAE,GAAGJ;IAEnB,MAAMK,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAaR,KAAK,WAAW;QAC7BM;QACA,aAAaN,KAAK,WAAW;IAC/B;IAEA,IAAIS,eAAeL;IACnB,IAAIM,aAAaL,KAAK,KAAK;IAC3B,IAAIM,cAAcN,KAAK,MAAM;IAC7B,MAAMO,aAAaF;IACnB,MAAMG,cAAcF;IAGpB,IAAIL,AAAW,iBAAXA,QAAyB;QAC3B,MAAMQ,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAME,gBAAgBhB,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMiB,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEjB,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAImB;IAEJ,IAAIf,oBAAoB,sBAAsB,EAAE;QAC9Ce,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGf,oBAAoB,sBAAsB,CAAC,gFAAgF,CAAC;gBACvI;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKM;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAN,oBAAoB,mCAAmC;IACzD,OACEe,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM;YACR;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKT;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFN,oBAAoB,MAAM,CAACe;IAC3B,MAAMC,aAAahB,oBAAoB,QAAQ,CAACH,KAAK,kBAAkB;IAEvE,MAAMoB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASb;QAAa;WACrCU;WACAE;KACJ;IAED,MAAM,EACJ,SAASE,UAAU,EACnB,eAAeC,WAAW,EAC1BC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACRL,MACAlB,aACA;QACE,WAAWF,AAAmB,YAAnBA,KAAK,SAAS,GAAe0B,SAAY1B,KAAK,SAAS;IACpE;IAGF,MAAM2B,UAAUN,WAAW,MAAM,GAAG;QAACA,WAAW,MAAM;KAAC,GAAG,EAAE;IAC5D,MAAMO,cAAkC;QACtC,GAAGP,UAAU;QACbM;QACAL;QACAC;QACAC;QACA,UAAUK,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EACRF,SACA3B,KAAK,WAAW,EAChBqB,WAAW,KAAK;IAEpB;IAEAS,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOT,YAAY;IAEnBM,QAAQ,OAAO,CAAC,CAACI;QACf,MAAMC,OAAOD,OAAO,IAAI;QACxB,MAAME,sBAAsBjC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAAC+B,SAAWA,OAAO,IAAI,KAAKC;QAG9BpC,MAAM,+BAA+BqC;QACrC,MAAMC,eAAeD,sBACjBE,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BF,oBAAoB,WAAW,IAC3D,EAAE;QAENrC,MAAM,gBAAgBsC;QAEtBA,aAAa,OAAO,CAAC,CAACE;YACpB,MAAMC,eAAeN,OAAO,KAAK,CAACK,MAAM;YACxC,IAAIC,gBAAgB/B,AAAWoB,WAAXpB,QAElByB,OAAO,KAAK,CAACK,MAAM,GAAGE,AAAAA,IAAAA,mCAAAA,aAAAA,AAAAA,EACpBD,cACA3B,YACAC,aACAC,YACAC,aACAP;QAGN;IACF;IAEA,IACEqB,AAAmB,MAAnBA,QAAQ,MAAM,IACdC,YAAY,kCAAkC,IAC9C,CAACA,YAAY,KAAK,EAElBW,QAAQ,IAAI,CACV,8EACAxC;IAIJI,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAMmB;YACR;SACD;IACH;IAEA,OAAOM;AACT"}
|
|
@@ -84,7 +84,7 @@ ${constants_namespaceObject.PLAYWRIGHT_EXAMPLE_CODE}`;
|
|
|
84
84
|
content: messageContent
|
|
85
85
|
}
|
|
86
86
|
];
|
|
87
|
-
const response = await (0, external_index_js_namespaceObject.callAIWithStringResponse)(prompt,
|
|
87
|
+
const response = await (0, external_index_js_namespaceObject.callAIWithStringResponse)(prompt, modelConfig);
|
|
88
88
|
if (response?.content && 'string' == typeof response.content) return response.content;
|
|
89
89
|
throw new Error('Failed to generate Playwright test code');
|
|
90
90
|
};
|
|
@@ -135,12 +135,12 @@ ${constants_namespaceObject.PLAYWRIGHT_EXAMPLE_CODE}`;
|
|
|
135
135
|
content: messageContent
|
|
136
136
|
}
|
|
137
137
|
];
|
|
138
|
-
if (options.stream && options.onChunk) return await (0, external_index_js_namespaceObject.callAI)(prompt,
|
|
138
|
+
if (options.stream && options.onChunk) return await (0, external_index_js_namespaceObject.callAI)(prompt, modelConfig, {
|
|
139
139
|
stream: true,
|
|
140
140
|
onChunk: options.onChunk
|
|
141
141
|
});
|
|
142
142
|
{
|
|
143
|
-
const response = await (0, external_index_js_namespaceObject.callAIWithStringResponse)(prompt,
|
|
143
|
+
const response = await (0, external_index_js_namespaceObject.callAIWithStringResponse)(prompt, modelConfig);
|
|
144
144
|
if (response?.content && 'string' == typeof response.content) return {
|
|
145
145
|
content: response.content,
|
|
146
146
|
usage: response.usage,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/prompt/playwright-generator.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/playwright-generator.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n StreamingAIResponse,\n StreamingCodeGenerationOptions,\n} from '@/types';\nimport { PLAYWRIGHT_EXAMPLE_CODE } from '@midscene/shared/constants';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport { AIActionType, callAI, callAIWithStringResponse } from '../index';\n// Import shared utilities and types from yaml-generator\nimport {\n type ChromeRecordedEvent,\n type EventCounts,\n type EventSummary,\n type InputDescription,\n type ProcessedEvent,\n createEventCounts,\n createMessageContent,\n extractInputDescriptions,\n filterEventsByType,\n getScreenshotsForLLM,\n prepareEventSummary,\n processEventsForLLM,\n validateEvents,\n} from './yaml-generator';\n\n// Playwright-specific interfaces\nexport interface PlaywrightGenerationOptions {\n testName?: string;\n includeScreenshots?: boolean;\n includeTimestamps?: boolean;\n maxScreenshots?: number;\n description?: string;\n viewportSize?: { width: number; height: number };\n waitForNetworkIdle?: boolean;\n waitForNetworkIdleTimeout?: number;\n}\n\n// Re-export shared types for backward compatibility\nexport type {\n ChromeRecordedEvent,\n EventCounts,\n InputDescription,\n ProcessedEvent,\n EventSummary,\n};\n\n// Re-export shared utilities for backward compatibility\nexport {\n getScreenshotsForLLM,\n filterEventsByType,\n createEventCounts,\n extractInputDescriptions,\n processEventsForLLM,\n prepareEventSummary,\n createMessageContent,\n validateEvents,\n};\n\n/**\n * Generates Playwright test code from recorded events\n */\nexport const generatePlaywrightTest = async (\n events: ChromeRecordedEvent[],\n options: PlaywrightGenerationOptions,\n modelConfig: IModelConfig,\n): Promise<string> => {\n // Validate input\n validateEvents(events);\n\n // Prepare event summary using shared utilities\n const summary = prepareEventSummary(events, {\n testName: options.testName,\n maxScreenshots: options.maxScreenshots || 3,\n });\n\n // Add Playwright-specific options to summary\n const playwrightSummary = {\n ...summary,\n waitForNetworkIdle: options.waitForNetworkIdle !== false,\n waitForNetworkIdleTimeout: options.waitForNetworkIdleTimeout || 2000,\n viewportSize: options.viewportSize || { width: 1280, height: 800 },\n };\n\n // Get screenshots for visual context\n const screenshots = getScreenshotsForLLM(events, options.maxScreenshots || 3);\n\n // Create prompt text\n const promptText = `Generate a Playwright test using @midscene/web/playwright that reproduces this recorded browser session. The test should be based on the following events and follow the structure of the example provided. Make the test descriptive with appropriate assertions and validations.\n\nEvent Summary:\n${JSON.stringify(playwrightSummary, null, 2)}\n\nGenerated code should:\n1. Import required dependencies\n2. Set up the test with proper configuration\n3. Include a beforeEach hook to navigate to the starting URL\n4. Implement a test that uses Midscene AI methods (aiTap, aiInput, aiAssert, etc.)\n5. Include appropriate assertions and validations\n6. Follow best practices for Playwright tests\n7. Be ready to execute without further modification\n\nImportant: Return ONLY the raw Playwright test code. Do NOT wrap the response in markdown code blocks (no \\`\\`\\`typescript, \\`\\`\\`javascript or \\`\\`\\`). Start directly with the code content.`;\n\n // Create message content with screenshots\n const messageContent = createMessageContent(\n promptText,\n screenshots,\n options.includeScreenshots !== false,\n );\n\n // Create system prompt\n const systemPrompt = `You are an expert test automation engineer specializing in Playwright and Midscene. \nYour task is to generate a complete, executable Playwright test using @midscene/web/playwright that reproduces a recorded browser session.\n\n${PLAYWRIGHT_EXAMPLE_CODE}`;\n\n // Use LLM to generate the Playwright test code\n const prompt: ChatCompletionMessageParam[] = [\n {\n role: 'system',\n content: systemPrompt,\n },\n {\n role: 'user',\n content: messageContent,\n },\n ];\n\n const response = await callAIWithStringResponse(\n prompt,\n AIActionType.TEXT,\n modelConfig,\n );\n\n if (response?.content && typeof response.content === 'string') {\n return response.content;\n }\n\n throw new Error('Failed to generate Playwright test code');\n};\n\n/**\n * Generates Playwright test code from recorded events with streaming support\n */\nexport const generatePlaywrightTestStream = async (\n events: ChromeRecordedEvent[],\n options: PlaywrightGenerationOptions & StreamingCodeGenerationOptions,\n modelConfig: IModelConfig,\n): Promise<StreamingAIResponse> => {\n // Validate input\n validateEvents(events);\n\n // Prepare event summary using shared utilities\n const summary = prepareEventSummary(events, {\n testName: options.testName,\n maxScreenshots: options.maxScreenshots || 3,\n });\n\n // Add Playwright-specific options to summary\n const playwrightSummary = {\n ...summary,\n waitForNetworkIdle: options.waitForNetworkIdle !== false,\n waitForNetworkIdleTimeout: options.waitForNetworkIdleTimeout || 2000,\n viewportSize: options.viewportSize || { width: 1280, height: 800 },\n };\n\n // Get screenshots for visual context\n const screenshots = getScreenshotsForLLM(events, options.maxScreenshots || 3);\n\n // Create prompt text\n const promptText = `Generate a Playwright test using @midscene/web/playwright that reproduces this recorded browser session. The test should be based on the following events and follow the structure of the example provided. Make the test descriptive with appropriate assertions and validations.\n\nEvent Summary:\n${JSON.stringify(playwrightSummary, null, 2)}\n\nGenerated code should:\n1. Import required dependencies\n2. Set up the test with proper configuration\n3. Include a beforeEach hook to navigate to the starting URL\n4. Implement a test that uses Midscene AI methods (aiTap, aiInput, aiAssert, etc.)\n5. Include appropriate assertions and validations\n6. Follow best practices for Playwright tests\n7. Be ready to execute without further modification\n8. can't wrap this test code in markdown code block\n\nImportant: Return ONLY the raw Playwright test code. Do NOT wrap the response in markdown code blocks (no \\`\\`\\`typescript, \\`\\`\\`javascript or \\`\\`\\`). Start directly with the code content.`;\n\n // Create message content with screenshots\n const messageContent = createMessageContent(\n promptText,\n screenshots,\n options.includeScreenshots !== false,\n );\n\n // Create system prompt\n const systemPrompt = `You are an expert test automation engineer specializing in Playwright and Midscene. \nYour task is to generate a complete, executable Playwright test using @midscene/web/playwright that reproduces a recorded browser session.\n\n${PLAYWRIGHT_EXAMPLE_CODE}`;\n\n // Use LLM to generate the Playwright test code with streaming\n const prompt: ChatCompletionMessageParam[] = [\n {\n role: 'system',\n content: systemPrompt,\n },\n {\n role: 'user',\n content: messageContent,\n },\n ];\n\n if (options.stream && options.onChunk) {\n // Use streaming\n return await callAI(prompt, AIActionType.TEXT, modelConfig, {\n stream: true,\n onChunk: options.onChunk,\n });\n } else {\n // Fallback to non-streaming\n const response = await callAIWithStringResponse(\n prompt,\n AIActionType.TEXT,\n modelConfig,\n );\n\n if (response?.content && typeof response.content === 'string') {\n return {\n content: response.content,\n usage: response.usage,\n isStreamed: false,\n };\n }\n\n throw new Error('Failed to generate Playwright test code');\n }\n};\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","generatePlaywrightTest","events","options","modelConfig","validateEvents","summary","prepareEventSummary","playwrightSummary","screenshots","getScreenshotsForLLM","promptText","JSON","messageContent","createMessageContent","systemPrompt","PLAYWRIGHT_EXAMPLE_CODE","prompt","response","callAIWithStringResponse","AIActionType","Error","generatePlaywrightTestStream","callAI"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;ACuDO,MAAMI,yBAAyB,OACpCC,QACAC,SACAC;IAGAC,IAAAA,2CAAAA,cAAAA,AAAAA,EAAeH;IAGf,MAAMI,UAAUC,AAAAA,IAAAA,2CAAAA,mBAAAA,AAAAA,EAAoBL,QAAQ;QAC1C,UAAUC,QAAQ,QAAQ;QAC1B,gBAAgBA,QAAQ,cAAc,IAAI;IAC5C;IAGA,MAAMK,oBAAoB;QACxB,GAAGF,OAAO;QACV,oBAAoBH,AAA+B,UAA/BA,QAAQ,kBAAkB;QAC9C,2BAA2BA,QAAQ,yBAAyB,IAAI;QAChE,cAAcA,QAAQ,YAAY,IAAI;YAAE,OAAO;YAAM,QAAQ;QAAI;IACnE;IAGA,MAAMM,cAAcC,AAAAA,IAAAA,2CAAAA,oBAAAA,AAAAA,EAAqBR,QAAQC,QAAQ,cAAc,IAAI;IAG3E,MAAMQ,aAAa,CAAC;;;AAGtB,EAAEC,KAAK,SAAS,CAACJ,mBAAmB,MAAM,GAAG;;;;;;;;;;;8LAWiJ,CAAC;IAG7L,MAAMK,iBAAiBC,AAAAA,IAAAA,2CAAAA,oBAAAA,AAAAA,EACrBH,YACAF,aACAN,AAA+B,UAA/BA,QAAQ,kBAAkB;IAI5B,MAAMY,eAAe,CAAC;;;AAGxB,EAAEC,0BAAAA,uBAAuBA,EAAE;IAGzB,MAAMC,SAAuC;QAC3C;YACE,MAAM;YACN,SAASF;QACX;QACA;YACE,MAAM;YACN,SAASF;QACX;KACD;IAED,MAAMK,WAAW,MAAMC,AAAAA,IAAAA,kCAAAA,wBAAAA,AAAAA,EACrBF,QACAG,kCAAAA,YAAAA,CAAAA,IAAiB,EACjBhB;IAGF,IAAIc,UAAU,WAAW,AAA4B,YAA5B,OAAOA,SAAS,OAAO,EAC9C,OAAOA,SAAS,OAAO;IAGzB,MAAM,IAAIG,MAAM;AAClB;AAKO,MAAMC,+BAA+B,OAC1CpB,QACAC,SACAC;IAGAC,IAAAA,2CAAAA,cAAAA,AAAAA,EAAeH;IAGf,MAAMI,UAAUC,AAAAA,IAAAA,2CAAAA,mBAAAA,AAAAA,EAAoBL,QAAQ;QAC1C,UAAUC,QAAQ,QAAQ;QAC1B,gBAAgBA,QAAQ,cAAc,IAAI;IAC5C;IAGA,MAAMK,oBAAoB;QACxB,GAAGF,OAAO;QACV,oBAAoBH,AAA+B,UAA/BA,QAAQ,kBAAkB;QAC9C,2BAA2BA,QAAQ,yBAAyB,IAAI;QAChE,cAAcA,QAAQ,YAAY,IAAI;YAAE,OAAO;YAAM,QAAQ;QAAI;IACnE;IAGA,MAAMM,cAAcC,AAAAA,IAAAA,2CAAAA,oBAAAA,AAAAA,EAAqBR,QAAQC,QAAQ,cAAc,IAAI;IAG3E,MAAMQ,aAAa,CAAC;;;AAGtB,EAAEC,KAAK,SAAS,CAACJ,mBAAmB,MAAM,GAAG;;;;;;;;;;;;8LAYiJ,CAAC;IAG7L,MAAMK,iBAAiBC,AAAAA,IAAAA,2CAAAA,oBAAAA,AAAAA,EACrBH,YACAF,aACAN,AAA+B,UAA/BA,QAAQ,kBAAkB;IAI5B,MAAMY,eAAe,CAAC;;;AAGxB,EAAEC,0BAAAA,uBAAuBA,EAAE;IAGzB,MAAMC,SAAuC;QAC3C;YACE,MAAM;YACN,SAASF;QACX;QACA;YACE,MAAM;YACN,SAASF;QACX;KACD;IAED,IAAIV,QAAQ,MAAM,IAAIA,QAAQ,OAAO,EAEnC,OAAO,MAAMoB,AAAAA,IAAAA,kCAAAA,MAAAA,AAAAA,EAAON,QAAQG,kCAAAA,YAAAA,CAAAA,IAAiB,EAAEhB,aAAa;QAC1D,QAAQ;QACR,SAASD,QAAQ,OAAO;IAC1B;IACK;QAEL,MAAMe,WAAW,MAAMC,AAAAA,IAAAA,kCAAAA,wBAAAA,AAAAA,EACrBF,QACAG,kCAAAA,YAAAA,CAAAA,IAAiB,EACjBhB;QAGF,IAAIc,UAAU,WAAW,AAA4B,YAA5B,OAAOA,SAAS,OAAO,EAC9C,OAAO;YACL,SAASA,SAAS,OAAO;YACzB,OAAOA,SAAS,KAAK;YACrB,YAAY;QACd;QAGF,MAAM,IAAIG,MAAM;IAClB;AACF"}
|
|
1
|
+
{"version":3,"file":"ai-model/prompt/playwright-generator.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/playwright-generator.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n StreamingAIResponse,\n StreamingCodeGenerationOptions,\n} from '@/types';\nimport { PLAYWRIGHT_EXAMPLE_CODE } from '@midscene/shared/constants';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport { callAI, callAIWithStringResponse } from '../index';\n// Import shared utilities and types from yaml-generator\nimport {\n type ChromeRecordedEvent,\n type EventCounts,\n type EventSummary,\n type InputDescription,\n type ProcessedEvent,\n createEventCounts,\n createMessageContent,\n extractInputDescriptions,\n filterEventsByType,\n getScreenshotsForLLM,\n prepareEventSummary,\n processEventsForLLM,\n validateEvents,\n} from './yaml-generator';\n\n// Playwright-specific interfaces\nexport interface PlaywrightGenerationOptions {\n testName?: string;\n includeScreenshots?: boolean;\n includeTimestamps?: boolean;\n maxScreenshots?: number;\n description?: string;\n viewportSize?: { width: number; height: number };\n waitForNetworkIdle?: boolean;\n waitForNetworkIdleTimeout?: number;\n}\n\n// Re-export shared types for backward compatibility\nexport type {\n ChromeRecordedEvent,\n EventCounts,\n InputDescription,\n ProcessedEvent,\n EventSummary,\n};\n\n// Re-export shared utilities for backward compatibility\nexport {\n getScreenshotsForLLM,\n filterEventsByType,\n createEventCounts,\n extractInputDescriptions,\n processEventsForLLM,\n prepareEventSummary,\n createMessageContent,\n validateEvents,\n};\n\n/**\n * Generates Playwright test code from recorded events\n */\nexport const generatePlaywrightTest = async (\n events: ChromeRecordedEvent[],\n options: PlaywrightGenerationOptions,\n modelConfig: IModelConfig,\n): Promise<string> => {\n // Validate input\n validateEvents(events);\n\n // Prepare event summary using shared utilities\n const summary = prepareEventSummary(events, {\n testName: options.testName,\n maxScreenshots: options.maxScreenshots || 3,\n });\n\n // Add Playwright-specific options to summary\n const playwrightSummary = {\n ...summary,\n waitForNetworkIdle: options.waitForNetworkIdle !== false,\n waitForNetworkIdleTimeout: options.waitForNetworkIdleTimeout || 2000,\n viewportSize: options.viewportSize || { width: 1280, height: 800 },\n };\n\n // Get screenshots for visual context\n const screenshots = getScreenshotsForLLM(events, options.maxScreenshots || 3);\n\n // Create prompt text\n const promptText = `Generate a Playwright test using @midscene/web/playwright that reproduces this recorded browser session. The test should be based on the following events and follow the structure of the example provided. Make the test descriptive with appropriate assertions and validations.\n\nEvent Summary:\n${JSON.stringify(playwrightSummary, null, 2)}\n\nGenerated code should:\n1. Import required dependencies\n2. Set up the test with proper configuration\n3. Include a beforeEach hook to navigate to the starting URL\n4. Implement a test that uses Midscene AI methods (aiTap, aiInput, aiAssert, etc.)\n5. Include appropriate assertions and validations\n6. Follow best practices for Playwright tests\n7. Be ready to execute without further modification\n\nImportant: Return ONLY the raw Playwright test code. Do NOT wrap the response in markdown code blocks (no \\`\\`\\`typescript, \\`\\`\\`javascript or \\`\\`\\`). Start directly with the code content.`;\n\n // Create message content with screenshots\n const messageContent = createMessageContent(\n promptText,\n screenshots,\n options.includeScreenshots !== false,\n );\n\n // Create system prompt\n const systemPrompt = `You are an expert test automation engineer specializing in Playwright and Midscene. \nYour task is to generate a complete, executable Playwright test using @midscene/web/playwright that reproduces a recorded browser session.\n\n${PLAYWRIGHT_EXAMPLE_CODE}`;\n\n // Use LLM to generate the Playwright test code\n const prompt: ChatCompletionMessageParam[] = [\n {\n role: 'system',\n content: systemPrompt,\n },\n {\n role: 'user',\n content: messageContent,\n },\n ];\n\n const response = await callAIWithStringResponse(prompt, modelConfig);\n\n if (response?.content && typeof response.content === 'string') {\n return response.content;\n }\n\n throw new Error('Failed to generate Playwright test code');\n};\n\n/**\n * Generates Playwright test code from recorded events with streaming support\n */\nexport const generatePlaywrightTestStream = async (\n events: ChromeRecordedEvent[],\n options: PlaywrightGenerationOptions & StreamingCodeGenerationOptions,\n modelConfig: IModelConfig,\n): Promise<StreamingAIResponse> => {\n // Validate input\n validateEvents(events);\n\n // Prepare event summary using shared utilities\n const summary = prepareEventSummary(events, {\n testName: options.testName,\n maxScreenshots: options.maxScreenshots || 3,\n });\n\n // Add Playwright-specific options to summary\n const playwrightSummary = {\n ...summary,\n waitForNetworkIdle: options.waitForNetworkIdle !== false,\n waitForNetworkIdleTimeout: options.waitForNetworkIdleTimeout || 2000,\n viewportSize: options.viewportSize || { width: 1280, height: 800 },\n };\n\n // Get screenshots for visual context\n const screenshots = getScreenshotsForLLM(events, options.maxScreenshots || 3);\n\n // Create prompt text\n const promptText = `Generate a Playwright test using @midscene/web/playwright that reproduces this recorded browser session. The test should be based on the following events and follow the structure of the example provided. Make the test descriptive with appropriate assertions and validations.\n\nEvent Summary:\n${JSON.stringify(playwrightSummary, null, 2)}\n\nGenerated code should:\n1. Import required dependencies\n2. Set up the test with proper configuration\n3. Include a beforeEach hook to navigate to the starting URL\n4. Implement a test that uses Midscene AI methods (aiTap, aiInput, aiAssert, etc.)\n5. Include appropriate assertions and validations\n6. Follow best practices for Playwright tests\n7. Be ready to execute without further modification\n8. can't wrap this test code in markdown code block\n\nImportant: Return ONLY the raw Playwright test code. Do NOT wrap the response in markdown code blocks (no \\`\\`\\`typescript, \\`\\`\\`javascript or \\`\\`\\`). Start directly with the code content.`;\n\n // Create message content with screenshots\n const messageContent = createMessageContent(\n promptText,\n screenshots,\n options.includeScreenshots !== false,\n );\n\n // Create system prompt\n const systemPrompt = `You are an expert test automation engineer specializing in Playwright and Midscene. \nYour task is to generate a complete, executable Playwright test using @midscene/web/playwright that reproduces a recorded browser session.\n\n${PLAYWRIGHT_EXAMPLE_CODE}`;\n\n // Use LLM to generate the Playwright test code with streaming\n const prompt: ChatCompletionMessageParam[] = [\n {\n role: 'system',\n content: systemPrompt,\n },\n {\n role: 'user',\n content: messageContent,\n },\n ];\n\n if (options.stream && options.onChunk) {\n // Use streaming\n return await callAI(prompt, modelConfig, {\n stream: true,\n onChunk: options.onChunk,\n });\n } else {\n // Fallback to non-streaming\n const response = await callAIWithStringResponse(prompt, modelConfig);\n\n if (response?.content && typeof response.content === 'string') {\n return {\n content: response.content,\n usage: response.usage,\n isStreamed: false,\n };\n }\n\n throw new Error('Failed to generate Playwright test code');\n }\n};\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","generatePlaywrightTest","events","options","modelConfig","validateEvents","summary","prepareEventSummary","playwrightSummary","screenshots","getScreenshotsForLLM","promptText","JSON","messageContent","createMessageContent","systemPrompt","PLAYWRIGHT_EXAMPLE_CODE","prompt","response","callAIWithStringResponse","Error","generatePlaywrightTestStream","callAI"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;ACuDO,MAAMI,yBAAyB,OACpCC,QACAC,SACAC;IAGAC,IAAAA,2CAAAA,cAAAA,AAAAA,EAAeH;IAGf,MAAMI,UAAUC,AAAAA,IAAAA,2CAAAA,mBAAAA,AAAAA,EAAoBL,QAAQ;QAC1C,UAAUC,QAAQ,QAAQ;QAC1B,gBAAgBA,QAAQ,cAAc,IAAI;IAC5C;IAGA,MAAMK,oBAAoB;QACxB,GAAGF,OAAO;QACV,oBAAoBH,AAA+B,UAA/BA,QAAQ,kBAAkB;QAC9C,2BAA2BA,QAAQ,yBAAyB,IAAI;QAChE,cAAcA,QAAQ,YAAY,IAAI;YAAE,OAAO;YAAM,QAAQ;QAAI;IACnE;IAGA,MAAMM,cAAcC,AAAAA,IAAAA,2CAAAA,oBAAAA,AAAAA,EAAqBR,QAAQC,QAAQ,cAAc,IAAI;IAG3E,MAAMQ,aAAa,CAAC;;;AAGtB,EAAEC,KAAK,SAAS,CAACJ,mBAAmB,MAAM,GAAG;;;;;;;;;;;8LAWiJ,CAAC;IAG7L,MAAMK,iBAAiBC,AAAAA,IAAAA,2CAAAA,oBAAAA,AAAAA,EACrBH,YACAF,aACAN,AAA+B,UAA/BA,QAAQ,kBAAkB;IAI5B,MAAMY,eAAe,CAAC;;;AAGxB,EAAEC,0BAAAA,uBAAuBA,EAAE;IAGzB,MAAMC,SAAuC;QAC3C;YACE,MAAM;YACN,SAASF;QACX;QACA;YACE,MAAM;YACN,SAASF;QACX;KACD;IAED,MAAMK,WAAW,MAAMC,AAAAA,IAAAA,kCAAAA,wBAAAA,AAAAA,EAAyBF,QAAQb;IAExD,IAAIc,UAAU,WAAW,AAA4B,YAA5B,OAAOA,SAAS,OAAO,EAC9C,OAAOA,SAAS,OAAO;IAGzB,MAAM,IAAIE,MAAM;AAClB;AAKO,MAAMC,+BAA+B,OAC1CnB,QACAC,SACAC;IAGAC,IAAAA,2CAAAA,cAAAA,AAAAA,EAAeH;IAGf,MAAMI,UAAUC,AAAAA,IAAAA,2CAAAA,mBAAAA,AAAAA,EAAoBL,QAAQ;QAC1C,UAAUC,QAAQ,QAAQ;QAC1B,gBAAgBA,QAAQ,cAAc,IAAI;IAC5C;IAGA,MAAMK,oBAAoB;QACxB,GAAGF,OAAO;QACV,oBAAoBH,AAA+B,UAA/BA,QAAQ,kBAAkB;QAC9C,2BAA2BA,QAAQ,yBAAyB,IAAI;QAChE,cAAcA,QAAQ,YAAY,IAAI;YAAE,OAAO;YAAM,QAAQ;QAAI;IACnE;IAGA,MAAMM,cAAcC,AAAAA,IAAAA,2CAAAA,oBAAAA,AAAAA,EAAqBR,QAAQC,QAAQ,cAAc,IAAI;IAG3E,MAAMQ,aAAa,CAAC;;;AAGtB,EAAEC,KAAK,SAAS,CAACJ,mBAAmB,MAAM,GAAG;;;;;;;;;;;;8LAYiJ,CAAC;IAG7L,MAAMK,iBAAiBC,AAAAA,IAAAA,2CAAAA,oBAAAA,AAAAA,EACrBH,YACAF,aACAN,AAA+B,UAA/BA,QAAQ,kBAAkB;IAI5B,MAAMY,eAAe,CAAC;;;AAGxB,EAAEC,0BAAAA,uBAAuBA,EAAE;IAGzB,MAAMC,SAAuC;QAC3C;YACE,MAAM;YACN,SAASF;QACX;QACA;YACE,MAAM;YACN,SAASF;QACX;KACD;IAED,IAAIV,QAAQ,MAAM,IAAIA,QAAQ,OAAO,EAEnC,OAAO,MAAMmB,AAAAA,IAAAA,kCAAAA,MAAAA,AAAAA,EAAOL,QAAQb,aAAa;QACvC,QAAQ;QACR,SAASD,QAAQ,OAAO;IAC1B;IACK;QAEL,MAAMe,WAAW,MAAMC,AAAAA,IAAAA,kCAAAA,wBAAAA,AAAAA,EAAyBF,QAAQb;QAExD,IAAIc,UAAU,WAAW,AAA4B,YAA5B,OAAOA,SAAS,OAAO,EAC9C,OAAO;YACL,SAASA,SAAS,OAAO;YACzB,OAAOA,SAAS,KAAK;YACrB,YAAY;QACd;QAGF,MAAM,IAAIE,MAAM;IAClB;AACF"}
|
|
@@ -178,7 +178,7 @@ Important: Return ONLY the raw YAML content. Do NOT wrap the response in markdow
|
|
|
178
178
|
}))
|
|
179
179
|
});
|
|
180
180
|
}
|
|
181
|
-
const response = await (0, external_index_js_namespaceObject.callAIWithStringResponse)(prompt,
|
|
181
|
+
const response = await (0, external_index_js_namespaceObject.callAIWithStringResponse)(prompt, modelConfig);
|
|
182
182
|
if (response?.content && 'string' == typeof response.content) return response.content;
|
|
183
183
|
throw new Error('Failed to generate YAML test configuration');
|
|
184
184
|
} catch (error) {
|
|
@@ -234,12 +234,12 @@ Important: Return ONLY the raw YAML content. Do NOT wrap the response in markdow
|
|
|
234
234
|
}))
|
|
235
235
|
});
|
|
236
236
|
}
|
|
237
|
-
if (options.stream && options.onChunk) return await (0, external_index_js_namespaceObject.callAI)(prompt,
|
|
237
|
+
if (options.stream && options.onChunk) return await (0, external_index_js_namespaceObject.callAI)(prompt, modelConfig, {
|
|
238
238
|
stream: true,
|
|
239
239
|
onChunk: options.onChunk
|
|
240
240
|
});
|
|
241
241
|
{
|
|
242
|
-
const response = await (0, external_index_js_namespaceObject.callAIWithStringResponse)(prompt,
|
|
242
|
+
const response = await (0, external_index_js_namespaceObject.callAIWithStringResponse)(prompt, modelConfig);
|
|
243
243
|
if (response?.content && 'string' == typeof response.content) return {
|
|
244
244
|
content: response.content,
|
|
245
245
|
usage: response.usage,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/prompt/yaml-generator.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/yaml-generator.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n StreamingAIResponse,\n StreamingCodeGenerationOptions,\n} from '@/types';\nimport { YAML_EXAMPLE_CODE } from '@midscene/shared/constants';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n AIActionType,\n type ChatCompletionMessageParam,\n callAI,\n callAIWithStringResponse,\n} from '../index';\n\n// Common interfaces for test generation (shared between YAML and Playwright)\nexport interface EventCounts {\n navigation: number;\n click: number;\n input: number;\n scroll: number;\n total: number;\n}\n\nexport interface InputDescription {\n description: string;\n value: string;\n}\n\nexport interface ProcessedEvent {\n type: string;\n timestamp: number;\n url?: string;\n title?: string;\n elementDescription?: string;\n value?: string;\n pageInfo?: any;\n elementRect?: any;\n}\n\nexport interface EventSummary {\n testName: string;\n startUrl: string;\n eventCounts: EventCounts;\n urls: string[];\n clickDescriptions: string[];\n inputDescriptions: InputDescription[];\n events: ProcessedEvent[];\n}\n\n// Common ChromeRecordedEvent interface\nexport interface ChromeRecordedEvent {\n type: string;\n timestamp: number;\n url?: string;\n title?: string;\n elementDescription?: string;\n value?: string;\n pageInfo?: any;\n elementRect?: any;\n screenshotBefore?: string;\n screenshotAfter?: string;\n screenshotWithBox?: string;\n}\n\nexport interface YamlGenerationOptions {\n testName?: string;\n includeTimestamps?: boolean;\n maxScreenshots?: number;\n description?: string;\n}\n\nexport interface FilteredEvents {\n navigationEvents: ChromeRecordedEvent[];\n clickEvents: ChromeRecordedEvent[];\n inputEvents: ChromeRecordedEvent[];\n scrollEvents: ChromeRecordedEvent[];\n}\n\n// Common utility functions (shared between YAML and Playwright generators)\n\n/**\n * Get screenshots from events for LLM context\n */\nexport const getScreenshotsForLLM = (\n events: ChromeRecordedEvent[],\n maxScreenshots = 1,\n): string[] => {\n // Find events with screenshots, prioritizing navigation and click events\n const eventsWithScreenshots = events.filter(\n (event) =>\n event.screenshotBefore ||\n event.screenshotAfter ||\n event.screenshotWithBox,\n );\n\n // Sort them by priority (navigation first, then clicks, then others)\n const sortedEvents = [...eventsWithScreenshots].sort((a, b) => {\n if (a.type === 'navigation' && b.type !== 'navigation') return -1;\n if (a.type !== 'navigation' && b.type === 'navigation') return 1;\n if (a.type === 'click' && b.type !== 'click') return -1;\n if (a.type !== 'click' && b.type === 'click') return 1;\n return 0;\n });\n\n // Extract up to maxScreenshots screenshots\n const screenshots: string[] = [];\n for (const event of sortedEvents) {\n // Prefer the most informative screenshot\n const screenshot =\n event.screenshotWithBox ||\n event.screenshotAfter ||\n event.screenshotBefore;\n if (screenshot && !screenshots.includes(screenshot)) {\n screenshots.push(screenshot);\n if (screenshots.length >= maxScreenshots) break;\n }\n }\n\n return screenshots;\n};\n\n/**\n * Filter events by type for easier processing\n */\nexport const filterEventsByType = (\n events: ChromeRecordedEvent[],\n): FilteredEvents => {\n return {\n navigationEvents: events.filter((event) => event.type === 'navigation'),\n clickEvents: events.filter((event) => event.type === 'click'),\n inputEvents: events.filter((event) => event.type === 'input'),\n scrollEvents: events.filter((event) => event.type === 'scroll'),\n };\n};\n\n/**\n * Create event counts summary\n */\nexport const createEventCounts = (\n filteredEvents: FilteredEvents,\n totalEvents: number,\n): EventCounts => {\n return {\n navigation: filteredEvents.navigationEvents.length,\n click: filteredEvents.clickEvents.length,\n input: filteredEvents.inputEvents.length,\n scroll: filteredEvents.scrollEvents.length,\n total: totalEvents,\n };\n};\n\n/**\n * Extract input descriptions from input events\n */\nexport const extractInputDescriptions = (\n inputEvents: ChromeRecordedEvent[],\n): InputDescription[] => {\n return inputEvents\n .map((event) => ({\n description: event.elementDescription || '',\n value: event.value || '',\n }))\n .filter((item) => item.description && item.value);\n};\n\n/**\n * Process events for LLM consumption\n */\nexport const processEventsForLLM = (\n events: ChromeRecordedEvent[],\n): ProcessedEvent[] => {\n return events.map((event) => ({\n type: event.type,\n timestamp: event.timestamp,\n url: event.url,\n title: event.title,\n elementDescription: event.elementDescription,\n value: event.value,\n pageInfo: event.pageInfo,\n elementRect: event.elementRect,\n }));\n};\n\n/**\n * Prepare comprehensive event summary for LLM\n */\nexport const prepareEventSummary = (\n events: ChromeRecordedEvent[],\n options: { testName?: string; maxScreenshots?: number } = {},\n): EventSummary => {\n const filteredEvents = filterEventsByType(events);\n const eventCounts = createEventCounts(filteredEvents, events.length);\n\n // Extract useful information from events\n const startUrl =\n filteredEvents.navigationEvents.length > 0\n ? filteredEvents.navigationEvents[0].url || ''\n : '';\n\n const clickDescriptions = filteredEvents.clickEvents\n .map((event) => event.elementDescription)\n .filter((desc): desc is string => Boolean(desc))\n .slice(0, 10);\n\n const inputDescriptions = extractInputDescriptions(\n filteredEvents.inputEvents,\n ).slice(0, 10);\n\n const urls = filteredEvents.navigationEvents\n .map((e) => e.url)\n .filter((url): url is string => Boolean(url))\n .slice(0, 5);\n\n const processedEvents = processEventsForLLM(events);\n\n return {\n testName: options.testName || 'Automated test from recorded events',\n startUrl,\n eventCounts,\n urls,\n clickDescriptions,\n inputDescriptions,\n events: processedEvents,\n };\n};\n\n/**\n * Create message content for LLM with optional screenshots\n */\nexport const createMessageContent = (\n promptText: string,\n screenshots: string[] = [],\n includeScreenshots = true,\n) => {\n const messageContent: any[] = [\n {\n type: 'text',\n text: promptText,\n },\n ];\n\n // Add screenshots if available and requested\n if (includeScreenshots && screenshots.length > 0) {\n messageContent.unshift({\n type: 'text',\n text: 'Here are screenshots from the recording session to help you understand the context:',\n });\n\n screenshots.forEach((screenshot) => {\n messageContent.push({\n type: 'image_url',\n image_url: {\n url: screenshot,\n },\n });\n });\n }\n\n return messageContent;\n};\n\n/**\n * Validate events before processing\n */\nexport const validateEvents = (events: ChromeRecordedEvent[]): void => {\n if (!events.length) {\n throw new Error('No events provided for test generation');\n }\n};\n\n// YAML-specific generation functions\n\n/**\n * Generates YAML test configuration from recorded events using AI\n */\nexport const generateYamlTest = async (\n events: ChromeRecordedEvent[],\n options: YamlGenerationOptions,\n modelConfig: IModelConfig,\n): Promise<string> => {\n try {\n // Validate input\n validateEvents(events);\n\n // Prepare event summary using shared utilities\n const summary = prepareEventSummary(events, {\n testName: options.testName,\n maxScreenshots: options.maxScreenshots || 3,\n });\n\n // Add YAML-specific options to summary\n const yamlSummary = {\n ...summary,\n includeTimestamps: options.includeTimestamps || false,\n };\n\n // Get screenshots for visual context\n const screenshots = getScreenshotsForLLM(\n events,\n options.maxScreenshots || 3,\n );\n\n // Use LLM to generate the YAML test configuration\n const prompt: ChatCompletionMessageParam[] = [\n {\n role: 'system',\n content: `You are an expert in Midscene.js YAML test generation. Generate clean, accurate YAML following these rules: ${YAML_EXAMPLE_CODE}`,\n },\n {\n role: 'user',\n content: `Generate YAML test for Midscene.js automation from recorded browser events.\n\nEvent Summary:\n${JSON.stringify(yamlSummary, null, 2)}\n\nConvert events:\n- navigation → target.url\n- click → aiTap with element description\n- input → aiInput with value and locate\n- scroll → aiScroll with appropriate direction\n- Add aiAssert for important state changes\n\nImportant: Return ONLY the raw YAML content. Do NOT wrap the response in markdown code blocks (no \\`\\`\\`yaml or \\`\\`\\`). Start directly with the YAML content.`,\n },\n ];\n\n // Add screenshots if available and requested\n if (screenshots.length > 0) {\n prompt.push({\n role: 'user',\n content:\n 'Here are screenshots from the recording session to help you understand the context:',\n });\n\n prompt.push({\n role: 'user',\n content: screenshots.map((screenshot) => ({\n type: 'image_url',\n image_url: {\n url: screenshot,\n },\n })),\n });\n }\n\n const response = await callAIWithStringResponse(\n prompt,\n AIActionType.TEXT,\n modelConfig,\n );\n\n if (response?.content && typeof response.content === 'string') {\n return response.content;\n }\n\n throw new Error('Failed to generate YAML test configuration');\n } catch (error) {\n throw new Error(`Failed to generate YAML test: ${error}`);\n }\n};\n\n/**\n * Generates YAML test configuration from recorded events using AI with streaming support\n */\nexport const generateYamlTestStream = async (\n events: ChromeRecordedEvent[],\n options: YamlGenerationOptions & StreamingCodeGenerationOptions,\n modelConfig: IModelConfig,\n): Promise<StreamingAIResponse> => {\n try {\n // Validate input\n validateEvents(events);\n\n // Prepare event summary using shared utilities\n const summary = prepareEventSummary(events, {\n testName: options.testName,\n maxScreenshots: options.maxScreenshots || 3,\n });\n\n // Add YAML-specific options to summary\n const yamlSummary = {\n ...summary,\n includeTimestamps: options.includeTimestamps || false,\n };\n\n // Get screenshots for visual context\n const screenshots = getScreenshotsForLLM(\n events,\n options.maxScreenshots || 3,\n );\n\n // Use LLM to generate the YAML test configuration\n const prompt: ChatCompletionMessageParam[] = [\n {\n role: 'system',\n content: `You are an expert in Midscene.js YAML test generation. Generate clean, accurate YAML following these rules: ${YAML_EXAMPLE_CODE}`,\n },\n {\n role: 'user',\n content: `Generate YAML test for Midscene.js automation from recorded browser events.\n\nEvent Summary:\n${JSON.stringify(yamlSummary, null, 2)}\n\nConvert events:\n- navigation → target.url\n- click → aiTap with element description\n- input → aiInput with value and locate\n- scroll → aiScroll with appropriate direction\n- Add aiAssert for important state changes\n\nImportant: Return ONLY the raw YAML content. Do NOT wrap the response in markdown code blocks (no \\`\\`\\`yaml or \\`\\`\\`). Start directly with the YAML content.`,\n },\n ];\n\n // Add screenshots if available and requested\n if (screenshots.length > 0) {\n prompt.push({\n role: 'user',\n content:\n 'Here are screenshots from the recording session to help you understand the context:',\n });\n\n prompt.push({\n role: 'user',\n content: screenshots.map((screenshot) => ({\n type: 'image_url',\n image_url: {\n url: screenshot,\n },\n })),\n });\n }\n\n if (options.stream && options.onChunk) {\n // Use streaming\n return await callAI(prompt, AIActionType.TEXT, modelConfig, {\n stream: true,\n onChunk: options.onChunk,\n });\n } else {\n // Fallback to non-streaming\n const response = await callAIWithStringResponse(\n prompt,\n AIActionType.TEXT,\n modelConfig,\n );\n\n if (response?.content && typeof response.content === 'string') {\n return {\n content: response.content,\n usage: response.usage,\n isStreamed: false,\n };\n }\n\n throw new Error('Failed to generate YAML test configuration');\n }\n } catch (error) {\n throw new Error(`Failed to generate YAML test: ${error}`);\n }\n};\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","getScreenshotsForLLM","events","maxScreenshots","eventsWithScreenshots","event","sortedEvents","a","b","screenshots","screenshot","filterEventsByType","createEventCounts","filteredEvents","totalEvents","extractInputDescriptions","inputEvents","item","processEventsForLLM","prepareEventSummary","options","eventCounts","startUrl","clickDescriptions","desc","Boolean","inputDescriptions","urls","e","url","processedEvents","createMessageContent","promptText","includeScreenshots","messageContent","validateEvents","Error","generateYamlTest","modelConfig","summary","yamlSummary","prompt","YAML_EXAMPLE_CODE","JSON","response","callAIWithStringResponse","AIActionType","error","generateYamlTestStream","callAI"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;AC4EO,MAAMI,uBAAuB,CAClCC,QACAC,iBAAiB,CAAC;IAGlB,MAAMC,wBAAwBF,OAAO,MAAM,CACzC,CAACG,QACCA,MAAM,gBAAgB,IACtBA,MAAM,eAAe,IACrBA,MAAM,iBAAiB;IAI3B,MAAMC,eAAe;WAAIF;KAAsB,CAAC,IAAI,CAAC,CAACG,GAAGC;QACvD,IAAID,AAAW,iBAAXA,EAAE,IAAI,IAAqBC,AAAW,iBAAXA,EAAE,IAAI,EAAmB,OAAO;QAC/D,IAAID,AAAW,iBAAXA,EAAE,IAAI,IAAqBC,AAAW,iBAAXA,EAAE,IAAI,EAAmB,OAAO;QAC/D,IAAID,AAAW,YAAXA,EAAE,IAAI,IAAgBC,AAAW,YAAXA,EAAE,IAAI,EAAc,OAAO;QACrD,IAAID,AAAW,YAAXA,EAAE,IAAI,IAAgBC,AAAW,YAAXA,EAAE,IAAI,EAAc,OAAO;QACrD,OAAO;IACT;IAGA,MAAMC,cAAwB,EAAE;IAChC,KAAK,MAAMJ,SAASC,aAAc;QAEhC,MAAMI,aACJL,MAAM,iBAAiB,IACvBA,MAAM,eAAe,IACrBA,MAAM,gBAAgB;QACxB,IAAIK,cAAc,CAACD,YAAY,QAAQ,CAACC,aAAa;YACnDD,YAAY,IAAI,CAACC;YACjB,IAAID,YAAY,MAAM,IAAIN,gBAAgB;QAC5C;IACF;IAEA,OAAOM;AACT;AAKO,MAAME,qBAAqB,CAChCT,SAEO;QACL,kBAAkBA,OAAO,MAAM,CAAC,CAACG,QAAUA,AAAe,iBAAfA,MAAM,IAAI;QACrD,aAAaH,OAAO,MAAM,CAAC,CAACG,QAAUA,AAAe,YAAfA,MAAM,IAAI;QAChD,aAAaH,OAAO,MAAM,CAAC,CAACG,QAAUA,AAAe,YAAfA,MAAM,IAAI;QAChD,cAAcH,OAAO,MAAM,CAAC,CAACG,QAAUA,AAAe,aAAfA,MAAM,IAAI;IACnD;AAMK,MAAMO,oBAAoB,CAC/BC,gBACAC,cAEO;QACL,YAAYD,eAAe,gBAAgB,CAAC,MAAM;QAClD,OAAOA,eAAe,WAAW,CAAC,MAAM;QACxC,OAAOA,eAAe,WAAW,CAAC,MAAM;QACxC,QAAQA,eAAe,YAAY,CAAC,MAAM;QAC1C,OAAOC;IACT;AAMK,MAAMC,2BAA2B,CACtCC,cAEOA,YACJ,GAAG,CAAC,CAACX,QAAW;YACf,aAAaA,MAAM,kBAAkB,IAAI;YACzC,OAAOA,MAAM,KAAK,IAAI;QACxB,IACC,MAAM,CAAC,CAACY,OAASA,KAAK,WAAW,IAAIA,KAAK,KAAK;AAM7C,MAAMC,sBAAsB,CACjChB,SAEOA,OAAO,GAAG,CAAC,CAACG,QAAW;YAC5B,MAAMA,MAAM,IAAI;YAChB,WAAWA,MAAM,SAAS;YAC1B,KAAKA,MAAM,GAAG;YACd,OAAOA,MAAM,KAAK;YAClB,oBAAoBA,MAAM,kBAAkB;YAC5C,OAAOA,MAAM,KAAK;YAClB,UAAUA,MAAM,QAAQ;YACxB,aAAaA,MAAM,WAAW;QAChC;AAMK,MAAMc,sBAAsB,CACjCjB,QACAkB,UAA0D,CAAC,CAAC;IAE5D,MAAMP,iBAAiBF,mBAAmBT;IAC1C,MAAMmB,cAAcT,kBAAkBC,gBAAgBX,OAAO,MAAM;IAGnE,MAAMoB,WACJT,eAAe,gBAAgB,CAAC,MAAM,GAAG,IACrCA,eAAe,gBAAgB,CAAC,EAAE,CAAC,GAAG,IAAI,KAC1C;IAEN,MAAMU,oBAAoBV,eAAe,WAAW,CACjD,GAAG,CAAC,CAACR,QAAUA,MAAM,kBAAkB,EACvC,MAAM,CAAC,CAACmB,OAAyBC,QAAQD,OACzC,KAAK,CAAC,GAAG;IAEZ,MAAME,oBAAoBX,yBACxBF,eAAe,WAAW,EAC1B,KAAK,CAAC,GAAG;IAEX,MAAMc,OAAOd,eAAe,gBAAgB,CACzC,GAAG,CAAC,CAACe,IAAMA,EAAE,GAAG,EAChB,MAAM,CAAC,CAACC,MAAuBJ,QAAQI,MACvC,KAAK,CAAC,GAAG;IAEZ,MAAMC,kBAAkBZ,oBAAoBhB;IAE5C,OAAO;QACL,UAAUkB,QAAQ,QAAQ,IAAI;QAC9BE;QACAD;QACAM;QACAJ;QACAG;QACA,QAAQI;IACV;AACF;AAKO,MAAMC,uBAAuB,CAClCC,YACAvB,cAAwB,EAAE,EAC1BwB,qBAAqB,IAAI;IAEzB,MAAMC,iBAAwB;QAC5B;YACE,MAAM;YACN,MAAMF;QACR;KACD;IAGD,IAAIC,sBAAsBxB,YAAY,MAAM,GAAG,GAAG;QAChDyB,eAAe,OAAO,CAAC;YACrB,MAAM;YACN,MAAM;QACR;QAEAzB,YAAY,OAAO,CAAC,CAACC;YACnBwB,eAAe,IAAI,CAAC;gBAClB,MAAM;gBACN,WAAW;oBACT,KAAKxB;gBACP;YACF;QACF;IACF;IAEA,OAAOwB;AACT;AAKO,MAAMC,iBAAiB,CAACjC;IAC7B,IAAI,CAACA,OAAO,MAAM,EAChB,MAAM,IAAIkC,MAAM;AAEpB;AAOO,MAAMC,mBAAmB,OAC9BnC,QACAkB,SACAkB;IAEA,IAAI;QAEFH,eAAejC;QAGf,MAAMqC,UAAUpB,oBAAoBjB,QAAQ;YAC1C,UAAUkB,QAAQ,QAAQ;YAC1B,gBAAgBA,QAAQ,cAAc,IAAI;QAC5C;QAGA,MAAMoB,cAAc;YAClB,GAAGD,OAAO;YACV,mBAAmBnB,QAAQ,iBAAiB,IAAI;QAClD;QAGA,MAAMX,cAAcR,qBAClBC,QACAkB,QAAQ,cAAc,IAAI;QAI5B,MAAMqB,SAAuC;YAC3C;gBACE,MAAM;gBACN,SAAS,CAAC,4GAA4G,EAAEC,0BAAAA,iBAAiBA,EAAE;YAC7I;YACA;gBACE,MAAM;gBACN,SAAS,CAAC;;;AAGlB,EAAEC,KAAK,SAAS,CAACH,aAAa,MAAM,GAAG;;;;;;;;;8JASuH,CAAC;YACzJ;SACD;QAGD,IAAI/B,YAAY,MAAM,GAAG,GAAG;YAC1BgC,OAAO,IAAI,CAAC;gBACV,MAAM;gBACN,SACE;YACJ;YAEAA,OAAO,IAAI,CAAC;gBACV,MAAM;gBACN,SAAShC,YAAY,GAAG,CAAC,CAACC,aAAgB;wBACxC,MAAM;wBACN,WAAW;4BACT,KAAKA;wBACP;oBACF;YACF;QACF;QAEA,MAAMkC,WAAW,MAAMC,AAAAA,IAAAA,kCAAAA,wBAAAA,AAAAA,EACrBJ,QACAK,kCAAAA,YAAAA,CAAAA,IAAiB,EACjBR;QAGF,IAAIM,UAAU,WAAW,AAA4B,YAA5B,OAAOA,SAAS,OAAO,EAC9C,OAAOA,SAAS,OAAO;QAGzB,MAAM,IAAIR,MAAM;IAClB,EAAE,OAAOW,OAAO;QACd,MAAM,IAAIX,MAAM,CAAC,8BAA8B,EAAEW,OAAO;IAC1D;AACF;AAKO,MAAMC,yBAAyB,OACpC9C,QACAkB,SACAkB;IAEA,IAAI;QAEFH,eAAejC;QAGf,MAAMqC,UAAUpB,oBAAoBjB,QAAQ;YAC1C,UAAUkB,QAAQ,QAAQ;YAC1B,gBAAgBA,QAAQ,cAAc,IAAI;QAC5C;QAGA,MAAMoB,cAAc;YAClB,GAAGD,OAAO;YACV,mBAAmBnB,QAAQ,iBAAiB,IAAI;QAClD;QAGA,MAAMX,cAAcR,qBAClBC,QACAkB,QAAQ,cAAc,IAAI;QAI5B,MAAMqB,SAAuC;YAC3C;gBACE,MAAM;gBACN,SAAS,CAAC,4GAA4G,EAAEC,0BAAAA,iBAAiBA,EAAE;YAC7I;YACA;gBACE,MAAM;gBACN,SAAS,CAAC;;;AAGlB,EAAEC,KAAK,SAAS,CAACH,aAAa,MAAM,GAAG;;;;;;;;;8JASuH,CAAC;YACzJ;SACD;QAGD,IAAI/B,YAAY,MAAM,GAAG,GAAG;YAC1BgC,OAAO,IAAI,CAAC;gBACV,MAAM;gBACN,SACE;YACJ;YAEAA,OAAO,IAAI,CAAC;gBACV,MAAM;gBACN,SAAShC,YAAY,GAAG,CAAC,CAACC,aAAgB;wBACxC,MAAM;wBACN,WAAW;4BACT,KAAKA;wBACP;oBACF;YACF;QACF;QAEA,IAAIU,QAAQ,MAAM,IAAIA,QAAQ,OAAO,EAEnC,OAAO,MAAM6B,AAAAA,IAAAA,kCAAAA,MAAAA,AAAAA,EAAOR,QAAQK,kCAAAA,YAAAA,CAAAA,IAAiB,EAAER,aAAa;YAC1D,QAAQ;YACR,SAASlB,QAAQ,OAAO;QAC1B;QACK;YAEL,MAAMwB,WAAW,MAAMC,AAAAA,IAAAA,kCAAAA,wBAAAA,AAAAA,EACrBJ,QACAK,kCAAAA,YAAAA,CAAAA,IAAiB,EACjBR;YAGF,IAAIM,UAAU,WAAW,AAA4B,YAA5B,OAAOA,SAAS,OAAO,EAC9C,OAAO;gBACL,SAASA,SAAS,OAAO;gBACzB,OAAOA,SAAS,KAAK;gBACrB,YAAY;YACd;YAGF,MAAM,IAAIR,MAAM;QAClB;IACF,EAAE,OAAOW,OAAO;QACd,MAAM,IAAIX,MAAM,CAAC,8BAA8B,EAAEW,OAAO;IAC1D;AACF"}
|
|
1
|
+
{"version":3,"file":"ai-model/prompt/yaml-generator.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/yaml-generator.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n StreamingAIResponse,\n StreamingCodeGenerationOptions,\n} from '@/types';\nimport { YAML_EXAMPLE_CODE } from '@midscene/shared/constants';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n type ChatCompletionMessageParam,\n callAI,\n callAIWithStringResponse,\n} from '../index';\n\n// Common interfaces for test generation (shared between YAML and Playwright)\nexport interface EventCounts {\n navigation: number;\n click: number;\n input: number;\n scroll: number;\n total: number;\n}\n\nexport interface InputDescription {\n description: string;\n value: string;\n}\n\nexport interface ProcessedEvent {\n type: string;\n timestamp: number;\n url?: string;\n title?: string;\n elementDescription?: string;\n value?: string;\n pageInfo?: any;\n elementRect?: any;\n}\n\nexport interface EventSummary {\n testName: string;\n startUrl: string;\n eventCounts: EventCounts;\n urls: string[];\n clickDescriptions: string[];\n inputDescriptions: InputDescription[];\n events: ProcessedEvent[];\n}\n\n// Common ChromeRecordedEvent interface\nexport interface ChromeRecordedEvent {\n type: string;\n timestamp: number;\n url?: string;\n title?: string;\n elementDescription?: string;\n value?: string;\n pageInfo?: any;\n elementRect?: any;\n screenshotBefore?: string;\n screenshotAfter?: string;\n screenshotWithBox?: string;\n}\n\nexport interface YamlGenerationOptions {\n testName?: string;\n includeTimestamps?: boolean;\n maxScreenshots?: number;\n description?: string;\n}\n\nexport interface FilteredEvents {\n navigationEvents: ChromeRecordedEvent[];\n clickEvents: ChromeRecordedEvent[];\n inputEvents: ChromeRecordedEvent[];\n scrollEvents: ChromeRecordedEvent[];\n}\n\n// Common utility functions (shared between YAML and Playwright generators)\n\n/**\n * Get screenshots from events for LLM context\n */\nexport const getScreenshotsForLLM = (\n events: ChromeRecordedEvent[],\n maxScreenshots = 1,\n): string[] => {\n // Find events with screenshots, prioritizing navigation and click events\n const eventsWithScreenshots = events.filter(\n (event) =>\n event.screenshotBefore ||\n event.screenshotAfter ||\n event.screenshotWithBox,\n );\n\n // Sort them by priority (navigation first, then clicks, then others)\n const sortedEvents = [...eventsWithScreenshots].sort((a, b) => {\n if (a.type === 'navigation' && b.type !== 'navigation') return -1;\n if (a.type !== 'navigation' && b.type === 'navigation') return 1;\n if (a.type === 'click' && b.type !== 'click') return -1;\n if (a.type !== 'click' && b.type === 'click') return 1;\n return 0;\n });\n\n // Extract up to maxScreenshots screenshots\n const screenshots: string[] = [];\n for (const event of sortedEvents) {\n // Prefer the most informative screenshot\n const screenshot =\n event.screenshotWithBox ||\n event.screenshotAfter ||\n event.screenshotBefore;\n if (screenshot && !screenshots.includes(screenshot)) {\n screenshots.push(screenshot);\n if (screenshots.length >= maxScreenshots) break;\n }\n }\n\n return screenshots;\n};\n\n/**\n * Filter events by type for easier processing\n */\nexport const filterEventsByType = (\n events: ChromeRecordedEvent[],\n): FilteredEvents => {\n return {\n navigationEvents: events.filter((event) => event.type === 'navigation'),\n clickEvents: events.filter((event) => event.type === 'click'),\n inputEvents: events.filter((event) => event.type === 'input'),\n scrollEvents: events.filter((event) => event.type === 'scroll'),\n };\n};\n\n/**\n * Create event counts summary\n */\nexport const createEventCounts = (\n filteredEvents: FilteredEvents,\n totalEvents: number,\n): EventCounts => {\n return {\n navigation: filteredEvents.navigationEvents.length,\n click: filteredEvents.clickEvents.length,\n input: filteredEvents.inputEvents.length,\n scroll: filteredEvents.scrollEvents.length,\n total: totalEvents,\n };\n};\n\n/**\n * Extract input descriptions from input events\n */\nexport const extractInputDescriptions = (\n inputEvents: ChromeRecordedEvent[],\n): InputDescription[] => {\n return inputEvents\n .map((event) => ({\n description: event.elementDescription || '',\n value: event.value || '',\n }))\n .filter((item) => item.description && item.value);\n};\n\n/**\n * Process events for LLM consumption\n */\nexport const processEventsForLLM = (\n events: ChromeRecordedEvent[],\n): ProcessedEvent[] => {\n return events.map((event) => ({\n type: event.type,\n timestamp: event.timestamp,\n url: event.url,\n title: event.title,\n elementDescription: event.elementDescription,\n value: event.value,\n pageInfo: event.pageInfo,\n elementRect: event.elementRect,\n }));\n};\n\n/**\n * Prepare comprehensive event summary for LLM\n */\nexport const prepareEventSummary = (\n events: ChromeRecordedEvent[],\n options: { testName?: string; maxScreenshots?: number } = {},\n): EventSummary => {\n const filteredEvents = filterEventsByType(events);\n const eventCounts = createEventCounts(filteredEvents, events.length);\n\n // Extract useful information from events\n const startUrl =\n filteredEvents.navigationEvents.length > 0\n ? filteredEvents.navigationEvents[0].url || ''\n : '';\n\n const clickDescriptions = filteredEvents.clickEvents\n .map((event) => event.elementDescription)\n .filter((desc): desc is string => Boolean(desc))\n .slice(0, 10);\n\n const inputDescriptions = extractInputDescriptions(\n filteredEvents.inputEvents,\n ).slice(0, 10);\n\n const urls = filteredEvents.navigationEvents\n .map((e) => e.url)\n .filter((url): url is string => Boolean(url))\n .slice(0, 5);\n\n const processedEvents = processEventsForLLM(events);\n\n return {\n testName: options.testName || 'Automated test from recorded events',\n startUrl,\n eventCounts,\n urls,\n clickDescriptions,\n inputDescriptions,\n events: processedEvents,\n };\n};\n\n/**\n * Create message content for LLM with optional screenshots\n */\nexport const createMessageContent = (\n promptText: string,\n screenshots: string[] = [],\n includeScreenshots = true,\n) => {\n const messageContent: any[] = [\n {\n type: 'text',\n text: promptText,\n },\n ];\n\n // Add screenshots if available and requested\n if (includeScreenshots && screenshots.length > 0) {\n messageContent.unshift({\n type: 'text',\n text: 'Here are screenshots from the recording session to help you understand the context:',\n });\n\n screenshots.forEach((screenshot) => {\n messageContent.push({\n type: 'image_url',\n image_url: {\n url: screenshot,\n },\n });\n });\n }\n\n return messageContent;\n};\n\n/**\n * Validate events before processing\n */\nexport const validateEvents = (events: ChromeRecordedEvent[]): void => {\n if (!events.length) {\n throw new Error('No events provided for test generation');\n }\n};\n\n// YAML-specific generation functions\n\n/**\n * Generates YAML test configuration from recorded events using AI\n */\nexport const generateYamlTest = async (\n events: ChromeRecordedEvent[],\n options: YamlGenerationOptions,\n modelConfig: IModelConfig,\n): Promise<string> => {\n try {\n // Validate input\n validateEvents(events);\n\n // Prepare event summary using shared utilities\n const summary = prepareEventSummary(events, {\n testName: options.testName,\n maxScreenshots: options.maxScreenshots || 3,\n });\n\n // Add YAML-specific options to summary\n const yamlSummary = {\n ...summary,\n includeTimestamps: options.includeTimestamps || false,\n };\n\n // Get screenshots for visual context\n const screenshots = getScreenshotsForLLM(\n events,\n options.maxScreenshots || 3,\n );\n\n // Use LLM to generate the YAML test configuration\n const prompt: ChatCompletionMessageParam[] = [\n {\n role: 'system',\n content: `You are an expert in Midscene.js YAML test generation. Generate clean, accurate YAML following these rules: ${YAML_EXAMPLE_CODE}`,\n },\n {\n role: 'user',\n content: `Generate YAML test for Midscene.js automation from recorded browser events.\n\nEvent Summary:\n${JSON.stringify(yamlSummary, null, 2)}\n\nConvert events:\n- navigation → target.url\n- click → aiTap with element description\n- input → aiInput with value and locate\n- scroll → aiScroll with appropriate direction\n- Add aiAssert for important state changes\n\nImportant: Return ONLY the raw YAML content. Do NOT wrap the response in markdown code blocks (no \\`\\`\\`yaml or \\`\\`\\`). Start directly with the YAML content.`,\n },\n ];\n\n // Add screenshots if available and requested\n if (screenshots.length > 0) {\n prompt.push({\n role: 'user',\n content:\n 'Here are screenshots from the recording session to help you understand the context:',\n });\n\n prompt.push({\n role: 'user',\n content: screenshots.map((screenshot) => ({\n type: 'image_url',\n image_url: {\n url: screenshot,\n },\n })),\n });\n }\n\n const response = await callAIWithStringResponse(prompt, modelConfig);\n\n if (response?.content && typeof response.content === 'string') {\n return response.content;\n }\n\n throw new Error('Failed to generate YAML test configuration');\n } catch (error) {\n throw new Error(`Failed to generate YAML test: ${error}`);\n }\n};\n\n/**\n * Generates YAML test configuration from recorded events using AI with streaming support\n */\nexport const generateYamlTestStream = async (\n events: ChromeRecordedEvent[],\n options: YamlGenerationOptions & StreamingCodeGenerationOptions,\n modelConfig: IModelConfig,\n): Promise<StreamingAIResponse> => {\n try {\n // Validate input\n validateEvents(events);\n\n // Prepare event summary using shared utilities\n const summary = prepareEventSummary(events, {\n testName: options.testName,\n maxScreenshots: options.maxScreenshots || 3,\n });\n\n // Add YAML-specific options to summary\n const yamlSummary = {\n ...summary,\n includeTimestamps: options.includeTimestamps || false,\n };\n\n // Get screenshots for visual context\n const screenshots = getScreenshotsForLLM(\n events,\n options.maxScreenshots || 3,\n );\n\n // Use LLM to generate the YAML test configuration\n const prompt: ChatCompletionMessageParam[] = [\n {\n role: 'system',\n content: `You are an expert in Midscene.js YAML test generation. Generate clean, accurate YAML following these rules: ${YAML_EXAMPLE_CODE}`,\n },\n {\n role: 'user',\n content: `Generate YAML test for Midscene.js automation from recorded browser events.\n\nEvent Summary:\n${JSON.stringify(yamlSummary, null, 2)}\n\nConvert events:\n- navigation → target.url\n- click → aiTap with element description\n- input → aiInput with value and locate\n- scroll → aiScroll with appropriate direction\n- Add aiAssert for important state changes\n\nImportant: Return ONLY the raw YAML content. Do NOT wrap the response in markdown code blocks (no \\`\\`\\`yaml or \\`\\`\\`). Start directly with the YAML content.`,\n },\n ];\n\n // Add screenshots if available and requested\n if (screenshots.length > 0) {\n prompt.push({\n role: 'user',\n content:\n 'Here are screenshots from the recording session to help you understand the context:',\n });\n\n prompt.push({\n role: 'user',\n content: screenshots.map((screenshot) => ({\n type: 'image_url',\n image_url: {\n url: screenshot,\n },\n })),\n });\n }\n\n if (options.stream && options.onChunk) {\n // Use streaming\n return await callAI(prompt, modelConfig, {\n stream: true,\n onChunk: options.onChunk,\n });\n } else {\n // Fallback to non-streaming\n const response = await callAIWithStringResponse(prompt, modelConfig);\n\n if (response?.content && typeof response.content === 'string') {\n return {\n content: response.content,\n usage: response.usage,\n isStreamed: false,\n };\n }\n\n throw new Error('Failed to generate YAML test configuration');\n }\n } catch (error) {\n throw new Error(`Failed to generate YAML test: ${error}`);\n }\n};\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","getScreenshotsForLLM","events","maxScreenshots","eventsWithScreenshots","event","sortedEvents","a","b","screenshots","screenshot","filterEventsByType","createEventCounts","filteredEvents","totalEvents","extractInputDescriptions","inputEvents","item","processEventsForLLM","prepareEventSummary","options","eventCounts","startUrl","clickDescriptions","desc","Boolean","inputDescriptions","urls","e","url","processedEvents","createMessageContent","promptText","includeScreenshots","messageContent","validateEvents","Error","generateYamlTest","modelConfig","summary","yamlSummary","prompt","YAML_EXAMPLE_CODE","JSON","response","callAIWithStringResponse","error","generateYamlTestStream","callAI"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;AC2EO,MAAMI,uBAAuB,CAClCC,QACAC,iBAAiB,CAAC;IAGlB,MAAMC,wBAAwBF,OAAO,MAAM,CACzC,CAACG,QACCA,MAAM,gBAAgB,IACtBA,MAAM,eAAe,IACrBA,MAAM,iBAAiB;IAI3B,MAAMC,eAAe;WAAIF;KAAsB,CAAC,IAAI,CAAC,CAACG,GAAGC;QACvD,IAAID,AAAW,iBAAXA,EAAE,IAAI,IAAqBC,AAAW,iBAAXA,EAAE,IAAI,EAAmB,OAAO;QAC/D,IAAID,AAAW,iBAAXA,EAAE,IAAI,IAAqBC,AAAW,iBAAXA,EAAE,IAAI,EAAmB,OAAO;QAC/D,IAAID,AAAW,YAAXA,EAAE,IAAI,IAAgBC,AAAW,YAAXA,EAAE,IAAI,EAAc,OAAO;QACrD,IAAID,AAAW,YAAXA,EAAE,IAAI,IAAgBC,AAAW,YAAXA,EAAE,IAAI,EAAc,OAAO;QACrD,OAAO;IACT;IAGA,MAAMC,cAAwB,EAAE;IAChC,KAAK,MAAMJ,SAASC,aAAc;QAEhC,MAAMI,aACJL,MAAM,iBAAiB,IACvBA,MAAM,eAAe,IACrBA,MAAM,gBAAgB;QACxB,IAAIK,cAAc,CAACD,YAAY,QAAQ,CAACC,aAAa;YACnDD,YAAY,IAAI,CAACC;YACjB,IAAID,YAAY,MAAM,IAAIN,gBAAgB;QAC5C;IACF;IAEA,OAAOM;AACT;AAKO,MAAME,qBAAqB,CAChCT,SAEO;QACL,kBAAkBA,OAAO,MAAM,CAAC,CAACG,QAAUA,AAAe,iBAAfA,MAAM,IAAI;QACrD,aAAaH,OAAO,MAAM,CAAC,CAACG,QAAUA,AAAe,YAAfA,MAAM,IAAI;QAChD,aAAaH,OAAO,MAAM,CAAC,CAACG,QAAUA,AAAe,YAAfA,MAAM,IAAI;QAChD,cAAcH,OAAO,MAAM,CAAC,CAACG,QAAUA,AAAe,aAAfA,MAAM,IAAI;IACnD;AAMK,MAAMO,oBAAoB,CAC/BC,gBACAC,cAEO;QACL,YAAYD,eAAe,gBAAgB,CAAC,MAAM;QAClD,OAAOA,eAAe,WAAW,CAAC,MAAM;QACxC,OAAOA,eAAe,WAAW,CAAC,MAAM;QACxC,QAAQA,eAAe,YAAY,CAAC,MAAM;QAC1C,OAAOC;IACT;AAMK,MAAMC,2BAA2B,CACtCC,cAEOA,YACJ,GAAG,CAAC,CAACX,QAAW;YACf,aAAaA,MAAM,kBAAkB,IAAI;YACzC,OAAOA,MAAM,KAAK,IAAI;QACxB,IACC,MAAM,CAAC,CAACY,OAASA,KAAK,WAAW,IAAIA,KAAK,KAAK;AAM7C,MAAMC,sBAAsB,CACjChB,SAEOA,OAAO,GAAG,CAAC,CAACG,QAAW;YAC5B,MAAMA,MAAM,IAAI;YAChB,WAAWA,MAAM,SAAS;YAC1B,KAAKA,MAAM,GAAG;YACd,OAAOA,MAAM,KAAK;YAClB,oBAAoBA,MAAM,kBAAkB;YAC5C,OAAOA,MAAM,KAAK;YAClB,UAAUA,MAAM,QAAQ;YACxB,aAAaA,MAAM,WAAW;QAChC;AAMK,MAAMc,sBAAsB,CACjCjB,QACAkB,UAA0D,CAAC,CAAC;IAE5D,MAAMP,iBAAiBF,mBAAmBT;IAC1C,MAAMmB,cAAcT,kBAAkBC,gBAAgBX,OAAO,MAAM;IAGnE,MAAMoB,WACJT,eAAe,gBAAgB,CAAC,MAAM,GAAG,IACrCA,eAAe,gBAAgB,CAAC,EAAE,CAAC,GAAG,IAAI,KAC1C;IAEN,MAAMU,oBAAoBV,eAAe,WAAW,CACjD,GAAG,CAAC,CAACR,QAAUA,MAAM,kBAAkB,EACvC,MAAM,CAAC,CAACmB,OAAyBC,QAAQD,OACzC,KAAK,CAAC,GAAG;IAEZ,MAAME,oBAAoBX,yBACxBF,eAAe,WAAW,EAC1B,KAAK,CAAC,GAAG;IAEX,MAAMc,OAAOd,eAAe,gBAAgB,CACzC,GAAG,CAAC,CAACe,IAAMA,EAAE,GAAG,EAChB,MAAM,CAAC,CAACC,MAAuBJ,QAAQI,MACvC,KAAK,CAAC,GAAG;IAEZ,MAAMC,kBAAkBZ,oBAAoBhB;IAE5C,OAAO;QACL,UAAUkB,QAAQ,QAAQ,IAAI;QAC9BE;QACAD;QACAM;QACAJ;QACAG;QACA,QAAQI;IACV;AACF;AAKO,MAAMC,uBAAuB,CAClCC,YACAvB,cAAwB,EAAE,EAC1BwB,qBAAqB,IAAI;IAEzB,MAAMC,iBAAwB;QAC5B;YACE,MAAM;YACN,MAAMF;QACR;KACD;IAGD,IAAIC,sBAAsBxB,YAAY,MAAM,GAAG,GAAG;QAChDyB,eAAe,OAAO,CAAC;YACrB,MAAM;YACN,MAAM;QACR;QAEAzB,YAAY,OAAO,CAAC,CAACC;YACnBwB,eAAe,IAAI,CAAC;gBAClB,MAAM;gBACN,WAAW;oBACT,KAAKxB;gBACP;YACF;QACF;IACF;IAEA,OAAOwB;AACT;AAKO,MAAMC,iBAAiB,CAACjC;IAC7B,IAAI,CAACA,OAAO,MAAM,EAChB,MAAM,IAAIkC,MAAM;AAEpB;AAOO,MAAMC,mBAAmB,OAC9BnC,QACAkB,SACAkB;IAEA,IAAI;QAEFH,eAAejC;QAGf,MAAMqC,UAAUpB,oBAAoBjB,QAAQ;YAC1C,UAAUkB,QAAQ,QAAQ;YAC1B,gBAAgBA,QAAQ,cAAc,IAAI;QAC5C;QAGA,MAAMoB,cAAc;YAClB,GAAGD,OAAO;YACV,mBAAmBnB,QAAQ,iBAAiB,IAAI;QAClD;QAGA,MAAMX,cAAcR,qBAClBC,QACAkB,QAAQ,cAAc,IAAI;QAI5B,MAAMqB,SAAuC;YAC3C;gBACE,MAAM;gBACN,SAAS,CAAC,4GAA4G,EAAEC,0BAAAA,iBAAiBA,EAAE;YAC7I;YACA;gBACE,MAAM;gBACN,SAAS,CAAC;;;AAGlB,EAAEC,KAAK,SAAS,CAACH,aAAa,MAAM,GAAG;;;;;;;;;8JASuH,CAAC;YACzJ;SACD;QAGD,IAAI/B,YAAY,MAAM,GAAG,GAAG;YAC1BgC,OAAO,IAAI,CAAC;gBACV,MAAM;gBACN,SACE;YACJ;YAEAA,OAAO,IAAI,CAAC;gBACV,MAAM;gBACN,SAAShC,YAAY,GAAG,CAAC,CAACC,aAAgB;wBACxC,MAAM;wBACN,WAAW;4BACT,KAAKA;wBACP;oBACF;YACF;QACF;QAEA,MAAMkC,WAAW,MAAMC,AAAAA,IAAAA,kCAAAA,wBAAAA,AAAAA,EAAyBJ,QAAQH;QAExD,IAAIM,UAAU,WAAW,AAA4B,YAA5B,OAAOA,SAAS,OAAO,EAC9C,OAAOA,SAAS,OAAO;QAGzB,MAAM,IAAIR,MAAM;IAClB,EAAE,OAAOU,OAAO;QACd,MAAM,IAAIV,MAAM,CAAC,8BAA8B,EAAEU,OAAO;IAC1D;AACF;AAKO,MAAMC,yBAAyB,OACpC7C,QACAkB,SACAkB;IAEA,IAAI;QAEFH,eAAejC;QAGf,MAAMqC,UAAUpB,oBAAoBjB,QAAQ;YAC1C,UAAUkB,QAAQ,QAAQ;YAC1B,gBAAgBA,QAAQ,cAAc,IAAI;QAC5C;QAGA,MAAMoB,cAAc;YAClB,GAAGD,OAAO;YACV,mBAAmBnB,QAAQ,iBAAiB,IAAI;QAClD;QAGA,MAAMX,cAAcR,qBAClBC,QACAkB,QAAQ,cAAc,IAAI;QAI5B,MAAMqB,SAAuC;YAC3C;gBACE,MAAM;gBACN,SAAS,CAAC,4GAA4G,EAAEC,0BAAAA,iBAAiBA,EAAE;YAC7I;YACA;gBACE,MAAM;gBACN,SAAS,CAAC;;;AAGlB,EAAEC,KAAK,SAAS,CAACH,aAAa,MAAM,GAAG;;;;;;;;;8JASuH,CAAC;YACzJ;SACD;QAGD,IAAI/B,YAAY,MAAM,GAAG,GAAG;YAC1BgC,OAAO,IAAI,CAAC;gBACV,MAAM;gBACN,SACE;YACJ;YAEAA,OAAO,IAAI,CAAC;gBACV,MAAM;gBACN,SAAShC,YAAY,GAAG,CAAC,CAACC,aAAgB;wBACxC,MAAM;wBACN,WAAW;4BACT,KAAKA;wBACP;oBACF;YACF;QACF;QAEA,IAAIU,QAAQ,MAAM,IAAIA,QAAQ,OAAO,EAEnC,OAAO,MAAM4B,AAAAA,IAAAA,kCAAAA,MAAAA,AAAAA,EAAOP,QAAQH,aAAa;YACvC,QAAQ;YACR,SAASlB,QAAQ,OAAO;QAC1B;QACK;YAEL,MAAMwB,WAAW,MAAMC,AAAAA,IAAAA,kCAAAA,wBAAAA,AAAAA,EAAyBJ,QAAQH;YAExD,IAAIM,UAAU,WAAW,AAA4B,YAA5B,OAAOA,SAAS,OAAO,EAC9C,OAAO;gBACL,SAASA,SAAS,OAAO;gBACzB,OAAOA,SAAS,KAAK;gBACrB,YAAY;YACd;YAGF,MAAM,IAAIR,MAAM;QAClB;IACF,EAAE,OAAOU,OAAO;QACd,MAAM,IAAIV,MAAM,CAAC,8BAA8B,EAAEU,OAAO;IAC1D;AACF"}
|
|
@@ -47,7 +47,7 @@ const utils_namespaceObject = require("@midscene/shared/utils");
|
|
|
47
47
|
const external_jsonrepair_namespaceObject = require("jsonrepair");
|
|
48
48
|
const external_openai_namespaceObject = require("openai");
|
|
49
49
|
var external_openai_default = /*#__PURE__*/ __webpack_require__.n(external_openai_namespaceObject);
|
|
50
|
-
async function createChatClient({
|
|
50
|
+
async function createChatClient({ modelConfig }) {
|
|
51
51
|
const { socksProxy, httpProxy, modelName, openaiBaseURL, openaiApiKey, openaiExtraConfig, modelDescription, uiTarsModelVersion: uiTarsVersion, vlMode, createOpenAIClient, timeout } = modelConfig;
|
|
52
52
|
let proxyAgent;
|
|
53
53
|
const debugProxy = (0, logger_namespaceObject.getDebug)('ai:call:proxy');
|
|
@@ -146,9 +146,8 @@ async function createChatClient({ AIActionTypeValue, modelConfig }) {
|
|
|
146
146
|
vlMode
|
|
147
147
|
};
|
|
148
148
|
}
|
|
149
|
-
async function callAI(messages,
|
|
149
|
+
async function callAI(messages, modelConfig, options) {
|
|
150
150
|
const { completion, modelName, modelDescription, uiTarsVersion, vlMode } = await createChatClient({
|
|
151
|
-
AIActionTypeValue,
|
|
152
151
|
modelConfig
|
|
153
152
|
});
|
|
154
153
|
const maxTokens = env_namespaceObject.globalConfigManager.getEnvConfigValue(env_namespaceObject.MIDSCENE_MODEL_MAX_TOKENS) ?? env_namespaceObject.globalConfigManager.getEnvConfigValue(env_namespaceObject.OPENAI_MAX_TOKENS);
|
|
@@ -284,8 +283,8 @@ async function callAI(messages, AIActionTypeValue, modelConfig, options) {
|
|
|
284
283
|
throw newError;
|
|
285
284
|
}
|
|
286
285
|
}
|
|
287
|
-
async function callAIWithObjectResponse(messages,
|
|
288
|
-
const response = await callAI(messages,
|
|
286
|
+
async function callAIWithObjectResponse(messages, modelConfig, options) {
|
|
287
|
+
const response = await callAI(messages, modelConfig, {
|
|
289
288
|
deepThink: options?.deepThink
|
|
290
289
|
});
|
|
291
290
|
(0, utils_namespaceObject.assert)(response, 'empty response');
|
|
@@ -299,8 +298,8 @@ async function callAIWithObjectResponse(messages, AIActionTypeValue, modelConfig
|
|
|
299
298
|
reasoning_content: response.reasoning_content
|
|
300
299
|
};
|
|
301
300
|
}
|
|
302
|
-
async function callAIWithStringResponse(msgs,
|
|
303
|
-
const { content, usage } = await callAI(msgs,
|
|
301
|
+
async function callAIWithStringResponse(msgs, modelConfig) {
|
|
302
|
+
const { content, usage } = await callAI(msgs, modelConfig);
|
|
304
303
|
return {
|
|
305
304
|
content,
|
|
306
305
|
usage
|