@godscene/core 1.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +9 -0
  3. package/dist/es/agent/agent.mjs +767 -0
  4. package/dist/es/agent/common.mjs +0 -0
  5. package/dist/es/agent/execution-session.mjs +39 -0
  6. package/dist/es/agent/index.mjs +6 -0
  7. package/dist/es/agent/task-builder.mjs +343 -0
  8. package/dist/es/agent/task-cache.mjs +212 -0
  9. package/dist/es/agent/tasks.mjs +428 -0
  10. package/dist/es/agent/ui-utils.mjs +101 -0
  11. package/dist/es/agent/utils.mjs +167 -0
  12. package/dist/es/ai-model/auto-glm/actions.mjs +237 -0
  13. package/dist/es/ai-model/auto-glm/index.mjs +6 -0
  14. package/dist/es/ai-model/auto-glm/parser.mjs +237 -0
  15. package/dist/es/ai-model/auto-glm/planning.mjs +69 -0
  16. package/dist/es/ai-model/auto-glm/prompt.mjs +220 -0
  17. package/dist/es/ai-model/auto-glm/util.mjs +7 -0
  18. package/dist/es/ai-model/connectivity.mjs +136 -0
  19. package/dist/es/ai-model/conversation-history.mjs +193 -0
  20. package/dist/es/ai-model/index.mjs +12 -0
  21. package/dist/es/ai-model/inspect.mjs +395 -0
  22. package/dist/es/ai-model/llm-planning.mjs +231 -0
  23. package/dist/es/ai-model/prompt/common.mjs +5 -0
  24. package/dist/es/ai-model/prompt/describe.mjs +64 -0
  25. package/dist/es/ai-model/prompt/extraction.mjs +129 -0
  26. package/dist/es/ai-model/prompt/llm-locator.mjs +49 -0
  27. package/dist/es/ai-model/prompt/llm-planning.mjs +584 -0
  28. package/dist/es/ai-model/prompt/llm-section-locator.mjs +42 -0
  29. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +33 -0
  30. package/dist/es/ai-model/prompt/playwright-generator.mjs +115 -0
  31. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +34 -0
  32. package/dist/es/ai-model/prompt/util.mjs +57 -0
  33. package/dist/es/ai-model/prompt/yaml-generator.mjs +201 -0
  34. package/dist/es/ai-model/service-caller/codex-app-server.mjs +573 -0
  35. package/dist/es/ai-model/service-caller/image-detail.mjs +4 -0
  36. package/dist/es/ai-model/service-caller/index.mjs +648 -0
  37. package/dist/es/ai-model/service-caller/request-timeout.mjs +47 -0
  38. package/dist/es/ai-model/ui-tars-planning.mjs +247 -0
  39. package/dist/es/common.mjs +382 -0
  40. package/dist/es/device/device-options.mjs +0 -0
  41. package/dist/es/device/index.mjs +340 -0
  42. package/dist/es/dump/html-utils.mjs +290 -0
  43. package/dist/es/dump/index.mjs +3 -0
  44. package/dist/es/dump/screenshot-restoration.mjs +30 -0
  45. package/dist/es/dump/screenshot-store.mjs +125 -0
  46. package/dist/es/index.mjs +17 -0
  47. package/dist/es/report-cli.mjs +149 -0
  48. package/dist/es/report-generator.mjs +203 -0
  49. package/dist/es/report-markdown.mjs +216 -0
  50. package/dist/es/report.mjs +287 -0
  51. package/dist/es/screenshot-item.mjs +120 -0
  52. package/dist/es/service/index.mjs +272 -0
  53. package/dist/es/service/utils.mjs +13 -0
  54. package/dist/es/skill/index.mjs +35 -0
  55. package/dist/es/task-runner.mjs +261 -0
  56. package/dist/es/task-timing.mjs +10 -0
  57. package/dist/es/tree.mjs +11 -0
  58. package/dist/es/types.mjs +202 -0
  59. package/dist/es/utils.mjs +232 -0
  60. package/dist/es/yaml/builder.mjs +11 -0
  61. package/dist/es/yaml/index.mjs +4 -0
  62. package/dist/es/yaml/player.mjs +425 -0
  63. package/dist/es/yaml/utils.mjs +100 -0
  64. package/dist/es/yaml.mjs +0 -0
  65. package/dist/lib/agent/agent.js +815 -0
  66. package/dist/lib/agent/common.js +5 -0
  67. package/dist/lib/agent/execution-session.js +73 -0
  68. package/dist/lib/agent/index.js +76 -0
  69. package/dist/lib/agent/task-builder.js +380 -0
  70. package/dist/lib/agent/task-cache.js +264 -0
  71. package/dist/lib/agent/tasks.js +471 -0
  72. package/dist/lib/agent/ui-utils.js +153 -0
  73. package/dist/lib/agent/utils.js +238 -0
  74. package/dist/lib/ai-model/auto-glm/actions.js +271 -0
  75. package/dist/lib/ai-model/auto-glm/index.js +64 -0
  76. package/dist/lib/ai-model/auto-glm/parser.js +280 -0
  77. package/dist/lib/ai-model/auto-glm/planning.js +103 -0
  78. package/dist/lib/ai-model/auto-glm/prompt.js +257 -0
  79. package/dist/lib/ai-model/auto-glm/util.js +44 -0
  80. package/dist/lib/ai-model/connectivity.js +180 -0
  81. package/dist/lib/ai-model/conversation-history.js +227 -0
  82. package/dist/lib/ai-model/index.js +127 -0
  83. package/dist/lib/ai-model/inspect.js +441 -0
  84. package/dist/lib/ai-model/llm-planning.js +268 -0
  85. package/dist/lib/ai-model/prompt/common.js +39 -0
  86. package/dist/lib/ai-model/prompt/describe.js +98 -0
  87. package/dist/lib/ai-model/prompt/extraction.js +169 -0
  88. package/dist/lib/ai-model/prompt/llm-locator.js +86 -0
  89. package/dist/lib/ai-model/prompt/llm-planning.js +621 -0
  90. package/dist/lib/ai-model/prompt/llm-section-locator.js +79 -0
  91. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +70 -0
  92. package/dist/lib/ai-model/prompt/playwright-generator.js +176 -0
  93. package/dist/lib/ai-model/prompt/ui-tars-planning.js +71 -0
  94. package/dist/lib/ai-model/prompt/util.js +103 -0
  95. package/dist/lib/ai-model/prompt/yaml-generator.js +262 -0
  96. package/dist/lib/ai-model/service-caller/codex-app-server.js +622 -0
  97. package/dist/lib/ai-model/service-caller/image-detail.js +38 -0
  98. package/dist/lib/ai-model/service-caller/index.js +716 -0
  99. package/dist/lib/ai-model/service-caller/request-timeout.js +93 -0
  100. package/dist/lib/ai-model/ui-tars-planning.js +281 -0
  101. package/dist/lib/common.js +491 -0
  102. package/dist/lib/device/device-options.js +18 -0
  103. package/dist/lib/device/index.js +467 -0
  104. package/dist/lib/dump/html-utils.js +366 -0
  105. package/dist/lib/dump/index.js +58 -0
  106. package/dist/lib/dump/screenshot-restoration.js +64 -0
  107. package/dist/lib/dump/screenshot-store.js +165 -0
  108. package/dist/lib/index.js +184 -0
  109. package/dist/lib/report-cli.js +189 -0
  110. package/dist/lib/report-generator.js +244 -0
  111. package/dist/lib/report-markdown.js +253 -0
  112. package/dist/lib/report.js +333 -0
  113. package/dist/lib/screenshot-item.js +154 -0
  114. package/dist/lib/service/index.js +306 -0
  115. package/dist/lib/service/utils.js +47 -0
  116. package/dist/lib/skill/index.js +69 -0
  117. package/dist/lib/task-runner.js +298 -0
  118. package/dist/lib/task-timing.js +44 -0
  119. package/dist/lib/tree.js +51 -0
  120. package/dist/lib/types.js +298 -0
  121. package/dist/lib/utils.js +314 -0
  122. package/dist/lib/yaml/builder.js +55 -0
  123. package/dist/lib/yaml/index.js +79 -0
  124. package/dist/lib/yaml/player.js +459 -0
  125. package/dist/lib/yaml/utils.js +153 -0
  126. package/dist/lib/yaml.js +18 -0
  127. package/dist/types/agent/agent.d.ts +220 -0
  128. package/dist/types/agent/common.d.ts +0 -0
  129. package/dist/types/agent/execution-session.d.ts +36 -0
  130. package/dist/types/agent/index.d.ts +9 -0
  131. package/dist/types/agent/task-builder.d.ts +34 -0
  132. package/dist/types/agent/task-cache.d.ts +49 -0
  133. package/dist/types/agent/tasks.d.ts +70 -0
  134. package/dist/types/agent/ui-utils.d.ts +14 -0
  135. package/dist/types/agent/utils.d.ts +25 -0
  136. package/dist/types/ai-model/auto-glm/actions.d.ts +78 -0
  137. package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
  138. package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
  139. package/dist/types/ai-model/auto-glm/planning.d.ts +12 -0
  140. package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
  141. package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
  142. package/dist/types/ai-model/connectivity.d.ts +20 -0
  143. package/dist/types/ai-model/conversation-history.d.ts +105 -0
  144. package/dist/types/ai-model/index.d.ts +16 -0
  145. package/dist/types/ai-model/inspect.d.ts +67 -0
  146. package/dist/types/ai-model/llm-planning.d.ts +19 -0
  147. package/dist/types/ai-model/prompt/common.d.ts +2 -0
  148. package/dist/types/ai-model/prompt/describe.d.ts +1 -0
  149. package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
  150. package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
  151. package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
  152. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
  153. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  154. package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
  155. package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
  156. package/dist/types/ai-model/prompt/util.d.ts +33 -0
  157. package/dist/types/ai-model/prompt/yaml-generator.d.ts +102 -0
  158. package/dist/types/ai-model/service-caller/codex-app-server.d.ts +42 -0
  159. package/dist/types/ai-model/service-caller/image-detail.d.ts +2 -0
  160. package/dist/types/ai-model/service-caller/index.d.ts +60 -0
  161. package/dist/types/ai-model/service-caller/request-timeout.d.ts +32 -0
  162. package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
  163. package/dist/types/common.d.ts +288 -0
  164. package/dist/types/device/device-options.d.ts +155 -0
  165. package/dist/types/device/index.d.ts +2565 -0
  166. package/dist/types/dump/html-utils.d.ts +75 -0
  167. package/dist/types/dump/index.d.ts +5 -0
  168. package/dist/types/dump/screenshot-restoration.d.ts +8 -0
  169. package/dist/types/dump/screenshot-store.d.ts +49 -0
  170. package/dist/types/index.d.ts +21 -0
  171. package/dist/types/report-cli.d.ts +36 -0
  172. package/dist/types/report-generator.d.ts +88 -0
  173. package/dist/types/report-markdown.d.ts +24 -0
  174. package/dist/types/report.d.ts +52 -0
  175. package/dist/types/screenshot-item.d.ts +67 -0
  176. package/dist/types/service/index.d.ts +24 -0
  177. package/dist/types/service/utils.d.ts +2 -0
  178. package/dist/types/skill/index.d.ts +25 -0
  179. package/dist/types/task-runner.d.ts +50 -0
  180. package/dist/types/task-timing.d.ts +8 -0
  181. package/dist/types/tree.d.ts +4 -0
  182. package/dist/types/types.d.ts +684 -0
  183. package/dist/types/utils.d.ts +45 -0
  184. package/dist/types/yaml/builder.d.ts +2 -0
  185. package/dist/types/yaml/index.d.ts +4 -0
  186. package/dist/types/yaml/player.d.ts +34 -0
  187. package/dist/types/yaml/utils.d.ts +9 -0
  188. package/dist/types/yaml.d.ts +215 -0
  189. package/package.json +130 -0
@@ -0,0 +1,167 @@
1
+ import { ScreenshotItem } from "../screenshot-item.mjs";
2
+ import { uploadTestInfoToServer } from "../utils.mjs";
3
+ import { MIDSCENE_REPORT_QUIET, MIDSCENE_REPORT_TAG_NAME, globalConfigManager } from "@godscene/shared/env";
4
+ import { generateElementByRect } from "@godscene/shared/extractor";
5
+ import { imageInfoOfBase64, resizeImgBase64 } from "@godscene/shared/img";
6
+ import { getDebug } from "@godscene/shared/logger";
7
+ import { assert, logMsg, uuid } from "@godscene/shared/utils";
8
+ import dayjs from "dayjs";
9
+ import { debug as external_task_cache_mjs_debug } from "./task-cache.mjs";
10
/**
 * Capture the current state of an interface as a screenshot-based context.
 *
 * Steps, in order: report the interface description through
 * `uploadTestInfoToServer`, read the logical size, take a base64 screenshot,
 * validate both sets of dimensions, derive the screenshot-to-logical ratio and,
 * when a shrink factor other than 1 is requested, downsize the screenshot.
 *
 * @param interfaceInstance Object exposing `size()` and `screenshotBase64()`,
 *   and optionally `describe()`.
 * @param _opt Options object. `uploadServerUrl` is forwarded to the upload
 *   helper; `screenshotShrinkFactor` (finite, >= 1, defaults to 1) divides the
 *   screenshot dimensions.
 * @returns {Promise<object>} `{ shotSize, deprecatedDpr, screenshot, shrunkShotToLogicalRatio }`.
 * @throws {Error} When the logical size, the screenshot dimensions or the
 *   shrink factor are not usable numbers.
 */
async function commonContextParser(interfaceInstance, _opt) {
    const trace = getDebug('commonContextParser');
    assert(interfaceInstance, 'interfaceInstance is required');

    trace('Getting interface description');
    const description = interfaceInstance.describe?.() || '';
    trace('Interface description end');

    // The upload call is intentionally not awaited, exactly as before.
    trace('Uploading test info to server');
    uploadTestInfoToServer({
        testUrl: description,
        serverUrl: _opt.uploadServerUrl,
    });
    trace('UploadTestInfoToServer end');

    trace('will get size');
    const interfaceSize = await interfaceInstance.size();
    const logicalWidth = interfaceSize.width;
    const logicalHeight = interfaceSize.height;
    if (interfaceSize.dpr) {
        console.warn('Warning: return value of interface.size() include a dpr property, which is not expected and ignored. ');
    }
    if (!(Number.isFinite(logicalWidth) && Number.isFinite(logicalHeight))) {
        throw new Error(`Invalid interface size: width and height must be finite numbers. Received width: ${logicalWidth}, height: ${logicalHeight}`);
    }
    if (!(logicalWidth > 0 && logicalHeight > 0)) {
        throw new Error(`Invalid interface size: width and height must be positive numbers. Received width: ${logicalWidth}, height: ${logicalHeight}`);
    }
    trace(`size: ${logicalWidth}x${logicalHeight}`);

    const screenshotBase64 = await interfaceInstance.screenshotBase64();
    const screenshotCapturedAt = Date.now();
    assert(screenshotBase64, 'screenshotBase64 is required');

    trace('will get screenshot dimensions');
    const imageInfo = await imageInfoOfBase64(screenshotBase64);
    const imgWidth = imageInfo.width;
    const imgHeight = imageInfo.height;
    if (!(Number.isFinite(imgWidth) && Number.isFinite(imgHeight))) {
        throw new Error(`Invalid screenshot dimensions: width and height must be finite numbers. Received width: ${imgWidth}, height: ${imgHeight}`);
    }
    if (!(imgWidth > 0 && imgHeight > 0)) {
        throw new Error(`Invalid screenshot dimensions: width and height must be positive numbers. Received width: ${imgWidth}, height: ${imgHeight}`);
    }
    trace('screenshot dimensions', imgWidth, 'x', imgHeight);

    // When the reported size and the screenshot disagree on orientation, the
    // logical height is used as the width for the ratio computation.
    const logicalIsPortrait = logicalWidth < logicalHeight;
    const screenshotIsPortrait = imgWidth < imgHeight;
    const orientationMismatch = logicalIsPortrait !== screenshotIsPortrait;
    if (orientationMismatch) {
        trace(`Orientation mismatch detected: logical size ${logicalWidth}x${logicalHeight} (${logicalIsPortrait ? 'portrait' : 'landscape'}) vs screenshot ${imgWidth}x${imgHeight} (${screenshotIsPortrait ? 'portrait' : 'landscape'}). Swapping logical dimensions.`);
    }
    const finalLogicalWidth = orientationMismatch ? logicalHeight : logicalWidth;

    const userShrinkFactor = _opt.screenshotShrinkFactor ?? 1;
    if (!Number.isFinite(userShrinkFactor) || userShrinkFactor < 1) {
        throw new Error(`Invalid screenshotShrinkFactor: must be a finite number >= 1. Received: ${userShrinkFactor}`);
    }

    const dpr = imgWidth / finalLogicalWidth;
    trace('calculated dpr:', dpr);
    const shrunkShotToLogicalRatio = dpr / userShrinkFactor;
    trace('shrunkShotToLogicalRatio', shrunkShotToLogicalRatio);

    // Defaults describe the untouched screenshot; they are replaced below only
    // when a shrink factor other than 1 was requested.
    let shotSize = {
        width: imgWidth,
        height: imgHeight,
    };
    let finalBase64 = screenshotBase64;
    if (userShrinkFactor !== 1) {
        const targetWidth = Math.round(imgWidth / userShrinkFactor);
        const targetHeight = Math.round(imgHeight / userShrinkFactor);
        trace(`Applying screenshot shrink factor: ${userShrinkFactor} (physical: ${imgWidth}x${imgHeight} -> target: ${targetWidth}x${targetHeight})`);
        finalBase64 = await resizeImgBase64(screenshotBase64, {
            width: targetWidth,
            height: targetHeight,
        });
        shotSize = {
            width: targetWidth,
            height: targetHeight,
        };
    }

    return {
        shotSize,
        deprecatedDpr: dpr,
        screenshot: ScreenshotItem.create(finalBase64, screenshotCapturedAt),
        shrunkShotToLogicalRatio,
    };
}
78
/**
 * Build a report file name of the form `<prefix>-<timestamp>-<id>`.
 *
 * The prefix is the configured `MIDSCENE_REPORT_TAG_NAME` value when it is
 * truthy, otherwise `tag`. The timestamp uses `YYYY-MM-DD_HH-mm-ss` and the id
 * is the first 8 characters of a fresh uuid.
 *
 * @param {string} [tag='web'] Fallback prefix.
 * @returns {string} The generated name (no extension is appended here).
 */
function getReportFileName(tag = 'web') {
    const prefix = globalConfigManager.getEnvConfigValue(MIDSCENE_REPORT_TAG_NAME) || tag;
    const timestamp = dayjs().format('YYYY-MM-DD_HH-mm-ss');
    const shortId = uuid().substring(0, 8);
    return `${prefix}-${timestamp}-${shortId}`;
}
84
/**
 * Log that the report file was updated, unless the `MIDSCENE_REPORT_QUIET`
 * configuration flag evaluates to true.
 *
 * @param filepath Path included in the log message.
 */
function printReportMsg(filepath) {
    const quiet = globalConfigManager.getEnvConfigInBoolean(MIDSCENE_REPORT_QUIET);
    if (!quiet) {
        logMsg(`Midscene - report file updated: ${filepath}`);
    }
}
88
/**
 * Tell whether a plan locate parameter carries a four-element `bbox` array.
 *
 * A missing parameter (`null` / `undefined`) yields `false` instead of
 * throwing, matching `matchElementFromPlan`, which also accepts a missing
 * parameter.
 *
 * @param planLocateParam Locate parameter from a plan; may be nullish.
 * @returns {boolean} `true` only when `bbox` is an array of length 4.
 */
function ifPlanLocateParamIsBbox(planLocateParam) {
    const bbox = planLocateParam?.bbox;
    return Array.isArray(bbox) && bbox.length === 4;
}
91
/**
 * Turn a plan locate parameter that already carries a `bbox` into an element.
 *
 * The bbox is read as `[x1, y1, x2, y2]`; width and height are computed as
 * `x2 - x1 + 1` and `y2 - y1 + 1`. The element description is the prompt
 * string, or the `prompt` field of a prompt object, or `''`.
 *
 * @param planLocateParam Locate parameter from a plan; may be missing.
 * @returns The element built by `generateElementByRect`, or `undefined` when
 *   there is no parameter or no bbox.
 */
function matchElementFromPlan(planLocateParam) {
    const bbox = planLocateParam ? planLocateParam.bbox : undefined;
    if (!bbox) {
        return undefined;
    }
    const rect = {
        left: bbox[0],
        top: bbox[1],
        width: bbox[2] - bbox[0] + 1,
        height: bbox[3] - bbox[1] + 1,
    };
    const { prompt } = planLocateParam;
    const description = typeof prompt === 'string' ? prompt : prompt?.prompt || '';
    return generateElementByRect(rect, description);
}
104
/**
 * Try to resolve an element from a cached locate entry.
 *
 * Returns `undefined` (optionally after a debug log) when there is no cache
 * entry, when caching is explicitly disabled for this prompt, when the task
 * cache is not in "use cached result" mode, when the interface lacks
 * `rectMatchesCacheFeature`, or when that call fails.
 *
 * @param context Object providing `taskCache` and `interfaceInstance`.
 * @param cacheEntry Cached feature passed to `rectMatchesCacheFeature`.
 * @param cachePrompt Prompt string or prompt object, used for the description.
 * @param cacheable Pass `false` to bypass the cache.
 * @returns {Promise<object|undefined>} `{ center, rect, description }` on a hit.
 */
async function matchElementFromCache(context, cacheEntry, cachePrompt, cacheable) {
    if (!cacheEntry) {
        return undefined;
    }
    if (cacheable === false) {
        external_task_cache_mjs_debug('cache disabled for prompt: %s', cachePrompt);
        return undefined;
    }
    if (!context.taskCache?.isCacheResultUsed) {
        return undefined;
    }
    if (!context.interfaceInstance.rectMatchesCacheFeature) {
        external_task_cache_mjs_debug('interface does not implement rectMatchesCacheFeature, skip cache');
        return undefined;
    }

    try {
        const rect = await context.interfaceInstance.rectMatchesCacheFeature(cacheEntry);
        const centerX = Math.round(rect.left + rect.width / 2);
        const centerY = Math.round(rect.top + rect.height / 2);
        const description = typeof cachePrompt === 'string' ? cachePrompt : cachePrompt.prompt || '';
        const element = {
            center: [centerX, centerY],
            rect,
            description,
        };
        external_task_cache_mjs_debug('cache hit, prompt: %s', cachePrompt);
        return element;
    } catch (error) {
        // Best effort: a failing cache lookup is logged and treated as a miss.
        external_task_cache_mjs_debug('rectMatchesCacheFeature error: %s', error);
        return undefined;
    }
}
126
/**
 * Return the version string baked into this build.
 *
 * NOTE(review): the value is "1.7.10" while the surrounding package listing is
 * labelled 1.7.11 - confirm whether the build is expected to inject the
 * package version here.
 *
 * @returns {string} The hard-coded version.
 */
const getMidsceneVersion = () => {
    return '1.7.10';
};
127
/**
 * Normalize a prompt into its text and multimodal parts.
 *
 * @param prompt Either a plain string or an object with `prompt` and optional
 *   `images` / `convertHttpImage2Base64` fields.
 * @returns {{textPrompt: *, multimodalPrompt: (object|undefined)}} The text
 *   prompt plus, when `images` is truthy, the images and the flag coerced to a
 *   boolean. `multimodalPrompt` is always present and `undefined` otherwise.
 */
const parsePrompt = (prompt) => {
    if (typeof prompt === 'string') {
        return { textPrompt: prompt, multimodalPrompt: undefined };
    }

    const textPrompt = prompt.prompt;
    let multimodalPrompt;
    if (prompt.images) {
        multimodalPrompt = {
            images: prompt.images,
            convertHttpImage2Base64: Boolean(prompt.convertHttpImage2Base64),
        };
    }
    return { textPrompt, multimodalPrompt };
};
140
/**
 * Scale an element's `center` and `rect` by `shrunkShotToLogicalRatio`.
 *
 * A ratio of exactly 1 returns the same element object untouched; otherwise a
 * copy is returned whose center coordinates and rect left/top/width/height are
 * multiplied by the ratio and rounded. Other properties are carried over.
 *
 * @param element Object with `center: [x, y]` and `rect`.
 * @param {number} shrunkShotToLogicalRatio Multiplier applied to each value.
 * @returns The original element (ratio 1) or a scaled copy.
 */
const transformLogicalElementToScreenshot = (element, shrunkShotToLogicalRatio) => {
    if (shrunkShotToLogicalRatio === 1) {
        return element;
    }
    const scale = (value) => Math.round(value * shrunkShotToLogicalRatio);
    const scaledCenter = [scale(element.center[0]), scale(element.center[1])];
    const scaledRect = {
        ...element.rect,
        left: scale(element.rect.left),
        top: scale(element.rect.top),
        width: scale(element.rect.width),
        height: scale(element.rect.height),
    };
    return { ...element, center: scaledCenter, rect: scaledRect };
};
157
/**
 * Scale a rect's left/top/width/height by `shrunkShotToLogicalRatio`.
 *
 * A ratio of exactly 1 returns the same rect object; otherwise a copy is
 * returned with the four values multiplied by the ratio and rounded, keeping
 * any additional properties.
 *
 * @param rect Object with numeric `left`, `top`, `width`, `height`.
 * @param {number} shrunkShotToLogicalRatio Multiplier applied to each value.
 * @returns The original rect (ratio 1) or a scaled copy.
 */
const transformLogicalRectToScreenshotRect = (rect, shrunkShotToLogicalRatio) => {
    if (shrunkShotToLogicalRatio === 1) {
        return rect;
    }
    const scaled = { ...rect };
    for (const side of ['left', 'top', 'width', 'height']) {
        scaled[side] = Math.round(rect[side] * shrunkShotToLogicalRatio);
    }
    return scaled;
};
167
+ export { commonContextParser, getMidsceneVersion, getReportFileName, ifPlanLocateParamIsBbox, matchElementFromCache, matchElementFromPlan, parsePrompt, printReportMsg, transformLogicalElementToScreenshot, transformLogicalRectToScreenshotRect };
@@ -0,0 +1,237 @@
1
+ import { adaptBbox, pointToBbox } from "../../common.mjs";
2
+ import { getDebug } from "@godscene/shared/logger";
3
+ const debug = getDebug('auto-glm-actions');
4
/** Divisor used when scaling auto-glm swipe deltas to pixel distances. */
const AUTO_GLM_COORDINATE_MAX = 1000;

/**
 * Convert an auto-glm point into a bounding box for the given screen size.
 *
 * The point is first expanded with `pointToBbox(x, y, 10)` and the result is
 * passed to `adaptBbox` with the `'auto-glm'` mode.
 * NOTE(review): the meaning of the `10` argument is defined by `pointToBbox`
 * (presumably the box size around the point) - confirm against that helper.
 *
 * @param {number} x Point x as produced by the model.
 * @param {number} y Point y as produced by the model.
 * @param {number} width Target width passed to `adaptBbox`.
 * @param {number} height Target height passed to `adaptBbox`.
 * @returns The bbox returned by `adaptBbox`.
 */
function autoGLMCoordinateToBbox(x, y, width, height) {
    const pointBox = pointToBbox(x, y, 10);
    const adapted = adaptBbox(pointBox, width, height, 'auto-glm');
    return adapted;
}
9
/** Action names recognized as a "back" button. */
const BACK_BUTTON_NAMES = ['AndroidBackButton', 'HarmonyBackButton'];

/** Action names recognized as a "home" button. */
const HOME_BUTTON_NAMES = ['AndroidHomeButton', 'HarmonyHomeButton'];

/**
 * Pick the first action in `actionSpace` whose name is one of `knownNames`.
 *
 * @param actionSpace Array of objects with a `name`; may be missing.
 * @param {string[]} knownNames Acceptable action names.
 * @param {string} defaultName Returned when there is no action space or no match.
 * @returns {string} The matching action's name, or `defaultName`.
 */
function findActionName(actionSpace, knownNames, defaultName) {
    if (!actionSpace) {
        return defaultName;
    }
    const matchIndex = actionSpace.findIndex((candidate) => knownNames.includes(candidate.name));
    if (matchIndex === -1) {
        return defaultName;
    }
    return actionSpace[matchIndex].name;
}
22
/**
 * Build the locate parameter for an auto-glm point.
 *
 * @param point `[x, y]` pair taken from the parsed action.
 * @param size Object with `width` and `height`.
 * @returns {{prompt: string, bbox: number[]}} Locate param with an empty prompt.
 */
function buildAutoGLMLocate(point, size) {
    const [x1, y1, x2, y2] = autoGLMCoordinateToBbox(point[0], point[1], size.width, size.height);
    return {
        prompt: '',
        bbox: [x1, y1, x2, y2],
    };
}

/**
 * Translate a parsed auto-glm `do(...)` action into planned actions.
 *
 * @param doAction Parsed action whose `action` field names the operation.
 * @param size Object with `width` and `height`, used for coordinate scaling.
 * @param actionSpace Optional list of available actions (objects with `name`).
 * @returns {object[]} A single-element array describing the planned action.
 * @throws {Error} For unsupported or unknown action types.
 */
function transformAutoGLMDoAction(doAction, size, actionSpace) {
    switch (doAction.action) {
        case 'Tap':
            debug('Transform Tap action:', doAction);
            return [{ type: 'Tap', param: { locate: buildAutoGLMLocate(doAction.element, size) } }];
        case 'Double Tap':
            debug('Transform Double Tap action:', doAction);
            return [{ type: 'DoubleClick', param: { locate: buildAutoGLMLocate(doAction.element, size) } }];
        case 'Type':
            debug('Transform Type action:', doAction);
            return [{ type: 'Input', param: { value: doAction.text } }];
        case 'Swipe': {
            debug('Transform Swipe action:', doAction);
            const locate = buildAutoGLMLocate(doAction.start, size);
            const deltaX = doAction.end[0] - doAction.start[0];
            const deltaY = doAction.end[1] - doAction.start[1];
            const absDeltaX = Math.abs(deltaX);
            const absDeltaY = Math.abs(deltaY);
            // The dominant axis decides the scroll axis; ties go horizontal.
            // A positive delta maps to 'up' / 'left', a non-positive one to
            // 'down' / 'right'.
            const isVertical = absDeltaY > absDeltaX;
            const distance = isVertical
                ? Math.round(absDeltaY * size.height / AUTO_GLM_COORDINATE_MAX)
                : Math.round(absDeltaX * size.width / AUTO_GLM_COORDINATE_MAX);
            let direction;
            if (isVertical) {
                direction = deltaY > 0 ? 'up' : 'down';
            } else {
                direction = deltaX > 0 ? 'left' : 'right';
            }
            debug(`Calculate swipe direction: ${direction}, distance: ${distance}`);
            return [{
                type: 'Scroll',
                param: { locate, distance, direction },
                thought: doAction.think || '',
            }];
        }
        case 'Long Press':
            debug('Transform Long Press action:', doAction);
            return [{
                type: 'LongPress',
                param: { locate: buildAutoGLMLocate(doAction.element, size) },
                thought: doAction.think || '',
            }];
        case 'Back':
            debug('Transform Back action:', doAction);
            return [{
                type: findActionName(actionSpace, BACK_BUTTON_NAMES, 'AndroidBackButton'),
                param: {},
                thought: doAction.think || '',
            }];
        case 'Home':
            debug('Transform Home action:', doAction);
            return [{
                type: findActionName(actionSpace, HOME_BUTTON_NAMES, 'AndroidHomeButton'),
                param: {},
                thought: doAction.think || '',
            }];
        case 'Wait':
            debug('Transform Wait action:', doAction);
            return [{
                type: 'Sleep',
                param: { timeMs: doAction.durationMs },
                thought: doAction.think || '',
            }];
        case 'Launch':
            debug('Transform Launch action:', doAction);
            return [{
                type: 'Launch',
                param: { uri: doAction.app },
                thought: doAction.think || '',
            }];
        // Parsed by the auto-glm parser but deliberately rejected here.
        case 'Interact':
        case 'Call_API':
        case 'Take_over':
        case 'Note':
            throw new Error(`Action "${doAction.action}" from auto-glm is not supported in the current implementation.`);
        default:
            throw new Error(`Unknown do() action type: ${doAction.action}`);
    }
}

/**
 * Convert a parsed auto-glm action into the planner's action list.
 *
 * `finish` actions become a single `Finished` action whose thought is the
 * finish message; `do` actions are dispatched on their `action` field.
 *
 * @param action Parsed action with `_metadata` of `'finish'` or `'do'`.
 * @param size Object with `width` and `height`, used for coordinate scaling.
 * @param actionSpace Optional list of available actions (objects with `name`),
 *   used to choose the back/home button action name.
 * @returns {object[]} A single-element array with the transformed action.
 * @throws {Error} `Failed to transform action: ...` for any failure; the
 *   original error is attached as `cause` so its stack is not lost.
 */
function transformAutoGLMAction(action, size, actionSpace) {
    try {
        switch (action._metadata) {
            case 'finish':
                debug('Transform finish action:', action);
                return [{
                    type: 'Finished',
                    param: {},
                    thought: action.message,
                }];
            case 'do':
                return transformAutoGLMDoAction(action, size, actionSpace);
            default:
                throw new Error(`Unknown action metadata: ${action._metadata}`);
        }
    } catch (error) {
        const errorMessage = error instanceof Error ? error.message : String(error);
        debug('Transform error:', errorMessage);
        throw new Error(`Failed to transform action: ${errorMessage}`, { cause: error });
    }
}
237
+ export { transformAutoGLMAction };
@@ -0,0 +1,6 @@
1
+ import { getAutoGLMLocatePrompt, getAutoGLMPlanPrompt } from "./prompt.mjs";
2
+ import { parseAction, parseAutoGLMLocateResponse, parseAutoGLMResponse } from "./parser.mjs";
3
+ import { autoGLMPlanning } from "./planning.mjs";
4
+ import { transformAutoGLMAction } from "./actions.mjs";
5
+ import { isAutoGLM, isUITars } from "./util.mjs";
6
+ export { autoGLMPlanning, getAutoGLMLocatePrompt, getAutoGLMPlanPrompt, isAutoGLM, isUITars, parseAction, parseAutoGLMLocateResponse, parseAutoGLMResponse, transformAutoGLMAction };
@@ -0,0 +1,237 @@
1
+ import { getDebug } from "@godscene/shared/logger";
2
+ const debug = getDebug('auto-glm-parser');
3
/**
 * Return the trimmed remainder of `src` that follows the first `key`.
 *
 * @param {string} src Raw action payload.
 * @param {string} key Marker to search for.
 * @returns {string} Everything after the marker, trimmed.
 * @throws {Error} When `key` does not occur in `src`.
 */
const sliceAfterKey = (src, key) => {
    const idx = src.indexOf(key);
    if (idx === -1) {
        throw new Error(`Missing key ${key} in action payload ${src}`);
    }
    return src.slice(idx + key.length).trim();
};

/**
 * Extract the value that follows `key`, dropping a trailing `")` when present.
 *
 * @param {string} src Raw action payload, e.g. `do(action="Launch", app="Maps")`.
 * @param {string} key Marker including the opening quote, e.g. `app="`.
 * @returns {string} The remainder after the marker without the closing `")`.
 * @throws {Error} When `key` does not occur in `src`.
 */
const extractValueAfter = (src, key) => {
    const rest = sliceAfterKey(src, key);
    return rest.endsWith('")') ? rest.slice(0, -2) : rest;
};

/** Patterns for `name=[x,y]` pairs; whitespace around the numbers is accepted. */
const AUTO_GLM_POINT_PATTERNS = {
    element: /element=\[\s*(\d+)\s*,\s*(\d+)\s*\]/,
    start: /start=\[\s*(\d+)\s*,\s*(\d+)\s*\]/,
    end: /end=\[\s*(\d+)\s*,\s*(\d+)\s*\]/,
};

/**
 * Read a named integer coordinate pair from an action payload.
 *
 * @param {string} src Raw action payload.
 * @param {'element'|'start'|'end'} field Which pair to read.
 * @returns {number[]|null} `[x, y]`, or `null` when the pair is absent.
 */
const readAutoGLMPoint = (src, field) => {
    const match = src.match(AUTO_GLM_POINT_PATTERNS[field]);
    return match ? [Number(match[1]), Number(match[2])] : null;
};

/**
 * Parse the action part of an auto-glm response into a structured action.
 *
 * Supported payloads are `finish(message="...")` and `do(action="...", ...)`.
 *
 * Fixes over the previous implementation:
 * - a finish message that itself ends with `)` (e.g. `"Done (ok)"`) no longer
 *   loses its last character;
 * - coordinate pairs may contain whitespace (`[500, 300]`);
 * - fractional Wait durations (`1.5`) are no longer truncated to whole seconds.
 *
 * @param response Object with `content` (the action text) and `think`.
 * @returns {object} A `{ _metadata: 'finish', message, think }` object or a
 *   `{ _metadata: 'do', action, think, ... }` object.
 * @throws {Error} `Failed to parse action: ...` for any failure; the original
 *   error is attached as `cause`.
 */
function parseAction(response) {
    debug('Parsing action:', response);
    let trimmedResponse = '';
    try {
        trimmedResponse = response.content.trim();

        if (trimmedResponse.startsWith('do(action="Type"') || trimmedResponse.startsWith('do(action="Type_Name"')) {
            return {
                _metadata: 'do',
                action: 'Type',
                text: extractValueAfter(trimmedResponse, 'text="'),
                think: response.think,
            };
        }

        if (trimmedResponse.startsWith('finish(message=')) {
            const rest = sliceAfterKey(trimmedResponse, 'finish(message="');
            let message = rest;
            if (rest.endsWith('")')) {
                // Well-formed call: only the closing `")` belongs to the syntax.
                message = rest.slice(0, -2);
            } else if (rest.endsWith(')')) {
                // Closing quote missing: drop just the closing parenthesis.
                message = rest.slice(0, -1);
            }
            return {
                _metadata: 'finish',
                message,
                think: response.think,
            };
        }

        if (trimmedResponse.startsWith('do(')) {
            const actionMatch = trimmedResponse.match(/do\(action="([^"]+)"/);
            if (!actionMatch) {
                throw new Error(`Failed to extract action type from do() call; raw="${trimmedResponse}"`);
            }
            const actionType = actionMatch[1];
            const baseAction = {
                _metadata: 'do',
                think: response.think,
            };
            switch (actionType) {
                case 'Tap':
                case 'Double Tap':
                case 'Long Press': {
                    const element = readAutoGLMPoint(trimmedResponse, 'element');
                    if (!element) {
                        throw new Error(`Failed to extract element coordinates for ${actionType}; raw="${trimmedResponse}"`);
                    }
                    return { ...baseAction, action: actionType, element };
                }
                case 'Swipe': {
                    const start = readAutoGLMPoint(trimmedResponse, 'start');
                    const end = readAutoGLMPoint(trimmedResponse, 'end');
                    if (!start || !end) {
                        throw new Error(`Failed to extract start/end coordinates for Swipe; raw="${trimmedResponse}"`);
                    }
                    return { ...baseAction, action: 'Swipe', start, end };
                }
                case 'Launch':
                    return {
                        ...baseAction,
                        action: 'Launch',
                        app: extractValueAfter(trimmedResponse, 'app="'),
                    };
                case 'Back':
                case 'Home':
                case 'Interact':
                    return { ...baseAction, action: actionType };
                case 'Wait': {
                    const durationMatch = trimmedResponse.match(/duration=(?:["\[])?\s*(\d+(?:\.\d+)?)/);
                    if (!durationMatch) {
                        throw new Error(`Failed to extract duration for Wait; raw="${trimmedResponse}"`);
                    }
                    // The payload carries seconds; rounding keeps the result an
                    // integer number of milliseconds for fractional input.
                    const seconds = Number.parseFloat(durationMatch[1]);
                    return {
                        ...baseAction,
                        action: 'Wait',
                        durationMs: Math.round(1000 * seconds),
                    };
                }
                case 'Call_API':
                    return {
                        ...baseAction,
                        action: 'Call_API',
                        instruction: extractValueAfter(trimmedResponse, 'instruction="'),
                    };
                case 'Take_over':
                case 'Note':
                    return {
                        ...baseAction,
                        action: actionType,
                        message: extractValueAfter(trimmedResponse, 'message="'),
                    };
                default:
                    throw new Error(`Unknown action type: ${actionType}; raw="${trimmedResponse}"`);
            }
        }

        throw new Error(`Failed to parse action: ${trimmedResponse}`);
    } catch (error) {
        const errorMessage = error instanceof Error ? error.message : String(error);
        throw new Error(`Failed to parse action: ${errorMessage}; raw="${trimmedResponse}"`, { cause: error });
    }
}
172
/**
 * Split a raw auto-glm response into its reasoning and its action text.
 *
 * Markers are tried in this order: `finish(message=`, `do(action=`, then an
 * `<answer>` tag. Everything before the first occurrence of the marker is the
 * reasoning; everything from the marker onward is the action.
 *
 * The text is cut with `indexOf`/`slice` rather than `split`, so an action
 * whose payload repeats the marker text (for example a message that quotes
 * `do(action=...)`) is kept whole instead of being truncated at the second
 * occurrence.
 *
 * @param {string} content Raw model output.
 * @returns {{think: string, content: string}} Reasoning and action text. When
 *   no marker is found, `think` is `''` and `content` is returned unchanged.
 */
function parseAutoGLMResponse(content) {
    for (const marker of ['finish(message=', 'do(action=']) {
        const markerIndex = content.indexOf(marker);
        if (markerIndex !== -1) {
            return {
                think: content.slice(0, markerIndex).trim(),
                content: content.slice(markerIndex),
            };
        }
    }

    const answerTag = '<answer>';
    const answerIndex = content.indexOf(answerTag);
    if (answerIndex !== -1) {
        const think = content
            .slice(0, answerIndex)
            .replace(/<think>/g, '')
            .replace(/<\/think>/g, '')
            .trim();
        const actionContent = content
            .slice(answerIndex + answerTag.length)
            .replace(/<\/answer>/g, '')
            .trim();
        return {
            think,
            content: actionContent,
        };
    }

    return {
        think: '',
        content,
    };
}
205
/**
 * Parse an auto-glm locate response, which is expected to be a Tap action.
 *
 * The response is split with `parseAutoGLMResponse`; the action must start
 * with `do(action="Tap"` and contain `element=[x,y]`. Whitespace around the
 * two numbers is accepted (`element=[12, 34]`), which the previous pattern
 * rejected. Failures are reported through the `error` field, not thrown.
 *
 * @param {string} rawResponse Raw model output.
 * @returns {{think: string, coordinates: ({x: number, y: number}|null), error?: string}}
 *   Coordinates on success; `coordinates: null` plus `error` otherwise.
 */
function parseAutoGLMLocateResponse(rawResponse) {
    const { think, content: actionContent } = parseAutoGLMResponse(rawResponse);

    if (!actionContent.startsWith('do(action="Tap"')) {
        return {
            think,
            coordinates: null,
            error: `Unexpected action type in auto-glm locate response: ${actionContent}`,
        };
    }

    const elementMatch = actionContent.match(/element=\[\s*(\d+)\s*,\s*(\d+)\s*\]/);
    if (!elementMatch) {
        return {
            think,
            coordinates: null,
            error: `Failed to extract element coordinates from auto-glm response: ${actionContent}`,
        };
    }

    // The capture groups are digit-only, so Number() cannot yield NaN here.
    return {
        think,
        coordinates: {
            x: Number(elementMatch[1]),
            y: Number(elementMatch[2]),
        },
    };
}
237
+ export { extractValueAfter, parseAction, parseAutoGLMLocateResponse, parseAutoGLMResponse };