@godscene/core 1.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +9 -0
  3. package/dist/es/agent/agent.mjs +767 -0
  4. package/dist/es/agent/common.mjs +0 -0
  5. package/dist/es/agent/execution-session.mjs +39 -0
  6. package/dist/es/agent/index.mjs +6 -0
  7. package/dist/es/agent/task-builder.mjs +343 -0
  8. package/dist/es/agent/task-cache.mjs +212 -0
  9. package/dist/es/agent/tasks.mjs +428 -0
  10. package/dist/es/agent/ui-utils.mjs +101 -0
  11. package/dist/es/agent/utils.mjs +167 -0
  12. package/dist/es/ai-model/auto-glm/actions.mjs +237 -0
  13. package/dist/es/ai-model/auto-glm/index.mjs +6 -0
  14. package/dist/es/ai-model/auto-glm/parser.mjs +237 -0
  15. package/dist/es/ai-model/auto-glm/planning.mjs +69 -0
  16. package/dist/es/ai-model/auto-glm/prompt.mjs +220 -0
  17. package/dist/es/ai-model/auto-glm/util.mjs +7 -0
  18. package/dist/es/ai-model/connectivity.mjs +136 -0
  19. package/dist/es/ai-model/conversation-history.mjs +193 -0
  20. package/dist/es/ai-model/index.mjs +12 -0
  21. package/dist/es/ai-model/inspect.mjs +395 -0
  22. package/dist/es/ai-model/llm-planning.mjs +231 -0
  23. package/dist/es/ai-model/prompt/common.mjs +5 -0
  24. package/dist/es/ai-model/prompt/describe.mjs +64 -0
  25. package/dist/es/ai-model/prompt/extraction.mjs +129 -0
  26. package/dist/es/ai-model/prompt/llm-locator.mjs +49 -0
  27. package/dist/es/ai-model/prompt/llm-planning.mjs +584 -0
  28. package/dist/es/ai-model/prompt/llm-section-locator.mjs +42 -0
  29. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +33 -0
  30. package/dist/es/ai-model/prompt/playwright-generator.mjs +115 -0
  31. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +34 -0
  32. package/dist/es/ai-model/prompt/util.mjs +57 -0
  33. package/dist/es/ai-model/prompt/yaml-generator.mjs +201 -0
  34. package/dist/es/ai-model/service-caller/codex-app-server.mjs +573 -0
  35. package/dist/es/ai-model/service-caller/image-detail.mjs +4 -0
  36. package/dist/es/ai-model/service-caller/index.mjs +648 -0
  37. package/dist/es/ai-model/service-caller/request-timeout.mjs +47 -0
  38. package/dist/es/ai-model/ui-tars-planning.mjs +247 -0
  39. package/dist/es/common.mjs +382 -0
  40. package/dist/es/device/device-options.mjs +0 -0
  41. package/dist/es/device/index.mjs +340 -0
  42. package/dist/es/dump/html-utils.mjs +290 -0
  43. package/dist/es/dump/index.mjs +3 -0
  44. package/dist/es/dump/screenshot-restoration.mjs +30 -0
  45. package/dist/es/dump/screenshot-store.mjs +125 -0
  46. package/dist/es/index.mjs +17 -0
  47. package/dist/es/report-cli.mjs +149 -0
  48. package/dist/es/report-generator.mjs +203 -0
  49. package/dist/es/report-markdown.mjs +216 -0
  50. package/dist/es/report.mjs +287 -0
  51. package/dist/es/screenshot-item.mjs +120 -0
  52. package/dist/es/service/index.mjs +272 -0
  53. package/dist/es/service/utils.mjs +13 -0
  54. package/dist/es/skill/index.mjs +35 -0
  55. package/dist/es/task-runner.mjs +261 -0
  56. package/dist/es/task-timing.mjs +10 -0
  57. package/dist/es/tree.mjs +11 -0
  58. package/dist/es/types.mjs +202 -0
  59. package/dist/es/utils.mjs +232 -0
  60. package/dist/es/yaml/builder.mjs +11 -0
  61. package/dist/es/yaml/index.mjs +4 -0
  62. package/dist/es/yaml/player.mjs +425 -0
  63. package/dist/es/yaml/utils.mjs +100 -0
  64. package/dist/es/yaml.mjs +0 -0
  65. package/dist/lib/agent/agent.js +815 -0
  66. package/dist/lib/agent/common.js +5 -0
  67. package/dist/lib/agent/execution-session.js +73 -0
  68. package/dist/lib/agent/index.js +76 -0
  69. package/dist/lib/agent/task-builder.js +380 -0
  70. package/dist/lib/agent/task-cache.js +264 -0
  71. package/dist/lib/agent/tasks.js +471 -0
  72. package/dist/lib/agent/ui-utils.js +153 -0
  73. package/dist/lib/agent/utils.js +238 -0
  74. package/dist/lib/ai-model/auto-glm/actions.js +271 -0
  75. package/dist/lib/ai-model/auto-glm/index.js +64 -0
  76. package/dist/lib/ai-model/auto-glm/parser.js +280 -0
  77. package/dist/lib/ai-model/auto-glm/planning.js +103 -0
  78. package/dist/lib/ai-model/auto-glm/prompt.js +257 -0
  79. package/dist/lib/ai-model/auto-glm/util.js +44 -0
  80. package/dist/lib/ai-model/connectivity.js +180 -0
  81. package/dist/lib/ai-model/conversation-history.js +227 -0
  82. package/dist/lib/ai-model/index.js +127 -0
  83. package/dist/lib/ai-model/inspect.js +441 -0
  84. package/dist/lib/ai-model/llm-planning.js +268 -0
  85. package/dist/lib/ai-model/prompt/common.js +39 -0
  86. package/dist/lib/ai-model/prompt/describe.js +98 -0
  87. package/dist/lib/ai-model/prompt/extraction.js +169 -0
  88. package/dist/lib/ai-model/prompt/llm-locator.js +86 -0
  89. package/dist/lib/ai-model/prompt/llm-planning.js +621 -0
  90. package/dist/lib/ai-model/prompt/llm-section-locator.js +79 -0
  91. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +70 -0
  92. package/dist/lib/ai-model/prompt/playwright-generator.js +176 -0
  93. package/dist/lib/ai-model/prompt/ui-tars-planning.js +71 -0
  94. package/dist/lib/ai-model/prompt/util.js +103 -0
  95. package/dist/lib/ai-model/prompt/yaml-generator.js +262 -0
  96. package/dist/lib/ai-model/service-caller/codex-app-server.js +622 -0
  97. package/dist/lib/ai-model/service-caller/image-detail.js +38 -0
  98. package/dist/lib/ai-model/service-caller/index.js +716 -0
  99. package/dist/lib/ai-model/service-caller/request-timeout.js +93 -0
  100. package/dist/lib/ai-model/ui-tars-planning.js +281 -0
  101. package/dist/lib/common.js +491 -0
  102. package/dist/lib/device/device-options.js +18 -0
  103. package/dist/lib/device/index.js +467 -0
  104. package/dist/lib/dump/html-utils.js +366 -0
  105. package/dist/lib/dump/index.js +58 -0
  106. package/dist/lib/dump/screenshot-restoration.js +64 -0
  107. package/dist/lib/dump/screenshot-store.js +165 -0
  108. package/dist/lib/index.js +184 -0
  109. package/dist/lib/report-cli.js +189 -0
  110. package/dist/lib/report-generator.js +244 -0
  111. package/dist/lib/report-markdown.js +253 -0
  112. package/dist/lib/report.js +333 -0
  113. package/dist/lib/screenshot-item.js +154 -0
  114. package/dist/lib/service/index.js +306 -0
  115. package/dist/lib/service/utils.js +47 -0
  116. package/dist/lib/skill/index.js +69 -0
  117. package/dist/lib/task-runner.js +298 -0
  118. package/dist/lib/task-timing.js +44 -0
  119. package/dist/lib/tree.js +51 -0
  120. package/dist/lib/types.js +298 -0
  121. package/dist/lib/utils.js +314 -0
  122. package/dist/lib/yaml/builder.js +55 -0
  123. package/dist/lib/yaml/index.js +79 -0
  124. package/dist/lib/yaml/player.js +459 -0
  125. package/dist/lib/yaml/utils.js +153 -0
  126. package/dist/lib/yaml.js +18 -0
  127. package/dist/types/agent/agent.d.ts +220 -0
  128. package/dist/types/agent/common.d.ts +0 -0
  129. package/dist/types/agent/execution-session.d.ts +36 -0
  130. package/dist/types/agent/index.d.ts +9 -0
  131. package/dist/types/agent/task-builder.d.ts +34 -0
  132. package/dist/types/agent/task-cache.d.ts +49 -0
  133. package/dist/types/agent/tasks.d.ts +70 -0
  134. package/dist/types/agent/ui-utils.d.ts +14 -0
  135. package/dist/types/agent/utils.d.ts +25 -0
  136. package/dist/types/ai-model/auto-glm/actions.d.ts +78 -0
  137. package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
  138. package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
  139. package/dist/types/ai-model/auto-glm/planning.d.ts +12 -0
  140. package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
  141. package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
  142. package/dist/types/ai-model/connectivity.d.ts +20 -0
  143. package/dist/types/ai-model/conversation-history.d.ts +105 -0
  144. package/dist/types/ai-model/index.d.ts +16 -0
  145. package/dist/types/ai-model/inspect.d.ts +67 -0
  146. package/dist/types/ai-model/llm-planning.d.ts +19 -0
  147. package/dist/types/ai-model/prompt/common.d.ts +2 -0
  148. package/dist/types/ai-model/prompt/describe.d.ts +1 -0
  149. package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
  150. package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
  151. package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
  152. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
  153. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  154. package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
  155. package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
  156. package/dist/types/ai-model/prompt/util.d.ts +33 -0
  157. package/dist/types/ai-model/prompt/yaml-generator.d.ts +102 -0
  158. package/dist/types/ai-model/service-caller/codex-app-server.d.ts +42 -0
  159. package/dist/types/ai-model/service-caller/image-detail.d.ts +2 -0
  160. package/dist/types/ai-model/service-caller/index.d.ts +60 -0
  161. package/dist/types/ai-model/service-caller/request-timeout.d.ts +32 -0
  162. package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
  163. package/dist/types/common.d.ts +288 -0
  164. package/dist/types/device/device-options.d.ts +155 -0
  165. package/dist/types/device/index.d.ts +2565 -0
  166. package/dist/types/dump/html-utils.d.ts +75 -0
  167. package/dist/types/dump/index.d.ts +5 -0
  168. package/dist/types/dump/screenshot-restoration.d.ts +8 -0
  169. package/dist/types/dump/screenshot-store.d.ts +49 -0
  170. package/dist/types/index.d.ts +21 -0
  171. package/dist/types/report-cli.d.ts +36 -0
  172. package/dist/types/report-generator.d.ts +88 -0
  173. package/dist/types/report-markdown.d.ts +24 -0
  174. package/dist/types/report.d.ts +52 -0
  175. package/dist/types/screenshot-item.d.ts +67 -0
  176. package/dist/types/service/index.d.ts +24 -0
  177. package/dist/types/service/utils.d.ts +2 -0
  178. package/dist/types/skill/index.d.ts +25 -0
  179. package/dist/types/task-runner.d.ts +50 -0
  180. package/dist/types/task-timing.d.ts +8 -0
  181. package/dist/types/tree.d.ts +4 -0
  182. package/dist/types/types.d.ts +684 -0
  183. package/dist/types/utils.d.ts +45 -0
  184. package/dist/types/yaml/builder.d.ts +2 -0
  185. package/dist/types/yaml/index.d.ts +4 -0
  186. package/dist/types/yaml/player.d.ts +34 -0
  187. package/dist/types/yaml/utils.d.ts +9 -0
  188. package/dist/types/yaml.d.ts +215 -0
  189. package/package.json +130 -0
File without changes
@@ -0,0 +1,39 @@
1
+ import { TaskRunner } from "../task-runner.mjs";
2
/**
 * Transpiler helper that installs `value` under `key` on `obj`.
 *
 * When `key` is already reachable on `obj` (own or inherited, per the `in`
 * operator) the property is (re)defined as an enumerable, configurable,
 * writable data property; otherwise a plain assignment is performed.
 *
 * @param {object} obj - Target object; mutated in place.
 * @param {PropertyKey} key - Property name to set.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj` that was passed in.
 */
function _define_property(obj, key, value) {
  const alreadyReachable = key in obj;
  if (!alreadyReachable) {
    obj[key] = value;
    return obj;
  }
  Object.defineProperty(obj, key, {
    value,
    enumerable: true,
    configurable: true,
    writable: true,
  });
  return obj;
}
12
/**
 * Facade over a single `TaskRunner` instance.
 *
 * Every method forwards to the identically-purposed runner method; the
 * session itself keeps no state other than the runner reference.
 */
class ExecutionSession {
  /**
   * @param {*} name - Forwarded unchanged to the `TaskRunner` constructor.
   * @param {*} contextProvider - Forwarded unchanged to the `TaskRunner` constructor.
   * @param {*} options - Forwarded unchanged to the `TaskRunner` constructor.
   */
  constructor(name, contextProvider, options) {
    _define_property(this, "runner", void 0);
    this.runner = new TaskRunner(name, contextProvider, options);
  }

  /**
   * Queues tasks on the runner and waits for `runner.append` to settle.
   * Resolves with no value.
   */
  async append(tasks, options) {
    const { runner } = this;
    await runner.append(tasks, options);
  }

  /** Forwards to `runner.appendAndFlush` and resolves with its result. */
  async appendAndRun(tasks, options) {
    const { runner } = this;
    return runner.appendAndFlush(tasks, options);
  }

  /** Forwards to `runner.flush` and resolves with its result. */
  async run(options) {
    const { runner } = this;
    return runner.flush(options);
  }

  /** @returns {*} Whatever `runner.isInErrorState()` reports. */
  isInErrorState() {
    const { runner } = this;
    return runner.isInErrorState();
  }

  /** @returns {*} Whatever `runner.latestErrorTask()` returns. */
  latestErrorTask() {
    const { runner } = this;
    return runner.latestErrorTask();
  }

  /** Forwards `errorMsg` to `runner.appendErrorPlan` and returns its result. */
  appendErrorPlan(errorMsg) {
    const { runner } = this;
    return runner.appendErrorPlan(errorMsg);
  }

  /** @returns {*} The underlying runner instance. */
  getRunner() {
    return this.runner;
  }
}
39
+ export { ExecutionSession };
@@ -0,0 +1,6 @@
1
+ import { Agent, createAgent } from "./agent.mjs";
2
+ import { commonContextParser, getReportFileName, printReportMsg } from "./utils.mjs";
3
+ import { extractInsightParam, locateParamStr, paramStr, taskTitleStr, typeStr } from "./ui-utils.mjs";
4
+ import { TaskCache, cacheFileExt } from "./task-cache.mjs";
5
+ import { TaskExecutor } from "./tasks.mjs";
6
+ export { Agent, TaskCache, TaskExecutor, cacheFileExt, commonContextParser, createAgent, extractInsightParam, getReportFileName, locateParamStr, paramStr, printReportMsg, taskTitleStr, typeStr };
@@ -0,0 +1,343 @@
1
+ import { findAllMidsceneLocatorField, parseActionParam } from "../ai-model/index.mjs";
2
+ import { setTimingFieldOnce } from "../task-timing.mjs";
3
+ import { ServiceError } from "../types.mjs";
4
+ import { sleep } from "../utils.mjs";
5
+ import { generateElementByRect } from "@godscene/shared/extractor";
6
+ import { getDebug } from "@godscene/shared/logger";
7
+ import { assert } from "@godscene/shared/utils";
8
+ import { ifPlanLocateParamIsBbox, matchElementFromCache, matchElementFromPlan, transformLogicalElementToScreenshot, transformLogicalRectToScreenshotRect } from "./utils.mjs";
9
/**
 * Transpiler helper that installs `value` under `key` on `obj`.
 *
 * A key that is already visible through the `in` operator is redefined as an
 * enumerable, configurable, writable data property; a new key is simply
 * assigned.
 *
 * @param {object} obj - Target object; mutated in place.
 * @param {PropertyKey} key - Property name to set.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj` that was passed in.
 */
function _define_property(obj, key, value) {
  const alreadyReachable = key in obj;
  if (!alreadyReachable) {
    obj[key] = value;
    return obj;
  }
  Object.defineProperty(obj, key, {
    value,
    enumerable: true,
    configurable: true,
    writable: true,
  });
  return obj;
}
19
+ const debug = getDebug('agent:task-builder');
20
/**
 * Tells whether `cache` is an object that has at least one own enumerable key.
 *
 * @param {*} cache - Candidate cache payload.
 * @returns {boolean} `false` for `null`, `undefined`, non-objects and
 *   objects without own enumerable keys; `true` otherwise.
 */
function hasNonEmptyCache(cache) {
  if (cache == null) return false;
  if (typeof cache !== 'object') return false;
  return Object.keys(cache).length > 0;
}
23
/**
 * Builds a `Locate` plan entry for the given locate parameter.
 *
 * @param {string|object} param - A prompt string, which is wrapped as
 *   `{ prompt }`, or an already-structured locate parameter, which is used
 *   as-is (same reference).
 * @returns {{type: 'Locate', param: object, thought: string}} Plan with an
 *   empty `thought`.
 */
function locatePlanForLocate(param) {
  let locate = param;
  if (typeof param === 'string') {
    locate = { prompt: param };
  }
  return {
    type: 'Locate',
    param: locate,
    thought: '',
  };
}
34
/**
 * Renders a locate prompt as text for error messages.
 *
 * String prompts are returned unchanged; objects carrying a string `prompt`
 * field yield that field; anything else is JSON-serialised (falling back to
 * `String(...)` when JSON yields nothing, e.g. for `undefined`). This avoids
 * "[object Object]" appearing in "Element not found" errors.
 *
 * @param {*} prompt - The `prompt` value of a locate parameter.
 * @returns {string} Human-readable prompt text.
 */
function describeLocatePrompt(prompt) {
  if (typeof prompt === 'string') return prompt;
  if (typeof prompt?.prompt === 'string') return prompt.prompt;
  return JSON.stringify(prompt) ?? String(prompt);
}

/**
 * Turns a list of plans into executable task descriptors.
 *
 * `Locate` plans become locate tasks, `Finished` plans become no-op marker
 * tasks, and every other plan type is looked up by name in `actionSpace` and
 * becomes an action task, preceded by one locate task per populated locator
 * field of the action's parameter schema.
 */
class TaskBuilder {
  /**
   * @param {object} deps
   * @param {object} deps.interfaceInstance - Stored as `this.interface`; its
   *   optional hooks (`beforeInvokeAction`, `afterInvokeAction`,
   *   `rectMatchesCacheFeature`, `cacheFeatureForPoint`,
   *   `getElementFromPoint`) are probed before use.
   * @param {object} deps.service - Provides `locate` and `contextRetrieverFn`.
   * @param {object} [deps.taskCache] - Optional locate cache.
   * @param {Array<object>} deps.actionSpace - Actions searched by `name`.
   * @param {number} [deps.waitAfterAction] - Fallback post-action delay used
   *   when an action does not define `delayAfterRunner`.
   */
  constructor({ interfaceInstance, service, taskCache, actionSpace, waitAfterAction }) {
    _define_property(this, "interface", void 0);
    _define_property(this, "service", void 0);
    _define_property(this, "taskCache", void 0);
    _define_property(this, "actionSpace", void 0);
    _define_property(this, "waitAfterAction", void 0);
    this.interface = interfaceInstance;
    this.service = service;
    this.taskCache = taskCache;
    this.actionSpace = actionSpace;
    this.waitAfterAction = waitAfterAction;
  }

  /**
   * Builds tasks for every plan, in order.
   *
   * @param {Array<object>} plans - Plans with at least a `type`.
   * @param {*} modelConfigForPlanning - Carried on the build context.
   * @param {*} modelConfigForDefaultIntent - Passed to locate and cache-feature calls.
   * @param {{cacheable?: boolean, deepLocate?: boolean, abortSignal?: *}} [options]
   * @returns {Promise<{tasks: Array<object>}>} The generated tasks.
   * @throws {Error} When a plan type is neither `Locate`/`Finished` nor a
   *   known action name.
   */
  async build(plans, modelConfigForPlanning, modelConfigForDefaultIntent, options) {
    const tasks = [];
    const cacheable = options?.cacheable;
    const context = {
      tasks,
      modelConfigForPlanning,
      modelConfigForDefaultIntent,
      cacheable,
      deepLocate: options?.deepLocate,
      abortSignal: options?.abortSignal,
    };
    const planHandlers = new Map([
      ['Locate', (plan) => this.handleLocatePlan(plan, context)],
      ['Finished', (plan) => this.handleFinishedPlan(plan, context)],
    ]);
    // Any plan type without a dedicated handler is treated as an action.
    const defaultHandler = (plan) => this.handleActionPlan(plan, context);
    for (const plan of plans) {
      const handler = planHandlers.get(plan.type) ?? defaultHandler;
      await handler(plan);
    }
    return {
      tasks,
    };
  }

  /**
   * Appends a marker task whose executor does nothing.
   *
   * @param {object} plan - Plan whose `thought` is copied to the task.
   * @param {object} context - Build context; `context.tasks` is appended to.
   */
  handleFinishedPlan(plan, context) {
    const taskActionFinished = {
      type: 'Action Space',
      subType: 'Finished',
      param: null,
      thought: plan.thought,
      executor: async () => {},
    };
    context.tasks.push(taskActionFinished);
  }

  /**
   * Appends a locate task built from `plan.param`.
   *
   * @param {object} plan - A `Locate` plan.
   * @param {object} context - Build context; `context.tasks` is appended to.
   */
  async handleLocatePlan(plan, context) {
    const taskLocate = this.createLocateTask(plan, plan.param, context);
    context.tasks.push(taskLocate);
  }

  /**
   * Appends the locate tasks required by an action, followed by the action
   * task itself.
   *
   * For each locator field of the action's schema that is set on
   * `plan.param`, a locate task is queued whose result overwrites that field
   * on `plan.param`. A missing field is only an error when the schema marks
   * it as required.
   *
   * @param {object} plan - Plan whose `type` names an entry of `actionSpace`.
   * @param {object} context - Build context; `context.tasks` is appended to.
   * @throws {Error} When no action named `plan.type` exists, or (via
   *   `assert`) when a required locator field is absent.
   */
  async handleActionPlan(plan, context) {
    const planType = plan.type;
    const action = this.actionSpace.find((item) => item.name === planType);
    const param = plan.param;
    if (!action) throw new Error(`Action type '${planType}' not found`);
    const locateFields = findAllMidsceneLocatorField(action.paramSchema);
    const requiredLocateFields = findAllMidsceneLocatorField(action.paramSchema, true);
    locateFields.forEach((field) => {
      // `param` may be absent altogether; treat that like a missing field so
      // the assertion below reports it instead of a raw TypeError.
      const fieldValue = param?.[field];
      if (fieldValue) {
        const locatePlan = locatePlanForLocate(fieldValue);
        debug('will prepend locate param for field', `action.type=${planType}`, `param=${JSON.stringify(fieldValue)}`, `locatePlan=${JSON.stringify(locatePlan)}`, `hasBbox=${ifPlanLocateParamIsBbox(fieldValue)}`);
        const locateTask = this.createLocateTask(locatePlan, fieldValue, context, (result) => {
          // Replace the locate description with the located element so the
          // action executor receives it.
          param[field] = result;
        });
        context.tasks.push(locateTask);
      } else {
        assert(!requiredLocateFields.includes(field), `Required locate field '${field}' is not provided for action ${planType}`);
        debug(`field '${field}' is not provided for action ${planType}`);
      }
    });
    const task = {
      type: 'Action Space',
      subType: planType,
      thought: plan.thought,
      param: plan.param,
      executor: async (param, taskContext) => {
        const timing = taskContext.task.timing;
        debug('executing action', planType, param, `taskContext.element.center: ${taskContext.element?.center}`);
        const uiContext = taskContext.uiContext;
        assert(uiContext, 'uiContext is required for Action task');
        requiredLocateFields.forEach((field) => {
          assert(param?.[field], `field '${field}' is required for action ${planType} but not provided. Cannot execute action ${planType}.`);
        });
        setTimingFieldOnce(timing, 'beforeInvokeActionHookStart');
        // Value is handed to `sleep`; presumably milliseconds.
        const delayBeforeRunner = action.delayBeforeRunner ?? 200;
        try {
          // The hook and the pre-action delay run concurrently.
          await Promise.all([
            (async () => {
              if (this.interface.beforeInvokeAction) {
                debug(`will call "beforeInvokeAction" for interface with action name ${action.name}`);
                await this.interface.beforeInvokeAction(action.name, param);
                debug(`called "beforeInvokeAction" for interface with action name ${action.name}`);
              }
            })(),
            delayBeforeRunner > 0 ? sleep(delayBeforeRunner) : Promise.resolve(),
          ]);
        } catch (originalError) {
          const originalMessage = originalError?.message || String(originalError);
          throw new Error(`error in running beforeInvokeAction for ${action.name}: ${originalMessage}`, {
            cause: originalError,
          });
        }
        setTimingFieldOnce(timing, 'beforeInvokeActionHookEnd');
        const { shrunkShotToLogicalRatio } = uiContext;
        if (void 0 === shrunkShotToLogicalRatio) throw new Error('shrunkShotToLogicalRatio is not defined in Action task');
        // The hook above sees the raw param; the action and the after-hook
        // see the schema-parsed param.
        let parsedParam = param;
        if (action.paramSchema) {
          try {
            parsedParam = parseActionParam(param, action.paramSchema, {
              shrunkShotToLogicalRatio,
            });
          } catch (error) {
            throw new Error(`Invalid parameters for action ${action.name}: ${error.message}\nParameters: ${JSON.stringify(param)}`, {
              cause: error,
            });
          }
        }
        setTimingFieldOnce(timing, 'callActionStart');
        debug('calling action', action.name);
        const actionFn = action.call.bind(this.interface);
        const actionResult = await actionFn(parsedParam, taskContext);
        setTimingFieldOnce(timing, 'callActionEnd');
        debug('called action', action.name, 'result:', actionResult);
        setTimingFieldOnce(timing, 'afterInvokeActionHookStart');
        const delayAfterRunner = action.delayAfterRunner ?? this.waitAfterAction ?? 300;
        if (delayAfterRunner > 0) await sleep(delayAfterRunner);
        try {
          if (this.interface.afterInvokeAction) {
            debug(`will call "afterInvokeAction" for interface with action name ${action.name}`);
            await this.interface.afterInvokeAction(action.name, parsedParam);
            debug(`called "afterInvokeAction" for interface with action name ${action.name}`);
          }
        } catch (originalError) {
          const originalMessage = originalError?.message || String(originalError);
          throw new Error(`error in running afterInvokeAction for ${action.name}: ${originalMessage}`, {
            cause: originalError,
          });
        }
        setTimingFieldOnce(timing, 'afterInvokeActionHookEnd');
        return {
          output: actionResult,
        };
      },
    };
    context.tasks.push(task);
  }

  /**
   * Creates a locate task descriptor.
   *
   * When run, the task's executor tries these sources in order and uses the
   * first that yields an element: a bbox supplied by the plan (skipped when
   * `deepLocate` is set on the param), an xpath resolved through
   * `interface.rectMatchesCacheFeature`, the task cache, and finally
   * `service.locate`.
   *
   * @param {object} plan - Plan whose `thought` is copied to the task.
   * @param {string|object} detailedLocateParam - Prompt string or locate parameter.
   * @param {object} context - Build context (`cacheable`,
   *   `modelConfigForDefaultIntent`, `deepLocate`, `abortSignal` are read).
   * @param {(element: object) => void} [onResult] - Called with the located
   *   element before the executor returns.
   * @returns {object} Task descriptor of type `Planning` / `Locate`.
   */
  createLocateTask(plan, detailedLocateParam, context, onResult) {
    const { cacheable, modelConfigForDefaultIntent, deepLocate, abortSignal } = context;
    let locateParam = detailedLocateParam;
    if ('string' == typeof locateParam) locateParam = {
      prompt: locateParam,
    };
    if (void 0 !== cacheable) locateParam = {
      ...locateParam,
      cacheable,
    };
    // `?.` keeps a nullish locate param from crashing here; the executor's
    // own assertion reports the missing prompt/bbox instead.
    if (deepLocate && !locateParam?.deepLocate) locateParam = {
      ...locateParam,
      deepLocate: true,
    };
    const taskLocator = {
      type: 'Planning',
      subType: 'Locate',
      param: locateParam,
      thought: plan.thought,
      executor: async (param, taskContext) => {
        const { task } = taskContext;
        let { uiContext } = taskContext;
        assert(param?.prompt || param?.bbox, `No prompt or id or position or bbox to locate, param=${JSON.stringify(param)}`);
        if (!uiContext) uiContext = await this.service.contextRetrieverFn();
        assert(uiContext, 'uiContext is required for Service task');
        const { shrunkShotToLogicalRatio } = uiContext;
        if (void 0 === shrunkShotToLogicalRatio) throw new Error('shrunkShotToLogicalRatio is not defined in locate task');
        let locateDump;
        let locateResult;
        // Copies log/usage details from a locate dump onto the task.
        const applyDump = (dump) => {
          if (!dump) return;
          locateDump = dump;
          task.log = {
            dump,
            rawResponse: dump.taskInfo?.rawResponse,
          };
          task.usage = dump.taskInfo?.usage;
          if (dump.taskInfo?.searchAreaUsage) task.searchAreaUsage = dump.taskInfo.searchAreaUsage;
          if (dump.taskInfo?.reasoning_content) task.reasoning_content = dump.taskInfo.reasoning_content;
        };
        const planLocatedElement = ifPlanLocateParamIsBbox(param) ? matchElementFromPlan(param) : void 0;
        // With deepLocate the plan's bbox is not used directly; it is only
        // passed on to service.locate below.
        const elementFromBbox = param.deepLocate ? void 0 : planLocatedElement;
        const isPlanHit = !!elementFromBbox;
        let rectFromXpath;
        if (!isPlanHit && param.xpath && this.interface.rectMatchesCacheFeature) {
          try {
            rectFromXpath = await this.interface.rectMatchesCacheFeature({
              xpaths: [param.xpath],
            });
          } catch (error) {
            // Best effort: fall through to cache / AI locate, but leave a trace.
            debug('rectMatchesCacheFeature failed for xpath %s: %s', param.xpath, error);
          }
        }
        const elementFromXpath = rectFromXpath ? generateElementByRect(transformLogicalRectToScreenshotRect(rectFromXpath, shrunkShotToLogicalRatio), 'string' == typeof param.prompt ? param.prompt : param.prompt?.prompt || '') : void 0;
        const isXpathHit = !!elementFromXpath;
        const cachePrompt = param.prompt;
        const locateCacheRecord = this.taskCache?.matchLocateCache(cachePrompt);
        const cacheEntry = locateCacheRecord?.cacheContent?.cache;
        const elementFromCacheResult = isPlanHit || isXpathHit ? null : await matchElementFromCache({
          taskCache: this.taskCache,
          interfaceInstance: this.interface,
        }, cacheEntry, cachePrompt, param.cacheable);
        const elementFromCache = elementFromCacheResult ? transformLogicalElementToScreenshot(elementFromCacheResult, shrunkShotToLogicalRatio) : void 0;
        const isCacheHit = !!elementFromCache;
        let elementFromAiLocate;
        const timing = taskContext.task.timing;
        if (!isXpathHit && !isCacheHit && !isPlanHit) {
          try {
            setTimingFieldOnce(timing, 'callAiStart');
            locateResult = await this.service.locate(param, {
              context: uiContext,
              planLocatedElement,
            }, modelConfigForDefaultIntent, abortSignal);
            applyDump(locateResult.dump);
            elementFromAiLocate = locateResult.element;
          } catch (error) {
            // Keep the dump of a failed locate on the task before rethrowing.
            if (error instanceof ServiceError) applyDump(error.dump);
            throw error;
          } finally {
            setTimingFieldOnce(timing, 'callAiEnd');
          }
        }
        const element = elementFromBbox || elementFromXpath || elementFromCache || elementFromAiLocate;
        const locateCacheAlreadyExists = hasNonEmptyCache(locateCacheRecord?.cacheContent?.cache);
        let currentCacheEntry;
        // Record a cache feature unless the element came from the cache, the
        // plan already hit with an existing cache record, or caching is
        // explicitly disabled on the param.
        if (element && this.taskCache && !isCacheHit && (!isPlanHit || !locateCacheAlreadyExists) && param?.cacheable !== false) {
          if (this.interface.cacheFeatureForPoint) {
            try {
              let pointForCache = element.center;
              if (1 !== shrunkShotToLogicalRatio) {
                pointForCache = [
                  Math.round(element.center[0] / shrunkShotToLogicalRatio),
                  Math.round(element.center[1] / shrunkShotToLogicalRatio),
                ];
                debug('Transformed coordinates for cacheFeatureForPoint: %o -> %o', element.center, pointForCache);
              }
              const feature = await this.interface.cacheFeatureForPoint(pointForCache, {
                targetDescription: 'string' == typeof param.prompt ? param.prompt : param.prompt?.prompt,
                modelConfig: modelConfigForDefaultIntent,
              });
              if (hasNonEmptyCache(feature)) {
                debug('update cache, prompt: %s, cache: %o', cachePrompt, feature);
                currentCacheEntry = feature;
                this.taskCache.updateOrAppendCacheRecord({
                  type: 'locate',
                  prompt: cachePrompt,
                  cache: feature,
                }, locateCacheRecord);
              } else debug('no cache data returned, skip cache update, prompt: %s', cachePrompt);
            } catch (error) {
              debug('cacheFeatureForPoint failed: %s', error);
            }
          } else debug('cacheFeatureForPoint is not supported, skip cache update');
        }
        if (!element) {
          const promptText = describeLocatePrompt(param.prompt);
          if (locateDump) throw new ServiceError(`Element not found : ${promptText}`, locateDump);
          throw new Error(`Element not found: ${promptText}`);
        }
        let hitBy;
        if (isPlanHit) hitBy = {
          from: 'Plan',
          context: {
            bbox: param.bbox,
          },
        };
        else if (isXpathHit) hitBy = {
          from: 'User expected path',
          context: {
            xpath: param.xpath,
          },
        };
        else if (isCacheHit) hitBy = {
          from: 'Cache',
          context: {
            cacheEntry,
            cacheToSave: currentCacheEntry,
          },
        };
        if (this.interface.getElementFromPoint && element.center) {
          try {
            const [x, y] = element.center;
            const data = await this.interface.getElementFromPoint({
              x,
              y,
            });
            element.allPaths = data?.allPaths || [];
            element.containerPaths = data?.containerPaths || [];
          } catch (error) {
            // Path enrichment is optional; default to empty lists on failure.
            debug('getElementFromPoint failed: %s', error);
            element.allPaths = [];
            element.containerPaths = [];
          }
        }
        onResult?.(element);
        return {
          output: {
            element: {
              ...element,
              dpr: uiContext.deprecatedDpr,
            },
          },
          hitBy,
        };
      },
    };
    return taskLocator;
  }
}
343
+ export { TaskBuilder, locatePlanForLocate };
@@ -0,0 +1,212 @@
1
+ import node_assert from "node:assert";
2
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
3
+ import { dirname, join } from "node:path";
4
+ import { isDeepStrictEqual } from "node:util";
5
+ import { getMidsceneRunSubDir } from "@godscene/shared/common";
6
+ import { MIDSCENE_CACHE_MAX_FILENAME_LENGTH, globalConfigManager } from "@godscene/shared/env";
7
+ import { getDebug } from "@godscene/shared/logger";
8
+ import { generateHashId, ifInBrowser, ifInWorker, replaceIllegalPathCharsAndSpace } from "@godscene/shared/utils";
9
+ import js_yaml from "js-yaml";
10
+ import semver from "semver";
11
+ import { getMidsceneVersion } from "./utils.mjs";
12
/**
 * Sets `obj[key]` to `value` and returns `obj`.
 *
 * When `key` is already reachable on `obj` (own or inherited) the property
 * is (re)defined directly on `obj` as an enumerable, configurable, writable
 * data property, so inherited setters are not invoked. Otherwise a plain
 * assignment is used.
 *
 * @param {object} obj - target object.
 * @param {PropertyKey} key - property name.
 * @param {*} value - value to store.
 * @returns {object} the same `obj`.
 */
function _define_property(obj, key, value) {
    if (!(key in obj)) {
        obj[key] = value;
        return obj;
    }
    const descriptor = {
        configurable: true,
        enumerable: true,
        writable: true,
        value
    };
    Object.defineProperty(obj, key, descriptor);
    return obj;
}
22
// Namespaced debug logger shared by this module (and re-exported from it).
const debug = getDebug('cache');

// Suffix appended to the cache id when the default cache file name is built.
const cacheFileExt = '.cache.yaml';

// Cache files written by a non-beta version older than this are rejected on load.
const lowestSupportedMidsceneVersion = '0.16.10';

// Fallback UTF-8 byte limit for the cache id, used when
// MIDSCENE_CACHE_MAX_FILENAME_LENGTH yields no value.
const DEFAULT_CACHE_MAX_FILENAME_LENGTH = 200;
26
/**
 * In-memory store of "plan" and "locate" cache records, optionally backed by
 * a YAML file.
 *
 * Records loaded from the file can each be matched (consumed) once per
 * instance; records appended during the run are kept after the loaded ones.
 * `matchedCacheIndices` holds one `${type}:${prompt}:${index}` key per
 * consumed record, where `index` is the record's position in
 * `cache.caches`.
 */
class TaskCache {
    /**
     * Finds the first not-yet-consumed loaded record with the given type
     * whose prompt is deeply equal to `prompt`, and marks it as consumed.
     *
     * @param {*} prompt - prompt to look up (string or structured value).
     * @param {string} type - record type, e.g. 'plan' or 'locate'.
     * @returns {{cacheContent: object, cacheUsable: boolean, updateFn: Function}|undefined}
     *   the hit, or `undefined` when reading is disabled or nothing matches.
     *   `updateFn(cb)` runs `cb(record)` and then flushes to file unless the
     *   instance is read-only.
     */
    matchCache(prompt, type) {
        if (!this.isCacheResultUsed) return;
        const promptStr = typeof prompt === 'string' ? prompt : JSON.stringify(prompt);
        for (let i = 0; i < this.cacheOriginalLength; i++) {
            const item = this.cache.caches[i];
            const key = `${type}:${promptStr}:${i}`;
            const isUnusedMatch = item.type === type && isDeepStrictEqual(item.prompt, prompt) && !this.matchedCacheIndices.has(key);
            if (!isUnusedMatch) continue;
            if (item.type === 'locate') {
                // Legacy records kept `xpaths` at the top level; move them
                // under `cache` and blank the old field.
                if (!item.cache && Array.isArray(item.xpaths)) item.cache = {
                    xpaths: item.xpaths
                };
                if ('xpaths' in item) item.xpaths = void 0;
            }
            this.matchedCacheIndices.add(key);
            debug('cache found and marked as used, type: %s, prompt: %s, index: %d', type, prompt, i);
            return {
                cacheContent: item,
                cacheUsable: true,
                updateFn: (cb) => {
                    debug('will call updateFn to update cache, type: %s, prompt: %s, index: %d', type, prompt, i);
                    cb(item);
                    if (this.readOnlyMode) return void debug('read-only mode, cache updated in memory but not flushed to file');
                    debug('cache updated, will flush to file, type: %s, prompt: %s, index: %d', type, prompt, i);
                    this.flushCacheToFile();
                }
            };
        }
        debug('no unused cache found, type: %s, prompt: %s', type, prompt);
    }

    /**
     * Matches a 'plan' record and checks that its `yamlWorkflow` is usable.
     *
     * The record is still returned (so it can be updated) when the workflow
     * is empty, unparsable, or contains no task with a non-empty `flow`, but
     * with `cacheUsable: false`.
     *
     * @param {*} prompt - prompt to look up.
     * @returns {object|undefined} the match result, or `undefined` on a miss.
     */
    matchPlanCache(prompt) {
        const result = this.matchCache(prompt, 'plan');
        if (!result) return;
        const asUnusable = (reason) => {
            debug(reason);
            return {
                ...result,
                cacheUsable: false
            };
        };
        const yamlWorkflow = result.cacheContent.yamlWorkflow;
        if (!yamlWorkflow?.trim()) return asUnusable('plan cache matched but yamlWorkflow is empty, treat as cache miss');
        try {
            const parsed = js_yaml.load(yamlWorkflow);
            const hasNonEmptyFlow = parsed?.tasks?.some((task) => Array.isArray(task.flow) && task.flow.length > 0);
            if (!hasNonEmptyFlow) return asUnusable('plan cache matched but flow is empty, treat as cache miss');
        } catch {
            // Covers both YAML syntax errors and unexpected workflow shapes.
            return asUnusable('plan cache matched but yamlWorkflow is invalid, treat as cache miss');
        }
        return result;
    }

    /**
     * Matches a 'locate' record for `prompt`.
     *
     * @param {*} prompt - prompt to look up.
     * @returns {object|undefined} see {@link TaskCache#matchCache}.
     */
    matchLocateCache(prompt) {
        return this.matchCache(prompt, 'locate');
    }

    /**
     * Appends a new record and flushes to file unless the instance is
     * read-only.
     *
     * @param {object} cache - record to append.
     */
    appendCache(cache) {
        debug('will append cache', cache);
        this.cache.caches.push(cache);
        if (this.readOnlyMode) return void debug('read-only mode, cache appended to memory but not flushed to file');
        this.flushCacheToFile();
    }

    /**
     * Reads and validates the cache file at `this.cacheFilePath`.
     *
     * @returns {object|undefined} the parsed cache content with
     *   `midsceneVersion` replaced by the running version, or `undefined`
     *   when the file is missing, outdated, unreadable or malformed.
     * @throws {AssertionError} when no cache file path is set.
     */
    loadCacheFromFile() {
        const cacheFile = this.cacheFilePath;
        node_assert(cacheFile, 'cache file path is required');
        if (!existsSync(cacheFile)) return void debug('no cache file found, path: %s', cacheFile);
        const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, '.json');
        // When the path does not contain the default extension (custom
        // cacheFilePath) the replace is a no-op; the cache file must then
        // not be mistaken for a legacy JSON cache.
        const hasLegacyJsonCache = jsonTypeCacheFile !== cacheFile && existsSync(jsonTypeCacheFile);
        if (hasLegacyJsonCache && this.isCacheResultUsed) return void console.warn(`An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`);
        try {
            const data = readFileSync(cacheFile, 'utf8');
            const jsonData = js_yaml.load(data);
            const version = getMidsceneVersion();
            if (!version) return void debug('no midscene version info, will not read cache from file');
            if (semver.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes('beta')) return void console.warn(`You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.\nPlease delete the existing cache and rebuild it. Sorry for the inconvenience.\ncache file: ${cacheFile}`);
            // The rest of the class indexes and pushes into `caches`.
            if (!Array.isArray(jsonData.caches)) return void debug('cache file exists but load failed, path: %s, error: %s', cacheFile, 'caches is not an array');
            debug('cache loaded from file, path: %s, cache version: %s, record length: %s', cacheFile, jsonData.midsceneVersion, jsonData.caches.length);
            jsonData.midsceneVersion = version;
            return jsonData;
        } catch (err) {
            debug('cache file exists but load failed, path: %s, error: %s', cacheFile, err);
            return;
        }
    }

    /**
     * Drops loaded records that were never matched, keeping matched records
     * and records appended during this run, then re-bases the bookkeeping
     * (`cacheOriginalLength` and the index suffix of every key in
     * `matchedCacheIndices`) onto the new positions so later matching and
     * later cleanups stay correct.
     *
     * @returns {number} how many records were removed.
     */
    pruneUnusedCacheRecords() {
        const parseKeyIndex = (key) => Number.parseInt(key.slice(key.lastIndexOf(':') + 1), 10);
        const usedIndices = new Set();
        for (const key of this.matchedCacheIndices) {
            const index = parseKeyIndex(key);
            if (!Number.isNaN(index)) usedIndices.add(index);
        }
        const previousCaches = this.cache.caches;
        const keptCaches = [];
        const newIndexByOldIndex = new Map();
        let keptOriginalCount = 0;
        previousCaches.forEach((record, index) => {
            const isNew = index >= this.cacheOriginalLength;
            if (!isNew && !usedIndices.has(index)) return;
            newIndexByOldIndex.set(index, keptCaches.length);
            if (!isNew) keptOriginalCount++;
            keptCaches.push(record);
        });
        this.cache.caches = keptCaches;
        // Loaded records always precede appended ones, so the surviving
        // loaded records now occupy indices [0, keptOriginalCount).
        this.cacheOriginalLength = keptOriginalCount;
        const remappedKeys = [];
        for (const key of this.matchedCacheIndices) {
            const newIndex = newIndexByOldIndex.get(parseKeyIndex(key));
            if (newIndex !== undefined) remappedKeys.push(`${key.slice(0, key.lastIndexOf(':'))}:${newIndex}`);
        }
        this.matchedCacheIndices.clear();
        for (const key of remappedKeys) this.matchedCacheIndices.add(key);
        return previousCaches.length - keptCaches.length;
    }

    /**
     * Writes the cache to `this.cacheFilePath` as YAML, with 'plan' records
     * ordered before 'locate' records. Write failures are logged, not thrown.
     *
     * @param {{cleanUnused?: boolean}} [options] - when `cleanUnused` is set
     *   and cache reading is enabled, unmatched loaded records are removed
     *   before writing.
     */
    flushCacheToFile(options) {
        const version = getMidsceneVersion();
        if (!version) return void debug('no midscene version info, will not write cache to file');
        if (!this.cacheFilePath) return void debug('no cache file path, will not write cache to file');
        if (options?.cleanUnused) {
            if (this.isCacheResultUsed) {
                const removedCount = this.pruneUnusedCacheRecords();
                if (removedCount > 0) debug('cleaned %d unused cache record(s)', removedCount);
                else debug('no unused cache to clean');
            } else debug('skip cleaning: cache is not used for reading');
        }
        try {
            const dir = dirname(this.cacheFilePath);
            if (!existsSync(dir)) {
                mkdirSync(dir, {
                    recursive: true
                });
                debug('created cache directory: %s', dir);
            }
            // Sort a copy: in-memory order must stay aligned with the
            // indices tracked in matchedCacheIndices.
            const sortedCaches = [
                ...this.cache.caches
            ].sort((a, b) => {
                if (a.type === 'plan' && b.type === 'locate') return -1;
                if (a.type === 'locate' && b.type === 'plan') return 1;
                return 0;
            });
            const cacheToWrite = {
                ...this.cache,
                caches: sortedCaches
            };
            const yamlData = js_yaml.dump(cacheToWrite, {
                lineWidth: -1
            });
            writeFileSync(this.cacheFilePath, yamlData);
            debug('cache flushed to file: %s', this.cacheFilePath);
        } catch (err) {
            debug('write cache to file failed, path: %s, error: %s', this.cacheFilePath, err);
        }
    }

    /**
     * Updates the matched record in place when one is given, otherwise
     * appends `newRecord`.
     *
     * @param {object} newRecord - record carrying the fresh data.
     * @param {object} [cachedRecord] - result of a previous match call.
     */
    updateOrAppendCacheRecord(newRecord, cachedRecord) {
        if (!cachedRecord) {
            this.appendCache(newRecord);
            return;
        }
        if (newRecord.type === 'plan') {
            cachedRecord.updateFn((cache) => {
                cache.yamlWorkflow = newRecord.yamlWorkflow;
            });
            return;
        }
        cachedRecord.updateFn((cache) => {
            cache.cache = newRecord.cache;
            if ('xpaths' in cache) cache.xpaths = void 0;
        });
    }

    /**
     * @param {string} cacheId - identifier used to build the default file
     *   name; sanitized, and shortened to a 32-character prefix plus a hash
     *   when its UTF-8 byte length exceeds the configured maximum.
     * @param {boolean} isCacheResultUsed - whether loaded records may be
     *   matched; forced to `false` in write-only mode.
     * @param {string} [cacheFilePath] - explicit cache file path; ignored in
     *   browser/worker environments, where no file is used.
     * @param {{readOnly?: boolean, writeOnly?: boolean}} [options]
     * @throws {AssertionError} when `cacheId` is falsy.
     * @throws {Error} when both `readOnly` and `writeOnly` are set.
     */
    constructor(cacheId, isCacheResultUsed, cacheFilePath, options = {}) {
        _define_property(this, "cacheId", void 0);
        _define_property(this, "cacheFilePath", void 0);
        _define_property(this, "cache", void 0);
        _define_property(this, "isCacheResultUsed", void 0);
        _define_property(this, "cacheOriginalLength", void 0);
        _define_property(this, "readOnlyMode", void 0);
        _define_property(this, "writeOnlyMode", void 0);
        _define_property(this, "matchedCacheIndices", new Set());
        node_assert(cacheId, 'cacheId is required');
        let safeCacheId = replaceIllegalPathCharsAndSpace(cacheId);
        const cacheMaxFilenameLength = globalConfigManager.getEnvConfigValueAsNumber(MIDSCENE_CACHE_MAX_FILENAME_LENGTH) ?? DEFAULT_CACHE_MAX_FILENAME_LENGTH;
        if (Buffer.byteLength(safeCacheId, 'utf8') > cacheMaxFilenameLength) {
            const prefix = safeCacheId.slice(0, 32);
            const hash = generateHashId(void 0, safeCacheId);
            safeCacheId = `${prefix}-${hash}`;
        }
        this.cacheId = safeCacheId;
        this.cacheFilePath = ifInBrowser || ifInWorker ? void 0 : cacheFilePath || join(getMidsceneRunSubDir('cache'), `${this.cacheId}${cacheFileExt}`);
        const readOnlyMode = Boolean(options?.readOnly);
        const writeOnlyMode = Boolean(options?.writeOnly);
        if (readOnlyMode && writeOnlyMode) throw new Error('TaskCache cannot be both read-only and write-only');
        this.isCacheResultUsed = writeOnlyMode ? false : isCacheResultUsed;
        this.readOnlyMode = readOnlyMode;
        this.writeOnlyMode = writeOnlyMode;
        let cacheContent;
        if (this.cacheFilePath && !this.writeOnlyMode) cacheContent = this.loadCacheFromFile();
        if (!cacheContent) cacheContent = {
            midsceneVersion: getMidsceneVersion(),
            cacheId: this.cacheId,
            caches: []
        };
        this.cache = cacheContent;
        // Only records present at load time are eligible for matching.
        this.cacheOriginalLength = this.isCacheResultUsed ? this.cache.caches.length : 0;
    }
}
212
+ export { TaskCache, cacheFileExt, debug };