@rpascene/core 0.30.18 → 0.30.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. package/dist/es/agent/agent.mjs +6 -6
  2. package/dist/es/agent/task-cache.mjs +0 -2
  3. package/dist/es/agent/tasks.mjs +14 -49
  4. package/dist/es/agent/ui-utils.mjs +0 -2
  5. package/dist/es/agent/utils.mjs +1 -3
  6. package/dist/es/ai-model/action-executor.mjs +6 -7
  7. package/dist/es/ai-model/common.mjs +0 -2
  8. package/dist/es/ai-model/conversation-history.mjs +0 -2
  9. package/dist/es/ai-model/inspect.mjs +6 -8
  10. package/dist/es/ai-model/llm-planning.mjs +2 -4
  11. package/dist/es/ai-model/prompt/assertion.mjs +0 -2
  12. package/dist/es/ai-model/prompt/common.mjs +0 -2
  13. package/dist/es/ai-model/prompt/describe.mjs +0 -2
  14. package/dist/es/ai-model/prompt/extraction.mjs +0 -2
  15. package/dist/es/ai-model/prompt/llm-locator.mjs +0 -2
  16. package/dist/es/ai-model/prompt/llm-planning.mjs +0 -2
  17. package/dist/es/ai-model/prompt/llm-section-locator.mjs +0 -2
  18. package/dist/es/ai-model/prompt/playwright-generator.mjs +6 -7
  19. package/dist/es/ai-model/prompt/ui-tars-locator.mjs +0 -2
  20. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +0 -2
  21. package/dist/es/ai-model/prompt/util.mjs +0 -2
  22. package/dist/es/ai-model/prompt/yaml-generator.mjs +6 -7
  23. package/dist/es/ai-model/service-caller/index.mjs +11 -8
  24. package/dist/es/ai-model/ui-tars-planning.mjs +2 -4
  25. package/dist/es/device/index.mjs +0 -2
  26. package/dist/es/index.mjs +0 -2
  27. package/dist/es/insight/index.mjs +9 -8
  28. package/dist/es/insight/utils.mjs +0 -2
  29. package/dist/es/report.mjs +0 -2
  30. package/dist/es/types.mjs +0 -2
  31. package/dist/es/utils.mjs +2 -4
  32. package/dist/es/yaml/builder.mjs +0 -2
  33. package/dist/es/yaml/player.mjs +0 -2
  34. package/dist/es/yaml/utils.mjs +0 -2
  35. package/dist/lib/agent/agent.js +6 -6
  36. package/dist/lib/agent/index.js +0 -2
  37. package/dist/lib/agent/task-cache.js +0 -2
  38. package/dist/lib/agent/tasks.js +14 -49
  39. package/dist/lib/agent/ui-utils.js +0 -2
  40. package/dist/lib/agent/utils.js +1 -3
  41. package/dist/lib/ai-model/action-executor.js +6 -7
  42. package/dist/lib/ai-model/common.js +0 -2
  43. package/dist/lib/ai-model/conversation-history.js +0 -2
  44. package/dist/lib/ai-model/index.js +0 -2
  45. package/dist/lib/ai-model/inspect.js +6 -8
  46. package/dist/lib/ai-model/llm-planning.js +2 -4
  47. package/dist/lib/ai-model/prompt/assertion.js +0 -2
  48. package/dist/lib/ai-model/prompt/common.js +0 -2
  49. package/dist/lib/ai-model/prompt/describe.js +0 -2
  50. package/dist/lib/ai-model/prompt/extraction.js +0 -2
  51. package/dist/lib/ai-model/prompt/llm-locator.js +0 -2
  52. package/dist/lib/ai-model/prompt/llm-planning.js +0 -2
  53. package/dist/lib/ai-model/prompt/llm-section-locator.js +0 -2
  54. package/dist/lib/ai-model/prompt/playwright-generator.js +6 -7
  55. package/dist/lib/ai-model/prompt/ui-tars-locator.js +0 -2
  56. package/dist/lib/ai-model/prompt/ui-tars-planning.js +0 -2
  57. package/dist/lib/ai-model/prompt/util.js +0 -2
  58. package/dist/lib/ai-model/prompt/yaml-generator.js +6 -7
  59. package/dist/lib/ai-model/service-caller/index.js +11 -8
  60. package/dist/lib/ai-model/ui-tars-planning.js +2 -4
  61. package/dist/lib/device/index.js +0 -2
  62. package/dist/lib/image/index.js +0 -2
  63. package/dist/lib/index.js +0 -2
  64. package/dist/lib/insight/index.js +9 -8
  65. package/dist/lib/insight/utils.js +0 -2
  66. package/dist/lib/report.js +0 -2
  67. package/dist/lib/tree.js +0 -2
  68. package/dist/lib/types.js +0 -2
  69. package/dist/lib/utils.js +2 -4
  70. package/dist/lib/yaml/builder.js +0 -2
  71. package/dist/lib/yaml/index.js +0 -2
  72. package/dist/lib/yaml/player.js +0 -2
  73. package/dist/lib/yaml/utils.js +0 -2
  74. package/dist/lib/yaml.js +0 -2
  75. package/dist/types/agent/agent.d.ts +2 -0
  76. package/dist/types/agent/tasks.d.ts +2 -6
  77. package/dist/types/ai-model/action-executor.d.ts +2 -2
  78. package/dist/types/ai-model/common.d.ts +1 -1
  79. package/dist/types/ai-model/inspect.d.ts +3 -0
  80. package/dist/types/ai-model/llm-planning.d.ts +1 -0
  81. package/dist/types/ai-model/prompt/playwright-generator.d.ts +2 -2
  82. package/dist/types/ai-model/prompt/yaml-generator.d.ts +2 -2
  83. package/dist/types/ai-model/service-caller/index.d.ts +3 -2
  84. package/dist/types/ai-model/ui-tars-planning.d.ts +1 -0
  85. package/dist/types/insight/index.d.ts +3 -2
  86. package/dist/types/types.d.ts +1 -0
  87. package/package.json +3 -3
  88. package/dist/es/agent/agent.mjs.map +0 -1
  89. package/dist/es/agent/task-cache.mjs.map +0 -1
  90. package/dist/es/agent/tasks.mjs.map +0 -1
  91. package/dist/es/agent/ui-utils.mjs.map +0 -1
  92. package/dist/es/agent/utils.mjs.map +0 -1
  93. package/dist/es/ai-model/action-executor.mjs.map +0 -1
  94. package/dist/es/ai-model/common.mjs.map +0 -1
  95. package/dist/es/ai-model/conversation-history.mjs.map +0 -1
  96. package/dist/es/ai-model/inspect.mjs.map +0 -1
  97. package/dist/es/ai-model/llm-planning.mjs.map +0 -1
  98. package/dist/es/ai-model/prompt/assertion.mjs.map +0 -1
  99. package/dist/es/ai-model/prompt/common.mjs.map +0 -1
  100. package/dist/es/ai-model/prompt/describe.mjs.map +0 -1
  101. package/dist/es/ai-model/prompt/extraction.mjs.map +0 -1
  102. package/dist/es/ai-model/prompt/llm-locator.mjs.map +0 -1
  103. package/dist/es/ai-model/prompt/llm-planning.mjs.map +0 -1
  104. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +0 -1
  105. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +0 -1
  106. package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +0 -1
  107. package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +0 -1
  108. package/dist/es/ai-model/prompt/util.mjs.map +0 -1
  109. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +0 -1
  110. package/dist/es/ai-model/service-caller/index.mjs.map +0 -1
  111. package/dist/es/ai-model/ui-tars-planning.mjs.map +0 -1
  112. package/dist/es/device/index.mjs.map +0 -1
  113. package/dist/es/index.mjs.map +0 -1
  114. package/dist/es/insight/index.mjs.map +0 -1
  115. package/dist/es/insight/utils.mjs.map +0 -1
  116. package/dist/es/report.mjs.map +0 -1
  117. package/dist/es/types.mjs.map +0 -1
  118. package/dist/es/utils.mjs.map +0 -1
  119. package/dist/es/yaml/builder.mjs.map +0 -1
  120. package/dist/es/yaml/player.mjs.map +0 -1
  121. package/dist/es/yaml/utils.mjs.map +0 -1
  122. package/dist/lib/agent/agent.js.map +0 -1
  123. package/dist/lib/agent/index.js.map +0 -1
  124. package/dist/lib/agent/task-cache.js.map +0 -1
  125. package/dist/lib/agent/tasks.js.map +0 -1
  126. package/dist/lib/agent/ui-utils.js.map +0 -1
  127. package/dist/lib/agent/utils.js.map +0 -1
  128. package/dist/lib/ai-model/action-executor.js.map +0 -1
  129. package/dist/lib/ai-model/common.js.map +0 -1
  130. package/dist/lib/ai-model/conversation-history.js.map +0 -1
  131. package/dist/lib/ai-model/index.js.map +0 -1
  132. package/dist/lib/ai-model/inspect.js.map +0 -1
  133. package/dist/lib/ai-model/llm-planning.js.map +0 -1
  134. package/dist/lib/ai-model/prompt/assertion.js.map +0 -1
  135. package/dist/lib/ai-model/prompt/common.js.map +0 -1
  136. package/dist/lib/ai-model/prompt/describe.js.map +0 -1
  137. package/dist/lib/ai-model/prompt/extraction.js.map +0 -1
  138. package/dist/lib/ai-model/prompt/llm-locator.js.map +0 -1
  139. package/dist/lib/ai-model/prompt/llm-planning.js.map +0 -1
  140. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +0 -1
  141. package/dist/lib/ai-model/prompt/playwright-generator.js.map +0 -1
  142. package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +0 -1
  143. package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +0 -1
  144. package/dist/lib/ai-model/prompt/util.js.map +0 -1
  145. package/dist/lib/ai-model/prompt/yaml-generator.js.map +0 -1
  146. package/dist/lib/ai-model/service-caller/index.js.map +0 -1
  147. package/dist/lib/ai-model/ui-tars-planning.js.map +0 -1
  148. package/dist/lib/device/index.js.map +0 -1
  149. package/dist/lib/image/index.js.map +0 -1
  150. package/dist/lib/index.js.map +0 -1
  151. package/dist/lib/insight/index.js.map +0 -1
  152. package/dist/lib/insight/utils.js.map +0 -1
  153. package/dist/lib/report.js.map +0 -1
  154. package/dist/lib/tree.js.map +0 -1
  155. package/dist/lib/types.js.map +0 -1
  156. package/dist/lib/utils.js.map +0 -1
  157. package/dist/lib/yaml/builder.js.map +0 -1
  158. package/dist/lib/yaml/index.js.map +0 -1
  159. package/dist/lib/yaml/player.js.map +0 -1
  160. package/dist/lib/yaml/utils.js.map +0 -1
  161. package/dist/lib/yaml.js.map +0 -1
@@ -285,6 +285,8 @@ class Agent {
285
285
  }
286
286
  async aiAction(taskPrompt, opt) {
287
287
  var _this_taskCache, _this_taskCache1;
288
+ const abortSignal = null == opt ? void 0 : opt.abortSignal;
289
+ if (null == abortSignal ? void 0 : abortSignal.aborted) throw new Error(`aiAct aborted: ${abortSignal.reason || 'signal already aborted'}`);
288
290
  const modelConfig = this.modelConfigManager.getModelConfig('planning');
289
291
  const cacheable = null == opt ? void 0 : opt.cacheable;
290
292
  const isVlmUiTars = 'vlm-ui-tars' === modelConfig.vlMode;
@@ -297,7 +299,7 @@ class Agent {
297
299
  const yaml = null == (_matchedCache_cacheContent1 = matchedCache.cacheContent) ? void 0 : _matchedCache_cacheContent1.yamlWorkflow;
298
300
  return this.runYaml(yaml);
299
301
  }
300
- const { output, executor } = await this.taskExecutor.action(taskPrompt, modelConfig, this.opts.aiActionContext, cacheable);
302
+ const { output, executor } = await this.taskExecutor.action(taskPrompt, modelConfig, this.opts.aiActionContext, cacheable, abortSignal);
301
303
  console.log(output, 'output');
302
304
  if (this.taskCache && (null == output ? void 0 : output.yamlFlow) && false !== cacheable) {
303
305
  const yamlContent = {
@@ -349,7 +351,7 @@ class Agent {
349
351
  return this.aiString(prompt, opt);
350
352
  }
351
353
  async describeElementAtPoint(center, opt) {
352
- const { verifyPrompt = true, retryLimit = 3 } = opt || {};
354
+ const { verifyPrompt = true, retryLimit = 3, abortSignal } = opt || {};
353
355
  let success = false;
354
356
  let retryCount = 0;
355
357
  let resultPrompt = '';
@@ -360,7 +362,8 @@ class Agent {
360
362
  debug('aiDescribe', center, 'verifyPrompt', verifyPrompt, 'retryCount', retryCount, 'deepThink', deepThink);
361
363
  const modelConfig = this.modelConfigManager.getModelConfig('grounding');
362
364
  const text = await this.insight.describe(center, modelConfig, {
363
- deepThink
365
+ deepThink,
366
+ abortSignal
364
367
  });
365
368
  debug('aiDescribe text', text);
366
369
  assert(text.description, `failed to describe element at [${center}]`);
@@ -478,7 +481,6 @@ class Agent {
478
481
  async destroy() {
479
482
  var _this_interface_destroy, _this_interface;
480
483
  if (this.destroyed) return;
481
- this.taskExecutor.cancelPlanning();
482
484
  await (null == (_this_interface_destroy = (_this_interface = this.interface).destroy) ? void 0 : _this_interface_destroy.call(_this_interface));
483
485
  this.resetDump();
484
486
  this.destroyed = true;
@@ -633,5 +635,3 @@ class Agent {
633
635
  }
634
636
  const createAgent = (interfaceInstance, opts)=>new Agent(interfaceInstance, opts);
635
637
  export { Agent, createAgent };
636
-
637
- //# sourceMappingURL=agent.mjs.map
@@ -180,5 +180,3 @@ class TaskCache {
180
180
  }
181
181
  }
182
182
  export { TaskCache, cacheFileExt, debug };
183
-
184
- //# sourceMappingURL=task-cache.mjs.map
@@ -81,7 +81,7 @@ class TaskExecutor {
81
81
  thought: plan.thought,
82
82
  executor: async (param, taskContext)=>{
83
83
  var _this_taskCache, _locateCacheRecord_cacheContent;
84
- const { task } = taskContext;
84
+ const { task, abortSignal } = taskContext;
85
85
  assert((null == param ? void 0 : param.prompt) || (null == param ? void 0 : param.id) || (null == param ? void 0 : param.bbox), `No prompt or id or position or bbox to locate, param=${JSON.stringify(param)}`);
86
86
  let insightDump;
87
87
  let usage;
@@ -119,7 +119,7 @@ class TaskExecutor {
119
119
  const planHitFlag = !!elementFromPlan;
120
120
  const elementFromAiLocate = userExpectedPathHitFlag || cacheHitFlag || planHitFlag ? void 0 : (await this.insight.locate(param, {
121
121
  context: uiContext
122
- }, modelConfig)).element;
122
+ }, modelConfig, abortSignal)).element;
123
123
  const aiLocateHitFlag = !!elementFromAiLocate;
124
124
  const element = elementFromXpath || elementFromCache || elementFromPlan || elementFromAiLocate;
125
125
  if (this.interface.getElementFromPoint && element.center) try {
@@ -465,44 +465,21 @@ class TaskExecutor {
465
465
  getReplanningCycleLimit(isVlmUiTars) {
466
466
  return this.replanningCycleLimit || globalConfigManager.getEnvConfigInNumber(MIDSCENE_REPLANNING_CYCLE_LIMIT) || (isVlmUiTars ? defaultVlmUiTarsReplanningCycleLimit : defaultReplanningCycleLimit);
467
467
  }
468
- cancelPlanning() {
469
- var _this_abortController;
470
- console.log("\u53D6\u6D88\u6267\u884C\u89C4\u5212");
471
- this.isPlanning = false;
472
- null == (_this_abortController = this.abortController) || _this_abortController.abort();
473
- }
474
- cleanupPlanning() {
475
- var _this_abortController;
476
- null == (_this_abortController = this.abortController) || _this_abortController.abort();
477
- this.isPlanning = false;
478
- }
479
- isActive() {
480
- return this.isPlanning;
481
- }
482
- async action(userPrompt, modelConfig, actionContext, cacheable) {
483
- console.log(actionContext, 'actionContext--------------');
484
- if (this.isPlanning) {
485
- var _this_abortController;
486
- console.warn('Executor is already running');
487
- null == (_this_abortController = this.abortController) || _this_abortController.abort();
488
- }
468
+ async action(userPrompt, modelConfig, actionContext, cacheable, abortSignal) {
489
469
  this.conversationHistory.reset();
490
470
  const userPromptList = userPrompt.split('\n').map((item)=>item.trim()).filter(Boolean);
491
471
  let nextPrompt = userPromptList.shift();
492
- const abortController = new AbortController();
493
- this.abortController = abortController;
494
472
  const taskExecutor = new Executor(taskTitleStr('Action', userPrompt), {
495
473
  onTaskStart: this.onTaskStartCallback,
496
- signal: abortController.signal
474
+ abortSignal
497
475
  });
498
- this.isPlanning = true;
499
476
  let replanCount = 0;
500
477
  const yamlFlow = [];
501
478
  const replanningCycleLimit = this.getReplanningCycleLimit('vlm-ui-tars' === modelConfig.vlMode);
502
- while(this.isPlanning && nextPrompt){
479
+ while(nextPrompt){
503
480
  var _result_output;
504
481
  console.log('action while running', nextPrompt);
505
- if (abortController.signal.aborted) return {
482
+ if (null == abortSignal ? void 0 : abortSignal.aborted) return {
506
483
  output: void 0,
507
484
  executor: taskExecutor
508
485
  };
@@ -515,14 +492,10 @@ class TaskExecutor {
515
492
  const result = await taskExecutor.flush();
516
493
  console.dir(null == result ? void 0 : null == (_result_output = result.output) ? void 0 : _result_output.actions, 'result?.output?.actions');
517
494
  const planResult = null == result ? void 0 : result.output;
518
- console.log('more_actions_needed_by_instruction:', null == planResult ? void 0 : planResult.more_actions_needed_by_instruction);
519
- if (taskExecutor.isInErrorState()) {
520
- this.cleanupPlanning();
521
- return {
522
- output: planResult,
523
- executor: taskExecutor
524
- };
525
- }
495
+ if (taskExecutor.isInErrorState()) return {
496
+ output: planResult,
497
+ executor: taskExecutor
498
+ };
526
499
  const plans = (null == planResult ? void 0 : planResult.actions) || [];
527
500
  yamlFlow.push(...(null == planResult ? void 0 : planResult.yamlFlow) || []);
528
501
  let executables;
@@ -530,17 +503,13 @@ class TaskExecutor {
530
503
  executables = await this.convertPlanToExecutable(plans, modelConfig, cacheable);
531
504
  taskExecutor.append(executables.tasks);
532
505
  } catch (error) {
533
- this.cleanupPlanning();
534
506
  return this.appendErrorPlan(taskExecutor, `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(plans)}`, modelConfig);
535
507
  }
536
508
  await taskExecutor.flush();
537
- if (taskExecutor.isInErrorState()) {
538
- this.cleanupPlanning();
539
- return {
540
- output: void 0,
541
- executor: taskExecutor
542
- };
543
- }
509
+ if (taskExecutor.isInErrorState()) return {
510
+ output: void 0,
511
+ executor: taskExecutor
512
+ };
544
513
  if (null == planResult ? void 0 : planResult.more_actions_needed_by_instruction) replanCount++;
545
514
  else {
546
515
  this.conversationHistory.reset();
@@ -548,7 +517,6 @@ class TaskExecutor {
548
517
  replanCount = 0;
549
518
  }
550
519
  }
551
- this.cleanupPlanning();
552
520
  return {
553
521
  output: {
554
522
  yamlFlow
@@ -711,7 +679,6 @@ class TaskExecutor {
711
679
  _define_property(this, "onTaskStartCallback", void 0);
712
680
  _define_property(this, "replanningCycleLimit", void 0);
713
681
  _define_property(this, "isPlanning", false);
714
- _define_property(this, "abortController", null);
715
682
  this.interface = interfaceInstance;
716
683
  this.insight = insight;
717
684
  this.taskCache = opts.taskCache;
@@ -721,5 +688,3 @@ class TaskExecutor {
721
688
  }
722
689
  }
723
690
  export { TaskExecutor, locatePlanForLocate };
724
-
725
- //# sourceMappingURL=tasks.mjs.map
@@ -68,5 +68,3 @@ function paramStr(task) {
68
68
  return JSON.stringify(value, void 0, 2);
69
69
  }
70
70
  export { locateParamStr, paramStr, pullParamStr, scrollParamStr, taskTitleStr, typeStr };
71
-
72
- //# sourceMappingURL=ui-utils.mjs.map
@@ -143,7 +143,7 @@ function trimContextByViewport(execution) {
143
143
  }) : execution.tasks
144
144
  };
145
145
  }
146
- const getRpasceneVersion = ()=>"0.30.18";
146
+ const getRpasceneVersion = ()=>"0.30.19";
147
147
  const parsePrompt = (prompt)=>{
148
148
  if ('string' == typeof prompt) return {
149
149
  textPrompt: prompt,
@@ -158,5 +158,3 @@ const parsePrompt = (prompt)=>{
158
158
  };
159
159
  };
160
160
  export { commonContextParser, generateCacheId, getCurrentExecutionFile, getReportFileName, getRpasceneVersion, matchElementFromCache, matchElementFromPlan, parsePrompt, printReportMsg, trimContextByViewport };
161
-
162
- //# sourceMappingURL=utils.mjs.map
@@ -42,8 +42,8 @@ class Executor {
42
42
  start: Date.now()
43
43
  };
44
44
  try {
45
- var _this_signal;
46
- if (null == (_this_signal = this.signal) ? void 0 : _this_signal.aborted) throw 'Operation stopped';
45
+ var _this_abortSignal;
46
+ if (null == (_this_abortSignal = this.abortSignal) ? void 0 : _this_abortSignal.aborted) throw 'Operation stopped';
47
47
  task.status = 'running';
48
48
  try {
49
49
  if (this.onTaskStart) await this.onTaskStart(task);
@@ -60,7 +60,8 @@ class Executor {
60
60
  let returnValue;
61
61
  const executorContext = {
62
62
  task,
63
- element: null == previousFindOutput ? void 0 : previousFindOutput.element
63
+ element: null == previousFindOutput ? void 0 : previousFindOutput.element,
64
+ abortSignal: this.abortSignal
64
65
  };
65
66
  if ('Insight' === task.type) {
66
67
  assert('Locate' === task.subType || 'Query' === task.subType || 'Assert' === task.subType || 'WaitFor' === task.subType || 'Boolean' === task.subType || 'Number' === task.subType || 'String' === task.subType, `unsupported insight subType: ${task.subType}`);
@@ -121,14 +122,12 @@ class Executor {
121
122
  _define_property(this, "tasks", void 0);
122
123
  _define_property(this, "status", void 0);
123
124
  _define_property(this, "onTaskStart", void 0);
124
- _define_property(this, "signal", void 0);
125
+ _define_property(this, "abortSignal", void 0);
125
126
  this.status = (null == options ? void 0 : options.tasks) && options.tasks.length > 0 ? 'pending' : 'init';
126
127
  this.name = name;
127
128
  this.tasks = ((null == options ? void 0 : options.tasks) || []).map((item)=>this.markTaskAsPending(item));
128
129
  this.onTaskStart = null == options ? void 0 : options.onTaskStart;
129
- this.signal = null == options ? void 0 : options.signal;
130
+ this.abortSignal = null == options ? void 0 : options.abortSignal;
130
131
  }
131
132
  }
132
133
  export { Executor };
133
-
134
- //# sourceMappingURL=action-executor.mjs.map
@@ -351,5 +351,3 @@ const parseActionParam = (rawParam, zodSchema)=>{
351
351
  return validated;
352
352
  };
353
353
  export { common_AIActionType as AIActionType, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBbox, adaptBboxToRect, adaptDoubaoBbox, adaptGeminiBbox, adaptQwenBbox, buildYamlFlowFromPlans, dumpActionParam, dumpRpasceneLocatorField, expandSearchArea, fillBboxParam, findAllRpasceneLocatorField, getRpasceneLocationSchema, ifRpasceneLocatorField, loadActionParam, markupImageForLLM, mergeRects, normalized01000, parseActionParam, warnGPT4oSizeLimit };
354
-
355
- //# sourceMappingURL=common.mjs.map
@@ -54,5 +54,3 @@ class ConversationHistory {
54
54
  }
55
55
  }
56
56
  export { ConversationHistory };
57
-
58
- //# sourceMappingURL=conversation-history.mjs.map
@@ -54,7 +54,7 @@ const promptsToChatParam = async (multimodalPrompt)=>{
54
54
  return msgs;
55
55
  };
56
56
  async function AiLocateElement(options) {
57
- const { context, targetElementDescription, callAIFn, modelConfig } = options;
57
+ const { context, targetElementDescription, callAIFn, modelConfig, abortSignal } = options;
58
58
  const { vlMode } = modelConfig;
59
59
  const { screenshotBase64 } = context;
60
60
  const { description, elementById, insertElementByPosition } = await describeUserPage(context, {
@@ -116,7 +116,7 @@ async function AiLocateElement(options) {
116
116
  });
117
117
  msgs.push(...addOns);
118
118
  }
119
- const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, modelConfig);
119
+ const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, modelConfig, abortSignal);
120
120
  const rawResponse = JSON.stringify(res.content);
121
121
  let resRect;
122
122
  let matchedElements = 'elements' in res.content ? res.content.elements : [];
@@ -163,7 +163,7 @@ async function AiLocateElement(options) {
163
163
  };
164
164
  }
165
165
  async function AiLocateSection(options) {
166
- const { context, sectionDescription, modelConfig } = options;
166
+ const { context, sectionDescription, modelConfig, abortSignal } = options;
167
167
  const { vlMode } = modelConfig;
168
168
  const { screenshotBase64 } = context;
169
169
  const systemPrompt = systemPromptToLocateSection(vlMode);
@@ -199,7 +199,7 @@ async function AiLocateSection(options) {
199
199
  });
200
200
  msgs.push(...addOns);
201
201
  }
202
- const result = await callAIWithObjectResponse(msgs, AIActionType.EXTRACT_DATA, modelConfig);
202
+ const result = await callAIWithObjectResponse(msgs, AIActionType.EXTRACT_DATA, modelConfig, abortSignal);
203
203
  let sectionRect;
204
204
  const sectionBbox = result.content.bbox;
205
205
  if (sectionBbox) {
@@ -233,7 +233,7 @@ async function AiLocateSection(options) {
233
233
  };
234
234
  }
235
235
  async function AiExtractElementInfo(options) {
236
- const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } = options;
236
+ const { dataQuery, context, extractOption, multimodalPrompt, modelConfig, abortSignal } = options;
237
237
  const { vlMode } = modelConfig;
238
238
  const systemPrompt = systemPromptToExtract();
239
239
  const { screenshotBase64 } = context;
@@ -274,7 +274,7 @@ async function AiExtractElementInfo(options) {
274
274
  });
275
275
  msgs.push(...addOns);
276
276
  }
277
- const result = await callAIWithObjectResponse(msgs, AIActionType.EXTRACT_DATA, modelConfig);
277
+ const result = await callAIWithObjectResponse(msgs, AIActionType.EXTRACT_DATA, modelConfig, abortSignal);
278
278
  return {
279
279
  parseResult: result.content,
280
280
  elementById,
@@ -282,5 +282,3 @@ async function AiExtractElementInfo(options) {
282
282
  };
283
283
  }
284
284
  export { AiExtractElementInfo, AiLocateElement, AiLocateSection };
285
-
286
- //# sourceMappingURL=inspect.mjs.map
@@ -8,7 +8,7 @@ import { callAIWithObjectResponse } from "./service-caller/index.mjs";
8
8
  const debug = getDebug('planning');
9
9
  async function plan(userInstruction, opts) {
10
10
  var _opts_conversationHistory, _planFromAI_action;
11
- const { context, modelConfig, conversationHistory } = opts;
11
+ const { context, modelConfig, conversationHistory, abortSignal } = opts;
12
12
  const { screenshotBase64, size } = context;
13
13
  const { modelName, vlMode } = modelConfig;
14
14
  const { description: pageDescription, elementById } = await describeUserPage(context, {
@@ -84,7 +84,7 @@ async function plan(userInstruction, opts) {
84
84
  ]
85
85
  }
86
86
  ];
87
- const { content, usage } = await callAIWithObjectResponse(msgs, AIActionType.PLAN, modelConfig);
87
+ const { content, usage } = await callAIWithObjectResponse(msgs, AIActionType.PLAN, modelConfig, abortSignal);
88
88
  const rawResponse = JSON.stringify(content, void 0, 2);
89
89
  const planFromAI = content;
90
90
  const actions = ((null == (_planFromAI_action = planFromAI.action) ? void 0 : _planFromAI_action.type) ? [
@@ -136,5 +136,3 @@ async function plan(userInstruction, opts) {
136
136
  return returnValue;
137
137
  }
138
138
  export { plan };
139
-
140
- //# sourceMappingURL=llm-planning.mjs.map
@@ -27,5 +27,3 @@ const assertSchema = {
27
27
  }
28
28
  };
29
29
  export { assertSchema };
30
-
31
- //# sourceMappingURL=assertion.mjs.map
@@ -3,5 +3,3 @@ function bboxDescription(vlMode) {
3
3
  return '2d bounding box as [xmin, ymin, xmax, ymax]';
4
4
  }
5
5
  export { bboxDescription };
6
-
7
- //# sourceMappingURL=common.mjs.map
@@ -40,5 +40,3 @@ Return JSON:
40
40
  "error"?: "error message if any"
41
41
  }`;
42
42
  export { elementDescriberInstruction };
43
-
44
- //# sourceMappingURL=describe.mjs.map
@@ -136,5 +136,3 @@ const extractDataSchema = {
136
136
  }
137
137
  };
138
138
  export { extractDataQueryPrompt, extractDataSchema, systemPromptToExtract };
139
-
140
- //# sourceMappingURL=extraction.mjs.map
@@ -271,5 +271,3 @@ Here is the item user want to find:
271
271
  ]
272
272
  });
273
273
  export { findElementPrompt, locatorSchema, systemPromptToLocateElement };
274
-
275
- //# sourceMappingURL=llm-locator.mjs.map
@@ -363,5 +363,3 @@ const planSchema = {
363
363
  }
364
364
  };
365
365
  export { descriptionForAction, planSchema, systemPromptToTaskPlanning };
366
-
367
- //# sourceMappingURL=llm-planning.mjs.map
@@ -43,5 +43,3 @@ const sectionLocatorInstruction = new PromptTemplate({
43
43
  ]
44
44
  });
45
45
  export { sectionLocatorInstruction, systemPromptToLocateSection };
46
-
47
- //# sourceMappingURL=llm-section-locator.mjs.map
@@ -1,7 +1,7 @@
1
1
  import { PLAYWRIGHT_EXAMPLE_CODE } from "@rpascene/shared/constants";
2
2
  import { AIActionType, callAI, callAIWithStringResponse } from "../index.mjs";
3
3
  import { createEventCounts, createMessageContent, extractInputDescriptions, filterEventsByType, getScreenshotsForLLM, prepareEventSummary, processEventsForLLM, validateEvents } from "./yaml-generator.mjs";
4
- const generatePlaywrightTest = async (events, options, modelConfig)=>{
4
+ const generatePlaywrightTest = async (events, options, modelConfig, abortSignal)=>{
5
5
  validateEvents(events);
6
6
  const summary = prepareEventSummary(events, {
7
7
  testName: options.testName,
@@ -47,11 +47,11 @@ ${PLAYWRIGHT_EXAMPLE_CODE}`;
47
47
  content: messageContent
48
48
  }
49
49
  ];
50
- const response = await callAIWithStringResponse(prompt, AIActionType.TEXT, modelConfig);
50
+ const response = await callAIWithStringResponse(prompt, AIActionType.TEXT, modelConfig, abortSignal);
51
51
  if ((null == response ? void 0 : response.content) && 'string' == typeof response.content) return response.content;
52
52
  throw new Error('Failed to generate Playwright test code');
53
53
  };
54
- const generatePlaywrightTestStream = async (events, options, modelConfig)=>{
54
+ const generatePlaywrightTestStream = async (events, options, modelConfig, abortSignal)=>{
55
55
  validateEvents(events);
56
56
  const summary = prepareEventSummary(events, {
57
57
  testName: options.testName,
@@ -100,10 +100,11 @@ ${PLAYWRIGHT_EXAMPLE_CODE}`;
100
100
  ];
101
101
  if (options.stream && options.onChunk) return await callAI(prompt, AIActionType.TEXT, modelConfig, {
102
102
  stream: true,
103
- onChunk: options.onChunk
103
+ onChunk: options.onChunk,
104
+ abortSignal
104
105
  });
105
106
  {
106
- const response = await callAIWithStringResponse(prompt, AIActionType.TEXT, modelConfig);
107
+ const response = await callAIWithStringResponse(prompt, AIActionType.TEXT, modelConfig, abortSignal);
107
108
  if ((null == response ? void 0 : response.content) && 'string' == typeof response.content) return {
108
109
  content: response.content,
109
110
  usage: response.usage,
@@ -113,5 +114,3 @@ ${PLAYWRIGHT_EXAMPLE_CODE}`;
113
114
  }
114
115
  };
115
116
  export { createEventCounts, createMessageContent, extractInputDescriptions, filterEventsByType, generatePlaywrightTest, generatePlaywrightTestStream, getScreenshotsForLLM, prepareEventSummary, processEventsForLLM, validateEvents };
116
-
117
- //# sourceMappingURL=playwright-generator.mjs.map
@@ -30,5 +30,3 @@ call_user() # Submit the task and call the user when the task is unsolvable, or
30
30
  `;
31
31
  }
32
32
  export { systemPromptToLocateElementPosition };
33
-
34
- //# sourceMappingURL=ui-tars-locator.mjs.map
@@ -32,5 +32,3 @@ finished(content='xxx') # Use escape characters \\', \\", and \\n in content par
32
32
  }
33
33
  const getSummary = (prediction)=>prediction.replace(/Reflection:[\s\S]*?(?=Action_Summary:|Action:|$)/g, '').trim();
34
34
  export { getSummary, getUiTarsPlanningPrompt };
35
-
36
- //# sourceMappingURL=ui-tars-planning.mjs.map
@@ -120,5 +120,3 @@ async function describeUserPage(context, opt) {
120
120
  };
121
121
  }
122
122
  export { describeElement, describeSize, describeUserPage, distance, distanceThreshold, elementByPositionWithElementInfo, samplePageDescription };
123
-
124
- //# sourceMappingURL=util.mjs.map
@@ -92,7 +92,7 @@ const createMessageContent = (promptText, screenshots = [], includeScreenshots =
92
92
  const validateEvents = (events)=>{
93
93
  if (!events.length) throw new Error('No events provided for test generation');
94
94
  };
95
- const generateYamlTest = async (events, options, modelConfig)=>{
95
+ const generateYamlTest = async (events, options, modelConfig, abortSignal)=>{
96
96
  try {
97
97
  validateEvents(events);
98
98
  const summary = prepareEventSummary(events, {
@@ -141,14 +141,14 @@ Important: Return ONLY the raw YAML content. Do NOT wrap the response in markdow
141
141
  }))
142
142
  });
143
143
  }
144
- const response = await callAIWithStringResponse(prompt, AIActionType.TEXT, modelConfig);
144
+ const response = await callAIWithStringResponse(prompt, AIActionType.TEXT, modelConfig, abortSignal);
145
145
  if ((null == response ? void 0 : response.content) && 'string' == typeof response.content) return response.content;
146
146
  throw new Error('Failed to generate YAML test configuration');
147
147
  } catch (error) {
148
148
  throw new Error(`Failed to generate YAML test: ${error}`);
149
149
  }
150
150
  };
151
- const generateYamlTestStream = async (events, options, modelConfig)=>{
151
+ const generateYamlTestStream = async (events, options, modelConfig, abortSignal)=>{
152
152
  try {
153
153
  validateEvents(events);
154
154
  const summary = prepareEventSummary(events, {
@@ -199,10 +199,11 @@ Important: Return ONLY the raw YAML content. Do NOT wrap the response in markdow
199
199
  }
200
200
  if (options.stream && options.onChunk) return await callAI(prompt, AIActionType.TEXT, modelConfig, {
201
201
  stream: true,
202
- onChunk: options.onChunk
202
+ onChunk: options.onChunk,
203
+ abortSignal
203
204
  });
204
205
  {
205
- const response = await callAIWithStringResponse(prompt, AIActionType.TEXT, modelConfig);
206
+ const response = await callAIWithStringResponse(prompt, AIActionType.TEXT, modelConfig, abortSignal);
206
207
  if ((null == response ? void 0 : response.content) && 'string' == typeof response.content) return {
207
208
  content: response.content,
208
209
  usage: response.usage,
@@ -215,5 +216,3 @@ Important: Return ONLY the raw YAML content. Do NOT wrap the response in markdow
215
216
  }
216
217
  };
217
218
  export { createEventCounts, createMessageContent, extractInputDescriptions, filterEventsByType, generateYamlTest, generateYamlTestStream, getScreenshotsForLLM, prepareEventSummary, processEventsForLLM, validateEvents };
218
-
219
- //# sourceMappingURL=yaml-generator.mjs.map
@@ -2,10 +2,10 @@ import { AIResponseFormat } from "../../types.mjs";
2
2
  import { Anthropic } from "@anthropic-ai/sdk";
3
3
  import { DefaultAzureCredential, getBearerTokenProvider } from "@azure/identity";
4
4
  import { MIDSCENE_API_TYPE, MIDSCENE_LANGSMITH_DEBUG, OPENAI_MAX_TOKENS, globalConfigManager } from "@rpascene/shared/env";
5
+ import { YHT_NUMBER_ENV_KEYS, YHT_STRING_ENV_KEYS } from "@rpascene/shared/env/types";
5
6
  import { parseBase64 } from "@rpascene/shared/img";
6
7
  import { getDebug } from "@rpascene/shared/logger";
7
8
  import { assert, ifInBrowser } from "@rpascene/shared/utils";
8
- import { YHT_NUMBER_ENV_KEYS, YHT_STRING_ENV_KEYS } from "@rpascene/shared/env/types";
9
9
  import { HttpsProxyAgent } from "https-proxy-agent";
10
10
  import { jsonrepair } from "jsonrepair";
11
11
  import openai_0, { AzureOpenAI } from "openai";
@@ -346,8 +346,10 @@ const getResponseFormat = (modelName, AIActionTypeValue)=>{
346
346
  };
347
347
  return responseFormat;
348
348
  };
349
- async function callAIWithObjectResponse(messages, AIActionTypeValue, modelConfig) {
350
- const response = await callAI(messages, AIActionTypeValue, modelConfig);
349
+ async function callAIWithObjectResponse(messages, AIActionTypeValue, modelConfig, abortSignal) {
350
+ const response = await callAI(messages, AIActionTypeValue, modelConfig, {
351
+ abortSignal
352
+ });
351
353
  assert(response, 'empty response');
352
354
  const vlMode = modelConfig.vlMode;
353
355
  const jsonContent = safeParseJson(response.content, vlMode);
@@ -356,8 +358,10 @@ async function callAIWithObjectResponse(messages, AIActionTypeValue, modelConfig
356
358
  usage: response.usage
357
359
  };
358
360
  }
359
- async function callAIWithStringResponse(msgs, AIActionTypeValue, modelConfig) {
360
- const { content, usage } = await callAI(msgs, AIActionTypeValue, modelConfig);
361
+ async function callAIWithStringResponse(msgs, AIActionTypeValue, modelConfig, abortSignal) {
362
+ const { content, usage } = await callAI(msgs, AIActionTypeValue, modelConfig, {
363
+ abortSignal
364
+ });
361
365
  return {
362
366
  content,
363
367
  usage
@@ -440,7 +444,8 @@ async function callAI(messages, AIActionTypeValue, modelConfig, options) {
440
444
  yht_access_token: config.yht_access_token,
441
445
  traceId: config.traceId
442
446
  },
443
- body: JSON.stringify(requestBody)
447
+ body: JSON.stringify(requestBody),
448
+ signal: null == options ? void 0 : options.abortSignal
444
449
  });
445
450
  if (!response.ok) {
446
451
  const errorText = await response.text();
@@ -544,5 +549,3 @@ function convertToYhtMessageFormat(messages) {
544
549
  });
545
550
  }
546
551
  export { callAI, callAIOld, callAIWithObjectResponse, callAIWithStringResponse, defaultYhtConfig, extractJSONFromCodeBlock, getResponseFormat, preprocessDoubaoBboxJson, safeParseJson };
547
-
548
- //# sourceMappingURL=index.mjs.map
@@ -16,7 +16,7 @@ const pointToBbox = (point, width, height)=>[
16
16
  Math.round(Math.min(point.y + bboxSize / 2, height))
17
17
  ];
18
18
  async function uiTarsPlanning(userInstruction, options) {
19
- const { conversationHistory, context, modelConfig } = options;
19
+ const { conversationHistory, context, modelConfig, abortSignal } = options;
20
20
  const { uiTarsModelVersion } = modelConfig;
21
21
  const systemPrompt = getUiTarsPlanningPrompt() + userInstruction;
22
22
  const imagePayload = await resizeImageForUiTars(context.screenshotBase64, context.size, uiTarsModelVersion);
@@ -37,7 +37,7 @@ async function uiTarsPlanning(userInstruction, options) {
37
37
  content: systemPrompt
38
38
  },
39
39
  ...conversationHistory.snapshot()
40
- ], AIActionType.INSPECT_ELEMENT, modelConfig);
40
+ ], AIActionType.INSPECT_ELEMENT, modelConfig, abortSignal);
41
41
  const convertedText = convertBboxToCoordinates(res.content);
42
42
  const { size } = context;
43
43
  const { parsed } = actionParser({
@@ -197,5 +197,3 @@ async function resizeImageForUiTars(imageBase64, size, uiTarsVersion) {
197
197
  return imageBase64;
198
198
  }
199
199
  export { resizeImageForUiTars, uiTarsPlanning };
200
-
201
- //# sourceMappingURL=ui-tars-planning.mjs.map
@@ -148,5 +148,3 @@ const defineActionClearInput = (call)=>defineAction({
148
148
  call
149
149
  });
150
150
  export { AbstractInterface, ActionLongPressParamSchema, ActionSwipeParamSchema, actionClearInputParamSchema, actionDoubleClickParamSchema, actionDragAndDropParamSchema, actionHoverParamSchema, actionInputParamSchema, actionKeyboardPressParamSchema, actionRightClickParamSchema, actionScrollParamSchema, actionTapParamSchema, defineAction, defineActionClearInput, defineActionDoubleClick, defineActionDragAndDrop, defineActionHover, defineActionInput, defineActionKeyboardPress, defineActionLongPress, defineActionRightClick, defineActionScroll, defineActionSwipe, defineActionTap };
151
-
152
- //# sourceMappingURL=index.mjs.map
package/dist/es/index.mjs CHANGED
@@ -7,5 +7,3 @@ import { MIDSCENE_MODEL_NAME } from "@rpascene/shared/env";
7
7
  import { Agent, createAgent } from "./agent/index.mjs";
8
8
  const src = insight;
9
9
  export { Agent, AiLocateElement, Executor, insight as Insight, MIDSCENE_MODEL_NAME, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, createAgent, src as default, describeUserPage, getRpasceneLocationSchema, getVersion, plan, z };
10
-
11
- //# sourceMappingURL=index.mjs.map