@midscene/core 1.2.0 → 1.2.1-beta-20260109060244.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
4
- "version": "1.2.0",
4
+ "version": "1.2.1-beta-20260109060244.0",
5
5
  "repository": "https://github.com/web-infra-dev/midscene",
6
6
  "homepage": "https://midscenejs.com/",
7
7
  "main": "./dist/lib/index.js",
@@ -89,7 +89,7 @@
89
89
  "semver": "7.5.2",
90
90
  "undici": "^6.0.0",
91
91
  "zod": "3.24.3",
92
- "@midscene/shared": "1.2.0"
92
+ "@midscene/shared": "1.2.1-beta-20260109060244.0"
93
93
  },
94
94
  "devDependencies": {
95
95
  "@rslib/core": "^0.18.3",
@@ -1,34 +0,0 @@
1
- import { getPreferredLanguage } from "@midscene/shared/env";
2
- function systemPromptToLocateElementPosition() {
3
- const preferredLanguage = getPreferredLanguage();
4
- return `
5
- You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
6
-
7
- ## Output Format
8
- \`\`\`
9
- Thought: ...
10
- Action: ...
11
- \`\`\`
12
-
13
- ## Action Space
14
- click(start_box='[x1, y1, x2, y2]')
15
- left_double(start_box='[x1, y1, x2, y2]')
16
- right_single(start_box='[x1, y1, x2, y2]')
17
- drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')
18
- hotkey(key='')
19
- type(content='') #If you want to submit your input, use "\\n" at the end of \`content\`.
20
- scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')
21
- wait() #Sleep for 5s and take a screenshot to check for any changes.
22
- finished()
23
- call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
24
-
25
- ## Note
26
- - Use ${preferredLanguage} in \`Thought\` part.
27
- - Write a small plan and finally summarize your next action (with its target element) in one sentence in \`Thought\` part.
28
-
29
- ## User Instruction
30
- `;
31
- }
32
- export { systemPromptToLocateElementPosition };
33
-
34
- //# sourceMappingURL=ui-tars-locator.mjs.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"ai-model/prompt/ui-tars-locator.mjs","sources":["../../../../src/ai-model/prompt/ui-tars-locator.ts"],"sourcesContent":["import { getPreferredLanguage } from '@midscene/shared/env';\n\n// claude 3.5 sonnet computer The ability to understand the content of the image is better, Does not provide element snapshot effect\nexport function systemPromptToLocateElementPosition() {\n const preferredLanguage = getPreferredLanguage();\n\n return `\nYou are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. \n\n## Output Format\n\\`\\`\\`\nThought: ...\nAction: ...\n\\`\\`\\`\n\n## Action Space\nclick(start_box='[x1, y1, x2, y2]')\nleft_double(start_box='[x1, y1, x2, y2]')\nright_single(start_box='[x1, y1, x2, y2]')\ndrag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')\nhotkey(key='')\ntype(content='') #If you want to submit your input, use \"\\\\n\" at the end of \\`content\\`.\nscroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')\nwait() #Sleep for 5s and take a screenshot to check for any changes.\nfinished()\ncall_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.\n\n## Note\n- Use ${preferredLanguage} in \\`Thought\\` part.\n- Write a small plan and finally summarize your next action (with its target element) in one sentence in \\`Thought\\` part.\n\n## User Instruction\n `;\n}\n"],"names":["systemPromptToLocateElementPosition","preferredLanguage","getPreferredLanguage"],"mappings":";AAGO,SAASA;IACd,MAAMC,oBAAoBC;IAE1B,OAAO,CAAC;;;;;;;;;;;;;;;;;;;;;;MAsBJ,EAAED,kBAAkB;;;;IAItB,CAAC;AACL"}
@@ -1,68 +0,0 @@
1
- "use strict";
2
- var __webpack_require__ = {};
3
- (()=>{
4
- __webpack_require__.d = (exports1, definition)=>{
5
- for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
- enumerable: true,
7
- get: definition[key]
8
- });
9
- };
10
- })();
11
- (()=>{
12
- __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
- })();
14
- (()=>{
15
- __webpack_require__.r = (exports1)=>{
16
- if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
- value: 'Module'
18
- });
19
- Object.defineProperty(exports1, '__esModule', {
20
- value: true
21
- });
22
- };
23
- })();
24
- var __webpack_exports__ = {};
25
- __webpack_require__.r(__webpack_exports__);
26
- __webpack_require__.d(__webpack_exports__, {
27
- systemPromptToLocateElementPosition: ()=>systemPromptToLocateElementPosition
28
- });
29
- const env_namespaceObject = require("@midscene/shared/env");
30
- function systemPromptToLocateElementPosition() {
31
- const preferredLanguage = (0, env_namespaceObject.getPreferredLanguage)();
32
- return `
33
- You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
34
-
35
- ## Output Format
36
- \`\`\`
37
- Thought: ...
38
- Action: ...
39
- \`\`\`
40
-
41
- ## Action Space
42
- click(start_box='[x1, y1, x2, y2]')
43
- left_double(start_box='[x1, y1, x2, y2]')
44
- right_single(start_box='[x1, y1, x2, y2]')
45
- drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')
46
- hotkey(key='')
47
- type(content='') #If you want to submit your input, use "\\n" at the end of \`content\`.
48
- scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')
49
- wait() #Sleep for 5s and take a screenshot to check for any changes.
50
- finished()
51
- call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
52
-
53
- ## Note
54
- - Use ${preferredLanguage} in \`Thought\` part.
55
- - Write a small plan and finally summarize your next action (with its target element) in one sentence in \`Thought\` part.
56
-
57
- ## User Instruction
58
- `;
59
- }
60
- exports.systemPromptToLocateElementPosition = __webpack_exports__.systemPromptToLocateElementPosition;
61
- for(var __rspack_i in __webpack_exports__)if (-1 === [
62
- "systemPromptToLocateElementPosition"
63
- ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
64
- Object.defineProperty(exports, '__esModule', {
65
- value: true
66
- });
67
-
68
- //# sourceMappingURL=ui-tars-locator.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"ai-model/prompt/ui-tars-locator.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/ui-tars-locator.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import { getPreferredLanguage } from '@midscene/shared/env';\n\n// claude 3.5 sonnet computer The ability to understand the content of the image is better, Does not provide element snapshot effect\nexport function systemPromptToLocateElementPosition() {\n const preferredLanguage = getPreferredLanguage();\n\n return `\nYou are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. \n\n## Output Format\n\\`\\`\\`\nThought: ...\nAction: ...\n\\`\\`\\`\n\n## Action Space\nclick(start_box='[x1, y1, x2, y2]')\nleft_double(start_box='[x1, y1, x2, y2]')\nright_single(start_box='[x1, y1, x2, y2]')\ndrag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')\nhotkey(key='')\ntype(content='') #If you want to submit your input, use \"\\\\n\" at the end of \\`content\\`.\nscroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')\nwait() #Sleep for 5s and take a screenshot to check for any changes.\nfinished()\ncall_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.\n\n## Note\n- Use ${preferredLanguage} in \\`Thought\\` part.\n- Write a small plan and finally summarize your next action (with its target element) in one sentence in \\`Thought\\` part.\n\n## User Instruction\n `;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","systemPromptToLocateElementPosition","preferredLanguage","getPreferredLanguage"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;ACHO,SAASI;IACd,MAAMC,oBAAoBC,AAAAA,IAAAA,oBAAAA,oBAAAA,AAAAA;IAE1B,OAAO,CAAC;;;;;;;;;;;;;;;;;;;;;;MAsBJ,EAAED,kBAAkB;;;;IAItB,CAAC;AACL"}
@@ -1 +0,0 @@
1
- export declare function systemPromptToLocateElementPosition(): string;