@midscene/core 1.0.1-beta-20251208075922.0 → 1.0.1-beta-20251208112226.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/dist/es/agent/agent.mjs +22 -7
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/task-builder.mjs +30 -25
  4. package/dist/es/agent/task-builder.mjs.map +1 -1
  5. package/dist/es/agent/task-cache.mjs +1 -1
  6. package/dist/es/agent/task-cache.mjs.map +1 -1
  7. package/dist/es/agent/tasks.mjs +17 -7
  8. package/dist/es/agent/tasks.mjs.map +1 -1
  9. package/dist/es/agent/utils.mjs +1 -1
  10. package/dist/es/ai-model/conversation-history.mjs +19 -2
  11. package/dist/es/ai-model/conversation-history.mjs.map +1 -1
  12. package/dist/es/ai-model/llm-planning.mjs +3 -2
  13. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  14. package/dist/es/ai-model/prompt/llm-planning.mjs +20 -95
  15. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  16. package/dist/es/ai-model/service-caller/index.mjs +54 -39
  17. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  18. package/dist/es/device/index.mjs +13 -7
  19. package/dist/es/device/index.mjs.map +1 -1
  20. package/dist/es/utils.mjs +2 -2
  21. package/dist/lib/agent/agent.js +24 -9
  22. package/dist/lib/agent/agent.js.map +1 -1
  23. package/dist/lib/agent/common.js +1 -1
  24. package/dist/lib/agent/execution-session.js +2 -2
  25. package/dist/lib/agent/index.js +2 -2
  26. package/dist/lib/agent/task-builder.js +32 -27
  27. package/dist/lib/agent/task-builder.js.map +1 -1
  28. package/dist/lib/agent/task-cache.js +3 -3
  29. package/dist/lib/agent/task-cache.js.map +1 -1
  30. package/dist/lib/agent/tasks.js +19 -9
  31. package/dist/lib/agent/tasks.js.map +1 -1
  32. package/dist/lib/agent/ui-utils.js +2 -2
  33. package/dist/lib/agent/utils.js +3 -3
  34. package/dist/lib/ai-model/conversation-history.js +21 -4
  35. package/dist/lib/ai-model/conversation-history.js.map +1 -1
  36. package/dist/lib/ai-model/index.js +2 -2
  37. package/dist/lib/ai-model/inspect.js +2 -2
  38. package/dist/lib/ai-model/llm-planning.js +5 -4
  39. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  40. package/dist/lib/ai-model/prompt/assertion.js +2 -2
  41. package/dist/lib/ai-model/prompt/common.js +2 -2
  42. package/dist/lib/ai-model/prompt/describe.js +2 -2
  43. package/dist/lib/ai-model/prompt/extraction.js +2 -2
  44. package/dist/lib/ai-model/prompt/llm-locator.js +2 -2
  45. package/dist/lib/ai-model/prompt/llm-planning.js +22 -100
  46. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  47. package/dist/lib/ai-model/prompt/llm-section-locator.js +2 -2
  48. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +2 -2
  49. package/dist/lib/ai-model/prompt/playwright-generator.js +2 -2
  50. package/dist/lib/ai-model/prompt/ui-tars-locator.js +2 -2
  51. package/dist/lib/ai-model/prompt/ui-tars-planning.js +2 -2
  52. package/dist/lib/ai-model/prompt/util.js +2 -2
  53. package/dist/lib/ai-model/prompt/yaml-generator.js +2 -2
  54. package/dist/lib/ai-model/service-caller/index.js +55 -43
  55. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  56. package/dist/lib/ai-model/ui-tars-planning.js +2 -2
  57. package/dist/lib/common.js +2 -2
  58. package/dist/lib/device/device-options.js +1 -1
  59. package/dist/lib/device/index.js +15 -9
  60. package/dist/lib/device/index.js.map +1 -1
  61. package/dist/lib/image/index.js +2 -2
  62. package/dist/lib/index.js +2 -2
  63. package/dist/lib/report.js +2 -2
  64. package/dist/lib/service/index.js +2 -2
  65. package/dist/lib/service/utils.js +2 -2
  66. package/dist/lib/task-runner.js +2 -2
  67. package/dist/lib/tree.js +2 -2
  68. package/dist/lib/types.js +3 -3
  69. package/dist/lib/utils.js +4 -4
  70. package/dist/lib/yaml/builder.js +2 -2
  71. package/dist/lib/yaml/index.js +4 -4
  72. package/dist/lib/yaml/player.js +2 -2
  73. package/dist/lib/yaml/utils.js +2 -2
  74. package/dist/lib/yaml.js +1 -1
  75. package/dist/types/agent/agent.d.ts +1 -1
  76. package/dist/types/agent/task-builder.d.ts +4 -2
  77. package/dist/types/agent/tasks.d.ts +5 -2
  78. package/dist/types/ai-model/conversation-history.d.ts +6 -1
  79. package/dist/types/ai-model/llm-planning.d.ts +1 -0
  80. package/dist/types/ai-model/prompt/llm-planning.d.ts +0 -2
  81. package/dist/types/ai-model/service-caller/index.d.ts +1 -3
  82. package/dist/types/device/index.d.ts +10 -2
  83. package/package.json +5 -5
@@ -47,9 +47,9 @@ function buildYaml(env, tasks) {
47
47
  });
48
48
  }
49
49
  exports.buildYaml = __webpack_exports__.buildYaml;
50
- for(var __rspack_i in __webpack_exports__)if (-1 === [
50
+ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
51
51
  "buildYaml"
52
- ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
52
+ ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
53
53
  Object.defineProperty(exports, '__esModule', {
54
54
  value: true
55
55
  });
@@ -1,12 +1,12 @@
1
1
  "use strict";
2
2
  var __webpack_modules__ = {
3
- "./builder" (module) {
3
+ "./builder": function(module) {
4
4
  module.exports = require("./builder.js");
5
5
  },
6
- "./player" (module) {
6
+ "./player": function(module) {
7
7
  module.exports = require("./player.js");
8
8
  },
9
- "./utils" (module) {
9
+ "./utils": function(module) {
10
10
  module.exports = require("./utils.js");
11
11
  }
12
12
  };
@@ -66,7 +66,7 @@ var __webpack_exports__ = {};
66
66
  for(const __rspack_import_key in _utils__rspack_import_2)if ("default" !== __rspack_import_key) __rspack_reexport[__rspack_import_key] = ()=>_utils__rspack_import_2[__rspack_import_key];
67
67
  __webpack_require__.d(__webpack_exports__, __rspack_reexport);
68
68
  })();
69
- for(var __rspack_i in __webpack_exports__)exports[__rspack_i] = __webpack_exports__[__rspack_i];
69
+ for(var __webpack_i__ in __webpack_exports__)exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
70
70
  Object.defineProperty(exports, '__esModule', {
71
71
  value: true
72
72
  });
@@ -414,9 +414,9 @@ class ScriptPlayer {
414
414
  }
415
415
  }
416
416
  exports.ScriptPlayer = __webpack_exports__.ScriptPlayer;
417
- for(var __rspack_i in __webpack_exports__)if (-1 === [
417
+ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
418
418
  "ScriptPlayer"
419
- ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
419
+ ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
420
420
  Object.defineProperty(exports, '__esModule', {
421
421
  value: true
422
422
  });
@@ -113,12 +113,12 @@ exports.buildDetailedLocateParam = __webpack_exports__.buildDetailedLocateParam;
113
113
  exports.buildDetailedLocateParamAndRestParams = __webpack_exports__.buildDetailedLocateParamAndRestParams;
114
114
  exports.interpolateEnvVars = __webpack_exports__.interpolateEnvVars;
115
115
  exports.parseYamlScript = __webpack_exports__.parseYamlScript;
116
- for(var __rspack_i in __webpack_exports__)if (-1 === [
116
+ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
117
117
  "buildDetailedLocateParam",
118
118
  "buildDetailedLocateParamAndRestParams",
119
119
  "interpolateEnvVars",
120
120
  "parseYamlScript"
121
- ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
121
+ ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
122
122
  Object.defineProperty(exports, '__esModule', {
123
123
  value: true
124
124
  });
package/dist/lib/yaml.js CHANGED
@@ -12,7 +12,7 @@ var __webpack_require__ = {};
12
12
  })();
13
13
  var __webpack_exports__ = {};
14
14
  __webpack_require__.r(__webpack_exports__);
15
- for(var __rspack_i in __webpack_exports__)exports[__rspack_i] = __webpack_exports__[__rspack_i];
15
+ for(var __webpack_i__ in __webpack_exports__)exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
16
16
  Object.defineProperty(exports, '__esModule', {
17
17
  value: true
18
18
  });
@@ -95,7 +95,7 @@ export declare class Agent<InterfaceType extends AbstractInterface = AbstractInt
95
95
  aiScroll(scrollParam: ScrollParam, locatePrompt?: TUserPrompt, opt?: LocateOption): Promise<any>;
96
96
  aiAct(taskPrompt: string, opt?: {
97
97
  cacheable?: boolean;
98
- _deepThink?: boolean;
98
+ planningStrategy?: 'fast' | 'standard' | 'max';
99
99
  }): Promise<{
100
100
  result: Record<string, any>;
101
101
  } | {
@@ -1,6 +1,6 @@
1
1
  import type { AbstractInterface } from '../device';
2
2
  import type Service from '../service';
3
- import type { DetailedLocateParam, ExecutionTaskActionApply, ExecutionTaskApply, PlanningAction, PlanningActionParamSleep, PlanningLocateParam } from '../types';
3
+ import type { DetailedLocateParam, DeviceAction, ExecutionTaskActionApply, ExecutionTaskApply, PlanningAction, PlanningActionParamSleep, PlanningLocateParam } from '../types';
4
4
  import type { IModelConfig } from '@midscene/shared/env';
5
5
  import type { TaskCache } from './task-cache';
6
6
  export declare function locatePlanForLocate(param: string | DetailedLocateParam): PlanningAction<PlanningLocateParam>;
@@ -8,6 +8,7 @@ interface TaskBuilderDeps {
8
8
  interfaceInstance: AbstractInterface;
9
9
  service: Service;
10
10
  taskCache?: TaskCache;
11
+ actionSpace: DeviceAction[];
11
12
  }
12
13
  interface BuildOptions {
13
14
  cacheable?: boolean;
@@ -17,7 +18,8 @@ export declare class TaskBuilder {
17
18
  private readonly interface;
18
19
  private readonly service;
19
20
  private readonly taskCache?;
20
- constructor({ interfaceInstance, service, taskCache }: TaskBuilderDeps);
21
+ private readonly actionSpace;
22
+ constructor({ interfaceInstance, service, taskCache, actionSpace, }: TaskBuilderDeps);
21
23
  build(plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, options?: BuildOptions): Promise<{
22
24
  tasks: ExecutionTaskApply[];
23
25
  }>;
@@ -3,7 +3,7 @@ import type { AbstractInterface } from '../device';
3
3
  import type Service from '../service';
4
4
  import type { TaskRunner } from '../task-runner';
5
5
  import { TaskExecutionError } from '../task-runner';
6
- import type { ExecutionTaskApply, ExecutionTaskProgressOptions, MidsceneYamlFlowItem, PlanningAction, PlanningActionParamSleep, PlanningActionParamWaitFor, ServiceExtractOption, ServiceExtractParam } from '../types';
6
+ import type { ExecutionTaskApply, ExecutionTaskProgressOptions, MidsceneYamlFlowItem, PlanningAction, PlanningActionParamSleep, PlanningActionParamWaitFor, ServiceExtractOption, ServiceExtractParam, DeviceAction } from '../types';
7
7
  import type { IModelConfig } from '@midscene/shared/env';
8
8
  import type { TaskCache } from './task-cache';
9
9
  export { locatePlanForLocate } from './task-builder';
@@ -20,6 +20,7 @@ export declare class TaskExecutor {
20
20
  interface: AbstractInterface;
21
21
  service: Service;
22
22
  taskCache?: TaskCache;
23
+ private readonly providedActionSpace;
23
24
  private readonly taskBuilder;
24
25
  private conversationHistory;
25
26
  onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];
@@ -31,8 +32,10 @@ export declare class TaskExecutor {
31
32
  onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];
32
33
  replanningCycleLimit?: number;
33
34
  hooks?: TaskExecutorHooks;
35
+ actionSpace: DeviceAction[];
34
36
  });
35
37
  private createExecutionSession;
38
+ private getActionSpace;
36
39
  convertPlanToExecutable(plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, options?: {
37
40
  cacheable?: boolean;
38
41
  subTask?: boolean;
@@ -43,7 +46,7 @@ export declare class TaskExecutor {
43
46
  runner: TaskRunner;
44
47
  }>;
45
48
  runPlans(title: string, plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig): Promise<ExecutionResult>;
46
- action(userPrompt: string, modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, includeBboxInPlanning: boolean, backgroundKnowledge?: string, cacheable?: boolean, replanningCycleLimitOverride?: number): Promise<ExecutionResult<{
49
+ action(userPrompt: string, modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, includeBboxInPlanning: boolean, backgroundKnowledge?: string, cacheable?: boolean, replanningCycleLimitOverride?: number, imagesIncludeCount?: number): Promise<ExecutionResult<{
47
50
  yamlFlow?: MidsceneYamlFlowItem[];
48
51
  } | undefined>>;
49
52
  private createTypeQueryTask;
@@ -10,7 +10,12 @@ export declare class ConversationHistory {
10
10
  append(message: ChatCompletionMessageParam): void;
11
11
  seed(messages: ChatCompletionMessageParam[]): void;
12
12
  reset(): void;
13
- snapshot(): ChatCompletionMessageParam[];
13
+ /**
14
+ * Snapshot the conversation history, and replace the images with text if the number of images exceeds the limit.
15
+ * @param maxImages - The maximum number of images to include in the snapshot. Undefined means no limit.
16
+ * @returns The snapshot of the conversation history.
17
+ */
18
+ snapshot(maxImages?: number): ChatCompletionMessageParam[];
14
19
  get length(): number;
15
20
  [Symbol.iterator](): IterableIterator<ChatCompletionMessageParam>;
16
21
  toJSON(): ChatCompletionMessageParam[];
@@ -9,4 +9,5 @@ export declare function plan(userInstruction: string, opts: {
9
9
  modelConfig: IModelConfig;
10
10
  conversationHistory: ConversationHistory;
11
11
  includeBbox: boolean;
12
+ imagesIncludeCount?: number;
12
13
  }): Promise<PlanningAIResponse>;
@@ -1,10 +1,8 @@
1
1
  import type { DeviceAction } from '../../types';
2
2
  import type { TVlModeTypes } from '@midscene/shared/env';
3
- import type { ResponseFormatJSONSchema } from 'openai/resources/index';
4
3
  export declare const descriptionForAction: (action: DeviceAction<any>, locatorSchemaTypeDescription: string) => string;
5
4
  export declare function systemPromptToTaskPlanning({ actionSpace, vlMode, includeBbox, }: {
6
5
  actionSpace: DeviceAction<any>[];
7
6
  vlMode: TVlModeTypes | undefined;
8
7
  includeBbox: boolean;
9
8
  }): Promise<string>;
10
- export declare const planSchema: ResponseFormatJSONSchema;
@@ -1,9 +1,8 @@
1
1
  import { type AIUsageInfo } from '../../types';
2
2
  import type { StreamingCallback } from '../../types';
3
3
  import { type IModelConfig, type TVlModeTypes } from '@midscene/shared/env';
4
- import OpenAI from 'openai';
5
4
  import type { ChatCompletionMessageParam } from 'openai/resources/index';
6
- import { AIActionType, type AIArgs } from '../../common';
5
+ import type { AIActionType, AIArgs } from '../../common';
7
6
  export declare function callAI(messages: ChatCompletionMessageParam[], AIActionTypeValue: AIActionType, modelConfig: IModelConfig, options?: {
8
7
  stream?: boolean;
9
8
  onChunk?: StreamingCallback;
@@ -12,7 +11,6 @@ export declare function callAI(messages: ChatCompletionMessageParam[], AIActionT
12
11
  usage?: AIUsageInfo;
13
12
  isStreamed: boolean;
14
13
  }>;
15
- export declare const getResponseFormat: (modelName: string, AIActionTypeValue: AIActionType) => OpenAI.ChatCompletionCreateParams["response_format"] | OpenAI.ResponseFormatJSONObject;
16
14
  export declare function callAIWithObjectResponse<T>(messages: ChatCompletionMessageParam[], AIActionTypeValue: AIActionType, modelConfig: IModelConfig): Promise<{
17
15
  content: T;
18
16
  contentString: string;
@@ -7,7 +7,7 @@ export declare abstract class AbstractInterface {
7
7
  abstract interfaceType: string;
8
8
  abstract screenshotBase64(): Promise<string>;
9
9
  abstract size(): Promise<Size>;
10
- abstract actionSpace(): DeviceAction[] | Promise<DeviceAction[]>;
10
+ abstract actionSpace(): DeviceAction[];
11
11
  abstract cacheFeatureForRect?(rect: Rect, options?: {
12
12
  targetDescription?: string;
13
13
  modelConfig?: IModelConfig;
@@ -2153,14 +2153,22 @@ export type ActionClearInputParam = {
2153
2153
  export declare const defineActionClearInput: (call: (param: ActionClearInputParam) => Promise<void>) => DeviceAction<ActionClearInputParam>;
2154
2154
  export declare const actionAssertParamSchema: z.ZodObject<{
2155
2155
  condition: z.ZodString;
2156
+ thought: z.ZodString;
2157
+ result: z.ZodBoolean;
2156
2158
  }, "strip", z.ZodTypeAny, {
2157
2159
  condition: string;
2160
+ thought: string;
2161
+ result: boolean;
2158
2162
  }, {
2159
2163
  condition: string;
2164
+ thought: string;
2165
+ result: boolean;
2160
2166
  }>;
2161
2167
  export type ActionAssertParam = {
2162
2168
  condition: string;
2169
+ thought: string;
2170
+ result: boolean;
2163
2171
  };
2164
- export declare const defineActionAssert: (call: (param: ActionAssertParam) => Promise<void>) => DeviceAction<ActionAssertParam>;
2172
+ export declare const defineActionAssert: () => DeviceAction<ActionAssertParam>;
2165
2173
  export type { DeviceAction } from '../types';
2166
2174
  export type { AndroidDeviceOpt, AndroidDeviceInputOpt, IOSDeviceOpt, IOSDeviceInputOpt, } from './device-options';
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
4
- "version": "1.0.1-beta-20251208075922.0",
4
+ "version": "1.0.1-beta-20251208112226.0",
5
5
  "repository": "https://github.com/web-infra-dev/midscene",
6
6
  "homepage": "https://midscenejs.com/",
7
7
  "main": "./dist/lib/index.js",
@@ -82,17 +82,17 @@
82
82
  "@ui-tars/action-parser": "1.2.3",
83
83
  "dayjs": "^1.11.11",
84
84
  "dotenv": "^16.4.5",
85
- "https-proxy-agent": "7.0.2",
85
+ "fetch-socks": "^1.3.0",
86
86
  "openai": "6.3.0",
87
+ "undici": "^6.0.0",
87
88
  "jsonrepair": "3.12.0",
88
89
  "semver": "7.5.2",
89
90
  "js-yaml": "4.1.0",
90
91
  "zod": "3.24.3",
91
- "socks-proxy-agent": "8.0.4",
92
- "@midscene/shared": "1.0.1-beta-20251208075922.0"
92
+ "@midscene/shared": "1.0.1-beta-20251208112226.0"
93
93
  },
94
94
  "devDependencies": {
95
- "@rslib/core": "^0.18.3",
95
+ "@rslib/core": "^0.18.2",
96
96
  "@types/node": "^18.0.0",
97
97
  "@types/node-fetch": "2.6.11",
98
98
  "@types/js-yaml": "4.0.9",