@midscene/core 0.2.1-beta-20240807115314.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/es/index.js CHANGED
@@ -1076,7 +1076,7 @@ function writeDumpFile(opts) {
1076
1076
  `${gitIgnoreContent}
1077
1077
  # Midscene.js dump files
1078
1078
  ${logDirName}/report
1079
- ${logDirName}/dump-logger
1079
+ ${logDirName}/dump
1080
1080
  `,
1081
1081
  "utf-8"
1082
1082
  );
@@ -1090,6 +1090,19 @@ ${logDirName}/dump-logger
1090
1090
  }
1091
1091
  return filePath;
1092
1092
  }
1093
+ function replacerForPageObject(key, value) {
1094
+ var _a, _b;
1095
+ if (value && ((_a = value.constructor) == null ? void 0 : _a.name) === "Page") {
1096
+ return "[Page object]";
1097
+ }
1098
+ if (value && ((_b = value.constructor) == null ? void 0 : _b.name) === "Browser") {
1099
+ return "[Browser object]";
1100
+ }
1101
+ return value;
1102
+ }
1103
+ function stringifyDumpData(data, indents) {
1104
+ return JSON.stringify(data, replacerForPageObject, indents);
1105
+ }
1093
1106
 
1094
1107
  // src/action/executor.ts
1095
1108
  var Executor = class {
@@ -4983,7 +4996,7 @@ function writeInsightDump(data, logId, dumpSubscriber) {
4983
4996
  logFileName = `${pid}_${baseData.logTime}-${Math.random()}`;
4984
4997
  }
4985
4998
  }
4986
- const dataString = JSON.stringify(finalData, null, 2);
4999
+ const dataString = stringifyDumpData(finalData, 2);
4987
5000
  if (typeof logIdIndexMap[id] === "number") {
4988
5001
  logContent[logIdIndexMap[id]] = dataString;
4989
5002
  } else {
@@ -5329,11 +5342,14 @@ function systemPromptToTaskPlanning(query) {
5329
5342
  * param: { scrollType: 'ScrollUntilBottom', 'ScrollUntilTop', 'ScrollDown', 'ScrollUp' }
5330
5343
  * type: 'Error'
5331
5344
  * param: { message: string }, the error message
5345
+ * type: 'Sleep'
5346
+ * param: { timeMs: number }, wait for timeMs milliseconds
5332
5347
 
5333
5348
  Here is an example of how to decompose a task.
5334
- When a user says 'Input "Weather in Shanghai" into the search bar, hit enter', by viewing the page screenshot and description, you my decompose this task into something like this:
5349
+ When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you my decompose this task into something like this:
5335
5350
  * Find: 'The search bar'
5336
5351
  * Input: 'Weather in Shanghai'
5352
+ * Sleep: 1000
5337
5353
  * KeyboardPress: 'Enter'
5338
5354
 
5339
5355
  Remember:
package/dist/es/utils.js CHANGED
@@ -64,7 +64,7 @@ function writeDumpFile(opts) {
64
64
  `${gitIgnoreContent}
65
65
  # Midscene.js dump files
66
66
  ${logDirName}/report
67
- ${logDirName}/dump-logger
67
+ ${logDirName}/dump
68
68
  `,
69
69
  "utf-8"
70
70
  );
@@ -94,6 +94,19 @@ async function sleep(ms) {
94
94
  return new Promise((resolve) => setTimeout(resolve, ms));
95
95
  }
96
96
  var commonScreenshotParam = { type: "jpeg", quality: 75 };
97
+ function replacerForPageObject(key, value) {
98
+ var _a, _b;
99
+ if (value && ((_a = value.constructor) == null ? void 0 : _a.name) === "Page") {
100
+ return "[Page object]";
101
+ }
102
+ if (value && ((_b = value.constructor) == null ? void 0 : _b.name) === "Browser") {
103
+ return "[Browser object]";
104
+ }
105
+ return value;
106
+ }
107
+ function stringifyDumpData(data, indents) {
108
+ return JSON.stringify(data, replacerForPageObject, indents);
109
+ }
97
110
  export {
98
111
  commonScreenshotParam,
99
112
  getDumpDir,
@@ -104,7 +117,9 @@ export {
104
117
  groupedActionDumpFileExt,
105
118
  insightDumpFileExt,
106
119
  overlapped,
120
+ replacerForPageObject,
107
121
  setDumpDir,
108
122
  sleep,
123
+ stringifyDumpData,
109
124
  writeDumpFile
110
125
  };
package/dist/lib/index.js CHANGED
@@ -1091,7 +1091,7 @@ function writeDumpFile(opts) {
1091
1091
  `${gitIgnoreContent}
1092
1092
  # Midscene.js dump files
1093
1093
  ${logDirName}/report
1094
- ${logDirName}/dump-logger
1094
+ ${logDirName}/dump
1095
1095
  `,
1096
1096
  "utf-8"
1097
1097
  );
@@ -1105,6 +1105,19 @@ ${logDirName}/dump-logger
1105
1105
  }
1106
1106
  return filePath;
1107
1107
  }
1108
+ function replacerForPageObject(key, value) {
1109
+ var _a, _b;
1110
+ if (value && ((_a = value.constructor) == null ? void 0 : _a.name) === "Page") {
1111
+ return "[Page object]";
1112
+ }
1113
+ if (value && ((_b = value.constructor) == null ? void 0 : _b.name) === "Browser") {
1114
+ return "[Browser object]";
1115
+ }
1116
+ return value;
1117
+ }
1118
+ function stringifyDumpData(data, indents) {
1119
+ return JSON.stringify(data, replacerForPageObject, indents);
1120
+ }
1108
1121
 
1109
1122
  // src/action/executor.ts
1110
1123
  var Executor = class {
@@ -4998,7 +5011,7 @@ function writeInsightDump(data, logId, dumpSubscriber) {
4998
5011
  logFileName = `${pid}_${baseData.logTime}-${Math.random()}`;
4999
5012
  }
5000
5013
  }
5001
- const dataString = JSON.stringify(finalData, null, 2);
5014
+ const dataString = stringifyDumpData(finalData, 2);
5002
5015
  if (typeof logIdIndexMap[id] === "number") {
5003
5016
  logContent[logIdIndexMap[id]] = dataString;
5004
5017
  } else {
@@ -5344,11 +5357,14 @@ function systemPromptToTaskPlanning(query) {
5344
5357
  * param: { scrollType: 'ScrollUntilBottom', 'ScrollUntilTop', 'ScrollDown', 'ScrollUp' }
5345
5358
  * type: 'Error'
5346
5359
  * param: { message: string }, the error message
5360
+ * type: 'Sleep'
5361
+ * param: { timeMs: number }, wait for timeMs milliseconds
5347
5362
 
5348
5363
  Here is an example of how to decompose a task.
5349
- When a user says 'Input "Weather in Shanghai" into the search bar, hit enter', by viewing the page screenshot and description, you my decompose this task into something like this:
5364
+ When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you my decompose this task into something like this:
5350
5365
  * Find: 'The search bar'
5351
5366
  * Input: 'Weather in Shanghai'
5367
+ * Sleep: 1000
5352
5368
  * KeyboardPress: 'Enter'
5353
5369
 
5354
5370
  Remember:
package/dist/lib/utils.js CHANGED
@@ -39,8 +39,10 @@ __export(utils_exports, {
39
39
  groupedActionDumpFileExt: () => groupedActionDumpFileExt,
40
40
  insightDumpFileExt: () => insightDumpFileExt,
41
41
  overlapped: () => overlapped,
42
+ replacerForPageObject: () => replacerForPageObject,
42
43
  setDumpDir: () => setDumpDir,
43
44
  sleep: () => sleep,
45
+ stringifyDumpData: () => stringifyDumpData,
44
46
  writeDumpFile: () => writeDumpFile
45
47
  });
46
48
  module.exports = __toCommonJS(utils_exports);
@@ -103,7 +105,7 @@ function writeDumpFile(opts) {
103
105
  `${gitIgnoreContent}
104
106
  # Midscene.js dump files
105
107
  ${logDirName}/report
106
- ${logDirName}/dump-logger
108
+ ${logDirName}/dump
107
109
  `,
108
110
  "utf-8"
109
111
  );
@@ -133,6 +135,19 @@ async function sleep(ms) {
133
135
  return new Promise((resolve) => setTimeout(resolve, ms));
134
136
  }
135
137
  var commonScreenshotParam = { type: "jpeg", quality: 75 };
138
+ function replacerForPageObject(key, value) {
139
+ var _a, _b;
140
+ if (value && ((_a = value.constructor) == null ? void 0 : _a.name) === "Page") {
141
+ return "[Page object]";
142
+ }
143
+ if (value && ((_b = value.constructor) == null ? void 0 : _b.name) === "Browser") {
144
+ return "[Browser object]";
145
+ }
146
+ return value;
147
+ }
148
+ function stringifyDumpData(data, indents) {
149
+ return JSON.stringify(data, replacerForPageObject, indents);
150
+ }
136
151
  // Annotate the CommonJS export names for ESM import in node:
137
152
  0 && (module.exports = {
138
153
  commonScreenshotParam,
@@ -144,7 +159,9 @@ var commonScreenshotParam = { type: "jpeg", quality: 75 };
144
159
  groupedActionDumpFileExt,
145
160
  insightDumpFileExt,
146
161
  overlapped,
162
+ replacerForPageObject,
147
163
  setDumpDir,
148
164
  sleep,
165
+ stringifyDumpData,
149
166
  writeDumpFile
150
167
  });
@@ -1,7 +1,7 @@
1
- import { c as callToGetJSONObject } from './util-3931e76e.js';
2
- export { d as describeUserPage } from './util-3931e76e.js';
1
+ import { c as callToGetJSONObject } from './util-7b3c592c.js';
2
+ export { d as describeUserPage } from './util-7b3c592c.js';
3
3
  export { ChatCompletionMessageParam } from 'openai/resources';
4
- import { B as BaseElement, U as UIContext, f as AIElementParseResponse, g as AISectionParseResponse } from './types-2c92867c.js';
4
+ import { B as BaseElement, U as UIContext, f as AIElementParseResponse, g as AISectionParseResponse } from './types-3ef00c38.js';
5
5
 
6
6
  declare function systemPromptToFindElement(description: string, multi?: boolean): string;
7
7
 
@@ -1,5 +1,5 @@
1
1
  import { Buffer } from 'node:buffer';
2
- import { S as Size, R as Rect, i as UISection, U as UIContext, w as Color } from './types-2c92867c.js';
2
+ import { S as Size, R as Rect, i as UISection, U as UIContext, x as Color } from './types-3ef00c38.js';
3
3
  import 'openai/resources';
4
4
 
5
5
  /**
@@ -1,6 +1,6 @@
1
- import { E as ExecutionTask, a as ExecutionTaskApply, b as ExecutionDump, B as BaseElement, U as UIContext, D as DumpSubscriber, I as InsightTaskInfo, c as InsightOptions, d as InsightAssertionResponse, P as PlanningAction } from './types-2c92867c.js';
2
- export { h as AIAssertionResponse, f as AIElementParseResponse, A as AIResponseFormat, g as AISectionParseResponse, x as BaseAgentParserOpt, k as BasicSectionQuery, C as CallAIFn, w as Color, m as DumpMeta, p as ElementById, j as EnsureObject, F as ExecutionRecorderItem, a1 as ExecutionTaskAction, a0 as ExecutionTaskActionApply, $ as ExecutionTaskInsightAssertion, _ as ExecutionTaskInsightAssertionApply, Z as ExecutionTaskInsightAssertionParam, N as ExecutionTaskInsightDumpLog, Q as ExecutionTaskInsightLocate, O as ExecutionTaskInsightLocateApply, M as ExecutionTaskInsightLocateOutput, K as ExecutionTaskInsightLocateParam, Y as ExecutionTaskInsightQuery, X as ExecutionTaskInsightQueryApply, W as ExecutionTaskInsightQueryOutput, V as ExecutionTaskInsightQueryParam, a3 as ExecutionTaskPlanning, a2 as ExecutionTaskPlanningApply, J as ExecutionTaskReturn, G as ExecutionTaskType, H as ExecutorContext, a4 as GroupedActionDump, n as InsightDump, l as InsightExtractParam, L as LiteUISection, o as PartialInsightDumpFromSDK, q as PlanningAIResponse, v as PlanningActionParamAssert, s as PlanningActionParamHover, t as PlanningActionParamInputOrKeyPress, u as PlanningActionParamScroll, r as PlanningActionParamTap, z as PlaywrightParserOpt, e as Point, y as PuppeteerParserOpt, R as Rect, S as Size, T as TaskCacheInfo, i as UISection } from './types-2c92867c.js';
3
- import { c as callToGetJSONObject, r as retrieveElement, a as retrieveSection } from './util-3931e76e.js';
1
+ import { E as ExecutionTask, a as ExecutionTaskApply, b as ExecutionDump, B as BaseElement, U as UIContext, D as DumpSubscriber, I as InsightTaskInfo, c as InsightOptions, d as InsightAssertionResponse, P as PlanningAction } from './types-3ef00c38.js';
2
+ export { h as AIAssertionResponse, f as AIElementParseResponse, A as AIResponseFormat, g as AISectionParseResponse, y as BaseAgentParserOpt, k as BasicSectionQuery, C as CallAIFn, x as Color, m as DumpMeta, p as ElementById, j as EnsureObject, G as ExecutionRecorderItem, a2 as ExecutionTaskAction, a1 as ExecutionTaskActionApply, a0 as ExecutionTaskInsightAssertion, $ as ExecutionTaskInsightAssertionApply, _ as ExecutionTaskInsightAssertionParam, O as ExecutionTaskInsightDumpLog, V as ExecutionTaskInsightLocate, Q as ExecutionTaskInsightLocateApply, N as ExecutionTaskInsightLocateOutput, M as ExecutionTaskInsightLocateParam, Z as ExecutionTaskInsightQuery, Y as ExecutionTaskInsightQueryApply, X as ExecutionTaskInsightQueryOutput, W as ExecutionTaskInsightQueryParam, a4 as ExecutionTaskPlanning, a3 as ExecutionTaskPlanningApply, K as ExecutionTaskReturn, H as ExecutionTaskType, J as ExecutorContext, a5 as GroupedActionDump, n as InsightDump, l as InsightExtractParam, L as LiteUISection, o as PartialInsightDumpFromSDK, q as PlanningAIResponse, v as PlanningActionParamAssert, s as PlanningActionParamHover, t as PlanningActionParamInputOrKeyPress, u as PlanningActionParamScroll, w as PlanningActionParamSleep, r as PlanningActionParamTap, F as PlaywrightParserOpt, e as Point, z as PuppeteerParserOpt, R as Rect, S as Size, T as TaskCacheInfo, i as UISection } from './types-3ef00c38.js';
3
+ import { c as callToGetJSONObject, r as retrieveElement, a as retrieveSection } from './util-7b3c592c.js';
4
4
  export { setDumpDir } from './utils.js';
5
5
  import 'openai/resources';
6
6
 
@@ -124,7 +124,7 @@ type InsightAssertionResponse = AIAssertionResponse;
124
124
  */
125
125
  interface PlanningAction<ParamType = any> {
126
126
  thought?: string;
127
- type: 'Locate' | 'Tap' | 'Hover' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'Assert';
127
+ type: 'Locate' | 'Tap' | 'Hover' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'Assert' | 'Sleep';
128
128
  param: ParamType;
129
129
  }
130
130
  interface PlanningAIResponse {
@@ -143,6 +143,9 @@ interface PlanningActionParamScroll {
143
143
  interface PlanningActionParamAssert {
144
144
  assertion: string;
145
145
  }
146
+ interface PlanningActionParamSleep {
147
+ timeMs: number;
148
+ }
146
149
  /**
147
150
  * misc
148
151
  */
@@ -234,4 +237,4 @@ interface GroupedActionDump {
234
237
  executions: ExecutionDump[];
235
238
  }
236
239
 
237
- export { type ExecutionTaskInsightAssertion as $, AIResponseFormat as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type ExecutionRecorderItem as F, type ExecutionTaskType as G, type ExecutorContext as H, type InsightTaskInfo as I, type ExecutionTaskReturn as J, type ExecutionTaskInsightLocateParam as K, type LiteUISection as L, type ExecutionTaskInsightLocateOutput as M, type ExecutionTaskInsightDumpLog as N, type ExecutionTaskInsightLocateApply as O, type PlanningAction as P, type ExecutionTaskInsightLocate as Q, type Rect as R, type Size as S, type TaskCacheInfo as T, UIContext as U, type ExecutionTaskInsightQueryParam as V, type ExecutionTaskInsightQueryOutput as W, type ExecutionTaskInsightQueryApply as X, type ExecutionTaskInsightQuery as Y, type ExecutionTaskInsightAssertionParam as Z, type ExecutionTaskInsightAssertionApply as _, type ExecutionTaskApply as a, type ExecutionTaskActionApply as a0, type ExecutionTaskAction as a1, type ExecutionTaskPlanningApply as a2, type ExecutionTaskPlanning as a3, type GroupedActionDump as a4, type ExecutionDump as b, type InsightOptions as c, type InsightAssertionResponse as d, type Point as e, type AIElementParseResponse as f, type AISectionParseResponse as g, type AIAssertionResponse as h, type UISection as i, type EnsureObject as j, type BasicSectionQuery as k, type InsightExtractParam as l, type DumpMeta as m, type InsightDump as n, type PartialInsightDumpFromSDK as o, type ElementById as p, type PlanningAIResponse as q, type PlanningActionParamTap as r, type PlanningActionParamHover as s, type PlanningActionParamInputOrKeyPress as t, type PlanningActionParamScroll as u, type PlanningActionParamAssert as v, type Color as w, type BaseAgentParserOpt as x, type PuppeteerParserOpt as y, type PlaywrightParserOpt as z };
240
+ export { type ExecutionTaskInsightAssertionApply as $, AIResponseFormat as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type PlaywrightParserOpt as F, type ExecutionRecorderItem as G, type ExecutionTaskType as H, type InsightTaskInfo as I, type ExecutorContext as J, type ExecutionTaskReturn as K, type LiteUISection as L, type ExecutionTaskInsightLocateParam as M, type ExecutionTaskInsightLocateOutput as N, type ExecutionTaskInsightDumpLog as O, type PlanningAction as P, type ExecutionTaskInsightLocateApply as Q, type Rect as R, type Size as S, type TaskCacheInfo as T, UIContext as U, type ExecutionTaskInsightLocate as V, type ExecutionTaskInsightQueryParam as W, type ExecutionTaskInsightQueryOutput as X, type ExecutionTaskInsightQueryApply as Y, type ExecutionTaskInsightQuery as Z, type ExecutionTaskInsightAssertionParam as _, type ExecutionTaskApply as a, type ExecutionTaskInsightAssertion as a0, type ExecutionTaskActionApply as a1, type ExecutionTaskAction as a2, type ExecutionTaskPlanningApply as a3, type ExecutionTaskPlanning as a4, type GroupedActionDump as a5, type ExecutionDump as b, type InsightOptions as c, type InsightAssertionResponse as d, type Point as e, type AIElementParseResponse as f, type AISectionParseResponse as g, type AIAssertionResponse as h, type UISection as i, type EnsureObject as j, type BasicSectionQuery as k, type InsightExtractParam as l, type DumpMeta as m, type InsightDump as n, type PartialInsightDumpFromSDK as o, type ElementById as p, type PlanningAIResponse as q, type PlanningActionParamTap as r, type PlanningActionParamHover as s, type PlanningActionParamInputOrKeyPress as t, type PlanningActionParamScroll as u, type PlanningActionParamAssert as v, type PlanningActionParamSleep as w, type Color as x, type BaseAgentParserOpt as y, type PuppeteerParserOpt as z };
@@ -1,5 +1,5 @@
1
1
  import { ChatCompletionMessageParam } from 'openai/resources';
2
- import { B as BaseElement, U as UIContext } from './types-2c92867c.js';
2
+ import { B as BaseElement, U as UIContext } from './types-3ef00c38.js';
3
3
 
4
4
  declare function callToGetJSONObject<T>(messages: ChatCompletionMessageParam[]): Promise<T>;
5
5
 
@@ -1,4 +1,4 @@
1
- import { R as Rect } from './types-2c92867c.js';
1
+ import { R as Rect } from './types-3ef00c38.js';
2
2
  import 'openai/resources';
3
3
 
4
4
  interface PkgInfo {
@@ -22,5 +22,7 @@ declare function getTmpFile(fileExtWithoutDot: string): string;
22
22
  declare function overlapped(container: Rect, target: Rect): boolean;
23
23
  declare function sleep(ms: number): Promise<unknown>;
24
24
  declare const commonScreenshotParam: any;
25
+ declare function replacerForPageObject(key: string, value: any): any;
26
+ declare function stringifyDumpData(data: any, indents?: number): string;
25
27
 
26
- export { commonScreenshotParam, getDumpDir, getDumpDirPath, getPkgInfo, getTmpDir, getTmpFile, groupedActionDumpFileExt, insightDumpFileExt, overlapped, setDumpDir, sleep, writeDumpFile };
28
+ export { commonScreenshotParam, getDumpDir, getDumpDirPath, getPkgInfo, getTmpDir, getTmpFile, groupedActionDumpFileExt, insightDumpFileExt, overlapped, replacerForPageObject, setDumpDir, sleep, stringifyDumpData, writeDumpFile };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "Hello, It's Midscene",
4
- "version": "0.2.1-beta-20240807115314.0",
4
+ "version": "0.2.1",
5
5
  "jsnext:source": "./src/index.ts",
6
6
  "main": "./dist/lib/index.js",
7
7
  "module": "./dist/es/index.js",