@midscene/core 0.3.4 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/es/image.js CHANGED
@@ -1,289 +1,19 @@
1
- // src/image/info.ts
2
- import assert from "assert";
3
- import { Buffer } from "buffer";
4
- import { readFileSync } from "fs";
5
- import Sharp from "sharp";
6
- async function imageInfo(image) {
7
- const { width, height } = await Sharp(image).metadata();
8
- assert(width && height, `invalid image: ${image}`);
9
- return { width, height };
10
- }
11
- async function imageInfoOfBase64(imageBase64) {
12
- const base64Data = imageBase64.replace(/^data:image\/\w+;base64,/, "");
13
- return imageInfo(Buffer.from(base64Data, "base64"));
14
- }
15
- function base64Encoded(image, withHeader = true) {
16
- const imageBuffer = readFileSync(image);
17
- if (!withHeader) {
18
- return imageBuffer.toString("base64");
19
- }
20
- if (image.endsWith("png")) {
21
- return `data:image/png;base64,${imageBuffer.toString("base64")}`;
22
- }
23
- if (image.endsWith("jpg") || image.endsWith("jpeg")) {
24
- return `data:image/jpeg;base64,${imageBuffer.toString("base64")}`;
25
- }
26
- throw new Error("unsupported image type");
27
- }
28
-
29
- // src/image/transform.ts
30
- import { Buffer as Buffer2 } from "buffer";
31
- import Sharp2 from "sharp";
32
- async function saveBase64Image(options) {
33
- const { base64Data, outputPath } = options;
34
- const base64Image = base64Data.split(";base64,").pop() || base64Data;
35
- const imageBuffer = Buffer2.from(base64Image, "base64");
36
- await Sharp2(imageBuffer).toFile(outputPath);
37
- console.log("Image successfully written to file.");
38
- }
39
- async function transformImgPathToBase64(inputPath) {
40
- return await Sharp2(inputPath).toBuffer().then((data) => {
41
- const base64Data = data.toString("base64");
42
- return base64Data;
43
- });
44
- }
45
- async function resizeImg(base64Data) {
46
- const base64Image = base64Data.split(";base64,").pop() || base64Data;
47
- const imageBuffer = Buffer2.from(base64Image, "base64");
48
- const metadata = await Sharp2(imageBuffer).metadata();
49
- const { width, height } = metadata;
50
- if (!width || !height) {
51
- throw Error("undefined width or height with url");
52
- }
53
- const newSize = calculateNewDimensions(width, height);
54
- return await Sharp2(imageBuffer).resize(newSize.width, newSize.height).toBuffer().then((data) => {
55
- const base64Data2 = data.toString("base64");
56
- return base64Data2;
57
- });
58
- }
59
- function calculateNewDimensions(originalWidth, originalHeight) {
60
- const maxWidth = 768;
61
- const maxHeight = 2048;
62
- let newWidth = originalWidth;
63
- let newHeight = originalHeight;
64
- const aspectRatio = originalWidth / originalHeight;
65
- if (originalWidth > maxWidth) {
66
- newWidth = maxWidth;
67
- newHeight = newWidth / aspectRatio;
68
- }
69
- if (newHeight > maxHeight) {
70
- newHeight = maxHeight;
71
- newWidth = newHeight * aspectRatio;
72
- }
73
- return {
74
- width: Math.round(newWidth),
75
- height: Math.round(newHeight)
76
- };
77
- }
78
- async function trimImage(image) {
79
- const imgInstance = Sharp2(image);
80
- const instanceInfo = await imgInstance.metadata();
81
- if (!instanceInfo.width || instanceInfo.width <= 3 || !instanceInfo.height || instanceInfo.height <= 3) {
82
- return null;
83
- }
84
- const { info } = await imgInstance.trim().toBuffer({
85
- resolveWithObject: true
86
- });
87
- if (typeof info.trimOffsetLeft === "undefined" || typeof info.trimOffsetTop === "undefined") {
88
- return null;
89
- }
90
- return {
91
- trimOffsetLeft: info.trimOffsetLeft,
92
- trimOffsetTop: info.trimOffsetTop,
93
- width: info.width,
94
- height: info.height
95
- };
96
- }
97
- async function alignCoordByTrim(image, centerRect) {
98
- const imgInfo = await Sharp2(image).metadata();
99
- if (!(imgInfo == null ? void 0 : imgInfo.width) || !imgInfo.height || imgInfo.width <= 3 || imgInfo.height <= 3) {
100
- return centerRect;
101
- }
102
- try {
103
- const img = await Sharp2(image).extract(centerRect).toBuffer();
104
- const trimInfo = await trimImage(img);
105
- if (!trimInfo) {
106
- return centerRect;
107
- }
108
- return {
109
- left: centerRect.left - trimInfo.trimOffsetLeft,
110
- top: centerRect.top - trimInfo.trimOffsetTop,
111
- width: trimInfo.width,
112
- height: trimInfo.height
113
- };
114
- } catch (e) {
115
- console.log(imgInfo);
116
- throw e;
117
- }
118
- }
119
-
120
- // src/image/visualization.ts
121
- import { Buffer as Buffer3 } from "buffer";
122
-
123
- // src/utils.ts
124
- import assert2 from "assert";
125
- import { randomUUID } from "crypto";
1
+ // src/image/index.ts
126
2
  import {
127
- existsSync,
128
- mkdirSync,
129
- readFileSync as readFileSync2,
130
- writeFileSync
131
- } from "fs";
132
- import { tmpdir } from "os";
133
- import path, { basename, dirname, join } from "path";
134
- var pkg;
135
- function getPkgInfo() {
136
- if (pkg) {
137
- return pkg;
138
- }
139
- const pkgDir = findNearestPackageJson(__dirname);
140
- assert2(pkgDir, "package.json not found");
141
- const pkgJsonFile = join(pkgDir, "package.json");
142
- if (pkgJsonFile) {
143
- const { name, version } = JSON.parse(readFileSync2(pkgJsonFile, "utf-8"));
144
- pkg = { name, version, dir: pkgDir };
145
- return pkg;
146
- }
147
- return {
148
- name: "midscene-unknown-page-name",
149
- version: "0.0.0",
150
- dir: pkgDir
151
- };
152
- }
153
- var logDir = join(process.cwd(), "./midscene_run/");
154
- function getTmpDir() {
155
- const path2 = join(tmpdir(), getPkgInfo().name);
156
- mkdirSync(path2, { recursive: true });
157
- return path2;
158
- }
159
- function getTmpFile(fileExtWithoutDot) {
160
- const filename = `${randomUUID()}.${fileExtWithoutDot}`;
161
- return join(getTmpDir(), filename);
162
- }
163
- function findNearestPackageJson(dir) {
164
- const packageJsonPath = path.join(dir, "package.json");
165
- if (existsSync(packageJsonPath)) {
166
- return dir;
167
- }
168
- const parentDir = path.dirname(dir);
169
- if (parentDir === dir) {
170
- return null;
171
- }
172
- return findNearestPackageJson(parentDir);
173
- }
174
-
175
- // src/image/visualization.ts
176
- import Sharp3 from "sharp";
177
- var colors = [
178
- {
179
- name: "Red",
180
- hex: "#FF0000"
181
- },
182
- {
183
- name: "Green",
184
- hex: "#00FF00"
185
- },
186
- {
187
- name: "Blue",
188
- hex: "#0000FF"
189
- },
190
- {
191
- name: "Yellow",
192
- hex: "#FFFF00"
193
- },
194
- {
195
- name: "Cyan",
196
- hex: "#00FFFF"
197
- },
198
- {
199
- name: "Magenta",
200
- hex: "#FF00FF"
201
- },
202
- {
203
- name: "Orange",
204
- hex: "#FFA500"
205
- },
206
- {
207
- name: "Purple",
208
- hex: "#800080"
209
- },
210
- {
211
- name: "Brown",
212
- hex: "#A52A2A"
213
- },
214
- {
215
- name: "Pink",
216
- hex: "#FFC0CB"
217
- },
218
- {
219
- name: "Light Blue",
220
- hex: "#ADD8E6"
221
- },
222
- {
223
- name: "Lime",
224
- hex: "#00FF00"
225
- },
226
- {
227
- name: "Violet",
228
- hex: "#EE82EE"
229
- },
230
- {
231
- name: "Gold",
232
- hex: "#FFD700"
233
- },
234
- {
235
- name: "Teal",
236
- hex: "#008080"
237
- }
238
- ];
239
- var sizeLimit = 512;
240
- var textFontSize = 12;
241
- async function composeSectionDiagram(sections, context) {
242
- const { width, height } = await imageInfo(context.screenshotBase64);
243
- const ratio = Math.min(sizeLimit / width, sizeLimit / height, 1);
244
- const canvasWidth = width * ratio;
245
- const canvasHeight = height * ratio;
246
- const sectionNameColorMap = {};
247
- const rects = sections.map((section, index) => {
248
- const { left, top, width: width2, height: height2 } = section.rect;
249
- const color = colors[index % colors.length];
250
- sectionNameColorMap[section.name] = color;
251
- return `
252
- <rect x="${left * ratio}" y="${top * ratio}" width="${width2 * ratio}" height="${height2 * ratio}" fill="${color.hex}" />
253
- <text x="${left * ratio}" y="${top * ratio + textFontSize}" font-family="Arial" font-size="${textFontSize}" fill="black">
254
- ${section.name}
255
- </text>
256
- `;
257
- });
258
- const rectangles = `
259
- <svg width="${canvasWidth}" height="${canvasHeight}">
260
- ${rects.join("\n")}
261
- </svg>
262
- `;
263
- const svgBuffer = Buffer3.from(rectangles);
264
- const file = getTmpFile("png");
265
- await Sharp3({
266
- create: {
267
- width: canvasWidth,
268
- height: canvasHeight,
269
- channels: 4,
270
- background: { r: 255, g: 255, b: 255, alpha: 1 }
271
- }
272
- }).composite([{ input: svgBuffer }]).png().toFile(file);
273
- return {
274
- file,
275
- sectionNameColorMap
276
- };
277
- }
3
+ imageInfo,
4
+ imageInfoOfBase64,
5
+ base64Encoded,
6
+ calculateNewDimensions,
7
+ resizeImg,
8
+ transformImgPathToBase64,
9
+ saveBase64Image
10
+ } from "@midscene/shared/img";
278
11
  export {
279
- alignCoordByTrim,
280
12
  base64Encoded,
281
13
  calculateNewDimensions,
282
- composeSectionDiagram,
283
14
  imageInfo,
284
15
  imageInfoOfBase64,
285
16
  resizeImg,
286
17
  saveBase64Image,
287
- transformImgPathToBase64,
288
- trimImage
18
+ transformImgPathToBase64
289
19
  };
package/dist/es/index.js CHANGED
@@ -1251,12 +1251,13 @@ var Executor = class {
1251
1251
  }
1252
1252
  if (successfullyCompleted) {
1253
1253
  this.status = "completed";
1254
- if (this.tasks.length) {
1255
- return this.tasks[this.tasks.length - 1].output;
1256
- }
1257
1254
  } else {
1258
1255
  this.status = "error";
1259
1256
  }
1257
+ if (this.tasks.length) {
1258
+ const outputIndex = Math.min(taskIndex, this.tasks.length - 1);
1259
+ return this.tasks[outputIndex].output;
1260
+ }
1260
1261
  }
1261
1262
  isInErrorState() {
1262
1263
  return this.status === "error";
@@ -1286,7 +1287,7 @@ var Executor = class {
1286
1287
  };
1287
1288
 
1288
1289
  // src/insight/index.ts
1289
- import assert10 from "assert";
1290
+ import assert9 from "assert";
1290
1291
 
1291
1292
  // src/ai-model/openai/index.ts
1292
1293
  import assert3 from "assert";
@@ -4519,6 +4520,7 @@ import OpenAI from "openai";
4519
4520
  var MIDSCENE_OPENAI_INIT_CONFIG_JSON = "MIDSCENE_OPENAI_INIT_CONFIG_JSON";
4520
4521
  var MIDSCENE_MODEL_NAME = "MIDSCENE_MODEL_NAME";
4521
4522
  var MIDSCENE_LANGSMITH_DEBUG = "MIDSCENE_LANGSMITH_DEBUG";
4523
+ var MIDSCENE_DEBUG_AI_PROFILE = "MIDSCENE_DEBUG_AI_PROFILE";
4522
4524
  var OPENAI_API_KEY = "OPENAI_API_KEY";
4523
4525
  function useOpenAIModel(useModel) {
4524
4526
  if (useModel && useModel !== "openAI")
@@ -4548,12 +4550,16 @@ async function createOpenAI() {
4548
4550
  }
4549
4551
  async function call(messages, responseFormat) {
4550
4552
  const openai = await createOpenAI();
4553
+ const shouldPrintTiming = typeof process.env[MIDSCENE_DEBUG_AI_PROFILE] === "string";
4554
+ shouldPrintTiming && console.time("Midscene - AI call");
4551
4555
  const completion = await openai.chat.completions.create({
4552
4556
  model,
4553
4557
  messages,
4554
4558
  response_format: { type: responseFormat },
4555
4559
  temperature: 0.2
4556
4560
  });
4561
+ shouldPrintTiming && console.timeEnd("Midscene - AI call");
4562
+ shouldPrintTiming && console.log("Midscene - AI usage", completion.usage);
4557
4563
  const { content } = completion.choices[0].message;
4558
4564
  assert3(content, "empty content");
4559
4565
  return content;
@@ -4701,30 +4707,18 @@ function multiDescription(multi) {
4701
4707
  }
4702
4708
 
4703
4709
  // src/ai-model/prompt/util.ts
4704
- import assert5 from "assert";
4705
-
4706
- // src/image/info.ts
4707
4710
  import assert4 from "assert";
4708
- import { Buffer as Buffer2 } from "buffer";
4709
- import { readFileSync as readFileSync2 } from "fs";
4710
- import Sharp from "sharp";
4711
- async function imageInfo(image) {
4712
- const { width, height } = await Sharp(image).metadata();
4713
- assert4(width && height, `invalid image: ${image}`);
4714
- return { width, height };
4715
- }
4716
- async function imageInfoOfBase64(imageBase64) {
4717
- const base64Data = imageBase64.replace(/^data:image\/\w+;base64,/, "");
4718
- return imageInfo(Buffer2.from(base64Data, "base64"));
4719
- }
4720
4711
 
4721
- // src/image/transform.ts
4722
- import { Buffer as Buffer3 } from "buffer";
4723
- import Sharp2 from "sharp";
4724
-
4725
- // src/image/visualization.ts
4726
- import { Buffer as Buffer4 } from "buffer";
4727
- import Sharp3 from "sharp";
4712
+ // src/image/index.ts
4713
+ import {
4714
+ imageInfo,
4715
+ imageInfoOfBase64,
4716
+ base64Encoded,
4717
+ calculateNewDimensions,
4718
+ resizeImg,
4719
+ transformImgPathToBase64,
4720
+ saveBase64Image
4721
+ } from "@midscene/shared/img";
4728
4722
 
4729
4723
  // src/ai-model/prompt/util.ts
4730
4724
  var characteristic = "You are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users.";
@@ -4818,7 +4812,7 @@ async function describeUserPage(context) {
4818
4812
  "elementInfos": ${JSON.stringify(elementInfosDescription)}
4819
4813
  }`,
4820
4814
  elementById(id) {
4821
- assert5(typeof id !== "undefined", "id is required for query");
4815
+ assert4(typeof id !== "undefined", "id is required for query");
4822
4816
  const item = idElementMap[`${id}`];
4823
4817
  return item;
4824
4818
  }
@@ -4883,10 +4877,10 @@ function retrieveSection(prompt) {
4883
4877
  }
4884
4878
 
4885
4879
  // src/ai-model/inspect.ts
4886
- import assert7 from "assert";
4880
+ import assert6 from "assert";
4887
4881
 
4888
4882
  // src/ai-model/coze/index.ts
4889
- import assert6 from "assert";
4883
+ import assert5 from "assert";
4890
4884
  import fetch2 from "node-fetch";
4891
4885
  var COZE_INSPECT_ELEMENT_BOT_ID = process.env.COZE_INSPECT_ELEMENT_BOT_ID || "";
4892
4886
  var COZE_AI_ACTION_BOT_ID = process.env.COZE_AI_ACTION_BOT_ID || "";
@@ -4939,7 +4933,7 @@ async function callCozeAi(options) {
4939
4933
  throw new Error("aiResponse is undefined", aiResponse);
4940
4934
  }
4941
4935
  const parseContent = (_b = aiResponse == null ? void 0 : aiResponse.messages[0]) == null ? void 0 : _b.content;
4942
- assert6(parseContent, "empty content");
4936
+ assert5(parseContent, "empty content");
4943
4937
  try {
4944
4938
  return JSON.parse(parseContent);
4945
4939
  } catch (err) {
@@ -5114,7 +5108,7 @@ DATA_DEMAND ends.
5114
5108
  }
5115
5109
  async function AiAssert(options) {
5116
5110
  const { assertion, context, useModel } = options;
5117
- assert7(assertion, "assertion should be a string");
5111
+ assert6(assertion, "assertion should be a string");
5118
5112
  const { screenshotBase64 } = context;
5119
5113
  const { description, elementById } = await describeUserPage(context);
5120
5114
  const systemPrompt = systemPromptToAssert();
@@ -5158,7 +5152,7 @@ async function AiAssert(options) {
5158
5152
  }
5159
5153
 
5160
5154
  // src/ai-model/automation/index.ts
5161
- import assert8 from "assert";
5155
+ import assert7 from "assert";
5162
5156
 
5163
5157
  // src/ai-model/automation/planning.ts
5164
5158
  function systemPromptToTaskPlanning() {
@@ -5264,8 +5258,8 @@ async function plan(userPrompt, opts, useModel) {
5264
5258
  });
5265
5259
  }
5266
5260
  const actions = (planFromAI == null ? void 0 : planFromAI.actions) || [];
5267
- assert8(planFromAI, "can't get planFromAI");
5268
- assert8(actions && actions.length > 0, "no actions in ai plan");
5261
+ assert7(planFromAI, "can't get planFromAI");
5262
+ assert7(actions && actions.length > 0, "no actions in ai plan");
5269
5263
  if (planFromAI.error) {
5270
5264
  throw new Error(planFromAI.error);
5271
5265
  }
@@ -5273,7 +5267,7 @@ async function plan(userPrompt, opts, useModel) {
5273
5267
  }
5274
5268
 
5275
5269
  // src/insight/utils.ts
5276
- import assert9 from "assert";
5270
+ import assert8 from "assert";
5277
5271
  import { randomUUID as randomUUID2 } from "crypto";
5278
5272
  import { existsSync as existsSync2 } from "fs";
5279
5273
  import { join as join2 } from "path";
@@ -5284,7 +5278,7 @@ var { pid } = process;
5284
5278
  var logFileExt = insightDumpFileExt;
5285
5279
  function writeInsightDump(data, logId, dumpSubscriber) {
5286
5280
  const logDir2 = getLogDir();
5287
- assert9(logDir2, "logDir should be set before writing dump file");
5281
+ assert8(logDir2, "logDir should be set before writing dump file");
5288
5282
  const id = logId || randomUUID2();
5289
5283
  const baseData = {
5290
5284
  sdkVersion: getPkgInfo().version,
@@ -5380,7 +5374,7 @@ var Insight = class {
5380
5374
  __publicField(this, "aiVendorFn", callAiFn);
5381
5375
  __publicField(this, "onceDumpUpdatedFn");
5382
5376
  __publicField(this, "taskInfo");
5383
- assert10(context, "context is required for Insight");
5377
+ assert9(context, "context is required for Insight");
5384
5378
  if (typeof context === "function") {
5385
5379
  this.contextRetrieverFn = context;
5386
5380
  } else {
@@ -5396,7 +5390,7 @@ var Insight = class {
5396
5390
  async locate(queryPrompt, opt) {
5397
5391
  var _a;
5398
5392
  const { callAI: callAI2, multi = false } = opt || {};
5399
- assert10(queryPrompt, "query is required for located");
5393
+ assert9(queryPrompt, "query is required for located");
5400
5394
  const dumpSubscriber = this.onceDumpUpdatedFn;
5401
5395
  this.onceDumpUpdatedFn = void 0;
5402
5396
  const context = await this.contextRetrieverFn();
@@ -5468,7 +5462,7 @@ ${parseResult.errors.join("\n")}`;
5468
5462
  }
5469
5463
  async extract(dataDemand) {
5470
5464
  var _a;
5471
- assert10(
5465
+ assert9(
5472
5466
  typeof dataDemand === "object" || typeof dataDemand === "string",
5473
5467
  `dataDemand should be object or string, but get ${typeof dataDemand}`
5474
5468
  );