@midscene/core 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/es/utils.js CHANGED
@@ -1,9 +1,15 @@
1
1
  // src/utils.ts
2
+ import assert from "assert";
3
+ import { randomUUID } from "crypto";
4
+ import {
5
+ copyFileSync,
6
+ existsSync,
7
+ mkdirSync,
8
+ readFileSync,
9
+ writeFileSync
10
+ } from "fs";
2
11
  import { tmpdir } from "os";
3
12
  import { basename, join } from "path";
4
- import { copyFileSync, existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
5
- import { randomUUID } from "crypto";
6
- import assert from "assert";
7
13
  var pkg;
8
14
  function getPkgInfo() {
9
15
  if (pkg) {
@@ -19,12 +25,11 @@ function getPkgInfo() {
19
25
  const { name, version } = JSON.parse(readFileSync(pkgJsonFile, "utf-8"));
20
26
  pkg = { name, version };
21
27
  return pkg;
22
- } else {
23
- return {
24
- name: "midscene-unknown-page-name",
25
- version: "0.0.0"
26
- };
27
28
  }
29
+ return {
30
+ name: "midscene-unknown-page-name",
31
+ version: "0.0.0"
32
+ };
28
33
  }
29
34
  var logDir = join(process.cwd(), "./midscene_run/");
30
35
  var logEnvReady = false;
@@ -57,7 +62,7 @@ function writeDumpFile(opts) {
57
62
  writeFileSync(
58
63
  gitIgnorePath,
59
64
  `${gitIgnoreContent}
60
- # MidScene.js dump files
65
+ # Midscene.js dump files
61
66
  ${logDirName}/report
62
67
  ${logDirName}/dump-logger
63
68
  `,
@@ -1022,8 +1022,7 @@ __export(ai_model_exports, {
1022
1022
  module.exports = __toCommonJS(ai_model_exports);
1023
1023
 
1024
1024
  // src/ai-model/openai.ts
1025
- var import_assert = __toESM(require("assert"));
1026
- var import_openai = __toESM(require("openai"));
1025
+ var import_node_assert = __toESM(require("assert"));
1027
1026
 
1028
1027
  // ../../node_modules/.pnpm/langsmith@0.1.36_openai@4.47.1/node_modules/langsmith/dist/traceable.js
1029
1028
  var import_node_async_hooks = require("async_hooks");
@@ -4238,11 +4237,12 @@ var wrapOpenAI = (openai, options) => {
4238
4237
  };
4239
4238
 
4240
4239
  // src/ai-model/openai.ts
4240
+ var import_openai = __toESM(require("openai"));
4241
4241
  var envConfigKey = "MIDSCENE_OPENAI_INIT_CONFIG_JSON";
4242
4242
  var envModelKey = "MIDSCENE_MODEL_NAME";
4243
4243
  var envSmithDebug = "MIDSCENE_LANGSMITH_DEBUG";
4244
4244
  var extraConfig = {};
4245
- if (typeof process.env[envConfigKey] === "string") {
4245
+ if (typeof process.env[envConfigKey] === "string" && process.env[envConfigKey]) {
4246
4246
  console.log("config for openai loaded");
4247
4247
  extraConfig = JSON.parse(process.env[envConfigKey]);
4248
4248
  }
@@ -4268,12 +4268,12 @@ async function call(messages, responseFormat) {
4268
4268
  response_format: { type: responseFormat }
4269
4269
  });
4270
4270
  const { content } = completion.choices[0].message;
4271
- (0, import_assert.default)(content, "empty content");
4271
+ (0, import_node_assert.default)(content, "empty content");
4272
4272
  return content;
4273
4273
  }
4274
4274
  async function callToGetJSONObject(messages) {
4275
4275
  const response = await call(messages, "json_object" /* JSON */);
4276
- (0, import_assert.default)(response, "empty response");
4276
+ (0, import_node_assert.default)(response, "empty response");
4277
4277
  return JSON.parse(response);
4278
4278
  }
4279
4279
 
@@ -4311,9 +4311,9 @@ You are an expert in software page image (2D) and page element text analysis.
4311
4311
  "elements": [
4312
4312
  // If no matching elements are found, return an empty array []
4313
4313
  {
4314
- "reason": "xxx", // The thought process for finding the element, replace xxx with your thought process
4315
- "text": "xxx", // Replace xxx with the text of elementInfo, if none, leave empty
4316
- "id": "xxx" // Replace xxx with the ID of elementInfo
4314
+ "reason": "PLACEHOLDER", // The thought process for finding the element, replace PLACEHOLDER with your thought process
4315
+ "text": "PLACEHOLDER", // Replace PLACEHOLDER with the text of elementInfo, if none, leave empty
4316
+ "id": "PLACEHOLDER" // Replace PLACEHOLDER with the ID of elementInfo
4317
4317
  }
4318
4318
  // More elements...
4319
4319
  ],
@@ -4409,16 +4409,16 @@ Output Example:
4409
4409
  }
4410
4410
 
4411
4411
  // src/ai-model/prompt/util.ts
4412
- var import_assert3 = __toESM(require("assert"));
4412
+ var import_node_assert4 = __toESM(require("assert"));
4413
4413
 
4414
4414
  // src/image/info.ts
4415
- var import_node_assert = __toESM(require("assert"));
4415
+ var import_node_assert2 = __toESM(require("assert"));
4416
4416
  var import_node_buffer = require("buffer");
4417
4417
  var import_node_fs = require("fs");
4418
4418
  var import_sharp = __toESM(require("sharp"));
4419
4419
  async function imageInfo(image) {
4420
4420
  const { width, height } = await (0, import_sharp.default)(image).metadata();
4421
- (0, import_node_assert.default)(width && height, `invalid image: ${image}`);
4421
+ (0, import_node_assert2.default)(width && height, `invalid image: ${image}`);
4422
4422
  return { width, height };
4423
4423
  }
4424
4424
  async function imageInfoOfBase64(imageBase64) {
@@ -4431,16 +4431,18 @@ var import_node_buffer2 = require("buffer");
4431
4431
  var import_sharp2 = __toESM(require("sharp"));
4432
4432
 
4433
4433
  // src/image/visualization.ts
4434
- var import_buffer = require("buffer");
4435
- var import_sharp3 = __toESM(require("sharp"));
4434
+ var import_node_buffer3 = require("buffer");
4436
4435
 
4437
4436
  // src/utils.ts
4438
- var import_os = require("os");
4439
- var import_path = require("path");
4440
- var import_fs = require("fs");
4441
- var import_crypto3 = require("crypto");
4442
- var import_assert2 = __toESM(require("assert"));
4443
- var logDir = (0, import_path.join)(process.cwd(), "./midscene_run/");
4437
+ var import_node_assert3 = __toESM(require("assert"));
4438
+ var import_node_crypto = require("crypto");
4439
+ var import_node_fs2 = require("fs");
4440
+ var import_node_os = require("os");
4441
+ var import_node_path = require("path");
4442
+ var logDir = (0, import_node_path.join)(process.cwd(), "./midscene_run/");
4443
+
4444
+ // src/image/visualization.ts
4445
+ var import_sharp3 = __toESM(require("sharp"));
4444
4446
 
4445
4447
  // src/ai-model/prompt/util.ts
4446
4448
  var characteristic = "You are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users.";
@@ -4479,7 +4481,7 @@ function promptsOfSectionQuery(constraints) {
4479
4481
  }
4480
4482
  const instruction = "Use your segment_a_web_page skill to find the following section(s)";
4481
4483
  const singleSection = (c) => {
4482
- (0, import_assert3.default)(
4484
+ (0, import_node_assert4.default)(
4483
4485
  c.name || c.description,
4484
4486
  "either `name` or `description` is required to define a section constraint"
4485
4487
  );
@@ -4559,34 +4561,45 @@ async function describeUserPage(context) {
4559
4561
  "elementInfos": ${JSON.stringify(elementInfosDescription)}
4560
4562
  }`,
4561
4563
  elementById(id) {
4562
- (0, import_assert3.default)(typeof id !== "undefined", "id is required for query");
4564
+ (0, import_node_assert4.default)(typeof id !== "undefined", "id is required for query");
4563
4565
  const item = idElementMap[`${id}`];
4564
4566
  return item;
4565
4567
  }
4566
4568
  };
4567
4569
  }
4568
4570
  function cropfieldInformation(elementsInfo) {
4569
- const elementInfosDescription = elementsInfo.map((item) => {
4570
- const { id, attributes = {}, rect, content } = item;
4571
- const tailorContent = truncateText(content);
4572
- const tailorAttributes = Object.keys(attributes).reduce((res, currentKey) => {
4573
- const attributeVal = attributes[currentKey];
4574
- res[currentKey] = truncateText(attributeVal);
4575
- return res;
4576
- }, {});
4577
- return {
4578
- id,
4579
- attributes: tailorAttributes,
4580
- rect,
4581
- content: tailorContent
4582
- };
4583
- });
4571
+ const elementInfosDescription = elementsInfo.map(
4572
+ (item) => {
4573
+ const { id, attributes = {}, rect, content } = item;
4574
+ const tailorContent = truncateText(content);
4575
+ const tailorAttributes = Object.keys(attributes).reduce(
4576
+ (res, currentKey) => {
4577
+ const attributeVal = attributes[currentKey];
4578
+ res[currentKey] = truncateText(attributeVal);
4579
+ return res;
4580
+ },
4581
+ {}
4582
+ );
4583
+ return {
4584
+ id,
4585
+ attributes: tailorAttributes,
4586
+ rect,
4587
+ content: tailorContent
4588
+ };
4589
+ }
4590
+ );
4584
4591
  return JSON.stringify(elementInfosDescription);
4585
4592
  }
4586
4593
 
4587
4594
  // src/ai-model/inspect.ts
4595
+ var import_node_assert5 = __toESM(require("assert"));
4588
4596
  async function AiInspectElement(options) {
4589
- const { context, multi, findElementDescription, callAI = callToGetJSONObject } = options;
4597
+ const {
4598
+ context,
4599
+ multi,
4600
+ findElementDescription,
4601
+ callAI = callToGetJSONObject
4602
+ } = options;
4590
4603
  const { screenshotBase64 } = context;
4591
4604
  const { description, elementById } = await describeUserPage(context);
4592
4605
  const systemPrompt = systemPromptToFindElement(findElementDescription, multi);
@@ -4612,12 +4625,16 @@ async function AiInspectElement(options) {
4612
4625
  const parseResult = await callAI(msgs);
4613
4626
  return {
4614
4627
  parseResult,
4615
- elementById,
4616
- systemPrompt
4628
+ elementById
4617
4629
  };
4618
4630
  }
4619
4631
  async function AiExtractElementInfo(options) {
4620
- const { dataQuery, sectionConstraints, context, callAI = callToGetJSONObject } = options;
4632
+ const {
4633
+ dataQuery,
4634
+ sectionConstraints,
4635
+ context,
4636
+ callAI = callToGetJSONObject
4637
+ } = options;
4621
4638
  const systemPrompt = systemPromptToExtract(dataQuery, sectionConstraints);
4622
4639
  const { screenshotBase64 } = context;
4623
4640
  const { description, elementById } = await describeUserPage(context);
@@ -4642,8 +4659,7 @@ async function AiExtractElementInfo(options) {
4642
4659
  const parseResult = await callAI(msgs);
4643
4660
  return {
4644
4661
  parseResult,
4645
- elementById,
4646
- systemPrompt
4662
+ elementById
4647
4663
  };
4648
4664
  }
4649
4665
  // Annotate the CommonJS export names for ESM import in node:
package/dist/lib/image.js CHANGED
@@ -64,7 +64,8 @@ function base64Encoded(image, withHeader = true) {
64
64
  }
65
65
  if (image.endsWith("png")) {
66
66
  return `data:image/png;base64,${imageBuffer.toString("base64")}`;
67
- } else if (image.endsWith("jpg") || image.endsWith("jpeg")) {
67
+ }
68
+ if (image.endsWith("jpg") || image.endsWith("jpeg")) {
68
69
  return `data:image/jpeg;base64,${imageBuffer.toString("base64")}`;
69
70
  }
70
71
  throw new Error("unsupported image type");
@@ -162,49 +163,48 @@ async function alignCoordByTrim(image, centerRect) {
162
163
  }
163
164
 
164
165
  // src/image/visualization.ts
165
- var import_buffer = require("buffer");
166
- var import_sharp3 = __toESM(require("sharp"));
166
+ var import_node_buffer3 = require("buffer");
167
167
 
168
168
  // src/utils.ts
169
- var import_os = require("os");
170
- var import_path = require("path");
171
- var import_fs = require("fs");
172
- var import_crypto = require("crypto");
173
- var import_assert = __toESM(require("assert"));
169
+ var import_node_assert2 = __toESM(require("assert"));
170
+ var import_node_crypto = require("crypto");
171
+ var import_node_fs2 = require("fs");
172
+ var import_node_os = require("os");
173
+ var import_node_path = require("path");
174
174
  var pkg;
175
175
  function getPkgInfo() {
176
176
  if (pkg) {
177
177
  return pkg;
178
178
  }
179
179
  let pkgJsonFile = "";
180
- if ((0, import_fs.existsSync)((0, import_path.join)(__dirname, "../package.json"))) {
181
- pkgJsonFile = (0, import_path.join)(__dirname, "../package.json");
182
- } else if ((0, import_fs.existsSync)((0, import_path.join)(__dirname, "../../../package.json"))) {
183
- pkgJsonFile = (0, import_path.join)(__dirname, "../../../package.json");
180
+ if ((0, import_node_fs2.existsSync)((0, import_node_path.join)(__dirname, "../package.json"))) {
181
+ pkgJsonFile = (0, import_node_path.join)(__dirname, "../package.json");
182
+ } else if ((0, import_node_fs2.existsSync)((0, import_node_path.join)(__dirname, "../../../package.json"))) {
183
+ pkgJsonFile = (0, import_node_path.join)(__dirname, "../../../package.json");
184
184
  }
185
185
  if (pkgJsonFile) {
186
- const { name, version } = JSON.parse((0, import_fs.readFileSync)(pkgJsonFile, "utf-8"));
186
+ const { name, version } = JSON.parse((0, import_node_fs2.readFileSync)(pkgJsonFile, "utf-8"));
187
187
  pkg = { name, version };
188
188
  return pkg;
189
- } else {
190
- return {
191
- name: "midscene-unknown-page-name",
192
- version: "0.0.0"
193
- };
194
189
  }
190
+ return {
191
+ name: "midscene-unknown-page-name",
192
+ version: "0.0.0"
193
+ };
195
194
  }
196
- var logDir = (0, import_path.join)(process.cwd(), "./midscene_run/");
195
+ var logDir = (0, import_node_path.join)(process.cwd(), "./midscene_run/");
197
196
  function getTmpDir() {
198
- const path = (0, import_path.join)((0, import_os.tmpdir)(), getPkgInfo().name);
199
- (0, import_fs.mkdirSync)(path, { recursive: true });
197
+ const path = (0, import_node_path.join)((0, import_node_os.tmpdir)(), getPkgInfo().name);
198
+ (0, import_node_fs2.mkdirSync)(path, { recursive: true });
200
199
  return path;
201
200
  }
202
201
  function getTmpFile(fileExtWithoutDot) {
203
- const filename = `${(0, import_crypto.randomUUID)()}.${fileExtWithoutDot}`;
204
- return (0, import_path.join)(getTmpDir(), filename);
202
+ const filename = `${(0, import_node_crypto.randomUUID)()}.${fileExtWithoutDot}`;
203
+ return (0, import_node_path.join)(getTmpDir(), filename);
205
204
  }
206
205
 
207
206
  // src/image/visualization.ts
207
+ var import_sharp3 = __toESM(require("sharp"));
208
208
  var colors = [
209
209
  {
210
210
  name: "Red",
@@ -291,7 +291,7 @@ async function composeSectionDiagram(sections, context) {
291
291
  ${rects.join("\n")}
292
292
  </svg>
293
293
  `;
294
- const svgBuffer = import_buffer.Buffer.from(rectangles);
294
+ const svgBuffer = import_node_buffer3.Buffer.from(rectangles);
295
295
  const file = getTmpFile("png");
296
296
  await (0, import_sharp3.default)({
297
297
  create: {