@midscene/core 0.1.4 → 0.2.0

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2024-present MidScene.js
3
+ Copyright (c) 2024-present Midscene.js
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
package/README.md ADDED
@@ -0,0 +1,7 @@
1
+ ## Documentation
2
+
3
+ See https://midscenejs.com/ for details.
4
+
5
+ ## License
6
+
7
+ Midscene is MIT licensed.
@@ -1006,7 +1006,6 @@ var require_dist = __commonJS({
1006
1006
 
1007
1007
  // src/ai-model/openai.ts
1008
1008
  import assert from "assert";
1009
- import OpenAI from "openai";
1010
1009
 
1011
1010
  // ../../node_modules/.pnpm/langsmith@0.1.36_openai@4.47.1/node_modules/langsmith/dist/traceable.js
1012
1011
  import { AsyncLocalStorage } from "async_hooks";
@@ -4221,11 +4220,12 @@ var wrapOpenAI = (openai, options) => {
4221
4220
  };
4222
4221
 
4223
4222
  // src/ai-model/openai.ts
4223
+ import OpenAI from "openai";
4224
4224
  var envConfigKey = "MIDSCENE_OPENAI_INIT_CONFIG_JSON";
4225
4225
  var envModelKey = "MIDSCENE_MODEL_NAME";
4226
4226
  var envSmithDebug = "MIDSCENE_LANGSMITH_DEBUG";
4227
4227
  var extraConfig = {};
4228
- if (typeof process.env[envConfigKey] === "string") {
4228
+ if (typeof process.env[envConfigKey] === "string" && process.env[envConfigKey]) {
4229
4229
  console.log("config for openai loaded");
4230
4230
  extraConfig = JSON.parse(process.env[envConfigKey]);
4231
4231
  }
@@ -4294,9 +4294,9 @@ You are an expert in software page image (2D) and page element text analysis.
4294
4294
  "elements": [
4295
4295
  // If no matching elements are found, return an empty array []
4296
4296
  {
4297
- "reason": "xxx", // The thought process for finding the element, replace xxx with your thought process
4298
- "text": "xxx", // Replace xxx with the text of elementInfo, if none, leave empty
4299
- "id": "xxx" // Replace xxx with the ID of elementInfo
4297
+ "reason": "PLACEHOLDER", // The thought process for finding the element, replace PLACEHOLDER with your thought process
4298
+ "text": "PLACEHOLDER", // Replace PLACEHOLDER with the text of elementInfo, if none, leave empty
4299
+ "id": "PLACEHOLDER" // Replace PLACEHOLDER with the ID of elementInfo
4300
4300
  }
4301
4301
  // More elements...
4302
4302
  ],
@@ -4415,16 +4415,24 @@ import Sharp2 from "sharp";
4415
4415
 
4416
4416
  // src/image/visualization.ts
4417
4417
  import { Buffer as Buffer4 } from "buffer";
4418
- import Sharp3 from "sharp";
4419
4418
 
4420
4419
  // src/utils.ts
4420
+ import assert3 from "assert";
4421
+ import { randomUUID } from "crypto";
4422
+ import {
4423
+ copyFileSync,
4424
+ existsSync,
4425
+ mkdirSync,
4426
+ readFileSync as readFileSync2,
4427
+ writeFileSync
4428
+ } from "fs";
4421
4429
  import { tmpdir } from "os";
4422
4430
  import { basename, join } from "path";
4423
- import { copyFileSync, existsSync, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "fs";
4424
- import { randomUUID } from "crypto";
4425
- import assert3 from "assert";
4426
4431
  var logDir = join(process.cwd(), "./midscene_run/");
4427
4432
 
4433
+ // src/image/visualization.ts
4434
+ import Sharp3 from "sharp";
4435
+
4428
4436
  // src/ai-model/prompt/util.ts
4429
4437
  var characteristic = "You are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users.";
4430
4438
  var contextFormatIntro = `
@@ -4549,27 +4557,38 @@ async function describeUserPage(context) {
4549
4557
  };
4550
4558
  }
4551
4559
  function cropfieldInformation(elementsInfo) {
4552
- const elementInfosDescription = elementsInfo.map((item) => {
4553
- const { id, attributes = {}, rect, content } = item;
4554
- const tailorContent = truncateText(content);
4555
- const tailorAttributes = Object.keys(attributes).reduce((res, currentKey) => {
4556
- const attributeVal = attributes[currentKey];
4557
- res[currentKey] = truncateText(attributeVal);
4558
- return res;
4559
- }, {});
4560
- return {
4561
- id,
4562
- attributes: tailorAttributes,
4563
- rect,
4564
- content: tailorContent
4565
- };
4566
- });
4560
+ const elementInfosDescription = elementsInfo.map(
4561
+ (item) => {
4562
+ const { id, attributes = {}, rect, content } = item;
4563
+ const tailorContent = truncateText(content);
4564
+ const tailorAttributes = Object.keys(attributes).reduce(
4565
+ (res, currentKey) => {
4566
+ const attributeVal = attributes[currentKey];
4567
+ res[currentKey] = truncateText(attributeVal);
4568
+ return res;
4569
+ },
4570
+ {}
4571
+ );
4572
+ return {
4573
+ id,
4574
+ attributes: tailorAttributes,
4575
+ rect,
4576
+ content: tailorContent
4577
+ };
4578
+ }
4579
+ );
4567
4580
  return JSON.stringify(elementInfosDescription);
4568
4581
  }
4569
4582
 
4570
4583
  // src/ai-model/inspect.ts
4584
+ import assert5 from "assert";
4571
4585
  async function AiInspectElement(options) {
4572
- const { context, multi, findElementDescription, callAI = callToGetJSONObject } = options;
4586
+ const {
4587
+ context,
4588
+ multi,
4589
+ findElementDescription,
4590
+ callAI = callToGetJSONObject
4591
+ } = options;
4573
4592
  const { screenshotBase64 } = context;
4574
4593
  const { description, elementById } = await describeUserPage(context);
4575
4594
  const systemPrompt = systemPromptToFindElement(findElementDescription, multi);
@@ -4595,12 +4614,16 @@ async function AiInspectElement(options) {
4595
4614
  const parseResult = await callAI(msgs);
4596
4615
  return {
4597
4616
  parseResult,
4598
- elementById,
4599
- systemPrompt
4617
+ elementById
4600
4618
  };
4601
4619
  }
4602
4620
  async function AiExtractElementInfo(options) {
4603
- const { dataQuery, sectionConstraints, context, callAI = callToGetJSONObject } = options;
4621
+ const {
4622
+ dataQuery,
4623
+ sectionConstraints,
4624
+ context,
4625
+ callAI = callToGetJSONObject
4626
+ } = options;
4604
4627
  const systemPrompt = systemPromptToExtract(dataQuery, sectionConstraints);
4605
4628
  const { screenshotBase64 } = context;
4606
4629
  const { description, elementById } = await describeUserPage(context);
@@ -4625,8 +4648,7 @@ async function AiExtractElementInfo(options) {
4625
4648
  const parseResult = await callAI(msgs);
4626
4649
  return {
4627
4650
  parseResult,
4628
- elementById,
4629
- systemPrompt
4651
+ elementById
4630
4652
  };
4631
4653
  }
4632
4654
  export {
package/dist/es/image.js CHANGED
@@ -19,7 +19,8 @@ function base64Encoded(image, withHeader = true) {
19
19
  }
20
20
  if (image.endsWith("png")) {
21
21
  return `data:image/png;base64,${imageBuffer.toString("base64")}`;
22
- } else if (image.endsWith("jpg") || image.endsWith("jpeg")) {
22
+ }
23
+ if (image.endsWith("jpg") || image.endsWith("jpeg")) {
23
24
  return `data:image/jpeg;base64,${imageBuffer.toString("base64")}`;
24
25
  }
25
26
  throw new Error("unsupported image type");
@@ -118,14 +119,19 @@ async function alignCoordByTrim(image, centerRect) {
118
119
 
119
120
  // src/image/visualization.ts
120
121
  import { Buffer as Buffer3 } from "buffer";
121
- import Sharp3 from "sharp";
122
122
 
123
123
  // src/utils.ts
124
+ import assert2 from "assert";
125
+ import { randomUUID } from "crypto";
126
+ import {
127
+ copyFileSync,
128
+ existsSync,
129
+ mkdirSync,
130
+ readFileSync as readFileSync2,
131
+ writeFileSync
132
+ } from "fs";
124
133
  import { tmpdir } from "os";
125
134
  import { basename, join } from "path";
126
- import { copyFileSync, existsSync, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "fs";
127
- import { randomUUID } from "crypto";
128
- import assert2 from "assert";
129
135
  var pkg;
130
136
  function getPkgInfo() {
131
137
  if (pkg) {
@@ -141,12 +147,11 @@ function getPkgInfo() {
141
147
  const { name, version } = JSON.parse(readFileSync2(pkgJsonFile, "utf-8"));
142
148
  pkg = { name, version };
143
149
  return pkg;
144
- } else {
145
- return {
146
- name: "midscene-unknown-page-name",
147
- version: "0.0.0"
148
- };
149
150
  }
151
+ return {
152
+ name: "midscene-unknown-page-name",
153
+ version: "0.0.0"
154
+ };
150
155
  }
151
156
  var logDir = join(process.cwd(), "./midscene_run/");
152
157
  function getTmpDir() {
@@ -160,6 +165,7 @@ function getTmpFile(fileExtWithoutDot) {
160
165
  }
161
166
 
162
167
  // src/image/visualization.ts
168
+ import Sharp3 from "sharp";
163
169
  var colors = [
164
170
  {
165
171
  name: "Red",