@midscene/core 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +7 -0
- package/dist/es/ai-model.js +52 -30
- package/dist/es/image.js +16 -10
- package/dist/es/index.js +594 -476
- package/dist/es/utils.js +14 -9
- package/dist/lib/ai-model.js +58 -42
- package/dist/lib/image.js +24 -24
- package/dist/lib/index.js +611 -499
- package/dist/lib/utils.js +32 -33
- package/dist/types/ai-model.d.ts +3 -5
- package/dist/types/image.d.ts +1 -1
- package/dist/types/index.d.ts +19 -19
- package/dist/types/{types-1f7912d5.d.ts → types-2c92867c.d.ts} +33 -15
- package/dist/types/{util-3a13ce3d.d.ts → util-3931e76e.d.ts} +1 -1
- package/dist/types/utils.d.ts +1 -1
- package/package.json +5 -4
package/LICENSE
CHANGED
package/README.md
ADDED
package/dist/es/ai-model.js
CHANGED
|
@@ -1006,7 +1006,6 @@ var require_dist = __commonJS({
|
|
|
1006
1006
|
|
|
1007
1007
|
// src/ai-model/openai.ts
|
|
1008
1008
|
import assert from "assert";
|
|
1009
|
-
import OpenAI from "openai";
|
|
1010
1009
|
|
|
1011
1010
|
// ../../node_modules/.pnpm/langsmith@0.1.36_openai@4.47.1/node_modules/langsmith/dist/traceable.js
|
|
1012
1011
|
import { AsyncLocalStorage } from "async_hooks";
|
|
@@ -4221,11 +4220,12 @@ var wrapOpenAI = (openai, options) => {
|
|
|
4221
4220
|
};
|
|
4222
4221
|
|
|
4223
4222
|
// src/ai-model/openai.ts
|
|
4223
|
+
import OpenAI from "openai";
|
|
4224
4224
|
var envConfigKey = "MIDSCENE_OPENAI_INIT_CONFIG_JSON";
|
|
4225
4225
|
var envModelKey = "MIDSCENE_MODEL_NAME";
|
|
4226
4226
|
var envSmithDebug = "MIDSCENE_LANGSMITH_DEBUG";
|
|
4227
4227
|
var extraConfig = {};
|
|
4228
|
-
if (typeof process.env[envConfigKey] === "string") {
|
|
4228
|
+
if (typeof process.env[envConfigKey] === "string" && process.env[envConfigKey]) {
|
|
4229
4229
|
console.log("config for openai loaded");
|
|
4230
4230
|
extraConfig = JSON.parse(process.env[envConfigKey]);
|
|
4231
4231
|
}
|
|
@@ -4294,9 +4294,9 @@ You are an expert in software page image (2D) and page element text analysis.
|
|
|
4294
4294
|
"elements": [
|
|
4295
4295
|
// If no matching elements are found, return an empty array []
|
|
4296
4296
|
{
|
|
4297
|
-
"reason": "
|
|
4298
|
-
"text": "
|
|
4299
|
-
"id": "
|
|
4297
|
+
"reason": "PLACEHOLDER", // The thought process for finding the element, replace PLACEHOLDER with your thought process
|
|
4298
|
+
"text": "PLACEHOLDER", // Replace PLACEHOLDER with the text of elementInfo, if none, leave empty
|
|
4299
|
+
"id": "PLACEHOLDER" // Replace PLACEHOLDER with the ID of elementInfo
|
|
4300
4300
|
}
|
|
4301
4301
|
// More elements...
|
|
4302
4302
|
],
|
|
@@ -4415,16 +4415,24 @@ import Sharp2 from "sharp";
|
|
|
4415
4415
|
|
|
4416
4416
|
// src/image/visualization.ts
|
|
4417
4417
|
import { Buffer as Buffer4 } from "buffer";
|
|
4418
|
-
import Sharp3 from "sharp";
|
|
4419
4418
|
|
|
4420
4419
|
// src/utils.ts
|
|
4420
|
+
import assert3 from "assert";
|
|
4421
|
+
import { randomUUID } from "crypto";
|
|
4422
|
+
import {
|
|
4423
|
+
copyFileSync,
|
|
4424
|
+
existsSync,
|
|
4425
|
+
mkdirSync,
|
|
4426
|
+
readFileSync as readFileSync2,
|
|
4427
|
+
writeFileSync
|
|
4428
|
+
} from "fs";
|
|
4421
4429
|
import { tmpdir } from "os";
|
|
4422
4430
|
import { basename, join } from "path";
|
|
4423
|
-
import { copyFileSync, existsSync, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "fs";
|
|
4424
|
-
import { randomUUID } from "crypto";
|
|
4425
|
-
import assert3 from "assert";
|
|
4426
4431
|
var logDir = join(process.cwd(), "./midscene_run/");
|
|
4427
4432
|
|
|
4433
|
+
// src/image/visualization.ts
|
|
4434
|
+
import Sharp3 from "sharp";
|
|
4435
|
+
|
|
4428
4436
|
// src/ai-model/prompt/util.ts
|
|
4429
4437
|
var characteristic = "You are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users.";
|
|
4430
4438
|
var contextFormatIntro = `
|
|
@@ -4549,27 +4557,38 @@ async function describeUserPage(context) {
|
|
|
4549
4557
|
};
|
|
4550
4558
|
}
|
|
4551
4559
|
function cropfieldInformation(elementsInfo) {
|
|
4552
|
-
const elementInfosDescription = elementsInfo.map(
|
|
4553
|
-
|
|
4554
|
-
|
|
4555
|
-
|
|
4556
|
-
const
|
|
4557
|
-
|
|
4558
|
-
|
|
4559
|
-
|
|
4560
|
-
|
|
4561
|
-
|
|
4562
|
-
|
|
4563
|
-
|
|
4564
|
-
|
|
4565
|
-
|
|
4566
|
-
|
|
4560
|
+
const elementInfosDescription = elementsInfo.map(
|
|
4561
|
+
(item) => {
|
|
4562
|
+
const { id, attributes = {}, rect, content } = item;
|
|
4563
|
+
const tailorContent = truncateText(content);
|
|
4564
|
+
const tailorAttributes = Object.keys(attributes).reduce(
|
|
4565
|
+
(res, currentKey) => {
|
|
4566
|
+
const attributeVal = attributes[currentKey];
|
|
4567
|
+
res[currentKey] = truncateText(attributeVal);
|
|
4568
|
+
return res;
|
|
4569
|
+
},
|
|
4570
|
+
{}
|
|
4571
|
+
);
|
|
4572
|
+
return {
|
|
4573
|
+
id,
|
|
4574
|
+
attributes: tailorAttributes,
|
|
4575
|
+
rect,
|
|
4576
|
+
content: tailorContent
|
|
4577
|
+
};
|
|
4578
|
+
}
|
|
4579
|
+
);
|
|
4567
4580
|
return JSON.stringify(elementInfosDescription);
|
|
4568
4581
|
}
|
|
4569
4582
|
|
|
4570
4583
|
// src/ai-model/inspect.ts
|
|
4584
|
+
import assert5 from "assert";
|
|
4571
4585
|
async function AiInspectElement(options) {
|
|
4572
|
-
const {
|
|
4586
|
+
const {
|
|
4587
|
+
context,
|
|
4588
|
+
multi,
|
|
4589
|
+
findElementDescription,
|
|
4590
|
+
callAI = callToGetJSONObject
|
|
4591
|
+
} = options;
|
|
4573
4592
|
const { screenshotBase64 } = context;
|
|
4574
4593
|
const { description, elementById } = await describeUserPage(context);
|
|
4575
4594
|
const systemPrompt = systemPromptToFindElement(findElementDescription, multi);
|
|
@@ -4595,12 +4614,16 @@ async function AiInspectElement(options) {
|
|
|
4595
4614
|
const parseResult = await callAI(msgs);
|
|
4596
4615
|
return {
|
|
4597
4616
|
parseResult,
|
|
4598
|
-
elementById
|
|
4599
|
-
systemPrompt
|
|
4617
|
+
elementById
|
|
4600
4618
|
};
|
|
4601
4619
|
}
|
|
4602
4620
|
async function AiExtractElementInfo(options) {
|
|
4603
|
-
const {
|
|
4621
|
+
const {
|
|
4622
|
+
dataQuery,
|
|
4623
|
+
sectionConstraints,
|
|
4624
|
+
context,
|
|
4625
|
+
callAI = callToGetJSONObject
|
|
4626
|
+
} = options;
|
|
4604
4627
|
const systemPrompt = systemPromptToExtract(dataQuery, sectionConstraints);
|
|
4605
4628
|
const { screenshotBase64 } = context;
|
|
4606
4629
|
const { description, elementById } = await describeUserPage(context);
|
|
@@ -4625,8 +4648,7 @@ async function AiExtractElementInfo(options) {
|
|
|
4625
4648
|
const parseResult = await callAI(msgs);
|
|
4626
4649
|
return {
|
|
4627
4650
|
parseResult,
|
|
4628
|
-
elementById
|
|
4629
|
-
systemPrompt
|
|
4651
|
+
elementById
|
|
4630
4652
|
};
|
|
4631
4653
|
}
|
|
4632
4654
|
export {
|
package/dist/es/image.js
CHANGED
|
@@ -19,7 +19,8 @@ function base64Encoded(image, withHeader = true) {
|
|
|
19
19
|
}
|
|
20
20
|
if (image.endsWith("png")) {
|
|
21
21
|
return `data:image/png;base64,${imageBuffer.toString("base64")}`;
|
|
22
|
-
}
|
|
22
|
+
}
|
|
23
|
+
if (image.endsWith("jpg") || image.endsWith("jpeg")) {
|
|
23
24
|
return `data:image/jpeg;base64,${imageBuffer.toString("base64")}`;
|
|
24
25
|
}
|
|
25
26
|
throw new Error("unsupported image type");
|
|
@@ -118,14 +119,19 @@ async function alignCoordByTrim(image, centerRect) {
|
|
|
118
119
|
|
|
119
120
|
// src/image/visualization.ts
|
|
120
121
|
import { Buffer as Buffer3 } from "buffer";
|
|
121
|
-
import Sharp3 from "sharp";
|
|
122
122
|
|
|
123
123
|
// src/utils.ts
|
|
124
|
+
import assert2 from "assert";
|
|
125
|
+
import { randomUUID } from "crypto";
|
|
126
|
+
import {
|
|
127
|
+
copyFileSync,
|
|
128
|
+
existsSync,
|
|
129
|
+
mkdirSync,
|
|
130
|
+
readFileSync as readFileSync2,
|
|
131
|
+
writeFileSync
|
|
132
|
+
} from "fs";
|
|
124
133
|
import { tmpdir } from "os";
|
|
125
134
|
import { basename, join } from "path";
|
|
126
|
-
import { copyFileSync, existsSync, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "fs";
|
|
127
|
-
import { randomUUID } from "crypto";
|
|
128
|
-
import assert2 from "assert";
|
|
129
135
|
var pkg;
|
|
130
136
|
function getPkgInfo() {
|
|
131
137
|
if (pkg) {
|
|
@@ -141,12 +147,11 @@ function getPkgInfo() {
|
|
|
141
147
|
const { name, version } = JSON.parse(readFileSync2(pkgJsonFile, "utf-8"));
|
|
142
148
|
pkg = { name, version };
|
|
143
149
|
return pkg;
|
|
144
|
-
} else {
|
|
145
|
-
return {
|
|
146
|
-
name: "midscene-unknown-page-name",
|
|
147
|
-
version: "0.0.0"
|
|
148
|
-
};
|
|
149
150
|
}
|
|
151
|
+
return {
|
|
152
|
+
name: "midscene-unknown-page-name",
|
|
153
|
+
version: "0.0.0"
|
|
154
|
+
};
|
|
150
155
|
}
|
|
151
156
|
var logDir = join(process.cwd(), "./midscene_run/");
|
|
152
157
|
function getTmpDir() {
|
|
@@ -160,6 +165,7 @@ function getTmpFile(fileExtWithoutDot) {
|
|
|
160
165
|
}
|
|
161
166
|
|
|
162
167
|
// src/image/visualization.ts
|
|
168
|
+
import Sharp3 from "sharp";
|
|
163
169
|
var colors = [
|
|
164
170
|
{
|
|
165
171
|
name: "Red",
|