@intuned/browser-dev 0.1.4-dev.1 → 0.1.5-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/dist/ai/export.d.ts +1 -1
- package/dist/ai/index.d.ts +1 -1
- package/dist/ai/isPageLoaded.js +14 -3
- package/dist/ai/tests/testIsPageLoaded.spec.js +3 -3
- package/dist/helpers/downloadFile.js +37 -0
- package/dist/helpers/export.d.ts +10 -7
- package/dist/helpers/gotoUrl.js +1 -1
- package/dist/helpers/index.d.ts +10 -7
- package/dist/helpers/index.js +0 -19
- package/dist/helpers/tests/testDownloadFile.spec.js +41 -6
- package/dist/helpers/tests/testInjectAttachmentType.spec.js +482 -0
- package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +35 -31
- package/dist/helpers/types/Attachment.js +11 -6
- package/dist/helpers/types/index.js +1 -20
- package/dist/helpers/uploadFileToS3.js +2 -2
- package/dist/helpers/validateDataUsingSchema.js +30 -71
- package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +4 -4
- package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +55 -8
- package/generated-docs/ai/functions/extractStructuredData.mdx +5 -5
- package/generated-docs/ai/functions/isPageLoaded.mdx +1 -0
- package/generated-docs/helpers/functions/clickButtonAndWait.mdx +63 -0
- package/generated-docs/helpers/functions/clickUntilExhausted.mdx +112 -0
- package/generated-docs/helpers/functions/scrollToLoadContent.mdx +1 -7
- package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +5 -5
- package/how-to-generate-docs.md +1 -0
- package/package.json +1 -1
- package/dist/helpers/types/CustomTypeRegistry.js +0 -48
|
@@ -23,10 +23,10 @@ var _extendedTest = require("../../common/extendedTest");
|
|
|
23
23
|
age: 30,
|
|
24
24
|
extra_field: "allowed"
|
|
25
25
|
};
|
|
26
|
-
|
|
26
|
+
(0, _extendedTest.expect)(() => (0, _validateDataUsingSchema.validateDataUsingSchema)({
|
|
27
27
|
data,
|
|
28
28
|
schema
|
|
29
|
-
})).
|
|
29
|
+
})).not.toThrow();
|
|
30
30
|
});
|
|
31
31
|
(0, _extendedTest.test)("should validate data using schema with invalid data (missing required field)", async () => {
|
|
32
32
|
const schema = {
|
|
@@ -44,12 +44,12 @@ var _extendedTest = require("../../common/extendedTest");
|
|
|
44
44
|
const invalidData = {
|
|
45
45
|
name: "John Doe"
|
|
46
46
|
};
|
|
47
|
-
|
|
47
|
+
(0, _extendedTest.expect)(() => (0, _validateDataUsingSchema.validateDataUsingSchema)({
|
|
48
48
|
data: invalidData,
|
|
49
49
|
schema
|
|
50
|
-
})).
|
|
50
|
+
})).toThrow(_types.ValidationError);
|
|
51
51
|
try {
|
|
52
|
-
|
|
52
|
+
(0, _validateDataUsingSchema.validateDataUsingSchema)({
|
|
53
53
|
data: invalidData,
|
|
54
54
|
schema
|
|
55
55
|
});
|
|
@@ -84,10 +84,10 @@ var _extendedTest = require("../../common/extendedTest");
|
|
|
84
84
|
age: 25,
|
|
85
85
|
other: "value"
|
|
86
86
|
}];
|
|
87
|
-
|
|
87
|
+
(0, _extendedTest.expect)(() => (0, _validateDataUsingSchema.validateDataUsingSchema)({
|
|
88
88
|
data,
|
|
89
89
|
schema
|
|
90
|
-
})).
|
|
90
|
+
})).not.toThrow();
|
|
91
91
|
});
|
|
92
92
|
(0, _extendedTest.test)("should validate data using schema with list containing invalid object", async () => {
|
|
93
93
|
const schema = {
|
|
@@ -112,12 +112,12 @@ var _extendedTest = require("../../common/extendedTest");
|
|
|
112
112
|
name: "Jane Doe",
|
|
113
113
|
age: "25"
|
|
114
114
|
}];
|
|
115
|
-
|
|
115
|
+
(0, _extendedTest.expect)(() => (0, _validateDataUsingSchema.validateDataUsingSchema)({
|
|
116
116
|
data: invalidData,
|
|
117
117
|
schema
|
|
118
|
-
})).
|
|
118
|
+
})).toThrow(_types.ValidationError);
|
|
119
119
|
try {
|
|
120
|
-
|
|
120
|
+
(0, _validateDataUsingSchema.validateDataUsingSchema)({
|
|
121
121
|
data: invalidData,
|
|
122
122
|
schema
|
|
123
123
|
});
|
|
@@ -128,7 +128,7 @@ var _extendedTest = require("../../common/extendedTest");
|
|
|
128
128
|
(0, _extendedTest.expect)(error.data).toEqual(invalidData);
|
|
129
129
|
}
|
|
130
130
|
});
|
|
131
|
-
(0, _extendedTest.test)("should validate data using schema with attachment
|
|
131
|
+
(0, _extendedTest.test)("should validate data using schema with attachment type", async () => {
|
|
132
132
|
const schema = {
|
|
133
133
|
type: "object",
|
|
134
134
|
properties: {
|
|
@@ -146,18 +146,19 @@ var _extendedTest = require("../../common/extendedTest");
|
|
|
146
146
|
fileName: "documents/report.pdf",
|
|
147
147
|
bucket: "my-bucket",
|
|
148
148
|
region: "us-east-1",
|
|
149
|
+
key: "documents/report.pdf",
|
|
149
150
|
endpoint: null,
|
|
150
151
|
suggestedFileName: "Monthly Report.pdf",
|
|
151
152
|
fileType: "document"
|
|
152
153
|
},
|
|
153
154
|
name: "Test File Upload"
|
|
154
155
|
};
|
|
155
|
-
|
|
156
|
+
(0, _extendedTest.expect)(() => (0, _validateDataUsingSchema.validateDataUsingSchema)({
|
|
156
157
|
data: validData,
|
|
157
158
|
schema
|
|
158
|
-
})).
|
|
159
|
+
})).not.toThrow();
|
|
159
160
|
});
|
|
160
|
-
(0, _extendedTest.test)("should validate data using schema with invalid attachment
|
|
161
|
+
(0, _extendedTest.test)("should validate data using schema with invalid attachment type", async () => {
|
|
161
162
|
const schema = {
|
|
162
163
|
type: "object",
|
|
163
164
|
properties: {
|
|
@@ -176,19 +177,18 @@ var _extendedTest = require("../../common/extendedTest");
|
|
|
176
177
|
},
|
|
177
178
|
name: "Test File Upload"
|
|
178
179
|
};
|
|
179
|
-
|
|
180
|
+
(0, _extendedTest.expect)(() => (0, _validateDataUsingSchema.validateDataUsingSchema)({
|
|
180
181
|
data: invalidData,
|
|
181
182
|
schema
|
|
182
|
-
})).
|
|
183
|
+
})).toThrow(_types.ValidationError);
|
|
183
184
|
try {
|
|
184
|
-
|
|
185
|
+
(0, _validateDataUsingSchema.validateDataUsingSchema)({
|
|
185
186
|
data: invalidData,
|
|
186
187
|
schema
|
|
187
188
|
});
|
|
188
189
|
} catch (error) {
|
|
189
190
|
(0, _extendedTest.expect)(error).toBeInstanceOf(_types.ValidationError);
|
|
190
191
|
(0, _extendedTest.expect)(error.message).toContain("Data validation failed");
|
|
191
|
-
(0, _extendedTest.expect)(error.message).toContain("does not match custom type 'attachment'");
|
|
192
192
|
(0, _extendedTest.expect)(error.data).toEqual(invalidData);
|
|
193
193
|
}
|
|
194
194
|
});
|
|
@@ -213,6 +213,7 @@ var _extendedTest = require("../../common/extendedTest");
|
|
|
213
213
|
fileName: "doc1.pdf",
|
|
214
214
|
bucket: "bucket1",
|
|
215
215
|
region: "us-east-1",
|
|
216
|
+
key: "files/doc1.pdf",
|
|
216
217
|
suggestedFileName: "Document 1.pdf"
|
|
217
218
|
},
|
|
218
219
|
description: "First document"
|
|
@@ -221,14 +222,15 @@ var _extendedTest = require("../../common/extendedTest");
|
|
|
221
222
|
fileName: "doc2.pdf",
|
|
222
223
|
bucket: "bucket2",
|
|
223
224
|
region: "us-west-2",
|
|
225
|
+
key: "files/doc2.pdf",
|
|
224
226
|
suggestedFileName: "Document 2.pdf"
|
|
225
227
|
},
|
|
226
228
|
description: "Second document"
|
|
227
229
|
}];
|
|
228
|
-
|
|
230
|
+
(0, _extendedTest.expect)(() => (0, _validateDataUsingSchema.validateDataUsingSchema)({
|
|
229
231
|
data: validData,
|
|
230
232
|
schema
|
|
231
|
-
})).
|
|
233
|
+
})).not.toThrow();
|
|
232
234
|
});
|
|
233
235
|
(0, _extendedTest.test)("should validate data using schema with invalid attachment in array", async () => {
|
|
234
236
|
const schema = {
|
|
@@ -251,6 +253,7 @@ var _extendedTest = require("../../common/extendedTest");
|
|
|
251
253
|
fileName: "doc1.pdf",
|
|
252
254
|
bucket: "bucket1",
|
|
253
255
|
region: "us-east-1",
|
|
256
|
+
key: "files/doc1.pdf",
|
|
254
257
|
suggestedFileName: "Document 1.pdf"
|
|
255
258
|
},
|
|
256
259
|
description: "First document"
|
|
@@ -260,23 +263,22 @@ var _extendedTest = require("../../common/extendedTest");
|
|
|
260
263
|
},
|
|
261
264
|
description: "Second document"
|
|
262
265
|
}];
|
|
263
|
-
|
|
266
|
+
(0, _extendedTest.expect)(() => (0, _validateDataUsingSchema.validateDataUsingSchema)({
|
|
264
267
|
data: invalidData,
|
|
265
268
|
schema
|
|
266
|
-
})).
|
|
269
|
+
})).toThrow(_types.ValidationError);
|
|
267
270
|
try {
|
|
268
|
-
|
|
271
|
+
(0, _validateDataUsingSchema.validateDataUsingSchema)({
|
|
269
272
|
data: invalidData,
|
|
270
273
|
schema
|
|
271
274
|
});
|
|
272
275
|
} catch (error) {
|
|
273
276
|
(0, _extendedTest.expect)(error).toBeInstanceOf(_types.ValidationError);
|
|
274
277
|
(0, _extendedTest.expect)(error.message).toContain("Data validation failed");
|
|
275
|
-
(0, _extendedTest.expect)(error.message).toContain("does not match custom type 'attachment'");
|
|
276
278
|
(0, _extendedTest.expect)(error.data).toEqual(invalidData);
|
|
277
279
|
}
|
|
278
280
|
});
|
|
279
|
-
(0, _extendedTest.test)("should handle mixed
|
|
281
|
+
(0, _extendedTest.test)("should handle mixed attachment and standard types", async () => {
|
|
280
282
|
const schema = {
|
|
281
283
|
type: "object",
|
|
282
284
|
properties: {
|
|
@@ -300,31 +302,33 @@ var _extendedTest = require("../../common/extendedTest");
|
|
|
300
302
|
fileName: "mixed-test.pdf",
|
|
301
303
|
bucket: "test-bucket",
|
|
302
304
|
region: "us-east-1",
|
|
305
|
+
key: "files/mixed-test.pdf",
|
|
303
306
|
suggestedFileName: "Mixed Test.pdf"
|
|
304
307
|
},
|
|
305
308
|
title: "Mixed validation test",
|
|
306
309
|
count: 42,
|
|
307
310
|
active: true
|
|
308
311
|
};
|
|
309
|
-
|
|
312
|
+
(0, _extendedTest.expect)(() => (0, _validateDataUsingSchema.validateDataUsingSchema)({
|
|
310
313
|
data: validData,
|
|
311
314
|
schema
|
|
312
|
-
})).
|
|
315
|
+
})).not.toThrow();
|
|
313
316
|
const invalidStandardData = {
|
|
314
317
|
attachment: {
|
|
315
318
|
fileName: "mixed-test.pdf",
|
|
316
319
|
bucket: "test-bucket",
|
|
317
320
|
region: "us-east-1",
|
|
321
|
+
key: "files/mixed-test.pdf",
|
|
318
322
|
suggestedFileName: "Mixed Test.pdf"
|
|
319
323
|
},
|
|
320
324
|
title: "Mixed validation test",
|
|
321
325
|
count: "not-a-number",
|
|
322
326
|
active: true
|
|
323
327
|
};
|
|
324
|
-
|
|
328
|
+
(0, _extendedTest.expect)(() => (0, _validateDataUsingSchema.validateDataUsingSchema)({
|
|
325
329
|
data: invalidStandardData,
|
|
326
330
|
schema
|
|
327
|
-
})).
|
|
331
|
+
})).toThrow(_types.ValidationError);
|
|
328
332
|
const invalidCustomData = {
|
|
329
333
|
attachment: {
|
|
330
334
|
fileName: "mixed-test.pdf"
|
|
@@ -333,10 +337,10 @@ var _extendedTest = require("../../common/extendedTest");
|
|
|
333
337
|
count: 42,
|
|
334
338
|
active: true
|
|
335
339
|
};
|
|
336
|
-
|
|
340
|
+
(0, _extendedTest.expect)(() => (0, _validateDataUsingSchema.validateDataUsingSchema)({
|
|
337
341
|
data: invalidCustomData,
|
|
338
342
|
schema
|
|
339
|
-
})).
|
|
343
|
+
})).toThrow(_types.ValidationError);
|
|
340
344
|
});
|
|
341
345
|
});
|
|
342
346
|
});
|
|
@@ -12,16 +12,18 @@ var _zod = require("zod");
|
|
|
12
12
|
const AttachmentTypeSchema = exports.AttachmentTypeSchema = _zod.z.literal("document");
|
|
13
13
|
const AttachmentSchema = exports.AttachmentSchema = _zod.z.object({
|
|
14
14
|
fileName: _zod.z.string(),
|
|
15
|
+
key: _zod.z.string(),
|
|
15
16
|
bucket: _zod.z.string(),
|
|
16
17
|
region: _zod.z.string(),
|
|
17
18
|
suggestedFileName: _zod.z.string(),
|
|
18
|
-
endpoint: _zod.z.string().optional(),
|
|
19
|
-
fileType: AttachmentTypeSchema.optional()
|
|
19
|
+
endpoint: _zod.z.string().nullable().optional(),
|
|
20
|
+
fileType: AttachmentTypeSchema.optional().nullable()
|
|
20
21
|
});
|
|
21
22
|
class Attachment {
|
|
22
|
-
constructor(fileName, bucket, region, suggestedFileName, endpoint, fileType) {
|
|
23
|
+
constructor(fileName, key, bucket, region, suggestedFileName, endpoint, fileType) {
|
|
23
24
|
const validatedData = AttachmentSchema.parse({
|
|
24
25
|
fileName,
|
|
26
|
+
key,
|
|
25
27
|
bucket,
|
|
26
28
|
region,
|
|
27
29
|
suggestedFileName,
|
|
@@ -29,8 +31,10 @@ class Attachment {
|
|
|
29
31
|
fileType
|
|
30
32
|
});
|
|
31
33
|
this.fileName = validatedData.fileName;
|
|
34
|
+
this.key = validatedData.key;
|
|
32
35
|
this.bucket = validatedData.bucket;
|
|
33
36
|
this.region = validatedData.region;
|
|
37
|
+
this.key = validatedData.key;
|
|
34
38
|
this.suggestedFileName = validatedData.suggestedFileName;
|
|
35
39
|
this.endpoint = validatedData.endpoint;
|
|
36
40
|
this.fileType = validatedData.fileType;
|
|
@@ -41,6 +45,7 @@ class Attachment {
|
|
|
41
45
|
toDict() {
|
|
42
46
|
return {
|
|
43
47
|
fileName: this.fileName,
|
|
48
|
+
key: this.key,
|
|
44
49
|
bucket: this.bucket,
|
|
45
50
|
region: this.region,
|
|
46
51
|
suggestedFileName: this.suggestedFileName,
|
|
@@ -54,13 +59,13 @@ class Attachment {
|
|
|
54
59
|
}
|
|
55
60
|
static fromDict(data) {
|
|
56
61
|
const validatedData = AttachmentSchema.parse(data);
|
|
57
|
-
return new Attachment(validatedData.fileName, validatedData.bucket, validatedData.region, validatedData.suggestedFileName, validatedData.endpoint, validatedData.fileType);
|
|
62
|
+
return new Attachment(validatedData.fileName, validatedData.key, validatedData.bucket, validatedData.region, validatedData.suggestedFileName, validatedData.endpoint, validatedData.fileType);
|
|
58
63
|
}
|
|
59
64
|
async getSignedUrl(expiration = 3600 * 24 * 5) {
|
|
60
65
|
if ((0, _utils.isGenerateCodeMode)()) {
|
|
61
66
|
return "https://not.real.com";
|
|
62
67
|
}
|
|
63
|
-
const s3Client = (0, _getS3Client.getS3Client)(this.endpoint, this.region);
|
|
68
|
+
const s3Client = (0, _getS3Client.getS3Client)(this.endpoint ?? undefined, this.region);
|
|
64
69
|
try {
|
|
65
70
|
const response = await (0, _s3RequestPresigner.getSignedUrl)(s3Client, new _clientS.GetObjectCommand({
|
|
66
71
|
Bucket: this.bucket,
|
|
@@ -94,7 +99,7 @@ class SignedUrlAttachment extends Attachment {
|
|
|
94
99
|
const bucket = url.hostname.split(".")[0];
|
|
95
100
|
const key = url.pathname.substring(1);
|
|
96
101
|
const validatedUrl = _zod.z.string().url().parse(downloadSignedUrl);
|
|
97
|
-
super(key, bucket, "", suggestedFileName ?? fileName);
|
|
102
|
+
super(fileName, key, bucket, "", suggestedFileName ?? fileName);
|
|
98
103
|
this.downloadSignedUrl = validatedUrl;
|
|
99
104
|
}
|
|
100
105
|
async getSignedUrl() {
|
|
@@ -9,24 +9,6 @@ Object.defineProperty(exports, "Attachment", {
|
|
|
9
9
|
return _Attachment.Attachment;
|
|
10
10
|
}
|
|
11
11
|
});
|
|
12
|
-
Object.defineProperty(exports, "AttachmentValidator", {
|
|
13
|
-
enumerable: true,
|
|
14
|
-
get: function () {
|
|
15
|
-
return _CustomTypeRegistry.AttachmentValidator;
|
|
16
|
-
}
|
|
17
|
-
});
|
|
18
|
-
Object.defineProperty(exports, "CustomTypeRegistry", {
|
|
19
|
-
enumerable: true,
|
|
20
|
-
get: function () {
|
|
21
|
-
return _CustomTypeRegistry.CustomTypeRegistry;
|
|
22
|
-
}
|
|
23
|
-
});
|
|
24
|
-
Object.defineProperty(exports, "CustomTypeValidator", {
|
|
25
|
-
enumerable: true,
|
|
26
|
-
get: function () {
|
|
27
|
-
return _CustomTypeRegistry.CustomTypeValidator;
|
|
28
|
-
}
|
|
29
|
-
});
|
|
30
12
|
Object.defineProperty(exports, "RunEnvironment", {
|
|
31
13
|
enumerable: true,
|
|
32
14
|
get: function () {
|
|
@@ -47,5 +29,4 @@ Object.defineProperty(exports, "ValidationError", {
|
|
|
47
29
|
});
|
|
48
30
|
var _ValidationError = require("./ValidationError");
|
|
49
31
|
var _RunEnvironment = require("./RunEnvironment");
|
|
50
|
-
var _Attachment = require("./Attachment");
|
|
51
|
-
var _CustomTypeRegistry = require("./CustomTypeRegistry");
|
|
32
|
+
var _Attachment = require("./Attachment");
|
|
@@ -51,7 +51,7 @@ const uploadFileToS3 = async input => {
|
|
|
51
51
|
if ((0, _utils.isGenerateCodeMode)()) {
|
|
52
52
|
console.log("Uploaded file successfully");
|
|
53
53
|
if (isDownloadedFile) {
|
|
54
|
-
return new _Attachment.Attachment(`${(0, _uuid.v4)()}/${file.suggestedFilename()}`, "testing_bucket", "testing_region", file.suggestedFilename() || "downloaded_file", endpoint, "document");
|
|
54
|
+
return new _Attachment.Attachment(`${(0, _uuid.v4)()}/${file.suggestedFilename()}`, `${(0, _uuid.v4)()}/${file.suggestedFilename()}`, "testing_bucket", "testing_region", file.suggestedFilename() || "downloaded_file", endpoint, "document");
|
|
55
55
|
}
|
|
56
56
|
}
|
|
57
57
|
let suggestedFileName;
|
|
@@ -93,7 +93,7 @@ const uploadFileToS3 = async input => {
|
|
|
93
93
|
});
|
|
94
94
|
const response = await s3Client.send(command);
|
|
95
95
|
if (response.$metadata.httpStatusCode === 200) {
|
|
96
|
-
return new _Attachment.Attachment(key, bucketName, region, suggestedFileName || fileName, endpoint);
|
|
96
|
+
return new _Attachment.Attachment(key, key, bucketName, region, suggestedFileName || fileName, endpoint);
|
|
97
97
|
} else {
|
|
98
98
|
throw new Error("Error uploading file");
|
|
99
99
|
}
|
|
@@ -3,99 +3,58 @@
|
|
|
3
3
|
Object.defineProperty(exports, "__esModule", {
|
|
4
4
|
value: true
|
|
5
5
|
});
|
|
6
|
+
exports.injectAttachmentType = injectAttachmentType;
|
|
6
7
|
exports.validateDataUsingSchema = void 0;
|
|
7
8
|
var _ajv = _interopRequireDefault(require("ajv"));
|
|
9
|
+
var _zodToJsonSchema = require("zod-to-json-schema");
|
|
8
10
|
var _types = require("./types");
|
|
11
|
+
var _Attachment = require("./types/Attachment");
|
|
9
12
|
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
|
|
10
13
|
const ajv = new _ajv.default({
|
|
11
14
|
strict: false,
|
|
12
15
|
removeAdditional: false
|
|
13
16
|
});
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
});
|
|
19
|
-
const
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
const processedSchema = {
|
|
28
|
-
...schema
|
|
29
|
-
};
|
|
30
|
-
if (processedSchema.type && typeof processedSchema.type === "string") {
|
|
31
|
-
if (customTypeRegistry.isCustomType(processedSchema.type)) {
|
|
32
|
-
return {
|
|
33
|
-
type: "object",
|
|
34
|
-
additionalProperties: true,
|
|
35
|
-
"x-custom-type": processedSchema.type
|
|
36
|
-
};
|
|
17
|
+
function injectAttachmentType(schema) {
|
|
18
|
+
const schemaCopy = JSON.parse(JSON.stringify(schema));
|
|
19
|
+
const attachmentJsonSchema = (0, _zodToJsonSchema.zodToJsonSchema)(_Attachment.AttachmentSchema, {
|
|
20
|
+
$refStrategy: "none"
|
|
21
|
+
});
|
|
22
|
+
const attachmentDefinition = typeof attachmentJsonSchema === "object" && attachmentJsonSchema !== null ? attachmentJsonSchema : {};
|
|
23
|
+
if (!schemaCopy.$defs) {
|
|
24
|
+
schemaCopy.$defs = {};
|
|
25
|
+
}
|
|
26
|
+
schemaCopy.$defs.attachment = attachmentDefinition;
|
|
27
|
+
function replaceAttachmentType(node) {
|
|
28
|
+
if (typeof node !== "object" || node === null) {
|
|
29
|
+
return node;
|
|
37
30
|
}
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
if (key === "properties" && typeof processedSchema[key] === "object") {
|
|
41
|
-
const newProperties = {};
|
|
42
|
-
for (const propKey in processedSchema[key]) {
|
|
43
|
-
newProperties[propKey] = preprocessSchema(processedSchema[key][propKey]);
|
|
44
|
-
}
|
|
45
|
-
processedSchema[key] = newProperties;
|
|
46
|
-
} else if (key === "items") {
|
|
47
|
-
processedSchema[key] = preprocessSchema(processedSchema[key]);
|
|
48
|
-
} else if (typeof processedSchema[key] === "object") {
|
|
49
|
-
processedSchema[key] = preprocessSchema(processedSchema[key]);
|
|
31
|
+
if (Array.isArray(node)) {
|
|
32
|
+
return node.map(item => replaceAttachmentType(item));
|
|
50
33
|
}
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
if (typeof schema !== "object" || schema === null) {
|
|
57
|
-
return errors;
|
|
58
|
-
}
|
|
59
|
-
if (schema["x-custom-type"]) {
|
|
60
|
-
const customType = schema["x-custom-type"];
|
|
61
|
-
if (!customTypeRegistry.validate(customType, data)) {
|
|
62
|
-
errors.push(`${path} does not match custom type '${customType}'`);
|
|
34
|
+
const result = {};
|
|
35
|
+
if (node.type === "attachment") {
|
|
36
|
+
return {
|
|
37
|
+
$ref: "#/$defs/attachment"
|
|
38
|
+
};
|
|
63
39
|
}
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
if (schema.type === "object" && schema.properties && typeof data === "object" && data !== null && !Array.isArray(data)) {
|
|
67
|
-
for (const [propKey, propSchema] of Object.entries(schema.properties)) {
|
|
68
|
-
if (Object.prototype.hasOwnProperty.call(data, propKey)) {
|
|
69
|
-
const propPath = path === "root" ? propKey : `${path}.${propKey}`;
|
|
70
|
-
errors.push(...validateCustomTypes(data[propKey], propSchema, propPath));
|
|
71
|
-
}
|
|
40
|
+
for (const key in node) {
|
|
41
|
+
result[key] = replaceAttachmentType(node[key]);
|
|
72
42
|
}
|
|
43
|
+
return result;
|
|
73
44
|
}
|
|
74
|
-
|
|
75
|
-
data.forEach((item, index) => {
|
|
76
|
-
const itemPath = `${path}[${index}]`;
|
|
77
|
-
errors.push(...validateCustomTypes(item, schema.items, itemPath));
|
|
78
|
-
});
|
|
79
|
-
}
|
|
80
|
-
return errors;
|
|
45
|
+
return replaceAttachmentType(schemaCopy);
|
|
81
46
|
}
|
|
82
|
-
const validateDataUsingSchema =
|
|
47
|
+
const validateDataUsingSchema = input => {
|
|
83
48
|
const {
|
|
84
49
|
data,
|
|
85
50
|
schema
|
|
86
51
|
} = input;
|
|
87
|
-
const processedSchema =
|
|
52
|
+
const processedSchema = injectAttachmentType(schema);
|
|
88
53
|
const validate = ajv.compile(processedSchema);
|
|
89
54
|
const valid = validate(data);
|
|
90
|
-
const errors = [];
|
|
91
55
|
if (!valid) {
|
|
92
56
|
var _validate$errors;
|
|
93
|
-
const
|
|
94
|
-
errors.push(...schemaErrors);
|
|
95
|
-
}
|
|
96
|
-
const customTypeErrors = validateCustomTypes(data, processedSchema);
|
|
97
|
-
errors.push(...customTypeErrors);
|
|
98
|
-
if (errors.length > 0) {
|
|
57
|
+
const errors = ((_validate$errors = validate.errors) === null || _validate$errors === void 0 ? void 0 : _validate$errors.map(err => `${err.instancePath || "root"} ${err.message}`)) || [];
|
|
99
58
|
const errorMessage = errors.join(", ") || "Unknown validation error";
|
|
100
59
|
throw new _types.ValidationError(`Data validation failed: ${errorMessage}`, data);
|
|
101
60
|
}
|
|
@@ -63,15 +63,15 @@ const mockLoadRuntime = _vitest.vi.mocked(_loadRuntime.loadRuntime);
|
|
|
63
63
|
(0, _extendedTest.it)("should use default model when not provided", () => {
|
|
64
64
|
const gateway = new _aiApiGateway.APIGateway({
|
|
65
65
|
apiKey: "sk-test123",
|
|
66
|
-
model: "gpt-
|
|
66
|
+
model: "gpt-5-mini-2025-08-07"
|
|
67
67
|
});
|
|
68
|
-
(0, _extendedTest.expect)(gateway["model"]).toBe("gpt-
|
|
68
|
+
(0, _extendedTest.expect)(gateway["model"]).toBe("gpt-5-mini-2025-08-07");
|
|
69
69
|
});
|
|
70
70
|
(0, _extendedTest.it)("should allow no parameters", () => {
|
|
71
71
|
const gateway = new _aiApiGateway.APIGateway({
|
|
72
|
-
model: "gpt-
|
|
72
|
+
model: "gpt-5-mini-2025-08-07"
|
|
73
73
|
});
|
|
74
|
-
(0, _extendedTest.expect)(gateway["model"]).toBe("gpt-
|
|
74
|
+
(0, _extendedTest.expect)(gateway["model"]).toBe("gpt-5-mini-2025-08-07");
|
|
75
75
|
(0, _extendedTest.expect)(gateway["apiKey"]).toBeUndefined();
|
|
76
76
|
});
|
|
77
77
|
});
|
|
@@ -70,16 +70,63 @@ async function handleNewAiExtraction(params) {
|
|
|
70
70
|
allData.value.forEach((v, i) => {
|
|
71
71
|
_Logger.logger.debug(`ai extraction result for row ${i}: ${JSON.stringify(v)}`);
|
|
72
72
|
});
|
|
73
|
-
const
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
73
|
+
const resultValues = [];
|
|
74
|
+
for (let i = 0; i < allData.value.length; i++) {
|
|
75
|
+
const rowValues = allData.value[i];
|
|
76
|
+
const allValues = Object.entries(rowValues).map(([_, value]) => value);
|
|
77
|
+
const rowValuesMatches = await (0, _findDomMatches.getDomMatches)(pageAndSearchRegion.page, pageAndSearchRegion.searchRegionHandler, allValues);
|
|
78
|
+
const rowValuesWithMatchesOnly = Object.entries(rowValues).reduce((acc, [key, value]) => {
|
|
79
|
+
const valueMatches = rowValuesMatches.get(value);
|
|
80
|
+
const bestMatch = (0, _utils.selectBestMatch)(value, valueMatches ?? []);
|
|
81
|
+
if (valueMatches && valueMatches.length > 0 && bestMatch) {
|
|
82
|
+
acc[key] = {
|
|
83
|
+
matchText: bestMatch.matchText,
|
|
84
|
+
matchXpath: bestMatch.matchXpath,
|
|
85
|
+
matchType: bestMatch.matchType
|
|
86
|
+
};
|
|
87
|
+
} else {
|
|
88
|
+
_Logger.logger.debug(`value "${value}" for key "${key}" in row ${i + 1} does not have any matches in the page html, dropped for hallucination protection`);
|
|
89
|
+
}
|
|
90
|
+
return acc;
|
|
91
|
+
}, {});
|
|
92
|
+
resultValues.push({
|
|
93
|
+
rowIndex: i,
|
|
94
|
+
result: rowValuesWithMatchesOnly
|
|
95
|
+
});
|
|
96
|
+
}
|
|
97
|
+
const matches = await (0, _getListMatches.getListMatches)(pageAndSearchRegion.page, pageAndSearchRegion.searchRegionHandler, resultValues.map(v => ({
|
|
98
|
+
rowIndex: v.rowIndex,
|
|
99
|
+
result: Object.fromEntries(Object.entries(v.result).map(([key, value]) => [key, value.matchText]))
|
|
100
|
+
})));
|
|
101
|
+
let containerPath = null;
|
|
102
|
+
let fullContainerXpath = null;
|
|
103
|
+
if (resultValues.length > 0 && resultValues[0].result[primaryPropertyName]) {
|
|
104
|
+
const primaryXpath = resultValues[0].result[primaryPropertyName].matchXpath;
|
|
105
|
+
if (primaryXpath) {
|
|
106
|
+
const parentXpath = await pageAndSearchRegion.page.evaluate(xpath => {
|
|
107
|
+
var _window$__INTUNED__;
|
|
108
|
+
const result = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
|
|
109
|
+
const element = result.singleNodeValue;
|
|
110
|
+
if (!element || !element.parentElement) return null;
|
|
111
|
+
if ((_window$__INTUNED__ = window.__INTUNED__) !== null && _window$__INTUNED__ !== void 0 && _window$__INTUNED__.getElementXPath) {
|
|
112
|
+
return window.__INTUNED__.getElementXPath(element.parentElement);
|
|
113
|
+
}
|
|
114
|
+
return null;
|
|
115
|
+
}, primaryXpath);
|
|
116
|
+
if (parentXpath) {
|
|
117
|
+
fullContainerXpath = parentXpath;
|
|
118
|
+
if (hasSearchRegionContainer) {
|
|
119
|
+
containerPath = await (0, _getRelativeContainerXpathSelector.getRelativeContainerXpathSelector)(pageAndSearchRegion.searchRegion, parentXpath);
|
|
120
|
+
} else {
|
|
121
|
+
containerPath = parentXpath;
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
}
|
|
79
126
|
return (0, _neverthrow.ok)({
|
|
80
127
|
resultValues,
|
|
81
|
-
containerPath
|
|
82
|
-
fullContainerXpath
|
|
128
|
+
containerPath,
|
|
129
|
+
fullContainerXpath,
|
|
83
130
|
matches
|
|
84
131
|
});
|
|
85
132
|
}
|
|
@@ -34,7 +34,7 @@ await page.goto("https://books.toscrape.com/")
|
|
|
34
34
|
const product = await extractStructuredData({
|
|
35
35
|
source: page,
|
|
36
36
|
strategy: "HTML",
|
|
37
|
-
model: "gpt-4o"
|
|
37
|
+
model: "gpt-4o",
|
|
38
38
|
dataSchema: {
|
|
39
39
|
type: "object",
|
|
40
40
|
properties: {
|
|
@@ -47,8 +47,8 @@ const product = await extractStructuredData({
|
|
|
47
47
|
},
|
|
48
48
|
prompt: "Extract product details from this e page"
|
|
49
49
|
});
|
|
50
|
-
}
|
|
51
50
|
console.log(`Found book: ${product.name} - ${product.price}`);
|
|
51
|
+
}
|
|
52
52
|
```
|
|
53
53
|
|
|
54
54
|
```typescript Locator source
|
|
@@ -59,7 +59,7 @@ const articleContainer = page.locator("article").first()
|
|
|
59
59
|
const article = await extractStructuredData({
|
|
60
60
|
source: articleContainer,
|
|
61
61
|
strategy: "MARKDOWN",
|
|
62
|
-
model: "claude-3",
|
|
62
|
+
model: "claude-3-5-sonnet-20240620",
|
|
63
63
|
dataSchema: {
|
|
64
64
|
type: "object",
|
|
65
65
|
properties: {
|
|
@@ -72,8 +72,8 @@ const article = await extractStructuredData({
|
|
|
72
72
|
},
|
|
73
73
|
maxRetries: 5
|
|
74
74
|
});
|
|
75
|
-
}
|
|
76
75
|
console.log(`Found book: ${article.title}`);
|
|
76
|
+
}
|
|
77
77
|
```
|
|
78
78
|
|
|
79
79
|
</CodeGroup>
|
|
@@ -138,7 +138,7 @@ export declare function extractStructuredData(options: {
|
|
|
138
138
|
content: ContentItem[] | ContentItem;
|
|
139
139
|
dataSchema: JsonSchema | z.ZodSchema;
|
|
140
140
|
prompt?: string;
|
|
141
|
-
|
|
141
|
+
maxRetries?: number;
|
|
142
142
|
enableCache?: boolean;
|
|
143
143
|
model: SUPPORTED_MODELS;
|
|
144
144
|
apiKey?: string;
|