@intuned/browser-dev 0.1.5-dev.0 → 0.1.6-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33):
  1. package/RELEASE.md +11 -9
  2. package/dist/ai/export.d.ts +1 -7
  3. package/dist/ai/extractStructuredData.js +1 -1
  4. package/dist/ai/extractStructuredDataUsingAi.js +23 -2
  5. package/dist/ai/extractionHelpers/validateSchema.js +34 -2
  6. package/dist/ai/index.d.ts +1 -7
  7. package/dist/ai/tests/testExtractStructuredData.spec.js +150 -18
  8. package/dist/ai/tests/testIsPageLoaded.spec.js +1 -1
  9. package/dist/ai/types/models.js +2 -5
  10. package/dist/ai/validators.js +1 -1
  11. package/dist/common/aiModelsValidations.js +2 -4
  12. package/dist/helpers/downloadFile.js +1 -3
  13. package/dist/helpers/frame_utils/checkFrameAllowsAsyncScripts.js +20 -0
  14. package/dist/helpers/frame_utils/constants.js +8 -0
  15. package/dist/helpers/frame_utils/findAllIframes.js +82 -0
  16. package/dist/helpers/frame_utils/getContainerFrame.js +22 -0
  17. package/dist/helpers/frame_utils/index.js +44 -0
  18. package/dist/helpers/frame_utils/tests/testFindAllIframes.spec.js +213 -0
  19. package/dist/helpers/tests/testDownloadFile.spec.js +3 -4
  20. package/dist/helpers/tests/testResolveUrl.spec.js +4 -4
  21. package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +1041 -4
  22. package/dist/helpers/tests/testWithDomSettledWait.spec.js +142 -0
  23. package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +1 -1
  24. package/dist/helpers/uploadFileToS3.js +6 -0
  25. package/dist/helpers/utils/getS3Client.js +2 -2
  26. package/dist/helpers/validateDataUsingSchema.js +93 -7
  27. package/dist/helpers/waitForDomSettled.js +66 -40
  28. package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromLocator.spec.js +1 -1
  29. package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromPage.spec.js +271 -2
  30. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +1 -1
  31. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +1 -1
  32. package/dist/optimized-extractors/types/aiModelsValidation.js +1 -3
  33. package/package.json +5 -4
@@ -161,4 +161,146 @@ var _waitForDomSettled = require("../waitForDomSettled");
161
161
  const content = await page.textContent("#target");
162
162
  (0, _extendedTest.expect)(content).toContain("Locator source content");
163
163
  });
164
+ (0, _extendedTest.test)("should wait for DOM to settle with iframe dynamic content", async () => {
165
+ await page.goto(`
166
+ data:text/html,
167
+ <html>
168
+ <body>
169
+ <h1>Main Page</h1>
170
+ <iframe id="test-iframe"
171
+ src="data:text/html,<html><body><div id='iframe-content'>Initial content</div></body></html>"
172
+ width="400"
173
+ height="300">
174
+ </iframe>
175
+ </body>
176
+ </html>
177
+ `);
178
+ await page.waitForTimeout(500);
179
+ const iframeElement = await page.locator("#test-iframe").elementHandle();
180
+ const iframeFrame = await (iframeElement === null || iframeElement === void 0 ? void 0 : iframeElement.contentFrame());
181
+ if (!iframeFrame) {
182
+ throw new Error("Could not get iframe frame");
183
+ }
184
+ await iframeFrame.evaluate(() => {
185
+ let counter = 0;
186
+ const intervalId = setInterval(() => {
187
+ counter++;
188
+ const div = document.createElement("div");
189
+ div.textContent = `Dynamic content ${counter}`;
190
+ div.className = "dynamic-item";
191
+ document.body.appendChild(div);
192
+ if (counter >= 3) {
193
+ clearInterval(intervalId);
194
+ }
195
+ }, 200);
196
+ });
197
+ const settled = await (0, _waitForDomSettled.waitForDomSettled)({
198
+ source: page,
199
+ settleDurationMs: 1000,
200
+ timeoutInMs: 10000
201
+ });
202
+ (0, _extendedTest.expect)(settled).toBe(true);
203
+ const dynamicItems = await iframeFrame.locator(".dynamic-item").count();
204
+ (0, _extendedTest.expect)(dynamicItems).toBe(3);
205
+ for (let i = 0; i < 3; i++) {
206
+ const content = await iframeFrame.locator(".dynamic-item").nth(i).textContent();
207
+ (0, _extendedTest.expect)(content).toBe(`Dynamic content ${i + 1}`);
208
+ }
209
+ });
210
+ (0, _extendedTest.test)("should wait for DOM to settle with nested iframes", async () => {
211
+ await page.goto(`
212
+ data:text/html,
213
+ <html>
214
+ <body>
215
+ <h1>Main Page</h1>
216
+ <div id="main-content">Main content</div>
217
+ <iframe id="outer-iframe"
218
+ src="data:text/html,<html><body><h2>Outer iframe</h2><div id='outer-content'>Outer initial</div><iframe id='inner-iframe' src='data:text/html,<html><body><h3>Inner iframe</h3><div id=inner-content>Inner initial</div></body></html>'></iframe></body></html>"
219
+ width="500"
220
+ height="400">
221
+ </iframe>
222
+ </body>
223
+ </html>
224
+ `);
225
+ await page.waitForTimeout(1000);
226
+ const outerIframeElement = await page.locator("#outer-iframe").elementHandle();
227
+ const outerFrame = await (outerIframeElement === null || outerIframeElement === void 0 ? void 0 : outerIframeElement.contentFrame());
228
+ if (!outerFrame) {
229
+ throw new Error("Could not get outer frame");
230
+ }
231
+ const innerIframeElement = await outerFrame.locator("#inner-iframe").elementHandle();
232
+ const innerFrame = await (innerIframeElement === null || innerIframeElement === void 0 ? void 0 : innerIframeElement.contentFrame());
233
+ if (!innerFrame) {
234
+ throw new Error("Could not get inner frame");
235
+ }
236
+ await outerFrame.evaluate(() => {
237
+ let outerCounter = 0;
238
+ const outerInterval = setInterval(() => {
239
+ outerCounter++;
240
+ const div = document.createElement("div");
241
+ div.textContent = `Outer dynamic ${outerCounter}`;
242
+ div.className = "outer-dynamic";
243
+ const outerContent = document.getElementById("outer-content");
244
+ outerContent === null || outerContent === void 0 || outerContent.appendChild(div);
245
+ if (outerCounter >= 2) {
246
+ clearInterval(outerInterval);
247
+ }
248
+ }, 150);
249
+ });
250
+ await innerFrame.evaluate(() => {
251
+ let innerCounter = 0;
252
+ const innerInterval = setInterval(() => {
253
+ innerCounter++;
254
+ const div = document.createElement("div");
255
+ div.textContent = `Inner dynamic ${innerCounter}`;
256
+ div.className = "inner-dynamic";
257
+ const innerContent = document.getElementById("inner-content");
258
+ innerContent === null || innerContent === void 0 || innerContent.appendChild(div);
259
+ if (innerCounter >= 2) {
260
+ clearInterval(innerInterval);
261
+ }
262
+ }, 200);
263
+ });
264
+ const settled = await (0, _waitForDomSettled.waitForDomSettled)({
265
+ source: page,
266
+ settleDurationMs: 1000,
267
+ timeoutInMs: 15000
268
+ });
269
+ (0, _extendedTest.expect)(settled).toBe(true);
270
+ const outerDynamicItems = await outerFrame.locator(".outer-dynamic").count();
271
+ (0, _extendedTest.expect)(outerDynamicItems).toBe(2);
272
+ const innerDynamicItems = await innerFrame.locator(".inner-dynamic").count();
273
+ (0, _extendedTest.expect)(innerDynamicItems).toBe(2);
274
+ for (let i = 0; i < 2; i++) {
275
+ const content = await outerFrame.locator(".outer-dynamic").nth(i).textContent();
276
+ (0, _extendedTest.expect)(content).toBe(`Outer dynamic ${i + 1}`);
277
+ }
278
+ for (let i = 0; i < 2; i++) {
279
+ const content = await innerFrame.locator(".inner-dynamic").nth(i).textContent();
280
+ (0, _extendedTest.expect)(content).toBe(`Inner dynamic ${i + 1}`);
281
+ }
282
+ });
283
+ (0, _extendedTest.test)("should wait for DOM to settle with sandboxed iframe", async () => {
284
+ await page.goto(`
285
+ data:text/html,
286
+ <html>
287
+ <body>
288
+ <h1>Main Page</h1>
289
+ <iframe id="sandboxed-iframe"
290
+ sandbox="allow-same-origin"
291
+ srcdoc="<html><body><h2>Sandboxed Iframe</h2><div id='content'>Initial content</div></body></html>"
292
+ width="400"
293
+ height="300">
294
+ </iframe>
295
+ </body>
296
+ </html>
297
+ `);
298
+ await page.waitForTimeout(500);
299
+ const settled = await (0, _waitForDomSettled.waitForDomSettled)({
300
+ source: page,
301
+ settleDurationMs: 500,
302
+ timeoutInMs: 5000
303
+ });
304
+ (0, _extendedTest.expect)(settled).toBe(true);
305
+ });
164
306
  });
@@ -103,7 +103,7 @@ _extendedTest.describe.skip("TestWaitForNetworkIdle", () => {
103
103
  await page.goto("data:text/html,<html><body><h1>Direct Call</h1></body></html>");
104
104
  return "direct call complete";
105
105
  }, {
106
- page: page,
106
+ page,
107
107
  timeoutInMs: 10000,
108
108
  maxInflightRequests: 0
109
109
  });
@@ -42,6 +42,9 @@ const uploadFileToS3 = async input => {
42
42
  fileNameOverride,
43
43
  contentType
44
44
  } = input;
45
+ if (!(0, _utils.isDownload)(file) && !Buffer.isBuffer(file)) {
46
+ throw new Error("Invalid file type, Supported types are Download and Buffer");
47
+ }
45
48
  const bucketName = (configs === null || configs === void 0 ? void 0 : configs.bucket) ?? process.env.AWS_BUCKET ?? process.env.INTUNED_S3_BUCKET ?? undefined;
46
49
  const region = (configs === null || configs === void 0 ? void 0 : configs.region) ?? process.env.AWS_REGION ?? process.env.INTUNED_S3_REGION ?? undefined;
47
50
  const endpoint = (configs === null || configs === void 0 ? void 0 : configs.endpoint) ?? process.env.AWS_ENDPOINT_URL ?? process.env.INTUNED_S3_ENDPOINT_URL;
@@ -52,6 +55,9 @@ const uploadFileToS3 = async input => {
52
55
  console.log("Uploaded file successfully");
53
56
  if (isDownloadedFile) {
54
57
  return new _Attachment.Attachment(`${(0, _uuid.v4)()}/${file.suggestedFilename()}`, `${(0, _uuid.v4)()}/${file.suggestedFilename()}`, "testing_bucket", "testing_region", file.suggestedFilename() || "downloaded_file", endpoint, "document");
58
+ } else {
59
+ const suggestedFileName = (0, _uuid.v4)();
60
+ return new _Attachment.Attachment(suggestedFileName, suggestedFileName, "testing_bucket", "testing_region", suggestedFileName, endpoint, "document");
55
61
  }
56
62
  }
57
63
  let suggestedFileName;
@@ -12,8 +12,8 @@ function getS3Client(endpoint, region) {
12
12
  s3ClientInstance = new _clientS.S3Client({
13
13
  region: resolvedRegion,
14
14
  credentials: {
15
- accessKeyId: process.env.AWS_ACCESS_KEY_ID || "",
16
- secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY || ""
15
+ accessKeyId: process.env.AWS_ACCESS_KEY_ID || process.env.INTUNED_S3_ACCESS_KEY_ID || "",
16
+ secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY || process.env.INTUNED_S3_SECRET_ACCESS_KEY || ""
17
17
  },
18
18
  endpoint: endpoint || undefined
19
19
  });
@@ -12,7 +12,9 @@ var _Attachment = require("./types/Attachment");
12
12
  function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
13
13
  const ajv = new _ajv.default({
14
14
  strict: false,
15
- removeAdditional: false
15
+ removeAdditional: false,
16
+ allErrors: true,
17
+ verbose: true
16
18
  });
17
19
  function injectAttachmentType(schema) {
18
20
  const schemaCopy = JSON.parse(JSON.stringify(schema));
@@ -32,10 +34,35 @@ function injectAttachmentType(schema) {
32
34
  return node.map(item => replaceAttachmentType(item));
33
35
  }
34
36
  const result = {};
35
- if (node.type === "attachment") {
37
+ const nodeType = node.type;
38
+ if (typeof nodeType === "string" && nodeType.toLowerCase() === "attachment") {
36
39
  return {
37
40
  $ref: "#/$defs/attachment"
38
41
  };
42
+ } else if (Array.isArray(nodeType)) {
43
+ const hasAttachment = nodeType.some(t => typeof t === "string" && t.toLowerCase() === "attachment");
44
+ if (hasAttachment) {
45
+ const oneOfSchemas = [];
46
+ for (const typeItem of nodeType) {
47
+ if (typeof typeItem === "string" && typeItem.toLowerCase() === "attachment") {
48
+ oneOfSchemas.push({
49
+ $ref: "#/$defs/attachment"
50
+ });
51
+ } else {
52
+ oneOfSchemas.push({
53
+ type: typeItem
54
+ });
55
+ }
56
+ }
57
+ const newNode = {};
58
+ for (const key in node) {
59
+ if (key !== "type") {
60
+ newNode[key] = replaceAttachmentType(node[key]);
61
+ }
62
+ }
63
+ newNode.oneOf = oneOfSchemas;
64
+ return newNode;
65
+ }
39
66
  }
40
67
  for (const key in node) {
41
68
  result[key] = replaceAttachmentType(node[key]);
@@ -44,19 +71,78 @@ function injectAttachmentType(schema) {
44
71
  }
45
72
  return replaceAttachmentType(schemaCopy);
46
73
  }
74
+ function resolveSchema(schema, rootSchema) {
75
+ if (typeof schema !== "object" || schema === null) {
76
+ return schema;
77
+ }
78
+ if ("$ref" in schema) {
79
+ const refPath = schema.$ref;
80
+ if (refPath.startsWith("#/definitions/")) {
81
+ const defName = refPath.split("/").pop();
82
+ const definitions = rootSchema.definitions || {};
83
+ return definitions[defName] || schema;
84
+ } else if (refPath.startsWith("#/$defs/")) {
85
+ const defName = refPath.split("/").pop();
86
+ const defs = rootSchema.$defs || {};
87
+ return defs[defName] || schema;
88
+ }
89
+ }
90
+ for (const combinator of ["oneOf", "anyOf"]) {
91
+ if (combinator in schema) {
92
+ for (const option of schema[combinator]) {
93
+ const resolved = resolveSchema(option, rootSchema);
94
+ if ((resolved === null || resolved === void 0 ? void 0 : resolved.type) === "object") {
95
+ return resolved;
96
+ }
97
+ }
98
+ }
99
+ }
100
+ return schema;
101
+ }
102
+ function removeNoneFromOptionalFields(data, schema, rootSchema) {
103
+ if (typeof data === "object" && data !== null && !Array.isArray(data) && typeof schema === "object" && schema !== null) {
104
+ const resolvedSchema = resolveSchema(schema, rootSchema);
105
+ const requiredFields = new Set(resolvedSchema.required || []);
106
+ const properties = resolvedSchema.properties || {};
107
+ const cleaned = {};
108
+ for (const key in data) {
109
+ const value = data[key];
110
+ const fieldSchema = properties[key] || {};
111
+ if (requiredFields.has(key)) {
112
+ cleaned[key] = removeNoneFromOptionalFields(value, fieldSchema, rootSchema);
113
+ } else if (value !== null && value !== undefined) {
114
+ cleaned[key] = removeNoneFromOptionalFields(value, fieldSchema, rootSchema);
115
+ }
116
+ }
117
+ return cleaned;
118
+ } else if (Array.isArray(data)) {
119
+ const itemsSchema = (schema === null || schema === void 0 ? void 0 : schema.items) || {};
120
+ return data.filter(item => item !== null && item !== undefined).map(item => removeNoneFromOptionalFields(item, itemsSchema, rootSchema));
121
+ } else {
122
+ return data;
123
+ }
124
+ }
47
125
  const validateDataUsingSchema = input => {
48
126
  const {
49
127
  data,
50
128
  schema
51
129
  } = input;
52
130
  const processedSchema = injectAttachmentType(schema);
131
+ const cleanedData = removeNoneFromOptionalFields(data, processedSchema, processedSchema);
53
132
  const validate = ajv.compile(processedSchema);
54
- const valid = validate(data);
133
+ const valid = validate(cleanedData);
55
134
  if (!valid) {
56
- var _validate$errors;
57
- const errors = ((_validate$errors = validate.errors) === null || _validate$errors === void 0 ? void 0 : _validate$errors.map(err => `${err.instancePath || "root"} ${err.message}`)) || [];
58
- const errorMessage = errors.join(", ") || "Unknown validation error";
59
- throw new _types.ValidationError(`Data validation failed: ${errorMessage}`, data);
135
+ const errors = validate.errors || [];
136
+ if (errors.length > 0) {
137
+ const errorMessages = [];
138
+ for (const err of errors) {
139
+ errorMessages.push(` - ${err.instancePath || "root"}: ${err.message}`);
140
+ }
141
+ const fullMessage = `Validation failed with ${errors.length} error(s):\n${errorMessages.join("\n")}`;
142
+ throw new _types.ValidationError(fullMessage, data);
143
+ } else {
144
+ throw new _types.ValidationError("Validation failed: Unknown validation error", data);
145
+ }
60
146
  }
61
147
  };
62
148
  exports.validateDataUsingSchema = validateDataUsingSchema;
@@ -6,6 +6,8 @@ Object.defineProperty(exports, "__esModule", {
6
6
  exports.waitForDomSettled = void 0;
7
7
  var _locatorHelpers = require("../common/locatorHelpers");
8
8
  var _Logger = require("../common/Logger");
9
+ var _findAllIframes = require("./frame_utils/findAllIframes");
10
+ var _getContainerFrame = require("./frame_utils/getContainerFrame");
9
11
  const waitForDomSettled = async options => {
10
12
  const {
11
13
  source,
@@ -14,10 +16,10 @@ const waitForDomSettled = async options => {
14
16
  } = options;
15
17
  const settleDurationMsFloored = Math.floor(settleDurationMs);
16
18
  const timeoutMs = Math.floor(timeoutInMs);
17
- let pageObj;
19
+ let frame;
18
20
  let elementHandle;
19
21
  if (!(0, _locatorHelpers.isPage)(source)) {
20
- pageObj = source.page();
22
+ frame = await (0, _getContainerFrame.getContainerFrame)(source);
21
23
  const handle = await source.elementHandle();
22
24
  if (!handle) {
23
25
  _Logger.logger.warn("Could not get element handle from locator");
@@ -25,46 +27,26 @@ const waitForDomSettled = async options => {
25
27
  }
26
28
  elementHandle = handle;
27
29
  } else if ((0, _locatorHelpers.isPage)(source)) {
28
- pageObj = source;
29
- elementHandle = await pageObj.evaluateHandle("document.documentElement");
30
+ frame = source.mainFrame();
31
+ elementHandle = await frame.evaluateHandle("document.documentElement");
30
32
  } else {
31
33
  throw new Error("Invalid state");
32
34
  }
33
- try {
34
- const result = await pageObj.evaluate(({
35
- target,
35
+ const jsCode = (target, args) => {
36
+ const {
36
37
  settleDurationMsFloored,
37
38
  timeoutMs
38
- }) => {
39
- return new Promise((resolve, reject) => {
40
- if (!target) {
41
- reject(new Error("Target element not found"));
42
- return;
43
- }
44
- let mutationTimer;
45
- let settled = false;
46
- const observer = new MutationObserver(() => {
47
- if (settled) return;
48
- clearTimeout(mutationTimer);
49
- mutationTimer = window.setTimeout(() => {
50
- settled = true;
51
- observer.disconnect();
52
- clearTimeout(timeoutTimer);
53
- resolve(true);
54
- }, settleDurationMsFloored);
55
- });
56
- const timeoutTimer = window.setTimeout(() => {
57
- settled = true;
58
- observer.disconnect();
59
- clearTimeout(mutationTimer);
60
- reject(new Error(`DOM timed out settling after ${timeoutMs} ms`));
61
- }, timeoutMs);
62
- observer.observe(target, {
63
- childList: true,
64
- subtree: true,
65
- attributes: true,
66
- characterData: true
67
- });
39
+ } = args;
40
+ return new Promise((resolve, reject) => {
41
+ if (!target) {
42
+ reject(new Error("Target element not found"));
43
+ return;
44
+ }
45
+ let mutationTimer;
46
+ let settled = false;
47
+ const observer = new MutationObserver(() => {
48
+ if (settled) return;
49
+ clearTimeout(mutationTimer);
68
50
  mutationTimer = window.setTimeout(() => {
69
51
  settled = true;
70
52
  observer.disconnect();
@@ -72,12 +54,56 @@ const waitForDomSettled = async options => {
72
54
  resolve(true);
73
55
  }, settleDurationMsFloored);
74
56
  });
75
- }, {
76
- target: elementHandle,
57
+ const timeoutTimer = window.setTimeout(() => {
58
+ settled = true;
59
+ observer.disconnect();
60
+ clearTimeout(mutationTimer);
61
+ reject(new Error(`DOM timed out settling after ${timeoutMs} ms`));
62
+ }, timeoutMs);
63
+ observer.observe(target, {
64
+ childList: true,
65
+ subtree: true,
66
+ attributes: true,
67
+ characterData: true
68
+ });
69
+ mutationTimer = window.setTimeout(() => {
70
+ settled = true;
71
+ observer.disconnect();
72
+ clearTimeout(timeoutTimer);
73
+ resolve(true);
74
+ }, settleDurationMsFloored);
75
+ });
76
+ };
77
+ try {
78
+ const result = await elementHandle.evaluate(jsCode, {
77
79
  settleDurationMsFloored,
78
80
  timeoutMs
79
81
  });
80
- return result;
82
+ if (!result) {
83
+ return false;
84
+ }
85
+ const allIframes = await (0, _findAllIframes.findAllIframesList)(frame);
86
+ let hasRestrictedIframes = false;
87
+ for (const iframeNode of allIframes) {
88
+ if (iframeNode.allowsAsyncScripts) {
89
+ const iframeElementHandle = await iframeNode.frame.evaluateHandle("document.documentElement");
90
+ const iframeResult = await iframeElementHandle.evaluate(jsCode, {
91
+ settleDurationMsFloored,
92
+ timeoutMs
93
+ });
94
+ await iframeElementHandle.dispose();
95
+ if (!iframeResult) {
96
+ return false;
97
+ }
98
+ } else {
99
+ hasRestrictedIframes = true;
100
+ }
101
+ }
102
+ if (hasRestrictedIframes) {
103
+ _Logger.logger.debug(`Waiting ${2 * settleDurationMs}ms for iframe(s) that do not allow async scripts to settle`);
104
+ await new Promise(resolve => setTimeout(resolve, 2 * settleDurationMs));
105
+ }
106
+ return true;
81
107
  } catch (error) {
82
108
  _Logger.logger.warn(`DOM settlement detection failed: ${error}`);
83
109
  return false;
@@ -72,7 +72,7 @@ _extendedTest.describe.skip("Array Extractor Caching Tests", () => {
72
72
  model: "claude-3-5-sonnet-20240620",
73
73
  type: "HTML"
74
74
  },
75
- variantKey: variantKey,
75
+ variantKey,
76
76
  apiKey: process.env.ANTHROPIC_API_KEY
77
77
  };
78
78
  await page.setContent(productListTemplate);