@intuned/browser-dev 0.1.4-dev.1 → 0.1.5-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/dist/ai/export.d.ts +1 -1
- package/dist/ai/index.d.ts +1 -1
- package/dist/ai/isPageLoaded.js +14 -3
- package/dist/ai/tests/testIsPageLoaded.spec.js +3 -3
- package/dist/helpers/downloadFile.js +37 -0
- package/dist/helpers/export.d.ts +10 -7
- package/dist/helpers/frame_utils/constants.js +8 -0
- package/dist/helpers/frame_utils/findAllIframes.js +79 -0
- package/dist/helpers/frame_utils/getContainerFrame.js +22 -0
- package/dist/helpers/frame_utils/index.js +44 -0
- package/dist/helpers/frame_utils/tests/testFindAllIframes.spec.js +170 -0
- package/dist/helpers/gotoUrl.js +1 -1
- package/dist/helpers/index.d.ts +10 -7
- package/dist/helpers/index.js +0 -19
- package/dist/helpers/tests/testDownloadFile.spec.js +41 -6
- package/dist/helpers/tests/testInjectAttachmentType.spec.js +482 -0
- package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +35 -31
- package/dist/helpers/tests/testWithDomSettledWait.spec.js +119 -0
- package/dist/helpers/types/Attachment.js +11 -6
- package/dist/helpers/types/index.js +1 -20
- package/dist/helpers/uploadFileToS3.js +2 -2
- package/dist/helpers/validateDataUsingSchema.js +30 -71
- package/dist/helpers/waitForDomSettled.js +57 -40
- package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +4 -4
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromPage.spec.js +271 -2
- package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +55 -8
- package/generated-docs/ai/functions/extractStructuredData.mdx +5 -5
- package/generated-docs/ai/functions/isPageLoaded.mdx +1 -0
- package/generated-docs/helpers/functions/clickButtonAndWait.mdx +63 -0
- package/generated-docs/helpers/functions/clickUntilExhausted.mdx +112 -0
- package/generated-docs/helpers/functions/scrollToLoadContent.mdx +1 -7
- package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +5 -5
- package/how-to-generate-docs.md +1 -0
- package/package.json +2 -2
- package/dist/helpers/types/CustomTypeRegistry.js +0 -48
|
@@ -6,6 +6,8 @@ Object.defineProperty(exports, "__esModule", {
|
|
|
6
6
|
exports.waitForDomSettled = void 0;
|
|
7
7
|
var _locatorHelpers = require("../common/locatorHelpers");
|
|
8
8
|
var _Logger = require("../common/Logger");
|
|
9
|
+
var _findAllIframes = require("./frame_utils/findAllIframes");
|
|
10
|
+
var _getContainerFrame = require("./frame_utils/getContainerFrame");
|
|
9
11
|
const waitForDomSettled = async options => {
|
|
10
12
|
const {
|
|
11
13
|
source,
|
|
@@ -14,10 +16,10 @@ const waitForDomSettled = async options => {
|
|
|
14
16
|
} = options;
|
|
15
17
|
const settleDurationMsFloored = Math.floor(settleDurationMs);
|
|
16
18
|
const timeoutMs = Math.floor(timeoutInMs);
|
|
17
|
-
let
|
|
19
|
+
let frame;
|
|
18
20
|
let elementHandle;
|
|
19
21
|
if (!(0, _locatorHelpers.isPage)(source)) {
|
|
20
|
-
|
|
22
|
+
frame = await (0, _getContainerFrame.getContainerFrame)(source);
|
|
21
23
|
const handle = await source.elementHandle();
|
|
22
24
|
if (!handle) {
|
|
23
25
|
_Logger.logger.warn("Could not get element handle from locator");
|
|
@@ -25,46 +27,26 @@ const waitForDomSettled = async options => {
|
|
|
25
27
|
}
|
|
26
28
|
elementHandle = handle;
|
|
27
29
|
} else if ((0, _locatorHelpers.isPage)(source)) {
|
|
28
|
-
|
|
29
|
-
elementHandle = await
|
|
30
|
+
frame = source.mainFrame();
|
|
31
|
+
elementHandle = await frame.evaluateHandle("document.documentElement");
|
|
30
32
|
} else {
|
|
31
33
|
throw new Error("Invalid state");
|
|
32
34
|
}
|
|
33
|
-
|
|
34
|
-
const
|
|
35
|
-
target,
|
|
35
|
+
const jsCode = (target, args) => {
|
|
36
|
+
const {
|
|
36
37
|
settleDurationMsFloored,
|
|
37
38
|
timeoutMs
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
mutationTimer = window.setTimeout(() => {
|
|
50
|
-
settled = true;
|
|
51
|
-
observer.disconnect();
|
|
52
|
-
clearTimeout(timeoutTimer);
|
|
53
|
-
resolve(true);
|
|
54
|
-
}, settleDurationMsFloored);
|
|
55
|
-
});
|
|
56
|
-
const timeoutTimer = window.setTimeout(() => {
|
|
57
|
-
settled = true;
|
|
58
|
-
observer.disconnect();
|
|
59
|
-
clearTimeout(mutationTimer);
|
|
60
|
-
reject(new Error(`DOM timed out settling after ${timeoutMs} ms`));
|
|
61
|
-
}, timeoutMs);
|
|
62
|
-
observer.observe(target, {
|
|
63
|
-
childList: true,
|
|
64
|
-
subtree: true,
|
|
65
|
-
attributes: true,
|
|
66
|
-
characterData: true
|
|
67
|
-
});
|
|
39
|
+
} = args;
|
|
40
|
+
return new Promise((resolve, reject) => {
|
|
41
|
+
if (!target) {
|
|
42
|
+
reject(new Error("Target element not found"));
|
|
43
|
+
return;
|
|
44
|
+
}
|
|
45
|
+
let mutationTimer;
|
|
46
|
+
let settled = false;
|
|
47
|
+
const observer = new MutationObserver(() => {
|
|
48
|
+
if (settled) return;
|
|
49
|
+
clearTimeout(mutationTimer);
|
|
68
50
|
mutationTimer = window.setTimeout(() => {
|
|
69
51
|
settled = true;
|
|
70
52
|
observer.disconnect();
|
|
@@ -72,12 +54,47 @@ const waitForDomSettled = async options => {
|
|
|
72
54
|
resolve(true);
|
|
73
55
|
}, settleDurationMsFloored);
|
|
74
56
|
});
|
|
75
|
-
|
|
76
|
-
|
|
57
|
+
const timeoutTimer = window.setTimeout(() => {
|
|
58
|
+
settled = true;
|
|
59
|
+
observer.disconnect();
|
|
60
|
+
clearTimeout(mutationTimer);
|
|
61
|
+
reject(new Error(`DOM timed out settling after ${timeoutMs} ms`));
|
|
62
|
+
}, timeoutMs);
|
|
63
|
+
observer.observe(target, {
|
|
64
|
+
childList: true,
|
|
65
|
+
subtree: true,
|
|
66
|
+
attributes: true,
|
|
67
|
+
characterData: true
|
|
68
|
+
});
|
|
69
|
+
mutationTimer = window.setTimeout(() => {
|
|
70
|
+
settled = true;
|
|
71
|
+
observer.disconnect();
|
|
72
|
+
clearTimeout(timeoutTimer);
|
|
73
|
+
resolve(true);
|
|
74
|
+
}, settleDurationMsFloored);
|
|
75
|
+
});
|
|
76
|
+
};
|
|
77
|
+
try {
|
|
78
|
+
const result = await elementHandle.evaluate(jsCode, {
|
|
77
79
|
settleDurationMsFloored,
|
|
78
80
|
timeoutMs
|
|
79
81
|
});
|
|
80
|
-
|
|
82
|
+
if (!result) {
|
|
83
|
+
return false;
|
|
84
|
+
}
|
|
85
|
+
const allIframes = await (0, _findAllIframes.findAllIframesList)(frame);
|
|
86
|
+
for (const iframeNode of allIframes) {
|
|
87
|
+
const iframeElementHandle = await iframeNode.frame.evaluateHandle("document.documentElement");
|
|
88
|
+
const iframeResult = await iframeElementHandle.evaluate(jsCode, {
|
|
89
|
+
settleDurationMsFloored,
|
|
90
|
+
timeoutMs
|
|
91
|
+
});
|
|
92
|
+
await iframeElementHandle.dispose();
|
|
93
|
+
if (!iframeResult) {
|
|
94
|
+
return false;
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
return true;
|
|
81
98
|
} catch (error) {
|
|
82
99
|
_Logger.logger.warn(`DOM settlement detection failed: ${error}`);
|
|
83
100
|
return false;
|
|
@@ -63,15 +63,15 @@ const mockLoadRuntime = _vitest.vi.mocked(_loadRuntime.loadRuntime);
|
|
|
63
63
|
(0, _extendedTest.it)("should use default model when not provided", () => {
|
|
64
64
|
const gateway = new _aiApiGateway.APIGateway({
|
|
65
65
|
apiKey: "sk-test123",
|
|
66
|
-
model: "gpt-
|
|
66
|
+
model: "gpt-5-mini-2025-08-07"
|
|
67
67
|
});
|
|
68
|
-
(0, _extendedTest.expect)(gateway["model"]).toBe("gpt-
|
|
68
|
+
(0, _extendedTest.expect)(gateway["model"]).toBe("gpt-5-mini-2025-08-07");
|
|
69
69
|
});
|
|
70
70
|
(0, _extendedTest.it)("should allow no parameters", () => {
|
|
71
71
|
const gateway = new _aiApiGateway.APIGateway({
|
|
72
|
-
model: "gpt-
|
|
72
|
+
model: "gpt-5-mini-2025-08-07"
|
|
73
73
|
});
|
|
74
|
-
(0, _extendedTest.expect)(gateway["model"]).toBe("gpt-
|
|
74
|
+
(0, _extendedTest.expect)(gateway["model"]).toBe("gpt-5-mini-2025-08-07");
|
|
75
75
|
(0, _extendedTest.expect)(gateway["apiKey"]).toBeUndefined();
|
|
76
76
|
});
|
|
77
77
|
});
|
package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromPage.spec.js
CHANGED
|
@@ -69,10 +69,10 @@ _extendedTest.describe.skip("Array Extractor from Page Caching Tests", () => {
|
|
|
69
69
|
label: testLabel,
|
|
70
70
|
itemEntitySchema,
|
|
71
71
|
strategy: {
|
|
72
|
-
model: "claude-3-5-
|
|
72
|
+
model: "claude-3-5-haiku-20241022",
|
|
73
73
|
type: "HTML"
|
|
74
74
|
},
|
|
75
|
-
variantKey
|
|
75
|
+
variantKey,
|
|
76
76
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
77
77
|
};
|
|
78
78
|
await page.setContent(productListTemplate);
|
|
@@ -126,5 +126,274 @@ _extendedTest.describe.skip("Array Extractor from Page Caching Tests", () => {
|
|
|
126
126
|
(0, _extendedTest.expect)(fourthResult[0]).toHaveProperty("price", "$1099");
|
|
127
127
|
console.log("All cache behavior tests completed successfully!");
|
|
128
128
|
});
|
|
129
|
+
(0, _extendedTest.test)("should demonstrate caching behavior with different types of DOM changes", async ({
|
|
130
|
+
page
|
|
131
|
+
}) => {
|
|
132
|
+
await page.goto("https://vendor.myfloridamarketplace.com/search/bids/detail/9507", {
|
|
133
|
+
timeout: 0
|
|
134
|
+
});
|
|
135
|
+
const result = await (0, _.extractArrayFromPage)(page, {
|
|
136
|
+
label: "external website links.",
|
|
137
|
+
itemEntityName: "downloadable_links",
|
|
138
|
+
itemEntitySchema: {
|
|
139
|
+
type: "object",
|
|
140
|
+
required: ["anchor_innerText"],
|
|
141
|
+
properties: {
|
|
142
|
+
anchor_href: {
|
|
143
|
+
type: "string",
|
|
144
|
+
description: "extract all downloadable files hrefs."
|
|
145
|
+
},
|
|
146
|
+
anchor_innerText: {
|
|
147
|
+
primary: true,
|
|
148
|
+
type: "string",
|
|
149
|
+
description: "extract title attribute of that anchor.k"
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
},
|
|
153
|
+
strategy: {
|
|
154
|
+
type: "HTML",
|
|
155
|
+
model: "claude-3-5-haiku-20241022"
|
|
156
|
+
}
|
|
157
|
+
});
|
|
158
|
+
console.log("Result:", result);
|
|
159
|
+
});
|
|
160
|
+
});
|
|
161
|
+
(0, _extendedTest.describe)("Edge Cases - Item Count Variations", () => {
|
|
162
|
+
(0, _extendedTest.test)("should handle extraction with 0 items", async ({
|
|
163
|
+
page
|
|
164
|
+
}) => {
|
|
165
|
+
const testLabel = `product-list-0-items-${(0, _uuid.v4)()}`;
|
|
166
|
+
const emptyListTemplate = `
|
|
167
|
+
<div class="products-container">
|
|
168
|
+
<div class="additional-info">
|
|
169
|
+
<div class="shipping-notice">No products available</div>
|
|
170
|
+
<div class="return-policy">Check back later</div>
|
|
171
|
+
</div>
|
|
172
|
+
</div>
|
|
173
|
+
`;
|
|
174
|
+
const itemEntitySchema = {
|
|
175
|
+
type: "object",
|
|
176
|
+
required: ["title", "price"],
|
|
177
|
+
properties: {
|
|
178
|
+
title: {
|
|
179
|
+
type: "string",
|
|
180
|
+
description: "Product title",
|
|
181
|
+
primary: true
|
|
182
|
+
},
|
|
183
|
+
price: {
|
|
184
|
+
type: "string",
|
|
185
|
+
description: "Product price"
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
};
|
|
189
|
+
await page.setContent(emptyListTemplate);
|
|
190
|
+
const result = await (0, _.extractArrayFromPage)(page, {
|
|
191
|
+
itemEntityName: "product",
|
|
192
|
+
label: testLabel,
|
|
193
|
+
itemEntitySchema,
|
|
194
|
+
strategy: {
|
|
195
|
+
model: "claude-3-5-haiku-20241022",
|
|
196
|
+
type: "HTML"
|
|
197
|
+
},
|
|
198
|
+
variantKey: testLabel,
|
|
199
|
+
apiKey: process.env.ANTHROPIC_API_KEY
|
|
200
|
+
});
|
|
201
|
+
console.log("0 items result:", result);
|
|
202
|
+
(0, _extendedTest.expect)(result).toHaveLength(0);
|
|
203
|
+
(0, _extendedTest.expect)(Array.isArray(result)).toBe(true);
|
|
204
|
+
});
|
|
205
|
+
(0, _extendedTest.test)("should handle extraction with 1 item", async ({
|
|
206
|
+
page
|
|
207
|
+
}) => {
|
|
208
|
+
const testLabel = `product-list-1-item-${(0, _uuid.v4)()}`;
|
|
209
|
+
const singleItemTemplate = `
|
|
210
|
+
<div class="products-container">
|
|
211
|
+
<div class="product-item">
|
|
212
|
+
<h2 class="product-title">MacBook Pro M3</h2>
|
|
213
|
+
<div class="price-wrapper">
|
|
214
|
+
<span class="price">$2499</span>
|
|
215
|
+
</div>
|
|
216
|
+
<div class="details">
|
|
217
|
+
<p class="product-description">Professional laptop with M3 Max chip</p>
|
|
218
|
+
</div>
|
|
219
|
+
</div>
|
|
220
|
+
<div class="additional-info">
|
|
221
|
+
<div class="shipping-notice">Free express shipping</div>
|
|
222
|
+
<div class="return-policy">30-day return policy</div>
|
|
223
|
+
</div>
|
|
224
|
+
</div>
|
|
225
|
+
`;
|
|
226
|
+
const itemEntitySchema = {
|
|
227
|
+
type: "object",
|
|
228
|
+
required: ["title", "price"],
|
|
229
|
+
properties: {
|
|
230
|
+
title: {
|
|
231
|
+
type: "string",
|
|
232
|
+
description: "Product title",
|
|
233
|
+
primary: true
|
|
234
|
+
},
|
|
235
|
+
price: {
|
|
236
|
+
type: "string",
|
|
237
|
+
description: "Product price"
|
|
238
|
+
},
|
|
239
|
+
description: {
|
|
240
|
+
type: "string",
|
|
241
|
+
description: "Product description"
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
};
|
|
245
|
+
await page.setContent(singleItemTemplate);
|
|
246
|
+
const result = await (0, _.extractArrayFromPage)(page, {
|
|
247
|
+
itemEntityName: "product",
|
|
248
|
+
label: testLabel,
|
|
249
|
+
itemEntitySchema,
|
|
250
|
+
strategy: {
|
|
251
|
+
model: "claude-3-5-haiku-20241022",
|
|
252
|
+
type: "HTML"
|
|
253
|
+
},
|
|
254
|
+
variantKey: testLabel,
|
|
255
|
+
apiKey: process.env.ANTHROPIC_API_KEY
|
|
256
|
+
});
|
|
257
|
+
console.log("1 item result:", result);
|
|
258
|
+
(0, _extendedTest.expect)(result).toHaveLength(1);
|
|
259
|
+
(0, _extendedTest.expect)(result[0]).toHaveProperty("title", "MacBook Pro M3");
|
|
260
|
+
(0, _extendedTest.expect)(result[0]).toHaveProperty("price", "$2499");
|
|
261
|
+
(0, _extendedTest.expect)(result[0]).toHaveProperty("description", "Professional laptop with M3 Max chip");
|
|
262
|
+
});
|
|
263
|
+
(0, _extendedTest.test)("should handle extraction with 2 items", async ({
|
|
264
|
+
page
|
|
265
|
+
}) => {
|
|
266
|
+
const testLabel = `product-list-2-items-${(0, _uuid.v4)()}`;
|
|
267
|
+
const twoItemsTemplate = `
|
|
268
|
+
<div class="products-container">
|
|
269
|
+
<div class="product-item">
|
|
270
|
+
<h2 class="product-title">iPad Pro</h2>
|
|
271
|
+
<div class="price-wrapper">
|
|
272
|
+
<span class="price">$1099</span>
|
|
273
|
+
</div>
|
|
274
|
+
<div class="details">
|
|
275
|
+
<p class="product-description">Powerful tablet with M2 chip</p>
|
|
276
|
+
</div>
|
|
277
|
+
</div>
|
|
278
|
+
<div class="product-item">
|
|
279
|
+
<h2 class="product-title">Apple Watch Ultra</h2>
|
|
280
|
+
<div class="price-wrapper">
|
|
281
|
+
<span class="price">$799</span>
|
|
282
|
+
</div>
|
|
283
|
+
<div class="details">
|
|
284
|
+
<p class="product-description">Rugged smartwatch for athletes</p>
|
|
285
|
+
</div>
|
|
286
|
+
</div>
|
|
287
|
+
<div class="additional-info">
|
|
288
|
+
<div class="shipping-notice">Free shipping on all orders</div>
|
|
289
|
+
<div class="return-policy">30-day return policy</div>
|
|
290
|
+
</div>
|
|
291
|
+
</div>
|
|
292
|
+
`;
|
|
293
|
+
const itemEntitySchema = {
|
|
294
|
+
type: "object",
|
|
295
|
+
required: ["title", "price"],
|
|
296
|
+
properties: {
|
|
297
|
+
title: {
|
|
298
|
+
type: "string",
|
|
299
|
+
description: "Product title",
|
|
300
|
+
primary: true
|
|
301
|
+
},
|
|
302
|
+
price: {
|
|
303
|
+
type: "string",
|
|
304
|
+
description: "Product price"
|
|
305
|
+
},
|
|
306
|
+
description: {
|
|
307
|
+
type: "string",
|
|
308
|
+
description: "Product description"
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
};
|
|
312
|
+
await page.setContent(twoItemsTemplate);
|
|
313
|
+
const result = await (0, _.extractArrayFromPage)(page, {
|
|
314
|
+
itemEntityName: "product",
|
|
315
|
+
label: testLabel,
|
|
316
|
+
itemEntitySchema,
|
|
317
|
+
strategy: {
|
|
318
|
+
model: "claude-3-5-haiku-20241022",
|
|
319
|
+
type: "HTML"
|
|
320
|
+
},
|
|
321
|
+
variantKey: testLabel,
|
|
322
|
+
apiKey: process.env.ANTHROPIC_API_KEY
|
|
323
|
+
});
|
|
324
|
+
console.log("2 items result:", result);
|
|
325
|
+
(0, _extendedTest.expect)(result).toHaveLength(2);
|
|
326
|
+
(0, _extendedTest.expect)(result[0]).toHaveProperty("title", "iPad Pro");
|
|
327
|
+
(0, _extendedTest.expect)(result[0]).toHaveProperty("price", "$1099");
|
|
328
|
+
(0, _extendedTest.expect)(result[0]).toHaveProperty("description", "Powerful tablet with M2 chip");
|
|
329
|
+
(0, _extendedTest.expect)(result[1]).toHaveProperty("title", "Apple Watch Ultra");
|
|
330
|
+
(0, _extendedTest.expect)(result[1]).toHaveProperty("price", "$799");
|
|
331
|
+
(0, _extendedTest.expect)(result[1]).toHaveProperty("description", "Rugged smartwatch for athletes");
|
|
332
|
+
});
|
|
333
|
+
(0, _extendedTest.test)("should cache and reuse results for 1 item correctly", async ({
|
|
334
|
+
page
|
|
335
|
+
}) => {
|
|
336
|
+
const testLabel = `product-list-1-item-cache-${(0, _uuid.v4)()}`;
|
|
337
|
+
const singleItemTemplate = `
|
|
338
|
+
<div class="products-container">
|
|
339
|
+
<div class="product-item">
|
|
340
|
+
<h2 class="product-title">Sony WH-1000XM5</h2>
|
|
341
|
+
<div class="price-wrapper">
|
|
342
|
+
<span class="price">$399</span>
|
|
343
|
+
</div>
|
|
344
|
+
<div class="details">
|
|
345
|
+
<p class="product-description">Premium noise-canceling headphones</p>
|
|
346
|
+
</div>
|
|
347
|
+
</div>
|
|
348
|
+
</div>
|
|
349
|
+
`;
|
|
350
|
+
const itemEntitySchema = {
|
|
351
|
+
type: "object",
|
|
352
|
+
required: ["title", "price"],
|
|
353
|
+
properties: {
|
|
354
|
+
title: {
|
|
355
|
+
type: "string",
|
|
356
|
+
description: "Product title",
|
|
357
|
+
primary: true
|
|
358
|
+
},
|
|
359
|
+
price: {
|
|
360
|
+
type: "string",
|
|
361
|
+
description: "Product price"
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
};
|
|
365
|
+
const extractionOptions = {
|
|
366
|
+
itemEntityName: "product",
|
|
367
|
+
label: testLabel,
|
|
368
|
+
itemEntitySchema,
|
|
369
|
+
strategy: {
|
|
370
|
+
model: "claude-3-5-haiku-20241022",
|
|
371
|
+
type: "HTML"
|
|
372
|
+
},
|
|
373
|
+
variantKey: testLabel,
|
|
374
|
+
apiKey: process.env.ANTHROPIC_API_KEY
|
|
375
|
+
};
|
|
376
|
+
await page.setContent(singleItemTemplate);
|
|
377
|
+
const firstResult = await (0, _.extractArrayFromPage)(page, extractionOptions);
|
|
378
|
+
console.log("First extraction (1 item):", firstResult);
|
|
379
|
+
(0, _extendedTest.expect)(firstResult).toHaveLength(1);
|
|
380
|
+
(0, _extendedTest.expect)(firstResult[0]).toHaveProperty("title", "Sony WH-1000XM5");
|
|
381
|
+
(0, _extendedTest.expect)(firstResult[0]).toHaveProperty("price", "$399");
|
|
382
|
+
await page.setContent(singleItemTemplate);
|
|
383
|
+
const secondResult = await (0, _.extractArrayFromPage)(page, extractionOptions);
|
|
384
|
+
console.log("Second extraction (from cache, 1 item):", secondResult);
|
|
385
|
+
(0, _extendedTest.expect)(secondResult).toEqual(firstResult);
|
|
386
|
+
(0, _extendedTest.expect)(secondResult).toHaveLength(1);
|
|
387
|
+
(0, _extendedTest.expect)(secondResult[0]).toHaveProperty("title", "Sony WH-1000XM5");
|
|
388
|
+
(0, _extendedTest.expect)(secondResult[0]).toHaveProperty("price", "$399");
|
|
389
|
+
const modifiedTemplate = singleItemTemplate.replace("Sony WH-1000XM5", "Bose QuietComfort Ultra").replace("$399", "$429");
|
|
390
|
+
await page.setContent(modifiedTemplate);
|
|
391
|
+
const thirdResult = await (0, _.extractArrayFromPage)(page, extractionOptions);
|
|
392
|
+
console.log("Third extraction (changed content, 1 item):", thirdResult);
|
|
393
|
+
(0, _extendedTest.expect)(thirdResult).not.toEqual(firstResult);
|
|
394
|
+
(0, _extendedTest.expect)(thirdResult).toHaveLength(1);
|
|
395
|
+
(0, _extendedTest.expect)(thirdResult[0]).toHaveProperty("title", "Bose QuietComfort Ultra");
|
|
396
|
+
(0, _extendedTest.expect)(thirdResult[0]).toHaveProperty("price", "$429");
|
|
397
|
+
});
|
|
129
398
|
});
|
|
130
399
|
});
|
|
@@ -70,16 +70,63 @@ async function handleNewAiExtraction(params) {
|
|
|
70
70
|
allData.value.forEach((v, i) => {
|
|
71
71
|
_Logger.logger.debug(`ai extraction result for row ${i}: ${JSON.stringify(v)}`);
|
|
72
72
|
});
|
|
73
|
-
const
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
73
|
+
const resultValues = [];
|
|
74
|
+
for (let i = 0; i < allData.value.length; i++) {
|
|
75
|
+
const rowValues = allData.value[i];
|
|
76
|
+
const allValues = Object.entries(rowValues).map(([_, value]) => value);
|
|
77
|
+
const rowValuesMatches = await (0, _findDomMatches.getDomMatches)(pageAndSearchRegion.page, pageAndSearchRegion.searchRegionHandler, allValues);
|
|
78
|
+
const rowValuesWithMatchesOnly = Object.entries(rowValues).reduce((acc, [key, value]) => {
|
|
79
|
+
const valueMatches = rowValuesMatches.get(value);
|
|
80
|
+
const bestMatch = (0, _utils.selectBestMatch)(value, valueMatches ?? []);
|
|
81
|
+
if (valueMatches && valueMatches.length > 0 && bestMatch) {
|
|
82
|
+
acc[key] = {
|
|
83
|
+
matchText: bestMatch.matchText,
|
|
84
|
+
matchXpath: bestMatch.matchXpath,
|
|
85
|
+
matchType: bestMatch.matchType
|
|
86
|
+
};
|
|
87
|
+
} else {
|
|
88
|
+
_Logger.logger.debug(`value "${value}" for key "${key}" in row ${i + 1} does not have any matches in the page html, dropped for hallucination protection`);
|
|
89
|
+
}
|
|
90
|
+
return acc;
|
|
91
|
+
}, {});
|
|
92
|
+
resultValues.push({
|
|
93
|
+
rowIndex: i,
|
|
94
|
+
result: rowValuesWithMatchesOnly
|
|
95
|
+
});
|
|
96
|
+
}
|
|
97
|
+
const matches = await (0, _getListMatches.getListMatches)(pageAndSearchRegion.page, pageAndSearchRegion.searchRegionHandler, resultValues.map(v => ({
|
|
98
|
+
rowIndex: v.rowIndex,
|
|
99
|
+
result: Object.fromEntries(Object.entries(v.result).map(([key, value]) => [key, value.matchText]))
|
|
100
|
+
})));
|
|
101
|
+
let containerPath = null;
|
|
102
|
+
let fullContainerXpath = null;
|
|
103
|
+
if (resultValues.length > 0 && resultValues[0].result[primaryPropertyName]) {
|
|
104
|
+
const primaryXpath = resultValues[0].result[primaryPropertyName].matchXpath;
|
|
105
|
+
if (primaryXpath) {
|
|
106
|
+
const parentXpath = await pageAndSearchRegion.page.evaluate(xpath => {
|
|
107
|
+
var _window$__INTUNED__;
|
|
108
|
+
const result = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
|
|
109
|
+
const element = result.singleNodeValue;
|
|
110
|
+
if (!element || !element.parentElement) return null;
|
|
111
|
+
if ((_window$__INTUNED__ = window.__INTUNED__) !== null && _window$__INTUNED__ !== void 0 && _window$__INTUNED__.getElementXPath) {
|
|
112
|
+
return window.__INTUNED__.getElementXPath(element.parentElement);
|
|
113
|
+
}
|
|
114
|
+
return null;
|
|
115
|
+
}, primaryXpath);
|
|
116
|
+
if (parentXpath) {
|
|
117
|
+
fullContainerXpath = parentXpath;
|
|
118
|
+
if (hasSearchRegionContainer) {
|
|
119
|
+
containerPath = await (0, _getRelativeContainerXpathSelector.getRelativeContainerXpathSelector)(pageAndSearchRegion.searchRegion, parentXpath);
|
|
120
|
+
} else {
|
|
121
|
+
containerPath = parentXpath;
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
}
|
|
79
126
|
return (0, _neverthrow.ok)({
|
|
80
127
|
resultValues,
|
|
81
|
-
containerPath
|
|
82
|
-
fullContainerXpath
|
|
128
|
+
containerPath,
|
|
129
|
+
fullContainerXpath,
|
|
83
130
|
matches
|
|
84
131
|
});
|
|
85
132
|
}
|
|
@@ -34,7 +34,7 @@ await page.goto("https://books.toscrape.com/")
|
|
|
34
34
|
const product = await extractStructuredData({
|
|
35
35
|
source: page,
|
|
36
36
|
strategy: "HTML",
|
|
37
|
-
model: "gpt-4o"
|
|
37
|
+
model: "gpt-4o",
|
|
38
38
|
dataSchema: {
|
|
39
39
|
type: "object",
|
|
40
40
|
properties: {
|
|
@@ -47,8 +47,8 @@ const product = await extractStructuredData({
|
|
|
47
47
|
},
|
|
48
48
|
prompt: "Extract product details from this e-commerce page"
|
|
49
49
|
});
|
|
50
|
-
}
|
|
51
50
|
console.log(`Found book: ${product.name} - ${product.price}`);
|
|
51
|
+
}
|
|
52
52
|
```
|
|
53
53
|
|
|
54
54
|
```typescript Locator source
|
|
@@ -59,7 +59,7 @@ const articleContainer = page.locator("article").first()
|
|
|
59
59
|
const article = await extractStructuredData({
|
|
60
60
|
source: articleContainer,
|
|
61
61
|
strategy: "MARKDOWN",
|
|
62
|
-
model: "claude-3",
|
|
62
|
+
model: "claude-3-5-sonnet-20240620",
|
|
63
63
|
dataSchema: {
|
|
64
64
|
type: "object",
|
|
65
65
|
properties: {
|
|
@@ -72,8 +72,8 @@ const article = await extractStructuredData({
|
|
|
72
72
|
},
|
|
73
73
|
maxRetries: 5
|
|
74
74
|
});
|
|
75
|
-
}
|
|
76
75
|
console.log(`Found book: ${article.title}`);
|
|
76
|
+
}
|
|
77
77
|
```
|
|
78
78
|
|
|
79
79
|
</CodeGroup>
|
|
@@ -138,7 +138,7 @@ export declare function extractStructuredData(options: {
|
|
|
138
138
|
content: ContentItem[] | ContentItem;
|
|
139
139
|
dataSchema: JsonSchema | z.ZodSchema;
|
|
140
140
|
prompt?: string;
|
|
141
|
-
|
|
141
|
+
maxRetries?: number;
|
|
142
142
|
enableCache?: boolean;
|
|
143
143
|
model: SUPPORTED_MODELS;
|
|
144
144
|
apiKey?: string;
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: clickButtonAndWait
|
|
3
|
+
description: ""
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
```typescript
|
|
7
|
+
export declare function clickButtonAndWait(input: {
|
|
8
|
+
page: Page;
|
|
9
|
+
buttonLocator: Locator;
|
|
10
|
+
clickDelay?: number;
|
|
11
|
+
}): Promise<void>;
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Click a button and wait briefly for content to load.
|
|
15
|
+
|
|
16
|
+
This function clicks a button element and waits for a specified delay, allowing time for
|
|
17
|
+
any triggered content to load. It automatically waits for network activity to settle.
|
|
18
|
+
|
|
19
|
+
## Examples
|
|
20
|
+
|
|
21
|
+
<CodeGroup>
|
|
22
|
+
|
|
23
|
+
```typescript Basic Button Click
|
|
24
|
+
import { clickButtonAndWait } from "@intuned/browser";
|
|
25
|
+
export default async function handler(params, page, context){
|
|
26
|
+
await page.goto("https://example.com/products");
|
|
27
|
+
const loadMoreButton = page.locator("#load-more-button");
|
|
28
|
+
await clickButtonAndWait({
|
|
29
|
+
page,
|
|
30
|
+
buttonLocator: loadMoreButton,
|
|
31
|
+
clickDelay: 1.0
|
|
32
|
+
});
|
|
33
|
+
}
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
</CodeGroup>
|
|
37
|
+
|
|
38
|
+
## Arguments
|
|
39
|
+
|
|
40
|
+
<ParamField path="input" type="Object" required
|
|
41
|
+
>
|
|
42
|
+
Configuration options
|
|
43
|
+
|
|
44
|
+
<Expandable title="input">
|
|
45
|
+
<ParamField path="input.page" type="Page">
|
|
46
|
+
Playwright Page object
|
|
47
|
+
</ParamField>
|
|
48
|
+
|
|
49
|
+
<ParamField path="input.buttonLocator" type="Locator">
|
|
50
|
+
Locator for the button element to click
|
|
51
|
+
</ParamField>
|
|
52
|
+
|
|
53
|
+
<ParamField path="input.clickDelay" type="number">
|
|
54
|
+
Delay after clicking the button (in seconds)
|
|
55
|
+
</ParamField>
|
|
56
|
+
|
|
57
|
+
</Expandable>
|
|
58
|
+
|
|
59
|
+
</ParamField>
|
|
60
|
+
|
|
61
|
+
## Returns: `Promise<void>`
|
|
62
|
+
|
|
63
|
+
Promise that resolves once the click and the subsequent wait have completed
|