@intuned/browser-dev 0.1.5-dev.0 → 0.1.6-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/RELEASE.md +11 -9
- package/dist/ai/export.d.ts +1 -7
- package/dist/ai/extractStructuredData.js +1 -1
- package/dist/ai/extractStructuredDataUsingAi.js +23 -2
- package/dist/ai/extractionHelpers/validateSchema.js +34 -2
- package/dist/ai/index.d.ts +1 -7
- package/dist/ai/tests/testExtractStructuredData.spec.js +150 -18
- package/dist/ai/tests/testIsPageLoaded.spec.js +1 -1
- package/dist/ai/types/models.js +2 -5
- package/dist/ai/validators.js +1 -1
- package/dist/common/aiModelsValidations.js +2 -4
- package/dist/helpers/downloadFile.js +1 -3
- package/dist/helpers/frame_utils/checkFrameAllowsAsyncScripts.js +20 -0
- package/dist/helpers/frame_utils/constants.js +8 -0
- package/dist/helpers/frame_utils/findAllIframes.js +82 -0
- package/dist/helpers/frame_utils/getContainerFrame.js +22 -0
- package/dist/helpers/frame_utils/index.js +44 -0
- package/dist/helpers/frame_utils/tests/testFindAllIframes.spec.js +213 -0
- package/dist/helpers/tests/testDownloadFile.spec.js +3 -4
- package/dist/helpers/tests/testResolveUrl.spec.js +4 -4
- package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +1041 -4
- package/dist/helpers/tests/testWithDomSettledWait.spec.js +142 -0
- package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +1 -1
- package/dist/helpers/uploadFileToS3.js +6 -0
- package/dist/helpers/utils/getS3Client.js +2 -2
- package/dist/helpers/validateDataUsingSchema.js +93 -7
- package/dist/helpers/waitForDomSettled.js +66 -40
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromLocator.spec.js +1 -1
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromPage.spec.js +271 -2
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +1 -1
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +1 -1
- package/dist/optimized-extractors/types/aiModelsValidation.js +1 -3
- package/package.json +5 -4
package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromPage.spec.js
CHANGED
|
@@ -69,10 +69,10 @@ _extendedTest.describe.skip("Array Extractor from Page Caching Tests", () => {
|
|
|
69
69
|
label: testLabel,
|
|
70
70
|
itemEntitySchema,
|
|
71
71
|
strategy: {
|
|
72
|
-
model: "claude-3-5-
|
|
72
|
+
model: "claude-3-5-haiku-20241022",
|
|
73
73
|
type: "HTML"
|
|
74
74
|
},
|
|
75
|
-
variantKey
|
|
75
|
+
variantKey,
|
|
76
76
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
77
77
|
};
|
|
78
78
|
await page.setContent(productListTemplate);
|
|
@@ -126,5 +126,274 @@ _extendedTest.describe.skip("Array Extractor from Page Caching Tests", () => {
|
|
|
126
126
|
(0, _extendedTest.expect)(fourthResult[0]).toHaveProperty("price", "$1099");
|
|
127
127
|
console.log("All cache behavior tests completed successfully!");
|
|
128
128
|
});
|
|
129
|
+
(0, _extendedTest.test)("should demonstrate caching behavior with different types of DOM changes", async ({
|
|
130
|
+
page
|
|
131
|
+
}) => {
|
|
132
|
+
await page.goto("https://vendor.myfloridamarketplace.com/search/bids/detail/9507", {
|
|
133
|
+
timeout: 0
|
|
134
|
+
});
|
|
135
|
+
const result = await (0, _.extractArrayFromPage)(page, {
|
|
136
|
+
label: "external website links.",
|
|
137
|
+
itemEntityName: "downloadable_links",
|
|
138
|
+
itemEntitySchema: {
|
|
139
|
+
type: "object",
|
|
140
|
+
required: ["anchor_innerText"],
|
|
141
|
+
properties: {
|
|
142
|
+
anchor_href: {
|
|
143
|
+
type: "string",
|
|
144
|
+
description: "extract all downloadable files hrefs."
|
|
145
|
+
},
|
|
146
|
+
anchor_innerText: {
|
|
147
|
+
primary: true,
|
|
148
|
+
type: "string",
|
|
149
|
+
description: "extract title attribute of that anchor.k"
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
},
|
|
153
|
+
strategy: {
|
|
154
|
+
type: "HTML",
|
|
155
|
+
model: "claude-3-5-haiku-20241022"
|
|
156
|
+
}
|
|
157
|
+
});
|
|
158
|
+
console.log("Result:", result);
|
|
159
|
+
});
|
|
160
|
+
});
|
|
161
|
+
(0, _extendedTest.describe)("Edge Cases - Item Count Variations", () => {
|
|
162
|
+
(0, _extendedTest.test)("should handle extraction with 0 items", async ({
|
|
163
|
+
page
|
|
164
|
+
}) => {
|
|
165
|
+
const testLabel = `product-list-0-items-${(0, _uuid.v4)()}`;
|
|
166
|
+
const emptyListTemplate = `
|
|
167
|
+
<div class="products-container">
|
|
168
|
+
<div class="additional-info">
|
|
169
|
+
<div class="shipping-notice">No products available</div>
|
|
170
|
+
<div class="return-policy">Check back later</div>
|
|
171
|
+
</div>
|
|
172
|
+
</div>
|
|
173
|
+
`;
|
|
174
|
+
const itemEntitySchema = {
|
|
175
|
+
type: "object",
|
|
176
|
+
required: ["title", "price"],
|
|
177
|
+
properties: {
|
|
178
|
+
title: {
|
|
179
|
+
type: "string",
|
|
180
|
+
description: "Product title",
|
|
181
|
+
primary: true
|
|
182
|
+
},
|
|
183
|
+
price: {
|
|
184
|
+
type: "string",
|
|
185
|
+
description: "Product price"
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
};
|
|
189
|
+
await page.setContent(emptyListTemplate);
|
|
190
|
+
const result = await (0, _.extractArrayFromPage)(page, {
|
|
191
|
+
itemEntityName: "product",
|
|
192
|
+
label: testLabel,
|
|
193
|
+
itemEntitySchema,
|
|
194
|
+
strategy: {
|
|
195
|
+
model: "claude-3-5-haiku-20241022",
|
|
196
|
+
type: "HTML"
|
|
197
|
+
},
|
|
198
|
+
variantKey: testLabel,
|
|
199
|
+
apiKey: process.env.ANTHROPIC_API_KEY
|
|
200
|
+
});
|
|
201
|
+
console.log("0 items result:", result);
|
|
202
|
+
(0, _extendedTest.expect)(result).toHaveLength(0);
|
|
203
|
+
(0, _extendedTest.expect)(Array.isArray(result)).toBe(true);
|
|
204
|
+
});
|
|
205
|
+
(0, _extendedTest.test)("should handle extraction with 1 item", async ({
|
|
206
|
+
page
|
|
207
|
+
}) => {
|
|
208
|
+
const testLabel = `product-list-1-item-${(0, _uuid.v4)()}`;
|
|
209
|
+
const singleItemTemplate = `
|
|
210
|
+
<div class="products-container">
|
|
211
|
+
<div class="product-item">
|
|
212
|
+
<h2 class="product-title">MacBook Pro M3</h2>
|
|
213
|
+
<div class="price-wrapper">
|
|
214
|
+
<span class="price">$2499</span>
|
|
215
|
+
</div>
|
|
216
|
+
<div class="details">
|
|
217
|
+
<p class="product-description">Professional laptop with M3 Max chip</p>
|
|
218
|
+
</div>
|
|
219
|
+
</div>
|
|
220
|
+
<div class="additional-info">
|
|
221
|
+
<div class="shipping-notice">Free express shipping</div>
|
|
222
|
+
<div class="return-policy">30-day return policy</div>
|
|
223
|
+
</div>
|
|
224
|
+
</div>
|
|
225
|
+
`;
|
|
226
|
+
const itemEntitySchema = {
|
|
227
|
+
type: "object",
|
|
228
|
+
required: ["title", "price"],
|
|
229
|
+
properties: {
|
|
230
|
+
title: {
|
|
231
|
+
type: "string",
|
|
232
|
+
description: "Product title",
|
|
233
|
+
primary: true
|
|
234
|
+
},
|
|
235
|
+
price: {
|
|
236
|
+
type: "string",
|
|
237
|
+
description: "Product price"
|
|
238
|
+
},
|
|
239
|
+
description: {
|
|
240
|
+
type: "string",
|
|
241
|
+
description: "Product description"
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
};
|
|
245
|
+
await page.setContent(singleItemTemplate);
|
|
246
|
+
const result = await (0, _.extractArrayFromPage)(page, {
|
|
247
|
+
itemEntityName: "product",
|
|
248
|
+
label: testLabel,
|
|
249
|
+
itemEntitySchema,
|
|
250
|
+
strategy: {
|
|
251
|
+
model: "claude-3-5-haiku-20241022",
|
|
252
|
+
type: "HTML"
|
|
253
|
+
},
|
|
254
|
+
variantKey: testLabel,
|
|
255
|
+
apiKey: process.env.ANTHROPIC_API_KEY
|
|
256
|
+
});
|
|
257
|
+
console.log("1 item result:", result);
|
|
258
|
+
(0, _extendedTest.expect)(result).toHaveLength(1);
|
|
259
|
+
(0, _extendedTest.expect)(result[0]).toHaveProperty("title", "MacBook Pro M3");
|
|
260
|
+
(0, _extendedTest.expect)(result[0]).toHaveProperty("price", "$2499");
|
|
261
|
+
(0, _extendedTest.expect)(result[0]).toHaveProperty("description", "Professional laptop with M3 Max chip");
|
|
262
|
+
});
|
|
263
|
+
(0, _extendedTest.test)("should handle extraction with 2 items", async ({
|
|
264
|
+
page
|
|
265
|
+
}) => {
|
|
266
|
+
const testLabel = `product-list-2-items-${(0, _uuid.v4)()}`;
|
|
267
|
+
const twoItemsTemplate = `
|
|
268
|
+
<div class="products-container">
|
|
269
|
+
<div class="product-item">
|
|
270
|
+
<h2 class="product-title">iPad Pro</h2>
|
|
271
|
+
<div class="price-wrapper">
|
|
272
|
+
<span class="price">$1099</span>
|
|
273
|
+
</div>
|
|
274
|
+
<div class="details">
|
|
275
|
+
<p class="product-description">Powerful tablet with M2 chip</p>
|
|
276
|
+
</div>
|
|
277
|
+
</div>
|
|
278
|
+
<div class="product-item">
|
|
279
|
+
<h2 class="product-title">Apple Watch Ultra</h2>
|
|
280
|
+
<div class="price-wrapper">
|
|
281
|
+
<span class="price">$799</span>
|
|
282
|
+
</div>
|
|
283
|
+
<div class="details">
|
|
284
|
+
<p class="product-description">Rugged smartwatch for athletes</p>
|
|
285
|
+
</div>
|
|
286
|
+
</div>
|
|
287
|
+
<div class="additional-info">
|
|
288
|
+
<div class="shipping-notice">Free shipping on all orders</div>
|
|
289
|
+
<div class="return-policy">30-day return policy</div>
|
|
290
|
+
</div>
|
|
291
|
+
</div>
|
|
292
|
+
`;
|
|
293
|
+
const itemEntitySchema = {
|
|
294
|
+
type: "object",
|
|
295
|
+
required: ["title", "price"],
|
|
296
|
+
properties: {
|
|
297
|
+
title: {
|
|
298
|
+
type: "string",
|
|
299
|
+
description: "Product title",
|
|
300
|
+
primary: true
|
|
301
|
+
},
|
|
302
|
+
price: {
|
|
303
|
+
type: "string",
|
|
304
|
+
description: "Product price"
|
|
305
|
+
},
|
|
306
|
+
description: {
|
|
307
|
+
type: "string",
|
|
308
|
+
description: "Product description"
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
};
|
|
312
|
+
await page.setContent(twoItemsTemplate);
|
|
313
|
+
const result = await (0, _.extractArrayFromPage)(page, {
|
|
314
|
+
itemEntityName: "product",
|
|
315
|
+
label: testLabel,
|
|
316
|
+
itemEntitySchema,
|
|
317
|
+
strategy: {
|
|
318
|
+
model: "claude-3-5-haiku-20241022",
|
|
319
|
+
type: "HTML"
|
|
320
|
+
},
|
|
321
|
+
variantKey: testLabel,
|
|
322
|
+
apiKey: process.env.ANTHROPIC_API_KEY
|
|
323
|
+
});
|
|
324
|
+
console.log("2 items result:", result);
|
|
325
|
+
(0, _extendedTest.expect)(result).toHaveLength(2);
|
|
326
|
+
(0, _extendedTest.expect)(result[0]).toHaveProperty("title", "iPad Pro");
|
|
327
|
+
(0, _extendedTest.expect)(result[0]).toHaveProperty("price", "$1099");
|
|
328
|
+
(0, _extendedTest.expect)(result[0]).toHaveProperty("description", "Powerful tablet with M2 chip");
|
|
329
|
+
(0, _extendedTest.expect)(result[1]).toHaveProperty("title", "Apple Watch Ultra");
|
|
330
|
+
(0, _extendedTest.expect)(result[1]).toHaveProperty("price", "$799");
|
|
331
|
+
(0, _extendedTest.expect)(result[1]).toHaveProperty("description", "Rugged smartwatch for athletes");
|
|
332
|
+
});
|
|
333
|
+
(0, _extendedTest.test)("should cache and reuse results for 1 item correctly", async ({
|
|
334
|
+
page
|
|
335
|
+
}) => {
|
|
336
|
+
const testLabel = `product-list-1-item-cache-${(0, _uuid.v4)()}`;
|
|
337
|
+
const singleItemTemplate = `
|
|
338
|
+
<div class="products-container">
|
|
339
|
+
<div class="product-item">
|
|
340
|
+
<h2 class="product-title">Sony WH-1000XM5</h2>
|
|
341
|
+
<div class="price-wrapper">
|
|
342
|
+
<span class="price">$399</span>
|
|
343
|
+
</div>
|
|
344
|
+
<div class="details">
|
|
345
|
+
<p class="product-description">Premium noise-canceling headphones</p>
|
|
346
|
+
</div>
|
|
347
|
+
</div>
|
|
348
|
+
</div>
|
|
349
|
+
`;
|
|
350
|
+
const itemEntitySchema = {
|
|
351
|
+
type: "object",
|
|
352
|
+
required: ["title", "price"],
|
|
353
|
+
properties: {
|
|
354
|
+
title: {
|
|
355
|
+
type: "string",
|
|
356
|
+
description: "Product title",
|
|
357
|
+
primary: true
|
|
358
|
+
},
|
|
359
|
+
price: {
|
|
360
|
+
type: "string",
|
|
361
|
+
description: "Product price"
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
};
|
|
365
|
+
const extractionOptions = {
|
|
366
|
+
itemEntityName: "product",
|
|
367
|
+
label: testLabel,
|
|
368
|
+
itemEntitySchema,
|
|
369
|
+
strategy: {
|
|
370
|
+
model: "claude-3-5-haiku-20241022",
|
|
371
|
+
type: "HTML"
|
|
372
|
+
},
|
|
373
|
+
variantKey: testLabel,
|
|
374
|
+
apiKey: process.env.ANTHROPIC_API_KEY
|
|
375
|
+
};
|
|
376
|
+
await page.setContent(singleItemTemplate);
|
|
377
|
+
const firstResult = await (0, _.extractArrayFromPage)(page, extractionOptions);
|
|
378
|
+
console.log("First extraction (1 item):", firstResult);
|
|
379
|
+
(0, _extendedTest.expect)(firstResult).toHaveLength(1);
|
|
380
|
+
(0, _extendedTest.expect)(firstResult[0]).toHaveProperty("title", "Sony WH-1000XM5");
|
|
381
|
+
(0, _extendedTest.expect)(firstResult[0]).toHaveProperty("price", "$399");
|
|
382
|
+
await page.setContent(singleItemTemplate);
|
|
383
|
+
const secondResult = await (0, _.extractArrayFromPage)(page, extractionOptions);
|
|
384
|
+
console.log("Second extraction (from cache, 1 item):", secondResult);
|
|
385
|
+
(0, _extendedTest.expect)(secondResult).toEqual(firstResult);
|
|
386
|
+
(0, _extendedTest.expect)(secondResult).toHaveLength(1);
|
|
387
|
+
(0, _extendedTest.expect)(secondResult[0]).toHaveProperty("title", "Sony WH-1000XM5");
|
|
388
|
+
(0, _extendedTest.expect)(secondResult[0]).toHaveProperty("price", "$399");
|
|
389
|
+
const modifiedTemplate = singleItemTemplate.replace("Sony WH-1000XM5", "Bose QuietComfort Ultra").replace("$399", "$429");
|
|
390
|
+
await page.setContent(modifiedTemplate);
|
|
391
|
+
const thirdResult = await (0, _.extractArrayFromPage)(page, extractionOptions);
|
|
392
|
+
console.log("Third extraction (changed content, 1 item):", thirdResult);
|
|
393
|
+
(0, _extendedTest.expect)(thirdResult).not.toEqual(firstResult);
|
|
394
|
+
(0, _extendedTest.expect)(thirdResult).toHaveLength(1);
|
|
395
|
+
(0, _extendedTest.expect)(thirdResult[0]).toHaveProperty("title", "Bose QuietComfort Ultra");
|
|
396
|
+
(0, _extendedTest.expect)(thirdResult[0]).toHaveProperty("price", "$429");
|
|
397
|
+
});
|
|
129
398
|
});
|
|
130
399
|
});
|
|
@@ -57,7 +57,7 @@ _extendedTest.describe.skip("Object Extractor Caching Tests", () => {
|
|
|
57
57
|
model: "claude-3-5-sonnet-20240620",
|
|
58
58
|
type: "HTML"
|
|
59
59
|
},
|
|
60
|
-
variantKey
|
|
60
|
+
variantKey,
|
|
61
61
|
apiKey: process.env.ANTHROPIC_API_KEY
|
|
62
62
|
};
|
|
63
63
|
await page.setContent(productTemplate);
|
|
@@ -5,12 +5,11 @@ Object.defineProperty(exports, "__esModule", {
|
|
|
5
5
|
});
|
|
6
6
|
exports.SUPPORTED_VISION_MODELS = exports.SUPPORTED_TEXT_MODELS = exports.SUPPORTED_GPT_MODELS = exports.SUPPORTED_GOOGLE_MODELS = exports.SUPPORTED_CLAUDE_MODELS = exports.MODELS_MAPPINGS = exports.MAX_TOKENS_OVERRIDES = exports.GPT_MODELS_MAPPINGS = exports.GOOGLE_MODELS_MAPPINGS = exports.CLAUDE_VISION_SUPPORTED_MODELS = exports.CLAUDE_ONLY_TEXT_MODELS = exports.CLAUDE_MODELS_MAPPINGS = void 0;
|
|
7
7
|
const CLAUDE_ONLY_TEXT_MODELS = exports.CLAUDE_ONLY_TEXT_MODELS = ["claude-3-5-haiku", "claude-3-5-haiku-20241022"];
|
|
8
|
-
const CLAUDE_VISION_SUPPORTED_MODELS = exports.CLAUDE_VISION_SUPPORTED_MODELS = ["claude-3-haiku", "claude-3-haiku-20240307", "claude-
|
|
8
|
+
const CLAUDE_VISION_SUPPORTED_MODELS = exports.CLAUDE_VISION_SUPPORTED_MODELS = ["claude-3-haiku", "claude-3-haiku-20240307", "claude-opus-4", "claude-opus-4-20250514", "claude-sonnet-4", "claude-sonnet-4-20250514"];
|
|
9
9
|
const SUPPORTED_CLAUDE_MODELS = exports.SUPPORTED_CLAUDE_MODELS = [...CLAUDE_ONLY_TEXT_MODELS, ...CLAUDE_VISION_SUPPORTED_MODELS];
|
|
10
10
|
const CLAUDE_MODELS_MAPPINGS = exports.CLAUDE_MODELS_MAPPINGS = {
|
|
11
11
|
"claude-3-haiku": "claude-3-haiku-20240307",
|
|
12
12
|
"claude-3-5-haiku": "claude-3-5-haiku-20241022",
|
|
13
|
-
"claude-3.5-sonnet": "claude-3-5-sonnet-20241022",
|
|
14
13
|
"claude-opus-4": "claude-opus-4-20250514",
|
|
15
14
|
"claude-sonnet-4": "claude-sonnet-4-20250514"
|
|
16
15
|
};
|
|
@@ -37,7 +36,6 @@ const MODELS_MAPPINGS = exports.MODELS_MAPPINGS = {
|
|
|
37
36
|
...GOOGLE_MODELS_MAPPINGS
|
|
38
37
|
};
|
|
39
38
|
const MAX_TOKENS_OVERRIDES = exports.MAX_TOKENS_OVERRIDES = {
|
|
40
|
-
"claude-3-5-sonnet-20240620": 8192,
|
|
41
39
|
"gemini-1.5-pro-002": 8192,
|
|
42
40
|
"gemini-1.5-flash-8b-002": 8192,
|
|
43
41
|
"gemini-1.5-flash-002": 8192,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@intuned/browser-dev",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.6-dev.0",
|
|
4
4
|
"description": "runner package for intuned functions",
|
|
5
5
|
"types": "./dist/index.d.ts",
|
|
6
6
|
"typesVersions": {
|
|
@@ -48,7 +48,8 @@
|
|
|
48
48
|
"generate-docs": "npx tsx ./scripts/generate-docs.ts",
|
|
49
49
|
"generate-all-docs": "npx tsx ./scripts/generate-docs.ts ./src/helpers/export.d.ts ./generated-docs/helpers && npx tsx ./scripts/generate-docs.ts ./src/ai/export.d.ts ./generated-docs/ai && npx tsx ./scripts/generate-docs.ts ./src/optimized-extractors/export.d.ts ./generated-docs/optimized-extractors",
|
|
50
50
|
"build-browser-scripts": "rollup -c ./src/common/browserScripts/rollup.config.mjs",
|
|
51
|
-
"copy-dts": "copyfiles -u 1 \"src/**/*.d.ts\" dist"
|
|
51
|
+
"copy-dts": "copyfiles -u 1 \"src/**/*.d.ts\" dist",
|
|
52
|
+
"release": "npx tsx ./scripts/release.ts"
|
|
52
53
|
},
|
|
53
54
|
"dependencies": {
|
|
54
55
|
"@ai-sdk/anthropic": "2.0.1",
|
|
@@ -76,7 +77,7 @@
|
|
|
76
77
|
"openai": "4.77.3",
|
|
77
78
|
"stack-utils": "2.0.6",
|
|
78
79
|
"tslib": "2.6.0",
|
|
79
|
-
"uuid": "
|
|
80
|
+
"uuid": "11.0.0",
|
|
80
81
|
"zod": "^3.25.76",
|
|
81
82
|
"zod-to-json-schema": "^3.24.6",
|
|
82
83
|
"zod-validation-error": "3.0.3"
|
|
@@ -122,4 +123,4 @@
|
|
|
122
123
|
"optional": true
|
|
123
124
|
}
|
|
124
125
|
}
|
|
125
|
-
}
|
|
126
|
+
}
|