@intuned/browser-dev 0.1.6-dev.1 → 0.1.8-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/export.d.ts +11 -160
- package/dist/ai/extractStructuredData.js +4 -4
- package/dist/ai/index.d.ts +11 -160
- package/dist/ai/tests/testExtractFromContent.spec.js +2 -2
- package/dist/ai/tests/testIsPageLoaded.spec.js +2 -2
- package/dist/ai/validators.js +2 -3
- package/dist/helpers/export.d.ts +4 -5
- package/dist/helpers/frame_utils/tests/testFindAllIframes.spec.js +2 -2
- package/dist/helpers/gotoUrl.js +51 -51
- package/dist/helpers/index.d.ts +4 -5
- package/dist/helpers/tests/testClickUntilExhausted.spec.js +4 -3
- package/dist/helpers/tests/testDownloadFile.spec.js +3 -3
- package/dist/helpers/tests/testGoToUrl.spec.js +2 -2
- package/dist/helpers/tests/testScrollToLoadContent.spec.js +2 -2
- package/dist/helpers/tests/testWithDomSettledWait.spec.js +2 -2
- package/dist/{common → optimized-extractors/common}/aiModelsValidations.js +4 -2
- package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +1 -1
- package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +1 -1
- package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +1 -1
- package/dist/optimized-extractors/export.d.ts +1 -1
- package/dist/optimized-extractors/index.d.ts +1 -1
- package/dist/optimized-extractors/types/aiModelsValidation.js +3 -1
- package/package.json +4 -3
- package/dist/ai/types/models.js +0 -42
- package/generated-docs/ai/functions/extractStructuredData.mdx +0 -255
- package/generated-docs/ai/functions/isPageLoaded.mdx +0 -89
- package/generated-docs/ai/interfaces/ArraySchema.mdx +0 -36
- package/generated-docs/ai/interfaces/BasicSchema.mdx +0 -14
- package/generated-docs/ai/interfaces/BooleanSchema.mdx +0 -28
- package/generated-docs/ai/interfaces/ImageBufferContentItem.mdx +0 -16
- package/generated-docs/ai/interfaces/ImageUrlContentItem.mdx +0 -16
- package/generated-docs/ai/interfaces/NumberSchema.mdx +0 -35
- package/generated-docs/ai/interfaces/ObjectSchema.mdx +0 -39
- package/generated-docs/ai/interfaces/StringSchema.mdx +0 -35
- package/generated-docs/ai/interfaces/TextContentItem.mdx +0 -14
- package/generated-docs/ai/type-aliases/ContentItem.mdx +0 -12
- package/generated-docs/ai/type-aliases/JsonSchema.mdx +0 -47
- package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +0 -85
- package/generated-docs/helpers/functions/clickButtonAndWait.mdx +0 -63
- package/generated-docs/helpers/functions/clickUntilExhausted.mdx +0 -112
- package/generated-docs/helpers/functions/downloadFile.mdx +0 -99
- package/generated-docs/helpers/functions/extractMarkdown.mdx +0 -56
- package/generated-docs/helpers/functions/filterEmptyValues.mdx +0 -51
- package/generated-docs/helpers/functions/goToUrl.mdx +0 -124
- package/generated-docs/helpers/functions/processDate.mdx +0 -55
- package/generated-docs/helpers/functions/resolveUrl.mdx +0 -165
- package/generated-docs/helpers/functions/sanitizeHtml.mdx +0 -113
- package/generated-docs/helpers/functions/saveFileToS3.mdx +0 -127
- package/generated-docs/helpers/functions/scrollToLoadContent.mdx +0 -83
- package/generated-docs/helpers/functions/uploadFileToS3.mdx +0 -121
- package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +0 -90
- package/generated-docs/helpers/functions/waitForDomSettled.mdx +0 -91
- package/generated-docs/helpers/functions/withNetworkSettledWait.mdx +0 -76
- package/generated-docs/helpers/interfaces/Attachment.mdx +0 -56
- package/generated-docs/helpers/interfaces/S3Configs.mdx +0 -52
- package/generated-docs/helpers/interfaces/SanitizeHtmlOptions.mdx +0 -22
- package/generated-docs/helpers/type-aliases/AttachmentType.mdx +0 -10
- package/generated-docs/helpers/type-aliases/FileType.mdx +0 -61
- package/generated-docs/helpers/type-aliases/Trigger.mdx +0 -62
package/dist/ai/export.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Locator, Page } from "playwright
|
|
1
|
+
import { Locator, Page } from "playwright";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
|
|
4
4
|
/**
|
|
@@ -221,7 +221,7 @@ export type JsonSchema =
|
|
|
221
221
|
* @param {boolean} [options.enableDomMatching=false] - Whether to enable DOM element matching during extraction. Defaults to false. When set to true, all types in the schema must be strings to match with the DOM elements. The extracted results will be matched with the DOM elements and returned, then cached in a smart fashion so that the next time the same data is extracted, the result will be returned from the cache even if the DOM has minor changes.
|
|
222
222
|
* @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data. Defaults to true
|
|
223
223
|
* @param {number} [options.maxRetries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3
|
|
224
|
-
* @param {
|
|
224
|
+
* @param {string} [options.model="claude-haiku-4-5-20251001"] - AI model to use for extraction. Defaults to "claude-haiku-4-5-20251001"
|
|
225
225
|
* @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account)
|
|
226
226
|
*
|
|
227
227
|
* @returns Promise resolving to the extracted structured data matching the provided schema
|
|
@@ -234,7 +234,7 @@ export type JsonSchema =
|
|
|
234
234
|
* const product = await extractStructuredData({
|
|
235
235
|
* source: page,
|
|
236
236
|
* strategy: "HTML",
|
|
237
|
-
* model: "
|
|
237
|
+
* model: "claude-haiku-4-5-20251001",
|
|
238
238
|
* dataSchema: {
|
|
239
239
|
* type: "object",
|
|
240
240
|
* properties: {
|
|
@@ -285,7 +285,7 @@ export declare function extractStructuredData(options: {
|
|
|
285
285
|
enableDomMatching?: boolean;
|
|
286
286
|
enableCache?: boolean;
|
|
287
287
|
maxRetries?: number;
|
|
288
|
-
model?:
|
|
288
|
+
model?: string;
|
|
289
289
|
apiKey?: string;
|
|
290
290
|
}): Promise<any>;
|
|
291
291
|
|
|
@@ -302,7 +302,7 @@ export declare function extractStructuredData(options: {
|
|
|
302
302
|
* @param {string} [options.prompt] - Optional prompt to guide the extraction process and provide more context
|
|
303
303
|
* @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data. Defaults to true
|
|
304
304
|
* @param {number} [options.maxRetries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3
|
|
305
|
-
* @param {
|
|
305
|
+
* @param {string} options.model - AI model to use for extraction (e.g., "gpt-4", "claude-3"). Defaults to "claude-haiku-4-5-20251001"
|
|
306
306
|
* @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account)
|
|
307
307
|
*
|
|
308
308
|
* @returns Promise resolving to the extracted structured data matching the provided schema
|
|
@@ -318,7 +318,7 @@ export declare function extractStructuredData(options: {
|
|
|
318
318
|
*
|
|
319
319
|
* const person = await extractStructuredData({
|
|
320
320
|
* content: textContent,
|
|
321
|
-
* model: "
|
|
321
|
+
* model: "claude-haiku-4-5-20251001",
|
|
322
322
|
* dataSchema: {
|
|
323
323
|
* type: "object",
|
|
324
324
|
* properties: {
|
|
@@ -348,7 +348,7 @@ export declare function extractStructuredData(options: {
|
|
|
348
348
|
*
|
|
349
349
|
* const product = await extractStructuredData({
|
|
350
350
|
* content: mixedContent,
|
|
351
|
-
* model: "claude-
|
|
351
|
+
* model: "claude-haiku-4-5-20251001",
|
|
352
352
|
* dataSchema: {
|
|
353
353
|
* type: "object",
|
|
354
354
|
* properties: {
|
|
@@ -369,159 +369,10 @@ export declare function extractStructuredData(options: {
|
|
|
369
369
|
prompt?: string;
|
|
370
370
|
maxRetries?: number;
|
|
371
371
|
enableCache?: boolean;
|
|
372
|
-
model:
|
|
372
|
+
model: string;
|
|
373
373
|
apiKey?: string;
|
|
374
374
|
}): Promise<any>;
|
|
375
375
|
|
|
376
|
-
type SUPPORTED_CLAUDE_MODELS =
|
|
377
|
-
| "claude-3-5-haiku-20241022"
|
|
378
|
-
| "claude-3-5-haiku-latest"
|
|
379
|
-
| "claude-3-7-sonnet-20250219"
|
|
380
|
-
| "claude-3-7-sonnet-latest"
|
|
381
|
-
| "claude-3-haiku-20240307"
|
|
382
|
-
| "claude-4-opus-20250514"
|
|
383
|
-
| "claude-4-sonnet-20250514"
|
|
384
|
-
| "claude-opus-4-1"
|
|
385
|
-
| "claude-opus-4-1-20250805"
|
|
386
|
-
| "claude-opus-4-20250514"
|
|
387
|
-
| "claude-sonnet-4-20250514";
|
|
388
|
-
|
|
389
|
-
type SUPPORTED_OPENAI_MODELS =
|
|
390
|
-
| "gpt-3.5-turbo"
|
|
391
|
-
| "gpt-3.5-turbo-0125"
|
|
392
|
-
| "gpt-3.5-turbo-0301"
|
|
393
|
-
| "gpt-3.5-turbo-0613"
|
|
394
|
-
| "gpt-3.5-turbo-1106"
|
|
395
|
-
| "gpt-3.5-turbo-16k"
|
|
396
|
-
| "gpt-3.5-turbo-16k-0613"
|
|
397
|
-
| "gpt-3.5-turbo-instruct"
|
|
398
|
-
| "gpt-3.5-turbo-instruct-0914"
|
|
399
|
-
| "gpt-4"
|
|
400
|
-
| "gpt-4-0314"
|
|
401
|
-
| "gpt-4-0613"
|
|
402
|
-
| "gpt-4-32k"
|
|
403
|
-
| "gpt-4-32k-0314"
|
|
404
|
-
| "gpt-4-32k-0613"
|
|
405
|
-
| "gpt-4-turbo"
|
|
406
|
-
| "gpt-4-turbo-2024-04-09"
|
|
407
|
-
| "gpt-4.1"
|
|
408
|
-
| "gpt-4.1-2025-04-14"
|
|
409
|
-
| "gpt-4.1-mini"
|
|
410
|
-
| "gpt-4.1-mini-2025-04-14"
|
|
411
|
-
| "gpt-4.1-nano"
|
|
412
|
-
| "gpt-4.1-nano-2025-04-14"
|
|
413
|
-
| "gpt-4o"
|
|
414
|
-
| "gpt-4o-2024-05-13"
|
|
415
|
-
| "gpt-4o-2024-08-06"
|
|
416
|
-
| "gpt-4o-2024-11-20"
|
|
417
|
-
| "gpt-4o-mini"
|
|
418
|
-
| "gpt-4o-mini-2024-07-18"
|
|
419
|
-
| "gpt-5"
|
|
420
|
-
| "gpt-5-2025-08-07"
|
|
421
|
-
| "gpt-5-chat"
|
|
422
|
-
| "gpt-5-chat-latest"
|
|
423
|
-
| "gpt-5-mini"
|
|
424
|
-
| "gpt-5-mini-2025-08-07"
|
|
425
|
-
| "gpt-5-nano"
|
|
426
|
-
| "gpt-5-nano-2025-08-07"
|
|
427
|
-
| "o1"
|
|
428
|
-
| "o1-2024-12-17"
|
|
429
|
-
| "o1-mini"
|
|
430
|
-
| "o1-mini-2024-09-12"
|
|
431
|
-
| "o1-pro"
|
|
432
|
-
| "o1-pro-2025-03-19"
|
|
433
|
-
| "o3"
|
|
434
|
-
| "o3-2025-04-16"
|
|
435
|
-
| "o3-deep-research"
|
|
436
|
-
| "o3-deep-research-2025-06-26"
|
|
437
|
-
| "o3-mini"
|
|
438
|
-
| "o3-mini-2025-01-31"
|
|
439
|
-
| "o3-pro"
|
|
440
|
-
| "o3-pro-2025-06-10"
|
|
441
|
-
| "o4-mini"
|
|
442
|
-
| "o4-mini-2025-04-16"
|
|
443
|
-
| "o4-mini-deep-research"
|
|
444
|
-
| "o4-mini-deep-research-2025-06-26";
|
|
445
|
-
/**
|
|
446
|
-
* Union type representing all supported AI models for data extraction.
|
|
447
|
-
* Includes models from both OpenAI and Anthropic.
|
|
448
|
-
*
|
|
449
|
-
* **Supported OpenAI Models:**
|
|
450
|
-
* "gpt-3.5-turbo"
|
|
451
|
-
* "gpt-3.5-turbo-0125"
|
|
452
|
-
* "gpt-3.5-turbo-0301"
|
|
453
|
-
* "gpt-3.5-turbo-0613"
|
|
454
|
-
* "gpt-3.5-turbo-1106"
|
|
455
|
-
* "gpt-3.5-turbo-16k"
|
|
456
|
-
* "gpt-3.5-turbo-16k-0613"
|
|
457
|
-
* "gpt-3.5-turbo-instruct"
|
|
458
|
-
* "gpt-3.5-turbo-instruct-0914"
|
|
459
|
-
* "gpt-4"
|
|
460
|
-
* "gpt-4-0314"
|
|
461
|
-
* "gpt-4-0613"
|
|
462
|
-
* "gpt-4-32k"
|
|
463
|
-
* "gpt-4-32k-0314"
|
|
464
|
-
* "gpt-4-32k-0613"
|
|
465
|
-
* "gpt-4-turbo"
|
|
466
|
-
* "gpt-4-turbo-2024-04-09"
|
|
467
|
-
* "gpt-4.1"
|
|
468
|
-
* "gpt-4.1-2025-04-14"
|
|
469
|
-
* "gpt-4.1-mini"
|
|
470
|
-
* "gpt-4.1-mini-2025-04-14"
|
|
471
|
-
* "gpt-4.1-nano"
|
|
472
|
-
* "gpt-4.1-nano-2025-04-14"
|
|
473
|
-
* "gpt-4o"
|
|
474
|
-
* "gpt-4o-2024-05-13"
|
|
475
|
-
* "gpt-4o-2024-08-06"
|
|
476
|
-
* "gpt-4o-2024-11-20"
|
|
477
|
-
* "gpt-4o-mini"
|
|
478
|
-
* "gpt-4o-mini-2024-07-18"
|
|
479
|
-
* "gpt-5"
|
|
480
|
-
* "gpt-5-2025-08-07"
|
|
481
|
-
* "gpt-5-chat"
|
|
482
|
-
* "gpt-5-chat-latest"
|
|
483
|
-
* "gpt-5-mini"
|
|
484
|
-
* "gpt-5-mini-2025-08-07"
|
|
485
|
-
* "gpt-5-nano"
|
|
486
|
-
* "gpt-5-nano-2025-08-07"
|
|
487
|
-
* "o1"
|
|
488
|
-
* "o1-2024-12-17"
|
|
489
|
-
* "o1-mini"
|
|
490
|
-
* "o1-mini-2024-09-12"
|
|
491
|
-
* "o1-pro"
|
|
492
|
-
* "o1-pro-2025-03-19"
|
|
493
|
-
* "o3"
|
|
494
|
-
* "o3-2025-04-16"
|
|
495
|
-
* "o3-deep-research"
|
|
496
|
-
* "o3-deep-research-2025-06-26"
|
|
497
|
-
* "o3-mini"
|
|
498
|
-
* "o3-mini-2025-01-31"
|
|
499
|
-
* "o3-pro"
|
|
500
|
-
* "o3-pro-2025-06-10"
|
|
501
|
-
* "o4-mini"
|
|
502
|
-
* "o4-mini-2025-04-16"
|
|
503
|
-
* "o4-mini-deep-research"
|
|
504
|
-
* "o4-mini-deep-research-2025-06-26"
|
|
505
|
-
*
|
|
506
|
-
* **Supported Anthropic (Claude) Models:**
|
|
507
|
-
* "claude-3-5-haiku-20241022"
|
|
508
|
-
* "claude-3-5-haiku-latest"
|
|
509
|
-
* "claude-3-7-sonnet-20250219"
|
|
510
|
-
* "claude-3-7-sonnet-latest"
|
|
511
|
-
* "claude-3-haiku-20240307"
|
|
512
|
-
* "claude-4-opus-20250514"
|
|
513
|
-
* "claude-4-sonnet-20250514"
|
|
514
|
-
* "claude-opus-4-1"
|
|
515
|
-
* "claude-opus-4-1-20250805"
|
|
516
|
-
* "claude-opus-4-20250514"
|
|
517
|
-
* "claude-sonnet-4-20250514"
|
|
518
|
-
*
|
|
519
|
-
* @type SUPPORTED_MODELS
|
|
520
|
-
*/
|
|
521
|
-
export type SUPPORTED_MODELS =
|
|
522
|
-
| SUPPORTED_CLAUDE_MODELS
|
|
523
|
-
| SUPPORTED_OPENAI_MODELS;
|
|
524
|
-
|
|
525
376
|
/**
|
|
526
377
|
* Uses AI vision to determine if a webpage has finished loading by analyzing a screenshot.
|
|
527
378
|
* Detects loading spinners, blank content, or incomplete page states.
|
|
@@ -529,7 +380,7 @@ export type SUPPORTED_MODELS =
|
|
|
529
380
|
* @param {Object} input - Input object containing the page to check
|
|
530
381
|
* @param {Page} input.page - The Playwright page to check
|
|
531
382
|
* @param {number} [input.timeoutInMs=10000] - Screenshot timeout in milliseconds. Defaults to 10000
|
|
532
|
-
* @param {
|
|
383
|
+
* @param {string} [input.model="gpt-5-mini-2025-08-07"] - AI model to use for the check. Defaults to "gpt-5-mini-2025-08-07"
|
|
533
384
|
* @param {string} [input.apiKey] - Optional API key for the AI service (if provided, will not be billed to your account)
|
|
534
385
|
* @returns {Promise<boolean>} Promise resolving to true if page is loaded, false if still loading
|
|
535
386
|
* @example
|
|
@@ -558,7 +409,7 @@ export type SUPPORTED_MODELS =
|
|
|
558
409
|
* while (attempts < 10) {
|
|
559
410
|
* const pageLoaded = await isPageLoaded({
|
|
560
411
|
* page,
|
|
561
|
-
* model: "
|
|
412
|
+
* model: "claude-haiku-4-5-20251001",
|
|
562
413
|
* timeoutInMs: 5000
|
|
563
414
|
* });
|
|
564
415
|
* if (pageLoaded) break;
|
|
@@ -573,7 +424,7 @@ export type SUPPORTED_MODELS =
|
|
|
573
424
|
export declare function isPageLoaded(input: {
|
|
574
425
|
page: Page;
|
|
575
426
|
timeoutInMs?: number;
|
|
576
|
-
model?:
|
|
427
|
+
model?: string;
|
|
577
428
|
apiKey?: string;
|
|
578
429
|
}): Promise<boolean>;
|
|
579
430
|
|
|
@@ -92,7 +92,7 @@ const extractStructuredData = async options => {
|
|
|
92
92
|
apiKey: validatedData.apiKey,
|
|
93
93
|
enableDomMatching: validatedData.enableDomMatching,
|
|
94
94
|
jsonSchema: validatedData.dataSchema,
|
|
95
|
-
model: validatedData.model || "claude-
|
|
95
|
+
model: validatedData.model || "claude-haiku-4-5-20251001",
|
|
96
96
|
content: simplifiedHtml,
|
|
97
97
|
prompt: validatedData.prompt,
|
|
98
98
|
images: [],
|
|
@@ -151,7 +151,7 @@ const extractStructuredData = async options => {
|
|
|
151
151
|
apiKey: validatedData.apiKey,
|
|
152
152
|
enableDomMatching: validatedData.enableDomMatching,
|
|
153
153
|
jsonSchema: validatedData.dataSchema,
|
|
154
|
-
model: validatedData.model || "claude-
|
|
154
|
+
model: validatedData.model || "claude-haiku-4-5-20251001",
|
|
155
155
|
content: "Extract structured data from the following images.",
|
|
156
156
|
prompt: validatedData.prompt,
|
|
157
157
|
images: images.value.map(i => ({
|
|
@@ -218,7 +218,7 @@ const extractStructuredData = async options => {
|
|
|
218
218
|
apiKey: validatedData.apiKey,
|
|
219
219
|
enableDomMatching: validatedData.enableDomMatching,
|
|
220
220
|
jsonSchema: validatedData.dataSchema,
|
|
221
|
-
model: validatedData.model || "claude-
|
|
221
|
+
model: validatedData.model || "claude-haiku-4-5-20251001",
|
|
222
222
|
content: markdown,
|
|
223
223
|
prompt: validatedData.prompt,
|
|
224
224
|
images: [],
|
|
@@ -306,7 +306,7 @@ const extractStructuredDataFromContent = async options => {
|
|
|
306
306
|
content: texts.join("\n"),
|
|
307
307
|
enableDomMatching: false,
|
|
308
308
|
apiKey: options.apiKey,
|
|
309
|
-
model: options.model || "claude-
|
|
309
|
+
model: options.model || "claude-haiku-4-5-20251001",
|
|
310
310
|
maxRetries: options.maxRetries
|
|
311
311
|
}
|
|
312
312
|
});
|
package/dist/ai/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Locator, Page } from "playwright
|
|
1
|
+
import { Locator, Page } from "playwright";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
|
|
4
4
|
/**
|
|
@@ -221,7 +221,7 @@ export type JsonSchema =
|
|
|
221
221
|
* @param {boolean} [options.enableDomMatching=false] - Whether to enable DOM element matching during extraction. Defaults to false. When set to true, all types in the schema must be strings to match with the DOM elements. The extracted results will be matched with the DOM elements and returned, then cached in a smart fashion so that the next time the same data is extracted, the result will be returned from the cache even if the DOM has minor changes.
|
|
222
222
|
* @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data. Defaults to true
|
|
223
223
|
* @param {number} [options.maxRetries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3
|
|
224
|
-
* @param {
|
|
224
|
+
* @param {string} [options.model="claude-haiku-4-5-20251001"] - AI model to use for extraction. Defaults to "claude-haiku-4-5-20251001"
|
|
225
225
|
* @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account)
|
|
226
226
|
*
|
|
227
227
|
* @returns Promise resolving to the extracted structured data matching the provided schema
|
|
@@ -234,7 +234,7 @@ export type JsonSchema =
|
|
|
234
234
|
* const product = await extractStructuredData({
|
|
235
235
|
* source: page,
|
|
236
236
|
* strategy: "HTML",
|
|
237
|
-
* model: "
|
|
237
|
+
* model: "claude-haiku-4-5-20251001",
|
|
238
238
|
* dataSchema: {
|
|
239
239
|
* type: "object",
|
|
240
240
|
* properties: {
|
|
@@ -285,7 +285,7 @@ export declare function extractStructuredData(options: {
|
|
|
285
285
|
enableDomMatching?: boolean;
|
|
286
286
|
enableCache?: boolean;
|
|
287
287
|
maxRetries?: number;
|
|
288
|
-
model?:
|
|
288
|
+
model?: string;
|
|
289
289
|
apiKey?: string;
|
|
290
290
|
}): Promise<any>;
|
|
291
291
|
|
|
@@ -302,7 +302,7 @@ export declare function extractStructuredData(options: {
|
|
|
302
302
|
* @param {string} [options.prompt] - Optional prompt to guide the extraction process and provide more context
|
|
303
303
|
* @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data. Defaults to true
|
|
304
304
|
* @param {number} [options.maxRetries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3
|
|
305
|
-
* @param {
|
|
305
|
+
* @param {string} options.model - AI model to use for extraction (e.g., "gpt-4", "claude-3"). Defaults to "claude-haiku-4-5-20251001"
|
|
306
306
|
* @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account)
|
|
307
307
|
*
|
|
308
308
|
* @returns Promise resolving to the extracted structured data matching the provided schema
|
|
@@ -318,7 +318,7 @@ export declare function extractStructuredData(options: {
|
|
|
318
318
|
*
|
|
319
319
|
* const person = await extractStructuredData({
|
|
320
320
|
* content: textContent,
|
|
321
|
-
* model: "
|
|
321
|
+
* model: "claude-haiku-4-5-20251001",
|
|
322
322
|
* dataSchema: {
|
|
323
323
|
* type: "object",
|
|
324
324
|
* properties: {
|
|
@@ -348,7 +348,7 @@ export declare function extractStructuredData(options: {
|
|
|
348
348
|
*
|
|
349
349
|
* const product = await extractStructuredData({
|
|
350
350
|
* content: mixedContent,
|
|
351
|
-
* model: "claude-
|
|
351
|
+
* model: "claude-haiku-4-5-20251001",
|
|
352
352
|
* dataSchema: {
|
|
353
353
|
* type: "object",
|
|
354
354
|
* properties: {
|
|
@@ -369,159 +369,10 @@ export declare function extractStructuredData(options: {
|
|
|
369
369
|
prompt?: string;
|
|
370
370
|
maxRetries?: number;
|
|
371
371
|
enableCache?: boolean;
|
|
372
|
-
model:
|
|
372
|
+
model: string;
|
|
373
373
|
apiKey?: string;
|
|
374
374
|
}): Promise<any>;
|
|
375
375
|
|
|
376
|
-
type SUPPORTED_CLAUDE_MODELS =
|
|
377
|
-
| "claude-3-5-haiku-20241022"
|
|
378
|
-
| "claude-3-5-haiku-latest"
|
|
379
|
-
| "claude-3-7-sonnet-20250219"
|
|
380
|
-
| "claude-3-7-sonnet-latest"
|
|
381
|
-
| "claude-3-haiku-20240307"
|
|
382
|
-
| "claude-4-opus-20250514"
|
|
383
|
-
| "claude-4-sonnet-20250514"
|
|
384
|
-
| "claude-opus-4-1"
|
|
385
|
-
| "claude-opus-4-1-20250805"
|
|
386
|
-
| "claude-opus-4-20250514"
|
|
387
|
-
| "claude-sonnet-4-20250514";
|
|
388
|
-
|
|
389
|
-
type SUPPORTED_OPENAI_MODELS =
|
|
390
|
-
| "gpt-3.5-turbo"
|
|
391
|
-
| "gpt-3.5-turbo-0125"
|
|
392
|
-
| "gpt-3.5-turbo-0301"
|
|
393
|
-
| "gpt-3.5-turbo-0613"
|
|
394
|
-
| "gpt-3.5-turbo-1106"
|
|
395
|
-
| "gpt-3.5-turbo-16k"
|
|
396
|
-
| "gpt-3.5-turbo-16k-0613"
|
|
397
|
-
| "gpt-3.5-turbo-instruct"
|
|
398
|
-
| "gpt-3.5-turbo-instruct-0914"
|
|
399
|
-
| "gpt-4"
|
|
400
|
-
| "gpt-4-0314"
|
|
401
|
-
| "gpt-4-0613"
|
|
402
|
-
| "gpt-4-32k"
|
|
403
|
-
| "gpt-4-32k-0314"
|
|
404
|
-
| "gpt-4-32k-0613"
|
|
405
|
-
| "gpt-4-turbo"
|
|
406
|
-
| "gpt-4-turbo-2024-04-09"
|
|
407
|
-
| "gpt-4.1"
|
|
408
|
-
| "gpt-4.1-2025-04-14"
|
|
409
|
-
| "gpt-4.1-mini"
|
|
410
|
-
| "gpt-4.1-mini-2025-04-14"
|
|
411
|
-
| "gpt-4.1-nano"
|
|
412
|
-
| "gpt-4.1-nano-2025-04-14"
|
|
413
|
-
| "gpt-4o"
|
|
414
|
-
| "gpt-4o-2024-05-13"
|
|
415
|
-
| "gpt-4o-2024-08-06"
|
|
416
|
-
| "gpt-4o-2024-11-20"
|
|
417
|
-
| "gpt-4o-mini"
|
|
418
|
-
| "gpt-4o-mini-2024-07-18"
|
|
419
|
-
| "gpt-5"
|
|
420
|
-
| "gpt-5-2025-08-07"
|
|
421
|
-
| "gpt-5-chat"
|
|
422
|
-
| "gpt-5-chat-latest"
|
|
423
|
-
| "gpt-5-mini"
|
|
424
|
-
| "gpt-5-mini-2025-08-07"
|
|
425
|
-
| "gpt-5-nano"
|
|
426
|
-
| "gpt-5-nano-2025-08-07"
|
|
427
|
-
| "o1"
|
|
428
|
-
| "o1-2024-12-17"
|
|
429
|
-
| "o1-mini"
|
|
430
|
-
| "o1-mini-2024-09-12"
|
|
431
|
-
| "o1-pro"
|
|
432
|
-
| "o1-pro-2025-03-19"
|
|
433
|
-
| "o3"
|
|
434
|
-
| "o3-2025-04-16"
|
|
435
|
-
| "o3-deep-research"
|
|
436
|
-
| "o3-deep-research-2025-06-26"
|
|
437
|
-
| "o3-mini"
|
|
438
|
-
| "o3-mini-2025-01-31"
|
|
439
|
-
| "o3-pro"
|
|
440
|
-
| "o3-pro-2025-06-10"
|
|
441
|
-
| "o4-mini"
|
|
442
|
-
| "o4-mini-2025-04-16"
|
|
443
|
-
| "o4-mini-deep-research"
|
|
444
|
-
| "o4-mini-deep-research-2025-06-26";
|
|
445
|
-
/**
|
|
446
|
-
* Union type representing all supported AI models for data extraction.
|
|
447
|
-
* Includes models from both OpenAI and Anthropic.
|
|
448
|
-
*
|
|
449
|
-
* **Supported OpenAI Models:**
|
|
450
|
-
* "gpt-3.5-turbo"
|
|
451
|
-
* "gpt-3.5-turbo-0125"
|
|
452
|
-
* "gpt-3.5-turbo-0301"
|
|
453
|
-
* "gpt-3.5-turbo-0613"
|
|
454
|
-
* "gpt-3.5-turbo-1106"
|
|
455
|
-
* "gpt-3.5-turbo-16k"
|
|
456
|
-
* "gpt-3.5-turbo-16k-0613"
|
|
457
|
-
* "gpt-3.5-turbo-instruct"
|
|
458
|
-
* "gpt-3.5-turbo-instruct-0914"
|
|
459
|
-
* "gpt-4"
|
|
460
|
-
* "gpt-4-0314"
|
|
461
|
-
* "gpt-4-0613"
|
|
462
|
-
* "gpt-4-32k"
|
|
463
|
-
* "gpt-4-32k-0314"
|
|
464
|
-
* "gpt-4-32k-0613"
|
|
465
|
-
* "gpt-4-turbo"
|
|
466
|
-
* "gpt-4-turbo-2024-04-09"
|
|
467
|
-
* "gpt-4.1"
|
|
468
|
-
* "gpt-4.1-2025-04-14"
|
|
469
|
-
* "gpt-4.1-mini"
|
|
470
|
-
* "gpt-4.1-mini-2025-04-14"
|
|
471
|
-
* "gpt-4.1-nano"
|
|
472
|
-
* "gpt-4.1-nano-2025-04-14"
|
|
473
|
-
* "gpt-4o"
|
|
474
|
-
* "gpt-4o-2024-05-13"
|
|
475
|
-
* "gpt-4o-2024-08-06"
|
|
476
|
-
* "gpt-4o-2024-11-20"
|
|
477
|
-
* "gpt-4o-mini"
|
|
478
|
-
* "gpt-4o-mini-2024-07-18"
|
|
479
|
-
* "gpt-5"
|
|
480
|
-
* "gpt-5-2025-08-07"
|
|
481
|
-
* "gpt-5-chat"
|
|
482
|
-
* "gpt-5-chat-latest"
|
|
483
|
-
* "gpt-5-mini"
|
|
484
|
-
* "gpt-5-mini-2025-08-07"
|
|
485
|
-
* "gpt-5-nano"
|
|
486
|
-
* "gpt-5-nano-2025-08-07"
|
|
487
|
-
* "o1"
|
|
488
|
-
* "o1-2024-12-17"
|
|
489
|
-
* "o1-mini"
|
|
490
|
-
* "o1-mini-2024-09-12"
|
|
491
|
-
* "o1-pro"
|
|
492
|
-
* "o1-pro-2025-03-19"
|
|
493
|
-
* "o3"
|
|
494
|
-
* "o3-2025-04-16"
|
|
495
|
-
* "o3-deep-research"
|
|
496
|
-
* "o3-deep-research-2025-06-26"
|
|
497
|
-
* "o3-mini"
|
|
498
|
-
* "o3-mini-2025-01-31"
|
|
499
|
-
* "o3-pro"
|
|
500
|
-
* "o3-pro-2025-06-10"
|
|
501
|
-
* "o4-mini"
|
|
502
|
-
* "o4-mini-2025-04-16"
|
|
503
|
-
* "o4-mini-deep-research"
|
|
504
|
-
* "o4-mini-deep-research-2025-06-26"
|
|
505
|
-
*
|
|
506
|
-
* **Supported Anthropic (Claude) Models:**
|
|
507
|
-
* "claude-3-5-haiku-20241022"
|
|
508
|
-
* "claude-3-5-haiku-latest"
|
|
509
|
-
* "claude-3-7-sonnet-20250219"
|
|
510
|
-
* "claude-3-7-sonnet-latest"
|
|
511
|
-
* "claude-3-haiku-20240307"
|
|
512
|
-
* "claude-4-opus-20250514"
|
|
513
|
-
* "claude-4-sonnet-20250514"
|
|
514
|
-
* "claude-opus-4-1"
|
|
515
|
-
* "claude-opus-4-1-20250805"
|
|
516
|
-
* "claude-opus-4-20250514"
|
|
517
|
-
* "claude-sonnet-4-20250514"
|
|
518
|
-
*
|
|
519
|
-
* @type SUPPORTED_MODELS
|
|
520
|
-
*/
|
|
521
|
-
export type SUPPORTED_MODELS =
|
|
522
|
-
| SUPPORTED_CLAUDE_MODELS
|
|
523
|
-
| SUPPORTED_OPENAI_MODELS;
|
|
524
|
-
|
|
525
376
|
/**
|
|
526
377
|
* Uses AI vision to determine if a webpage has finished loading by analyzing a screenshot.
|
|
527
378
|
* Detects loading spinners, blank content, or incomplete page states.
|
|
@@ -529,7 +380,7 @@ export type SUPPORTED_MODELS =
|
|
|
529
380
|
* @param {Object} input - Input object containing the page to check
|
|
530
381
|
* @param {Page} input.page - The Playwright page to check
|
|
531
382
|
* @param {number} [input.timeoutInMs=10000] - Screenshot timeout in milliseconds. Defaults to 10000
|
|
532
|
-
* @param {
|
|
383
|
+
* @param {string} [input.model="gpt-5-mini-2025-08-07"] - AI model to use for the check. Defaults to "gpt-5-mini-2025-08-07"
|
|
533
384
|
* @param {string} [input.apiKey] - Optional API key for the AI service (if provided, will not be billed to your account)
|
|
534
385
|
* @returns {Promise<boolean>} Promise resolving to true if page is loaded, false if still loading
|
|
535
386
|
* @example
|
|
@@ -558,7 +409,7 @@ export type SUPPORTED_MODELS =
|
|
|
558
409
|
* while (attempts < 10) {
|
|
559
410
|
* const pageLoaded = await isPageLoaded({
|
|
560
411
|
* page,
|
|
561
|
-
* model: "
|
|
412
|
+
* model: "claude-haiku-4-5-20251001",
|
|
562
413
|
* timeoutInMs: 5000
|
|
563
414
|
* });
|
|
564
415
|
* if (pageLoaded) break;
|
|
@@ -573,7 +424,7 @@ export type SUPPORTED_MODELS =
|
|
|
573
424
|
export declare function isPageLoaded(input: {
|
|
574
425
|
page: Page;
|
|
575
426
|
timeoutInMs?: number;
|
|
576
|
-
model?:
|
|
427
|
+
model?: string;
|
|
577
428
|
apiKey?: string;
|
|
578
429
|
}): Promise<boolean>;
|
|
579
430
|
|
|
@@ -2,14 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
var _extendedTest = require("../../common/extendedTest");
|
|
4
4
|
var _ = require("..");
|
|
5
|
-
var
|
|
5
|
+
var _playwright = require("playwright");
|
|
6
6
|
var _dotenv = require("dotenv");
|
|
7
7
|
(0, _dotenv.config)();
|
|
8
8
|
_extendedTest.describe.skip("Extract data from content tests", () => {
|
|
9
9
|
let browser;
|
|
10
10
|
let page;
|
|
11
11
|
(0, _extendedTest.beforeAll)(async () => {
|
|
12
|
-
browser = await
|
|
12
|
+
browser = await _playwright.chromium.launch({
|
|
13
13
|
headless: true
|
|
14
14
|
});
|
|
15
15
|
});
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
|
|
3
3
|
var _extendedTest = require("../../common/extendedTest");
|
|
4
|
-
var
|
|
4
|
+
var _playwright = require("playwright");
|
|
5
5
|
var _isPageLoaded = require("../isPageLoaded");
|
|
6
6
|
var dotenv = _interopRequireWildcard(require("dotenv"));
|
|
7
7
|
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
@@ -179,7 +179,7 @@ _extendedTest.describe.skip("isPageLoaded Tests", () => {
|
|
|
179
179
|
let browser;
|
|
180
180
|
let page;
|
|
181
181
|
(0, _extendedTest.beforeAll)(async () => {
|
|
182
|
-
browser = await
|
|
182
|
+
browser = await _playwright.chromium.launch({
|
|
183
183
|
headless: true
|
|
184
184
|
});
|
|
185
185
|
});
|
package/dist/ai/validators.js
CHANGED
|
@@ -13,7 +13,6 @@ exports.normalizeJsonSchema = normalizeJsonSchema;
|
|
|
13
13
|
var _zod = require("zod");
|
|
14
14
|
var _locatorHelpers = require("../common/locatorHelpers");
|
|
15
15
|
var _ajv = _interopRequireDefault(require("ajv"));
|
|
16
|
-
var _aiModelsValidations = require("../common/aiModelsValidations");
|
|
17
16
|
var _zodToJsonSchema = require("zod-to-json-schema");
|
|
18
17
|
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
|
|
19
18
|
const basicSchema = _zod.z.object({
|
|
@@ -102,7 +101,7 @@ const extractDataInputJsonSchema = exports.extractDataInputJsonSchema = _zod.z.o
|
|
|
102
101
|
}), _zod.z.any().refine(_locatorHelpers.isPage, {
|
|
103
102
|
message: "Invalid PageOrLocator"
|
|
104
103
|
})]),
|
|
105
|
-
model: _zod.z.
|
|
104
|
+
model: _zod.z.string().optional().default("claude-haiku-4-5-20251001"),
|
|
106
105
|
dataSchema: jsonSchemaCustomValidation,
|
|
107
106
|
strategy: _zod.z.enum(["IMAGE", "MARKDOWN", "HTML"]).optional().default("HTML"),
|
|
108
107
|
prompt: _zod.z.string().optional(),
|
|
@@ -160,7 +159,7 @@ function checkAllTypesAreStrings(schema) {
|
|
|
160
159
|
const genericExtractDataInputSchema = exports.genericExtractDataInputSchema = _zod.z.object({
|
|
161
160
|
dataSchema: jsonSchemaCustomValidation,
|
|
162
161
|
prompt: _zod.z.string().optional(),
|
|
163
|
-
model: _zod.z.
|
|
162
|
+
model: _zod.z.string().optional().default("claude-haiku-4-5-20251001"),
|
|
164
163
|
apiKey: _zod.z.string().optional(),
|
|
165
164
|
enableCache: _zod.z.boolean().optional().default(true),
|
|
166
165
|
maxRetries: _zod.z.number().optional().default(3)
|
package/dist/helpers/export.d.ts
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
/* eslint-disable prettier/prettier */
|
|
2
2
|
// prettier-ignore-file
|
|
3
|
-
import type { Locator, Page, ElementHandle } from "playwright
|
|
3
|
+
import type { Locator, Page, ElementHandle } from "playwright";
|
|
4
4
|
import type { ReadStream } from "fs";
|
|
5
|
-
import { Download } from "playwright
|
|
6
|
-
import { SUPPORTED_MODELS } from "../ai/export";
|
|
5
|
+
import { Download } from "playwright";
|
|
7
6
|
|
|
8
7
|
/**
|
|
9
8
|
* Configuration options for sanitizing HTML content.
|
|
@@ -204,7 +203,7 @@ export declare function filterEmptyValues<T>(input: { data: T }): T;
|
|
|
204
203
|
* @param {string} [input.waitForLoadState="load"] - When to consider navigation succeeded. Options: "load", "domcontentloaded", "networkidle", "commit". Defaults to "load"
|
|
205
204
|
* @param {boolean} [input.throwOnTimeout=true] - Whether to throw an error if navigation times out. When false, the function returns without throwing, allowing continued execution. Defaults to true.
|
|
206
205
|
* @param {boolean} [input.waitForLoadingStateUsingAi=false] - When true, uses AI vision to verify the page is fully loaded by checking for loading spinners, blank content, or incomplete states. Retries up to 4 times with 5-second delays. Defaults to false
|
|
207
|
-
* @param {
|
|
206
|
+
* @param {string} [input.model="gpt-5-mini-2025-08-07"] - AI model to use for loading verification. Defaults to "gpt-5-mini-2025-08-07"
|
|
208
207
|
* @param {string} [input.apiKey] - Optional API key for the AI service (if provided, will not be billed to your account)
|
|
209
208
|
* @returns {Promise<void>} Promise that resolves when navigation completes successfully. If the operation fails and `throwOnTimeout` is false, resolves without error
|
|
210
209
|
*
|
|
@@ -261,7 +260,7 @@ export declare function goToUrl(input: {
|
|
|
261
260
|
throwOnTimeout?: boolean;
|
|
262
261
|
waitForLoadState?: "load" | "domcontentloaded" | "networkidle";
|
|
263
262
|
waitForLoadingStateUsingAi?: boolean;
|
|
264
|
-
model?:
|
|
263
|
+
model?: string;
|
|
265
264
|
apiKey?: string;
|
|
266
265
|
}): Promise<void>;
|
|
267
266
|
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
|
|
3
3
|
var _extendedTest = require("../../../common/extendedTest");
|
|
4
|
-
var
|
|
4
|
+
var _playwright = require("playwright");
|
|
5
5
|
var _findAllIframes = require("../findAllIframes");
|
|
6
6
|
(0, _extendedTest.describe)("Test findAllIframes", () => {
|
|
7
7
|
let browser;
|
|
8
8
|
let page;
|
|
9
9
|
(0, _extendedTest.beforeAll)(async () => {
|
|
10
|
-
browser = await
|
|
10
|
+
browser = await _playwright.chromium.launch({
|
|
11
11
|
headless: true
|
|
12
12
|
});
|
|
13
13
|
});
|