@intuned/browser-dev 2.2.3-unify-sdks.26 → 2.2.3-unify-sdks.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/export.d.ts +40 -239
- package/dist/ai/index.d.ts +40 -239
- package/dist/ai/isPageLoaded.js +8 -8
- package/dist/ai/tests/testExtractStructuredData.spec.js +1 -1
- package/dist/helpers/export.d.js +5 -1
- package/dist/helpers/export.d.ts +277 -464
- package/dist/helpers/gotoUrl.js +2 -1
- package/dist/helpers/index.d.ts +277 -464
- package/dist/helpers/index.js +3 -3
- package/dist/helpers/resolveUrl.js +4 -3
- package/dist/helpers/scrollToLoadContent.js +10 -5
- package/dist/helpers/tests/testIsPageLoaded.spec.js +12 -6
- package/dist/helpers/tests/testWaitForDomSettled.spec.js +36 -41
- package/dist/helpers/tests/{testWaitForNetworkIdle.spec.js → testWithNetworkIdleWait.spec.js} +44 -45
- package/dist/helpers/types/Attachment.js +43 -9
- package/dist/helpers/uploadFileToS3.js +5 -5
- package/dist/helpers/utils/getS3Client.js +3 -3
- package/dist/helpers/waitForDomSettled.js +4 -97
- package/dist/helpers/withNetworkIdleWait.js +91 -0
- package/dist/optimized-extractors/export.d.ts +4 -4
- package/dist/optimized-extractors/index.d.ts +4 -4
- package/generated-docs/ai/functions/extractStructuredData.mdx +168 -0
- package/generated-docs/ai/functions/isPageLoaded.mdx +139 -0
- package/generated-docs/ai/interfaces/ArraySchema.mdx +33 -0
- package/generated-docs/ai/interfaces/BasicSchema.mdx +14 -0
- package/generated-docs/ai/interfaces/BooleanSchema.mdx +25 -0
- package/generated-docs/ai/interfaces/NumberSchema.mdx +32 -0
- package/generated-docs/ai/interfaces/ObjectSchema.mdx +36 -0
- package/generated-docs/ai/interfaces/StringSchema.mdx +32 -0
- package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +12 -0
- package/generated-docs/helpers/functions/downloadFile.mdx +95 -0
- package/generated-docs/helpers/functions/extractMarkdown.mdx +53 -0
- package/generated-docs/helpers/functions/filterEmptyValues.mdx +48 -0
- package/generated-docs/helpers/functions/goToUrl.mdx +97 -0
- package/generated-docs/helpers/functions/processDate.mdx +52 -0
- package/generated-docs/helpers/functions/resolveUrl.mdx +79 -0
- package/generated-docs/helpers/functions/sanitizeHtml.mdx +113 -0
- package/generated-docs/helpers/functions/saveFileToS3.mdx +144 -0
- package/generated-docs/helpers/functions/scrollToLoadContent.mdx +87 -0
- package/generated-docs/helpers/functions/uploadFileToS3.mdx +104 -0
- package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +66 -0
- package/generated-docs/helpers/functions/waitForDomSettled.mdx +95 -0
- package/generated-docs/helpers/functions/withNetworkIdle.mdx +93 -0
- package/generated-docs/helpers/interfaces/Attachment.mdx +45 -0
- package/generated-docs/helpers/interfaces/S3Configs.mdx +36 -0
- package/generated-docs/helpers/interfaces/S3UploadOptions.mdx +40 -0
- package/generated-docs/helpers/interfaces/SanitizeHtmlOptions.mdx +22 -0
- package/generated-docs/helpers/type-aliases/AttachmentType.mdx +12 -0
- package/generated-docs/helpers/type-aliases/DataInput.mdx +11 -0
- package/generated-docs/helpers/type-aliases/DataObject.mdx +11 -0
- package/generated-docs/helpers/type-aliases/S3UploadableFile.mdx +10 -0
- package/generated-docs/helpers/type-aliases/Trigger.mdx +13 -0
- package/package.json +3 -19
- package/dist/ai-extractors/AnthropicClient/index.js +0 -23
- package/dist/ai-extractors/export.d.js +0 -5
- package/dist/ai-extractors/export.d.ts +0 -425
- package/dist/ai-extractors/extractStructuredData.js +0 -79
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/constants.js +0 -7
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/errors.js +0 -42
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStructuredDataUsingClaude.js +0 -149
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStructuredDataUsingGoogle.js +0 -37
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStructuredDataUsingOpenAi.js +0 -144
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStrucutredDataUsingAiInstance.js +0 -123
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/index.js +0 -55
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/isItemTableHeaderOrFooter.js +0 -96
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/screenshotHelpers.js +0 -55
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/types.js +0 -5
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/utils.js +0 -53
- package/dist/ai-extractors/extractionHelpers/types.js +0 -5
- package/dist/ai-extractors/fileExtractors.js +0 -176
- package/dist/ai-extractors/index.d.ts +0 -425
- package/dist/ai-extractors/index.js +0 -31
- package/dist/ai-extractors/openAiClients/index.js +0 -23
- package/dist/ai-extractors/validators.js +0 -239
- package/dist/helpers/waitForNetworkIdle.js +0 -192
- package/dist/playwright/export.d.js +0 -5
- package/dist/playwright/export.d.ts +0 -220
- package/dist/playwright/index.d.ts +0 -220
- package/dist/playwright/index.js +0 -18
- package/dist/playwright/staticExtractors/extractHelpers.js +0 -170
- package/dist/playwright/staticExtractors/getArrayUsingArrayExtractor.js +0 -84
- package/dist/playwright/staticExtractors/getObjectUsingObjectExtractor.js +0 -45
- package/dist/playwright/staticExtractors/index.js +0 -37
- package/dist/playwright/staticExtractors/types.js +0 -26
package/dist/ai/export.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { Locator, Page } from "playwright-core";
|
|
2
|
-
|
|
2
|
+
|
|
3
3
|
/**
|
|
4
4
|
* Base schema interface that all JSON schema types extend from.
|
|
5
5
|
* Provides common properties like type and description.
|
|
@@ -159,22 +159,22 @@ export interface ObjectSchema extends BasicSchema {
|
|
|
159
159
|
* including HTML parsing, image analysis, and Markdown conversion. It supports extraction
|
|
160
160
|
* from entire pages or specific elements, with built-in caching and retry mechanisms.
|
|
161
161
|
*
|
|
162
|
-
* @param options - Configuration object containing extraction parameters
|
|
163
|
-
* @param {Page | Locator}
|
|
162
|
+
* @param {Object} options - Configuration object containing extraction parameters
|
|
163
|
+
* @param {Page | Locator} options.source - Playwright Page object to extract data from the entire page or Locator object to extract data from a specific element
|
|
164
|
+
* @param {JsonSchema} options.dataSchema - [JsonSchema](../interfaces/JsonSchema) defining the structure of the data to extract
|
|
164
165
|
* @param {SUPPORTED_MODELS} [options.model] - AI model to use for extraction (e.g., "gpt-4", "claude-3"), see [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models.
|
|
165
166
|
* @param {string} [options.strategy] - Type of extraction: "HTML", "IMAGE", or "MARKDOWN"
|
|
166
|
-
* @param {JsonSchema} options.dataSchema - [JsonSchema](../interfaces/JsonSchema) defining the structure of the data to extract
|
|
167
167
|
* @param {string} [options.prompt] - Optional prompt to guide the extraction process and provide more context
|
|
168
168
|
* @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account)
|
|
169
169
|
* @param {boolean} [options.enableDomMatching=false] - Whether to enable DOM element matching during extraction. Defaults to false. When set to true, all types in the schema must be strings to match with the DOM elements. The extracted results will be matched with the DOM elements and returned, then will be cached in a smart fashion so that the next time the same data is extracted, the result will be returned from the cache even if the DOM has minor changes.
|
|
170
170
|
* @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data. Defaults to true.
|
|
171
|
-
* @param {integer} [options.retries=3] - Maximum number of retry attempts on
|
|
171
|
+
* @param {integer} [options.retries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, api errors, output errors, etc.
|
|
172
172
|
*
|
|
173
173
|
* @returns Promise resolving to the extracted structured data matching the provided schema
|
|
174
174
|
|
|
175
175
|
* @example
|
|
176
176
|
* ```typescript Extract Product Information from Entire Page
|
|
177
|
-
* import { extractStructuredData } from '
|
|
177
|
+
* import { extractStructuredData } from '@intuned/browser/ai';
|
|
178
178
|
*
|
|
179
179
|
* const productSchema = {
|
|
180
180
|
* type: "object",
|
|
@@ -188,11 +188,10 @@ export interface ObjectSchema extends BasicSchema {
|
|
|
188
188
|
* };
|
|
189
189
|
*
|
|
190
190
|
* const product = await extractStructuredData({
|
|
191
|
-
*
|
|
192
|
-
* strategy:
|
|
191
|
+
* source: page,
|
|
192
|
+
* strategy: "HTML",
|
|
193
|
+
* model: "gpt-4o",
|
|
193
194
|
* dataSchema: productSchema,
|
|
194
|
-
* entityName: "product",
|
|
195
|
-
* label: "product-extractor",
|
|
196
195
|
* prompt: "Extract product details from this e-commerce page"
|
|
197
196
|
* });
|
|
198
197
|
*
|
|
@@ -201,7 +200,7 @@ export interface ObjectSchema extends BasicSchema {
|
|
|
201
200
|
*
|
|
202
201
|
* @example
|
|
203
202
|
* ```typescript Extract Article Data from Specific Element
|
|
204
|
-
* import { extractStructuredData } from '
|
|
203
|
+
* import { extractStructuredData } from '@intuned/browser/ai';
|
|
205
204
|
*
|
|
206
205
|
* const articleSchema = {
|
|
207
206
|
* type: "object",
|
|
@@ -217,12 +216,11 @@ export interface ObjectSchema extends BasicSchema {
|
|
|
217
216
|
*
|
|
218
217
|
* const articleContainer = page.locator("article.main-content");
|
|
219
218
|
* const article = await extractStructuredData({
|
|
220
|
-
*
|
|
221
|
-
* strategy:
|
|
219
|
+
* source: articleContainer,
|
|
220
|
+
* strategy: "MARKDOWN",
|
|
221
|
+
* model: "claude-3",
|
|
222
222
|
* dataSchema: articleSchema,
|
|
223
|
-
*
|
|
224
|
-
* label: "article-extractor",
|
|
225
|
-
* retries: 5
|
|
223
|
+
* maxRetries: 5
|
|
226
224
|
* });
|
|
227
225
|
*
|
|
228
226
|
* console.log(`Article: ${article.title} by ${article.author}`);
|
|
@@ -230,7 +228,7 @@ export interface ObjectSchema extends BasicSchema {
|
|
|
230
228
|
*
|
|
231
229
|
* @example
|
|
232
230
|
* ```typescript Extract Data from Screenshots using Image Strategy
|
|
233
|
-
* import { extractStructuredData } from '
|
|
231
|
+
* import { extractStructuredData } from '@intuned/browser/ai';
|
|
234
232
|
*
|
|
235
233
|
* const chartSchema = {
|
|
236
234
|
* type: "object",
|
|
@@ -251,11 +249,10 @@ export interface ObjectSchema extends BasicSchema {
|
|
|
251
249
|
*
|
|
252
250
|
* const chartElement = page.locator("#data-visualization");
|
|
253
251
|
* const chartData = await extractStructuredData({
|
|
254
|
-
*
|
|
255
|
-
* strategy:
|
|
252
|
+
* source: chartElement,
|
|
253
|
+
* strategy: "IMAGE",
|
|
254
|
+
* model: "gpt-4o",
|
|
256
255
|
* dataSchema: chartSchema,
|
|
257
|
-
* entityName: "chart",
|
|
258
|
-
* label: "chart-extractor",
|
|
259
256
|
* prompt: "Extract the chart title and all data points with their values"
|
|
260
257
|
* });
|
|
261
258
|
*
|
|
@@ -417,229 +414,43 @@ type SUPPORTED_MODELS = SUPPORTED_CLAUDE_MODELS | SUPPORTED_OPENAI_MODELS;
|
|
|
417
414
|
* ```
|
|
418
415
|
*/
|
|
419
416
|
|
|
420
|
-
/**
|
|
421
|
-
* @interface HTMLStrategy
|
|
422
|
-
* Represents a strategy for extracting data from HTML content using AI models.
|
|
423
|
-
*
|
|
424
|
-
* This strategy processes the HTML structure of a page or element, focusing on semantic attributes
|
|
425
|
-
* for better context understanding. It automatically filters and includes only relevant HTML attributes:
|
|
426
|
-
* `aria-label`, `data-name`, `name`, `type`, `placeholder`, `value`, `role`, `title`, `href`, `id`, `alt`
|
|
427
|
-
*
|
|
428
|
-
* @param {SUPPORTED_MODELS} model - AI model to use for extraction (e.g., "gpt-4", "claude-3"), see [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models.
|
|
429
|
-
* @param {string} type - Type of extraction: 'HTML'
|
|
430
|
-
*
|
|
431
|
-
* @example
|
|
432
|
-
* ```typescript Basic HTML Extraction
|
|
433
|
-
* const htmlStrategy: HTMLStrategy = {
|
|
434
|
-
* type: "HTML",
|
|
435
|
-
* model: "gpt-4"
|
|
436
|
-
* };
|
|
437
|
-
*
|
|
438
|
-
* const data = await extractStructuredData({
|
|
439
|
-
* page: page,
|
|
440
|
-
* strategy: htmlStrategy,
|
|
441
|
-
* // ... other options
|
|
442
|
-
* });
|
|
443
|
-
* ```
|
|
444
|
-
*
|
|
445
|
-
* @example
|
|
446
|
-
* ```typescript Advanced HTML Extraction
|
|
447
|
-
* const htmlStrategy: HTMLStrategy = {
|
|
448
|
-
* type: "HTML",
|
|
449
|
-
* model: "claude-3-sonnet-20240620"
|
|
450
|
-
* };
|
|
451
|
-
*
|
|
452
|
-
* // Extract product details from a specific container
|
|
453
|
-
* const productData = await extractStructuredData({
|
|
454
|
-
* locator: page.locator('.product-container'),
|
|
455
|
-
* strategy: htmlStrategy,
|
|
456
|
-
* dataSchema: productSchema,
|
|
457
|
-
* entityName: "product",
|
|
458
|
-
* label: "product-extractor"
|
|
459
|
-
* });
|
|
460
|
-
* ```
|
|
461
|
-
*/
|
|
462
|
-
export interface HTMLStrategy {
|
|
463
|
-
/** The AI model to use for content analysis and data extraction */
|
|
464
|
-
model: SUPPORTED_MODELS;
|
|
465
|
-
|
|
466
|
-
/** Strategy type identifier, must be "HTML" for HTML-based extraction */
|
|
467
|
-
type: "HTML";
|
|
468
|
-
}
|
|
469
|
-
|
|
470
|
-
/**
|
|
471
|
-
* @interface ImageStrategy
|
|
472
|
-
* Represents a strategy for extracting data from visual content using AI vision models.
|
|
473
|
-
*
|
|
474
|
-
* This strategy captures screenshots of the target page or element and uses AI vision
|
|
475
|
-
* capabilities to extract information. It's particularly useful for:
|
|
476
|
-
* - Data embedded in images or charts
|
|
477
|
-
* - Content with complex visual layouts
|
|
478
|
-
* - Information that's not directly accessible in the HTML
|
|
479
|
-
*
|
|
480
|
-
* @param {SUPPORTED_MODELS} model - AI model to use for extraction (e.g., "gpt-4", "claude-3"), see [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models.
|
|
481
|
-
* @param {string} type - Type of extraction: 'IMAGE'
|
|
482
|
-
* @example
|
|
483
|
-
* ```typescript Basic Image Analysis
|
|
484
|
-
* const imageStrategy: ImageStrategy = {
|
|
485
|
-
* type: "IMAGE",
|
|
486
|
-
* model: "gpt-4-vision"
|
|
487
|
-
* };
|
|
488
|
-
*
|
|
489
|
-
* const chartData = await extractStructuredData({
|
|
490
|
-
* locator: page.locator('.chart-container'),
|
|
491
|
-
* strategy: imageStrategy,
|
|
492
|
-
* dataSchema: chartSchema,
|
|
493
|
-
* entityName: "chart",
|
|
494
|
-
* label: "chart-data-extractor"
|
|
495
|
-
* });
|
|
496
|
-
* ```
|
|
497
|
-
*
|
|
498
|
-
* @example
|
|
499
|
-
* ```typescript Complex Visual Extraction
|
|
500
|
-
* const imageStrategy: ImageStrategy = {
|
|
501
|
-
* type: "IMAGE",
|
|
502
|
-
* model: "claude-3-sonnet-20240620"
|
|
503
|
-
* };
|
|
504
|
-
*
|
|
505
|
-
* // Extract data from a complex dashboard
|
|
506
|
-
* const dashboardData = await extractStructuredData({
|
|
507
|
-
* page: page,
|
|
508
|
-
* strategy: imageStrategy,
|
|
509
|
-
* dataSchema: dashboardSchema,
|
|
510
|
-
* entityName: "dashboard",
|
|
511
|
-
* label: "dashboard-metrics",
|
|
512
|
-
* prompt: "Extract all metrics and their values from this dashboard view"
|
|
513
|
-
* });
|
|
514
|
-
* ```
|
|
515
|
-
*/
|
|
516
|
-
export interface ImageStrategy {
|
|
517
|
-
/** The AI vision model to use for image analysis and data extraction */
|
|
518
|
-
model: SUPPORTED_MODELS;
|
|
519
|
-
|
|
520
|
-
/** Strategy type identifier, must be "IMAGE" for image-based extraction */
|
|
521
|
-
type: "IMAGE";
|
|
522
|
-
}
|
|
523
|
-
|
|
524
|
-
/**
|
|
525
|
-
* @interface MarkDownStrategy
|
|
526
|
-
* Represents a strategy for extracting data from content after converting it to Markdown format.
|
|
527
|
-
*
|
|
528
|
-
* This strategy first converts the HTML content to semantic Markdown before processing,
|
|
529
|
-
* which helps in:
|
|
530
|
-
* - Preserving content hierarchy and structure
|
|
531
|
-
* - Removing unnecessary styling and formatting
|
|
532
|
-
* - Focusing on semantic meaning of the content
|
|
533
|
-
* - Handling content-heavy pages more efficiently
|
|
534
|
-
*
|
|
535
|
-
* @param {SUPPORTED_MODELS} model - AI model to use for extraction (e.g., "gpt-4", "claude-3"), see [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models.
|
|
536
|
-
* @param {string} type - Type of extraction: 'MARKDOWN'
|
|
537
|
-
* @example
|
|
538
|
-
* ```typescript Basic Article Extraction
|
|
539
|
-
* const markdownStrategy: MarkDownStrategy = {
|
|
540
|
-
* type: "MARKDOWN",
|
|
541
|
-
* model: "gpt-4"
|
|
542
|
-
* };
|
|
543
|
-
*
|
|
544
|
-
* const articleData = await extractStructuredData({
|
|
545
|
-
* locator: page.locator('article'),
|
|
546
|
-
* strategy: markdownStrategy,
|
|
547
|
-
* dataSchema: articleSchema,
|
|
548
|
-
* entityName: "article",
|
|
549
|
-
* label: "article-content"
|
|
550
|
-
* });
|
|
551
|
-
* ```
|
|
552
|
-
*
|
|
553
|
-
* @example
|
|
554
|
-
* ```typescript Documentation Extraction
|
|
555
|
-
* const markdownStrategy: MarkDownStrategy = {
|
|
556
|
-
* type: "MARKDOWN",
|
|
557
|
-
* model: "claude-3-sonnet-20240620"
|
|
558
|
-
* };
|
|
559
|
-
*
|
|
560
|
-
* // Extract structured data from documentation pages
|
|
561
|
-
* const docData = await extractStructuredData({
|
|
562
|
-
* page: page,
|
|
563
|
-
* strategy: markdownStrategy,
|
|
564
|
-
* dataSchema: documentationSchema,
|
|
565
|
-
* entityName: "documentation",
|
|
566
|
-
* label: "docs-extractor",
|
|
567
|
-
* prompt: "Extract main concepts, code examples, and API references"
|
|
568
|
-
* });
|
|
569
|
-
* ```
|
|
570
|
-
*/
|
|
571
|
-
export interface MarkDownStrategy {
|
|
572
|
-
/** The AI model to use for processing the Markdown content */
|
|
573
|
-
model: SUPPORTED_MODELS;
|
|
574
|
-
|
|
575
|
-
/** Strategy type identifier, must be "MARKDOWN" for Markdown-based extraction */
|
|
576
|
-
type: "MARKDOWN";
|
|
577
|
-
}
|
|
578
|
-
|
|
579
|
-
/**
|
|
580
|
-
* @interface HtmlStrategy
|
|
581
|
-
* Represents a strategy for extracting data from HTML content using AI models.
|
|
582
|
-
* @param {SUPPORTED_MODELS} model - AI model to use for extraction (e.g., "gpt-4", "claude-3"), see [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models.
|
|
583
|
-
* @param {string} type - Type of extraction: 'HTML'
|
|
584
|
-
* @example
|
|
585
|
-
* ```typescript Basic HTML Extraction
|
|
586
|
-
* const htmlStrategy: HtmlStrategy = {
|
|
587
|
-
* type: "HTML",
|
|
588
|
-
* model: "gpt-4"
|
|
589
|
-
* };
|
|
590
|
-
* ```
|
|
591
|
-
* @example
|
|
592
|
-
* ```typescript Advanced HTML Extraction
|
|
593
|
-
* const htmlStrategy: HtmlStrategy = {
|
|
594
|
-
* type: "HTML",
|
|
595
|
-
* model: "claude-3-sonnet-20240620"
|
|
596
|
-
* };
|
|
597
|
-
* ```
|
|
598
|
-
*/
|
|
599
|
-
export interface HtmlStrategy {
|
|
600
|
-
type: "HTML";
|
|
601
|
-
model: SUPPORTED_MODELS;
|
|
602
|
-
}
|
|
603
|
-
|
|
604
417
|
/**
|
|
605
418
|
* Uses AI vision to determine if a webpage has finished loading by analyzing a screenshot.
|
|
606
419
|
* Detects loading spinners, blank content, or incomplete page states.
|
|
607
|
-
*
|
|
608
|
-
* @param {Page} page - The Playwright page to check
|
|
609
|
-
* @param {
|
|
610
|
-
* @param {SUPPORTED_MODELS} [
|
|
611
|
-
* @param {
|
|
612
|
-
* @
|
|
613
|
-
* @returns {Promise.<{status: LoadingStatus, reason: (string|null|undefined), cost: (number|undefined)}>}
|
|
614
|
-
* - `status`: "True" if page is loaded, "False" if still loading, "Dont know" if uncertain
|
|
615
|
-
* - `reason`: Optional reason for the status (e.g., detected loading spinner)
|
|
616
|
-
* - `cost`: Optional cost of the AI analysis (if applicable)
|
|
420
|
+
* @param {Object} input - Input object containing the page to check
|
|
421
|
+
* @param {Page} input.page - The Playwright page to check
|
|
422
|
+
* @param {number} [input.timeoutInMs=10000] - Screenshot timeout in milliseconds
|
|
423
|
+
* @param {SUPPORTED_MODELS} [input.model="gpt-4o-2024-08-06"] - [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) to use for the AI Check. default gpt-4o-2024-08-06
|
|
424
|
+
* @param {string} [input.apiKey] - Optional API key for the AI service
|
|
425
|
+
* @returns {Promise.<boolean>} True if page is loaded, false if still loading
|
|
617
426
|
* @example
|
|
618
427
|
* ```typescript Check Page Loading
|
|
619
|
-
* import { isPageLoaded } from "@intuned/
|
|
428
|
+
* import { isPageLoaded } from "@intuned/browser/ai";
|
|
620
429
|
*
|
|
621
430
|
* // Wait for page to finish loading
|
|
622
431
|
* await page.goto('https://example.com');
|
|
623
432
|
*
|
|
624
|
-
* const pageLoaded = await isPageLoaded(page);
|
|
625
|
-
* if (pageLoaded
|
|
626
|
-
* console.log("Page loaded:", pageLoaded['reason']);
|
|
433
|
+
* const pageLoaded = await isPageLoaded({page});
|
|
434
|
+
* if (pageLoaded) {
|
|
627
435
|
* // Continue with scraping or interactions
|
|
628
436
|
* } else {
|
|
629
|
-
* console.log("Still loading:", pageLoaded['reason']);
|
|
630
437
|
* // Wait longer or retry
|
|
631
438
|
* }
|
|
632
439
|
* ```
|
|
633
440
|
*
|
|
634
441
|
* @example
|
|
635
442
|
* ```typescript Loading Loop
|
|
636
|
-
* import { isPageLoaded } from "@intuned/
|
|
443
|
+
* import { isPageLoaded } from "@intuned/browser/ai";
|
|
637
444
|
*
|
|
638
445
|
* // Keep checking until page loads
|
|
639
446
|
* let attempts = 0;
|
|
640
447
|
* while (attempts < 10) {
|
|
641
|
-
* const pageLoaded = await isPageLoaded(
|
|
642
|
-
*
|
|
448
|
+
* const pageLoaded = await isPageLoaded({
|
|
449
|
+
* page,
|
|
450
|
+
* model: "gpt-4o",
|
|
451
|
+
* timeoutInMs: 5000
|
|
452
|
+
* });
|
|
453
|
+
* if (pageLoaded) break;
|
|
643
454
|
*
|
|
644
455
|
* await page.waitForTimeout(2000);
|
|
645
456
|
* attempts++;
|
|
@@ -647,23 +458,13 @@ export interface HtmlStrategy {
|
|
|
647
458
|
* ```
|
|
648
459
|
*
|
|
649
460
|
*/
|
|
650
|
-
export declare function isPageLoaded(
|
|
651
|
-
page: Page
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
}
|
|
657
|
-
): Promise<{
|
|
658
|
-
status: LoadingStatus;
|
|
659
|
-
reason?: string | null;
|
|
660
|
-
cost?: number;
|
|
661
|
-
}>;
|
|
461
|
+
export declare function isPageLoaded(input: {
|
|
462
|
+
page: Page;
|
|
463
|
+
timeoutInMs?: number;
|
|
464
|
+
model?: SUPPORTED_MODELS;
|
|
465
|
+
apiKey?: string;
|
|
466
|
+
}): Promise<boolean>;
|
|
662
467
|
|
|
663
|
-
/**
|
|
664
|
-
* LoadingStatus is a union of true, false, and "Dont know".
|
|
665
|
-
*/
|
|
666
|
-
export type LoadingStatus = true | false | "Dont know";
|
|
667
468
|
export type JsonSchema =
|
|
668
469
|
| StringSchema
|
|
669
470
|
| NumberSchema
|