@intuned/browser-dev 0.1.8-dev.0 → 0.1.9-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +85 -143
- package/dist/ai/export.d.ts +291 -143
- package/dist/ai/extractStructuredDataUsingAi.js +24 -1
- package/dist/ai/index.d.ts +291 -143
- package/dist/ai/tests/testExtractStructuredData.spec.js +2 -2
- package/dist/common/Logger/index.js +2 -2
- package/dist/helpers/export.d.ts +702 -575
- package/dist/helpers/index.d.ts +702 -575
- package/dist/helpers/withNetworkSettledWait.js +2 -7
- package/dist/optimized-extractors/export.d.ts +17 -18
- package/dist/optimized-extractors/index.d.ts +17 -18
- package/how-to-generate-docs.md +40 -28
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -1,159 +1,101 @@
|
|
|
1
|
-
|
|
1
|
+
---
|
|
2
|
+
title: "TypeScript SDK"
|
|
3
|
+
sidebarTitle: "@intuned/browser"
|
|
4
|
+
icon: cube
|
|
5
|
+
---
|
|
2
6
|
|
|
3
|
-
|
|
7
|
+
Browser automation helpers for TypeScript/JavaScript, built on [Playwright](https://playwright.dev/). This package provides utilities for common automation tasks—AI-powered data extraction, navigation with retries, pagination handling, and more.
|
|
4
8
|
|
|
5
9
|
## Installation
|
|
6
10
|
|
|
7
|
-
### Using Yarn (Recommended)
|
|
8
|
-
|
|
9
|
-
```bash
|
|
10
|
-
yarn add @intuned/browser
|
|
11
|
-
```
|
|
12
|
-
|
|
13
|
-
### Using npm
|
|
14
|
-
|
|
15
11
|
```bash
|
|
16
12
|
npm install @intuned/browser
|
|
17
13
|
```
|
|
18
14
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
### 🤖 AI-Powered Extraction
|
|
24
|
-
|
|
25
|
-
- **Structured Data Extraction** - Extract structured data from web pages using AI with `extractStructuredData()`
|
|
26
|
-
- **Smart Page Loading Detection** - Determine when pages have fully loaded with `isPageLoaded()`
|
|
27
|
-
- **Schema Validation** - Validate extracted data against JSON schemas
|
|
28
|
-
|
|
29
|
-
### 🌐 Web Automation Helpers
|
|
30
|
-
|
|
31
|
-
- **Navigation** - Advanced URL navigation with `goToUrl()`
|
|
32
|
-
- **Content Loading** - Scroll to load dynamic content with `scrollToLoadContent()`
|
|
33
|
-
- **Network Monitoring** - Wait for network activity with `withNetworkSettledWait()`
|
|
34
|
-
- **DOM Monitoring** - Wait for DOM changes with `waitForDomSettled()`
|
|
35
|
-
- **Click Automation** - Click elements until exhausted with `clickUntilExhausted()`
|
|
36
|
-
|
|
37
|
-
### 📄 Content Processing
|
|
38
|
-
|
|
39
|
-
- **HTML Sanitization** - Clean and sanitize HTML with `sanitizeHtml()`
|
|
40
|
-
- **Markdown Extraction** - Convert HTML to markdown with `extractMarkdown()`
|
|
41
|
-
- **URL Resolution** - Resolve relative URLs with `resolveUrl()`
|
|
42
|
-
- **Date Processing** - Parse and process dates with `processDate()`
|
|
15
|
+
<Note>
|
|
16
|
+
When using [Intuned](https://intuned.io), this package is pre-installed in every TypeScript project.
|
|
17
|
+
</Note>
|
|
43
18
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
- **File Downloads** - Download files with `downloadFile()`
|
|
47
|
-
- **S3 Integration** - Upload and save files to S3 with `uploadFileToS3()` and `saveFileToS3()`
|
|
48
|
-
|
|
49
|
-
### ✅ Data Validation
|
|
50
|
-
|
|
51
|
-
- **Schema Validation** - Validate data structures with `validateDataUsingSchema()`
|
|
52
|
-
- **Empty Value Filtering** - Filter empty values with `filterEmptyValues()`
|
|
53
|
-
|
|
54
|
-
### ⚡ Optimized Extractors
|
|
55
|
-
|
|
56
|
-
- **High-Performance Extractors** - Pre-built optimized extractors for common use cases
|
|
57
|
-
- Available via `@intuned/browser/optimized-extractors`
|
|
58
|
-
|
|
59
|
-
## Quick Start
|
|
19
|
+
## Quick example
|
|
60
20
|
|
|
61
21
|
```typescript
|
|
62
|
-
import {
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
await
|
|
76
|
-
|
|
22
|
+
import { Page, BrowserContext } from "playwright";
|
|
23
|
+
import { extractStructuredData, isPageLoaded } from "@intuned/browser/ai";
|
|
24
|
+
import { goToUrl } from "@intuned/browser";
|
|
25
|
+
|
|
26
|
+
interface Params {}
|
|
27
|
+
|
|
28
|
+
export default async function automation(
|
|
29
|
+
params: Params,
|
|
30
|
+
page: Page,
|
|
31
|
+
context: BrowserContext
|
|
32
|
+
) {
|
|
33
|
+
await goToUrl(page, "https://books.toscrape.com");
|
|
34
|
+
|
|
35
|
+
const loaded = await isPageLoaded({ source: page });
|
|
36
|
+
if (!loaded) {
|
|
37
|
+
throw new Error("Page is not loaded, cannot extract data");
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
// Extract all book listings from the page
|
|
41
|
+
const books = await extractStructuredData({
|
|
42
|
+
source: page,
|
|
43
|
+
dataSchema: {
|
|
44
|
+
type: "object",
|
|
45
|
+
properties: {
|
|
46
|
+
products: {
|
|
47
|
+
type: "array",
|
|
48
|
+
items: {
|
|
49
|
+
type: "object",
|
|
50
|
+
properties: {
|
|
51
|
+
title: { type: "string" },
|
|
52
|
+
price: { type: "string" },
|
|
53
|
+
},
|
|
54
|
+
},
|
|
55
|
+
},
|
|
56
|
+
},
|
|
57
|
+
},
|
|
58
|
+
prompt: "Extract all book listings with their titles and prices",
|
|
59
|
+
strategy: "HTML",
|
|
60
|
+
model: "claude-haiku-4-5-20251001",
|
|
77
61
|
});
|
|
78
62
|
|
|
79
|
-
|
|
80
|
-
const html = await page.content();
|
|
81
|
-
const cleanHtml = sanitizeHtml(html);
|
|
82
|
-
|
|
83
|
-
// Extract markdown
|
|
84
|
-
const markdown = extractMarkdown(cleanHtml);
|
|
85
|
-
|
|
86
|
-
return markdown;
|
|
63
|
+
return books;
|
|
87
64
|
}
|
|
88
65
|
```
|
|
89
66
|
|
|
90
|
-
## AI
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
// AI functions
|
|
127
|
-
import { extractStructuredData, isPageLoaded } from "@intuned/browser/ai";
|
|
128
|
-
|
|
129
|
-
// Optimized extractors
|
|
130
|
-
import /* extractors */ "@intuned/browser/optimized-extractors";
|
|
131
|
-
```
|
|
132
|
-
|
|
133
|
-
## Documentation
|
|
134
|
-
|
|
135
|
-
For detailed documentation on all functions and types, see the [documentation](https://docs.intunedhq.com/docs-old/getting-started/introduction).
|
|
136
|
-
|
|
137
|
-
## Building from Source
|
|
138
|
-
|
|
139
|
-
```bash
|
|
140
|
-
# Install dependencies
|
|
141
|
-
yarn install
|
|
142
|
-
|
|
143
|
-
# Build the project
|
|
144
|
-
yarn build
|
|
145
|
-
|
|
146
|
-
# Run tests
|
|
147
|
-
yarn test
|
|
148
|
-
|
|
149
|
-
# Run tests with UI
|
|
150
|
-
yarn test:dev
|
|
151
|
-
```
|
|
152
|
-
|
|
153
|
-
## Support
|
|
154
|
-
|
|
155
|
-
For support, questions, or contributions, please contact the Intuned team at engineering@intunedhq.com.
|
|
156
|
-
|
|
157
|
-
## About Intuned
|
|
158
|
-
|
|
159
|
-
Intuned provides powerful tools for browser automation, web scraping, and data extraction. Visit [intunedhq.com](https://intunedhq.com) to learn more.
|
|
67
|
+
## AI module
|
|
68
|
+
|
|
69
|
+
AI-powered utilities for data extraction and page analysis. These functions use AI and incur costs.
|
|
70
|
+
|
|
71
|
+
| Function | Description |
|
|
72
|
+
| --- | --- |
|
|
73
|
+
| [`extractStructuredData`](./ai/functions/extractStructuredData) | Extract structured data from pages using AI with JSON Schema or Zod validation |
|
|
74
|
+
| [`isPageLoaded`](./ai/functions/isPageLoaded) | Detect when a page has finished loading |
|
|
75
|
+
|
|
76
|
+
<Tip>AI functions support caching and matching to reduce costs.</Tip>
|
|
77
|
+
|
|
78
|
+
## Helpers module
|
|
79
|
+
|
|
80
|
+
| Function | Description |
|
|
81
|
+
| --- | --- |
|
|
82
|
+
| [`goToUrl`](./helpers/functions/goToUrl) | Navigate with automatic retries and error handling |
|
|
83
|
+
| [`withNetworkSettledWait`](./helpers/functions/withNetworkSettledWait) | Wait for network requests to complete |
|
|
84
|
+
| [`waitForDomSettled`](./helpers/functions/waitForDomSettled) | Wait for DOM mutations to finish |
|
|
85
|
+
| [`scrollToLoadContent`](./helpers/functions/scrollToLoadContent) | Load infinite-scroll content |
|
|
86
|
+
| [`clickUntilExhausted`](./helpers/functions/clickUntilExhausted) | Click "Load More" buttons until all content loads |
|
|
87
|
+
| [`extractMarkdown`](./helpers/functions/extractMarkdown) | Convert pages to markdown |
|
|
88
|
+
| [`downloadFile`](./helpers/functions/downloadFile) | Download files with different triggers |
|
|
89
|
+
| [`saveFileToS3`](./helpers/functions/saveFileToS3) | Download and upload files to S3 |
|
|
90
|
+
| [`uploadFileToS3`](./helpers/functions/uploadFileToS3) | Upload files with custom S3 configurations |
|
|
91
|
+
| [`filterEmptyValues`](./helpers/functions/filterEmptyValues) | Remove empty values from data |
|
|
92
|
+
| [`validateDataUsingSchema`](./helpers/functions/validateDataUsingSchema) | Validate data against schemas |
|
|
93
|
+
| [`processDate`](./helpers/functions/processDate) | Parse and normalize dates |
|
|
94
|
+
| [`sanitizeHtml`](./helpers/functions/sanitizeHtml) | Clean and sanitize HTML |
|
|
95
|
+
| [`resolveUrl`](./helpers/functions/resolveUrl) | Resolve relative URLs to absolute paths |
|
|
96
|
+
|
|
97
|
+
## Requirements
|
|
98
|
+
|
|
99
|
+
- Node.js 18+
|
|
100
|
+
- Playwright (`npm install playwright && npx playwright install`)
|
|
101
|
+
- For AI functions: API key for your AI provider (set via environment variable or function parameter)
|