@purepageio/fetch-engines 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +81 -281
- package/package.json +4 -3
package/README.md
CHANGED
|
@@ -3,342 +3,142 @@
|
|
|
3
3
|
[](https://www.npmjs.com/package/@purepageio/fetch-engines)
|
|
4
4
|
[](https://opensource.org/licenses/MIT)
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
Fetch websites with confidence. `@purepageio/fetch-engines` gives teams an HTTP-first workflow that automatically promotes tricky pages to a managed Playwright browser and can even hand structured results back through OpenAI.
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
**Key Benefits:**
|
|
11
|
-
|
|
12
|
-
- **Unified API:** Use `fetchHTML(url, options?)` for processed content or `fetchContent(url, options?)` for raw content
|
|
13
|
-
- **Smart Fallback Strategy:** Tries fast HTTP first, automatically falls back to full browser for complex sites
|
|
14
|
-
- **AI-Powered Data Extraction:** Extract structured data from web pages using OpenAI and Zod schemas
|
|
15
|
-
- **Raw Content Support:** Retrieve PDFs, images, APIs with the same fallback logic
|
|
16
|
-
- **Built-in Resilience:** Caching, retries, and standardised error handling
|
|
17
|
-
- **Browser Management:** Automatic browser pooling and stealth measures for complex sites
|
|
18
|
-
- **Content Transformation:** Convert HTML to clean Markdown
|
|
19
|
-
- **TypeScript Ready:** Fully typed codebase
|
|
20
|
-
|
|
21
|
-
## Table of Contents
|
|
8
|
+
## Table of contents
|
|
22
9
|
|
|
10
|
+
- [Why fetch-engines?](#why-fetch-engines)
|
|
23
11
|
- [Installation](#installation)
|
|
24
|
-
- [
|
|
25
|
-
- [
|
|
26
|
-
- [
|
|
27
|
-
- [Structured Content Extraction](#structured-content-extraction)
|
|
12
|
+
- [Quick start](#quick-start)
|
|
13
|
+
- [Usage patterns](#usage-patterns)
|
|
14
|
+
- [Pick an engine](#pick-an-engine)
|
|
15
|
+
- [Structured extraction](#structured-extraction)
|
|
28
16
|
- [Configuration](#configuration)
|
|
29
|
-
- [
|
|
30
|
-
- [
|
|
31
|
-
- [
|
|
32
|
-
- [
|
|
17
|
+
- [Essentials](#essentials)
|
|
18
|
+
- [Complete reference](#complete-reference)
|
|
19
|
+
- [Error handling](#error-handling)
|
|
20
|
+
- [Tooling and examples](#tooling-and-examples)
|
|
33
21
|
- [Contributing](#contributing)
|
|
22
|
+
- [License](#license)
|
|
34
23
|
|
|
35
|
-
##
|
|
24
|
+
## Why fetch-engines?
|
|
36
25
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
26
|
+
- **One API for multiple strategies** – Call `fetchHTML` for rendered pages or `fetchContent` for raw responses. The library handles HTTP shortcuts and Playwright fallbacks automatically.
|
|
27
|
+
- **Production-minded defaults** – Retries, caching, and consistent telemetry are ready out of the box.
|
|
28
|
+
- **Drop-in AI enrichment** – Provide a Zod schema and let OpenAI convert full pages into structured data.
|
|
29
|
+
- **Typed and tested** – Built in TypeScript with examples that mirror real-world scraping pipelines.
|
|
40
30
|
|
|
41
|
-
|
|
31
|
+
## Installation
|
|
42
32
|
|
|
43
33
|
```bash
|
|
34
|
+
pnpm add @purepageio/fetch-engines
|
|
35
|
+
# install Playwright browsers once if you plan to use the Hybrid or Playwright engines
|
|
44
36
|
pnpm exec playwright install
|
|
45
37
|
```
|
|
46
38
|
|
|
47
|
-
##
|
|
48
|
-
|
|
49
|
-
**`HybridEngine`** (recommended): Attempts fast HTTP fetch first, falls back to Playwright browser on failure or when SPA shell detected. Handles both simple and complex sites automatically.
|
|
50
|
-
|
|
51
|
-
**`FetchEngine`**: Lightweight HTTP-only engine for basic sites without browser fallback.
|
|
52
|
-
|
|
53
|
-
**`StructuredContentEngine`**: AI-powered engine that combines HybridEngine with OpenAI for structured data extraction.
|
|
54
|
-
|
|
55
|
-
## Basic Usage
|
|
56
|
-
|
|
57
|
-
### Quick Start
|
|
39
|
+
## Quick start
|
|
58
40
|
|
|
59
41
|
```typescript
|
|
60
42
|
import { HybridEngine } from "@purepageio/fetch-engines";
|
|
61
43
|
|
|
62
44
|
const engine = new HybridEngine();
|
|
63
45
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
console.log(`Title: ${simple.title}`);
|
|
67
|
-
|
|
68
|
-
// Complex sites automatically use browser
|
|
69
|
-
const complex = await engine.fetchHTML("https://spa-site.com", {
|
|
70
|
-
markdown: true,
|
|
71
|
-
spaMode: true,
|
|
72
|
-
});
|
|
73
|
-
|
|
74
|
-
await engine.cleanup(); // Important: releases browser resources
|
|
75
|
-
```
|
|
76
|
-
|
|
77
|
-
### With Custom Headers
|
|
78
|
-
|
|
79
|
-
```typescript
|
|
80
|
-
const engine = new HybridEngine({
|
|
81
|
-
headers: { "X-Custom-Header": "value" },
|
|
82
|
-
});
|
|
83
|
-
|
|
84
|
-
const result = await engine.fetchHTML("https://example.com", {
|
|
85
|
-
headers: { "X-Request-Header": "value" },
|
|
86
|
-
});
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
### Raw Content (PDFs, Images, APIs)
|
|
90
|
-
|
|
91
|
-
```typescript
|
|
92
|
-
const engine = new HybridEngine();
|
|
93
|
-
|
|
94
|
-
// Fetch PDF
|
|
95
|
-
const pdf = await engine.fetchContent("https://example.com/doc.pdf");
|
|
96
|
-
console.log(`PDF size: ${pdf.content.length} bytes`);
|
|
97
|
-
|
|
98
|
-
// Fetch JSON API with auth
|
|
99
|
-
const api = await engine.fetchContent("https://api.example.com/data", {
|
|
100
|
-
headers: { Authorization: "Bearer token" },
|
|
101
|
-
});
|
|
46
|
+
const page = await engine.fetchHTML("https://example.com");
|
|
47
|
+
console.log(page.title);
|
|
102
48
|
|
|
103
49
|
await engine.cleanup();
|
|
104
50
|
```
|
|
105
51
|
|
|
106
|
-
##
|
|
107
|
-
|
|
108
|
-
### `fetchHTML(url, options?)`
|
|
52
|
+
## Usage patterns
|
|
109
53
|
|
|
110
|
-
|
|
54
|
+
### Pick an engine
|
|
111
55
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
56
|
+
| Engine | When to use it |
|
|
57
|
+
| ------------------------- | -------------------------------------------------------------------------------- |
|
|
58
|
+
| `HybridEngine` | Default option. Starts with HTTP, then retries via Playwright for tougher pages. |
|
|
59
|
+
| `FetchEngine` | Lightweight HTML/text fetching with zero browser overhead. |
|
|
60
|
+
| `StructuredContentEngine` | Fetch a page and transform it into typed data with OpenAI. |
|
|
116
61
|
|
|
117
|
-
###
|
|
118
|
-
|
|
119
|
-
**Use for:** Raw content retrieval (like standard `fetch`)
|
|
120
|
-
|
|
121
|
-
- Retrieves any content type (PDFs, images, JSON, XML, etc.)
|
|
122
|
-
- No content-type restrictions
|
|
123
|
-
- Returns `Buffer` (binary) or `string` (text)
|
|
124
|
-
- Preserves original MIME type
|
|
125
|
-
|
|
126
|
-
### Example Comparison
|
|
127
|
-
|
|
128
|
-
```typescript
|
|
129
|
-
// fetchHTML - processes content
|
|
130
|
-
const html = await engine.fetchHTML("https://example.com");
|
|
131
|
-
console.log(html.title); // "Example Domain"
|
|
132
|
-
console.log(html.contentType); // "html" or "markdown"
|
|
133
|
-
|
|
134
|
-
// fetchContent - raw content
|
|
135
|
-
const raw = await engine.fetchContent("https://example.com");
|
|
136
|
-
console.log(raw.contentType); // "text/html"
|
|
137
|
-
console.log(typeof raw.content); // "string" (raw HTML)
|
|
138
|
-
|
|
139
|
-
// Binary content
|
|
140
|
-
const pdf = await engine.fetchContent("https://example.com/doc.pdf");
|
|
141
|
-
console.log(Buffer.isBuffer(pdf.content)); // true
|
|
142
|
-
```
|
|
143
|
-
|
|
144
|
-
## Structured Content Extraction
|
|
145
|
-
|
|
146
|
-
Extract structured data from web pages using AI and Zod schemas.
|
|
147
|
-
|
|
148
|
-
### Prerequisites
|
|
149
|
-
|
|
150
|
-
Set environment variable:
|
|
151
|
-
|
|
152
|
-
```bash
|
|
153
|
-
export OPENAI_API_KEY="your-openai-api-key"
|
|
154
|
-
```
|
|
155
|
-
|
|
156
|
-
### Basic Usage
|
|
62
|
+
### Structured extraction
|
|
157
63
|
|
|
158
64
|
```typescript
|
|
159
65
|
import { fetchStructuredContent } from "@purepageio/fetch-engines";
|
|
160
66
|
import { z } from "zod";
|
|
161
67
|
|
|
162
|
-
|
|
68
|
+
type Article = {
|
|
69
|
+
title: string;
|
|
70
|
+
summary: string;
|
|
71
|
+
};
|
|
72
|
+
|
|
73
|
+
const schema = z.object({
|
|
163
74
|
title: z.string(),
|
|
164
|
-
author: z.string().optional(),
|
|
165
|
-
publishDate: z.string().optional(),
|
|
166
75
|
summary: z.string(),
|
|
167
|
-
tags: z.array(z.string()),
|
|
168
|
-
});
|
|
169
|
-
|
|
170
|
-
const result = await fetchStructuredContent("https://example.com/article", articleSchema, {
|
|
171
|
-
model: "gpt-4.1-mini",
|
|
172
|
-
customPrompt: "Extract main article information",
|
|
173
|
-
});
|
|
174
|
-
|
|
175
|
-
console.log("Extracted:", result.data);
|
|
176
|
-
console.log("Token usage:", result.usage);
|
|
177
|
-
```
|
|
178
|
-
|
|
179
|
-
### StructuredContentEngine Class
|
|
180
|
-
|
|
181
|
-
```typescript
|
|
182
|
-
import { StructuredContentEngine } from "@purepageio/fetch-engines";
|
|
183
|
-
|
|
184
|
-
const productSchema = z.object({
|
|
185
|
-
name: z.string(),
|
|
186
|
-
price: z.number(),
|
|
187
|
-
inStock: z.boolean(),
|
|
188
|
-
});
|
|
189
|
-
|
|
190
|
-
const engine = new StructuredContentEngine({
|
|
191
|
-
spaMode: true,
|
|
192
|
-
spaRenderDelayMs: 2000,
|
|
193
76
|
});
|
|
194
77
|
|
|
195
|
-
const result = await
|
|
196
|
-
console.log(`${result.data.name} costs $${result.data.price}`);
|
|
78
|
+
const result = await fetchStructuredContent("https://example.com/article", schema, { model: "gpt-4.1-mini" });
|
|
197
79
|
|
|
198
|
-
|
|
80
|
+
console.log(result.data.summary);
|
|
199
81
|
```
|
|
200
82
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
- `'gpt-5-mini'` - Latest model, mini version **(default)**
|
|
204
|
-
- `'gpt-5'` - Most capable model
|
|
205
|
-
- `'gpt-4.1-mini'` - Fast and cost-effective
|
|
206
|
-
- `'gpt-4.1'` - More capable GPT-4.1 version
|
|
83
|
+
Set `OPENAI_API_KEY` before running structured helpers.
|
|
207
84
|
|
|
208
85
|
## Configuration
|
|
209
86
|
|
|
210
|
-
###
|
|
211
|
-
|
|
212
|
-
| Option | Type | Default | Description |
|
|
213
|
-
| ---------- | ------------------------ | ------- | ------------------------ |
|
|
214
|
-
| `markdown` | `boolean` | `false` | Convert HTML to Markdown |
|
|
215
|
-
| `headers` | `Record<string, string>` | `{}` | Custom HTTP headers |
|
|
216
|
-
|
|
217
|
-
### HybridEngine Configuration
|
|
218
|
-
|
|
219
|
-
| Option | Type | Default | Description |
|
|
220
|
-
| ------------------ | ------------------------ | -------- | -------------------------------------------- |
|
|
221
|
-
| `headers` | `Record<string, string>` | `{}` | Default headers for both engines |
|
|
222
|
-
| `markdown` | `boolean` | `false` | Default Markdown conversion |
|
|
223
|
-
| `useHttpFallback` | `boolean` | `true` | Try HTTP before Playwright |
|
|
224
|
-
| `spaMode` | `boolean` | `false` | Enable SPA mode with patient load conditions |
|
|
225
|
-
| `spaRenderDelayMs` | `number` | `0` | Delay after page load in SPA mode |
|
|
226
|
-
| `maxRetries` | `number` | `3` | Max retry attempts |
|
|
227
|
-
| `cacheTTL` | `number` | `900000` | Cache TTL in ms (15 min default) |
|
|
228
|
-
| `concurrentPages` | `number` | `3` | Max concurrent pages |
|
|
229
|
-
|
|
230
|
-
### Browser Pool Options
|
|
231
|
-
|
|
232
|
-
| Option | Type | Default | Description |
|
|
233
|
-
| -------------------- | -------- | --------- | ---------------------------------- |
|
|
234
|
-
| `maxBrowsers` | `number` | `2` | Max browser instances |
|
|
235
|
-
| `maxPagesPerContext` | `number` | `6` | Pages per context before recycling |
|
|
236
|
-
| `maxBrowserAge` | `number` | `1200000` | Browser lifetime (20 min) |
|
|
87
|
+
### Essentials
|
|
237
88
|
|
|
238
|
-
|
|
89
|
+
All engines accept familiar `fetch` options such as custom headers. Additional Hybrid/Playwright options you are likely to tweak:
|
|
239
90
|
|
|
240
|
-
|
|
91
|
+
- `markdown` – return Markdown instead of HTML.
|
|
92
|
+
- `spaMode` & `spaRenderDelayMs` – allow single-page apps to render before extraction.
|
|
93
|
+
- `cacheTTL`, `maxRetries`, and browser pool sizes – control resilience and throughput.
|
|
241
94
|
|
|
242
|
-
|
|
243
|
-
2. Engine constructor headers
|
|
244
|
-
3. Engine default headers
|
|
95
|
+
Check the inline TypeScript docs or the [`/examples`](./examples) directory for end-to-end flows.
|
|
245
96
|
|
|
246
|
-
|
|
97
|
+
### Complete reference
|
|
247
98
|
|
|
248
|
-
|
|
99
|
+
Every option from `PlaywrightEngineConfig` (consumed by `HybridEngine`) with defaults:
|
|
249
100
|
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
101
|
+
| Option | Default | Purpose |
|
|
102
|
+
| -------------------------- | ----------- | --------------------------------------------------------------------------------------------- |
|
|
103
|
+
| `headers` | `{}` | Extra headers merged into every request. |
|
|
104
|
+
| `concurrentPages` | `3` | Maximum Playwright pages processed at once. |
|
|
105
|
+
| `maxRetries` | `3` | Additional retry attempts after the first failure. |
|
|
106
|
+
| `retryDelay` | `5000` | Milliseconds to wait between retries. |
|
|
107
|
+
| `cacheTTL` | `900000` | Cache lifetime in ms (`0` disables caching). |
|
|
108
|
+
| `useHttpFallback` | `true` | Try a fast HTTP GET before spinning up Playwright. |
|
|
109
|
+
| `useHeadedModeFallback` | `false` | Automatically retry a domain in headed mode after repeated failures. |
|
|
110
|
+
| `defaultFastMode` | `true` | Block non-critical assets and skip human simulation unless overridden. |
|
|
111
|
+
| `simulateHumanBehavior` | `true` | When not in fast mode, add delays and scrolling to avoid bot detection. |
|
|
112
|
+
| `maxBrowsers` | `2` | Highest number of Playwright browser instances kept in the pool. |
|
|
113
|
+
| `maxPagesPerContext` | `6` | Pages opened per browser context before recycling it. |
|
|
114
|
+
| `maxBrowserAge` | `1200000` | Milliseconds before a browser instance is torn down (20 minutes). |
|
|
115
|
+
| `healthCheckInterval` | `60000` | Pool health check frequency in ms. |
|
|
116
|
+
| `poolBlockedDomains` | `[]` | Domains blocked across every Playwright request (inherits pool defaults if empty). |
|
|
117
|
+
| `poolBlockedResourceTypes` | `[]` | Resource types (e.g. `"image"`) blocked globally. |
|
|
118
|
+
| `proxy` | `undefined` | Per-browser proxy `{ server, username?, password? }`. |
|
|
119
|
+
| `useHeadedMode` | `false` | Force every browser to launch with a visible window. |
|
|
120
|
+
| `markdown` | `true` | Return Markdown (instead of HTML) when possible. Override per request with `markdown: false`. |
|
|
121
|
+
| `spaMode` | `false` | Enable SPA heuristics and allow additional waits for client rendering. |
|
|
122
|
+
| `spaRenderDelayMs` | `0` | Extra delay after load when `spaMode` is `true`. |
|
|
123
|
+
| `playwrightOnlyPatterns` | `[]` | URLs matching any string/regex go straight to Playwright, skipping HTTP fetches. |
|
|
124
|
+
| `playwrightLaunchOptions` | `undefined` | Options passed to `browserType.launch` (see Playwright docs). |
|
|
256
125
|
|
|
257
|
-
|
|
126
|
+
Per-request overrides: `fetchHTML` accepts `fastMode`, `markdown`, `spaMode`, and `headers`, while `fetchContent` supports `fastMode` and `headers`.
|
|
258
127
|
|
|
259
|
-
|
|
260
|
-
- `contentType` (`string`): Original MIME type
|
|
261
|
-
- `title` (`string | null`): Title if HTML content, otherwise null
|
|
262
|
-
- `url` (`string`): Final URL after redirects
|
|
263
|
-
- `isFromCache` (`boolean`): Cache hit indicator
|
|
264
|
-
- `statusCode` (`number | undefined`): HTTP status code
|
|
128
|
+
## Error handling
|
|
265
129
|
|
|
266
|
-
|
|
130
|
+
Failures raise a typed `FetchError` exposing `code`, `statusCode`, and the underlying error. Log these fields to diagnose issues quickly and tune your retry policy.
|
|
267
131
|
|
|
268
|
-
|
|
132
|
+
## Tooling and examples
|
|
269
133
|
|
|
270
|
-
- `
|
|
271
|
-
-
|
|
272
|
-
|
|
273
|
-
- `markdown?: boolean`: Request Markdown (HybridEngine only)
|
|
274
|
-
- `fastMode?: boolean`: Override fast mode (HybridEngine only)
|
|
275
|
-
- `spaMode?: boolean`: Override SPA mode (HybridEngine only)
|
|
276
|
-
- **Returns:** `Promise<HTMLFetchResult>`
|
|
277
|
-
|
|
278
|
-
### `engine.fetchContent(url, options?)`
|
|
279
|
-
|
|
280
|
-
- `url` (`string`): Target URL
|
|
281
|
-
- `options?` (`ContentFetchOptions`):
|
|
282
|
-
- `headers?: Record<string, string>`: Request headers
|
|
283
|
-
- **Returns:** `Promise<ContentFetchResult>`
|
|
284
|
-
|
|
285
|
-
### `fetchStructuredContent(url, schema, options?)`
|
|
286
|
-
|
|
287
|
-
- `url` (`string`): Target URL
|
|
288
|
-
- `schema` (`z.ZodSchema<T>`): Zod schema for extraction
|
|
289
|
-
- `options?` (`StructuredContentOptions`):
|
|
290
|
-
- `model?: string`: OpenAI model (default: 'gpt-5-mini')
|
|
291
|
-
- `customPrompt?: string`: Additional AI context
|
|
292
|
-
- `engineConfig?: PlaywrightEngineConfig`: HybridEngine config
|
|
293
|
-
- **Returns:** `Promise<StructuredContentResult<T>>`
|
|
294
|
-
|
|
295
|
-
### `engine.cleanup()`
|
|
296
|
-
|
|
297
|
-
Shuts down browser instances for `HybridEngine` and `StructuredContentEngine`. Call when finished to release resources. No-op for `FetchEngine`.
|
|
298
|
-
|
|
299
|
-
## Stealth Features
|
|
300
|
-
|
|
301
|
-
When `HybridEngine` uses Playwright, it automatically applies stealth measures via `playwright-extra` and stealth plugins to bypass common bot detection. No manual configuration required.
|
|
302
|
-
|
|
303
|
-
Stealth techniques are not foolproof against sophisticated detection systems.
|
|
304
|
-
|
|
305
|
-
## Error Handling
|
|
306
|
-
|
|
307
|
-
Errors are thrown as `FetchError` instances with additional context:
|
|
308
|
-
|
|
309
|
-
- `message` (`string`): Error description
|
|
310
|
-
- `code` (`string | undefined`): Specific error code
|
|
311
|
-
- `originalError` (`Error | undefined`): Underlying error
|
|
312
|
-
- `statusCode` (`number | undefined`): HTTP status code
|
|
313
|
-
|
|
314
|
-
Common error codes:
|
|
315
|
-
|
|
316
|
-
- `ERR_HTTP_ERROR`: HTTP status >= 400
|
|
317
|
-
- `ERR_NON_HTML_CONTENT`: Non-HTML content for HTML request
|
|
318
|
-
- `ERR_FETCH_FAILED`: General fetch operation failure
|
|
319
|
-
- `ERR_PLAYWRIGHT_OPERATION`: Playwright operation failure
|
|
320
|
-
- `ERR_NAVIGATION`: Navigation timeout or failure
|
|
321
|
-
- `ERR_BROWSER_POOL_EXHAUSTED`: No available browser resources
|
|
322
|
-
- `ERR_MAX_RETRIES_REACHED`: All retry attempts exhausted
|
|
323
|
-
- `ERR_MARKDOWN_CONVERSION_NON_HTML`: Markdown conversion on non-HTML content
|
|
324
|
-
|
|
325
|
-
```typescript
|
|
326
|
-
import { HybridEngine } from "@purepageio/fetch-engines";
|
|
327
|
-
|
|
328
|
-
const engine = new HybridEngine();
|
|
329
|
-
|
|
330
|
-
try {
|
|
331
|
-
const result = await engine.fetchHTML(url);
|
|
332
|
-
} catch (error: any) {
|
|
333
|
-
console.error(`Error: ${error.code || "Unknown"} - ${error.message}`);
|
|
334
|
-
if (error.statusCode) console.error(`Status: ${error.statusCode}`);
|
|
335
|
-
}
|
|
336
|
-
```
|
|
134
|
+
- Explore the [`examples`](./examples) directory for scripts you can run end-to-end.
|
|
135
|
+
- Ready-to-use TypeScript types ship with the package.
|
|
136
|
+
- `pnpm test` runs the automated suite when you are ready to contribute.
|
|
337
137
|
|
|
338
138
|
## Contributing
|
|
339
139
|
|
|
340
|
-
|
|
140
|
+
Issues and pull requests are welcome! Please run the existing lint and test commands before sending a change.
|
|
341
141
|
|
|
342
142
|
## License
|
|
343
143
|
|
|
344
|
-
MIT
|
|
144
|
+
Distributed under the [MIT](./LICENSE) license.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@purepageio/fetch-engines",
|
|
3
|
-
"version": "0.6.
|
|
3
|
+
"version": "0.6.2",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "A collection of configurable engines for fetching HTML content using fetch or Playwright.",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -13,10 +13,10 @@
|
|
|
13
13
|
"dependencies": {
|
|
14
14
|
"@ai-sdk/openai": "^2.0.30",
|
|
15
15
|
"ai": "^5.0.44",
|
|
16
|
-
"axios": "^1.
|
|
16
|
+
"axios": "^1.12.0",
|
|
17
17
|
"node-html-parser": "^7.0.1",
|
|
18
18
|
"p-queue": "^7.4.1",
|
|
19
|
-
"playwright": "^1.
|
|
19
|
+
"playwright": "^1.55.1",
|
|
20
20
|
"playwright-extra": "^4.3.6",
|
|
21
21
|
"puppeteer-extra-plugin": "^3.2.3",
|
|
22
22
|
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
|
@@ -27,6 +27,7 @@
|
|
|
27
27
|
"zod": "^4.1.8"
|
|
28
28
|
},
|
|
29
29
|
"devDependencies": {
|
|
30
|
+
"@playwright/test": "^1.55.1",
|
|
30
31
|
"@types/axios": "^0.14.0",
|
|
31
32
|
"@types/jsdom": "^21.1.6",
|
|
32
33
|
"@types/node": "^18.0.0",
|