wsper-js 0.1.1 → 0.1.2-wc2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +684 -476
- package/dist/index.d.ts +2534 -62
- package/dist/index.js +1 -1
- package/package.json +4 -2
package/README.md
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
|
|
2
1
|
<h1 align="center">
|
|
3
2
|
<img alt="wsper-js banner" src="https://i.pinimg.com/736x/0c/ff/62/0cff624a04a81495f4b8e69bcedd34aa.jpg" width="100%"/>
|
|
4
3
|
</h1>
|
|
@@ -8,98 +7,126 @@
|
|
|
8
7
|
[](https://www.npmjs.com/package/wsper-js)
|
|
9
8
|

|
|
10
9
|

|
|
11
|
-

|
|
12
11
|

|
|
13
12
|
</div>
|
|
14
13
|
|
|
15
|
-
# wsper-js
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
-
|
|
26
|
-
-
|
|
27
|
-
-
|
|
28
|
-
-
|
|
29
|
-
-
|
|
30
|
-
-
|
|
31
|
-
-
|
|
32
|
-
|
|
33
|
-
|
|
14
|
+
# wsper-js: Developer User Manual & API Guide
|
|
15
|
+
|
|
16
|
+
`wsper-js` is an enterprise-grade, TypeScript-first scraper and media generation toolkit. It provides structured scrapers for over 80 public and credentialed services, a deterministic canvas-based rendering engine, a stylized text-to-sticker generator, a secure file downloader, and request-pacing queues designed to interact ethically and robustly with public endpoints.
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Table of Contents
|
|
21
|
+
1. [Prerequisites & Installation](#prerequisites--installation)
|
|
22
|
+
2. [Quick Start](#quick-start)
|
|
23
|
+
3. [Core Concepts & Architecture](#core-concepts--architecture)
|
|
24
|
+
- [Unified WsperResponse Envelope](#unified-wsperresponse-envelope)
|
|
25
|
+
- [Request Pacing & Rate Limiting](#request-pacing--rate-limiting)
|
|
26
|
+
- [Safe HTTP Client & Rotation](#safe-http-client--rotation)
|
|
27
|
+
- [Credentials & Authentication Configuration](#credentials--authentication-configuration)
|
|
28
|
+
4. [In-Depth Feature Modules](#in-depth-feature-modules)
|
|
29
|
+
- [Brat Generator (Stickers & Media)](#1-brat-generator-stickers--media)
|
|
30
|
+
- [Analytics Chart Image Generator](#2-analytics-chart-image-generator)
|
|
31
|
+
- [Safe Downloader Primitive](#3-safe-downloader-primitive)
|
|
32
|
+
5. [Scrapers Usage Catalog](#scrapers-usage-catalog)
|
|
33
|
+
- [Social Media & AI Messaging](#social-media--ai-messaging)
|
|
34
|
+
- [Streaming & Media Resolvers](#streaming--media-resolvers)
|
|
35
|
+
- [Indonesian Reference Services](#indonesian-reference-services)
|
|
36
|
+
- [Global Reference & Search APIs](#global-reference--search-apis)
|
|
37
|
+
- [Scholarly & Academic Metadata](#scholarly--academic-metadata)
|
|
38
|
+
- [Developer Registry & Package APIs](#developer-registry--package-apis)
|
|
39
|
+
- [Utility, AI & Conversion Resolvers](#utility-ai--conversion-resolvers)
|
|
40
|
+
6. [Sandbox & Mock Server Usage](#sandbox--mock-server-usage)
|
|
41
|
+
7. [Error Handling & Exceptions](#error-handling--exceptions)
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## Prerequisites & Installation
|
|
46
|
+
|
|
47
|
+
Install the package via your preferred package manager:
|
|
34
48
|
|
|
35
49
|
```bash
|
|
36
50
|
npm install wsper-js
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
```bash
|
|
51
|
+
# or
|
|
40
52
|
pnpm add wsper-js
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
```bash
|
|
53
|
+
# or
|
|
44
54
|
yarn add wsper-js
|
|
45
55
|
```
|
|
46
56
|
|
|
47
|
-
|
|
57
|
+
### External Dependencies
|
|
58
|
+
Some optional modules require external binaries in your system `PATH`:
|
|
48
59
|
|
|
49
|
-
| Feature | External
|
|
50
|
-
| --- | --- |
|
|
51
|
-
| YouTube
|
|
52
|
-
| Video
|
|
60
|
+
| Feature | External Binary | Notes |
|
|
61
|
+
| --- | --- | --- |
|
|
62
|
+
| YouTube/Spotify Media Downloads | `yt-dlp` | Must be executable in your `PATH` |
|
|
63
|
+
| Brat Video/GIF Conversion | `ffmpeg` | Required for converting canvas streams to MP4/GIF |
|
|
64
|
+
|
|
65
|
+
---
|
|
53
66
|
|
|
54
67
|
## Quick Start
|
|
55
68
|
|
|
69
|
+
### Basic Scraper Class (Direct Usage)
|
|
70
|
+
Every scraper in the library is exported as a standalone class.
|
|
71
|
+
|
|
56
72
|
```ts
|
|
57
73
|
import { LyricsScraper } from "wsper-js";
|
|
58
74
|
|
|
59
|
-
const
|
|
60
|
-
const
|
|
75
|
+
const lyricsScraper = new LyricsScraper();
|
|
76
|
+
const response = await lyricsScraper.search("after hours the weeknd");
|
|
61
77
|
|
|
62
|
-
if (
|
|
63
|
-
|
|
78
|
+
if (response.ok && response.data) {
|
|
79
|
+
console.log(`Title: ${response.data.title}`);
|
|
80
|
+
console.log(`Lyrics: ${response.data.lyrics}`);
|
|
81
|
+
} else {
|
|
82
|
+
console.error(`Error [${response.error?.code}]: ${response.error?.message}`);
|
|
64
83
|
}
|
|
65
|
-
|
|
66
|
-
console.log(result.statusCode);
|
|
67
|
-
console.log(result.data?.title);
|
|
68
|
-
console.log(result.data?.lyrics);
|
|
69
84
|
```
|
|
70
85
|
|
|
71
|
-
|
|
86
|
+
### Aggregated Entrypoint (`WsperScraper`)
|
|
87
|
+
For larger architectures, you can use the unified `WsperScraper` client to access primary social and media scrapers with shared request-pacing configurations.
|
|
72
88
|
|
|
73
89
|
```ts
|
|
74
90
|
import { WsperScraper } from "wsper-js";
|
|
75
91
|
|
|
92
|
+
// Initialize with shared pacing options
|
|
76
93
|
const wsper = new WsperScraper({
|
|
77
|
-
queue: {
|
|
94
|
+
queue: {
|
|
95
|
+
concurrency: 2,
|
|
96
|
+
minDelayMs: 1000,
|
|
97
|
+
maxDelayMs: 3000,
|
|
98
|
+
},
|
|
99
|
+
debug: true
|
|
78
100
|
});
|
|
79
101
|
|
|
80
|
-
|
|
81
|
-
const
|
|
102
|
+
// Run parallel requests through the rate-limited queue
|
|
103
|
+
const [spotifyTrack, ytVideo] = await Promise.all([
|
|
104
|
+
wsper.spotify.search("never gonna give you up", { limit: 1 }),
|
|
105
|
+
wsper.youtube.getVideo("dQw4w9WgXcQ")
|
|
106
|
+
]);
|
|
82
107
|
|
|
83
|
-
console.log(
|
|
108
|
+
console.log("Spotify Search Result:", spotifyTrack.ok);
|
|
109
|
+
console.log("YouTube Video Result:", ytVideo.ok);
|
|
84
110
|
```
|
|
85
111
|
|
|
86
|
-
|
|
112
|
+
---
|
|
87
113
|
|
|
88
|
-
##
|
|
114
|
+
## Core Concepts & Architecture
|
|
89
115
|
|
|
90
|
-
|
|
116
|
+
### Unified WsperResponse Envelope
|
|
117
|
+
Every public scraper method returns a standard `WsperResponse<TData>` envelope. This prevents uncaught runtime errors and simplifies error checks.
|
|
91
118
|
|
|
92
119
|
```ts
|
|
93
120
|
export interface WsperResponse<TData, TMeta extends WsperResponseMeta = WsperResponseMeta> {
|
|
94
|
-
ok: boolean;
|
|
95
|
-
statusCode: number;
|
|
96
|
-
data: TData | null;
|
|
97
|
-
error: {
|
|
121
|
+
ok: boolean; // True if request succeeded and parsing completed
|
|
122
|
+
statusCode: number; // HTTP response code (or internal fallback: 400, 422, 500)
|
|
123
|
+
data: TData | null; // Type-safe payload on success; null on error
|
|
124
|
+
error: { // Normalized error details on failure; null on success
|
|
98
125
|
code: string;
|
|
99
126
|
message: string;
|
|
100
127
|
details?: Record<string, unknown>;
|
|
101
128
|
} | null;
|
|
102
|
-
meta: TMeta;
|
|
129
|
+
meta: TMeta; // Operational metadata
|
|
103
130
|
}
|
|
104
131
|
|
|
105
132
|
export interface WsperResponseMeta {
|
|
@@ -110,570 +137,751 @@ export interface WsperResponseMeta {
|
|
|
110
137
|
}
|
|
111
138
|
```
|
|
112
139
|
|
|
113
|
-
Recommended
|
|
114
|
-
|
|
140
|
+
#### Recommended Code Pattern:
|
|
115
141
|
```ts
|
|
116
|
-
const
|
|
142
|
+
const result = await scraper.search("query");
|
|
117
143
|
|
|
118
|
-
if (
|
|
119
|
-
|
|
144
|
+
if (!result.ok) {
|
|
145
|
+
// Gracefully handle failures without try-catch blocks
|
|
146
|
+
console.error(`Scraper failed: ${result.error?.code} - ${result.error?.message}`);
|
|
147
|
+
if (result.error?.details) {
|
|
148
|
+
console.error("Debug Context:", result.error.details);
|
|
149
|
+
}
|
|
120
150
|
} else {
|
|
121
|
-
|
|
151
|
+
// Safely consume data
|
|
152
|
+
console.log("Scraped Data:", result.data);
|
|
122
153
|
}
|
|
123
154
|
```
|
|
124
155
|
|
|
125
|
-
|
|
156
|
+
### Request Pacing & Rate Limiting
|
|
157
|
+
To respect external platform policies and prevent IP bans, `wsper-js` integrates a rate-limited task queue (`p-queue`) directly into the HTTP layer.
|
|
158
|
+
|
|
159
|
+
Configure queue parameters in `QueueOptions` at scraper construction:
|
|
160
|
+
|
|
161
|
+
```ts
|
|
162
|
+
import { SpotifyScraper } from "wsper-js";
|
|
163
|
+
|
|
164
|
+
const scraper = new SpotifyScraper({
|
|
165
|
+
queue: {
|
|
166
|
+
concurrency: 1, // Max active parallel connections
|
|
167
|
+
intervalMs: 1000, // Rate-limiting window
|
|
168
|
+
intervalCap: 3, // Max requests allowed per window (3 requests/sec)
|
|
169
|
+
minDelayMs: 500, // Minimum random delay (jitter) before each request
|
|
170
|
+
maxDelayMs: 1500, // Maximum random delay (jitter) before each request
|
|
171
|
+
timeoutMs: 30000, // Max request lifetime before the request is aborted
|
|
172
|
+
retries: 3 // Bounded retries for retryable status codes (e.g. 429, 503)
|
|
173
|
+
}
|
|
174
|
+
});
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
### Safe HTTP Client & Rotation
|
|
178
|
+
All HTTP tasks execute through a customized `HttpClient` wrapper around Axios. It features:
|
|
179
|
+
* **Browser Profile Rotation**: Automatically rotates randomized, realistic HTTP headers and User-Agent strings.
|
|
180
|
+
* **SSRF Protections**: Restricts destination URLs to standard protocols (`http:`, `https:`) and blocks private networks (`127.0.0.1`, `10.0.0.0/8`, etc.) by default. Set `allowPrivateNetwork: true` only for local testing or mock server environments.
|
|
181
|
+
* **Retry-After Compliance**: Automatically parses `Retry-After` response headers on HTTP 429 and delays subsequent queued requests.
|
|
182
|
+
|
|
183
|
+
### Credentials & Authentication Configuration
|
|
184
|
+
|
|
185
|
+
Every scraper in `wsper-js` is designed to run in **public mode** by default. However, many platforms require active session cookies, API tokens, or keys to fetch data successfully.
|
|
186
|
+
|
|
187
|
+
The library resolves credentials sequentially per scraper instance:
|
|
188
|
+
1. **Constructor Options** (Highest priority, recommended)
|
|
189
|
+
2. **Environment Variables** (System fallback)
|
|
126
190
|
|
|
127
|
-
|
|
191
|
+
---
|
|
128
192
|
|
|
129
|
-
|
|
193
|
+
#### 1. Input Formats: Raw Cookie String vs. Structured Object
|
|
130
194
|
|
|
131
|
-
|
|
195
|
+
When initializing a scraper, the `credentials` option accepts either a **raw cookie string** or a **structured object** (`WsperCredentials`):
|
|
132
196
|
|
|
197
|
+
##### A. Raw Cookie String Shortcut (Convenient)
|
|
198
|
+
If you pass a string directly to `credentials`, the library automatically normalizes it into a `{ cookie: string }` object and extracts properties like CSRF tokens.
|
|
133
199
|
```ts
|
|
134
|
-
|
|
200
|
+
const instagram = new InstagramScraper({
|
|
201
|
+
credentials: "sessionid=12345678%3Aabcde...; csrftoken=xyz...; ds_user_id=87654321"
|
|
202
|
+
});
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
##### B. Structured Object Config (Granular)
|
|
206
|
+
For advanced configurations, pass an object containing explicit fields:
|
|
207
|
+
```ts
|
|
208
|
+
const twitter = new TwitterScraper({
|
|
209
|
+
credentials: {
|
|
210
|
+
cookie: "auth_token=abc123xyz...; ct0=def456...",
|
|
211
|
+
csrfToken: "def456...", // Optional: explicitly provide the csrfToken (X-CSRFToken header)
|
|
212
|
+
userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) ..." // Override the default user-agent profile
|
|
213
|
+
}
|
|
214
|
+
});
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
---
|
|
135
218
|
|
|
136
|
-
|
|
137
|
-
const result = await scraper.search("after hours the weeknd");
|
|
219
|
+
#### 2. Platform-by-Platform Credentials Guide
|
|
138
220
|
|
|
139
|
-
|
|
221
|
+
Below are the exact options and code structures for initializing each credentialed scraper:
|
|
222
|
+
|
|
223
|
+
##### Instagram Scraper (`InstagramScraper`)
|
|
224
|
+
* **Required Credential**: Legitimate session cookie.
|
|
225
|
+
* **Auto-Extraction**: The client automatically parses `csrftoken` out of the cookie string and injects it as the `X-IG-App-ID` and `X-CSRFToken` headers.
|
|
226
|
+
```ts
|
|
227
|
+
import { InstagramScraper } from "wsper-js";
|
|
228
|
+
|
|
229
|
+
const instagram = new InstagramScraper({
|
|
230
|
+
credentials: {
|
|
231
|
+
cookie: "sessionid=YOUR_INSTAGRAM_SESSION_ID; csrftoken=YOUR_CSRF_TOKEN;",
|
|
232
|
+
userAgent: "Mozilla/5.0 (iPhone; CPU iPhone OS 17_4 like Mac OS X) ..." // Recommended mobile profile
|
|
233
|
+
}
|
|
234
|
+
});
|
|
140
235
|
```
|
|
141
236
|
|
|
142
|
-
|
|
237
|
+
##### Twitter/X Scraper (`TwitterScraper`)
|
|
238
|
+
* **Required Credential**: Session cookies containing `auth_token` and `ct0` (CSRF token).
|
|
239
|
+
* **Usage**:
|
|
240
|
+
```ts
|
|
241
|
+
import { TwitterScraper } from "wsper-js";
|
|
143
242
|
|
|
144
|
-
|
|
145
|
-
{
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
}
|
|
243
|
+
const twitter = new TwitterScraper({
|
|
244
|
+
credentials: {
|
|
245
|
+
cookie: "auth_token=YOUR_AUTH_TOKEN; ct0=YOUR_CSRF_TOKEN;",
|
|
246
|
+
csrfToken: "YOUR_CSRF_TOKEN" // Must match the value in the cookie (ct0)
|
|
247
|
+
}
|
|
248
|
+
});
|
|
150
249
|
```
|
|
151
250
|
|
|
152
|
-
|
|
251
|
+
##### Character.AI Scraper (`CaiScraper`)
|
|
252
|
+
* **Required Credential**: Bearer Auth Token.
|
|
253
|
+
* **Usage**:
|
|
254
|
+
```ts
|
|
255
|
+
import { CaiScraper } from "wsper-js";
|
|
153
256
|
|
|
154
|
-
|
|
257
|
+
const cai = new CaiScraper({
|
|
258
|
+
credentials: {
|
|
259
|
+
bearerToken: "YOUR_CHARACTER_AI_API_TOKEN" // Injected as "Authorization: Bearer <token>"
|
|
260
|
+
}
|
|
261
|
+
});
|
|
262
|
+
```
|
|
155
263
|
|
|
264
|
+
##### Spotify Scraper (`SpotifyScraper`)
|
|
265
|
+
* **Required Credential**: Spotify Developer Client ID and Secret (from developer.spotify.com).
|
|
266
|
+
* **Usage**: Configure via the dedicated `spotifyCredentials` object in the options.
|
|
156
267
|
```ts
|
|
157
|
-
import {
|
|
268
|
+
import { SpotifyScraper } from "wsper-js";
|
|
269
|
+
|
|
270
|
+
const spotify = new SpotifyScraper({
|
|
271
|
+
spotifyCredentials: {
|
|
272
|
+
clientId: "YOUR_SPOTIFY_CLIENT_ID",
|
|
273
|
+
clientSecret: "YOUR_SPOTIFY_CLIENT_SECRET",
|
|
274
|
+
callbackUrl: "http://localhost:3000/callback", // Optional
|
|
275
|
+
market: "ID" // Optional: country ISO code
|
|
276
|
+
}
|
|
277
|
+
});
|
|
278
|
+
```
|
|
158
279
|
|
|
159
|
-
|
|
160
|
-
|
|
280
|
+
##### YouTube Scraper (`YouTubeScraper`)
|
|
281
|
+
* **Configuration**: Requires paths to system binaries rather than typical session credentials.
|
|
282
|
+
* **Factory Method**: Use `YouTubeScraper.create` to automatically search `PATH` or Python virtual environments for `yt-dlp` and `ffmpeg`.
|
|
283
|
+
```ts
|
|
284
|
+
import { YouTubeScraper } from "wsper-js";
|
|
285
|
+
|
|
286
|
+
// Manual configuration
|
|
287
|
+
const yt = new YouTubeScraper({
|
|
288
|
+
ytdlpPath: "C:\\tools\\yt-dlp.exe",
|
|
289
|
+
ffmpegPath: "C:\\tools\\ffmpeg.exe",
|
|
290
|
+
ffprobePath: "C:\\tools\\ffprobe.exe",
|
|
291
|
+
outputDir: "./downloads/vids"
|
|
292
|
+
});
|
|
161
293
|
|
|
162
|
-
|
|
163
|
-
|
|
294
|
+
// Auto-detection factory method (Recommended)
|
|
295
|
+
const ytAuto = await YouTubeScraper.create({
|
|
296
|
+
pythonPath: "python3", // Tries "python3 -m yt_dlp" fallback if not in PATH
|
|
297
|
+
outputDir: "./downloads/vids"
|
|
298
|
+
});
|
|
164
299
|
```
|
|
165
300
|
|
|
166
|
-
|
|
301
|
+
##### remove.bg Scraper (`RemovebgScraper`)
|
|
302
|
+
* **Required Credential**: Official remove.bg API key.
|
|
303
|
+
* **Usage**:
|
|
304
|
+
```ts
|
|
305
|
+
import { RemovebgScraper } from "wsper-js";
|
|
167
306
|
|
|
168
|
-
|
|
169
|
-
{
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
"title": "Wallpaper sky123",
|
|
174
|
-
"resolution": "1920x1080",
|
|
175
|
-
"image": "https://w.wallhaven.cc/full/sky123.jpg",
|
|
176
|
-
"page": "https://wallhaven.cc/w/sky123"
|
|
177
|
-
}
|
|
178
|
-
]
|
|
179
|
-
}
|
|
307
|
+
const removebg = new RemovebgScraper({
|
|
308
|
+
credentials: {
|
|
309
|
+
apiKey: "YOUR_REMOVE_BG_API_KEY"
|
|
310
|
+
}
|
|
311
|
+
});
|
|
180
312
|
```
|
|
181
313
|
|
|
182
|
-
|
|
314
|
+
---
|
|
315
|
+
|
|
316
|
+
#### 3. Configuring Credentials in the Unified `WsperScraper`
|
|
183
317
|
|
|
184
|
-
|
|
318
|
+
The aggregate entrypoint class `WsperScraper` exposes a nested configuration layout, allowing you to bootstrap all credential-requiring scrapers simultaneously:
|
|
185
319
|
|
|
186
320
|
```ts
|
|
187
|
-
import {
|
|
321
|
+
import { WsperScraper } from "wsper-js";
|
|
188
322
|
|
|
189
|
-
const
|
|
190
|
-
|
|
323
|
+
const wsper = new WsperScraper({
|
|
324
|
+
// 1. Spotify developer credentials (dedicated object)
|
|
325
|
+
spotifyCredentials: {
|
|
326
|
+
clientId: "YOUR_SPOTIFY_CLIENT_ID",
|
|
327
|
+
clientSecret: "YOUR_SPOTIFY_CLIENT_SECRET",
|
|
328
|
+
},
|
|
329
|
+
// 2. Platform-specific session credentials
|
|
330
|
+
credentials: {
|
|
331
|
+
instagram: "sessionid=IG_SESSION_COOKIE; csrftoken=IG_CSRF_TOKEN",
|
|
332
|
+
twitter: {
|
|
333
|
+
cookie: "auth_token=X_AUTH_COOKIE; ct0=X_CSRF_COOKIE",
|
|
334
|
+
csrfToken: "X_CSRF_COOKIE"
|
|
335
|
+
},
|
|
336
|
+
cai: {
|
|
337
|
+
bearerToken: "CHARACTER_AI_WS_TOKEN"
|
|
338
|
+
},
|
|
339
|
+
threads: "sessionid=THREADS_SESSION_COOKIE; csrftoken=THREADS_CSRF_TOKEN",
|
|
340
|
+
pinterest: "sessionid=PINTEREST_SESSION_COOKIE"
|
|
341
|
+
},
|
|
342
|
+
// 3. YouTube binary paths configuration
|
|
343
|
+
youtube: {
|
|
344
|
+
ytdlpPath: "yt-dlp",
|
|
345
|
+
ffmpegPath: "ffmpeg"
|
|
346
|
+
}
|
|
347
|
+
});
|
|
191
348
|
|
|
192
|
-
|
|
349
|
+
// Access child clients directly
|
|
350
|
+
const isIgOk = (await wsper.instagram.getUserProfile("some_user")).ok;
|
|
193
351
|
```
|
|
194
352
|
|
|
195
|
-
|
|
353
|
+
---
|
|
196
354
|
|
|
197
|
-
|
|
198
|
-
{
|
|
199
|
-
"title": "Jiyan",
|
|
200
|
-
"slug": "Jin_Hsi",
|
|
201
|
-
"url": "https://wutheringwaves.fandom.com/wiki/Jin_Hsi",
|
|
202
|
-
"bio": "Bio karakter.",
|
|
203
|
-
"profile": {},
|
|
204
|
-
"images": []
|
|
205
|
-
}
|
|
206
|
-
```
|
|
355
|
+
#### 4. Environment Variables Configuration
|
|
207
356
|
|
|
208
|
-
|
|
357
|
+
If you do not pass credentials in the constructor, `wsper-js` will check your system environment variables. You can store these in a `.env` file in the root of your application.
|
|
209
358
|
|
|
210
|
-
|
|
359
|
+
> [!WARNING]
|
|
360
|
+
> The library itself does **not** load `.env` files automatically to avoid side-effects. You must import `dotenv` or run your Node.js application with the `--env-file` flag (Node 20.6+).
|
|
211
361
|
|
|
212
|
-
|
|
213
|
-
|
|
362
|
+
##### Node.js 20.6+ Native Env Loading:
|
|
363
|
+
```bash
|
|
364
|
+
node --env-file=.env index.js
|
|
365
|
+
```
|
|
214
366
|
|
|
215
|
-
|
|
216
|
-
|
|
367
|
+
##### Standard dotenv Import:
|
|
368
|
+
```ts
|
|
369
|
+
import "dotenv/config";
|
|
370
|
+
import { WsperScraper } from "wsper-js";
|
|
217
371
|
|
|
218
|
-
|
|
372
|
+
const wsper = new WsperScraper(); // Reads WSPER_* environment variables automatically
|
|
219
373
|
```
|
|
220
374
|
|
|
221
|
-
|
|
375
|
+
##### Supported Env Keys Reference:
|
|
376
|
+
```ini
|
|
377
|
+
# Spotify Developer API
|
|
378
|
+
WSPER_SPOTIFY_CLIENT_ID=your_client_id
|
|
379
|
+
WSPER_SPOTIFY_CLIENT_SECRET=your_client_secret
|
|
380
|
+
WSPER_SPOTIFY_CALLBACK_URL=http://localhost:3000/callback
|
|
381
|
+
WSPER_SPOTIFY_MARKET=US
|
|
222
382
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
383
|
+
# Social Media Session Cookies
|
|
384
|
+
WSPER_INSTAGRAM_COOKIE="sessionid=ig_cookie_val; csrftoken=ig_csrf_val;"
|
|
385
|
+
WSPER_THREADS_COOKIE="sessionid=threads_cookie_val; csrftoken=threads_csrf_val;"
|
|
386
|
+
WSPER_TWITTER_COOKIE="auth_token=twitter_cookie_val; ct0=twitter_csrf_val;"
|
|
387
|
+
WSPER_PINTEREST_COOKIE="sessionid=pinterest_cookie_val;"
|
|
388
|
+
WSPER_TIKTOK_COOKIE="sessionid=tiktok_cookie_val;"
|
|
389
|
+
WSPER_FACEBOOK_COOKIE="c_user=fb_cookie_val; xs=fb_xs_val;"
|
|
390
|
+
WSPER_BILIBILI_COOKIE="SESSDATA=bili_cookie_val;"
|
|
391
|
+
|
|
392
|
+
# Character.AI Auth
|
|
393
|
+
WSPER_CAI_TOKEN=your_cai_bearer_token
|
|
394
|
+
|
|
395
|
+
# AI Tools API Keys
|
|
396
|
+
REMOVEBG_API_KEY=your_remove_bg_api_key
|
|
235
397
|
```
|
|
236
398
|
|
|
237
|
-
|
|
399
|
+
---
|
|
400
|
+
|
|
401
|
+
## In-Depth Feature Modules
|
|
238
402
|
|
|
239
|
-
|
|
403
|
+
### 1. Brat Generator (Stickers & Media)
|
|
404
|
+
The Brat module provides programmatic text-to-media rendering, applying the classic green-and-black aesthetic (popularized by Charli XCX) or customized styles.
|
|
240
405
|
|
|
406
|
+
#### Generate static Brat Images (PNG/JPG/WEBP)
|
|
241
407
|
```ts
|
|
242
|
-
import {
|
|
408
|
+
import { BratGenerator } from "wsper-js";
|
|
409
|
+
|
|
410
|
+
const brat = new BratGenerator();
|
|
243
411
|
|
|
244
|
-
const
|
|
245
|
-
|
|
412
|
+
const imageResult = await brat.generate({
|
|
413
|
+
canvas: { preset: "1:1", width: 1024 }, // Aspect presets: "1:1", "9:16", "16:9"
|
|
414
|
+
text: {
|
|
415
|
+
value: "kamu pas kecil pernah nelen magnet ya? menarik banget soalnya",
|
|
416
|
+
align: "justify", // "left" | "center" | "right" | "justify"
|
|
417
|
+
color: "#111111",
|
|
418
|
+
blur: 0, // >0 to blur only the text pixels
|
|
419
|
+
},
|
|
420
|
+
background: { type: "solid", color: "#8ace00" }, // or "linear-gradient" / "radial-gradient"
|
|
421
|
+
backgroundBlur: 0, // blur radius for background layout (frosted-glass)
|
|
422
|
+
output: {
|
|
423
|
+
type: "image",
|
|
424
|
+
format: "png",
|
|
425
|
+
path: "./downloads/brat.png"
|
|
426
|
+
}
|
|
427
|
+
});
|
|
246
428
|
|
|
247
|
-
console.log(
|
|
429
|
+
console.log("Saved static Brat image to:", imageResult.path);
|
|
430
|
+
```
|
|
431
|
+
|
|
432
|
+
#### Generate Animated Brat GIFs & Videos
|
|
433
|
+
```ts
|
|
434
|
+
const gifResult = await brat.generate({
|
|
435
|
+
text: {
|
|
436
|
+
value: "charli xcx is so brat i can't even",
|
|
437
|
+
align: "center"
|
|
438
|
+
},
|
|
439
|
+
background: { type: "solid", color: "#8ace00" },
|
|
440
|
+
animation: {
|
|
441
|
+
enabled: true,
|
|
442
|
+
mode: "word", // "word" (appear word-by-word) or "character" (typewriter effect)
|
|
443
|
+
direction: "left-to-right",
|
|
444
|
+
fps: 15, // frame rate
|
|
445
|
+
textSpeed: 120, // milliseconds delay per word/character
|
|
446
|
+
},
|
|
447
|
+
output: {
|
|
448
|
+
type: "gif",
|
|
449
|
+
path: "./downloads/animated_brat.gif"
|
|
450
|
+
}
|
|
451
|
+
});
|
|
248
452
|
```
|
|
249
453
|
|
|
250
|
-
|
|
454
|
+
#### WhatsApp Sticker Generation & Media Format Conversion
|
|
455
|
+
```ts
|
|
456
|
+
// 1. Convert image buffer to WhatsApp-compliant sticker (512x512 WebP with overlays)
|
|
457
|
+
const stickerBuffer = await brat.imageToSticker(imageResult.buffer, {
|
|
458
|
+
size: 512,
|
|
459
|
+
format: "webp",
|
|
460
|
+
top: "BRAT ENERGY",
|
|
461
|
+
bottom: "2026",
|
|
462
|
+
});
|
|
251
463
|
|
|
252
|
-
|
|
253
|
-
{
|
|
254
|
-
|
|
255
|
-
|
|
464
|
+
// 2. Transcode formats (requires ffmpeg)
|
|
465
|
+
const mp4Buffer = await brat.gifToMp4("./downloads/animated_brat.gif", "./downloads/brat.mp4", { fps: 24 });
|
|
466
|
+
const webmBuffer = await brat.gifToWebm("./downloads/animated_brat.gif", "./downloads/brat.webm");
|
|
467
|
+
const reassembledGif = await brat.mp4ToGif("./downloads/brat.mp4", "./downloads/transcoded.gif");
|
|
256
468
|
```
|
|
257
469
|
|
|
258
|
-
|
|
470
|
+
---
|
|
259
471
|
|
|
260
|
-
|
|
472
|
+
### 2. Analytics Chart Image Generator
|
|
473
|
+
The `ChartGenerator` creates high-resolution visual statistics cards, posters, and dashboard widgets containing comparative group-and-user trends using `@napi-rs/canvas`.
|
|
261
474
|
|
|
262
475
|
```ts
|
|
263
|
-
import {
|
|
264
|
-
|
|
476
|
+
import { ChartGenerator, MonthlyPoint, WeeklyPoint } from "wsper-js";
|
|
477
|
+
|
|
478
|
+
const chart = new ChartGenerator();
|
|
479
|
+
|
|
480
|
+
const monthlyData: MonthlyPoint[] = [
|
|
481
|
+
{ label: "JAN", group: 120, user: 90 },
|
|
482
|
+
{ label: "FEB", group: 150, user: 110 },
|
|
483
|
+
{ label: "MAR", group: 240, user: 180 },
|
|
484
|
+
];
|
|
485
|
+
|
|
486
|
+
const weeklyData: WeeklyPoint[] = [
|
|
487
|
+
{ label: "MON", group: 40, user: 20 },
|
|
488
|
+
{ label: "TUE", group: 35, user: 25 },
|
|
489
|
+
{ label: "WED", group: 50, user: 30 },
|
|
490
|
+
];
|
|
491
|
+
|
|
492
|
+
const chartImageBuffer = await chart.generateAnalyticsStatsImage({
|
|
493
|
+
title: "Application Traffic Report",
|
|
494
|
+
subtitle: "Comparison: System Group vs Active Users",
|
|
495
|
+
monthly: monthlyData,
|
|
496
|
+
weekly: weeklyData,
|
|
497
|
+
width: 1024,
|
|
498
|
+
height: 1024,
|
|
499
|
+
output: "./downloads/traffic_report.png",
|
|
500
|
+
footer: "Generated via wsper-js Analytics module",
|
|
501
|
+
// Choose model style: "vintage-poster" | "modern-dashboard" | "minimal-report" | "dark-neon" | "compact-card"
|
|
502
|
+
model: "modern-dashboard",
|
|
503
|
+
theme: {
|
|
504
|
+
// Optionally override specific theme colors
|
|
505
|
+
background: "#0f172a",
|
|
506
|
+
ink: "#ffffff",
|
|
507
|
+
groupLine: "#38bdf8",
|
|
508
|
+
userLine: "#34d399",
|
|
509
|
+
}
|
|
510
|
+
});
|
|
511
|
+
```
|
|
265
512
|
|
|
266
|
-
|
|
267
|
-
const scraper = new ImgUpscalerScraper();
|
|
268
|
-
const result = await scraper.upscaleBuffer(image, "photo.jpg", 4);
|
|
513
|
+
---
|
|
269
514
|
|
|
270
|
-
|
|
271
|
-
|
|
515
|
+
### 3. Safe Downloader Primitive
|
|
516
|
+
The `Downloader` class lets developers download files to the local filesystem while enforcing safe output paths, blocking directory traversal, and enforcing file-size limits.
|
|
272
517
|
|
|
273
|
-
|
|
518
|
+
```ts
|
|
519
|
+
import { Downloader } from "wsper-js";
|
|
520
|
+
|
|
521
|
+
// Initialize and bind to a specific safe download directory
|
|
522
|
+
const downloader = new Downloader("./downloads/sandbox");
|
|
274
523
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
524
|
+
try {
|
|
525
|
+
const downloadResult = await downloader.download(
|
|
526
|
+
"https://example.com/large-assets.zip",
|
|
527
|
+
"archives/assets.zip", // Saved relatively inside './downloads/sandbox'
|
|
528
|
+
{
|
|
529
|
+
overwrite: true,
|
|
530
|
+
maxSizeBytes: 100 * 1024 * 1024, // Limit files to 100MB
|
|
531
|
+
allowedMimeTypes: ["application/zip"], // Whitelist content-type headers
|
|
532
|
+
timeoutMs: 60000 // Set download timeout
|
|
533
|
+
}
|
|
534
|
+
);
|
|
535
|
+
console.log("Download complete:", downloadResult.outputPath);
|
|
536
|
+
} catch (err) {
|
|
537
|
+
console.error("Secure download failed:", err instanceof Error ? err.message : err);
|
|
281
538
|
}
|
|
282
539
|
```
|
|
283
540
|
|
|
284
|
-
|
|
541
|
+
---
|
|
542
|
+
|
|
543
|
+
## Scrapers Usage Catalog
|
|
285
544
|
|
|
286
|
-
|
|
545
|
+
### Social Media & AI Messaging
|
|
546
|
+
|
|
547
|
+
#### Character.AI (`CaiScraper`)
|
|
548
|
+
Integrates search, Chat-V2, and TTS (Text-to-Speech) generation with session reuse.
|
|
287
549
|
|
|
288
550
|
```ts
|
|
289
|
-
import {
|
|
290
|
-
|
|
551
|
+
import { CaiScraper } from "wsper-js";
|
|
552
|
+
|
|
553
|
+
const cai = new CaiScraper({ credentials: { bearerToken: "YOUR_CAI_TOKEN" } });
|
|
554
|
+
|
|
555
|
+
// Search for a character
|
|
556
|
+
const search = await cai.searchCharacters("Mario");
|
|
557
|
+
const marioId = search.data?.[0]?.externalId;
|
|
558
|
+
|
|
559
|
+
// Send chat message and generate TTS Audio URL
|
|
560
|
+
if (marioId) {
|
|
561
|
+
const reply = await cai.chat({
|
|
562
|
+
characterId: marioId,
|
|
563
|
+
message: "It's-a me, who are you?",
|
|
564
|
+
voiceId: "4fdd6bc1-c659-4587-b462-53f569b39078" // Optional voice ID
|
|
565
|
+
});
|
|
291
566
|
|
|
292
|
-
|
|
293
|
-
|
|
567
|
+
console.log("Response text:", reply.data?.text);
|
|
568
|
+
console.log("TTS audio URL:", reply.data?.audioUrl);
|
|
294
569
|
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
const status = await scraper.checkStatus(upload.data.code);
|
|
298
|
-
console.log(status.data);
|
|
570
|
+
// Clean up WebSocket session
|
|
571
|
+
await cai.disconnectCharacterSession(marioId);
|
|
299
572
|
}
|
|
300
573
|
```
|
|
301
574
|
|
|
302
|
-
|
|
575
|
+
#### Instagram (`InstagramScraper`)
|
|
576
|
+
Fetches profile endpoints and user posts. Requires a legitimate authenticated session cookie.
|
|
303
577
|
|
|
304
|
-
```
|
|
305
|
-
{
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
}
|
|
578
|
+
```ts
|
|
579
|
+
import { InstagramScraper } from "wsper-js";
|
|
580
|
+
|
|
581
|
+
const ig = new InstagramScraper({ credentials: "sessionid=YOUR_COOKIE..." });
|
|
582
|
+
|
|
583
|
+
const profile = await ig.getUserProfile("charlixcx");
|
|
584
|
+
console.log("Bio:", profile.data?.bio);
|
|
585
|
+
|
|
586
|
+
const posts = await ig.getUserPosts("charlixcx", { limit: 12 });
|
|
587
|
+
```
|
|
588
|
+
|
|
589
|
+
#### Twitter/X (`TwitterScraper`)
|
|
590
|
+
Reads user timelines and tweets. The `csrfToken` is extracted automatically from the cookie string when one is passed.
|
|
591
|
+
|
|
592
|
+
```ts
|
|
593
|
+
import { TwitterScraper } from "wsper-js";
|
|
594
|
+
|
|
595
|
+
const x = new TwitterScraper({ credentials: "auth_token=YOUR_TOKEN..." });
|
|
596
|
+
|
|
597
|
+
const tweets = await x.getUserTimeline("elonmusk");
|
|
598
|
+
const singleTweet = await x.getTweetDetail("1234567890");
|
|
313
599
|
```
|
|
314
600
|
|
|
315
|
-
|
|
601
|
+
---
|
|
602
|
+
|
|
603
|
+
### Streaming & Media Resolvers
|
|
316
604
|
|
|
317
|
-
|
|
605
|
+
#### Spotify (`SpotifyScraper`)
|
|
606
|
+
Retrieves tracks, albums, and playlists, and downloads tracks as MP3 files (using external `yt-dlp` enrichment).
|
|
318
607
|
|
|
319
608
|
```ts
|
|
320
|
-
import {
|
|
321
|
-
import { FaceswapScraper } from "wsper-js";
|
|
609
|
+
import { SpotifyScraper } from "wsper-js";
|
|
322
610
|
|
|
323
|
-
const
|
|
324
|
-
readFile("./source-face.jpg"),
|
|
325
|
-
readFile("./target.jpg"),
|
|
326
|
-
]);
|
|
611
|
+
const spotify = new SpotifyScraper(); // Can take optional spotifyCredentials
|
|
327
612
|
|
|
328
|
-
|
|
329
|
-
const
|
|
613
|
+
// Get enriched metadata
|
|
614
|
+
const track = await spotify.getTrack("4PTG3Z6ehGkBF3zI7Ywtqs", { enrichYtDlp: true });
|
|
615
|
+
console.log("Spotify Title:", track.data?.name);
|
|
616
|
+
console.log("Associated YT Link:", track.data?.enriched?.youtubeUrl);
|
|
330
617
|
|
|
331
|
-
|
|
618
|
+
// Download track locally (saves artwork and MP3 file)
|
|
619
|
+
const download = await spotify.downloadPost({
|
|
620
|
+
trackUrlOrId: "4PTG3Z6ehGkBF3zI7Ywtqs",
|
|
621
|
+
outputDir: "./downloads/music",
|
|
622
|
+
audioFormat: "mp3",
|
|
623
|
+
includeMetadata: true
|
|
624
|
+
});
|
|
332
625
|
```
|
|
333
626
|
|
|
334
|
-
|
|
627
|
+
#### YouTube (`YouTubeScraper`)
|
|
628
|
+
Queries search, scrapes playlists, and triggers local transcode scripts.
|
|
335
629
|
|
|
336
|
-
```
|
|
337
|
-
{
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
630
|
+
```ts
|
|
631
|
+
import { YouTubeScraper } from "wsper-js";
|
|
632
|
+
|
|
633
|
+
const yt = new YouTubeScraper();
|
|
634
|
+
|
|
635
|
+
const video = await yt.getVideo("dQw4w9WgXcQ");
|
|
636
|
+
const search = await yt.searchVideos("lofi beats", { limit: 5 });
|
|
637
|
+
|
|
638
|
+
// Downloads video/audio (requires yt-dlp in PATH)
|
|
639
|
+
await yt.downloadVideo("dQw4w9WgXcQ", { outputDir: "./downloads/vids" });
|
|
640
|
+
await yt.downloadAudio("dQw4w9WgXcQ", { outputDir: "./downloads/vids", audioFormat: "mp3" });
|
|
341
641
|
```
|
|
342
642
|
|
|
343
|
-
|
|
643
|
+
---
|
|
344
644
|
|
|
345
|
-
|
|
645
|
+
### Indonesian Reference Services
|
|
646
|
+
|
|
647
|
+
#### Prayer Times (`BimasIslamScraper`)
|
|
648
|
+
Fetches official prayer schedules for provinces and cities in Indonesia from Kemenag.
|
|
346
649
|
|
|
347
650
|
```ts
|
|
348
|
-
import {
|
|
349
|
-
|
|
651
|
+
import { BimasIslamScraper } from "wsper-js";
|
|
652
|
+
|
|
653
|
+
const kemenag = new BimasIslamScraper();
|
|
350
654
|
|
|
351
|
-
const
|
|
352
|
-
const
|
|
353
|
-
const result = await scraper.upscaleBuffer(image, "image/jpeg");
|
|
655
|
+
const provinces = await kemenag.getProvinces();
|
|
656
|
+
const cities = await kemenag.getCities(provinces.data?.[0]?.id || "");
|
|
354
657
|
|
|
355
|
-
|
|
658
|
+
// Fetch prayer schedules for a specific region (Jakarta ID=18), June 2026
|
|
659
|
+
const schedule = await kemenag.getPrayerTimes("18", "18", 6, 2026);
|
|
660
|
+
console.log("Prayer Times for June 1st:", schedule.data?.[0]);
|
|
356
661
|
```
|
|
357
662
|
|
|
358
|
-
|
|
663
|
+
#### Earthquake & Weather (`BMKGScraper` & `CuacaScraper`)
|
|
664
|
+
Retrieves BMKG meteorological data feeds.
|
|
359
665
|
|
|
360
|
-
```
|
|
361
|
-
{
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
666
|
+
```ts
|
|
667
|
+
import { BMKGScraper, CuacaScraper } from "wsper-js";
|
|
668
|
+
|
|
669
|
+
const bmkg = new BMKGScraper();
|
|
670
|
+
const cuaca = new CuacaScraper();
|
|
671
|
+
|
|
672
|
+
// 1. Get recent earthquake alerts
|
|
673
|
+
const earthquakes = await bmkg.getRecentEarthquakes();
|
|
674
|
+
console.log("Recent Gempa:", earthquakes.data?.gempaTerbuka);
|
|
675
|
+
|
|
676
|
+
// 2. Get local weather forecast by latitude & longitude
|
|
677
|
+
const localWeather = await cuaca.getWeatherByCoordinate(-6.2, 106.8);
|
|
366
678
|
```
|
|
367
679
|
|
|
368
|
-
|
|
680
|
+
---
|
|
369
681
|
|
|
370
|
-
|
|
682
|
+
### Global Reference & Search APIs
|
|
683
|
+
|
|
684
|
+
#### Rest Countries (`RestCountriesScraper`)
|
|
685
|
+
Provides country profiles. Pass an explicit list of fields when fetching all countries so the HTTP response payload stays bounded.
|
|
371
686
|
|
|
372
687
|
```ts
|
|
373
|
-
import {
|
|
688
|
+
import { RestCountriesScraper } from "wsper-js";
|
|
374
689
|
|
|
375
|
-
const
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
```
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
"dependents": "0",
|
|
393
|
-
"version_count": "1",
|
|
394
|
-
"keywords": [],
|
|
395
|
-
"install": "npm install @scope/pkg",
|
|
396
|
-
"info": [],
|
|
397
|
-
"collaborator": []
|
|
398
|
-
}
|
|
690
|
+
const rest = new RestCountriesScraper();
|
|
691
|
+
|
|
692
|
+
// Explicitly choose required fields for performance
|
|
693
|
+
const countries = await rest.getAll(["name", "cca2", "population", "flags"]);
|
|
694
|
+
const indonesia = await rest.getByName("Indonesia");
|
|
695
|
+
```
|
|
696
|
+
|
|
697
|
+
#### Wikipedia (`WikipediaScraper`)
|
|
698
|
+
Searches and summarizes Wikipedia entries.
|
|
699
|
+
|
|
700
|
+
```ts
|
|
701
|
+
import { WikipediaScraper } from "wsper-js";
|
|
702
|
+
|
|
703
|
+
const wiki = new WikipediaScraper();
|
|
704
|
+
|
|
705
|
+
const summary = await wiki.getSummary("Artificial Intelligence", "en");
|
|
706
|
+
console.log("Excerpt:", summary.data?.extract);
|
|
399
707
|
```
|
|
400
708
|
|
|
401
|
-
|
|
709
|
+
#### OpenMeteo (`OpenMeteoScraper`)
|
|
710
|
+
Queries forecasts and current weather parameters.
|
|
711
|
+
|
|
712
|
+
```ts
|
|
713
|
+
import { OpenMeteoScraper } from "wsper-js";
|
|
714
|
+
|
|
715
|
+
const weather = new OpenMeteoScraper();
|
|
716
|
+
const forecast = await weather.getForecast(-6.2, 106.8, { hourly: "temperature_2m" });
|
|
717
|
+
```
|
|
402
718
|
|
|
403
|
-
|
|
719
|
+
#### USGS Earthquakes (`UsgsEarthquakeScraper`)
|
|
720
|
+
Reads active seismic activity feeds.
|
|
404
721
|
|
|
405
722
|
```ts
|
|
406
|
-
import {
|
|
723
|
+
import { UsgsEarthquakeScraper } from "wsper-js";
|
|
724
|
+
|
|
725
|
+
const usgs = new UsgsEarthquakeScraper();
|
|
726
|
+
const feed = await usgs.getSummary("all_day"); // "all_hour" | "all_day" | "all_week"
|
|
727
|
+
```
|
|
728
|
+
|
|
729
|
+
---
|
|
730
|
+
|
|
731
|
+
### Scholarly & Academic Metadata
|
|
732
|
+
|
|
733
|
+
#### Crossref (`CrossrefScraper`)
|
|
734
|
+
Searches academic DOI indexes.
|
|
407
735
|
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
"https://www.mediafire.com/file/ipnyzofjcwri357/test-10mb.bin/file",
|
|
411
|
-
);
|
|
736
|
+
```ts
|
|
737
|
+
import { CrossrefScraper } from "wsper-js";
|
|
412
738
|
|
|
413
|
-
|
|
739
|
+
// `politeMailTo` identifies your client to Crossref with a contact email (polite usage)
|
|
740
|
+
const crossref = new CrossrefScraper({ politeMailTo: "developer@example.com" });
|
|
741
|
+
const paper = await crossref.getWorkByDoi("10.1038/nature14539");
|
|
414
742
|
```
|
|
415
743
|
|
|
416
|
-
|
|
744
|
+
#### OpenAlex (`OpenAlexScraper`)
|
|
745
|
+
Queries scholarly graphs, works, institutions, and authors.
|
|
417
746
|
|
|
418
|
-
```
|
|
419
|
-
{
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
"fileType": "ZIP"
|
|
424
|
-
}
|
|
747
|
+
```ts
|
|
748
|
+
import { OpenAlexScraper } from "wsper-js";
|
|
749
|
+
|
|
750
|
+
const openAlex = new OpenAlexScraper();
|
|
751
|
+
const searchWorks = await openAlex.searchWorks("machine learning", { limit: 10 });
|
|
425
752
|
```
|
|
426
753
|
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
| Scraper | Purpose | Source/API | Auth/Cookie required? | Example file | Notes |
|
|
430
|
-
| --- | --- | --- | --- | --- | --- |
|
|
431
|
-
| `AlkitabScraper` | Bible verse search | `alkitab.me` | No | `examples/alkitab.example.ts` | `search(query)` |
|
|
432
|
-
| `AnimeQuoteScraper` | Random anime quote | `otakotaku.com` | No | `examples/anime-quote.example.ts` | `getRandom()` |
|
|
433
|
-
| `AnimeRandomScraper` | Random anime character image | GitHub raw anime dataset | No | `examples/anime-random.example.ts` | `getImage(character)`, `random()` |
|
|
434
|
-
| `BiliBiliScraper` | BiliBili search and video info | `api.bilibili.com` | Optional cookie | `examples/bilibili.example.ts` | Cookie may unlock authenticated stream access |
|
|
435
|
-
| `BMKGScraper` | Indonesian earthquake and weather feeds | `data.bmkg.go.id`, `nowcasting.bmkg.go.id` | No | `examples/bmkg.example.ts` | Autogempa, gempa dirasakan, nowcasting, forecast |
|
|
436
|
-
| `CapCutScraper` | Resolve CapCut template video URL | `capdownloader.com/wp-json/aio-dl/video-data/` | No | `examples/capcut.example.ts` | Mocked in example runner |
|
|
437
|
-
| `CuacaScraper` | Indonesian weather by location/coordinate | BMKG weather APIs | Optional API key for warnings | `examples/cuaca.example.ts` | Reads optional `BMKG_WARNING_API_KEY` in example |
|
|
438
|
-
| `DrakorScraper` | Korean drama search/list/detail | `drakorkita30.kita.baby` | No | `examples/drakor.example.ts` | `search`, `detail`, `ongoing`, `getAll` |
|
|
439
|
-
| `DramaboxScraper` | Dramabox search | `dramabox.com` | No | `examples/dramabox.example.ts` | `search(query)` |
|
|
440
|
-
| `FaceswapScraper` | Face-swap image processing | `api.lovefaceswap.com` | No | `examples/faceswap.example.ts` | Mocked in example runner |
|
|
441
|
-
| `HokInfoScraper` | Honor of Kings character info | Fandom MediaWiki parse API | No | `examples/hok-info.example.ts` | Uses `api.php?action=parse` |
|
|
442
|
-
| `HtmlToJpgScraper` | HTML file to JPG conversion | `api.freeconvert.com` | No credential in code | `examples/html-to-jpg.example.ts` | File-based conversion; skipped in runner without fixture |
|
|
443
|
-
| `IkiruMangaScraper` | Manga search | `02.ikiru.wtf` | No | `examples/ikiru-manga.example.ts` | Mock server fallback available |
|
|
444
|
-
| `ImageScraper` | Safebooru image search | `safebooru.org` | No | `examples/image.example.ts` | Current site type: `safebooru` |
|
|
445
|
-
| `ImgUpscalerScraper` | Image upscaling | `get1.imglarger.com` | No | `examples/img-upscaler.example.ts` | Mocked in example runner |
|
|
446
|
-
| `InstagramScraper` | Profile, feed, post, download | `instagram.com` web/API endpoints | Internal defaults; custom cookie supported | `examples/instagram.example.ts` | Use only legitimate session cookies |
|
|
447
|
-
| `KomikindoScraper` | Manga search/detail | `komikindo.ch` | No | `examples/komikindo.example.ts` | `search`, `getDetail` |
|
|
448
|
-
| `LyricsScraper` | Lyrics search | `lrclib.net` JSON API | No | `examples/lyrics.example.ts` | Replaced blocked HTML scraping with API integration |
|
|
449
|
-
| `MConverterScraper` | File conversion helpers | `mconverter.eu` | No | `examples/mconverter.example.ts` | `getTargets`, `convert`, `convertBuffer` |
|
|
450
|
-
| `McAddonScraper` | Minecraft addon search/detail | `mmcreviews.com` | No | `examples/mcaddon.example.ts` | `search`, `getDetail`, `getAddon` |
|
|
451
|
-
| `MediafireScraper` | Resolve Mediafire download link | Mediafire public HTML page | No | `examples/mediafire.example.ts` | Default example uses active 10MB test file |
|
|
452
|
-
| `ModAndroidScraper` | Android APK/mod search aggregations | `an1.com`, `modyolo.com`, `aptoide.com`, `uptodown.com` | No | `examples/mod-android.example.ts` | `android1`, `modyolo`, `aptoide`, `uptodown`, `searchAll` |
|
|
453
|
-
| `OcrScraper` | OCR image scan | `newocr.com` | No | `examples/ocr.example.ts` | File/buffer based |
|
|
454
|
-
| `PhotoAiScraper` | Photo AI upload/status | `photoai.imglarger.com` | No | `examples/photo-ai.example.ts` | Mocked in example runner |
|
|
455
|
-
| `PinterestScraper` | Pin search, detail, download | `pinterest.com` | Internal defaults; custom cookie supported | `examples/pinterest.example.ts` | Supports `credentials` option |
|
|
456
|
-
| `PlayStoreScraper` | Google Play app search | `play.google.com` | No | `examples/playstore.example.ts` | `search(query, limit)` |
|
|
457
|
-
| `ResepScraper` | Recipe search | `cookpad.com` | No | `examples/resep.example.ts` | Returns recipe items |
|
|
458
|
-
| `SakuraNovelScraper` | Novel search/detail/chapter | `sakuranovel.id` | No | `examples/sakura-novel.example.ts` | Mock server fallback available |
|
|
459
|
-
| `SpotifyScraper` | Spotify track, album, playlist, search, downloads | Spotify Web API and Accounts API | Internal defaults; custom client credentials supported | `examples/spotify.example.ts` | Partial custom credentials are rejected |
|
|
460
|
-
| `StalkScraper` | npm package metadata lookup | `registry.npmjs.org` | No | `examples/stalk.example.ts` | Default example query is `axios` |
|
|
461
|
-
| `ThreadsScraper` | Threads profile, post, search, download | `threads.net` web/API endpoints | Internal defaults; custom cookie supported | `examples/threads.example.ts` | Use only legitimate session cookies |
|
|
462
|
-
| `TopAnimeScraper` | MyAnimeList top anime list | `myanimelist.net` | No | `examples/top-anime.example.ts` | `getTopAnime(limit)` |
|
|
463
|
-
| `TwitterScraper` | Tweet, profile, search, timeline, downloads | `x.com/i/api/graphql` | Cookie/CSRF commonly required | `examples/twitter.example.ts` | Supports `credentials` option |
|
|
464
|
-
| `UguuScraper` | Temporary file upload | `uguu.se/upload` | No | `examples/uguu.example.ts` | `upload(buffer, filename)` |
|
|
465
|
-
| `UpscalerScraper` | Image enhancement | `aienhancer.ai` | No | `examples/upscaler.example.ts` | Rejects remote URL string input |
|
|
466
|
-
| `VideyScraper` | Video upload | `videy.co/api/upload` | No | `examples/videy.example.ts` | `upload`, `uploadBuffer` |
|
|
467
|
-
| `WallpaperScraper` | Wallpaper search | `wallhaven.cc/api/v1/search` | No | `examples/wallpaper.example.ts` | Replaced blocked HTML scraping with API integration |
|
|
468
|
-
| `Webp2Mp4Scraper` | WebP to MP4/PNG conversion | `ezgif.com` | No | `examples/webp2mp4.example.ts` | `toMp4`, `toPng` |
|
|
469
|
-
| `WwCharScraper` | Wuthering Waves character info | Fandom MediaWiki parse API | No | `examples/ww-char.example.ts` | Uses `api.php?action=parse` |
|
|
470
|
-
| `YouTubeScraper` | YouTube metadata, search, playlist, channel, downloads | `yt-dlp`, `play-dl`, YouTube pages | No cookie option exposed in current scraper options | `examples/youtube.example.ts` | Media download features need external tools |
|
|
471
|
-
|
|
472
|
-
## What's New
|
|
473
|
-
|
|
474
|
-
The latest scraper reliability pass fixed 12 failing scraper functions and their corresponding tests.
|
|
475
|
-
|
|
476
|
-
- `LyricsScraper` now uses LRCLIB's public JSON API instead of a Cloudflare-protected Musixmatch HTML page.
|
|
477
|
-
- `WallpaperScraper` now uses Wallhaven's public search API instead of a Cloudflare-protected Wallpaperflare HTML page.
|
|
478
|
-
- `WwCharScraper` and `HokInfoScraper` now use Fandom's MediaWiki parse API (`api.php?action=parse`) and prepend a synthetic `#firstHeading` node before running the existing parsers.
|
|
479
|
-
- `examples/mock-server.ts` now covers WordPress resolver routes, AI tool polling/upload routes, and Fandom API responses.
|
|
480
|
-
- `examples/alllexamp.ts` starts and stops the mock server during the runner lifecycle.
|
|
481
|
-
- `WSPER_MOCK_BASE_URL` is injected into spawned example subprocesses so individual examples can use the local mock endpoint automatically.
|
|
482
|
-
- Default example inputs were updated: `StalkScraper` uses `axios`, `MediafireScraper` uses an active 10MB test file URL, and `HokInfoScraper` uses `Angela`.
|
|
483
|
-
- Verification recorded in `walkthrough.md`: 82 test files passed, 314 tests passed, and the examples runner reported 83 OK, 0 FAIL, 2 SKIP.
|
|
484
|
-
|
|
485
|
-
## Mock Server and Testing
|
|
486
|
-
|
|
487
|
-
`examples/mock-server.ts` is a local HTTP server used by the examples runner to provide deterministic responses for endpoints that are rate-limited, depend on third-party availability, or are inconvenient to call during automated checks.
|
|
488
|
-
|
|
489
|
-
Currently mocked routes include:
|
|
490
|
-
|
|
491
|
-
| Area | Routes |
|
|
492
|
-
| --- | --- |
|
|
493
|
-
| CapCut | `/wp-json/aio-dl/video-data/` |
|
|
494
|
-
| ImgUpscaler | `/api/UpscalerNew/UploadNew`, `/api/UpscalerNew/CheckStatusNew` |
|
|
495
|
-
| PhotoAi | `/api/PhoAi/Upload`, `/api/PhoAi/CheckStatus` |
|
|
496
|
-
| Faceswap | `/api/face-swap/create-poll`, `/api/common/get` |
|
|
497
|
-
| Upscaler | `/api/v1/r/image-enhance/create`, `/api/v1/r/image-enhance/result` |
|
|
498
|
-
| Fandom Wiki | `/api.php` |
|
|
499
|
-
| WordPress search fixtures | `/wp-admin/admin-ajax.php` |
|
|
500
|
-
|
|
501
|
-
`examples/alllexamp.ts` starts the mock server, sets `process.env.WSPER_MOCK_BASE_URL`, routes direct scraper demos to the mock server with `http: { allowPrivateNetwork: true }` where needed, runs individual example files as subprocesses, then closes the mock server.
|
|
502
|
-
|
|
503
|
-
This keeps example validation safer and more repeatable. It avoids making every test depend on live third-party services, Cloudflare-protected HTML pages, or rate-limited AI tool endpoints.
|
|
504
|
-
|
|
505
|
-
## Running Examples
|
|
506
|
-
|
|
507
|
-
Run all direct scraper demos and individual example files:
|
|
754
|
+
---
|
|
508
755
|
|
|
509
|
-
|
|
510
|
-
|
|
756
|
+
### Developer Registry & Package APIs
|
|
757
|
+
|
|
758
|
+
#### Python Package Index (`PypiScraper`)
|
|
759
|
+
Retrieves package releases and upload history mappings.
|
|
760
|
+
|
|
761
|
+
```ts
|
|
762
|
+
import { PypiScraper } from "wsper-js";
|
|
763
|
+
|
|
764
|
+
const pypi = new PypiScraper();
|
|
765
|
+
const pkg = await pypi.getPackage("requests");
|
|
766
|
+
const release = await pypi.getRelease("requests", "2.31.0");
|
|
511
767
|
```
|
|
512
768
|
|
|
513
|
-
|
|
769
|
+
#### npm Registry (`StalkScraper`)
|
|
770
|
+
Resolves npm package metadata.
|
|
514
771
|
|
|
515
|
-
|
|
772
|
+
```ts
|
|
773
|
+
import { StalkScraper } from "wsper-js";
|
|
516
774
|
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
775
|
+
const stalk = new StalkScraper();
|
|
776
|
+
const pkg = await stalk.getNpmPackage("axios");
|
|
777
|
+
```
|
|
778
|
+
|
|
779
|
+
#### OSV Vulnerability Database (`OsvScraper`)
|
|
780
|
+
Finds known vulnerabilities in open-source packages.
|
|
522
781
|
|
|
523
|
-
|
|
782
|
+
```ts
|
|
783
|
+
import { OsvScraper } from "wsper-js";
|
|
524
784
|
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
FAIL : 0
|
|
528
|
-
SKIP : 2
|
|
529
|
-
Total: 85
|
|
785
|
+
const osv = new OsvScraper();
|
|
786
|
+
const query = await osv.queryPackage({ ecosystem: "PyPI", name: "requests", version: "2.20.0" });
|
|
530
787
|
```
|
|
531
788
|
|
|
532
|
-
|
|
789
|
+
---
|
|
533
790
|
|
|
534
|
-
|
|
535
|
-
npx tsx examples/lyrics.example.ts "after hours the weeknd"
|
|
536
|
-
npx tsx examples/wallpaper.example.ts cyberpunk
|
|
537
|
-
npx tsx examples/stalk.example.ts axios
|
|
538
|
-
npx tsx examples/mediafire.example.ts "https://www.mediafire.com/file/ipnyzofjcwri357/test-10mb.bin/file"
|
|
539
|
-
npx tsx examples/upscaler.example.ts testassets/photo.jpg
|
|
540
|
-
```
|
|
541
|
-
|
|
542
|
-
For scrapers that support cookies or credentials, use only accounts and sessions you are authorized to access. Do not hardcode real cookies, tokens, client secrets, or API keys in source files.
|
|
543
|
-
|
|
544
|
-
## Running Tests and Validation
|
|
545
|
-
|
|
546
|
-
Package scripts from `package.json`:
|
|
547
|
-
|
|
548
|
-
| Command | Purpose |
|
|
549
|
-
| --- | --- |
|
|
550
|
-
| `npm run test` | Run all Vitest tests once. |
|
|
551
|
-
| `npm run test:watch` | Run Vitest in watch mode. |
|
|
552
|
-
| `npm run typecheck` | Run TypeScript with `--noEmit`. |
|
|
553
|
-
| `npm run build` | Build production ESM output through `script/build.mjs`. |
|
|
554
|
-
| `npm run build:dev` | Build development output. |
|
|
555
|
-
| `npm run build:prod` | Build production output. |
|
|
556
|
-
| `npm run build:bytecode` | Build bytecode output with `script/build-bytecode.mjs`. |
|
|
557
|
-
| `npm run build:all` | Build production output and bytecode. |
|
|
558
|
-
| `npm run test:instagram` | Run Instagram tests only. |
|
|
559
|
-
| `npm run test:spotify` | Run Spotify tests only. |
|
|
560
|
-
| `npm run test:youtube` | Run YouTube tests only. |
|
|
561
|
-
| `npm run test:threads` | Run Threads tests only. |
|
|
562
|
-
| `npm run test:pinterest` | Run Pinterest tests only. |
|
|
563
|
-
| `npm run test:brat` | Run Brat tests only. |
|
|
564
|
-
|
|
565
|
-
Recommended validation before publishing or changing behavior:
|
|
791
|
+
### Utility, AI & Conversion Resolvers
|
|
566
792
|
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
npm run test
|
|
570
|
-
npm run build
|
|
571
|
-
npx tsx examples/alllexamp.ts
|
|
572
|
-
```
|
|
573
|
-
|
|
574
|
-
## Project Structure
|
|
575
|
-
|
|
576
|
-
```txt
|
|
577
|
-
src/
|
|
578
|
-
index.ts Public package exports
|
|
579
|
-
WsperScraper.ts Aggregate scraper entrypoint
|
|
580
|
-
core/
|
|
581
|
-
credentials/ Credential normalization and platform headers
|
|
582
|
-
error/ WsperError, ValidationError, HttpError, ParseError, DownloadError, ScraperError
|
|
583
|
-
http/ HTTP client, retries, timeouts, safe URL validation
|
|
584
|
-
parser/ Shared HTML and JSON parser helpers
|
|
585
|
-
queue/ Request pacing and concurrency control
|
|
586
|
-
modules/
|
|
587
|
-
brat/ Brat image/GIF/video generator and converters
|
|
588
|
-
chart/ Analytics image generator
|
|
589
|
-
download/ Safe downloader primitives
|
|
590
|
-
scrapers/ Platform-specific scraper implementations
|
|
591
|
-
types/ Shared response, option, and common types
|
|
592
|
-
utils/ Sleep, URL, validation, browser-profile, and helper utilities
|
|
593
|
-
examples/
|
|
594
|
-
alllexamp.ts Full example runner
|
|
595
|
-
mock-server.ts Local deterministic mock server
|
|
596
|
-
*.example.ts Individual runnable examples
|
|
597
|
-
tests/
|
|
598
|
-
*/*.test.ts Unit and parser tests
|
|
599
|
-
dist/ Build output only; do not edit manually
|
|
600
|
-
```
|
|
601
|
-
|
|
602
|
-
## Environment Variables
|
|
603
|
-
|
|
604
|
-
These variables appear in the repository:
|
|
605
|
-
|
|
606
|
-
| Variable | Used by | Required? | Notes |
|
|
607
|
-
| --- | --- | --- | --- |
|
|
608
|
-
| `WSPER_MOCK_BASE_URL` | `examples/alllexamp.ts`, AI tool examples | No | Set by the all-examples runner for subprocesses. Points examples to the local mock server. |
|
|
609
|
-
| `BMKG_WARNING_API_KEY` | `examples/cuaca.example.ts` | No | Optional warning API key passed to `CuacaScraper({ warningApiKey })`. |
|
|
610
|
-
| `INSTAGRAM_COOKIE` | `examples/instagram.example.ts` comments | No | Optional example input for constructor credentials. Use `<YOUR_COOKIE_HERE>` style placeholders in docs and never commit real cookies. |
|
|
611
|
-
| `INSTAGRAM_CSRF_TOKEN` | `examples/instagram.example.ts` comments | No | Optional example input for constructor credentials. |
|
|
612
|
-
| `BILI_COOKIE` | `examples/bilibili.example.ts` | No | Optional BiliBili cookie for authenticated stream access. |
|
|
613
|
-
| `WSPER_COOKIE` | `tests/core/credentials.test.ts` | No | Test-only variable proving runtime credential resolution does not read env credentials automatically. |
|
|
614
|
-
|
|
615
|
-
Credential configuration is constructor-based. The library does not rely on `.env` files for runtime credentials.
|
|
793
|
+
#### GDrive (`GDriveScraper`)
|
|
794
|
+
Finds direct download paths and confirms token attributes of shared files.
|
|
616
795
|
|
|
617
796
|
```ts
|
|
618
|
-
import {
|
|
797
|
+
import { GDriveScraper } from "wsper-js";
|
|
619
798
|
|
|
620
|
-
const
|
|
621
|
-
|
|
622
|
-
clientId: "your-client-id",
|
|
623
|
-
clientSecret: "your-client-secret",
|
|
624
|
-
},
|
|
625
|
-
credentials: {
|
|
626
|
-
twitter: {
|
|
627
|
-
cookie: "<YOUR_COOKIE_HERE>",
|
|
628
|
-
csrfToken: "<YOUR_CSRF_TOKEN_HERE>",
|
|
629
|
-
},
|
|
630
|
-
instagram: {
|
|
631
|
-
cookie: "<YOUR_COOKIE_HERE>",
|
|
632
|
-
csrfToken: "<YOUR_CSRF_TOKEN_HERE>",
|
|
633
|
-
},
|
|
634
|
-
},
|
|
635
|
-
});
|
|
799
|
+
const gdrive = new GDriveScraper();
|
|
800
|
+
const file = await gdrive.getFileInfo("https://drive.google.com/file/d/SHARED_ID/view");
|
|
636
801
|
```
|
|
637
802
|
|
|
638
|
-
|
|
803
|
+
#### AI Image Enhancer (`UpscalerScraper`)
|
|
804
|
+
Uploads image buffers to the aienhancer API and polls until the enhanced result is ready.
|
|
805
|
+
|
|
806
|
+
```ts
|
|
807
|
+
import { UpscalerScraper } from "wsper-js";
|
|
808
|
+
import { readFile } from "node:fs/promises";
|
|
809
|
+
|
|
810
|
+
const upscaler = new UpscalerScraper();
|
|
811
|
+
const buffer = await readFile("photo.jpg");
|
|
639
812
|
|
|
640
|
-
|
|
813
|
+
const result = await upscaler.upscaleBuffer(buffer, "image/jpeg");
|
|
814
|
+
if (result.ok && result.data) {
|
|
815
|
+
console.log("Upscaled Image CDN Link:", result.data.output);
|
|
816
|
+
}
|
|
817
|
+
```
|
|
641
818
|
|
|
642
|
-
|
|
819
|
+
---
|
|
643
820
|
|
|
644
|
-
|
|
821
|
+
## Sandbox & Mock Server Usage
|
|
645
822
|
|
|
646
|
-
|
|
823
|
+
To prevent outbound requests to live websites during automated tests or local script iterations, `wsper-js` packages a local mock server under `examples/mock-server.ts`.
|
|
647
824
|
|
|
648
|
-
|
|
825
|
+
### Running the Mock Server
|
|
826
|
+
Start the server in a terminal window:
|
|
649
827
|
|
|
650
828
|
```bash
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
829
|
+
npx tsx examples/mock-server.ts
|
|
830
|
+
```
|
|
831
|
+
|
|
832
|
+
This boots an express-like mock instance at `http://localhost:3000`.
|
|
833
|
+
|
|
834
|
+
### Consuming Mocks in your Code
|
|
835
|
+
To route scrapers to the local mock base rather than live platform servers:
|
|
836
|
+
|
|
837
|
+
```ts
|
|
838
|
+
const scraper = new SpotifyScraper({
|
|
839
|
+
baseUrl: "http://localhost:3000",
|
|
840
|
+
http: {
|
|
841
|
+
allowPrivateNetwork: true // Required: permits localhost requests through the validation guard
|
|
842
|
+
}
|
|
843
|
+
});
|
|
656
844
|
```
|
|
657
845
|
|
|
658
|
-
|
|
846
|
+
Alternatively, set the environment variable:
|
|
847
|
+
```bash
|
|
848
|
+
export WSPER_MOCK_BASE_URL="http://localhost:3000"
|
|
849
|
+
```
|
|
659
850
|
|
|
660
|
-
|
|
661
|
-
2. Export the scraper from `src/scrapers/<name>/index.ts` and `src/scrapers/index.ts`.
|
|
662
|
-
3. Return typed `WsperResponse<T>` results.
|
|
663
|
-
4. Keep HTTP, parsing, queueing, and file download responsibilities separated.
|
|
664
|
-
5. Add or update parser and scraper tests under `tests/`.
|
|
665
|
-
6. Add or update a runnable example under `examples/`.
|
|
666
|
-
7. Use the mock server for flows that should not depend on live third-party behavior in default tests.
|
|
667
|
-
8. Update this README when public API, usage, behavior, examples, or validation results change.
|
|
851
|
+
---
|
|
668
852
|
|
|
669
|
-
##
|
|
853
|
+
## Error Handling & Exceptions
|
|
670
854
|
|
|
671
|
-
|
|
672
|
-
- Use only `http:` and `https:` unless a module explicitly supports something else.
|
|
673
|
-
- Keep SSRF protections enabled; private network requests require explicit `allowPrivateNetwork: true` and should be reserved for local mocks or trusted endpoints.
|
|
674
|
-
- Do not log secrets, cookies, client secrets, authorization headers, access tokens, refresh tokens, or raw credential objects.
|
|
675
|
-
- Do not commit credentials, cookies, tokens, private fixtures, or real session material.
|
|
855
|
+
All errors thrown internally inherit from `WsperError`. You can import and catch specific exceptions for granular error handling:
|
|
676
856
|
|
|
677
|
-
|
|
857
|
+
```ts
|
|
858
|
+
import { WsperError, HttpError, ParseError, ValidationError, DownloadError } from "wsper-js";
|
|
859
|
+
|
|
860
|
+
try {
|
|
861
|
+
await scraper.search("");
|
|
862
|
+
} catch (err) {
|
|
863
|
+
if (err instanceof ValidationError) {
|
|
864
|
+
console.error("Validation failed. Incorrect fields:", err.details);
|
|
865
|
+
} else if (err instanceof HttpError) {
|
|
866
|
+
console.error(`HTTP Status Error ${err.statusCode} on URL ${err.url}`);
|
|
867
|
+
if (err.details?.preview) {
|
|
868
|
+
console.error("Server raw response preview:", err.details.preview);
|
|
869
|
+
}
|
|
870
|
+
} else if (err instanceof ParseError) {
|
|
871
|
+
console.error("Target HTML/JSON parser format changed:", err.message);
|
|
872
|
+
} else if (err instanceof WsperError) {
|
|
873
|
+
console.error(`Generic Wsper error [${err.code}]:`, err.message);
|
|
874
|
+
}
|
|
875
|
+
}
|
|
876
|
+
```
|
|
678
877
|
|
|
679
|
-
|
|
878
|
+
### Exception Hierarchy
|
|
879
|
+
```
|
|
880
|
+
Error (Node.js)
|
|
881
|
+
└── WsperError
|
|
882
|
+
├── HttpError (Thrown on HTTP status codes >= 400 or timeouts)
|
|
883
|
+
├── ParseError (Thrown when DOM scraping or JSON parsing fails)
|
|
884
|
+
├── ValidationError (Thrown when argument formatting is incorrect)
|
|
885
|
+
├── DownloadError (Thrown by the Downloader on size or MIME-type violations)
|
|
886
|
+
└── ScraperError (Thrown by custom scraper-specific subroutines)
|
|
887
|
+
```
|