wsper-js 0.1.2-wc1 → 0.1.2-wc2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +657 -739
- package/dist/index.d.ts +4 -0
- package/dist/index.js +1 -1
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
|
|
2
1
|
<h1 align="center">
|
|
3
2
|
<img alt="ShikanokoBail banner" src="https://i.pinimg.com/736x/0c/ff/62/0cff624a04a81495f4b8e69bcedd34aa.jpg" width="100%"/>
|
|
4
3
|
</h1>
|
|
@@ -12,94 +11,122 @@
|
|
|
12
11
|

|
|
13
12
|
</div>
|
|
14
13
|
|
|
15
|
-
# wsper-js
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
-
|
|
26
|
-
-
|
|
27
|
-
-
|
|
28
|
-
-
|
|
29
|
-
-
|
|
30
|
-
-
|
|
31
|
-
-
|
|
32
|
-
|
|
33
|
-
|
|
14
|
+
# wsper-js: Developer User Manual & API Guide
|
|
15
|
+
|
|
16
|
+
`wsper-js` is an enterprise-grade, TypeScript-first scraper and media generation toolkit. It provides structured scrapers for over 80 public and credentialed services, a deterministic canvas-based rendering engine, a stylized text-to-sticker generator, a secure file downloader, and request-pacing queues designed to interact ethically and robustly with public endpoints.
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Table of Contents
|
|
21
|
+
1. [Prerequisites & Installation](#prerequisites--installation)
|
|
22
|
+
2. [Quick Start](#quick-start)
|
|
23
|
+
3. [Core Concepts & Architecture](#core-concepts--architecture)
|
|
24
|
+
- [Unified WsperResponse Envelope](#unified-wsperresponse-envelope)
|
|
25
|
+
- [Request Pacing & Rate Limiting](#request-pacing--rate-limiting)
|
|
26
|
+
- [Safe HTTP Client & Rotation](#safe-http-client--rotation)
|
|
27
|
+
- [Credentials & Authentication Configuration](#credentials--authentication-configuration)
|
|
28
|
+
4. [In-Depth Feature Modules](#in-depth-feature-modules)
|
|
29
|
+
- [Brat Generator (Stickers & Media)](#1-brat-generator-stickers--media)
|
|
30
|
+
- [Analytics Chart Image Generator](#2-analytics-chart-image-generator)
|
|
31
|
+
- [Safe Downloader Primitive](#3-safe-downloader-primitive)
|
|
32
|
+
5. [Scrapers Usage Catalog](#scrapers-usage-catalog)
|
|
33
|
+
- [Social Media & AI Messaging](#social-media--ai-messaging)
|
|
34
|
+
- [Streaming & Media Resolvers](#streaming--media-resolvers)
|
|
35
|
+
- [Indonesian Reference Services](#indonesian-reference-services)
|
|
36
|
+
- [Global Reference & Search APIs](#global-reference--search-apis)
|
|
37
|
+
- [Scholarly & Academic Metadata](#scholarly--academic-metadata)
|
|
38
|
+
- [Developer Registry & Package APIs](#developer-registry--package-apis)
|
|
39
|
+
- [Utility, AI & Conversion Resolvers](#utility-ai--conversion-resolvers)
|
|
40
|
+
6. [Sandbox & Mock Server Usage](#sandbox--mock-server-usage)
|
|
41
|
+
7. [Error Handling & Exceptions](#error-handling--exceptions)
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## Prerequisites & Installation
|
|
46
|
+
|
|
47
|
+
Install the package via your preferred package manager:
|
|
34
48
|
|
|
35
49
|
```bash
|
|
36
50
|
npm install wsper-js
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
```bash
|
|
51
|
+
# or
|
|
40
52
|
pnpm add wsper-js
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
```bash
|
|
53
|
+
# or
|
|
44
54
|
yarn add wsper-js
|
|
45
55
|
```
|
|
46
56
|
|
|
47
|
-
|
|
57
|
+
### External Dependencies
|
|
58
|
+
Some optional modules require external binaries in your system `PATH`:
|
|
48
59
|
|
|
49
|
-
| Feature | External
|
|
50
|
-
| --- | --- |
|
|
51
|
-
| YouTube
|
|
52
|
-
| Video
|
|
60
|
+
| Feature | External Binary | Notes |
|
|
61
|
+
| --- | --- | --- |
|
|
62
|
+
| YouTube/Spotify Media Downloads | `yt-dlp` | Must be executable in your `PATH` |
|
|
63
|
+
| Brat Video/GIF Conversion | `ffmpeg` | Required for converting canvas streams to MP4/GIF |
|
|
64
|
+
|
|
65
|
+
---
|
|
53
66
|
|
|
54
67
|
## Quick Start
|
|
55
68
|
|
|
69
|
+
### Basic Scraper Class (Direct Usage)
|
|
70
|
+
Every scraper in the library is exported as a standalone class.
|
|
71
|
+
|
|
56
72
|
```ts
|
|
57
73
|
import { LyricsScraper } from "wsper-js";
|
|
58
74
|
|
|
59
|
-
const
|
|
60
|
-
const
|
|
75
|
+
const lyricsScraper = new LyricsScraper();
|
|
76
|
+
const response = await lyricsScraper.search("after hours the weeknd");
|
|
61
77
|
|
|
62
|
-
if (
|
|
63
|
-
|
|
78
|
+
if (response.ok && response.data) {
|
|
79
|
+
console.log(`Title: ${response.data.title}`);
|
|
80
|
+
console.log(`Lyrics: ${response.data.lyrics}`);
|
|
81
|
+
} else {
|
|
82
|
+
console.error(`Error [${response.error?.code}]: ${response.error?.message}`);
|
|
64
83
|
}
|
|
65
|
-
|
|
66
|
-
console.log(result.statusCode);
|
|
67
|
-
console.log(result.data?.title);
|
|
68
|
-
console.log(result.data?.lyrics);
|
|
69
84
|
```
|
|
70
85
|
|
|
71
|
-
|
|
86
|
+
### Aggregated Entrypoint (`WsperScraper`)
|
|
87
|
+
For larger architectures, you can use the unified `WsperScraper` client to access primary social and media scrapers with shared request-pacing configurations.
|
|
72
88
|
|
|
73
89
|
```ts
|
|
74
90
|
import { WsperScraper } from "wsper-js";
|
|
75
91
|
|
|
92
|
+
// Initialize with shared pacing options
|
|
76
93
|
const wsper = new WsperScraper({
|
|
77
|
-
queue: {
|
|
94
|
+
queue: {
|
|
95
|
+
concurrency: 2,
|
|
96
|
+
minDelayMs: 1000,
|
|
97
|
+
maxDelayMs: 3000,
|
|
98
|
+
},
|
|
99
|
+
debug: true
|
|
78
100
|
});
|
|
79
101
|
|
|
80
|
-
|
|
81
|
-
const
|
|
102
|
+
// Run parallel requests through the rate-limited queue
|
|
103
|
+
const [spotifyTrack, ytVideo] = await Promise.all([
|
|
104
|
+
wsper.spotify.search("never gonna give you up", { limit: 1 }),
|
|
105
|
+
wsper.youtube.getVideo("dQw4w9WgXcQ")
|
|
106
|
+
]);
|
|
82
107
|
|
|
83
|
-
console.log(
|
|
108
|
+
console.log("Spotify Search Result:", spotifyTrack.ok);
|
|
109
|
+
console.log("YouTube Video Result:", ytVideo.ok);
|
|
84
110
|
```
|
|
85
111
|
|
|
86
|
-
|
|
112
|
+
---
|
|
87
113
|
|
|
88
|
-
##
|
|
114
|
+
## Core Concepts & Architecture
|
|
89
115
|
|
|
90
|
-
|
|
116
|
+
### Unified WsperResponse Envelope
|
|
117
|
+
Every public scraper method returns a standard `WsperResponse<TData>` envelope. This prevents uncaught runtime errors and simplifies error checks.
|
|
91
118
|
|
|
92
119
|
```ts
|
|
93
120
|
export interface WsperResponse<TData, TMeta extends WsperResponseMeta = WsperResponseMeta> {
|
|
94
|
-
ok: boolean;
|
|
95
|
-
statusCode: number;
|
|
96
|
-
data: TData | null;
|
|
97
|
-
error: {
|
|
121
|
+
ok: boolean; // True if request succeeded and parsing completed
|
|
122
|
+
statusCode: number; // HTTP response code (or internal fallback: 400, 422, 500)
|
|
123
|
+
data: TData | null; // Type-safe payload on success; null on error
|
|
124
|
+
error: { // Normalized error details on failure; null on success
|
|
98
125
|
code: string;
|
|
99
126
|
message: string;
|
|
100
127
|
details?: Record<string, unknown>;
|
|
101
128
|
} | null;
|
|
102
|
-
meta: TMeta;
|
|
129
|
+
meta: TMeta; // Operational metadata
|
|
103
130
|
}
|
|
104
131
|
|
|
105
132
|
export interface WsperResponseMeta {
|
|
@@ -110,860 +137,751 @@ export interface WsperResponseMeta {
|
|
|
110
137
|
}
|
|
111
138
|
```
|
|
112
139
|
|
|
113
|
-
Recommended
|
|
114
|
-
|
|
140
|
+
#### Recommended Code Pattern:
|
|
115
141
|
```ts
|
|
116
|
-
const
|
|
142
|
+
const result = await scraper.search("query");
|
|
117
143
|
|
|
118
|
-
if (
|
|
119
|
-
|
|
144
|
+
if (!result.ok) {
|
|
145
|
+
// Gracefully handle failures without try-catch blocks
|
|
146
|
+
console.error(`Scraper failed: ${result.error?.code} - ${result.error?.message}`);
|
|
147
|
+
if (result.error?.details) {
|
|
148
|
+
console.error("Debug Context:", result.error.details);
|
|
149
|
+
}
|
|
120
150
|
} else {
|
|
121
|
-
|
|
151
|
+
// Safely consume data
|
|
152
|
+
console.log("Scraped Data:", result.data);
|
|
122
153
|
}
|
|
123
154
|
```
|
|
124
155
|
|
|
125
|
-
|
|
156
|
+
### Request Pacing & Rate Limiting
|
|
157
|
+
To respect external platform policies and prevent IP bans, `wsper-js` integrates a rate-limited task queue (`p-queue`) directly into the HTTP layer.
|
|
126
158
|
|
|
127
|
-
|
|
159
|
+
Configure queue parameters in `QueueOptions` at scraper construction:
|
|
128
160
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
Some scrapers need live public targets. The bundled examples avoid dummy URLs:
|
|
161
|
+
```ts
|
|
162
|
+
import { SpotifyScraper } from "wsper-js";
|
|
132
163
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
npx tsx examples/world-bank.example.ts IDN SP.POP.TOTL
|
|
145
|
-
npx tsx examples/ecb.example.ts 100 USD
|
|
146
|
-
npx tsx examples/crossref.example.ts "open science"
|
|
147
|
-
npx tsx examples/openalex.example.ts "open science"
|
|
148
|
-
npx tsx examples/ror.example.ts "University of Indonesia"
|
|
149
|
-
npx tsx examples/clinical-trials.example.ts diabetes
|
|
150
|
-
npx tsx examples/pypi.example.ts requests
|
|
151
|
-
npx tsx examples/packagist.example.ts monolog/monolog
|
|
152
|
-
npx tsx examples/osv.example.ts PyPI requests
|
|
153
|
-
npx tsx examples/binance.example.ts BTCUSDT
|
|
164
|
+
const scraper = new SpotifyScraper({
|
|
165
|
+
queue: {
|
|
166
|
+
concurrency: 1, // Max active parallel connections
|
|
167
|
+
intervalMs: 1000, // Rate-limiting window
|
|
168
|
+
intervalCap: 3, // Max requests allowed per window (3 requests/sec)
|
|
169
|
+
minDelayMs: 500, // Minimum random delay (jitter) before each request
|
|
170
|
+
maxDelayMs: 1500, // Maximum random delay (jitter) before each request
|
|
171
|
+
timeoutMs: 30000, // Max request lifetime before abortion
|
|
172
|
+
retries: 3 // Bounded retries for retryable status codes (e.g. 429, 503)
|
|
173
|
+
}
|
|
174
|
+
});
|
|
154
175
|
```
|
|
155
176
|
|
|
156
|
-
|
|
177
|
+
### Safe HTTP Client & Rotation
|
|
178
|
+
All HTTP tasks execute through a customized `HttpClient` wrapper around Axios. It features:
|
|
179
|
+
* **Browser Profile Rotation**: Automatically rotates randomized, realistic HTTP headers and User-Agent strings.
|
|
180
|
+
* **SSRF Protections**: Restricts destination URLs to standard protocols (`http:`, `https:`) and blocks private networks (`127.0.0.1`, `10.0.0.0/8`, etc.) by default. Set `allowPrivateNetwork: true` only for local testing or mock server environments.
|
|
181
|
+
* **Retry-After Compliance**: Automatically parses `Retry-After` response headers on HTTP 429 and delays subsequent queued requests.
|
|
157
182
|
|
|
158
|
-
|
|
183
|
+
### Credentials & Authentication Configuration
|
|
159
184
|
|
|
160
|
-
|
|
185
|
+
Every scraper in `wsper-js` is designed to run in **public mode** by default. However, many platforms require active session cookies, API tokens, or keys to fetch data successfully.
|
|
161
186
|
|
|
162
|
-
|
|
187
|
+
The library resolves credentials for each scraper instance in the following order of precedence:
|
|
188
|
+
1. **Constructor Options** (Highest priority, recommended)
|
|
189
|
+
2. **Environment Variables** (System fallback)
|
|
163
190
|
|
|
164
|
-
|
|
165
|
-
import { LyricsScraper } from "wsper-js";
|
|
191
|
+
---
|
|
166
192
|
|
|
167
|
-
|
|
168
|
-
const result = await scraper.search("after hours the weeknd");
|
|
193
|
+
#### 1. Input Formats: Raw Cookie String vs. Structured Object
|
|
169
194
|
|
|
170
|
-
|
|
171
|
-
```
|
|
195
|
+
When initializing a scraper, the `credentials` option accepts either a **raw cookie string** or a **structured object** (`WsperCredentials`):
|
|
172
196
|
|
|
173
|
-
|
|
197
|
+
##### A. Raw Cookie String Shortcut (Convenient)
|
|
198
|
+
If you pass a string directly to `credentials`, the library automatically normalizes it into a `{ cookie: string }` object and extracts properties like CSRF tokens.
|
|
199
|
+
```ts
|
|
200
|
+
const instagram = new InstagramScraper({
|
|
201
|
+
credentials: "sessionid=12345678%3Aabcde...; csrftoken=xyz...; ds_user_id=87654321"
|
|
202
|
+
});
|
|
203
|
+
```
|
|
174
204
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
205
|
+
##### B. Structured Object Config (Granular)
|
|
206
|
+
For advanced configurations, pass an object containing explicit fields:
|
|
207
|
+
```ts
|
|
208
|
+
const twitter = new TwitterScraper({
|
|
209
|
+
credentials: {
|
|
210
|
+
cookie: "auth_token=abc123xyz...; ct0=def456...",
|
|
211
|
+
csrfToken: "def456...", // Optional: explicitly provide the csrfToken (X-CSRFToken header)
|
|
212
|
+
userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) ..." // Override the default user-agent profile
|
|
213
|
+
}
|
|
214
|
+
});
|
|
181
215
|
```
|
|
182
216
|
|
|
183
|
-
|
|
217
|
+
---
|
|
184
218
|
|
|
185
|
-
|
|
219
|
+
#### 2. Platform-by-Platform Credentials Guide
|
|
186
220
|
|
|
187
|
-
|
|
188
|
-
import { WallpaperScraper } from "wsper-js";
|
|
221
|
+
Below are the exact options and code structures for initializing each credentialed scraper:
|
|
189
222
|
|
|
190
|
-
|
|
191
|
-
|
|
223
|
+
##### Instagram Scraper (`InstagramScraper`)
|
|
224
|
+
* **Required Credential**: Legitimate session cookie.
|
|
225
|
+
* **Auto-Extraction**: The client automatically parses `csrftoken` out of the cookie string and injects it as the `X-IG-App-ID` and `X-CSRFToken` headers.
|
|
226
|
+
```ts
|
|
227
|
+
import { InstagramScraper } from "wsper-js";
|
|
192
228
|
|
|
193
|
-
|
|
194
|
-
|
|
229
|
+
const instagram = new InstagramScraper({
|
|
230
|
+
credentials: {
|
|
231
|
+
cookie: "sessionid=YOUR_INSTAGRAM_SESSION_ID; csrftoken=YOUR_CSRF_TOKEN;",
|
|
232
|
+
userAgent: "Mozilla/5.0 (iPhone; CPU iPhone OS 17_4 like Mac OS X) ..." // Recommended mobile profile
|
|
233
|
+
}
|
|
234
|
+
});
|
|
195
235
|
```
|
|
196
236
|
|
|
197
|
-
|
|
237
|
+
##### Twitter/X Scraper (`TwitterScraper`)
|
|
238
|
+
* **Required Credential**: Session cookies containing `auth_token` and `ct0` (CSRF token).
|
|
239
|
+
* **Usage**:
|
|
240
|
+
```ts
|
|
241
|
+
import { TwitterScraper } from "wsper-js";
|
|
198
242
|
|
|
199
|
-
|
|
200
|
-
{
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
"resolution": "1920x1080",
|
|
206
|
-
"image": "https://w.wallhaven.cc/full/sky123.jpg",
|
|
207
|
-
"page": "https://wallhaven.cc/w/sky123"
|
|
208
|
-
}
|
|
209
|
-
]
|
|
210
|
-
}
|
|
243
|
+
const twitter = new TwitterScraper({
|
|
244
|
+
credentials: {
|
|
245
|
+
cookie: "auth_token=YOUR_AUTH_TOKEN; ct0=YOUR_CSRF_TOKEN;",
|
|
246
|
+
csrfToken: "YOUR_CSRF_TOKEN" // Must match the value in the cookie (ct0)
|
|
247
|
+
}
|
|
248
|
+
});
|
|
211
249
|
```
|
|
212
250
|
|
|
213
|
-
|
|
251
|
+
##### Character.AI Scraper (`CaiScraper`)
|
|
252
|
+
* **Required Credential**: Bearer Auth Token.
|
|
253
|
+
* **Usage**:
|
|
254
|
+
```ts
|
|
255
|
+
import { CaiScraper } from "wsper-js";
|
|
214
256
|
|
|
215
|
-
|
|
257
|
+
const cai = new CaiScraper({
|
|
258
|
+
credentials: {
|
|
259
|
+
bearerToken: "YOUR_CHARACTER_AI_API_TOKEN" // Injected as "Authorization: Bearer <token>"
|
|
260
|
+
}
|
|
261
|
+
});
|
|
262
|
+
```
|
|
216
263
|
|
|
264
|
+
##### Spotify Scraper (`SpotifyScraper`)
|
|
265
|
+
* **Required Credential**: Spotify Developer Client ID and Secret (from developer.spotify.com).
|
|
266
|
+
* **Usage**: Configure via the dedicated `spotifyCredentials` object in the options.
|
|
217
267
|
```ts
|
|
218
|
-
import {
|
|
268
|
+
import { SpotifyScraper } from "wsper-js";
|
|
269
|
+
|
|
270
|
+
const spotify = new SpotifyScraper({
|
|
271
|
+
spotifyCredentials: {
|
|
272
|
+
clientId: "YOUR_SPOTIFY_CLIENT_ID",
|
|
273
|
+
clientSecret: "YOUR_SPOTIFY_CLIENT_SECRET",
|
|
274
|
+
callbackUrl: "http://localhost:3000/callback", // Optional
|
|
275
|
+
market: "ID" // Optional: country ISO code
|
|
276
|
+
}
|
|
277
|
+
});
|
|
278
|
+
```
|
|
219
279
|
|
|
220
|
-
|
|
221
|
-
|
|
280
|
+
##### YouTube Scraper (`YouTubeScraper`)
|
|
281
|
+
* **Configuration**: Requires paths to system binaries rather than typical session credentials.
|
|
282
|
+
* **Factory Method**: Use `YouTubeScraper.create` to automatically search `PATH` or Python virtual environments for `yt-dlp` and `ffmpeg`.
|
|
283
|
+
```ts
|
|
284
|
+
import { YouTubeScraper } from "wsper-js";
|
|
285
|
+
|
|
286
|
+
// Manual configuration
|
|
287
|
+
const yt = new YouTubeScraper({
|
|
288
|
+
ytdlpPath: "C:\\tools\\yt-dlp.exe",
|
|
289
|
+
ffmpegPath: "C:\\tools\\ffmpeg.exe",
|
|
290
|
+
ffprobePath: "C:\\tools\\ffprobe.exe",
|
|
291
|
+
outputDir: "./downloads/vids"
|
|
292
|
+
});
|
|
222
293
|
|
|
223
|
-
|
|
294
|
+
// Auto-detection factory method (Recommended)
|
|
295
|
+
const ytAuto = await YouTubeScraper.create({
|
|
296
|
+
pythonPath: "python3", // Tries "python3 -m yt_dlp" fallback if not in PATH
|
|
297
|
+
outputDir: "./downloads/vids"
|
|
298
|
+
});
|
|
224
299
|
```
|
|
225
300
|
|
|
226
|
-
|
|
301
|
+
##### remove.bg Scraper (`RemovebgScraper`)
|
|
302
|
+
* **Required Credential**: Official remove.bg API key.
|
|
303
|
+
* **Usage**:
|
|
304
|
+
```ts
|
|
305
|
+
import { RemovebgScraper } from "wsper-js";
|
|
227
306
|
|
|
228
|
-
|
|
229
|
-
{
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
"bio": "Bio karakter.",
|
|
234
|
-
"profile": {},
|
|
235
|
-
"images": []
|
|
236
|
-
}
|
|
307
|
+
const removebg = new RemovebgScraper({
|
|
308
|
+
credentials: {
|
|
309
|
+
apiKey: "YOUR_REMOVE_BG_API_KEY"
|
|
310
|
+
}
|
|
311
|
+
});
|
|
237
312
|
```
|
|
238
313
|
|
|
239
|
-
|
|
314
|
+
---
|
|
315
|
+
|
|
316
|
+
#### 3. Configuring Credentials in the Unified `WsperScraper`
|
|
240
317
|
|
|
241
|
-
|
|
318
|
+
The aggregate entrypoint class `WsperScraper` exposes a nested configuration layout, allowing you to bootstrap all credential-requiring scrapers simultaneously:
|
|
242
319
|
|
|
243
320
|
```ts
|
|
244
|
-
import {
|
|
321
|
+
import { WsperScraper } from "wsper-js";
|
|
245
322
|
|
|
246
|
-
const
|
|
247
|
-
|
|
323
|
+
const wsper = new WsperScraper({
|
|
324
|
+
// 1. Spotify developer credentials (dedicated object)
|
|
325
|
+
spotifyCredentials: {
|
|
326
|
+
clientId: "YOUR_SPOTIFY_CLIENT_ID",
|
|
327
|
+
clientSecret: "YOUR_SPOTIFY_CLIENT_SECRET",
|
|
328
|
+
},
|
|
329
|
+
// 2. Platform-specific session credentials
|
|
330
|
+
credentials: {
|
|
331
|
+
instagram: "sessionid=IG_SESSION_COOKIE; csrftoken=IG_CSRF_TOKEN",
|
|
332
|
+
twitter: {
|
|
333
|
+
cookie: "auth_token=X_AUTH_COOKIE; ct0=X_CSRF_COOKIE",
|
|
334
|
+
csrfToken: "X_CSRF_COOKIE"
|
|
335
|
+
},
|
|
336
|
+
cai: {
|
|
337
|
+
bearerToken: "CHARACTER_AI_WS_TOKEN"
|
|
338
|
+
},
|
|
339
|
+
threads: "sessionid=THREADS_SESSION_COOKIE; csrftoken=THREADS_CSRF_TOKEN",
|
|
340
|
+
pinterest: "sessionid=PINTEREST_SESSION_COOKIE"
|
|
341
|
+
},
|
|
342
|
+
// 3. YouTube binary paths configuration
|
|
343
|
+
youtube: {
|
|
344
|
+
ytdlpPath: "yt-dlp",
|
|
345
|
+
ffmpegPath: "ffmpeg"
|
|
346
|
+
}
|
|
347
|
+
});
|
|
248
348
|
|
|
249
|
-
|
|
349
|
+
// Access child clients directly
|
|
350
|
+
const isIgOk = (await wsper.instagram.getUserProfile("some_user")).ok;
|
|
250
351
|
```
|
|
251
352
|
|
|
252
|
-
|
|
353
|
+
---
|
|
253
354
|
|
|
254
|
-
|
|
255
|
-
{
|
|
256
|
-
"title": "Sun Wukong",
|
|
257
|
-
"image": null,
|
|
258
|
-
"profile": {
|
|
259
|
-
"Role": "Fighter"
|
|
260
|
-
},
|
|
261
|
-
"bio": "Bio singkat karakter.",
|
|
262
|
-
"skills": [],
|
|
263
|
-
"lore": null,
|
|
264
|
-
"url": "https://honor-of-kings.fandom.com/wiki/Sun%20Wukong"
|
|
265
|
-
}
|
|
266
|
-
```
|
|
355
|
+
#### 4. Environment Variables Configuration
|
|
267
356
|
|
|
268
|
-
|
|
357
|
+
If you do not pass credentials in the constructor, `wsper-js` will check your system environment variables. You can store these in a `.env` file in the root of your application.
|
|
269
358
|
|
|
270
|
-
|
|
359
|
+
> [!WARNING]
|
|
360
|
+
> The library itself does **not** load `.env` files automatically to avoid side-effects. You must import `dotenv` or run your Node.js application with the `--env-file` flag (Node 20.6+).
|
|
271
361
|
|
|
272
|
-
|
|
273
|
-
|
|
362
|
+
##### Node.js 20.6+ Native Env Loading:
|
|
363
|
+
```bash
|
|
364
|
+
node --env-file=.env index.js
|
|
365
|
+
```
|
|
274
366
|
|
|
275
|
-
|
|
276
|
-
|
|
367
|
+
##### Standard dotenv Import:
|
|
368
|
+
```ts
|
|
369
|
+
import "dotenv/config";
|
|
370
|
+
import { WsperScraper } from "wsper-js";
|
|
277
371
|
|
|
278
|
-
|
|
372
|
+
const wsper = new WsperScraper(); // Reads WSPER_* environment variables automatically
|
|
279
373
|
```
|
|
280
374
|
|
|
281
|
-
|
|
375
|
+
##### Supported Env Keys Reference:
|
|
376
|
+
```ini
|
|
377
|
+
# Spotify Developer API
|
|
378
|
+
WSPER_SPOTIFY_CLIENT_ID=your_client_id
|
|
379
|
+
WSPER_SPOTIFY_CLIENT_SECRET=your_client_secret
|
|
380
|
+
WSPER_SPOTIFY_CALLBACK_URL=http://localhost:3000/callback
|
|
381
|
+
WSPER_SPOTIFY_MARKET=US
|
|
282
382
|
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
383
|
+
# Social Media Session Cookies
|
|
384
|
+
WSPER_INSTAGRAM_COOKIE="sessionid=ig_cookie_val; csrftoken=ig_csrf_val;"
|
|
385
|
+
WSPER_THREADS_COOKIE="sessionid=threads_cookie_val; csrftoken=threads_csrf_val;"
|
|
386
|
+
WSPER_TWITTER_COOKIE="auth_token=twitter_cookie_val; ct0=twitter_csrf_val;"
|
|
387
|
+
WSPER_PINTEREST_COOKIE="sessionid=pinterest_cookie_val;"
|
|
388
|
+
WSPER_TIKTOK_COOKIE="sessionid=tiktok_cookie_val;"
|
|
389
|
+
WSPER_FACEBOOK_COOKIE="c_user=fb_cookie_val; xs=fb_xs_val;"
|
|
390
|
+
WSPER_BILIBILI_COOKIE="SESSDATA=bili_cookie_val;"
|
|
391
|
+
|
|
392
|
+
# Character.AI Auth
|
|
393
|
+
WSPER_CAI_TOKEN=your_cai_bearer_token
|
|
394
|
+
|
|
395
|
+
# AI Tools API Keys
|
|
396
|
+
REMOVEBG_API_KEY=your_remove_bg_api_key
|
|
287
397
|
```
|
|
288
398
|
|
|
289
|
-
|
|
399
|
+
---
|
|
290
400
|
|
|
291
|
-
|
|
401
|
+
## In-Depth Feature Modules
|
|
292
402
|
|
|
403
|
+
### 1. Brat Generator (Stickers & Media)
|
|
404
|
+
The Brat module provides programmatic text-to-media rendering, applying the classic green-and-black aesthetic (popularized by Charli XCX) or customized styles.
|
|
405
|
+
|
|
406
|
+
#### Generate static Brat Images (PNG/JPG/WEBP)
|
|
293
407
|
```ts
|
|
294
|
-
import {
|
|
295
|
-
|
|
408
|
+
import { BratGenerator } from "wsper-js";
|
|
409
|
+
|
|
410
|
+
const brat = new BratGenerator();
|
|
296
411
|
|
|
297
|
-
const
|
|
298
|
-
|
|
299
|
-
|
|
412
|
+
const imageResult = await brat.generate({
|
|
413
|
+
canvas: { preset: "1:1", width: 1024 }, // Aspect presets: "1:1", "9:16", "16:9"
|
|
414
|
+
text: {
|
|
415
|
+
value: "kamu pas kecil pernah nelen magnet ya? menarik banget soalnya",
|
|
416
|
+
align: "justify", // "left" | "center" | "right" | "justify"
|
|
417
|
+
color: "#111111",
|
|
418
|
+
blur: 0, // >0 to blur only the text pixels
|
|
419
|
+
},
|
|
420
|
+
background: { type: "solid", color: "#8ace00" }, // or "linear-gradient" / "radial-gradient"
|
|
421
|
+
backgroundBlur: 0, // blur radius for background layout (frosted-glass)
|
|
422
|
+
output: {
|
|
423
|
+
type: "image",
|
|
424
|
+
format: "png",
|
|
425
|
+
path: "./downloads/brat.png"
|
|
426
|
+
}
|
|
427
|
+
});
|
|
300
428
|
|
|
301
|
-
console.log(
|
|
429
|
+
console.log("Saved static Brat image to:", imageResult.path);
|
|
430
|
+
```
|
|
431
|
+
|
|
432
|
+
#### Generate Animated Brat GIFs & Videos
|
|
433
|
+
```ts
|
|
434
|
+
const gifResult = await brat.generate({
|
|
435
|
+
text: {
|
|
436
|
+
value: "charli xcx is so brat i can't even",
|
|
437
|
+
align: "center"
|
|
438
|
+
},
|
|
439
|
+
background: { type: "solid", color: "#8ace00" },
|
|
440
|
+
animation: {
|
|
441
|
+
enabled: true,
|
|
442
|
+
mode: "word", // "word" (appear word-by-word) or "character" (typewriter effect)
|
|
443
|
+
direction: "left-to-right",
|
|
444
|
+
fps: 15, // frame rate
|
|
445
|
+
textSpeed: 120, // milliseconds delay per word/character
|
|
446
|
+
},
|
|
447
|
+
output: {
|
|
448
|
+
type: "gif",
|
|
449
|
+
path: "./downloads/animated_brat.gif"
|
|
450
|
+
}
|
|
451
|
+
});
|
|
302
452
|
```
|
|
303
453
|
|
|
304
|
-
|
|
454
|
+
#### WhatsApp Sticker Generation & Media Format Conversion
|
|
455
|
+
```ts
|
|
456
|
+
// 1. Convert image buffer to WhatsApp-compliant sticker (512x512 WebP with overlays)
|
|
457
|
+
const stickerBuffer = await brat.imageToSticker(imageResult.buffer, {
|
|
458
|
+
size: 512,
|
|
459
|
+
format: "webp",
|
|
460
|
+
top: "BRAT ENERGY",
|
|
461
|
+
bottom: "2026",
|
|
462
|
+
});
|
|
305
463
|
|
|
306
|
-
|
|
307
|
-
{
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
"resultUrl": "https://cdn.test/out.jpg",
|
|
311
|
-
"scale": 4
|
|
312
|
-
}
|
|
464
|
+
// 2. Transcode formats (requires ffmpeg)
|
|
465
|
+
const mp4Buffer = await brat.gifToMp4("./downloads/animated_brat.gif", "./downloads/brat.mp4", { fps: 24 });
|
|
466
|
+
const webmBuffer = await brat.gifToWebm("./downloads/animated_brat.gif", "./downloads/brat.webm");
|
|
467
|
+
const reassembledGif = await brat.mp4ToGif("./downloads/brat.mp4", "./downloads/transcoded.gif");
|
|
313
468
|
```
|
|
314
469
|
|
|
315
|
-
|
|
470
|
+
---
|
|
316
471
|
|
|
317
|
-
|
|
472
|
+
### 2. Analytics Chart Image Generator
|
|
473
|
+
The `ChartGenerator` creates high-resolution visual statistics cards, posters, and dashboard widgets containing comparative group-and-user trends using `@napi-rs/canvas`.
|
|
318
474
|
|
|
319
475
|
```ts
|
|
320
|
-
import {
|
|
321
|
-
|
|
476
|
+
import { ChartGenerator, MonthlyPoint, WeeklyPoint } from "wsper-js";
|
|
477
|
+
|
|
478
|
+
const chart = new ChartGenerator();
|
|
479
|
+
|
|
480
|
+
const monthlyData: MonthlyPoint[] = [
|
|
481
|
+
{ label: "JAN", group: 120, user: 90 },
|
|
482
|
+
{ label: "FEB", group: 150, user: 110 },
|
|
483
|
+
{ label: "MAR", group: 240, user: 180 },
|
|
484
|
+
];
|
|
485
|
+
|
|
486
|
+
const weeklyData: WeeklyPoint[] = [
|
|
487
|
+
{ label: "MON", group: 40, user: 20 },
|
|
488
|
+
{ label: "TUE", group: 35, user: 25 },
|
|
489
|
+
{ label: "WED", group: 50, user: 30 },
|
|
490
|
+
];
|
|
491
|
+
|
|
492
|
+
const chartImageBuffer = await chart.generateAnalyticsStatsImage({
|
|
493
|
+
title: "Application Traffic Report",
|
|
494
|
+
subtitle: "Comparison: System Group vs Active Users",
|
|
495
|
+
monthly: monthlyData,
|
|
496
|
+
weekly: weeklyData,
|
|
497
|
+
width: 1024,
|
|
498
|
+
height: 1024,
|
|
499
|
+
output: "./downloads/traffic_report.png",
|
|
500
|
+
footer: "Generated via wsper-js Analytics module",
|
|
501
|
+
// Choose model style: "vintage-poster" | "modern-dashboard" | "minimal-report" | "dark-neon" | "compact-card"
|
|
502
|
+
model: "modern-dashboard",
|
|
503
|
+
theme: {
|
|
504
|
+
// Optionally override specific theme colors
|
|
505
|
+
background: "#0f172a",
|
|
506
|
+
ink: "#ffffff",
|
|
507
|
+
groupLine: "#38bdf8",
|
|
508
|
+
userLine: "#34d399",
|
|
509
|
+
}
|
|
510
|
+
});
|
|
511
|
+
```
|
|
322
512
|
|
|
323
|
-
|
|
324
|
-
const scraper = new PhotoAiScraper();
|
|
513
|
+
---
|
|
325
514
|
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
const status = await scraper.checkStatus(upload.data.code);
|
|
329
|
-
console.log(status.data);
|
|
330
|
-
}
|
|
331
|
-
```
|
|
515
|
+
### 3. Safe Downloader Primitive
|
|
516
|
+
The `Downloader` class lets developers download files to the local filesystem while enforcing safe output paths, preventing path traversal, and enforcing file-size limits.
|
|
332
517
|
|
|
333
|
-
|
|
518
|
+
```ts
|
|
519
|
+
import { Downloader } from "wsper-js";
|
|
334
520
|
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
"
|
|
341
|
-
"
|
|
342
|
-
|
|
521
|
+
// Initialize and bind to a specific safe download directory
|
|
522
|
+
const downloader = new Downloader("./downloads/sandbox");
|
|
523
|
+
|
|
524
|
+
try {
|
|
525
|
+
const downloadResult = await downloader.download(
|
|
526
|
+
"https://example.com/large-assets.zip",
|
|
527
|
+
"archives/assets.zip", // Saved relatively inside './downloads/sandbox'
|
|
528
|
+
{
|
|
529
|
+
overwrite: true,
|
|
530
|
+
maxSizeBytes: 100 * 1024 * 1024, // Limit files to 100MB
|
|
531
|
+
allowedMimeTypes: ["application/zip"], // Whitelist content-type headers
|
|
532
|
+
timeoutMs: 60000 // Set download timeout
|
|
533
|
+
}
|
|
534
|
+
);
|
|
535
|
+
console.log("Download complete:", downloadResult.outputPath);
|
|
536
|
+
} catch (err) {
|
|
537
|
+
console.error("Secure download failed:", err.message);
|
|
343
538
|
}
|
|
344
539
|
```
|
|
345
540
|
|
|
346
|
-
|
|
541
|
+
---
|
|
542
|
+
|
|
543
|
+
## Scrapers Usage Catalog
|
|
347
544
|
|
|
348
|
-
|
|
545
|
+
### Social Media & AI Messaging
|
|
546
|
+
|
|
547
|
+
#### Character.AI (`CaiScraper`)
|
|
548
|
+
Integrates search, Chat-V2, and TTS (Text-to-Speech) generation with session reuse.
|
|
349
549
|
|
|
350
550
|
```ts
|
|
351
|
-
import {
|
|
352
|
-
import { FaceswapScraper } from "wsper-js";
|
|
551
|
+
import { CaiScraper } from "wsper-js";
|
|
353
552
|
|
|
354
|
-
const
|
|
355
|
-
readFile("./source-face.jpg"),
|
|
356
|
-
readFile("./target.jpg"),
|
|
357
|
-
]);
|
|
553
|
+
const cai = new CaiScraper({ credentials: { bearerToken: "YOUR_CAI_TOKEN" } });
|
|
358
554
|
|
|
359
|
-
|
|
360
|
-
const
|
|
555
|
+
// Search for a character
|
|
556
|
+
const search = await cai.searchCharacters("Mario");
|
|
557
|
+
const marioId = search.data?.[0]?.externalId;
|
|
361
558
|
|
|
362
|
-
|
|
363
|
-
|
|
559
|
+
// Send chat message and generate TTS Audio URL
|
|
560
|
+
if (marioId) {
|
|
561
|
+
const reply = await cai.chat({
|
|
562
|
+
characterId: marioId,
|
|
563
|
+
message: "It's-a me, who are you?",
|
|
564
|
+
voiceId: "4fdd6bc1-c659-4587-b462-53f569b39078" // Optional voice ID
|
|
565
|
+
});
|
|
364
566
|
|
|
365
|
-
|
|
567
|
+
console.log("Response text:", reply.data?.text);
|
|
568
|
+
console.log("TTS audio URL:", reply.data?.audioUrl);
|
|
366
569
|
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
"job_id": "job-1",
|
|
370
|
-
"image": "https://cdn.test/out.jpg"
|
|
570
|
+
// Clean up WebSocket session
|
|
571
|
+
await cai.disconnectCharacterSession(marioId);
|
|
371
572
|
}
|
|
372
573
|
```
|
|
373
574
|
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
Source: `https://aienhancer.ai`.
|
|
575
|
+
#### Instagram (`InstagramScraper`)
|
|
576
|
+
Fetches user profiles and user posts. Requires a legitimate authenticated session cookie.
|
|
377
577
|
|
|
378
578
|
```ts
|
|
379
|
-
import {
|
|
380
|
-
|
|
579
|
+
import { InstagramScraper } from "wsper-js";
|
|
580
|
+
|
|
581
|
+
const ig = new InstagramScraper({ credentials: "sessionid=YOUR_COOKIE..." });
|
|
381
582
|
|
|
382
|
-
const
|
|
383
|
-
|
|
384
|
-
const result = await scraper.upscaleBuffer(image, "image/jpeg");
|
|
583
|
+
const profile = await ig.getUserProfile("charlixcx");
|
|
584
|
+
console.log("Bio:", profile.data?.bio);
|
|
385
585
|
|
|
386
|
-
|
|
586
|
+
const posts = await ig.getUserPosts("charlixcx", { limit: 12 });
|
|
387
587
|
```
|
|
388
588
|
|
|
389
|
-
|
|
589
|
+
#### Twitter/X (`TwitterScraper`)
|
|
590
|
+
Reads user timelines and tweets. The `csrfToken` is extracted automatically from the cookie string when one is passed.
|
|
390
591
|
|
|
391
|
-
```
|
|
392
|
-
{
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
592
|
+
```ts
|
|
593
|
+
import { TwitterScraper } from "wsper-js";
|
|
594
|
+
|
|
595
|
+
const x = new TwitterScraper({ credentials: "auth_token=YOUR_TOKEN..." });
|
|
596
|
+
|
|
597
|
+
const tweets = await x.getUserTimeline("elonmusk");
|
|
598
|
+
const singleTweet = await x.getTweetDetail("1234567890");
|
|
397
599
|
```
|
|
398
600
|
|
|
399
|
-
|
|
601
|
+
---
|
|
602
|
+
|
|
603
|
+
### Streaming & Media Resolvers
|
|
400
604
|
|
|
401
|
-
|
|
605
|
+
#### Spotify (`SpotifyScraper`)
|
|
606
|
+
Retrieves tracks, albums, playlists, and downloads MP3 wrappers (using external `yt-dlp` enrichment).
|
|
402
607
|
|
|
403
608
|
```ts
|
|
404
|
-
import {
|
|
609
|
+
import { SpotifyScraper } from "wsper-js";
|
|
405
610
|
|
|
406
|
-
const
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
console.log(
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
611
|
+
const spotify = new SpotifyScraper(); // Can take optional spotifyCredentials
|
|
612
|
+
|
|
613
|
+
// Get enriched metadata
|
|
614
|
+
const track = await spotify.getTrack("4PTG3Z6ehGkBF3zI7Ywtqs", { enrichYtDlp: true });
|
|
615
|
+
console.log("Spotify Title:", track.data?.name);
|
|
616
|
+
console.log("Associated YT Link:", track.data?.enriched?.youtubeUrl);
|
|
617
|
+
|
|
618
|
+
// Download track locally (saves artwork and MP3 file)
|
|
619
|
+
const download = await spotify.downloadPost({
|
|
620
|
+
trackUrlOrId: "4PTG3Z6ehGkBF3zI7Ywtqs",
|
|
621
|
+
outputDir: "./downloads/music",
|
|
622
|
+
audioFormat: "mp3",
|
|
623
|
+
includeMetadata: true
|
|
624
|
+
});
|
|
625
|
+
```
|
|
626
|
+
|
|
627
|
+
#### YouTube (`YouTubeScraper`)
|
|
628
|
+
Runs search queries, scrapes playlists, and triggers local transcode scripts.
|
|
629
|
+
|
|
630
|
+
```ts
|
|
631
|
+
import { YouTubeScraper } from "wsper-js";
|
|
632
|
+
|
|
633
|
+
const yt = new YouTubeScraper();
|
|
634
|
+
|
|
635
|
+
const video = await yt.getVideo("dQw4w9WgXcQ");
|
|
636
|
+
const search = await yt.searchVideos("lofi beats", { limit: 5 });
|
|
637
|
+
|
|
638
|
+
// Downloads video/audio (requires yt-dlp in PATH)
|
|
639
|
+
await yt.downloadVideo("dQw4w9WgXcQ", { outputDir: "./downloads/vids" });
|
|
640
|
+
await yt.downloadAudio("dQw4w9WgXcQ", { outputDir: "./downloads/vids", audioFormat: "mp3" });
|
|
430
641
|
```
|
|
431
642
|
|
|
432
|
-
|
|
643
|
+
---
|
|
433
644
|
|
|
434
|
-
|
|
645
|
+
### Indonesian Reference Services
|
|
646
|
+
|
|
647
|
+
#### Prayer Times (`BimasIslamScraper`)
|
|
648
|
+
Fetches official prayer schedules for provinces and cities in Indonesia from Kemenag.
|
|
435
649
|
|
|
436
650
|
```ts
|
|
437
|
-
import {
|
|
651
|
+
import { BimasIslamScraper } from "wsper-js";
|
|
652
|
+
|
|
653
|
+
const kemenag = new BimasIslamScraper();
|
|
438
654
|
|
|
439
|
-
const
|
|
440
|
-
const
|
|
441
|
-
"https://www.mediafire.com/file/ipnyzofjcwri357/test-10mb.bin/file",
|
|
442
|
-
);
|
|
655
|
+
const provinces = await kemenag.getProvinces();
|
|
656
|
+
const cities = await kemenag.getCities(provinces.data?.[0]?.id || "");
|
|
443
657
|
|
|
444
|
-
|
|
658
|
+
// Fetch prayer schedules for a specific region (Jakarta ID=18) for June 2026
|
|
659
|
+
const schedule = await kemenag.getPrayerTimes("18", "18", 6, 2026);
|
|
660
|
+
console.log("Prayer Times for June 1st:", schedule.data?.[0]);
|
|
445
661
|
```
|
|
446
662
|
|
|
447
|
-
|
|
663
|
+
#### Earthquake & Weather (`BMKGScraper` & `CuacaScraper`)
|
|
664
|
+
Retrieves BMKG meteorological data feeds.
|
|
448
665
|
|
|
449
|
-
```
|
|
450
|
-
{
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
666
|
+
```ts
|
|
667
|
+
import { BMKGScraper, CuacaScraper } from "wsper-js";
|
|
668
|
+
|
|
669
|
+
const bmkg = new BMKGScraper();
|
|
670
|
+
const cuaca = new CuacaScraper();
|
|
671
|
+
|
|
672
|
+
// 1. Get recent earthquake alerts
|
|
673
|
+
const earthquakes = await bmkg.getRecentEarthquakes();
|
|
674
|
+
console.log("Recent Gempa:", earthquakes.data?.gempaTerbuka);
|
|
675
|
+
|
|
676
|
+
// 2. Get local weather forecast by latitude & longitude
|
|
677
|
+
const localWeather = await cuaca.getWeatherByCoordinate(-6.2, 106.8);
|
|
456
678
|
```
|
|
457
679
|
|
|
458
|
-
|
|
680
|
+
---
|
|
459
681
|
|
|
460
|
-
|
|
682
|
+
### Global Reference & Search APIs
|
|
683
|
+
|
|
684
|
+
#### Rest Countries (`RestCountriesScraper`)
|
|
685
|
+
Provides country profiles. `getAll` requires an explicit list of fields so that HTTP payloads stay bounded.
|
|
461
686
|
|
|
462
687
|
```ts
|
|
463
|
-
import {
|
|
688
|
+
import { RestCountriesScraper } from "wsper-js";
|
|
464
689
|
|
|
465
|
-
const
|
|
466
|
-
credentials: {
|
|
467
|
-
bearerToken: "YOUR_CHARACTER_AI_TOKEN",
|
|
468
|
-
},
|
|
469
|
-
});
|
|
690
|
+
const rest = new RestCountriesScraper();
|
|
470
691
|
|
|
471
|
-
//
|
|
472
|
-
const
|
|
473
|
-
|
|
692
|
+
// Explicitly choose required fields for performance
|
|
693
|
+
const countries = await rest.getAll(["name", "cca2", "population", "flags"]);
|
|
694
|
+
const indonesia = await rest.getByName("Indonesia");
|
|
695
|
+
```
|
|
474
696
|
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
});
|
|
481
|
-
console.log(chat.data?.text);
|
|
482
|
-
console.log(chat.data?.audioUrl); // Generated TTS audio URL
|
|
697
|
+
#### Wikipedia (`WikipediaScraper`)
|
|
698
|
+
Searches and summarizes wiki entries.
|
|
699
|
+
|
|
700
|
+
```ts
|
|
701
|
+
import { WikipediaScraper } from "wsper-js";
|
|
483
702
|
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
await
|
|
487
|
-
|
|
488
|
-
await scraper.disconnectAllCharacterSessions();
|
|
703
|
+
const wiki = new WikipediaScraper();
|
|
704
|
+
|
|
705
|
+
const summary = await wiki.getSummary("Artificial Intelligence", "en");
|
|
706
|
+
console.log("Excerpt:", summary.data?.extract);
|
|
489
707
|
```
|
|
490
708
|
|
|
491
|
-
|
|
709
|
+
#### OpenMeteo (`OpenMeteoScraper`)
|
|
710
|
+
Queries forecasts and current weather parameters.
|
|
492
711
|
|
|
493
|
-
```
|
|
494
|
-
{
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
}
|
|
712
|
+
```ts
|
|
713
|
+
import { OpenMeteoScraper } from "wsper-js";
|
|
714
|
+
|
|
715
|
+
const weather = new OpenMeteoScraper();
|
|
716
|
+
const forecast = await weather.getForecast(-6.2, 106.8, { hourly: "temperature_2m" });
|
|
499
717
|
```
|
|
500
718
|
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
This table is audited against the public scraper exports in `src/scrapers/index.ts`.
|
|
504
|
-
|
|
505
|
-
| Scraper | Purpose | Source/API | Auth/Cookie required? | Example file | Notes |
|
|
506
|
-
| --- | --- | --- | --- | --- | --- |
|
|
507
|
-
| `AioScraper` | Resolve public media download options | `allinonedownloader.com` | No | `examples/aio.example.ts` | `download(url)`; use only authorized public media URLs |
|
|
508
|
-
| `AlkitabScraper` | Bible verse search | `alkitab.me` | No | `examples/alkitab.example.ts` | `search(query)` |
|
|
509
|
-
| `AnimeQuoteScraper` | Random anime quote | `otakotaku.com` | No | `examples/anime-quote.example.ts` | `getRandom()` |
|
|
510
|
-
| `AnimeRandomScraper` | Random anime character image | GitHub raw anime dataset | No | `examples/anime-random.example.ts` | `getImage(character)`, `random()` |
|
|
511
|
-
| `ArenaAiScraper` | AI model leaderboard categories and rankings | `api.wulong.dev/arena-ai-leaderboards/v1` | No | `examples/arena-ai.example.ts` | `getCategories`, `getLeaderboard` |
|
|
512
|
-
| `BiliBiliScraper` | BiliBili search and video info | `api.bilibili.com` | Optional cookie | `examples/bilibili.example.ts` | Cookie may unlock authenticated stream access |
|
|
513
|
-
| `BinanceScraper` | Binance public market data | Binance Spot market data API | No | `examples/binance.example.ts` | Market data only: ticker, average price, klines, order book; no trading/account endpoints |
|
|
514
|
-
| `BimasIslamScraper` | Indonesian Kemenag prayer time regions and schedules | `bimasislam.kemenag.go.id` | No | `examples/bimas-islam.example.ts` | `getProvinces`, `getCities`, `getPrayerTimes` |
|
|
515
|
-
| `BMKGScraper` | Indonesian earthquake and weather feeds | `data.bmkg.go.id`, `nowcasting.bmkg.go.id` | No | `examples/bmkg.example.ts` | Autogempa, gempa dirasakan, nowcasting, forecast |
|
|
516
|
-
| `CaiScraper` | Character.AI search, chat, and voice details | `neo.character.ai` API & WebSockets | Yes (Token required) | `examples/cai.example.ts` | `searchCharacters`, `chat`, `getVoice` |
|
|
517
|
-
| `CapCutScraper` | Resolve CapCut template video URL | `capdownloader.com/wp-json/aio-dl/video-data/` | No | `examples/capcut.example.ts` | Mocked in example runner |
|
|
518
|
-
| `ClinicalTrialsScraper` | ClinicalTrials.gov public study search and detail lookup | ClinicalTrials.gov API v2 | No | `examples/clinical-trials.example.ts` | `searchStudies`, `getStudy`; public registry metadata only |
|
|
519
|
-
| `CodashopScraper` | Game nickname verification | `order-sg.codashop.com` | No | `examples/codashop.example.ts` | Supported game names are validated in the scraper |
|
|
520
|
-
| `CrossrefScraper` | Scholarly work and journal metadata | Crossref REST API | No | `examples/crossref.example.ts` | `searchWorks`, `getWorkByDoi`, `searchJournals`; optional polite `mailto` option |
|
|
521
|
-
| `CuacaScraper` | Indonesian weather by location/coordinate | BMKG weather APIs | Optional API key for warnings | `examples/cuaca.example.ts` | Reads optional `BMKG_WARNING_API_KEY` in example |
|
|
522
|
-
| `DetikNewsScraper` | Detik news search | `detik.com` search HTML | No | `examples/detik-news.example.ts` | `search(query, resultType)` |
|
|
523
|
-
| `DonghubScraper` | Drama search and detail pages | `donghub.vip` | No | `examples/donghub.example.ts` | `search`, `getDetail` |
|
|
524
|
-
| `DownrScraper` | Resolve public video download options | `downr.org` Netlify functions | No | `examples/downr.example.ts` | `getVideo(url)`; use only authorized public media URLs |
|
|
525
|
-
| `DrakorScraper` | Korean drama search/list/detail | `drakorkita30.kita.baby` | No | `examples/drakor.example.ts` | `search`, `detail`, `ongoing`, `getAll` |
|
|
526
|
-
| `DramaboxScraper` | Dramabox search | `dramabox.com` | No | `examples/dramabox.example.ts` | `search(query)` |
|
|
527
|
-
| `EcbScraper` | ECB euro foreign exchange reference rates | ECB eurofxref XML feed | No | `examples/ecb.example.ts` | `getDailyReferenceRates`, `convertFromEuro`; no investment advice |
|
|
528
|
-
| `FacebookScraper` | Facebook public profile/post metadata and image download | `facebook.com`, `mbasic.facebook.com` | No credential option in current scraper | `examples/facebook.example.ts` | Public pages only; no session extraction |
|
|
529
|
-
| `FaceswapScraper` | Face-swap image processing | `api.lovefaceswap.com` | No | `examples/faceswap.example.ts` | Mocked in example runner |
|
|
530
|
-
| `FaceswapV2Scraper` | Face-swap image processing via URL inputs | `supawork.ai` headshot API | No | `examples/faceswap-v2.example.ts` | `swap(targetImageUrl, targetFaceUrl)` |
|
|
531
|
-
| `ForeignNewsScraper` | RSS news feeds | BBC, CNBC Indonesia, CNN Indonesia, Antara, Republika | No | `examples/foreign-news.example.ts` | `getBbcNews`, `getCnbcNews`, `getCnnNews`, `getAntaraNews`, `getRepublikaNews` |
|
|
532
|
-
| `GDriveScraper` | Public Google Drive file metadata and direct download URL | `drive.google.com/uc` | No for public files | `examples/gdrive.example.ts` | `getFileInfo(fileIdOrUrl)` |
|
|
533
|
-
| `GenshinImpactScraper` | Genshin official manga and Hoyowiki entries | `genshin.hoyoverse.com`, `sg-public-api.hoyolab.com` | No | `examples/genshinimpact.example.ts` | `getMangaChapters`, `getWikiCategories`, `getWikiEntries` |
|
|
534
|
-
| `GoqrScraper` | QR code image generation | `api.qrserver.com` | No | `examples/goqr.example.ts` | Returns image `Buffer` |
|
|
535
|
-
| `GutendexScraper` | Project Gutenberg book metadata search | Gutendex JSON API | No | `examples/gutendex.example.ts` | `searchBooks`, `getBook`; metadata only, no bulk book downloads |
|
|
536
|
-
| `HackerNewsScraper` | Hacker News stories, items, and users | Hacker News Firebase API | No | `examples/hacker-news.example.ts` | `getTopStories`, `getNewStories`, `getBestStories`, `getItem`, `getUser` |
|
|
537
|
-
| `HokInfoScraper` | Honor of Kings character info | Fandom MediaWiki parse API | No | `examples/hok-info.example.ts` | Uses `api.php?action=parse` |
|
|
538
|
-
| `HtmlToJpgScraper` | HTML file to JPG conversion | `api.freeconvert.com` | No credential in code | `examples/html-to-jpg.example.ts` | File-based conversion; skipped in runner without fixture |
|
|
539
|
-
| `IkiruMangaScraper` | Manga search | `02.ikiru.wtf` | No | `examples/ikiru-manga.example.ts` | Mock server fallback available |
|
|
540
|
-
| `ImageScraper` | Safebooru image search | `safebooru.org` | No | `examples/image.example.ts` | Current site type: `safebooru` |
|
|
541
|
-
| `ImgflipScraper` | Meme templates and caption generation | `api.imgflip.com` | Credentials required for `captionMeme` only | `examples/imgflip.example.ts` | `getMemes`, `captionMeme` |
|
|
542
|
-
| `ImgUpscalerScraper` | Image upscaling | `get1.imglarger.com` | No | `examples/img-upscaler.example.ts` | Mocked in example runner |
|
|
543
|
-
| `InstagramScraper` | Profile, feed, post, download | `instagram.com` web/API endpoints | Internal defaults; custom cookie supported | `examples/instagram.example.ts` | Use only legitimate session cookies |
|
|
544
|
-
| `KemendagScraper` | Indonesian basic goods dataset discovery | `data.go.id` CKAN API | No | `examples/kemendag.example.ts` | `getBapokDatasets()` |
|
|
545
|
-
| `KomikindoScraper` | Manga search/detail | `komikindo.ch` | No | `examples/komikindo.example.ts` | `search`, `getDetail` |
|
|
546
|
-
| `KompasNewsScraper` | Kompas news search | `search.kompas.com` | No | `examples/kompas-news.example.ts` | `search(query)` |
|
|
547
|
-
| `LikeeScraper` | Likee public video metadata | `likeedownloader.com/process` | No | `examples/likee.example.ts` | `getInfo(url)` |
|
|
548
|
-
| `LyricsScraper` | Lyrics search | `lrclib.net` JSON API | No | `examples/lyrics.example.ts` | Replaced blocked HTML scraping with API integration |
|
|
549
|
-
| `McAddonScraper` | Minecraft addon search/detail | `mmcreviews.com` | No | `examples/mcaddon.example.ts` | `search`, `getDetail`, `getAddon` |
|
|
550
|
-
| `MConverterScraper` | File conversion helpers | `mconverter.eu` | No | `examples/mconverter.example.ts` | `getTargets`, `convert`, `convertBuffer` |
|
|
551
|
-
| `MediafireScraper` | Resolve Mediafire download link | Mediafire public HTML page | No | `examples/mediafire.example.ts` | Default example uses active 10MB test file |
|
|
552
|
-
| `ModAndroidScraper` | Android APK/mod search aggregations | `an1.com`, `modyolo.com`, `aptoide.com`, `uptodown.com` | No | `examples/mod-android.example.ts` | `android1`, `modyolo`, `aptoide`, `uptodown`, `searchAll` |
|
|
553
|
-
| `MyAnimeListScraper` | Anime search and top anime via Jikan | `api.jikan.moe/v4` | No | `examples/myanimelist.example.ts` | `search`, `getTopAnime` |
|
|
554
|
-
| `NanoBananaScraper` | AI image edit workflow | `app.live3d.io` | No | `examples/nanobanana.example.ts` | Buffer upload, job creation, and polling |
|
|
555
|
-
| `OcrScraper` | OCR image scan | `newocr.com` | No | `examples/ocr.example.ts` | File/buffer based |
|
|
556
|
-
| `OpenAlexScraper` | Scholarly works, authors, and institutions | OpenAlex API | No | `examples/openalex.example.ts` | `searchWorks`, `getWork`, `searchAuthors`, `searchInstitutions`; optional polite `mailto` option |
|
|
557
|
-
| `OpenMeteoScraper` | Weather forecast and current weather by coordinate | Open-Meteo forecast API | No | `examples/open-meteo.example.ts` | `getForecast`, `getCurrentWeather`; validates coordinates and forecast bounds |
|
|
558
|
-
| `OpenLibraryScraper` | Open Library book, work, author, and ISBN lookup | Open Library JSON APIs | No | `examples/open-library.example.ts` | `searchBooks`, `getWork`, `getAuthor`, `getByIsbn` |
|
|
559
|
-
| `OsvScraper` | Open Source Vulnerabilities package and vulnerability lookup | OSV API v1 | No | `examples/osv.example.ts` | `queryPackage`, `getVulnerability`; public advisory metadata |
|
|
560
|
-
| `PackagistScraper` | PHP Composer package metadata and search | Packagist search API and Composer metadata API | No | `examples/packagist.example.ts` | `searchPackages`, `getPackage` |
|
|
561
|
-
| `PhotoAiScraper` | Photo AI upload/status | `photoai.imglarger.com` | No | `examples/photo-ai.example.ts` | Mocked in example runner |
|
|
562
|
-
| `PinterestScraper` | Pin search, detail, download | `pinterest.com` | Internal defaults; custom cookie supported | `examples/pinterest.example.ts` | Supports `credentials` option |
|
|
563
|
-
| `PixivScraper` | Pixiv artwork metadata | `pixiv.net/ajax/illust` | Internal defaults; custom credentials supported | `examples/pixiv.example.ts` | `getIllust(illustId)` |
|
|
564
|
-
| `PlayStoreScraper` | Google Play app search | `play.google.com` | No | `examples/playstore.example.ts` | `search(query, limit)` |
|
|
565
|
-
| `PubgMobileScraper` | PUBG Mobile announcement list | `pubgmobile.com` server-rendered news path | No | `examples/pubgmobile.example.ts` | `getNews()` |
|
|
566
|
-
| `PypiScraper` | Python package and release metadata | PyPI JSON API | No | `examples/pypi.example.ts` | `getPackage`, `getRelease` |
|
|
567
|
-
| `RemovebgScraper` | Background removal | Official `remove.bg` API | Yes (API key) | `examples/removebg.example.ts` | Prefer constructor credentials with `apiKey` |
|
|
568
|
-
| `ResepScraper` | Recipe search | `cookpad.com` | No | `examples/resep.example.ts` | Returns recipe items |
|
|
569
|
-
| `RestCountriesScraper` | Country lookup by name/code and bounded country lists | REST Countries v3.1 API | No | `examples/restcountries.example.ts` | `getAll(fields)` requires explicit fields to keep payloads bounded |
|
|
570
|
-
| `RorScraper` | Research organization registry search and lookup | ROR REST API | No | `examples/ror.example.ts` | `searchOrganizations`, `getOrganization` |
|
|
571
|
-
| `SakuraNovelScraper` | Novel search/detail/chapter | `sakuranovel.id` | No | `examples/sakura-novel.example.ts` | Mock server fallback available |
|
|
572
|
-
| `SfileScraper` | Sfile public file metadata | `sfile.mobi`, `sfile.co` | No | Not yet available | `getMetadata(url)` |
|
|
573
|
-
| `SoundcloudScraper` | SoundCloud track metadata | `soundcloud.com`, `api-v2.soundcloud.com` | No | `examples/soundcloud.example.ts` | Extracts or falls back to a client ID |
|
|
574
|
-
| `SpotifyScraper` | Spotify track, album, playlist, search, downloads | Spotify Web API and Accounts API | Internal defaults; custom client credentials supported | `examples/spotify.example.ts` | Partial custom credentials are rejected |
|
|
575
|
-
| `StalkScraper` | npm package metadata lookup | `registry.npmjs.org` | No | `examples/stalk.example.ts` | Default example query is `axios` |
|
|
576
|
-
| `TeraboxScraper` | TeraBox public share listing | `terabox.com/share/list` | Internal defaults; custom credentials supported | `examples/terabox.example.ts` | `getShareList(url)` |
|
|
577
|
-
| `TextReplaceScraper` | Replace text in images | `imgupscaler.ai`, `magiceraser.org` APIs | No | `examples/text-replace.example.ts` | Buffer upload, job creation, and polling |
|
|
578
|
-
| `ThreadsScraper` | Threads profile, post, search, download | `threads.net` web/API endpoints | Internal defaults; custom cookie supported | `examples/threads.example.ts` | Use only legitimate session cookies |
|
|
579
|
-
| `TiktokScraper` | TikTok video, user, post, and search metadata | `tiktok.com` web/API endpoints | Internal defaults; custom credentials supported | `examples/tiktok.example.ts` | `getVideo`, `getUser`, `getUserPosts`, `searchVideos`, `searchUsers` |
|
|
580
|
-
| `TopAnimeScraper` | MyAnimeList top anime list | `myanimelist.net` | No | `examples/top-anime.example.ts` | `getTopAnime(limit)` |
|
|
581
|
-
| `TwitterScraper` | Tweet, profile, search, timeline, downloads | `x.com/i/api/graphql` | Cookie/CSRF commonly required | `examples/twitter.example.ts` | Supports `credentials` option |
|
|
582
|
-
| `UguuScraper` | Temporary file upload | `uguu.se/upload` | No | `examples/uguu.example.ts` | `upload(buffer, filename)` |
|
|
583
|
-
| `UnblurVideoScraper` | Video enhancement workflow | `api.unblurimage.ai` | No | `examples/unblur-video.example.ts` | Buffer upload, OSS PUT, job creation, and polling |
|
|
584
|
-
| `UnsplashScraper` | Unsplash photo search | `unsplash.com/napi/search/photos` | No | `examples/unsplash.example.ts` | `searchPhotos(query, page, perPage)` |
|
|
585
|
-
| `UnwatermarkScraper` | Image watermark/text/logo restoration workflow | `api.unwatermark.ai` | No | `examples/unwatermark.example.ts` | Buffer upload, job creation, and polling |
|
|
586
|
-
| `UpscalerScraper` | Image enhancement | `aienhancer.ai` | No | `examples/upscaler.example.ts` | Rejects remote URL string input |
|
|
587
|
-
| `UpscalerV3Scraper` | Image upscale, background removal, style, and deblur actions | `imageupscaler.com/wp-admin/admin-ajax.php` | No | `examples/upscaler-v3.example.ts` | Requires explicit `nonce` and `pid` parameters |
|
|
588
|
-
| `UsgsEarthquakeScraper` | Earthquake feeds and bounded event queries | USGS earthquake GeoJSON feeds and event API | No | `examples/usgs-earthquake.example.ts` | `getSummary`, `queryEvents`; validates feed/query bounds |
|
|
589
|
-
| `VideyScraper` | Video upload | `videy.co/api/upload` | No | `examples/videy.example.ts` | `upload`, `uploadBuffer` |
|
|
590
|
-
| `WallpaperScraper` | Wallpaper search | `wallhaven.cc/api/v1/search` | No | `examples/wallpaper.example.ts` | Replaced blocked HTML scraping with API integration |
|
|
591
|
-
| `Webp2Mp4Scraper` | WebP to MP4/PNG conversion | `ezgif.com` | No | `examples/webp2mp4.example.ts` | `toMp4`, `toPng` |
|
|
592
|
-
| `WikipediaScraper` | Wikipedia search and page summary | MediaWiki and REST summary APIs | No | `examples/wikipedia.example.ts` | `search`, `getSummary` |
|
|
593
|
-
| `WorldBankScraper` | World Bank country and indicator metadata/data | World Bank API v2 | No | `examples/world-bank.example.ts` | `searchCountries`, `getCountry`, `searchIndicators`, `getIndicator` |
|
|
594
|
-
| `WwCharScraper` | Wuthering Waves character info | Fandom MediaWiki parse API | No | `examples/ww-char.example.ts` | Uses `api.php?action=parse` |
|
|
595
|
-
| `XiaohongshuScraper` | Xiaohongshu note metadata | Xiaohongshu web endpoints | Internal defaults; custom credentials supported | `examples/xiaohongshu.example.ts` | `getNote(url)` |
|
|
596
|
-
| `YouTubeScraper` | YouTube metadata, search, playlist, channel, downloads | `yt-dlp`, `play-dl`, YouTube pages | No cookie option exposed in current scraper options | `examples/youtube.example.ts` | Media download features need external tools |
|
|
597
|
-
|
|
598
|
-
## Scraper Response Reference
|
|
599
|
-
|
|
600
|
-
Every scraper method still uses the standard `WsperResponse<T>` envelope. The table below documents `response.data` for scraper exports that were missing from the previous `Available Scrapers` table or only mentioned in prose.
|
|
601
|
-
|
|
602
|
-
| Scraper | Primary methods | Success `response.data` |
|
|
603
|
-
| --- | --- | --- |
|
|
604
|
-
| `AioScraper` | `download(url)` | `AioResult`: optional `title`, `thumbnail`, `duration`, `source`, and `medias[]` entries with `url`, `quality`, `type`, `ext`, optional `size`. |
|
|
605
|
-
| `ArenaAiScraper` | `getCategories()`, `getLeaderboard(category)` | Categories return `{ date, fetched_at, leaderboards, errors }`; leaderboard returns `{ meta, models }` where each model has rank, model, vendor, license, score, confidence interval, and votes. |
|
|
606
|
-
| `BimasIslamScraper` | `getProvinces()`, `getCities(provinceId)`, `getPrayerTimes(provinceId, cityId, month, year)` | Region methods return `{ id, name }[]`; prayer times return daily entries with `tanggal`, `imsak`, `subuh`, `terbit`, `dhuha`, `dzuhur`, `ashar`, `maghrib`, and `isya`. |
|
|
607
|
-
| `BinanceScraper` | `getTicker24hr(symbol)`, `getAveragePrice(symbol)`, `getKlines(symbol, interval, options?)`, `getOrderBook(symbol, limit?)` | Public market data only. Ticker/average/order book/klines preserve price and quantity strings from Binance and include source timestamps/IDs where available. No trading or account endpoints are implemented. |
|
|
608
|
-
| `ClinicalTrialsScraper` | `searchStudies(query, options?)`, `getStudy(nctId)` | Search returns `{ totalCount, nextPageToken, studies }`; each study includes NCT ID, title, status, date fields, conditions, interventions, phases, study type, and lead sponsor metadata. |
|
|
609
|
-
| `CodashopScraper` | `checkNickname(game, id, zone?)` | `{ success, game, name, userId, zoneId? }`. |
|
|
610
|
-
| `CrossrefScraper` | `searchWorks(query, options?)`, `getWorkByDoi(doi)`, `searchJournals(query, options?)` | Works include DOI, title/subtitle arrays, publisher, type, URL, issued date, subjects, authors, reference count, and score. Journals include title, publisher, ISSNs, DOI count, and subjects. |
|
|
611
|
-
| `DetikNewsScraper` | `search(query, resultType?)` | `DetikNewsItem[]` with `title`, `url`, `imageUrl`, `category`, `description`, and `date`. |
|
|
612
|
-
| `DonghubScraper` | `search(query)`, `getDetail(url)` | Search returns `{ query, results }`; detail returns drama metadata including `title`, `url`, `image`, `genres`, `synopsis`, and `episodeList`. |
|
|
613
|
-
| `DownrScraper` | `getVideo(url)` | `DownrResult`: optional `url`, `title`, `author`, `duration`, `thumbnail`, and `medias[]` entries with URL, quality, size, extension, and media type. |
|
|
614
|
-
| `EcbScraper` | `getDailyReferenceRates()`, `convertFromEuro(amount, currency)` | Daily rates return `{ date, base: "EUR", rates }`; conversion returns amount, target currency, rate, converted amount, and rate date from the ECB eurofxref XML feed. |
|
|
615
|
-
| `FacebookScraper` | `getUserProfile(usernameOrId)`, `getPostDetail(url)`, `downloadImage(url)` | Profile data, post detail data, or `{ url, buffer?, title? }` for downloaded image data. |
|
|
616
|
-
| `FaceswapV2Scraper` | `swap(targetImageUrl, targetFaceUrl)` | `{ url, requestId? }`. |
|
|
617
|
-
| `ForeignNewsScraper` | `getBbcNews(region?)`, `getCnbcNews()`, `getCnnNews()`, `getAntaraNews()`, `getRepublikaNews()` | `ForeignNewsItem[]` with `title`, `description`, `link`, `pubDate`, and `guid`. |
|
|
618
|
-
| `GDriveScraper` | `getFileInfo(fileIdOrUrl)` | `{ filename, size, mimeType, directDownloadUrl, requiresConfirm, confirmToken }`. |
|
|
619
|
-
| `GenshinImpactScraper` | `getMangaChapters(lang?)`, `getWikiCategories()`, `getWikiEntries(menuId, pageNum?, pageSize?)` | Manga chapters, wiki categories, or wiki entries with IDs, names, icons, rarity, weapon type, and element where available. |
|
|
620
|
-
| `GoqrScraper` | `generateQrCode(data, size?)` | `Buffer` containing the generated QR code image bytes. |
|
|
621
|
-
| `GutendexScraper` | `searchBooks(query, options?)`, `getBook(id)` | Search returns `{ count, next, previous, results }`; each book includes ID, title, authors/translators, subjects, bookshelves, languages, copyright flag, media type, format URLs, and download count. |
|
|
622
|
-
| `HackerNewsScraper` | `getTopStories(limit?)`, `getNewStories(limit?)`, `getBestStories(limit?)`, `getItem(id)`, `getUser(username)` | Story list methods return `HackerNewsItem[]` with IDs, titles, authors, scores, URLs, comment IDs, timestamps, and descendant counts. Item/user methods return a single item or user object, or `null` when the Firebase API returns no object. |
|
|
623
|
-
| `ImgflipScraper` | `getMemes()`, `captionMeme(options)` | Meme templates return `ImgflipMemeTemplate[]`; captioning returns `{ url, page_url }`. |
|
|
624
|
-
| `KemendagScraper` | `getBapokDatasets()` | `KemendagBapokItem[]` with title, resource ID, format, download URL, and last-modified timestamp. |
|
|
625
|
-
| `KompasNewsScraper` | `search(query)` | `KompasNewsItem[]` with `title`, `url`, `imageUrl`, `category`, `date`, and `description`. |
|
|
626
|
-
| `LikeeScraper` | `getInfo(url)` | `{ pageUrl, thumbnail?, title?, playUrl? }`. |
|
|
627
|
-
| `MyAnimeListScraper` | `search(query, limit?)`, `getTopAnime()` | `MalAnimeItem[]` with MAL ID, titles, type, episode count, status, score, rating, synopsis, image URL, and genres. |
|
|
628
|
-
| `NanoBananaScraper` | `edit(buffer, prompt)` | `{ resultUrl }`. |
|
|
629
|
-
| `OpenAlexScraper` | `searchWorks(query, options?)`, `getWork(idOrDoi)`, `searchAuthors(query, options?)`, `searchInstitutions(query, options?)` | Search methods return `{ meta, results }`. Works include IDs, DOI, title/display name, publication year, type, citation count, and open access status. Authors and institutions include counts and affiliation/country metadata. |
|
|
630
|
-
| `OpenMeteoScraper` | `getForecast(latitude, longitude, options?)`, `getCurrentWeather(latitude, longitude)` | `OpenMeteoForecast` with coordinates, timezone, elevation, `currentWeather`, and optional `hourly`/`daily` series records. Invalid coordinates and forecast ranges return validation responses. |
|
|
631
|
-
| `OpenLibraryScraper` | `searchBooks(query, options?)`, `getWork(workKey)`, `getAuthor(authorKey)`, `getByIsbn(isbn)` | Search returns Open Library docs with title, author names, first publish year, ISBNs, language, subjects, and cover ID. Work, author, and ISBN methods return normalized metadata for the requested Open Library key. |
|
|
632
|
-
| `OsvScraper` | `queryPackage({ ecosystem, name, version? })`, `getVulnerability(id)` | Query returns `{ vulns }`; vulnerabilities include OSV ID, summary/details, aliases, published/modified timestamps, database-specific metadata, and affected package/range/version entries. |
|
|
633
|
-
| `PackagistScraper` | `searchPackages(query, options?)`, `getPackage(name)` | Search returns package summary rows with name, description, repository, downloads, and favorites. Package lookup returns normalized Composer versions with license, authors, requirements, source URL, and dist URL. |
|
|
634
|
-
| `PixivScraper` | `getIllust(illustId)` | `PixivArtwork` with artwork ID, title, description, type, created date, image URLs, tags, user ID, and user name. |
|
|
635
|
-
| `PubgMobileScraper` | `getNews()` | `PubgMobileNewsItem[]` with `title`, `date`, `url`, and `summary`. |
|
|
636
|
-
| `PypiScraper` | `getPackage(name)`, `getRelease(name, version)` | PyPI package metadata with normalized `info`, release file maps, and distribution URLs. File records include filename, package type, Python version, URL, size, upload time, and digests. |
|
|
637
|
-
| `RemovebgScraper` | `remove(buffer)` | `{ buffer }` where `buffer` contains the background-removed image bytes. |
|
|
638
|
-
| `RestCountriesScraper` | `getByName(name, options?)`, `getByCode(code, options?)`, `getAll(fields)` | Country records with names, ISO codes, capital, region, population, flags, timezones, coordinates, and optional currencies, languages, maps, and status fields. `getAll` requires explicit fields to avoid unbounded responses. |
|
|
639
|
-
| `RorScraper` | `searchOrganizations(query, options?)`, `getOrganization(id)` | ROR organization metadata with ID, name, status, types, country, links, aliases, and acronyms. `getOrganization` accepts a compact ROR ID or `https://ror.org/...` URL. |
|
|
640
|
-
| `SfileScraper` | `getMetadata(url)` | `SfileMetadata` with optional filename, size, author, upload date, download count, plus `pageUrl`. |
|
|
641
|
-
| `SoundcloudScraper` | `getTrack(url)` | `SoundcloudTrack` with track metadata, artwork, duration, genre, user info, stats, and optional stream URL. |
|
|
642
|
-
| `TeraboxScraper` | `getShareList(url)` | `{ shareid, uk, list }`; each file includes `fsId`, filename, size, directory flag, category, path, and optional download URL. |
|
|
643
|
-
| `TextReplaceScraper` | `replace(buffer, originalText, replaceText, fileName?)` | `{ outputUrl }`. |
|
|
644
|
-
| `TiktokScraper` | `getVideo(url)`, `getVideoDirect(url)`, `getUser(username)`, `getUserPosts(username, count?)`, `searchVideos(query, count?)`, `searchUsers(query, count?)` | Video metadata, user metadata, post arrays, search result objects, or user search arrays depending on the method. |
|
|
645
|
-
| `UnblurVideoScraper` | `enhance(buffer, resolution?, fileName?)` | `{ jobId, inputUrl?, outputUrl? }`. |
|
|
646
|
-
| `UnsplashScraper` | `searchPhotos(query, page?, perPage?)` | `UnsplashPhotoItem[]` with IDs, descriptions, dimensions, likes, image URLs, and photographer info. |
|
|
647
|
-
| `UnwatermarkScraper` | `restore(buffer)` | `{ jobId, inputUrl?, outputUrl? }`. |
|
|
648
|
-
| `UpscalerV3Scraper` | `process(buffer, params)` | `{ output }`; `params` must include `functionType`, explicit `nonce`, and explicit `pid`. |
|
|
649
|
-
| `UsgsEarthquakeScraper` | `getSummary(feed?)`, `queryEvents(options?)` | `UsgsEarthquakeSummary` with feed metadata, generated timestamp, count, and normalized event rows including magnitude, place, ISO timestamps, coordinates, depth, significance, alert/status, and source URLs. |
|
|
650
|
-
| `WikipediaScraper` | `search(query, lang?)`, `getSummary(title, lang?)` | Search returns result rows with title, page ID, snippet, and timestamp; summary returns title, extracts, description, optional thumbnail URL, and page URL. |
|
|
651
|
-
| `WorldBankScraper` | `searchCountries(query, options?)`, `getCountry(code)`, `searchIndicators(query, options?)`, `getIndicator(country, indicator, options?)` | Country and indicator searches return `{ pagination, items }`; indicator data returns `{ pagination, values }` with country, indicator, date, numeric value, unit, observation status, and decimal metadata. |
|
|
652
|
-
| `XiaohongshuScraper` | `getNote(url)` | `XiaohongshuNote` with title, description, type, timestamp, user, images, and engagement stats. |
|
|
653
|
-
|
|
654
|
-
## What's New
|
|
655
|
-
|
|
656
|
-
The latest research implementation passes added sixteen public, no-credential scrapers from the roadmap:
|
|
657
|
-
|
|
658
|
-
- `UsgsEarthquakeScraper` for USGS earthquake GeoJSON feeds and bounded event queries.
|
|
659
|
-
- `OpenMeteoScraper` for Open-Meteo forecast/current weather by coordinate.
|
|
660
|
-
- `RestCountriesScraper` for country lookup by name/code and bounded `getAll(fields)` lists.
|
|
661
|
-
- `HackerNewsScraper` for Hacker News stories, items, and users through the public Firebase API.
|
|
662
|
-
- `OpenLibraryScraper` for Open Library book search, work, author, and ISBN metadata.
|
|
663
|
-
- `GutendexScraper` for Project Gutenberg metadata through the Gutendex JSON API.
|
|
664
|
-
- `WorldBankScraper` for World Bank countries, indicators, and bounded indicator time-series.
|
|
665
|
-
- `EcbScraper` for ECB daily euro reference rates and EUR conversion helpers.
|
|
666
|
-
- `CrossrefScraper` for scholarly work and journal metadata through the Crossref REST API.
|
|
667
|
-
- `OpenAlexScraper` for scholarly works, authors, and institutions.
|
|
668
|
-
- `RorScraper` for research organization registry search and lookup.
|
|
669
|
-
- `ClinicalTrialsScraper` for ClinicalTrials.gov public study search and detail lookup.
|
|
670
|
-
- `PypiScraper` for Python package and release metadata through the PyPI JSON API.
|
|
671
|
-
- `PackagistScraper` for PHP Composer package metadata and search.
|
|
672
|
-
- `OsvScraper` for public OSV package vulnerability queries and vulnerability detail lookup.
|
|
673
|
-
- `BinanceScraper` for Binance public market data only, with no account or trading endpoints.
|
|
674
|
-
- Verification for the latest full pass: `npm run typecheck`, `npm run test`, and `npm run build`; full Vitest suite reported 132 test files passed and 441 tests passed.
|
|
675
|
-
|
|
676
|
-
The previous scraper reliability pass fixed 12 failing scraper functions and their corresponding tests.
|
|
677
|
-
|
|
678
|
-
- `LyricsScraper` now uses LRCLIB's public JSON API instead of a Cloudflare-protected Musixmatch HTML page.
|
|
679
|
-
- `WallpaperScraper` now uses Wallhaven's public search API instead of a Cloudflare-protected Wallpaperflare HTML page.
|
|
680
|
-
- `WwCharScraper` and `HokInfoScraper` now use Fandom's MediaWiki parse API (`api.php?action=parse`) and prepend a synthetic `#firstHeading` node before running the existing parsers.
|
|
681
|
-
- `examples/mock-server.ts` now covers WordPress resolver routes, AI tool polling/upload routes, and Fandom API responses.
|
|
682
|
-
- `examples/alllexamp.ts` starts and stops the mock server during the runner lifecycle.
|
|
683
|
-
- `WSPER_MOCK_BASE_URL` is injected into spawned example subprocesses so individual examples can use the local mock endpoint automatically.
|
|
684
|
-
- Default example inputs were updated: `StalkScraper` uses `axios`, `MediafireScraper` uses an active 10MB test file URL, and `HokInfoScraper` uses `Angela`.
|
|
685
|
-
- Verification recorded in `walkthrough.md`: 82 test files passed, 314 tests passed, and the examples runner reported 83 OK, 0 FAIL, 2 SKIP.
|
|
686
|
-
|
|
687
|
-
## Mock Server and Testing
|
|
688
|
-
|
|
689
|
-
`examples/mock-server.ts` is a local HTTP server used by the examples runner to provide deterministic responses for endpoints that are rate-limited, depend on third-party availability, or are inconvenient to call during automated checks.
|
|
690
|
-
|
|
691
|
-
Currently mocked routes include:
|
|
692
|
-
|
|
693
|
-
| Area | Routes |
|
|
694
|
-
| --- | --- |
|
|
695
|
-
| CapCut | `/wp-json/aio-dl/video-data/` |
|
|
696
|
-
| ImgUpscaler | `/api/UpscalerNew/UploadNew`, `/api/UpscalerNew/CheckStatusNew` |
|
|
697
|
-
| PhotoAi | `/api/PhoAi/Upload`, `/api/PhoAi/CheckStatus` |
|
|
698
|
-
| Faceswap | `/api/face-swap/create-poll`, `/api/common/get` |
|
|
699
|
-
| Upscaler | `/api/v1/r/image-enhance/create`, `/api/v1/r/image-enhance/result` |
|
|
700
|
-
| Fandom Wiki | `/api.php` |
|
|
701
|
-
| WordPress search fixtures | `/wp-admin/admin-ajax.php` |
|
|
702
|
-
|
|
703
|
-
`examples/alllexamp.ts` starts the mock server, sets `process.env.WSPER_MOCK_BASE_URL`, routes direct scraper demos to the mock server with `http: { allowPrivateNetwork: true }` where needed, runs individual example files as subprocesses, then closes the mock server.
|
|
704
|
-
|
|
705
|
-
This keeps example validation safer and more repeatable. It avoids making every test depend on live third-party services, Cloudflare-protected HTML pages, or rate-limited AI tool endpoints.
|
|
706
|
-
|
|
707
|
-
## Running Examples
|
|
708
|
-
|
|
709
|
-
Run all direct scraper demos and individual example files:
|
|
719
|
+
#### USGS Earthquakes (`UsgsEarthquakeScraper`)
|
|
720
|
+
Reads USGS earthquake summary feeds (GeoJSON) for a chosen time window.
|
|
710
721
|
|
|
711
|
-
```
|
|
712
|
-
|
|
722
|
+
```ts
|
|
723
|
+
import { UsgsEarthquakeScraper } from "wsper-js";
|
|
724
|
+
|
|
725
|
+
const usgs = new UsgsEarthquakeScraper();
|
|
726
|
+
const feed = await usgs.getSummary("all_day"); // "all_hour" | "all_day" | "all_week"
|
|
713
727
|
```
|
|
714
728
|
|
|
715
|
-
|
|
729
|
+
---
|
|
716
730
|
|
|
717
|
-
|
|
731
|
+
### Scholarly & Academic Metadata
|
|
718
732
|
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
| `OK` | The scraper/example returned a successful result. |
|
|
722
|
-
| `FAIL` | The scraper/example returned a failed response or subprocess exit. Auth-required scrapers may fail without valid credentials. |
|
|
723
|
-
| `SKIP` | The runner intentionally skipped an entry, usually because a local fixture is unavailable. |
|
|
733
|
+
#### Crossref (`CrossrefScraper`)
|
|
734
|
+
Retrieves scholarly work and journal metadata from the Crossref REST API, including lookup by DOI.
|
|
724
735
|
|
|
725
|
-
|
|
736
|
+
```ts
|
|
737
|
+
import { CrossrefScraper } from "wsper-js";
|
|
726
738
|
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
SKIP : 2
|
|
731
|
-
Total: 85
|
|
739
|
+
// `politeMailTo` identifies your application to Crossref by contact email (API etiquette)
|
|
740
|
+
const crossref = new CrossrefScraper({ politeMailTo: "developer@example.com" });
|
|
741
|
+
const paper = await crossref.getWorkByDoi("10.1038/nature14539");
|
|
732
742
|
```
|
|
733
743
|
|
|
734
|
-
|
|
744
|
+
#### OpenAlex (`OpenAlexScraper`)
|
|
745
|
+
Queries the OpenAlex scholarly graph for works, authors, and institutions.
|
|
735
746
|
|
|
736
|
-
```
|
|
737
|
-
|
|
738
|
-
npx tsx examples/wallpaper.example.ts cyberpunk
|
|
739
|
-
npx tsx examples/stalk.example.ts axios
|
|
740
|
-
npx tsx examples/mediafire.example.ts "https://www.mediafire.com/file/ipnyzofjcwri357/test-10mb.bin/file"
|
|
741
|
-
npx tsx examples/upscaler.example.ts testassets/photo.jpg
|
|
742
|
-
npx tsx examples/usgs-earthquake.example.ts all_day
|
|
743
|
-
npx tsx examples/open-meteo.example.ts -6.2 106.8
|
|
744
|
-
npx tsx examples/restcountries.example.ts Indonesia
|
|
745
|
-
npx tsx examples/hacker-news.example.ts 10
|
|
746
|
-
npx tsx examples/open-library.example.ts dune
|
|
747
|
-
npx tsx examples/gutendex.example.ts alice
|
|
748
|
-
npx tsx examples/world-bank.example.ts IDN SP.POP.TOTL
|
|
749
|
-
npx tsx examples/ecb.example.ts 100 USD
|
|
750
|
-
npx tsx examples/crossref.example.ts "open science"
|
|
751
|
-
npx tsx examples/openalex.example.ts "open science"
|
|
752
|
-
npx tsx examples/ror.example.ts "University of Indonesia"
|
|
753
|
-
npx tsx examples/clinical-trials.example.ts diabetes
|
|
754
|
-
npx tsx examples/pypi.example.ts requests
|
|
755
|
-
npx tsx examples/packagist.example.ts monolog/monolog
|
|
756
|
-
npx tsx examples/osv.example.ts PyPI requests
|
|
757
|
-
npx tsx examples/binance.example.ts BTCUSDT
|
|
758
|
-
```
|
|
759
|
-
|
|
760
|
-
For scrapers that support cookies or credentials, use only accounts and sessions you are authorized to access. Do not hardcode real cookies, tokens, client secrets, or API keys in source files.
|
|
761
|
-
|
|
762
|
-
## Running Tests and Validation
|
|
763
|
-
|
|
764
|
-
Package scripts from `package.json`:
|
|
765
|
-
|
|
766
|
-
| Command | Purpose |
|
|
767
|
-
| --- | --- |
|
|
768
|
-
| `npm run test` | Run all Vitest tests once. |
|
|
769
|
-
| `npm run test:watch` | Run Vitest in watch mode. |
|
|
770
|
-
| `npm run typecheck` | Run TypeScript with `--noEmit`. |
|
|
771
|
-
| `npm run build` | Build production ESM output through `script/build.mjs`. |
|
|
772
|
-
| `npm run build:dev` | Build development output. |
|
|
773
|
-
| `npm run build:prod` | Build production output. |
|
|
774
|
-
| `npm run build:bytecode` | Build bytecode output with `script/build-bytecode.mjs`. |
|
|
775
|
-
| `npm run build:all` | Build production output and bytecode. |
|
|
776
|
-
| `npm run test:instagram` | Run Instagram tests only. |
|
|
777
|
-
| `npm run test:spotify` | Run Spotify tests only. |
|
|
778
|
-
| `npm run test:youtube` | Run YouTube tests only. |
|
|
779
|
-
| `npm run test:threads` | Run Threads tests only. |
|
|
780
|
-
| `npm run test:pinterest` | Run Pinterest tests only. |
|
|
781
|
-
| `npm run test:brat` | Run Brat tests only. |
|
|
782
|
-
|
|
783
|
-
Recommended validation before publishing or changing behavior:
|
|
747
|
+
```ts
|
|
748
|
+
import { OpenAlexScraper } from "wsper-js";
|
|
784
749
|
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
npm run test
|
|
788
|
-
npm run build
|
|
789
|
-
npx tsx examples/alllexamp.ts
|
|
750
|
+
const openAlex = new OpenAlexScraper();
|
|
751
|
+
const searchWorks = await openAlex.searchWorks("machine learning", { limit: 10 });
|
|
790
752
|
```
|
|
791
753
|
|
|
792
|
-
|
|
754
|
+
---
|
|
793
755
|
|
|
794
|
-
|
|
795
|
-
real credentials. Tests that hit live platform APIs are guarded by
|
|
796
|
-
`tests/helpers/credentials.ts` — when the required `WSPER_*` env variables are
|
|
797
|
-
missing they are **skipped with a clear message** instead of failing:
|
|
756
|
+
### Developer Registry & Package APIs
|
|
798
757
|
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
```
|
|
809
|
-
|
|
810
|
-
## Project Structure
|
|
811
|
-
|
|
812
|
-
```txt
|
|
813
|
-
src/
|
|
814
|
-
index.ts Public package exports
|
|
815
|
-
WsperScraper.ts Aggregate scraper entrypoint
|
|
816
|
-
core/
|
|
817
|
-
credentials/ Credential normalization and platform headers
|
|
818
|
-
error/ WsperError, ValidationError, HttpError, ParseError, DownloadError, ScraperError
|
|
819
|
-
http/ HTTP client, retries, timeouts, safe URL validation
|
|
820
|
-
parser/ Shared HTML and JSON parser helpers
|
|
821
|
-
queue/ Request pacing and concurrency control
|
|
822
|
-
modules/
|
|
823
|
-
brat/ Brat image/GIF/video generator and converters
|
|
824
|
-
chart/ Analytics image generator
|
|
825
|
-
download/ Safe downloader primitives
|
|
826
|
-
scrapers/ Platform-specific scraper implementations
|
|
827
|
-
types/ Shared response, option, and common types
|
|
828
|
-
utils/ Sleep, URL, validation, browser-profile, and helper utilities
|
|
829
|
-
examples/
|
|
830
|
-
alllexamp.ts Full example runner
|
|
831
|
-
mock-server.ts Local deterministic mock server
|
|
832
|
-
*.example.ts Individual runnable examples
|
|
833
|
-
tests/
|
|
834
|
-
*/*.test.ts Unit and parser tests
|
|
835
|
-
dist/ Build output only; do not edit manually
|
|
836
|
-
```
|
|
837
|
-
|
|
838
|
-
## Credentials Configuration
|
|
839
|
-
|
|
840
|
-
The library never ships personal credentials. Resolution order per scraper:
|
|
841
|
-
|
|
842
|
-
1. **Constructor injection (recommended)** — pass `options.credentials` when creating a scraper.
|
|
843
|
-
2. **Environment variables** — optional `WSPER_*` fallbacks read lazily by `src/core/credentials.ts`.
|
|
844
|
-
3. **Nothing** — the scraper runs in public mode, or methods that require credentials fail with a clear error code (e.g. `CAI_CREDENTIALS_REQUIRED`, `SPOTIFY_CREDENTIALS_MISSING`).
|
|
845
|
-
|
|
846
|
-
```ts
|
|
847
|
-
const scraper = new InstagramScraper({
|
|
848
|
-
credentials: { cookie: process.env.WSPER_INSTAGRAM_COOKIE },
|
|
849
|
-
queue: {
|
|
850
|
-
concurrency: 1,
|
|
851
|
-
minDelayMs: 1500,
|
|
852
|
-
maxDelayMs: 4000,
|
|
853
|
-
timeoutMs: 30000,
|
|
854
|
-
retries: 2,
|
|
855
|
-
},
|
|
856
|
-
});
|
|
758
|
+
#### Python Package Index (`PypiScraper`)
|
|
759
|
+
Retrieves package metadata and per-release file listings.
|
|
760
|
+
|
|
761
|
+
```ts
|
|
762
|
+
import { PypiScraper } from "wsper-js";
|
|
763
|
+
|
|
764
|
+
const pypi = new PypiScraper();
|
|
765
|
+
const pkg = await pypi.getPackage("requests");
|
|
766
|
+
const release = await pypi.getRelease("requests", "2.31.0");
|
|
857
767
|
```
|
|
858
768
|
|
|
859
|
-
|
|
769
|
+
#### npm Registry (`StalkScraper`)
|
|
770
|
+
Resolves npm package metadata.
|
|
860
771
|
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
| `WSPER_SPOTIFY_CLIENT_ID` / `WSPER_SPOTIFY_CLIENT_SECRET` | Spotify Web API |
|
|
864
|
-
| `WSPER_SPOTIFY_CALLBACK_URL` / `WSPER_SPOTIFY_MARKET` | Spotify OAuth / market |
|
|
865
|
-
| `WSPER_INSTAGRAM_COOKIE` | Instagram |
|
|
866
|
-
| `WSPER_THREADS_COOKIE` | Threads |
|
|
867
|
-
| `WSPER_TWITTER_COOKIE` | Twitter/X |
|
|
868
|
-
| `WSPER_PINTEREST_COOKIE` | Pinterest |
|
|
869
|
-
| `WSPER_TIKTOK_COOKIE` | TikTok |
|
|
870
|
-
| `WSPER_FACEBOOK_COOKIE` | Facebook |
|
|
871
|
-
| `WSPER_BILIBILI_COOKIE` | BiliBili |
|
|
872
|
-
| `WSPER_CAI_TOKEN` | Character.AI |
|
|
873
|
-
| `REMOVEBG_API_KEY` | remove.bg |
|
|
772
|
+
```ts
|
|
773
|
+
import { StalkScraper } from "wsper-js";
|
|
874
774
|
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
`WSPER_EXAMPLE_MODE`:
|
|
775
|
+
const stalk = new StalkScraper();
|
|
776
|
+
const pkg = await stalk.getNpmPackage("axios");
|
|
777
|
+
```
|
|
879
778
|
|
|
880
|
-
|
|
881
|
-
|
|
779
|
+
#### OSV Vulnerability Database (`OsvScraper`)
|
|
780
|
+
Finds known vulnerabilities affecting open-source packages.
|
|
882
781
|
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
> now resolves from `WSPER_*` env variables and is empty by default. If you
|
|
886
|
-
> relied on the old defaults, inject credentials via constructor options or env.
|
|
887
|
-
> Rotate any cookies/tokens that were ever committed.
|
|
782
|
+
```ts
|
|
783
|
+
import { OsvScraper } from "wsper-js";
|
|
888
784
|
|
|
889
|
-
|
|
785
|
+
const osv = new OsvScraper();
|
|
786
|
+
const query = await osv.queryPackage({ ecosystem: "PyPI", name: "requests", version: "2.20.0" });
|
|
787
|
+
```
|
|
890
788
|
|
|
891
|
-
|
|
789
|
+
---
|
|
892
790
|
|
|
893
|
-
|
|
894
|
-
| --- | --- | --- | --- |
|
|
895
|
-
| `WSPER_MOCK_BASE_URL` | `examples/alllexamp.ts`, AI tool examples | No | Set by the all-examples runner for subprocesses. Points examples to the local mock server. |
|
|
896
|
-
| `BMKG_WARNING_API_KEY` | `examples/cuaca.example.ts` | No | Optional warning API key passed to `CuacaScraper({ warningApiKey })`. |
|
|
897
|
-
| `INSTAGRAM_COOKIE` | `examples/instagram.example.ts` comments | No | Optional example input for constructor credentials. Use `<YOUR_COOKIE_HERE>` style placeholders in docs and never commit real cookies. |
|
|
898
|
-
| `INSTAGRAM_CSRF_TOKEN` | `examples/instagram.example.ts` comments | No | Optional example input for constructor credentials. |
|
|
899
|
-
| `BILI_COOKIE` | `examples/bilibili.example.ts` | No | Optional BiliBili cookie for authenticated stream access. |
|
|
900
|
-
| `WSPER_COOKIE` | `tests/core/credentials.test.ts` | No | Test-only variable proving runtime credential resolution does not read env credentials automatically. |
|
|
791
|
+
### Utility, AI & Conversion Resolvers
|
|
901
792
|
|
|
902
|
-
|
|
793
|
+
#### GDrive (`GDriveScraper`)
|
|
794
|
+
Resolves direct download links and confirmation-token details for shared files.
|
|
903
795
|
|
|
904
796
|
```ts
|
|
905
|
-
import {
|
|
797
|
+
import { GDriveScraper } from "wsper-js";
|
|
906
798
|
|
|
907
|
-
const
|
|
908
|
-
|
|
909
|
-
clientId: "your-client-id",
|
|
910
|
-
clientSecret: "your-client-secret",
|
|
911
|
-
},
|
|
912
|
-
credentials: {
|
|
913
|
-
cai: {
|
|
914
|
-
bearerToken: "<YOUR_TOKEN_HERE>",
|
|
915
|
-
},
|
|
916
|
-
twitter: {
|
|
917
|
-
cookie: "<YOUR_COOKIE_HERE>",
|
|
918
|
-
csrfToken: "<YOUR_CSRF_TOKEN_HERE>",
|
|
919
|
-
},
|
|
920
|
-
instagram: {
|
|
921
|
-
cookie: "<YOUR_COOKIE_HERE>",
|
|
922
|
-
csrfToken: "<YOUR_CSRF_TOKEN_HERE>",
|
|
923
|
-
},
|
|
924
|
-
},
|
|
925
|
-
});
|
|
799
|
+
const gdrive = new GDriveScraper();
|
|
800
|
+
const file = await gdrive.getFileInfo("https://drive.google.com/file/d/SHARED_ID/view");
|
|
926
801
|
```
|
|
927
802
|
|
|
928
|
-
|
|
803
|
+
#### AI Image Enhancer (`UpscalerScraper`)
|
|
804
|
+
Uploads an image buffer to the AI enhancer API and polls until the result is ready.
|
|
805
|
+
|
|
806
|
+
```ts
|
|
807
|
+
import { UpscalerScraper } from "wsper-js";
|
|
808
|
+
import { readFile } from "node:fs/promises";
|
|
809
|
+
|
|
810
|
+
const upscaler = new UpscalerScraper();
|
|
811
|
+
const buffer = await readFile("photo.jpg");
|
|
929
812
|
|
|
930
|
-
|
|
813
|
+
const result = await upscaler.upscaleBuffer(buffer, "image/jpeg");
|
|
814
|
+
if (result.ok && result.data) {
|
|
815
|
+
console.log("Upscaled Image CDN Link:", result.data.output);
|
|
816
|
+
}
|
|
817
|
+
```
|
|
931
818
|
|
|
932
|
-
|
|
819
|
+
---
|
|
933
820
|
|
|
934
|
-
|
|
821
|
+
## Sandbox & Mock Server Usage
|
|
935
822
|
|
|
936
|
-
|
|
823
|
+
To prevent outbound requests to live websites during automated tests or local script iterations, `wsper-js` packages a local mock server under `examples/mock-server.ts`.
|
|
937
824
|
|
|
938
|
-
|
|
825
|
+
### Running the Mock Server
|
|
826
|
+
Start the server in a terminal window:
|
|
939
827
|
|
|
940
828
|
```bash
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
829
|
+
npx tsx examples/mock-server.ts
|
|
830
|
+
```
|
|
831
|
+
|
|
832
|
+
This boots an express-like mock instance at `http://localhost:3000`.
|
|
833
|
+
|
|
834
|
+
### Consuming Mocks in your Code
|
|
835
|
+
To route a scraper to the local mock server instead of the live platform servers:
|
|
836
|
+
|
|
837
|
+
```ts
|
|
838
|
+
const scraper = new SpotifyScraper({
|
|
839
|
+
baseUrl: "http://localhost:3000",
|
|
840
|
+
http: {
|
|
841
|
+
allowPrivateNetwork: true // Required: permits localhost requests through the validation guard
|
|
842
|
+
}
|
|
843
|
+
});
|
|
946
844
|
```
|
|
947
845
|
|
|
948
|
-
|
|
846
|
+
Alternatively, set the environment variable:
|
|
847
|
+
```bash
|
|
848
|
+
export WSPER_MOCK_BASE_URL="http://localhost:3000"
|
|
849
|
+
```
|
|
949
850
|
|
|
950
|
-
|
|
951
|
-
2. Export the scraper from `src/scrapers/<name>/index.ts` and `src/scrapers/index.ts`.
|
|
952
|
-
3. Return typed `WsperResponse<T>` results.
|
|
953
|
-
4. Keep HTTP, parsing, queueing, and file download responsibilities separated.
|
|
954
|
-
5. Add or update parser and scraper tests under `tests/`.
|
|
955
|
-
6. Add or update a runnable example under `examples/`.
|
|
956
|
-
7. Use the mock server for flows that should not depend on live third-party behavior in default tests.
|
|
957
|
-
8. Update this README when public API, usage, behavior, examples, or validation results change.
|
|
851
|
+
---
|
|
958
852
|
|
|
959
|
-
##
|
|
853
|
+
## Error Handling & Exceptions
|
|
960
854
|
|
|
961
|
-
|
|
962
|
-
- Use only `http:` and `https:` unless a module explicitly supports something else.
|
|
963
|
-
- Keep SSRF protections enabled; private network requests require explicit `allowPrivateNetwork: true` and should be reserved for local mocks or trusted endpoints.
|
|
964
|
-
- Do not log secrets, cookies, client secrets, authorization headers, access tokens, refresh tokens, or raw credential objects.
|
|
965
|
-
- Do not commit credentials, cookies, tokens, private fixtures, or real session material.
|
|
855
|
+
All errors thrown internally inherit from `WsperError`. You can import and catch specific exceptions for granular error handling:
|
|
966
856
|
|
|
967
|
-
|
|
857
|
+
```ts
|
|
858
|
+
import { WsperError, HttpError, ParseError, ValidationError, DownloadError } from "wsper-js";
|
|
859
|
+
|
|
860
|
+
try {
|
|
861
|
+
await scraper.search("");
|
|
862
|
+
} catch (err) {
|
|
863
|
+
if (err instanceof ValidationError) {
|
|
864
|
+
console.error("Validation failed. Incorrect fields:", err.details);
|
|
865
|
+
} else if (err instanceof HttpError) {
|
|
866
|
+
console.error(`HTTP Status Error ${err.statusCode} on URL ${err.url}`);
|
|
867
|
+
if (err.details?.preview) {
|
|
868
|
+
console.error("Server raw response preview:", err.details.preview);
|
|
869
|
+
}
|
|
870
|
+
} else if (err instanceof ParseError) {
|
|
871
|
+
console.error("Target HTML/JSON parser format changed:", err.message);
|
|
872
|
+
} else if (err instanceof WsperError) {
|
|
873
|
+
console.error(`Generic Wsper error [${err.code}]:`, err.message);
|
|
874
|
+
}
|
|
875
|
+
}
|
|
876
|
+
```
|
|
968
877
|
|
|
969
|
-
|
|
878
|
+
### Exception Hierarchy
|
|
879
|
+
```
|
|
880
|
+
Error (Node.js)
|
|
881
|
+
└── WsperError
|
|
882
|
+
├── HttpError (Thrown on HTTP failures >= 400 or timeouts)
|
|
883
|
+
├── ParseError (Thrown when DOM scraper or JSON parsing fails)
|
|
884
|
+
├── ValidationError (Thrown when argument formatting is incorrect)
|
|
885
|
+
├── DownloadError (Thrown by the Downloader on sizes/mime violations)
|
|
886
|
+
└── ScraperError (Thrown by custom scraper-specific subroutines)
|
|
887
|
+
```
|