scrapebadger 0.4.0 → 0.7.0
This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +27 -291
- package/dist/index.d.cts +1904 -102
- package/dist/index.d.ts +1904 -102
- package/dist/index.js +838 -57
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +815 -58
- package/dist/index.mjs.map +1 -1
- package/dist/twitter/index.js +1 -1
- package/dist/twitter/index.js.map +1 -1
- package/dist/twitter/index.mjs +1 -1
- package/dist/twitter/index.mjs.map +1 -1
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -12,17 +12,20 @@
|
|
|
12
12
|
<a href="https://opensource.org/licenses/MIT"><img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="License: MIT"></a>
|
|
13
13
|
</p>
|
|
14
14
|
|
|
15
|
-
The official Node.js/TypeScript client library for the [ScrapeBadger](https://scrapebadger.com) API.
|
|
15
|
+
The official Node.js/TypeScript client library for the [ScrapeBadger](https://scrapebadger.com) API — Twitter, Google, Vinted, and general web scraping.
|
|
16
16
|
|
|
17
17
|
## Features
|
|
18
18
|
|
|
19
19
|
- **Full TypeScript Support** - Complete type definitions for all API endpoints
|
|
20
20
|
- **Modern ESM & CommonJS** - Works with both module systems
|
|
21
21
|
- **Async Iterators** - Automatic pagination with `for await...of` syntax
|
|
22
|
-
- **Smart Rate
|
|
23
|
-
- **Resilient Retries** -
|
|
24
|
-
- **
|
|
25
|
-
- **
|
|
22
|
+
- **Smart Rate Limiting** - Reads API headers and throttles pagination automatically
|
|
23
|
+
- **Resilient Retries** - Exponential backoff with colored console warnings
|
|
24
|
+
- **Typed Exceptions** - Distinct error classes for every failure scenario
|
|
25
|
+
- **37+ Twitter endpoints** - Tweets, users, lists, communities, trends, geo, real-time streams
|
|
26
|
+
- **19 Google product APIs** - Search (with optional deferred AI Overview follow-up), Maps, News, Hotels, Trends (incl. topic autocomplete), Jobs, Shopping (+ merchant URL enrichment), Patents, Scholar (search + profiles + author + author citation + cite formats), Images, Videos, Finance, AI Mode, Lens, Autocomplete, **Local Pack**, **Shorts**, **Flights**, Products
|
|
27
|
+
- **Vinted scraping** - Search items, item details, user profiles, brands, colors, markets
|
|
28
|
+
- **Web scraping** - Anti-bot bypass, JS rendering, and AI data extraction
|
|
26
29
|
|
|
27
30
|
## Installation
|
|
28
31
|
|
|
@@ -43,271 +46,39 @@ pnpm add scrapebadger
|
|
|
43
46
|
```typescript
|
|
44
47
|
import { ScrapeBadger } from "scrapebadger";
|
|
45
48
|
|
|
46
|
-
// Create client with API key
|
|
47
49
|
const client = new ScrapeBadger({ apiKey: "your-api-key" });
|
|
48
50
|
|
|
49
|
-
// Or use environment variable (SCRAPEBADGER_API_KEY)
|
|
50
|
-
const client = new ScrapeBadger();
|
|
51
|
-
|
|
52
51
|
// Get a tweet
|
|
53
52
|
const tweet = await client.twitter.tweets.getById("1234567890");
|
|
54
53
|
console.log(`@${tweet.username}: ${tweet.text}`);
|
|
55
54
|
|
|
55
|
+
// Scrape a website
|
|
56
|
+
const result = await client.web.scrape("https://scrapebadger.com", { format: "markdown" });
|
|
57
|
+
console.log(result.content);
|
|
58
|
+
|
|
56
59
|
// Get a user profile
|
|
57
60
|
const user = await client.twitter.users.getByUsername("elonmusk");
|
|
58
61
|
console.log(`${user.name} has ${user.followers_count.toLocaleString()} followers`);
|
|
59
62
|
```
|
|
60
63
|
|
|
61
|
-
##
|
|
62
|
-
|
|
63
|
-
### Search Tweets
|
|
64
|
-
|
|
65
|
-
```typescript
|
|
66
|
-
import { ScrapeBadger } from "scrapebadger";
|
|
67
|
-
|
|
68
|
-
const client = new ScrapeBadger({ apiKey: "your-api-key" });
|
|
69
|
-
|
|
70
|
-
// Basic search (returns first page)
|
|
71
|
-
const results = await client.twitter.tweets.search("python programming");
|
|
72
|
-
for (const tweet of results.data) {
|
|
73
|
-
console.log(`@${tweet.username}: ${tweet.text}`);
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
// Paginate manually
|
|
77
|
-
if (results.hasMore) {
|
|
78
|
-
const nextPage = await client.twitter.tweets.search("python programming", {
|
|
79
|
-
cursor: results.nextCursor,
|
|
80
|
-
});
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
// Automatic pagination with async iterators
|
|
84
|
-
for await (const tweet of client.twitter.tweets.searchAll("python", { maxItems: 100 })) {
|
|
85
|
-
console.log(tweet.text);
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
// Collect all results into an array
|
|
89
|
-
import { collectAll } from "scrapebadger";
|
|
90
|
-
|
|
91
|
-
const tweets = await collectAll(
|
|
92
|
-
client.twitter.tweets.searchAll("python", { maxItems: 100 })
|
|
93
|
-
);
|
|
94
|
-
console.log(`Fetched ${tweets.length} tweets`);
|
|
95
|
-
```
|
|
96
|
-
|
|
97
|
-
### User Operations
|
|
98
|
-
|
|
99
|
-
```typescript
|
|
100
|
-
// Get user by username
|
|
101
|
-
const user = await client.twitter.users.getByUsername("elonmusk");
|
|
102
|
-
|
|
103
|
-
// Get user by ID
|
|
104
|
-
const userById = await client.twitter.users.getById("44196397");
|
|
105
|
-
|
|
106
|
-
// Get extended profile info
|
|
107
|
-
const about = await client.twitter.users.getAbout("elonmusk");
|
|
108
|
-
console.log(`Account based in: ${about.account_based_in}`);
|
|
109
|
-
console.log(`Username changes: ${about.username_changes}`);
|
|
110
|
-
|
|
111
|
-
// Get followers
|
|
112
|
-
const followers = await client.twitter.users.getFollowers("elonmusk");
|
|
113
|
-
for (const follower of followers.data) {
|
|
114
|
-
console.log(`@${follower.username}`);
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
// Iterate through all followers
|
|
118
|
-
for await (const follower of client.twitter.users.getFollowersAll("elonmusk", {
|
|
119
|
-
maxItems: 1000,
|
|
120
|
-
})) {
|
|
121
|
-
console.log(follower.username);
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
// Search users
|
|
125
|
-
const users = await client.twitter.users.search("python developer");
|
|
126
|
-
```
|
|
127
|
-
|
|
128
|
-
### Lists
|
|
129
|
-
|
|
130
|
-
```typescript
|
|
131
|
-
// Get list details
|
|
132
|
-
const list = await client.twitter.lists.getDetail("123456");
|
|
133
|
-
console.log(`${list.name}: ${list.member_count} members`);
|
|
134
|
-
|
|
135
|
-
// Get list tweets
|
|
136
|
-
const tweets = await client.twitter.lists.getTweets("123456");
|
|
137
|
-
|
|
138
|
-
// Get list members
|
|
139
|
-
const members = await client.twitter.lists.getMembers("123456");
|
|
140
|
-
|
|
141
|
-
// Search for lists
|
|
142
|
-
const lists = await client.twitter.lists.search("tech leaders");
|
|
143
|
-
```
|
|
144
|
-
|
|
145
|
-
### Communities
|
|
146
|
-
|
|
147
|
-
```typescript
|
|
148
|
-
// Get community details
|
|
149
|
-
const community = await client.twitter.communities.getDetail("123456");
|
|
150
|
-
console.log(`${community.name}: ${community.member_count} members`);
|
|
151
|
-
|
|
152
|
-
// Get community tweets
|
|
153
|
-
const tweets = await client.twitter.communities.getTweets("123456", {
|
|
154
|
-
tweetType: "Latest",
|
|
155
|
-
});
|
|
156
|
-
|
|
157
|
-
// Search communities
|
|
158
|
-
const communities = await client.twitter.communities.search("python developers");
|
|
159
|
-
```
|
|
160
|
-
|
|
161
|
-
### Trends
|
|
162
|
-
|
|
163
|
-
```typescript
|
|
164
|
-
// Get trending topics
|
|
165
|
-
const trends = await client.twitter.trends.getTrends();
|
|
166
|
-
for (const trend of trends.data) {
|
|
167
|
-
console.log(`${trend.name}: ${trend.tweet_count || "N/A"} tweets`);
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
// Get trends by category
|
|
171
|
-
const newsTrends = await client.twitter.trends.getTrends({
|
|
172
|
-
category: "news",
|
|
173
|
-
});
|
|
174
|
-
|
|
175
|
-
// Get trends for a specific location
|
|
176
|
-
const usTrends = await client.twitter.trends.getPlaceTrends(23424977); // US WOEID
|
|
177
|
-
console.log(`Trends in ${usTrends.name}:`);
|
|
178
|
-
for (const trend of usTrends.trends) {
|
|
179
|
-
console.log(` - ${trend.name}`);
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
// Get available locations
|
|
183
|
-
const locations = await client.twitter.trends.getAvailableLocations();
|
|
184
|
-
```
|
|
185
|
-
|
|
186
|
-
### Geographic Places
|
|
187
|
-
|
|
188
|
-
```typescript
|
|
189
|
-
// Search for places
|
|
190
|
-
const places = await client.twitter.geo.search({ query: "San Francisco" });
|
|
191
|
-
for (const place of places.data) {
|
|
192
|
-
console.log(`${place.full_name} (${place.place_type})`);
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
// Search by coordinates
|
|
196
|
-
const nearby = await client.twitter.geo.search({
|
|
197
|
-
lat: 37.7749,
|
|
198
|
-
long: -122.4194,
|
|
199
|
-
granularity: "city",
|
|
200
|
-
});
|
|
201
|
-
|
|
202
|
-
// Get place details
|
|
203
|
-
const place = await client.twitter.geo.getDetail("5a110d312052166f");
|
|
204
|
-
```
|
|
205
|
-
|
|
206
|
-
### Stream Monitoring
|
|
207
|
-
|
|
208
|
-
Real-time tweet monitoring with WebSocket streaming and webhook delivery.
|
|
209
|
-
|
|
210
|
-
```typescript
|
|
211
|
-
import { ScrapeBadger, WebSocketStreamError } from "scrapebadger";
|
|
212
|
-
|
|
213
|
-
const client = new ScrapeBadger({ apiKey: "your-api-key" });
|
|
214
|
-
|
|
215
|
-
// Create a monitor
|
|
216
|
-
const monitor = await client.twitter.stream.createMonitor({
|
|
217
|
-
name: "Tech Leaders",
|
|
218
|
-
usernames: ["elonmusk", "naval", "sama"],
|
|
219
|
-
pollIntervalSeconds: 10,
|
|
220
|
-
webhookUrl: "https://example.com/webhook",
|
|
221
|
-
});
|
|
222
|
-
console.log(`Created: ${monitor.id}, tier: ${monitor.pricing_tier}`);
|
|
223
|
-
console.log(`Credits/hr: ${monitor.estimated_credits_per_hour}`);
|
|
224
|
-
|
|
225
|
-
// List monitors
|
|
226
|
-
const { monitors, total } = await client.twitter.stream.listMonitors({ status: "active" });
|
|
227
|
-
console.log(`${total} active monitors`);
|
|
228
|
-
|
|
229
|
-
// Pause / resume
|
|
230
|
-
await client.twitter.stream.pauseMonitor(monitor.id);
|
|
231
|
-
await client.twitter.stream.resumeMonitor(monitor.id);
|
|
232
|
-
|
|
233
|
-
// Delete
|
|
234
|
-
await client.twitter.stream.deleteMonitor(monitor.id);
|
|
235
|
-
```
|
|
236
|
-
|
|
237
|
-
#### EventEmitter streaming
|
|
238
|
-
|
|
239
|
-
```typescript
|
|
240
|
-
const stream = client.twitter.stream.connect({
|
|
241
|
-
reconnect: true,
|
|
242
|
-
reconnectDelaySeconds: 90,
|
|
243
|
-
});
|
|
244
|
-
|
|
245
|
-
stream.on("connected", (e) => {
|
|
246
|
-
console.log("Connected, connection ID:", e.connectionId);
|
|
247
|
-
});
|
|
248
|
-
|
|
249
|
-
stream.on("tweet", (event) => {
|
|
250
|
-
console.log(`@${event.authorUsername}: ${event.tweet.text}`);
|
|
251
|
-
console.log(` latency: ${event.latencyMs}ms`);
|
|
252
|
-
});
|
|
253
|
-
|
|
254
|
-
stream.on("error", (err) => {
|
|
255
|
-
if (err instanceof WebSocketStreamError && err.code === 4001) {
|
|
256
|
-
console.error("API key rejected");
|
|
257
|
-
} else {
|
|
258
|
-
console.error("Stream error:", err.message);
|
|
259
|
-
}
|
|
260
|
-
});
|
|
261
|
-
|
|
262
|
-
stream.on("close", () => console.log("Stream closed"));
|
|
263
|
-
|
|
264
|
-
// Later: graceful disconnect
|
|
265
|
-
stream.close();
|
|
266
|
-
```
|
|
267
|
-
|
|
268
|
-
#### AsyncIterator streaming
|
|
64
|
+
## Authentication
|
|
269
65
|
|
|
270
66
|
```typescript
|
|
271
|
-
|
|
67
|
+
// Pass API key directly
|
|
68
|
+
const client = new ScrapeBadger({ apiKey: "sb_live_xxxxxxxxxxxxx" });
|
|
272
69
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
reconnect: true,
|
|
276
|
-
reconnectDelaySeconds: 90,
|
|
277
|
-
})) {
|
|
278
|
-
if (event.type === "tweet") {
|
|
279
|
-
console.log(`@${event.authorUsername}: ${event.latencyMs}ms latency`);
|
|
280
|
-
}
|
|
281
|
-
}
|
|
282
|
-
} catch (err) {
|
|
283
|
-
if (err instanceof WebSocketStreamError) {
|
|
284
|
-
console.error("Stream failed:", err.message, err.code);
|
|
285
|
-
}
|
|
286
|
-
}
|
|
70
|
+
// Or use environment variable SCRAPEBADGER_API_KEY
|
|
71
|
+
const client = new ScrapeBadger();
|
|
287
72
|
```
|
|
288
73
|
|
|
289
|
-
|
|
74
|
+
## Available APIs
|
|
290
75
|
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
app.post(
|
|
298
|
-
"/webhook",
|
|
299
|
-
express.raw({ type: "application/json" }),
|
|
300
|
-
(req, res) => {
|
|
301
|
-
const sig = req.headers["x-scrapebadger-signature"] as string;
|
|
302
|
-
if (!verifyWebhookSignature("my-webhook-secret", req.body, sig)) {
|
|
303
|
-
return res.status(401).json({ error: "Invalid signature" });
|
|
304
|
-
}
|
|
305
|
-
const event = JSON.parse(req.body.toString());
|
|
306
|
-
console.log("Received tweet:", event.tweet_id);
|
|
307
|
-
res.sendStatus(200);
|
|
308
|
-
}
|
|
309
|
-
);
|
|
310
|
-
```
|
|
76
|
+
| API | Description | Documentation |
|
|
77
|
+
|-----|-------------|---------------|
|
|
78
|
+
| **Web Scraping** | Scrape any website with JS rendering, anti-bot bypass, and AI extraction | [Web Scraping Guide](docs/web-scraping.md) |
|
|
79
|
+
| **Twitter** | 37+ endpoints for tweets, users, lists, communities, trends, and real-time streams | [Twitter Guide](docs/twitter.md) |
|
|
80
|
+
| **Google** | 19 products — Search, Maps, News, Hotels, Trends, Jobs, Shopping, Patents, Scholar, Images, Videos, Finance, AI Mode, Lens, Autocomplete, Local Pack, Shorts, Flights, Products | [Google Guide](docs/google.md) |
|
|
81
|
+
| **Vinted** | Search items, get details, user profiles, and reference data across all Vinted markets | [Vinted Guide](docs/vinted.md) |
|
|
311
82
|
|
|
312
83
|
## Error Handling
|
|
313
84
|
|
|
@@ -383,44 +154,8 @@ slows down to spread requests across the remaining window — preventing 429 err
|
|
|
383
154
|
|
|
384
155
|
This works transparently with all tier levels (Free: 60/min, Basic: 300/min,
|
|
385
156
|
Pro: 1000/min, Enterprise: 5000/min).
|
|
386
|
-
```
|
|
387
|
-
|
|
388
|
-
## API Reference
|
|
389
|
-
|
|
390
|
-
### Client
|
|
391
|
-
|
|
392
|
-
- `ScrapeBadger` - Main client class
|
|
393
|
-
|
|
394
|
-
### Twitter Module
|
|
395
|
-
|
|
396
|
-
- `client.twitter.tweets` - Tweet operations
|
|
397
|
-
- `client.twitter.users` - User operations
|
|
398
|
-
- `client.twitter.lists` - List operations
|
|
399
|
-
- `client.twitter.communities` - Community operations
|
|
400
|
-
- `client.twitter.trends` - Trend operations
|
|
401
|
-
- `client.twitter.geo` - Geographic place operations
|
|
402
|
-
- `client.twitter.stream` - Real-time stream monitor management and WebSocket streaming
|
|
403
|
-
|
|
404
|
-
### Stream Client Methods
|
|
405
|
-
|
|
406
|
-
- `createMonitor(params)` - Create a stream monitor
|
|
407
|
-
- `listMonitors(options?)` - List monitors with optional status filter
|
|
408
|
-
- `getMonitor(id)` - Get a monitor by ID
|
|
409
|
-
- `updateMonitor(id, params)` - Partially update a monitor
|
|
410
|
-
- `pauseMonitor(id)` - Pause an active monitor
|
|
411
|
-
- `resumeMonitor(id)` - Resume a paused monitor
|
|
412
|
-
- `deleteMonitor(id)` - Delete a monitor (irreversible)
|
|
413
|
-
- `listDeliveryLogs(options?)` - List tweet delivery logs
|
|
414
|
-
- `listBillingLogs(options?)` - List billing activity logs
|
|
415
|
-
- `connect(options?)` - Connect via EventEmitter (`.on("tweet", handler)`)
|
|
416
|
-
- `connectIter(options?)` - Connect via AsyncIterator (`for await`)
|
|
417
|
-
|
|
418
|
-
### Utilities
|
|
419
|
-
|
|
420
|
-
- `collectAll(asyncIterator)` - Collect async iterator results into an array
|
|
421
|
-
- `verifyWebhookSignature(secret, body, header)` - Verify incoming webhook HMAC signature
|
|
422
157
|
|
|
423
|
-
|
|
158
|
+
## Exceptions
|
|
424
159
|
|
|
425
160
|
- `ScrapeBadgerError` - Base exception class
|
|
426
161
|
- `AuthenticationError` - Invalid or missing API key
|
|
@@ -444,6 +179,7 @@ MIT License - see [LICENSE](LICENSE) for details.
|
|
|
444
179
|
|
|
445
180
|
## Links
|
|
446
181
|
|
|
447
|
-
- [Documentation](https://scrapebadger.com
|
|
182
|
+
- [Documentation](https://docs.scrapebadger.com)
|
|
183
|
+
- [Discord](https://discord.com/invite/3WvwTyWVCx)
|
|
448
184
|
- [GitHub](https://github.com/scrape-badger/scrapebadger-node)
|
|
449
185
|
- [npm](https://www.npmjs.com/package/scrapebadger)
|