scrapebadger 0.4.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -12,17 +12,20 @@
12
12
  <a href="https://opensource.org/licenses/MIT"><img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="License: MIT"></a>
13
13
  </p>
14
14
 
15
- The official Node.js/TypeScript client library for the [ScrapeBadger](https://scrapebadger.com) API.
15
+ The official Node.js/TypeScript client library for the [ScrapeBadger](https://scrapebadger.com) API — Twitter, Google, Vinted, and general web scraping.
16
16
 
17
17
  ## Features
18
18
 
19
19
  - **Full TypeScript Support** - Complete type definitions for all API endpoints
20
20
  - **Modern ESM & CommonJS** - Works with both module systems
21
21
  - **Async Iterators** - Automatic pagination with `for await...of` syntax
22
- - **Smart Rate Limit Handling** - Reads API rate limit headers and automatically throttles pagination to avoid hitting limits
23
- - **Resilient Retries** - 10 automatic retries with exponential backoff on server errors, with colored console warnings on each retry
24
- - **Error Handling** - Typed exceptions for different error scenarios
25
- - **Tree Shakeable** - Import only what you need
22
+ - **Smart Rate Limiting** - Reads API rate-limit headers and throttles pagination automatically
23
+ - **Resilient Retries** - Exponential backoff with colored console warnings
24
+ - **Typed Exceptions** - Distinct error classes for every failure scenario
25
+ - **37+ Twitter endpoints** - Tweets, users, lists, communities, trends, geo, real-time streams
26
+ - **19 Google product APIs** - Search (with optional deferred AI Overview follow-up), Maps, News, Hotels, Trends (incl. topic autocomplete), Jobs, Shopping (+ merchant URL enrichment), Patents, Scholar (search + profiles + author + author citation + cite formats), Images, Videos, Finance, AI Mode, Lens, Autocomplete, **Local Pack**, **Shorts**, **Flights**, Products
27
+ - **Vinted scraping** - Search items, item details, user profiles, brands, colors, markets
28
+ - **Web scraping** - Anti-bot bypass, JS rendering, and AI data extraction
26
29
 
27
30
  ## Installation
28
31
 
@@ -43,271 +46,39 @@ pnpm add scrapebadger
43
46
  ```typescript
44
47
  import { ScrapeBadger } from "scrapebadger";
45
48
 
46
- // Create client with API key
47
49
  const client = new ScrapeBadger({ apiKey: "your-api-key" });
48
50
 
49
- // Or use environment variable (SCRAPEBADGER_API_KEY)
50
- const client = new ScrapeBadger();
51
-
52
51
  // Get a tweet
53
52
  const tweet = await client.twitter.tweets.getById("1234567890");
54
53
  console.log(`@${tweet.username}: ${tweet.text}`);
55
54
 
55
+ // Scrape a website
56
+ const result = await client.web.scrape("https://scrapebadger.com", { format: "markdown" });
57
+ console.log(result.content);
58
+
56
59
  // Get a user profile
57
60
  const user = await client.twitter.users.getByUsername("elonmusk");
58
61
  console.log(`${user.name} has ${user.followers_count.toLocaleString()} followers`);
59
62
  ```
60
63
 
61
- ## Usage Examples
62
-
63
- ### Search Tweets
64
-
65
- ```typescript
66
- import { ScrapeBadger } from "scrapebadger";
67
-
68
- const client = new ScrapeBadger({ apiKey: "your-api-key" });
69
-
70
- // Basic search (returns first page)
71
- const results = await client.twitter.tweets.search("python programming");
72
- for (const tweet of results.data) {
73
- console.log(`@${tweet.username}: ${tweet.text}`);
74
- }
75
-
76
- // Paginate manually
77
- if (results.hasMore) {
78
- const nextPage = await client.twitter.tweets.search("python programming", {
79
- cursor: results.nextCursor,
80
- });
81
- }
82
-
83
- // Automatic pagination with async iterators
84
- for await (const tweet of client.twitter.tweets.searchAll("python", { maxItems: 100 })) {
85
- console.log(tweet.text);
86
- }
87
-
88
- // Collect all results into an array
89
- import { collectAll } from "scrapebadger";
90
-
91
- const tweets = await collectAll(
92
- client.twitter.tweets.searchAll("python", { maxItems: 100 })
93
- );
94
- console.log(`Fetched ${tweets.length} tweets`);
95
- ```
96
-
97
- ### User Operations
98
-
99
- ```typescript
100
- // Get user by username
101
- const user = await client.twitter.users.getByUsername("elonmusk");
102
-
103
- // Get user by ID
104
- const userById = await client.twitter.users.getById("44196397");
105
-
106
- // Get extended profile info
107
- const about = await client.twitter.users.getAbout("elonmusk");
108
- console.log(`Account based in: ${about.account_based_in}`);
109
- console.log(`Username changes: ${about.username_changes}`);
110
-
111
- // Get followers
112
- const followers = await client.twitter.users.getFollowers("elonmusk");
113
- for (const follower of followers.data) {
114
- console.log(`@${follower.username}`);
115
- }
116
-
117
- // Iterate through all followers
118
- for await (const follower of client.twitter.users.getFollowersAll("elonmusk", {
119
- maxItems: 1000,
120
- })) {
121
- console.log(follower.username);
122
- }
123
-
124
- // Search users
125
- const users = await client.twitter.users.search("python developer");
126
- ```
127
-
128
- ### Lists
129
-
130
- ```typescript
131
- // Get list details
132
- const list = await client.twitter.lists.getDetail("123456");
133
- console.log(`${list.name}: ${list.member_count} members`);
134
-
135
- // Get list tweets
136
- const tweets = await client.twitter.lists.getTweets("123456");
137
-
138
- // Get list members
139
- const members = await client.twitter.lists.getMembers("123456");
140
-
141
- // Search for lists
142
- const lists = await client.twitter.lists.search("tech leaders");
143
- ```
144
-
145
- ### Communities
146
-
147
- ```typescript
148
- // Get community details
149
- const community = await client.twitter.communities.getDetail("123456");
150
- console.log(`${community.name}: ${community.member_count} members`);
151
-
152
- // Get community tweets
153
- const tweets = await client.twitter.communities.getTweets("123456", {
154
- tweetType: "Latest",
155
- });
156
-
157
- // Search communities
158
- const communities = await client.twitter.communities.search("python developers");
159
- ```
160
-
161
- ### Trends
162
-
163
- ```typescript
164
- // Get trending topics
165
- const trends = await client.twitter.trends.getTrends();
166
- for (const trend of trends.data) {
167
- console.log(`${trend.name}: ${trend.tweet_count || "N/A"} tweets`);
168
- }
169
-
170
- // Get trends by category
171
- const newsTrends = await client.twitter.trends.getTrends({
172
- category: "news",
173
- });
174
-
175
- // Get trends for a specific location
176
- const usTrends = await client.twitter.trends.getPlaceTrends(23424977); // US WOEID
177
- console.log(`Trends in ${usTrends.name}:`);
178
- for (const trend of usTrends.trends) {
179
- console.log(` - ${trend.name}`);
180
- }
181
-
182
- // Get available locations
183
- const locations = await client.twitter.trends.getAvailableLocations();
184
- ```
185
-
186
- ### Geographic Places
187
-
188
- ```typescript
189
- // Search for places
190
- const places = await client.twitter.geo.search({ query: "San Francisco" });
191
- for (const place of places.data) {
192
- console.log(`${place.full_name} (${place.place_type})`);
193
- }
194
-
195
- // Search by coordinates
196
- const nearby = await client.twitter.geo.search({
197
- lat: 37.7749,
198
- long: -122.4194,
199
- granularity: "city",
200
- });
201
-
202
- // Get place details
203
- const place = await client.twitter.geo.getDetail("5a110d312052166f");
204
- ```
205
-
206
- ### Stream Monitoring
207
-
208
- Real-time tweet monitoring with WebSocket streaming and webhook delivery.
209
-
210
- ```typescript
211
- import { ScrapeBadger, WebSocketStreamError } from "scrapebadger";
212
-
213
- const client = new ScrapeBadger({ apiKey: "your-api-key" });
214
-
215
- // Create a monitor
216
- const monitor = await client.twitter.stream.createMonitor({
217
- name: "Tech Leaders",
218
- usernames: ["elonmusk", "naval", "sama"],
219
- pollIntervalSeconds: 10,
220
- webhookUrl: "https://example.com/webhook",
221
- });
222
- console.log(`Created: ${monitor.id}, tier: ${monitor.pricing_tier}`);
223
- console.log(`Credits/hr: ${monitor.estimated_credits_per_hour}`);
224
-
225
- // List monitors
226
- const { monitors, total } = await client.twitter.stream.listMonitors({ status: "active" });
227
- console.log(`${total} active monitors`);
228
-
229
- // Pause / resume
230
- await client.twitter.stream.pauseMonitor(monitor.id);
231
- await client.twitter.stream.resumeMonitor(monitor.id);
232
-
233
- // Delete
234
- await client.twitter.stream.deleteMonitor(monitor.id);
235
- ```
236
-
237
- #### EventEmitter streaming
238
-
239
- ```typescript
240
- const stream = client.twitter.stream.connect({
241
- reconnect: true,
242
- reconnectDelaySeconds: 90,
243
- });
244
-
245
- stream.on("connected", (e) => {
246
- console.log("Connected, connection ID:", e.connectionId);
247
- });
248
-
249
- stream.on("tweet", (event) => {
250
- console.log(`@${event.authorUsername}: ${event.tweet.text}`);
251
- console.log(` latency: ${event.latencyMs}ms`);
252
- });
253
-
254
- stream.on("error", (err) => {
255
- if (err instanceof WebSocketStreamError && err.code === 4001) {
256
- console.error("API key rejected");
257
- } else {
258
- console.error("Stream error:", err.message);
259
- }
260
- });
261
-
262
- stream.on("close", () => console.log("Stream closed"));
263
-
264
- // Later: graceful disconnect
265
- stream.close();
266
- ```
267
-
268
- #### AsyncIterator streaming
64
+ ## Authentication
269
65
 
270
66
  ```typescript
271
- import { WebSocketStreamError } from "scrapebadger";
67
+ // Pass API key directly
68
+ const client = new ScrapeBadger({ apiKey: "sb_live_xxxxxxxxxxxxx" });
272
69
 
273
- try {
274
- for await (const event of client.twitter.stream.connectIter({
275
- reconnect: true,
276
- reconnectDelaySeconds: 90,
277
- })) {
278
- if (event.type === "tweet") {
279
- console.log(`@${event.authorUsername}: ${event.latencyMs}ms latency`);
280
- }
281
- }
282
- } catch (err) {
283
- if (err instanceof WebSocketStreamError) {
284
- console.error("Stream failed:", err.message, err.code);
285
- }
286
- }
70
+ // Or use environment variable SCRAPEBADGER_API_KEY
71
+ const client = new ScrapeBadger();
287
72
  ```
288
73
 
289
- #### Webhook signature verification
74
+ ## Available APIs
290
75
 
291
- ```typescript
292
- import { verifyWebhookSignature } from "scrapebadger/twitter";
293
- import express from "express";
294
-
295
- const app = express();
296
-
297
- app.post(
298
- "/webhook",
299
- express.raw({ type: "application/json" }),
300
- (req, res) => {
301
- const sig = req.headers["x-scrapebadger-signature"] as string;
302
- if (!verifyWebhookSignature("my-webhook-secret", req.body, sig)) {
303
- return res.status(401).json({ error: "Invalid signature" });
304
- }
305
- const event = JSON.parse(req.body.toString());
306
- console.log("Received tweet:", event.tweet_id);
307
- res.sendStatus(200);
308
- }
309
- );
310
- ```
76
+ | API | Description | Documentation |
77
+ |-----|-------------|---------------|
78
+ | **Web Scraping** | Scrape any website with JS rendering, anti-bot bypass, and AI extraction | [Web Scraping Guide](docs/web-scraping.md) |
79
+ | **Twitter** | 37+ endpoints for tweets, users, lists, communities, trends, and real-time streams | [Twitter Guide](docs/twitter.md) |
80
+ | **Google** | 19 products — Search, Maps, News, Hotels, Trends, Jobs, Shopping, Patents, Scholar, Images, Videos, Finance, AI Mode, Lens, Autocomplete, Local, Shorts, Flights, Products | [Google Guide](docs/google.md) |
81
+ | **Vinted** | Search items, get details, user profiles, and reference data across all Vinted markets | [Vinted Guide](docs/vinted.md) |
311
82
 
312
83
  ## Error Handling
313
84
 
@@ -383,44 +154,8 @@ slows down to spread requests across the remaining window — preventing 429 err
383
154
 
384
155
  This works transparently with all tier levels (Free: 60/min, Basic: 300/min,
385
156
  Pro: 1000/min, Enterprise: 5000/min).
386
- ```
387
-
388
- ## API Reference
389
-
390
- ### Client
391
-
392
- - `ScrapeBadger` - Main client class
393
-
394
- ### Twitter Module
395
-
396
- - `client.twitter.tweets` - Tweet operations
397
- - `client.twitter.users` - User operations
398
- - `client.twitter.lists` - List operations
399
- - `client.twitter.communities` - Community operations
400
- - `client.twitter.trends` - Trend operations
401
- - `client.twitter.geo` - Geographic place operations
402
- - `client.twitter.stream` - Real-time stream monitor management and WebSocket streaming
403
-
404
- ### Stream Client Methods
405
-
406
- - `createMonitor(params)` - Create a stream monitor
407
- - `listMonitors(options?)` - List monitors with optional status filter
408
- - `getMonitor(id)` - Get a monitor by ID
409
- - `updateMonitor(id, params)` - Partially update a monitor
410
- - `pauseMonitor(id)` - Pause an active monitor
411
- - `resumeMonitor(id)` - Resume a paused monitor
412
- - `deleteMonitor(id)` - Delete a monitor (irreversible)
413
- - `listDeliveryLogs(options?)` - List tweet delivery logs
414
- - `listBillingLogs(options?)` - List billing activity logs
415
- - `connect(options?)` - Connect via EventEmitter (`.on("tweet", handler)`)
416
- - `connectIter(options?)` - Connect via AsyncIterator (`for await`)
417
-
418
- ### Utilities
419
-
420
- - `collectAll(asyncIterator)` - Collect async iterator results into an array
421
- - `verifyWebhookSignature(secret, body, header)` - Verify incoming webhook HMAC signature
422
157
 
423
- ### Exceptions
158
+ ## Exceptions
424
159
 
425
160
  - `ScrapeBadgerError` - Base exception class
426
161
  - `AuthenticationError` - Invalid or missing API key
@@ -444,6 +179,7 @@ MIT License - see [LICENSE](LICENSE) for details.
444
179
 
445
180
  ## Links
446
181
 
447
- - [Documentation](https://scrapebadger.com/docs)
182
+ - [Documentation](https://docs.scrapebadger.com)
183
+ - [Discord](https://discord.com/invite/3WvwTyWVCx)
448
184
  - [GitHub](https://github.com/scrape-badger/scrapebadger-node)
449
185
  - [npm](https://www.npmjs.com/package/scrapebadger)