scrapebadger 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -130,6 +130,19 @@ var BaseClient = class {
130
130
  * Make an HTTP request to the API.
131
131
  */
132
132
  async request(path, options = {}) {
133
+ const { data } = await this.requestRaw(path, options);
134
+ return data;
135
+ }
136
+ /**
137
+ * Make an HTTP request and return both data and rate limit headers.
138
+ */
139
+ async requestWithHeaders(path, options = {}) {
140
+ return this.requestRaw(path, options);
141
+ }
142
+ /**
143
+ * Internal method that builds the request and executes it, returning data and rate limit info.
144
+ */
145
+ async requestRaw(path, options = {}) {
133
146
  const { method = "GET", params, body, headers = {} } = options;
134
147
  const url = new URL(path, this.config.baseUrl);
135
148
  if (params) {
@@ -143,7 +156,7 @@ var BaseClient = class {
143
156
  "Content-Type": "application/json",
144
157
  Accept: "application/json",
145
158
  "X-API-Key": this.config.apiKey,
146
- "User-Agent": "scrapebadger-node/0.1.0",
159
+ "User-Agent": "scrapebadger-node/0.3.1",
147
160
  ...headers
148
161
  };
149
162
  const fetchOptions = {
@@ -162,8 +175,10 @@ var BaseClient = class {
162
175
  let lastError;
163
176
  for (let attempt = 0; attempt <= this.config.maxRetries; attempt++) {
164
177
  try {
165
- const response = await this.fetchWithTimeout(url, options);
166
- return await this.handleResponse(response);
178
+ const httpResponse = await this.fetchWithTimeout(url, options);
179
+ const data = await this.handleResponse(httpResponse);
180
+ const rateLimit = this.parseRateLimitHeaders(httpResponse.headers);
181
+ return { data, rateLimit };
167
182
  } catch (error) {
168
183
  lastError = error;
169
184
  if (error instanceof ScrapeBadgerError && !(error instanceof RateLimitError)) {
@@ -173,18 +188,56 @@ var BaseClient = class {
173
188
  break;
174
189
  }
175
190
  const delay = this.config.retryDelay * Math.pow(2, attempt);
176
- if (error instanceof RateLimitError && error.retryAfter) {
177
- const retryDelay = (error.retryAfter - Date.now() / 1e3) * 1e3;
178
- if (retryDelay > 0 && retryDelay < 6e4) {
179
- await this.sleep(retryDelay);
180
- continue;
191
+ const delaySec = Math.round(delay / 1e3);
192
+ const attemptNum = attempt + 1;
193
+ const maxRetries = this.config.maxRetries;
194
+ if (error instanceof RateLimitError) {
195
+ console.warn(
196
+ `\x1B[33m\u26A0 ScrapeBadger: 429 Rate Limited \u2014 retrying in ${delaySec}s (attempt ${attemptNum}/${maxRetries})\x1B[0m`
197
+ );
198
+ if (error.retryAfter) {
199
+ const retryDelay = (error.retryAfter - Date.now() / 1e3) * 1e3;
200
+ if (retryDelay > 0 && retryDelay < 6e4) {
201
+ await this.sleep(retryDelay);
202
+ continue;
203
+ }
181
204
  }
205
+ } else if (error instanceof TimeoutError) {
206
+ console.warn(
207
+ `\x1B[33m\u26A0 ScrapeBadger: TimeoutError \u2014 retrying in ${delaySec}s (attempt ${attemptNum}/${maxRetries})\x1B[0m`
208
+ );
209
+ } else if (error instanceof ServerError) {
210
+ console.warn(
211
+ `\x1B[33m\u26A0 ScrapeBadger: ${error.statusCode} ${error.message} \u2014 retrying in ${delaySec}s (attempt ${attemptNum}/${maxRetries})\x1B[0m`
212
+ );
213
+ } else {
214
+ console.warn(
215
+ `\x1B[33m\u26A0 ScrapeBadger: ${error.name} \u2014 retrying in ${delaySec}s (attempt ${attemptNum}/${maxRetries})\x1B[0m`
216
+ );
182
217
  }
183
218
  await this.sleep(delay);
184
219
  }
185
220
  }
186
221
  throw lastError ?? new ScrapeBadgerError("Request failed after retries");
187
222
  }
223
+ /**
224
+ * Parse rate limit headers from an HTTP response.
225
+ */
226
+ parseRateLimitHeaders(headers) {
227
+ const limit = headers.get("X-RateLimit-Limit");
228
+ const remaining = headers.get("X-RateLimit-Remaining");
229
+ const reset = headers.get("X-RateLimit-Reset");
230
+ if (limit === null || remaining === null || reset === null) {
231
+ return void 0;
232
+ }
233
+ const parsedLimit = parseInt(limit, 10);
234
+ const parsedRemaining = parseInt(remaining, 10);
235
+ const parsedReset = parseInt(reset, 10);
236
+ if (isNaN(parsedLimit) || isNaN(parsedRemaining) || isNaN(parsedReset)) {
237
+ return void 0;
238
+ }
239
+ return { limit: parsedLimit, remaining: parsedRemaining, reset: parsedReset };
240
+ }
188
241
  /**
189
242
  * Fetch with timeout support.
190
243
  */
@@ -266,7 +319,7 @@ var BaseClient = class {
266
319
  // src/internal/config.ts
267
320
  var DEFAULT_BASE_URL = "https://scrapebadger.com";
268
321
  var DEFAULT_TIMEOUT = 3e4;
269
- var DEFAULT_MAX_RETRIES = 3;
322
+ var DEFAULT_MAX_RETRIES = 10;
270
323
  var DEFAULT_RETRY_DELAY = 1e3;
271
324
  function resolveConfig(config) {
272
325
  if (!config.apiKey) {
@@ -295,12 +348,26 @@ function createPaginatedResponse(data, cursor) {
295
348
  hasMore: !!cursor
296
349
  };
297
350
  }
351
+ var RATE_LIMIT_WARN_THRESHOLD = 0.2;
298
352
  async function* paginate(fetchPage, options = {}) {
299
353
  const { maxItems } = options;
300
354
  let cursor;
301
355
  let totalYielded = 0;
302
356
  do {
303
- const response = await fetchPage(cursor);
357
+ const { response, rateLimit } = await fetchPage(cursor);
358
+ if (rateLimit) {
359
+ const { limit, remaining, reset } = rateLimit;
360
+ if (limit > 0 && remaining / limit < RATE_LIMIT_WARN_THRESHOLD) {
361
+ const nowSec = Date.now() / 1e3;
362
+ const windowRemainingSec = Math.max(reset - nowSec, 1);
363
+ const delayMs = remaining > 0 ? windowRemainingSec / remaining * 1e3 : windowRemainingSec * 1e3;
364
+ const resetInSec = Math.round(windowRemainingSec);
365
+ console.warn(
366
+ `\x1B[33m\u26A0 ScrapeBadger: Rate limit: ${remaining}/${limit} remaining (resets in ${resetInSec}s), throttling pagination\x1B[0m`
367
+ );
368
+ await sleep(delayMs);
369
+ }
370
+ }
304
371
  for (const item of response.data) {
305
372
  yield item;
306
373
  totalYielded++;
@@ -318,6 +385,9 @@ async function collectAll(generator) {
318
385
  }
319
386
  return items;
320
387
  }
388
+ function sleep(ms) {
389
+ return new Promise((resolve) => setTimeout(resolve, ms));
390
+ }
321
391
 
322
392
  // src/twitter/tweets.ts
323
393
  var TweetsClient = class {
@@ -515,7 +585,8 @@ var TweetsClient = class {
515
585
  */
516
586
  async *getQuotesAll(tweetId, options = {}) {
517
587
  const fetchPage = async (cursor) => {
518
- return this.getQuotes(tweetId, { ...options, cursor });
588
+ const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/tweets/tweet/${tweetId}/quotes`, { params: { cursor } });
589
+ return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
519
590
  };
520
591
  yield* paginate(fetchPage, options);
521
592
  }
@@ -582,7 +653,15 @@ var TweetsClient = class {
582
653
  */
583
654
  async *searchAll(query, options = {}) {
584
655
  const fetchPage = async (cursor) => {
585
- return this.search(query, { ...options, cursor });
656
+ const { data, rateLimit } = await this.client.requestWithHeaders("/v1/twitter/tweets/advanced_search", {
657
+ params: {
658
+ query,
659
+ query_type: options.queryType ?? "Top",
660
+ count: options.count,
661
+ cursor
662
+ }
663
+ });
664
+ return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
586
665
  };
587
666
  yield* paginate(fetchPage, options);
588
667
  }
@@ -626,10 +705,64 @@ var TweetsClient = class {
626
705
  */
627
706
  async *getUserTweetsAll(username, options = {}) {
628
707
  const fetchPage = async (cursor) => {
629
- return this.getUserTweets(username, { ...options, cursor });
708
+ const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/users/${username}/latest_tweets`, { params: { cursor } });
709
+ return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
630
710
  };
631
711
  yield* paginate(fetchPage, options);
632
712
  }
713
+ /**
714
+ * Get the edit history of a tweet.
715
+ *
716
+ * @param tweetId - The tweet ID to get edit history for.
717
+ * @returns Paginated response containing tweet versions.
718
+ *
719
+ * @example
720
+ * ```typescript
721
+ * const history = await client.twitter.tweets.getEditHistory("1234567890");
722
+ * console.log(`${history.data.length} version(s) of this tweet`);
723
+ * ```
724
+ */
725
+ async getEditHistory(tweetId) {
726
+ const response = await this.client.request(
727
+ `/v1/twitter/tweets/tweet/${tweetId}/edit_history`
728
+ );
729
+ return createPaginatedResponse(response.data ?? [], void 0);
730
+ }
731
+ /**
732
+ * Get community notes (Birdwatch) attached to a tweet.
733
+ *
734
+ * @param tweetId - The tweet ID to get community notes for.
735
+ * @returns Paginated response containing community notes.
736
+ *
737
+ * @example
738
+ * ```typescript
739
+ * const notes = await client.twitter.tweets.getCommunityNotes("1234567890");
740
+ * for (const note of notes.data) {
741
+ * console.log(note.text);
742
+ * }
743
+ * ```
744
+ */
745
+ async getCommunityNotes(tweetId) {
746
+ const response = await this.client.request(
747
+ `/v1/twitter/tweets/tweet/${tweetId}/community_notes`
748
+ );
749
+ return createPaginatedResponse(response.data ?? [], void 0);
750
+ }
751
+ /**
752
+ * Get a long-form article by its ID.
753
+ *
754
+ * @param articleId - The article ID to fetch.
755
+ * @returns The article data.
756
+ *
757
+ * @example
758
+ * ```typescript
759
+ * const article = await client.twitter.tweets.getArticle("abc123");
760
+ * console.log(`${article.title}: ${article.text?.slice(0, 100)}...`);
761
+ * ```
762
+ */
763
+ async getArticle(articleId) {
764
+ return this.client.request(`/v1/twitter/tweets/article/${articleId}`);
765
+ }
633
766
  };
634
767
 
635
768
  // src/twitter/users.ts
@@ -736,7 +869,8 @@ var UsersClient = class {
736
869
  */
737
870
  async *getFollowersAll(username, options = {}) {
738
871
  const fetchPage = async (cursor) => {
739
- return this.getFollowers(username, { ...options, cursor });
872
+ const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/users/${username}/followers`, { params: { cursor } });
873
+ return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
740
874
  };
741
875
  yield* paginate(fetchPage, options);
742
876
  }
@@ -771,7 +905,8 @@ var UsersClient = class {
771
905
  */
772
906
  async *getFollowingAll(username, options = {}) {
773
907
  const fetchPage = async (cursor) => {
774
- return this.getFollowing(username, { ...options, cursor });
908
+ const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/users/${username}/followings`, { params: { cursor } });
909
+ return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
775
910
  };
776
911
  yield* paginate(fetchPage, options);
777
912
  }
@@ -934,10 +1069,97 @@ var UsersClient = class {
934
1069
  */
935
1070
  async *searchAll(query, options = {}) {
936
1071
  const fetchPage = async (cursor) => {
937
- return this.search(query, { ...options, cursor });
1072
+ const { data, rateLimit } = await this.client.requestWithHeaders("/v1/twitter/users/search_users", { params: { query, cursor } });
1073
+ return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
938
1074
  };
939
1075
  yield* paginate(fetchPage, options);
940
1076
  }
1077
+ /**
1078
+ * Get multiple users by their numeric IDs in a single request.
1079
+ *
1080
+ * @param userIds - List of user IDs to fetch.
1081
+ * @returns Paginated response containing the matching users.
1082
+ *
1083
+ * @example
1084
+ * ```typescript
1085
+ * const users = await client.twitter.users.getByIds(["44196397", "783214"]);
1086
+ * for (const user of users.data) {
1087
+ * console.log(`@${user.username}`);
1088
+ * }
1089
+ * ```
1090
+ */
1091
+ async getByIds(userIds) {
1092
+ const response = await this.client.request(
1093
+ "/v1/twitter/users/batch_by_ids",
1094
+ { params: { user_ids: userIds.join(",") } }
1095
+ );
1096
+ return createPaginatedResponse(response.data ?? [], void 0);
1097
+ }
1098
+ /**
1099
+ * Get multiple users by their usernames in a single request.
1100
+ *
1101
+ * @param usernames - List of usernames (without @) to fetch.
1102
+ * @returns Paginated response containing the matching users.
1103
+ *
1104
+ * @example
1105
+ * ```typescript
1106
+ * const users = await client.twitter.users.getByUsernames(["elonmusk", "twitter"]);
1107
+ * for (const user of users.data) {
1108
+ * console.log(`${user.name}: ${user.followers_count?.toLocaleString()} followers`);
1109
+ * }
1110
+ * ```
1111
+ */
1112
+ async getByUsernames(usernames) {
1113
+ const response = await this.client.request(
1114
+ "/v1/twitter/users/batch_by_usernames",
1115
+ { params: { usernames: usernames.join(",") } }
1116
+ );
1117
+ return createPaginatedResponse(response.data ?? [], void 0);
1118
+ }
1119
+ /**
1120
+ * Get tweets that mention a user.
1121
+ *
1122
+ * @param username - The user's username (without @).
1123
+ * @param options - Pagination options with optional count.
1124
+ * @returns Paginated response containing tweets mentioning the user.
1125
+ *
1126
+ * @example
1127
+ * ```typescript
1128
+ * const mentions = await client.twitter.users.getMentions("elonmusk");
1129
+ * for (const tweet of mentions.data) {
1130
+ * console.log(`@${tweet.username}: ${tweet.text.slice(0, 100)}...`);
1131
+ * }
1132
+ * ```
1133
+ */
1134
+ async getMentions(username, options = {}) {
1135
+ const response = await this.client.request(
1136
+ `/v1/twitter/users/${username}/mentions`,
1137
+ { params: { count: options.count, cursor: options.cursor } }
1138
+ );
1139
+ return createPaginatedResponse(response.data ?? [], response.next_cursor);
1140
+ }
1141
+ /**
1142
+ * Get long-form articles authored by a user.
1143
+ *
1144
+ * @param userId - The user's numeric ID.
1145
+ * @param options - Pagination options with optional count.
1146
+ * @returns Paginated response containing the user's articles as tweets.
1147
+ *
1148
+ * @example
1149
+ * ```typescript
1150
+ * const articles = await client.twitter.users.getArticles("44196397");
1151
+ * for (const article of articles.data) {
1152
+ * console.log(article.text?.slice(0, 100));
1153
+ * }
1154
+ * ```
1155
+ */
1156
+ async getArticles(userId, options = {}) {
1157
+ const response = await this.client.request(
1158
+ `/v1/twitter/users/${userId}/articles`,
1159
+ { params: { count: options.count, cursor: options.cursor } }
1160
+ );
1161
+ return createPaginatedResponse(response.data ?? [], response.next_cursor);
1162
+ }
941
1163
  };
942
1164
 
943
1165
  // src/twitter/lists.ts
@@ -994,7 +1216,8 @@ var ListsClient = class {
994
1216
  */
995
1217
  async *getTweetsAll(listId, options = {}) {
996
1218
  const fetchPage = async (cursor) => {
997
- return this.getTweets(listId, { ...options, cursor });
1219
+ const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/lists/${listId}/tweets`, { params: { cursor } });
1220
+ return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
998
1221
  };
999
1222
  yield* paginate(fetchPage, options);
1000
1223
  }
@@ -1029,7 +1252,8 @@ var ListsClient = class {
1029
1252
  */
1030
1253
  async *getMembersAll(listId, options = {}) {
1031
1254
  const fetchPage = async (cursor) => {
1032
- return this.getMembers(listId, { ...options, cursor });
1255
+ const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/lists/${listId}/members`, { params: { cursor } });
1256
+ return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
1033
1257
  };
1034
1258
  yield* paginate(fetchPage, options);
1035
1259
  }
@@ -1082,6 +1306,29 @@ var ListsClient = class {
1082
1306
  );
1083
1307
  return createPaginatedResponse(response.data ?? [], response.next_cursor);
1084
1308
  }
1309
+ /**
1310
+ * Search tweets within a specific list.
1311
+ *
1312
+ * @param listId - The list ID to search within.
1313
+ * @param query - Search query string.
1314
+ * @param options - Pagination options with optional count.
1315
+ * @returns Paginated response containing matching tweets from the list.
1316
+ *
1317
+ * @example
1318
+ * ```typescript
1319
+ * const results = await client.twitter.lists.searchTweets("123456", "python");
1320
+ * for (const tweet of results.data) {
1321
+ * console.log(`@${tweet.username}: ${tweet.text.slice(0, 100)}...`);
1322
+ * }
1323
+ * ```
1324
+ */
1325
+ async searchTweets(listId, query, options = {}) {
1326
+ const response = await this.client.request(
1327
+ `/v1/twitter/lists/${listId}/search_tweets`,
1328
+ { params: { query, count: options.count, cursor: options.cursor } }
1329
+ );
1330
+ return createPaginatedResponse(response.data ?? [], response.next_cursor);
1331
+ }
1085
1332
  };
1086
1333
 
1087
1334
  // src/twitter/communities.ts
@@ -1177,7 +1424,14 @@ var CommunitiesClient = class {
1177
1424
  */
1178
1425
  async *getTweetsAll(communityId, options = {}) {
1179
1426
  const fetchPage = async (cursor) => {
1180
- return this.getTweets(communityId, { ...options, cursor });
1427
+ const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/communities/${communityId}/tweets`, {
1428
+ params: {
1429
+ tweet_type: options.tweetType ?? "Top",
1430
+ count: options.count ?? 40,
1431
+ cursor
1432
+ }
1433
+ });
1434
+ return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
1181
1435
  };
1182
1436
  yield* paginate(fetchPage, options);
1183
1437
  }
@@ -2096,6 +2350,46 @@ function verifyWebhookSignature(secret, body, signatureHeader) {
2096
2350
  }
2097
2351
  }
2098
2352
 
2353
+ // src/twitter/spaces.ts
2354
+ var SpacesClient = class {
2355
+ client;
2356
+ constructor(client) {
2357
+ this.client = client;
2358
+ }
2359
+ /**
2360
+ * Get details for a specific Twitter Space.
2361
+ *
2362
+ * @param spaceId - The Space ID to fetch.
2363
+ * @returns The Space data.
2364
+ * @throws NotFoundError - If the Space doesn't exist.
2365
+ *
2366
+ * @example
2367
+ * ```typescript
2368
+ * const space = await client.twitter.spaces.getDetail("1eaKbrPPbPwKX");
2369
+ * console.log(`${space.title} — ${space.participant_count} participants`);
2370
+ * ```
2371
+ */
2372
+ async getDetail(spaceId) {
2373
+ return this.client.request(`/v1/twitter/spaces/${spaceId}`);
2374
+ }
2375
+ /**
2376
+ * Get details for a live video broadcast.
2377
+ *
2378
+ * @param broadcastId - The broadcast ID to fetch.
2379
+ * @returns The broadcast data.
2380
+ * @throws NotFoundError - If the broadcast doesn't exist.
2381
+ *
2382
+ * @example
2383
+ * ```typescript
2384
+ * const broadcast = await client.twitter.spaces.getBroadcast("broadcast123");
2385
+ * console.log(`${broadcast.title}: ${broadcast.total_viewers} viewers`);
2386
+ * ```
2387
+ */
2388
+ async getBroadcast(broadcastId) {
2389
+ return this.client.request(`/v1/twitter/spaces/broadcast/${broadcastId}`);
2390
+ }
2391
+ };
2392
+
2099
2393
  // src/twitter/client.ts
2100
2394
  var TwitterClient = class {
2101
2395
  /** Client for tweet operations */
@@ -2112,6 +2406,8 @@ var TwitterClient = class {
2112
2406
  geo;
2113
2407
  /** Client for real-time stream monitor management and WebSocket streaming */
2114
2408
  stream;
2409
+ /** Client for Twitter Spaces and live broadcast operations */
2410
+ spaces;
2115
2411
  /**
2116
2412
  * Create a new Twitter client.
2117
2413
  *
@@ -2125,6 +2421,7 @@ var TwitterClient = class {
2125
2421
  this.trends = new TrendsClient(client);
2126
2422
  this.geo = new GeoClient(client);
2127
2423
  this.stream = new StreamClient(client);
2424
+ this.spaces = new SpacesClient(client);
2128
2425
  }
2129
2426
  };
2130
2427
 
@@ -2285,6 +2582,7 @@ exports.RateLimitError = RateLimitError;
2285
2582
  exports.ScrapeBadger = ScrapeBadger;
2286
2583
  exports.ScrapeBadgerError = ScrapeBadgerError;
2287
2584
  exports.ServerError = ServerError;
2585
+ exports.SpacesClient = SpacesClient;
2288
2586
  exports.StreamClient = StreamClient;
2289
2587
  exports.TimeoutError = TimeoutError;
2290
2588
  exports.TrendsClient = TrendsClient;