scrapebadger 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -124,6 +124,19 @@ var BaseClient = class {
124
124
  * Make an HTTP request to the API.
125
125
  */
126
126
  async request(path, options = {}) {
127
+ const { data } = await this.requestRaw(path, options);
128
+ return data;
129
+ }
130
+ /**
131
+ * Make an HTTP request and return both data and rate limit headers.
132
+ */
133
+ async requestWithHeaders(path, options = {}) {
134
+ return this.requestRaw(path, options);
135
+ }
136
+ /**
137
+ * Internal method that builds the request and executes it, returning data and rate limit info.
138
+ */
139
+ async requestRaw(path, options = {}) {
127
140
  const { method = "GET", params, body, headers = {} } = options;
128
141
  const url = new URL(path, this.config.baseUrl);
129
142
  if (params) {
@@ -137,7 +150,7 @@ var BaseClient = class {
137
150
  "Content-Type": "application/json",
138
151
  Accept: "application/json",
139
152
  "X-API-Key": this.config.apiKey,
140
- "User-Agent": "scrapebadger-node/0.1.0",
153
+ "User-Agent": "scrapebadger-node/0.3.1",
141
154
  ...headers
142
155
  };
143
156
  const fetchOptions = {
@@ -156,8 +169,10 @@ var BaseClient = class {
156
169
  let lastError;
157
170
  for (let attempt = 0; attempt <= this.config.maxRetries; attempt++) {
158
171
  try {
159
- const response = await this.fetchWithTimeout(url, options);
160
- return await this.handleResponse(response);
172
+ const httpResponse = await this.fetchWithTimeout(url, options);
173
+ const data = await this.handleResponse(httpResponse);
174
+ const rateLimit = this.parseRateLimitHeaders(httpResponse.headers);
175
+ return { data, rateLimit };
161
176
  } catch (error) {
162
177
  lastError = error;
163
178
  if (error instanceof ScrapeBadgerError && !(error instanceof RateLimitError)) {
@@ -167,18 +182,56 @@ var BaseClient = class {
167
182
  break;
168
183
  }
169
184
  const delay = this.config.retryDelay * Math.pow(2, attempt);
170
- if (error instanceof RateLimitError && error.retryAfter) {
171
- const retryDelay = (error.retryAfter - Date.now() / 1e3) * 1e3;
172
- if (retryDelay > 0 && retryDelay < 6e4) {
173
- await this.sleep(retryDelay);
174
- continue;
185
+ const delaySec = Math.round(delay / 1e3);
186
+ const attemptNum = attempt + 1;
187
+ const maxRetries = this.config.maxRetries;
188
+ if (error instanceof RateLimitError) {
189
+ console.warn(
190
+ `\x1B[33m\u26A0 ScrapeBadger: 429 Rate Limited \u2014 retrying in ${delaySec}s (attempt ${attemptNum}/${maxRetries})\x1B[0m`
191
+ );
192
+ if (error.retryAfter) {
193
+ const retryDelay = (error.retryAfter - Date.now() / 1e3) * 1e3;
194
+ if (retryDelay > 0 && retryDelay < 6e4) {
195
+ await this.sleep(retryDelay);
196
+ continue;
197
+ }
175
198
  }
199
+ } else if (error instanceof TimeoutError) {
200
+ console.warn(
201
+ `\x1B[33m\u26A0 ScrapeBadger: TimeoutError \u2014 retrying in ${delaySec}s (attempt ${attemptNum}/${maxRetries})\x1B[0m`
202
+ );
203
+ } else if (error instanceof ServerError) {
204
+ console.warn(
205
+ `\x1B[33m\u26A0 ScrapeBadger: ${error.statusCode} ${error.message} \u2014 retrying in ${delaySec}s (attempt ${attemptNum}/${maxRetries})\x1B[0m`
206
+ );
207
+ } else {
208
+ console.warn(
209
+ `\x1B[33m\u26A0 ScrapeBadger: ${error.name} \u2014 retrying in ${delaySec}s (attempt ${attemptNum}/${maxRetries})\x1B[0m`
210
+ );
176
211
  }
177
212
  await this.sleep(delay);
178
213
  }
179
214
  }
180
215
  throw lastError ?? new ScrapeBadgerError("Request failed after retries");
181
216
  }
217
+ /**
218
+ * Parse rate limit headers from an HTTP response.
219
+ */
220
+ parseRateLimitHeaders(headers) {
221
+ const limit = headers.get("X-RateLimit-Limit");
222
+ const remaining = headers.get("X-RateLimit-Remaining");
223
+ const reset = headers.get("X-RateLimit-Reset");
224
+ if (limit === null || remaining === null || reset === null) {
225
+ return void 0;
226
+ }
227
+ const parsedLimit = parseInt(limit, 10);
228
+ const parsedRemaining = parseInt(remaining, 10);
229
+ const parsedReset = parseInt(reset, 10);
230
+ if (isNaN(parsedLimit) || isNaN(parsedRemaining) || isNaN(parsedReset)) {
231
+ return void 0;
232
+ }
233
+ return { limit: parsedLimit, remaining: parsedRemaining, reset: parsedReset };
234
+ }
182
235
  /**
183
236
  * Fetch with timeout support.
184
237
  */
@@ -260,7 +313,7 @@ var BaseClient = class {
260
313
  // src/internal/config.ts
261
314
  var DEFAULT_BASE_URL = "https://scrapebadger.com";
262
315
  var DEFAULT_TIMEOUT = 3e4;
263
- var DEFAULT_MAX_RETRIES = 3;
316
+ var DEFAULT_MAX_RETRIES = 10;
264
317
  var DEFAULT_RETRY_DELAY = 1e3;
265
318
  function resolveConfig(config) {
266
319
  if (!config.apiKey) {
@@ -289,12 +342,26 @@ function createPaginatedResponse(data, cursor) {
289
342
  hasMore: !!cursor
290
343
  };
291
344
  }
345
+ var RATE_LIMIT_WARN_THRESHOLD = 0.2;
292
346
  async function* paginate(fetchPage, options = {}) {
293
347
  const { maxItems } = options;
294
348
  let cursor;
295
349
  let totalYielded = 0;
296
350
  do {
297
- const response = await fetchPage(cursor);
351
+ const { response, rateLimit } = await fetchPage(cursor);
352
+ if (rateLimit) {
353
+ const { limit, remaining, reset } = rateLimit;
354
+ if (limit > 0 && remaining / limit < RATE_LIMIT_WARN_THRESHOLD) {
355
+ const nowSec = Date.now() / 1e3;
356
+ const windowRemainingSec = Math.max(reset - nowSec, 1);
357
+ const delayMs = remaining > 0 ? windowRemainingSec / remaining * 1e3 : windowRemainingSec * 1e3;
358
+ const resetInSec = Math.round(windowRemainingSec);
359
+ console.warn(
360
+ `\x1B[33m\u26A0 ScrapeBadger: Rate limit: ${remaining}/${limit} remaining (resets in ${resetInSec}s), throttling pagination\x1B[0m`
361
+ );
362
+ await sleep(delayMs);
363
+ }
364
+ }
298
365
  for (const item of response.data) {
299
366
  yield item;
300
367
  totalYielded++;
@@ -312,6 +379,9 @@ async function collectAll(generator) {
312
379
  }
313
380
  return items;
314
381
  }
382
+ function sleep(ms) {
383
+ return new Promise((resolve) => setTimeout(resolve, ms));
384
+ }
315
385
 
316
386
  // src/twitter/tweets.ts
317
387
  var TweetsClient = class {
@@ -509,7 +579,8 @@ var TweetsClient = class {
509
579
  */
510
580
  async *getQuotesAll(tweetId, options = {}) {
511
581
  const fetchPage = async (cursor) => {
512
- return this.getQuotes(tweetId, { ...options, cursor });
582
+ const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/tweets/tweet/${tweetId}/quotes`, { params: { cursor } });
583
+ return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
513
584
  };
514
585
  yield* paginate(fetchPage, options);
515
586
  }
@@ -576,7 +647,15 @@ var TweetsClient = class {
576
647
  */
577
648
  async *searchAll(query, options = {}) {
578
649
  const fetchPage = async (cursor) => {
579
- return this.search(query, { ...options, cursor });
650
+ const { data, rateLimit } = await this.client.requestWithHeaders("/v1/twitter/tweets/advanced_search", {
651
+ params: {
652
+ query,
653
+ query_type: options.queryType ?? "Top",
654
+ count: options.count,
655
+ cursor
656
+ }
657
+ });
658
+ return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
580
659
  };
581
660
  yield* paginate(fetchPage, options);
582
661
  }
@@ -620,7 +699,8 @@ var TweetsClient = class {
620
699
  */
621
700
  async *getUserTweetsAll(username, options = {}) {
622
701
  const fetchPage = async (cursor) => {
623
- return this.getUserTweets(username, { ...options, cursor });
702
+ const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/users/${username}/latest_tweets`, { params: { cursor } });
703
+ return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
624
704
  };
625
705
  yield* paginate(fetchPage, options);
626
706
  }
@@ -730,7 +810,8 @@ var UsersClient = class {
730
810
  */
731
811
  async *getFollowersAll(username, options = {}) {
732
812
  const fetchPage = async (cursor) => {
733
- return this.getFollowers(username, { ...options, cursor });
813
+ const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/users/${username}/followers`, { params: { cursor } });
814
+ return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
734
815
  };
735
816
  yield* paginate(fetchPage, options);
736
817
  }
@@ -765,7 +846,8 @@ var UsersClient = class {
765
846
  */
766
847
  async *getFollowingAll(username, options = {}) {
767
848
  const fetchPage = async (cursor) => {
768
- return this.getFollowing(username, { ...options, cursor });
849
+ const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/users/${username}/followings`, { params: { cursor } });
850
+ return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
769
851
  };
770
852
  yield* paginate(fetchPage, options);
771
853
  }
@@ -928,7 +1010,8 @@ var UsersClient = class {
928
1010
  */
929
1011
  async *searchAll(query, options = {}) {
930
1012
  const fetchPage = async (cursor) => {
931
- return this.search(query, { ...options, cursor });
1013
+ const { data, rateLimit } = await this.client.requestWithHeaders("/v1/twitter/users/search_users", { params: { query, cursor } });
1014
+ return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
932
1015
  };
933
1016
  yield* paginate(fetchPage, options);
934
1017
  }
@@ -988,7 +1071,8 @@ var ListsClient = class {
988
1071
  */
989
1072
  async *getTweetsAll(listId, options = {}) {
990
1073
  const fetchPage = async (cursor) => {
991
- return this.getTweets(listId, { ...options, cursor });
1074
+ const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/lists/${listId}/tweets`, { params: { cursor } });
1075
+ return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
992
1076
  };
993
1077
  yield* paginate(fetchPage, options);
994
1078
  }
@@ -1023,7 +1107,8 @@ var ListsClient = class {
1023
1107
  */
1024
1108
  async *getMembersAll(listId, options = {}) {
1025
1109
  const fetchPage = async (cursor) => {
1026
- return this.getMembers(listId, { ...options, cursor });
1110
+ const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/lists/${listId}/members`, { params: { cursor } });
1111
+ return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
1027
1112
  };
1028
1113
  yield* paginate(fetchPage, options);
1029
1114
  }
@@ -1171,7 +1256,14 @@ var CommunitiesClient = class {
1171
1256
  */
1172
1257
  async *getTweetsAll(communityId, options = {}) {
1173
1258
  const fetchPage = async (cursor) => {
1174
- return this.getTweets(communityId, { ...options, cursor });
1259
+ const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/communities/${communityId}/tweets`, {
1260
+ params: {
1261
+ tweet_type: options.tweetType ?? "Top",
1262
+ count: options.count ?? 40,
1263
+ cursor
1264
+ }
1265
+ });
1266
+ return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
1175
1267
  };
1176
1268
  yield* paginate(fetchPage, options);
1177
1269
  }