scrapebadger 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -5
- package/dist/{index-DQ_jDTcQ.d.cts → index-Cg0sNluO.d.cts} +22 -0
- package/dist/{index-DQ_jDTcQ.d.ts → index-Cg0sNluO.d.ts} +22 -0
- package/dist/index.d.cts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +111 -19
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +111 -19
- package/dist/index.mjs.map +1 -1
- package/dist/twitter/index.d.cts +1 -1
- package/dist/twitter/index.d.ts +1 -1
- package/dist/twitter/index.js +49 -10
- package/dist/twitter/index.js.map +1 -1
- package/dist/twitter/index.mjs +49 -10
- package/dist/twitter/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -124,6 +124,19 @@ var BaseClient = class {
|
|
|
124
124
|
* Make an HTTP request to the API.
|
|
125
125
|
*/
|
|
126
126
|
async request(path, options = {}) {
|
|
127
|
+
const { data } = await this.requestRaw(path, options);
|
|
128
|
+
return data;
|
|
129
|
+
}
|
|
130
|
+
/**
|
|
131
|
+
* Make an HTTP request and return both data and rate limit headers.
|
|
132
|
+
*/
|
|
133
|
+
async requestWithHeaders(path, options = {}) {
|
|
134
|
+
return this.requestRaw(path, options);
|
|
135
|
+
}
|
|
136
|
+
/**
|
|
137
|
+
* Internal method that builds the request and executes it, returning data and rate limit info.
|
|
138
|
+
*/
|
|
139
|
+
async requestRaw(path, options = {}) {
|
|
127
140
|
const { method = "GET", params, body, headers = {} } = options;
|
|
128
141
|
const url = new URL(path, this.config.baseUrl);
|
|
129
142
|
if (params) {
|
|
@@ -137,7 +150,7 @@ var BaseClient = class {
|
|
|
137
150
|
"Content-Type": "application/json",
|
|
138
151
|
Accept: "application/json",
|
|
139
152
|
"X-API-Key": this.config.apiKey,
|
|
140
|
-
"User-Agent": "scrapebadger-node/0.1
|
|
153
|
+
"User-Agent": "scrapebadger-node/0.3.1",
|
|
141
154
|
...headers
|
|
142
155
|
};
|
|
143
156
|
const fetchOptions = {
|
|
@@ -156,8 +169,10 @@ var BaseClient = class {
|
|
|
156
169
|
let lastError;
|
|
157
170
|
for (let attempt = 0; attempt <= this.config.maxRetries; attempt++) {
|
|
158
171
|
try {
|
|
159
|
-
const
|
|
160
|
-
|
|
172
|
+
const httpResponse = await this.fetchWithTimeout(url, options);
|
|
173
|
+
const data = await this.handleResponse(httpResponse);
|
|
174
|
+
const rateLimit = this.parseRateLimitHeaders(httpResponse.headers);
|
|
175
|
+
return { data, rateLimit };
|
|
161
176
|
} catch (error) {
|
|
162
177
|
lastError = error;
|
|
163
178
|
if (error instanceof ScrapeBadgerError && !(error instanceof RateLimitError)) {
|
|
@@ -167,18 +182,56 @@ var BaseClient = class {
|
|
|
167
182
|
break;
|
|
168
183
|
}
|
|
169
184
|
const delay = this.config.retryDelay * Math.pow(2, attempt);
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
185
|
+
const delaySec = Math.round(delay / 1e3);
|
|
186
|
+
const attemptNum = attempt + 1;
|
|
187
|
+
const maxRetries = this.config.maxRetries;
|
|
188
|
+
if (error instanceof RateLimitError) {
|
|
189
|
+
console.warn(
|
|
190
|
+
`\x1B[33m\u26A0 ScrapeBadger: 429 Rate Limited \u2014 retrying in ${delaySec}s (attempt ${attemptNum}/${maxRetries})\x1B[0m`
|
|
191
|
+
);
|
|
192
|
+
if (error.retryAfter) {
|
|
193
|
+
const retryDelay = (error.retryAfter - Date.now() / 1e3) * 1e3;
|
|
194
|
+
if (retryDelay > 0 && retryDelay < 6e4) {
|
|
195
|
+
await this.sleep(retryDelay);
|
|
196
|
+
continue;
|
|
197
|
+
}
|
|
175
198
|
}
|
|
199
|
+
} else if (error instanceof TimeoutError) {
|
|
200
|
+
console.warn(
|
|
201
|
+
`\x1B[33m\u26A0 ScrapeBadger: TimeoutError \u2014 retrying in ${delaySec}s (attempt ${attemptNum}/${maxRetries})\x1B[0m`
|
|
202
|
+
);
|
|
203
|
+
} else if (error instanceof ServerError) {
|
|
204
|
+
console.warn(
|
|
205
|
+
`\x1B[33m\u26A0 ScrapeBadger: ${error.statusCode} ${error.message} \u2014 retrying in ${delaySec}s (attempt ${attemptNum}/${maxRetries})\x1B[0m`
|
|
206
|
+
);
|
|
207
|
+
} else {
|
|
208
|
+
console.warn(
|
|
209
|
+
`\x1B[33m\u26A0 ScrapeBadger: ${error.name} \u2014 retrying in ${delaySec}s (attempt ${attemptNum}/${maxRetries})\x1B[0m`
|
|
210
|
+
);
|
|
176
211
|
}
|
|
177
212
|
await this.sleep(delay);
|
|
178
213
|
}
|
|
179
214
|
}
|
|
180
215
|
throw lastError ?? new ScrapeBadgerError("Request failed after retries");
|
|
181
216
|
}
|
|
217
|
+
/**
|
|
218
|
+
* Parse rate limit headers from an HTTP response.
|
|
219
|
+
*/
|
|
220
|
+
parseRateLimitHeaders(headers) {
|
|
221
|
+
const limit = headers.get("X-RateLimit-Limit");
|
|
222
|
+
const remaining = headers.get("X-RateLimit-Remaining");
|
|
223
|
+
const reset = headers.get("X-RateLimit-Reset");
|
|
224
|
+
if (limit === null || remaining === null || reset === null) {
|
|
225
|
+
return void 0;
|
|
226
|
+
}
|
|
227
|
+
const parsedLimit = parseInt(limit, 10);
|
|
228
|
+
const parsedRemaining = parseInt(remaining, 10);
|
|
229
|
+
const parsedReset = parseInt(reset, 10);
|
|
230
|
+
if (isNaN(parsedLimit) || isNaN(parsedRemaining) || isNaN(parsedReset)) {
|
|
231
|
+
return void 0;
|
|
232
|
+
}
|
|
233
|
+
return { limit: parsedLimit, remaining: parsedRemaining, reset: parsedReset };
|
|
234
|
+
}
|
|
182
235
|
/**
|
|
183
236
|
* Fetch with timeout support.
|
|
184
237
|
*/
|
|
@@ -260,7 +313,7 @@ var BaseClient = class {
|
|
|
260
313
|
// src/internal/config.ts
|
|
261
314
|
var DEFAULT_BASE_URL = "https://scrapebadger.com";
|
|
262
315
|
var DEFAULT_TIMEOUT = 3e4;
|
|
263
|
-
var DEFAULT_MAX_RETRIES =
|
|
316
|
+
var DEFAULT_MAX_RETRIES = 10;
|
|
264
317
|
var DEFAULT_RETRY_DELAY = 1e3;
|
|
265
318
|
function resolveConfig(config) {
|
|
266
319
|
if (!config.apiKey) {
|
|
@@ -289,12 +342,26 @@ function createPaginatedResponse(data, cursor) {
|
|
|
289
342
|
hasMore: !!cursor
|
|
290
343
|
};
|
|
291
344
|
}
|
|
345
|
+
// Fraction of the rate limit quota below which paginate() warns and throttles:
// it compares `remaining / limit` against this value before yielding a page.
var RATE_LIMIT_WARN_THRESHOLD = 0.2;
|
|
292
346
|
async function* paginate(fetchPage, options = {}) {
|
|
293
347
|
const { maxItems } = options;
|
|
294
348
|
let cursor;
|
|
295
349
|
let totalYielded = 0;
|
|
296
350
|
do {
|
|
297
|
-
const response = await fetchPage(cursor);
|
|
351
|
+
const { response, rateLimit } = await fetchPage(cursor);
|
|
352
|
+
if (rateLimit) {
|
|
353
|
+
const { limit, remaining, reset } = rateLimit;
|
|
354
|
+
if (limit > 0 && remaining / limit < RATE_LIMIT_WARN_THRESHOLD) {
|
|
355
|
+
const nowSec = Date.now() / 1e3;
|
|
356
|
+
const windowRemainingSec = Math.max(reset - nowSec, 1);
|
|
357
|
+
const delayMs = remaining > 0 ? windowRemainingSec / remaining * 1e3 : windowRemainingSec * 1e3;
|
|
358
|
+
const resetInSec = Math.round(windowRemainingSec);
|
|
359
|
+
console.warn(
|
|
360
|
+
`\x1B[33m\u26A0 ScrapeBadger: Rate limit: ${remaining}/${limit} remaining (resets in ${resetInSec}s), throttling pagination\x1B[0m`
|
|
361
|
+
);
|
|
362
|
+
await sleep(delayMs);
|
|
363
|
+
}
|
|
364
|
+
}
|
|
298
365
|
for (const item of response.data) {
|
|
299
366
|
yield item;
|
|
300
367
|
totalYielded++;
|
|
@@ -312,6 +379,9 @@ async function collectAll(generator) {
|
|
|
312
379
|
}
|
|
313
380
|
return items;
|
|
314
381
|
}
|
|
382
|
+
/**
 * Pause for a given amount of time.
 *
 * @param ms - Delay in milliseconds, handed to `setTimeout`.
 * @returns A promise that fulfils (with no value) once the timer fires.
 */
function sleep(ms) {
  const waitForTimer = (done) => {
    setTimeout(done, ms);
  };
  return new Promise(waitForTimer);
}
|
|
315
385
|
|
|
316
386
|
// src/twitter/tweets.ts
|
|
317
387
|
var TweetsClient = class {
|
|
@@ -509,7 +579,8 @@ var TweetsClient = class {
|
|
|
509
579
|
*/
|
|
510
580
|
async *getQuotesAll(tweetId, options = {}) {
|
|
511
581
|
const fetchPage = async (cursor) => {
|
|
512
|
-
|
|
582
|
+
const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/tweets/tweet/${tweetId}/quotes`, { params: { cursor } });
|
|
583
|
+
return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
|
|
513
584
|
};
|
|
514
585
|
yield* paginate(fetchPage, options);
|
|
515
586
|
}
|
|
@@ -576,7 +647,15 @@ var TweetsClient = class {
|
|
|
576
647
|
*/
|
|
577
648
|
async *searchAll(query, options = {}) {
|
|
578
649
|
const fetchPage = async (cursor) => {
|
|
579
|
-
|
|
650
|
+
const { data, rateLimit } = await this.client.requestWithHeaders("/v1/twitter/tweets/advanced_search", {
|
|
651
|
+
params: {
|
|
652
|
+
query,
|
|
653
|
+
query_type: options.queryType ?? "Top",
|
|
654
|
+
count: options.count,
|
|
655
|
+
cursor
|
|
656
|
+
}
|
|
657
|
+
});
|
|
658
|
+
return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
|
|
580
659
|
};
|
|
581
660
|
yield* paginate(fetchPage, options);
|
|
582
661
|
}
|
|
@@ -620,7 +699,8 @@ var TweetsClient = class {
|
|
|
620
699
|
*/
|
|
621
700
|
async *getUserTweetsAll(username, options = {}) {
|
|
622
701
|
const fetchPage = async (cursor) => {
|
|
623
|
-
|
|
702
|
+
const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/users/${username}/latest_tweets`, { params: { cursor } });
|
|
703
|
+
return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
|
|
624
704
|
};
|
|
625
705
|
yield* paginate(fetchPage, options);
|
|
626
706
|
}
|
|
@@ -730,7 +810,8 @@ var UsersClient = class {
|
|
|
730
810
|
*/
|
|
731
811
|
async *getFollowersAll(username, options = {}) {
|
|
732
812
|
const fetchPage = async (cursor) => {
|
|
733
|
-
|
|
813
|
+
const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/users/${username}/followers`, { params: { cursor } });
|
|
814
|
+
return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
|
|
734
815
|
};
|
|
735
816
|
yield* paginate(fetchPage, options);
|
|
736
817
|
}
|
|
@@ -765,7 +846,8 @@ var UsersClient = class {
|
|
|
765
846
|
*/
|
|
766
847
|
async *getFollowingAll(username, options = {}) {
|
|
767
848
|
const fetchPage = async (cursor) => {
|
|
768
|
-
|
|
849
|
+
const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/users/${username}/followings`, { params: { cursor } });
|
|
850
|
+
return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
|
|
769
851
|
};
|
|
770
852
|
yield* paginate(fetchPage, options);
|
|
771
853
|
}
|
|
@@ -928,7 +1010,8 @@ var UsersClient = class {
|
|
|
928
1010
|
*/
|
|
929
1011
|
async *searchAll(query, options = {}) {
|
|
930
1012
|
const fetchPage = async (cursor) => {
|
|
931
|
-
|
|
1013
|
+
const { data, rateLimit } = await this.client.requestWithHeaders("/v1/twitter/users/search_users", { params: { query, cursor } });
|
|
1014
|
+
return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
|
|
932
1015
|
};
|
|
933
1016
|
yield* paginate(fetchPage, options);
|
|
934
1017
|
}
|
|
@@ -988,7 +1071,8 @@ var ListsClient = class {
|
|
|
988
1071
|
*/
|
|
989
1072
|
async *getTweetsAll(listId, options = {}) {
|
|
990
1073
|
const fetchPage = async (cursor) => {
|
|
991
|
-
|
|
1074
|
+
const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/lists/${listId}/tweets`, { params: { cursor } });
|
|
1075
|
+
return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
|
|
992
1076
|
};
|
|
993
1077
|
yield* paginate(fetchPage, options);
|
|
994
1078
|
}
|
|
@@ -1023,7 +1107,8 @@ var ListsClient = class {
|
|
|
1023
1107
|
*/
|
|
1024
1108
|
async *getMembersAll(listId, options = {}) {
|
|
1025
1109
|
const fetchPage = async (cursor) => {
|
|
1026
|
-
|
|
1110
|
+
const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/lists/${listId}/members`, { params: { cursor } });
|
|
1111
|
+
return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
|
|
1027
1112
|
};
|
|
1028
1113
|
yield* paginate(fetchPage, options);
|
|
1029
1114
|
}
|
|
@@ -1171,7 +1256,14 @@ var CommunitiesClient = class {
|
|
|
1171
1256
|
*/
|
|
1172
1257
|
async *getTweetsAll(communityId, options = {}) {
|
|
1173
1258
|
const fetchPage = async (cursor) => {
|
|
1174
|
-
|
|
1259
|
+
const { data, rateLimit } = await this.client.requestWithHeaders(`/v1/twitter/communities/${communityId}/tweets`, {
|
|
1260
|
+
params: {
|
|
1261
|
+
tweet_type: options.tweetType ?? "Top",
|
|
1262
|
+
count: options.count ?? 40,
|
|
1263
|
+
cursor
|
|
1264
|
+
}
|
|
1265
|
+
});
|
|
1266
|
+
return { response: createPaginatedResponse(data.data ?? [], data.next_cursor), rateLimit };
|
|
1175
1267
|
};
|
|
1176
1268
|
yield* paginate(fetchPage, options);
|
|
1177
1269
|
}
|