@the-convocation/twitter-scraper 0.16.1 → 0.16.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/default/cjs/index.js +135 -43
- package/dist/default/cjs/index.js.map +1 -1
- package/dist/default/esm/index.mjs +135 -43
- package/dist/default/esm/index.mjs.map +1 -1
- package/dist/node/cjs/index.cjs +135 -43
- package/dist/node/cjs/index.cjs.map +1 -1
- package/dist/node/esm/index.mjs +135 -43
- package/dist/node/esm/index.mjs.map +1 -1
- package/dist/types/index.d.ts +3 -2
- package/package.json +3 -1
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import debug from 'debug';
|
|
1
2
|
import { Cookie, CookieJar } from 'tough-cookie';
|
|
2
3
|
import setCookie from 'set-cookie-parser';
|
|
3
4
|
import { Headers } from 'headers-polyfill';
|
|
@@ -12,7 +13,7 @@ class ApiError extends Error {
|
|
|
12
13
|
super(
|
|
13
14
|
`Response status: ${response.status} | headers: ${JSON.stringify(
|
|
14
15
|
headersToString(response.headers)
|
|
15
|
-
)} | data: ${data}`
|
|
16
|
+
)} | data: ${typeof data === "string" ? data : JSON.stringify(data)}`
|
|
16
17
|
);
|
|
17
18
|
this.response = response;
|
|
18
19
|
this.data = data;
|
|
@@ -48,10 +49,15 @@ class AuthenticationError extends Error {
|
|
|
48
49
|
}
|
|
49
50
|
}
|
|
50
51
|
|
|
52
|
+
const log$2 = debug("twitter-scraper:rate-limit");
|
|
51
53
|
class WaitingRateLimitStrategy {
|
|
52
54
|
async onRateLimit({ response: res }) {
|
|
55
|
+
const xRateLimitLimit = res.headers.get("x-rate-limit-limit");
|
|
53
56
|
const xRateLimitRemaining = res.headers.get("x-rate-limit-remaining");
|
|
54
57
|
const xRateLimitReset = res.headers.get("x-rate-limit-reset");
|
|
58
|
+
log$2(
|
|
59
|
+
`Rate limit event: limit=${xRateLimitLimit}, remaining=${xRateLimitRemaining}, reset=${xRateLimitReset}`
|
|
60
|
+
);
|
|
55
61
|
if (xRateLimitRemaining == "0" && xRateLimitReset) {
|
|
56
62
|
const currentTime = (/* @__PURE__ */ new Date()).valueOf() / 1e3;
|
|
57
63
|
const timeDeltaMs = 1e3 * (parseInt(xRateLimitReset) - currentTime);
|
|
@@ -102,12 +108,14 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
102
108
|
}
|
|
103
109
|
}
|
|
104
110
|
|
|
111
|
+
const log$1 = debug("twitter-scraper:api");
|
|
105
112
|
const bearerToken = "AAAAAAAAAAAAAAAAAAAAAFQODgEAAAAAVHTp76lzh3rFzcHbmHVvQxYYpTw%3DckAlMINMjmCwxUcaXbAN4XqJVdgMJaHqNOFgPMK0zN1qLqLQCF";
|
|
106
113
|
async function jitter(maxMs) {
|
|
107
114
|
const jitter2 = Math.random() * maxMs;
|
|
108
115
|
await new Promise((resolve) => setTimeout(resolve, jitter2));
|
|
109
116
|
}
|
|
110
117
|
async function requestApi(url, auth, method = "GET", platform = new Platform()) {
|
|
118
|
+
log$1(`Making ${method} request to ${url}`);
|
|
111
119
|
const headers = new Headers();
|
|
112
120
|
await auth.installTo(headers, url);
|
|
113
121
|
await platform.randomizeCiphers();
|
|
@@ -134,6 +142,7 @@ async function requestApi(url, auth, method = "GET", platform = new Platform())
|
|
|
134
142
|
}
|
|
135
143
|
await updateCookieJar(auth.cookieJar(), res.headers);
|
|
136
144
|
if (res.status === 429) {
|
|
145
|
+
log$1("Rate limit hit, waiting for retry...");
|
|
137
146
|
await auth.onRateLimit({
|
|
138
147
|
fetchParameters,
|
|
139
148
|
response: res
|
|
@@ -292,11 +301,17 @@ class TwitterGuestAuth {
|
|
|
292
301
|
}
|
|
293
302
|
headers.set("cookie", await this.getCookieString());
|
|
294
303
|
}
|
|
295
|
-
getCookies() {
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
304
|
+
async getCookies() {
|
|
305
|
+
const cookies = await Promise.all([
|
|
306
|
+
this.jar.getCookies(this.getCookieJarUrl()),
|
|
307
|
+
this.jar.getCookies("https://twitter.com"),
|
|
308
|
+
this.jar.getCookies("https://x.com")
|
|
309
|
+
]);
|
|
310
|
+
return cookies.flat();
|
|
311
|
+
}
|
|
312
|
+
async getCookieString() {
|
|
313
|
+
const cookies = await this.getCookies();
|
|
314
|
+
return cookies.map((cookie) => `${cookie.key}=${cookie.value}`).join("; ");
|
|
300
315
|
}
|
|
301
316
|
async removeCookie(key) {
|
|
302
317
|
const store = this.jar.store;
|
|
@@ -316,7 +331,7 @@ class TwitterGuestAuth {
|
|
|
316
331
|
* Updates the authentication state with a new guest token from the Twitter API.
|
|
317
332
|
*/
|
|
318
333
|
async updateGuestToken() {
|
|
319
|
-
const guestActivateUrl = "https://api.
|
|
334
|
+
const guestActivateUrl = "https://api.x.com/1.1/guest/activate.json";
|
|
320
335
|
const headers = new Headers({
|
|
321
336
|
Authorization: `Bearer ${this.bearerToken}`,
|
|
322
337
|
Cookie: await this.getCookieString()
|
|
@@ -350,6 +365,7 @@ class TwitterGuestAuth {
|
|
|
350
365
|
}
|
|
351
366
|
}
|
|
352
367
|
|
|
368
|
+
const log = debug("twitter-scraper:auth-user");
|
|
353
369
|
const TwitterUserAuthSubtask = Type.Object({
|
|
354
370
|
subtask_id: Type.String(),
|
|
355
371
|
enter_text: Type.Optional(Type.Object({}))
|
|
@@ -401,7 +417,7 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
401
417
|
}
|
|
402
418
|
async isLoggedIn() {
|
|
403
419
|
const res = await requestApi(
|
|
404
|
-
"https://api.
|
|
420
|
+
"https://api.x.com/1.1/account/verify_credentials.json",
|
|
405
421
|
this
|
|
406
422
|
);
|
|
407
423
|
if (!res.success) {
|
|
@@ -445,7 +461,7 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
445
461
|
}
|
|
446
462
|
try {
|
|
447
463
|
await requestApi(
|
|
448
|
-
"https://api.
|
|
464
|
+
"https://api.x.com/1.1/account/logout.json",
|
|
449
465
|
this,
|
|
450
466
|
"POST"
|
|
451
467
|
);
|
|
@@ -481,15 +497,59 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
481
497
|
this.removeCookie("external_referer=");
|
|
482
498
|
this.removeCookie("ct0=");
|
|
483
499
|
this.removeCookie("aa_u=");
|
|
500
|
+
this.removeCookie("__cf_bm=");
|
|
484
501
|
return await this.executeFlowTask({
|
|
485
502
|
flow_name: "login",
|
|
486
503
|
input_flow_data: {
|
|
487
504
|
flow_context: {
|
|
488
505
|
debug_overrides: {},
|
|
489
506
|
start_location: {
|
|
490
|
-
location: "
|
|
507
|
+
location: "unknown"
|
|
491
508
|
}
|
|
492
509
|
}
|
|
510
|
+
},
|
|
511
|
+
subtask_versions: {
|
|
512
|
+
action_list: 2,
|
|
513
|
+
alert_dialog: 1,
|
|
514
|
+
app_download_cta: 1,
|
|
515
|
+
check_logged_in_account: 1,
|
|
516
|
+
choice_selection: 3,
|
|
517
|
+
contacts_live_sync_permission_prompt: 0,
|
|
518
|
+
cta: 7,
|
|
519
|
+
email_verification: 2,
|
|
520
|
+
end_flow: 1,
|
|
521
|
+
enter_date: 1,
|
|
522
|
+
enter_email: 2,
|
|
523
|
+
enter_password: 5,
|
|
524
|
+
enter_phone: 2,
|
|
525
|
+
enter_recaptcha: 1,
|
|
526
|
+
enter_text: 5,
|
|
527
|
+
enter_username: 2,
|
|
528
|
+
generic_urt: 3,
|
|
529
|
+
in_app_notification: 1,
|
|
530
|
+
interest_picker: 3,
|
|
531
|
+
js_instrumentation: 1,
|
|
532
|
+
menu_dialog: 1,
|
|
533
|
+
notifications_permission_prompt: 2,
|
|
534
|
+
open_account: 2,
|
|
535
|
+
open_home_timeline: 1,
|
|
536
|
+
open_link: 1,
|
|
537
|
+
phone_verification: 4,
|
|
538
|
+
privacy_options: 1,
|
|
539
|
+
security_key: 3,
|
|
540
|
+
select_avatar: 4,
|
|
541
|
+
select_banner: 2,
|
|
542
|
+
settings_list: 7,
|
|
543
|
+
show_code: 1,
|
|
544
|
+
sign_up: 2,
|
|
545
|
+
sign_up_review: 4,
|
|
546
|
+
tweet_selection_urt: 1,
|
|
547
|
+
update_users: 1,
|
|
548
|
+
upload_media: 1,
|
|
549
|
+
user_recommendations_list: 4,
|
|
550
|
+
user_recommendations_urt: 1,
|
|
551
|
+
wait_spinner: 3,
|
|
552
|
+
web_modal: 1
|
|
493
553
|
}
|
|
494
554
|
});
|
|
495
555
|
}
|
|
@@ -622,7 +682,10 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
622
682
|
});
|
|
623
683
|
}
|
|
624
684
|
async executeFlowTask(data) {
|
|
625
|
-
|
|
685
|
+
let onboardingTaskUrl = "https://api.x.com/1.1/onboarding/task.json";
|
|
686
|
+
if ("flow_name" in data) {
|
|
687
|
+
onboardingTaskUrl = `https://api.x.com/1.1/onboarding/task.json?flow_name=${data.flow_name}`;
|
|
688
|
+
}
|
|
626
689
|
const token = this.guestToken;
|
|
627
690
|
if (token == null) {
|
|
628
691
|
throw new AuthenticationError(
|
|
@@ -640,13 +703,37 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
640
703
|
"x-twitter-client-language": "en"
|
|
641
704
|
});
|
|
642
705
|
await this.installCsrfToken(headers);
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
706
|
+
let res;
|
|
707
|
+
do {
|
|
708
|
+
const fetchParameters = [
|
|
709
|
+
onboardingTaskUrl,
|
|
710
|
+
{
|
|
711
|
+
credentials: "include",
|
|
712
|
+
method: "POST",
|
|
713
|
+
headers,
|
|
714
|
+
body: JSON.stringify(data)
|
|
715
|
+
}
|
|
716
|
+
];
|
|
717
|
+
try {
|
|
718
|
+
res = await this.fetch(...fetchParameters);
|
|
719
|
+
} catch (err) {
|
|
720
|
+
if (!(err instanceof Error)) {
|
|
721
|
+
throw err;
|
|
722
|
+
}
|
|
723
|
+
return {
|
|
724
|
+
status: "error",
|
|
725
|
+
err: new Error("Failed to perform request.")
|
|
726
|
+
};
|
|
727
|
+
}
|
|
728
|
+
await updateCookieJar(this.jar, res.headers);
|
|
729
|
+
if (res.status === 429) {
|
|
730
|
+
log("Rate limit hit, waiting before retrying...");
|
|
731
|
+
await this.onRateLimit({
|
|
732
|
+
fetchParameters,
|
|
733
|
+
response: res
|
|
734
|
+
});
|
|
735
|
+
}
|
|
736
|
+
} while (res.status === 429);
|
|
650
737
|
if (!res.ok) {
|
|
651
738
|
return { status: "error", err: await ApiError.fromResponse(res) };
|
|
652
739
|
}
|
|
@@ -746,33 +833,33 @@ const apiRequestFactory = createApiRequestFactory(endpoints);
|
|
|
746
833
|
function getAvatarOriginalSizeUrl(avatarUrl) {
|
|
747
834
|
return avatarUrl ? avatarUrl.replace("_normal", "") : void 0;
|
|
748
835
|
}
|
|
749
|
-
function parseProfile(
|
|
836
|
+
function parseProfile(legacy, isBlueVerified) {
|
|
750
837
|
const profile = {
|
|
751
|
-
avatar: getAvatarOriginalSizeUrl(
|
|
752
|
-
banner:
|
|
753
|
-
biography:
|
|
754
|
-
followersCount:
|
|
755
|
-
followingCount:
|
|
756
|
-
friendsCount:
|
|
757
|
-
mediaCount:
|
|
758
|
-
isPrivate:
|
|
759
|
-
isVerified:
|
|
760
|
-
likesCount:
|
|
761
|
-
listedCount:
|
|
762
|
-
location:
|
|
763
|
-
name:
|
|
764
|
-
pinnedTweetIds:
|
|
765
|
-
tweetsCount:
|
|
766
|
-
url: `https://twitter.com/${
|
|
767
|
-
userId:
|
|
768
|
-
username:
|
|
838
|
+
avatar: getAvatarOriginalSizeUrl(legacy.profile_image_url_https),
|
|
839
|
+
banner: legacy.profile_banner_url,
|
|
840
|
+
biography: legacy.description,
|
|
841
|
+
followersCount: legacy.followers_count,
|
|
842
|
+
followingCount: legacy.friends_count,
|
|
843
|
+
friendsCount: legacy.friends_count,
|
|
844
|
+
mediaCount: legacy.media_count,
|
|
845
|
+
isPrivate: legacy.protected ?? false,
|
|
846
|
+
isVerified: legacy.verified,
|
|
847
|
+
likesCount: legacy.favourites_count,
|
|
848
|
+
listedCount: legacy.listed_count,
|
|
849
|
+
location: legacy.location,
|
|
850
|
+
name: legacy.name,
|
|
851
|
+
pinnedTweetIds: legacy.pinned_tweet_ids_str,
|
|
852
|
+
tweetsCount: legacy.statuses_count,
|
|
853
|
+
url: `https://twitter.com/${legacy.screen_name}`,
|
|
854
|
+
userId: legacy.id_str,
|
|
855
|
+
username: legacy.screen_name,
|
|
769
856
|
isBlueVerified: isBlueVerified ?? false,
|
|
770
|
-
canDm:
|
|
857
|
+
canDm: legacy.can_dm
|
|
771
858
|
};
|
|
772
|
-
if (
|
|
773
|
-
profile.joined = new Date(Date.parse(
|
|
859
|
+
if (legacy.created_at != null) {
|
|
860
|
+
profile.joined = new Date(Date.parse(legacy.created_at));
|
|
774
861
|
}
|
|
775
|
-
const urls =
|
|
862
|
+
const urls = legacy.entities?.url?.urls;
|
|
776
863
|
if (urls?.length != null && urls?.length > 0) {
|
|
777
864
|
profile.website = urls[0].expanded_url;
|
|
778
865
|
}
|
|
@@ -811,15 +898,20 @@ async function getProfile(username, auth) {
|
|
|
811
898
|
};
|
|
812
899
|
}
|
|
813
900
|
legacy.id_str = user.rest_id;
|
|
901
|
+
legacy.screen_name ?? (legacy.screen_name = user.core?.screen_name);
|
|
902
|
+
legacy.profile_image_url_https ?? (legacy.profile_image_url_https = user.avatar?.image_url);
|
|
903
|
+
legacy.created_at ?? (legacy.created_at = user.core?.created_at);
|
|
904
|
+
legacy.location ?? (legacy.location = user.location?.location);
|
|
905
|
+
legacy.name ?? (legacy.name = user.core?.name);
|
|
814
906
|
if (legacy.screen_name == null || legacy.screen_name.length === 0) {
|
|
815
907
|
return {
|
|
816
908
|
success: false,
|
|
817
|
-
err: new Error(`
|
|
909
|
+
err: new Error(`User ${username} does not exist or is private.`)
|
|
818
910
|
};
|
|
819
911
|
}
|
|
820
912
|
return {
|
|
821
913
|
success: true,
|
|
822
|
-
value: parseProfile(
|
|
914
|
+
value: parseProfile(legacy, user.is_blue_verified)
|
|
823
915
|
};
|
|
824
916
|
}
|
|
825
917
|
const idCache = /* @__PURE__ */ new Map();
|