@the-convocation/twitter-scraper 0.16.1 → 0.16.2

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
@@ -1,3 +1,4 @@
1
+ import debug from 'debug';
1
2
  import { Cookie, CookieJar } from 'tough-cookie';
2
3
  import setCookie from 'set-cookie-parser';
3
4
  import { Headers } from 'headers-polyfill';
@@ -12,7 +13,7 @@ class ApiError extends Error {
12
13
  super(
13
14
  `Response status: ${response.status} | headers: ${JSON.stringify(
14
15
  headersToString(response.headers)
15
- )} | data: ${data}`
16
+ )} | data: ${typeof data === "string" ? data : JSON.stringify(data)}`
16
17
  );
17
18
  this.response = response;
18
19
  this.data = data;
@@ -48,10 +49,15 @@ class AuthenticationError extends Error {
48
49
  }
49
50
  }
50
51
 
52
+ const log$2 = debug("twitter-scraper:rate-limit");
51
53
  class WaitingRateLimitStrategy {
52
54
  async onRateLimit({ response: res }) {
55
+ const xRateLimitLimit = res.headers.get("x-rate-limit-limit");
53
56
  const xRateLimitRemaining = res.headers.get("x-rate-limit-remaining");
54
57
  const xRateLimitReset = res.headers.get("x-rate-limit-reset");
58
+ log$2(
59
+ `Rate limit event: limit=${xRateLimitLimit}, remaining=${xRateLimitRemaining}, reset=${xRateLimitReset}`
60
+ );
55
61
  if (xRateLimitRemaining == "0" && xRateLimitReset) {
56
62
  const currentTime = (/* @__PURE__ */ new Date()).valueOf() / 1e3;
57
63
  const timeDeltaMs = 1e3 * (parseInt(xRateLimitReset) - currentTime);
@@ -102,12 +108,14 @@ async function updateCookieJar(cookieJar, headers) {
102
108
  }
103
109
  }
104
110
 
111
+ const log$1 = debug("twitter-scraper:api");
105
112
  const bearerToken = "AAAAAAAAAAAAAAAAAAAAAFQODgEAAAAAVHTp76lzh3rFzcHbmHVvQxYYpTw%3DckAlMINMjmCwxUcaXbAN4XqJVdgMJaHqNOFgPMK0zN1qLqLQCF";
106
113
  async function jitter(maxMs) {
107
114
  const jitter2 = Math.random() * maxMs;
108
115
  await new Promise((resolve) => setTimeout(resolve, jitter2));
109
116
  }
110
117
  async function requestApi(url, auth, method = "GET", platform = new Platform()) {
118
+ log$1(`Making ${method} request to ${url}`);
111
119
  const headers = new Headers();
112
120
  await auth.installTo(headers, url);
113
121
  await platform.randomizeCiphers();
@@ -134,6 +142,7 @@ async function requestApi(url, auth, method = "GET", platform = new Platform())
134
142
  }
135
143
  await updateCookieJar(auth.cookieJar(), res.headers);
136
144
  if (res.status === 429) {
145
+ log$1("Rate limit hit, waiting for retry...");
137
146
  await auth.onRateLimit({
138
147
  fetchParameters,
139
148
  response: res
@@ -292,11 +301,17 @@ class TwitterGuestAuth {
292
301
  }
293
302
  headers.set("cookie", await this.getCookieString());
294
303
  }
295
- getCookies() {
296
- return this.jar.getCookies(this.getCookieJarUrl());
297
- }
298
- getCookieString() {
299
- return this.jar.getCookieString(this.getCookieJarUrl());
304
+ async getCookies() {
305
+ const cookies = await Promise.all([
306
+ this.jar.getCookies(this.getCookieJarUrl()),
307
+ this.jar.getCookies("https://twitter.com"),
308
+ this.jar.getCookies("https://x.com")
309
+ ]);
310
+ return cookies.flat();
311
+ }
312
+ async getCookieString() {
313
+ const cookies = await this.getCookies();
314
+ return cookies.map((cookie) => `${cookie.key}=${cookie.value}`).join("; ");
300
315
  }
301
316
  async removeCookie(key) {
302
317
  const store = this.jar.store;
@@ -316,7 +331,7 @@ class TwitterGuestAuth {
316
331
  * Updates the authentication state with a new guest token from the Twitter API.
317
332
  */
318
333
  async updateGuestToken() {
319
- const guestActivateUrl = "https://api.twitter.com/1.1/guest/activate.json";
334
+ const guestActivateUrl = "https://api.x.com/1.1/guest/activate.json";
320
335
  const headers = new Headers({
321
336
  Authorization: `Bearer ${this.bearerToken}`,
322
337
  Cookie: await this.getCookieString()
@@ -350,6 +365,7 @@ class TwitterGuestAuth {
350
365
  }
351
366
  }
352
367
 
368
+ const log = debug("twitter-scraper:auth-user");
353
369
  const TwitterUserAuthSubtask = Type.Object({
354
370
  subtask_id: Type.String(),
355
371
  enter_text: Type.Optional(Type.Object({}))
@@ -401,7 +417,7 @@ class TwitterUserAuth extends TwitterGuestAuth {
401
417
  }
402
418
  async isLoggedIn() {
403
419
  const res = await requestApi(
404
- "https://api.twitter.com/1.1/account/verify_credentials.json",
420
+ "https://api.x.com/1.1/account/verify_credentials.json",
405
421
  this
406
422
  );
407
423
  if (!res.success) {
@@ -445,7 +461,7 @@ class TwitterUserAuth extends TwitterGuestAuth {
445
461
  }
446
462
  try {
447
463
  await requestApi(
448
- "https://api.twitter.com/1.1/account/logout.json",
464
+ "https://api.x.com/1.1/account/logout.json",
449
465
  this,
450
466
  "POST"
451
467
  );
@@ -481,15 +497,59 @@ class TwitterUserAuth extends TwitterGuestAuth {
481
497
  this.removeCookie("external_referer=");
482
498
  this.removeCookie("ct0=");
483
499
  this.removeCookie("aa_u=");
500
+ this.removeCookie("__cf_bm=");
484
501
  return await this.executeFlowTask({
485
502
  flow_name: "login",
486
503
  input_flow_data: {
487
504
  flow_context: {
488
505
  debug_overrides: {},
489
506
  start_location: {
490
- location: "splash_screen"
507
+ location: "unknown"
491
508
  }
492
509
  }
510
+ },
511
+ subtask_versions: {
512
+ action_list: 2,
513
+ alert_dialog: 1,
514
+ app_download_cta: 1,
515
+ check_logged_in_account: 1,
516
+ choice_selection: 3,
517
+ contacts_live_sync_permission_prompt: 0,
518
+ cta: 7,
519
+ email_verification: 2,
520
+ end_flow: 1,
521
+ enter_date: 1,
522
+ enter_email: 2,
523
+ enter_password: 5,
524
+ enter_phone: 2,
525
+ enter_recaptcha: 1,
526
+ enter_text: 5,
527
+ enter_username: 2,
528
+ generic_urt: 3,
529
+ in_app_notification: 1,
530
+ interest_picker: 3,
531
+ js_instrumentation: 1,
532
+ menu_dialog: 1,
533
+ notifications_permission_prompt: 2,
534
+ open_account: 2,
535
+ open_home_timeline: 1,
536
+ open_link: 1,
537
+ phone_verification: 4,
538
+ privacy_options: 1,
539
+ security_key: 3,
540
+ select_avatar: 4,
541
+ select_banner: 2,
542
+ settings_list: 7,
543
+ show_code: 1,
544
+ sign_up: 2,
545
+ sign_up_review: 4,
546
+ tweet_selection_urt: 1,
547
+ update_users: 1,
548
+ upload_media: 1,
549
+ user_recommendations_list: 4,
550
+ user_recommendations_urt: 1,
551
+ wait_spinner: 3,
552
+ web_modal: 1
493
553
  }
494
554
  });
495
555
  }
@@ -622,7 +682,10 @@ class TwitterUserAuth extends TwitterGuestAuth {
622
682
  });
623
683
  }
624
684
  async executeFlowTask(data) {
625
- const onboardingTaskUrl = "https://api.twitter.com/1.1/onboarding/task.json";
685
+ let onboardingTaskUrl = "https://api.x.com/1.1/onboarding/task.json";
686
+ if ("flow_name" in data) {
687
+ onboardingTaskUrl = `https://api.x.com/1.1/onboarding/task.json?flow_name=${data.flow_name}`;
688
+ }
626
689
  const token = this.guestToken;
627
690
  if (token == null) {
628
691
  throw new AuthenticationError(
@@ -640,13 +703,37 @@ class TwitterUserAuth extends TwitterGuestAuth {
640
703
  "x-twitter-client-language": "en"
641
704
  });
642
705
  await this.installCsrfToken(headers);
643
- const res = await this.fetch(onboardingTaskUrl, {
644
- credentials: "include",
645
- method: "POST",
646
- headers,
647
- body: JSON.stringify(data)
648
- });
649
- await updateCookieJar(this.jar, res.headers);
706
+ let res;
707
+ do {
708
+ const fetchParameters = [
709
+ onboardingTaskUrl,
710
+ {
711
+ credentials: "include",
712
+ method: "POST",
713
+ headers,
714
+ body: JSON.stringify(data)
715
+ }
716
+ ];
717
+ try {
718
+ res = await this.fetch(...fetchParameters);
719
+ } catch (err) {
720
+ if (!(err instanceof Error)) {
721
+ throw err;
722
+ }
723
+ return {
724
+ status: "error",
725
+ err: new Error("Failed to perform request.")
726
+ };
727
+ }
728
+ await updateCookieJar(this.jar, res.headers);
729
+ if (res.status === 429) {
730
+ log("Rate limit hit, waiting before retrying...");
731
+ await this.onRateLimit({
732
+ fetchParameters,
733
+ response: res
734
+ });
735
+ }
736
+ } while (res.status === 429);
650
737
  if (!res.ok) {
651
738
  return { status: "error", err: await ApiError.fromResponse(res) };
652
739
  }
@@ -746,33 +833,33 @@ const apiRequestFactory = createApiRequestFactory(endpoints);
746
833
  function getAvatarOriginalSizeUrl(avatarUrl) {
747
834
  return avatarUrl ? avatarUrl.replace("_normal", "") : void 0;
748
835
  }
749
- function parseProfile(user, isBlueVerified) {
836
+ function parseProfile(legacy, isBlueVerified) {
750
837
  const profile = {
751
- avatar: getAvatarOriginalSizeUrl(user.profile_image_url_https),
752
- banner: user.profile_banner_url,
753
- biography: user.description,
754
- followersCount: user.followers_count,
755
- followingCount: user.friends_count,
756
- friendsCount: user.friends_count,
757
- mediaCount: user.media_count,
758
- isPrivate: user.protected ?? false,
759
- isVerified: user.verified,
760
- likesCount: user.favourites_count,
761
- listedCount: user.listed_count,
762
- location: user.location,
763
- name: user.name,
764
- pinnedTweetIds: user.pinned_tweet_ids_str,
765
- tweetsCount: user.statuses_count,
766
- url: `https://twitter.com/${user.screen_name}`,
767
- userId: user.id_str,
768
- username: user.screen_name,
838
+ avatar: getAvatarOriginalSizeUrl(legacy.profile_image_url_https),
839
+ banner: legacy.profile_banner_url,
840
+ biography: legacy.description,
841
+ followersCount: legacy.followers_count,
842
+ followingCount: legacy.friends_count,
843
+ friendsCount: legacy.friends_count,
844
+ mediaCount: legacy.media_count,
845
+ isPrivate: legacy.protected ?? false,
846
+ isVerified: legacy.verified,
847
+ likesCount: legacy.favourites_count,
848
+ listedCount: legacy.listed_count,
849
+ location: legacy.location,
850
+ name: legacy.name,
851
+ pinnedTweetIds: legacy.pinned_tweet_ids_str,
852
+ tweetsCount: legacy.statuses_count,
853
+ url: `https://twitter.com/${legacy.screen_name}`,
854
+ userId: legacy.id_str,
855
+ username: legacy.screen_name,
769
856
  isBlueVerified: isBlueVerified ?? false,
770
- canDm: user.can_dm
857
+ canDm: legacy.can_dm
771
858
  };
772
- if (user.created_at != null) {
773
- profile.joined = new Date(Date.parse(user.created_at));
859
+ if (legacy.created_at != null) {
860
+ profile.joined = new Date(Date.parse(legacy.created_at));
774
861
  }
775
- const urls = user.entities?.url?.urls;
862
+ const urls = legacy.entities?.url?.urls;
776
863
  if (urls?.length != null && urls?.length > 0) {
777
864
  profile.website = urls[0].expanded_url;
778
865
  }
@@ -811,15 +898,20 @@ async function getProfile(username, auth) {
811
898
  };
812
899
  }
813
900
  legacy.id_str = user.rest_id;
901
+ legacy.screen_name ?? (legacy.screen_name = user.core?.screen_name);
902
+ legacy.profile_image_url_https ?? (legacy.profile_image_url_https = user.avatar?.image_url);
903
+ legacy.created_at ?? (legacy.created_at = user.core?.created_at);
904
+ legacy.location ?? (legacy.location = user.location?.location);
905
+ legacy.name ?? (legacy.name = user.core?.name);
814
906
  if (legacy.screen_name == null || legacy.screen_name.length === 0) {
815
907
  return {
816
908
  success: false,
817
- err: new Error(`Either ${username} does not exist or is private.`)
909
+ err: new Error(`User ${username} does not exist or is private.`)
818
910
  };
819
911
  }
820
912
  return {
821
913
  success: true,
822
- value: parseProfile(user.legacy, user.is_blue_verified)
914
+ value: parseProfile(legacy, user.is_blue_verified)
823
915
  };
824
916
  }
825
917
  const idCache = /* @__PURE__ */ new Map();