@the-convocation/twitter-scraper 0.16.1 → 0.16.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ import debug from 'debug';
1
2
  import { Cookie, CookieJar } from 'tough-cookie';
2
3
  import setCookie from 'set-cookie-parser';
3
4
  import { Headers } from 'headers-polyfill';
@@ -14,7 +15,7 @@ class ApiError extends Error {
14
15
  super(
15
16
  `Response status: ${response.status} | headers: ${JSON.stringify(
16
17
  headersToString(response.headers)
17
- )} | data: ${data}`
18
+ )} | data: ${typeof data === "string" ? data : JSON.stringify(data)}`
18
19
  );
19
20
  this.response = response;
20
21
  this.data = data;
@@ -50,10 +51,15 @@ class AuthenticationError extends Error {
50
51
  }
51
52
  }
52
53
 
54
+ const log$2 = debug("twitter-scraper:rate-limit");
53
55
  class WaitingRateLimitStrategy {
54
56
  async onRateLimit({ response: res }) {
57
+ const xRateLimitLimit = res.headers.get("x-rate-limit-limit");
55
58
  const xRateLimitRemaining = res.headers.get("x-rate-limit-remaining");
56
59
  const xRateLimitReset = res.headers.get("x-rate-limit-reset");
60
+ log$2(
61
+ `Rate limit event: limit=${xRateLimitLimit}, remaining=${xRateLimitRemaining}, reset=${xRateLimitReset}`
62
+ );
57
63
  if (xRateLimitRemaining == "0" && xRateLimitReset) {
58
64
  const currentTime = (/* @__PURE__ */ new Date()).valueOf() / 1e3;
59
65
  const timeDeltaMs = 1e3 * (parseInt(xRateLimitReset) - currentTime);
@@ -101,12 +107,14 @@ async function updateCookieJar(cookieJar, headers) {
101
107
  }
102
108
  }
103
109
 
110
+ const log$1 = debug("twitter-scraper:api");
104
111
  const bearerToken = "AAAAAAAAAAAAAAAAAAAAAFQODgEAAAAAVHTp76lzh3rFzcHbmHVvQxYYpTw%3DckAlMINMjmCwxUcaXbAN4XqJVdgMJaHqNOFgPMK0zN1qLqLQCF";
105
112
  async function jitter(maxMs) {
106
113
  const jitter2 = Math.random() * maxMs;
107
114
  await new Promise((resolve) => setTimeout(resolve, jitter2));
108
115
  }
109
116
  async function requestApi(url, auth, method = "GET", platform = new Platform()) {
117
+ log$1(`Making ${method} request to ${url}`);
110
118
  const headers = new Headers();
111
119
  await auth.installTo(headers, url);
112
120
  await platform.randomizeCiphers();
@@ -133,6 +141,7 @@ async function requestApi(url, auth, method = "GET", platform = new Platform())
133
141
  }
134
142
  await updateCookieJar(auth.cookieJar(), res.headers);
135
143
  if (res.status === 429) {
144
+ log$1("Rate limit hit, waiting for retry...");
136
145
  await auth.onRateLimit({
137
146
  fetchParameters,
138
147
  response: res
@@ -291,11 +300,17 @@ class TwitterGuestAuth {
291
300
  }
292
301
  headers.set("cookie", await this.getCookieString());
293
302
  }
294
- getCookies() {
295
- return this.jar.getCookies(this.getCookieJarUrl());
296
- }
297
- getCookieString() {
298
- return this.jar.getCookieString(this.getCookieJarUrl());
303
+ async getCookies() {
304
+ const cookies = await Promise.all([
305
+ this.jar.getCookies(this.getCookieJarUrl()),
306
+ this.jar.getCookies("https://twitter.com"),
307
+ this.jar.getCookies("https://x.com")
308
+ ]);
309
+ return cookies.flat();
310
+ }
311
+ async getCookieString() {
312
+ const cookies = await this.getCookies();
313
+ return cookies.map((cookie) => `${cookie.key}=${cookie.value}`).join("; ");
299
314
  }
300
315
  async removeCookie(key) {
301
316
  const store = this.jar.store;
@@ -315,7 +330,7 @@ class TwitterGuestAuth {
315
330
  * Updates the authentication state with a new guest token from the Twitter API.
316
331
  */
317
332
  async updateGuestToken() {
318
- const guestActivateUrl = "https://api.twitter.com/1.1/guest/activate.json";
333
+ const guestActivateUrl = "https://api.x.com/1.1/guest/activate.json";
319
334
  const headers = new Headers({
320
335
  Authorization: `Bearer ${this.bearerToken}`,
321
336
  Cookie: await this.getCookieString()
@@ -349,6 +364,7 @@ class TwitterGuestAuth {
349
364
  }
350
365
  }
351
366
 
367
+ const log = debug("twitter-scraper:auth-user");
352
368
  const TwitterUserAuthSubtask = Type.Object({
353
369
  subtask_id: Type.String(),
354
370
  enter_text: Type.Optional(Type.Object({}))
@@ -400,7 +416,7 @@ class TwitterUserAuth extends TwitterGuestAuth {
400
416
  }
401
417
  async isLoggedIn() {
402
418
  const res = await requestApi(
403
- "https://api.twitter.com/1.1/account/verify_credentials.json",
419
+ "https://api.x.com/1.1/account/verify_credentials.json",
404
420
  this
405
421
  );
406
422
  if (!res.success) {
@@ -444,7 +460,7 @@ class TwitterUserAuth extends TwitterGuestAuth {
444
460
  }
445
461
  try {
446
462
  await requestApi(
447
- "https://api.twitter.com/1.1/account/logout.json",
463
+ "https://api.x.com/1.1/account/logout.json",
448
464
  this,
449
465
  "POST"
450
466
  );
@@ -480,15 +496,59 @@ class TwitterUserAuth extends TwitterGuestAuth {
480
496
  this.removeCookie("external_referer=");
481
497
  this.removeCookie("ct0=");
482
498
  this.removeCookie("aa_u=");
499
+ this.removeCookie("__cf_bm=");
483
500
  return await this.executeFlowTask({
484
501
  flow_name: "login",
485
502
  input_flow_data: {
486
503
  flow_context: {
487
504
  debug_overrides: {},
488
505
  start_location: {
489
- location: "splash_screen"
506
+ location: "unknown"
490
507
  }
491
508
  }
509
+ },
510
+ subtask_versions: {
511
+ action_list: 2,
512
+ alert_dialog: 1,
513
+ app_download_cta: 1,
514
+ check_logged_in_account: 1,
515
+ choice_selection: 3,
516
+ contacts_live_sync_permission_prompt: 0,
517
+ cta: 7,
518
+ email_verification: 2,
519
+ end_flow: 1,
520
+ enter_date: 1,
521
+ enter_email: 2,
522
+ enter_password: 5,
523
+ enter_phone: 2,
524
+ enter_recaptcha: 1,
525
+ enter_text: 5,
526
+ enter_username: 2,
527
+ generic_urt: 3,
528
+ in_app_notification: 1,
529
+ interest_picker: 3,
530
+ js_instrumentation: 1,
531
+ menu_dialog: 1,
532
+ notifications_permission_prompt: 2,
533
+ open_account: 2,
534
+ open_home_timeline: 1,
535
+ open_link: 1,
536
+ phone_verification: 4,
537
+ privacy_options: 1,
538
+ security_key: 3,
539
+ select_avatar: 4,
540
+ select_banner: 2,
541
+ settings_list: 7,
542
+ show_code: 1,
543
+ sign_up: 2,
544
+ sign_up_review: 4,
545
+ tweet_selection_urt: 1,
546
+ update_users: 1,
547
+ upload_media: 1,
548
+ user_recommendations_list: 4,
549
+ user_recommendations_urt: 1,
550
+ wait_spinner: 3,
551
+ web_modal: 1
492
552
  }
493
553
  });
494
554
  }
@@ -621,7 +681,10 @@ class TwitterUserAuth extends TwitterGuestAuth {
621
681
  });
622
682
  }
623
683
  async executeFlowTask(data) {
624
- const onboardingTaskUrl = "https://api.twitter.com/1.1/onboarding/task.json";
684
+ let onboardingTaskUrl = "https://api.x.com/1.1/onboarding/task.json";
685
+ if ("flow_name" in data) {
686
+ onboardingTaskUrl = `https://api.x.com/1.1/onboarding/task.json?flow_name=${data.flow_name}`;
687
+ }
625
688
  const token = this.guestToken;
626
689
  if (token == null) {
627
690
  throw new AuthenticationError(
@@ -639,13 +702,37 @@ class TwitterUserAuth extends TwitterGuestAuth {
639
702
  "x-twitter-client-language": "en"
640
703
  });
641
704
  await this.installCsrfToken(headers);
642
- const res = await this.fetch(onboardingTaskUrl, {
643
- credentials: "include",
644
- method: "POST",
645
- headers,
646
- body: JSON.stringify(data)
647
- });
648
- await updateCookieJar(this.jar, res.headers);
705
+ let res;
706
+ do {
707
+ const fetchParameters = [
708
+ onboardingTaskUrl,
709
+ {
710
+ credentials: "include",
711
+ method: "POST",
712
+ headers,
713
+ body: JSON.stringify(data)
714
+ }
715
+ ];
716
+ try {
717
+ res = await this.fetch(...fetchParameters);
718
+ } catch (err) {
719
+ if (!(err instanceof Error)) {
720
+ throw err;
721
+ }
722
+ return {
723
+ status: "error",
724
+ err: new Error("Failed to perform request.")
725
+ };
726
+ }
727
+ await updateCookieJar(this.jar, res.headers);
728
+ if (res.status === 429) {
729
+ log("Rate limit hit, waiting before retrying...");
730
+ await this.onRateLimit({
731
+ fetchParameters,
732
+ response: res
733
+ });
734
+ }
735
+ } while (res.status === 429);
649
736
  if (!res.ok) {
650
737
  return { status: "error", err: await ApiError.fromResponse(res) };
651
738
  }
@@ -745,33 +832,33 @@ const apiRequestFactory = createApiRequestFactory(endpoints);
745
832
  function getAvatarOriginalSizeUrl(avatarUrl) {
746
833
  return avatarUrl ? avatarUrl.replace("_normal", "") : void 0;
747
834
  }
748
- function parseProfile(user, isBlueVerified) {
835
+ function parseProfile(legacy, isBlueVerified) {
749
836
  const profile = {
750
- avatar: getAvatarOriginalSizeUrl(user.profile_image_url_https),
751
- banner: user.profile_banner_url,
752
- biography: user.description,
753
- followersCount: user.followers_count,
754
- followingCount: user.friends_count,
755
- friendsCount: user.friends_count,
756
- mediaCount: user.media_count,
757
- isPrivate: user.protected ?? false,
758
- isVerified: user.verified,
759
- likesCount: user.favourites_count,
760
- listedCount: user.listed_count,
761
- location: user.location,
762
- name: user.name,
763
- pinnedTweetIds: user.pinned_tweet_ids_str,
764
- tweetsCount: user.statuses_count,
765
- url: `https://twitter.com/${user.screen_name}`,
766
- userId: user.id_str,
767
- username: user.screen_name,
837
+ avatar: getAvatarOriginalSizeUrl(legacy.profile_image_url_https),
838
+ banner: legacy.profile_banner_url,
839
+ biography: legacy.description,
840
+ followersCount: legacy.followers_count,
841
+ followingCount: legacy.friends_count,
842
+ friendsCount: legacy.friends_count,
843
+ mediaCount: legacy.media_count,
844
+ isPrivate: legacy.protected ?? false,
845
+ isVerified: legacy.verified,
846
+ likesCount: legacy.favourites_count,
847
+ listedCount: legacy.listed_count,
848
+ location: legacy.location,
849
+ name: legacy.name,
850
+ pinnedTweetIds: legacy.pinned_tweet_ids_str,
851
+ tweetsCount: legacy.statuses_count,
852
+ url: `https://twitter.com/${legacy.screen_name}`,
853
+ userId: legacy.id_str,
854
+ username: legacy.screen_name,
768
855
  isBlueVerified: isBlueVerified ?? false,
769
- canDm: user.can_dm
856
+ canDm: legacy.can_dm
770
857
  };
771
- if (user.created_at != null) {
772
- profile.joined = new Date(Date.parse(user.created_at));
858
+ if (legacy.created_at != null) {
859
+ profile.joined = new Date(Date.parse(legacy.created_at));
773
860
  }
774
- const urls = user.entities?.url?.urls;
861
+ const urls = legacy.entities?.url?.urls;
775
862
  if (urls?.length != null && urls?.length > 0) {
776
863
  profile.website = urls[0].expanded_url;
777
864
  }
@@ -810,15 +897,20 @@ async function getProfile(username, auth) {
810
897
  };
811
898
  }
812
899
  legacy.id_str = user.rest_id;
900
+ legacy.screen_name ?? (legacy.screen_name = user.core?.screen_name);
901
+ legacy.profile_image_url_https ?? (legacy.profile_image_url_https = user.avatar?.image_url);
902
+ legacy.created_at ?? (legacy.created_at = user.core?.created_at);
903
+ legacy.location ?? (legacy.location = user.location?.location);
904
+ legacy.name ?? (legacy.name = user.core?.name);
813
905
  if (legacy.screen_name == null || legacy.screen_name.length === 0) {
814
906
  return {
815
907
  success: false,
816
- err: new Error(`Either ${username} does not exist or is private.`)
908
+ err: new Error(`User ${username} does not exist or is private.`)
817
909
  };
818
910
  }
819
911
  return {
820
912
  success: true,
821
- value: parseProfile(user.legacy, user.is_blue_verified)
913
+ value: parseProfile(legacy, user.is_blue_verified)
822
914
  };
823
915
  }
824
916
  const idCache = /* @__PURE__ */ new Map();