@the-convocation/twitter-scraper 0.16.1 → 0.16.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  'use strict';
2
2
 
3
+ var debug = require('debug');
3
4
  var toughCookie = require('tough-cookie');
4
5
  var setCookie = require('set-cookie-parser');
5
6
  var headersPolyfill = require('headers-polyfill');
@@ -35,7 +36,7 @@ class ApiError extends Error {
35
36
  super(
36
37
  `Response status: ${response.status} | headers: ${JSON.stringify(
37
38
  headersToString(response.headers)
38
- )} | data: ${data}`
39
+ )} | data: ${typeof data === "string" ? data : JSON.stringify(data)}`
39
40
  );
40
41
  this.response = response;
41
42
  this.data = data;
@@ -71,10 +72,15 @@ class AuthenticationError extends Error {
71
72
  }
72
73
  }
73
74
 
75
+ const log$2 = debug("twitter-scraper:rate-limit");
74
76
  class WaitingRateLimitStrategy {
75
77
  async onRateLimit({ response: res }) {
78
+ const xRateLimitLimit = res.headers.get("x-rate-limit-limit");
76
79
  const xRateLimitRemaining = res.headers.get("x-rate-limit-remaining");
77
80
  const xRateLimitReset = res.headers.get("x-rate-limit-reset");
81
+ log$2(
82
+ `Rate limit event: limit=${xRateLimitLimit}, remaining=${xRateLimitRemaining}, reset=${xRateLimitReset}`
83
+ );
78
84
  if (xRateLimitRemaining == "0" && xRateLimitReset) {
79
85
  const currentTime = (/* @__PURE__ */ new Date()).valueOf() / 1e3;
80
86
  const timeDeltaMs = 1e3 * (parseInt(xRateLimitReset) - currentTime);
@@ -122,12 +128,14 @@ async function updateCookieJar(cookieJar, headers) {
122
128
  }
123
129
  }
124
130
 
131
+ const log$1 = debug("twitter-scraper:api");
125
132
  const bearerToken = "AAAAAAAAAAAAAAAAAAAAAFQODgEAAAAAVHTp76lzh3rFzcHbmHVvQxYYpTw%3DckAlMINMjmCwxUcaXbAN4XqJVdgMJaHqNOFgPMK0zN1qLqLQCF";
126
133
  async function jitter(maxMs) {
127
134
  const jitter2 = Math.random() * maxMs;
128
135
  await new Promise((resolve) => setTimeout(resolve, jitter2));
129
136
  }
130
137
  async function requestApi(url, auth, method = "GET", platform = new Platform()) {
138
+ log$1(`Making ${method} request to ${url}`);
131
139
  const headers = new headersPolyfill.Headers();
132
140
  await auth.installTo(headers, url);
133
141
  await platform.randomizeCiphers();
@@ -154,6 +162,7 @@ async function requestApi(url, auth, method = "GET", platform = new Platform())
154
162
  }
155
163
  await updateCookieJar(auth.cookieJar(), res.headers);
156
164
  if (res.status === 429) {
165
+ log$1("Rate limit hit, waiting for retry...");
157
166
  await auth.onRateLimit({
158
167
  fetchParameters,
159
168
  response: res
@@ -312,11 +321,17 @@ class TwitterGuestAuth {
312
321
  }
313
322
  headers.set("cookie", await this.getCookieString());
314
323
  }
315
- getCookies() {
316
- return this.jar.getCookies(this.getCookieJarUrl());
317
- }
318
- getCookieString() {
319
- return this.jar.getCookieString(this.getCookieJarUrl());
324
+ async getCookies() {
325
+ const cookies = await Promise.all([
326
+ this.jar.getCookies(this.getCookieJarUrl()),
327
+ this.jar.getCookies("https://twitter.com"),
328
+ this.jar.getCookies("https://x.com")
329
+ ]);
330
+ return cookies.flat();
331
+ }
332
+ async getCookieString() {
333
+ const cookies = await this.getCookies();
334
+ return cookies.map((cookie) => `${cookie.key}=${cookie.value}`).join("; ");
320
335
  }
321
336
  async removeCookie(key) {
322
337
  const store = this.jar.store;
@@ -336,7 +351,7 @@ class TwitterGuestAuth {
336
351
  * Updates the authentication state with a new guest token from the Twitter API.
337
352
  */
338
353
  async updateGuestToken() {
339
- const guestActivateUrl = "https://api.twitter.com/1.1/guest/activate.json";
354
+ const guestActivateUrl = "https://api.x.com/1.1/guest/activate.json";
340
355
  const headers = new headersPolyfill.Headers({
341
356
  Authorization: `Bearer ${this.bearerToken}`,
342
357
  Cookie: await this.getCookieString()
@@ -370,6 +385,7 @@ class TwitterGuestAuth {
370
385
  }
371
386
  }
372
387
 
388
+ const log = debug("twitter-scraper:auth-user");
373
389
  const TwitterUserAuthSubtask = typebox.Type.Object({
374
390
  subtask_id: typebox.Type.String(),
375
391
  enter_text: typebox.Type.Optional(typebox.Type.Object({}))
@@ -421,7 +437,7 @@ class TwitterUserAuth extends TwitterGuestAuth {
421
437
  }
422
438
  async isLoggedIn() {
423
439
  const res = await requestApi(
424
- "https://api.twitter.com/1.1/account/verify_credentials.json",
440
+ "https://api.x.com/1.1/account/verify_credentials.json",
425
441
  this
426
442
  );
427
443
  if (!res.success) {
@@ -465,7 +481,7 @@ class TwitterUserAuth extends TwitterGuestAuth {
465
481
  }
466
482
  try {
467
483
  await requestApi(
468
- "https://api.twitter.com/1.1/account/logout.json",
484
+ "https://api.x.com/1.1/account/logout.json",
469
485
  this,
470
486
  "POST"
471
487
  );
@@ -501,15 +517,59 @@ class TwitterUserAuth extends TwitterGuestAuth {
501
517
  this.removeCookie("external_referer=");
502
518
  this.removeCookie("ct0=");
503
519
  this.removeCookie("aa_u=");
520
+ this.removeCookie("__cf_bm=");
504
521
  return await this.executeFlowTask({
505
522
  flow_name: "login",
506
523
  input_flow_data: {
507
524
  flow_context: {
508
525
  debug_overrides: {},
509
526
  start_location: {
510
- location: "splash_screen"
527
+ location: "unknown"
511
528
  }
512
529
  }
530
+ },
531
+ subtask_versions: {
532
+ action_list: 2,
533
+ alert_dialog: 1,
534
+ app_download_cta: 1,
535
+ check_logged_in_account: 1,
536
+ choice_selection: 3,
537
+ contacts_live_sync_permission_prompt: 0,
538
+ cta: 7,
539
+ email_verification: 2,
540
+ end_flow: 1,
541
+ enter_date: 1,
542
+ enter_email: 2,
543
+ enter_password: 5,
544
+ enter_phone: 2,
545
+ enter_recaptcha: 1,
546
+ enter_text: 5,
547
+ enter_username: 2,
548
+ generic_urt: 3,
549
+ in_app_notification: 1,
550
+ interest_picker: 3,
551
+ js_instrumentation: 1,
552
+ menu_dialog: 1,
553
+ notifications_permission_prompt: 2,
554
+ open_account: 2,
555
+ open_home_timeline: 1,
556
+ open_link: 1,
557
+ phone_verification: 4,
558
+ privacy_options: 1,
559
+ security_key: 3,
560
+ select_avatar: 4,
561
+ select_banner: 2,
562
+ settings_list: 7,
563
+ show_code: 1,
564
+ sign_up: 2,
565
+ sign_up_review: 4,
566
+ tweet_selection_urt: 1,
567
+ update_users: 1,
568
+ upload_media: 1,
569
+ user_recommendations_list: 4,
570
+ user_recommendations_urt: 1,
571
+ wait_spinner: 3,
572
+ web_modal: 1
513
573
  }
514
574
  });
515
575
  }
@@ -642,7 +702,10 @@ class TwitterUserAuth extends TwitterGuestAuth {
642
702
  });
643
703
  }
644
704
  async executeFlowTask(data) {
645
- const onboardingTaskUrl = "https://api.twitter.com/1.1/onboarding/task.json";
705
+ let onboardingTaskUrl = "https://api.x.com/1.1/onboarding/task.json";
706
+ if ("flow_name" in data) {
707
+ onboardingTaskUrl = `https://api.x.com/1.1/onboarding/task.json?flow_name=${data.flow_name}`;
708
+ }
646
709
  const token = this.guestToken;
647
710
  if (token == null) {
648
711
  throw new AuthenticationError(
@@ -660,13 +723,37 @@ class TwitterUserAuth extends TwitterGuestAuth {
660
723
  "x-twitter-client-language": "en"
661
724
  });
662
725
  await this.installCsrfToken(headers);
663
- const res = await this.fetch(onboardingTaskUrl, {
664
- credentials: "include",
665
- method: "POST",
666
- headers,
667
- body: JSON.stringify(data)
668
- });
669
- await updateCookieJar(this.jar, res.headers);
726
+ let res;
727
+ do {
728
+ const fetchParameters = [
729
+ onboardingTaskUrl,
730
+ {
731
+ credentials: "include",
732
+ method: "POST",
733
+ headers,
734
+ body: JSON.stringify(data)
735
+ }
736
+ ];
737
+ try {
738
+ res = await this.fetch(...fetchParameters);
739
+ } catch (err) {
740
+ if (!(err instanceof Error)) {
741
+ throw err;
742
+ }
743
+ return {
744
+ status: "error",
745
+ err: new Error("Failed to perform request.")
746
+ };
747
+ }
748
+ await updateCookieJar(this.jar, res.headers);
749
+ if (res.status === 429) {
750
+ log("Rate limit hit, waiting before retrying...");
751
+ await this.onRateLimit({
752
+ fetchParameters,
753
+ response: res
754
+ });
755
+ }
756
+ } while (res.status === 429);
670
757
  if (!res.ok) {
671
758
  return { status: "error", err: await ApiError.fromResponse(res) };
672
759
  }
@@ -766,33 +853,33 @@ const apiRequestFactory = createApiRequestFactory(endpoints);
766
853
  function getAvatarOriginalSizeUrl(avatarUrl) {
767
854
  return avatarUrl ? avatarUrl.replace("_normal", "") : void 0;
768
855
  }
769
- function parseProfile(user, isBlueVerified) {
856
+ function parseProfile(legacy, isBlueVerified) {
770
857
  const profile = {
771
- avatar: getAvatarOriginalSizeUrl(user.profile_image_url_https),
772
- banner: user.profile_banner_url,
773
- biography: user.description,
774
- followersCount: user.followers_count,
775
- followingCount: user.friends_count,
776
- friendsCount: user.friends_count,
777
- mediaCount: user.media_count,
778
- isPrivate: user.protected ?? false,
779
- isVerified: user.verified,
780
- likesCount: user.favourites_count,
781
- listedCount: user.listed_count,
782
- location: user.location,
783
- name: user.name,
784
- pinnedTweetIds: user.pinned_tweet_ids_str,
785
- tweetsCount: user.statuses_count,
786
- url: `https://twitter.com/${user.screen_name}`,
787
- userId: user.id_str,
788
- username: user.screen_name,
858
+ avatar: getAvatarOriginalSizeUrl(legacy.profile_image_url_https),
859
+ banner: legacy.profile_banner_url,
860
+ biography: legacy.description,
861
+ followersCount: legacy.followers_count,
862
+ followingCount: legacy.friends_count,
863
+ friendsCount: legacy.friends_count,
864
+ mediaCount: legacy.media_count,
865
+ isPrivate: legacy.protected ?? false,
866
+ isVerified: legacy.verified,
867
+ likesCount: legacy.favourites_count,
868
+ listedCount: legacy.listed_count,
869
+ location: legacy.location,
870
+ name: legacy.name,
871
+ pinnedTweetIds: legacy.pinned_tweet_ids_str,
872
+ tweetsCount: legacy.statuses_count,
873
+ url: `https://twitter.com/${legacy.screen_name}`,
874
+ userId: legacy.id_str,
875
+ username: legacy.screen_name,
789
876
  isBlueVerified: isBlueVerified ?? false,
790
- canDm: user.can_dm
877
+ canDm: legacy.can_dm
791
878
  };
792
- if (user.created_at != null) {
793
- profile.joined = new Date(Date.parse(user.created_at));
879
+ if (legacy.created_at != null) {
880
+ profile.joined = new Date(Date.parse(legacy.created_at));
794
881
  }
795
- const urls = user.entities?.url?.urls;
882
+ const urls = legacy.entities?.url?.urls;
796
883
  if (urls?.length != null && urls?.length > 0) {
797
884
  profile.website = urls[0].expanded_url;
798
885
  }
@@ -831,15 +918,20 @@ async function getProfile(username, auth) {
831
918
  };
832
919
  }
833
920
  legacy.id_str = user.rest_id;
921
+ legacy.screen_name ?? (legacy.screen_name = user.core?.screen_name);
922
+ legacy.profile_image_url_https ?? (legacy.profile_image_url_https = user.avatar?.image_url);
923
+ legacy.created_at ?? (legacy.created_at = user.core?.created_at);
924
+ legacy.location ?? (legacy.location = user.location?.location);
925
+ legacy.name ?? (legacy.name = user.core?.name);
834
926
  if (legacy.screen_name == null || legacy.screen_name.length === 0) {
835
927
  return {
836
928
  success: false,
837
- err: new Error(`Either ${username} does not exist or is private.`)
929
+ err: new Error(`User ${username} does not exist or is private.`)
838
930
  };
839
931
  }
840
932
  return {
841
933
  success: true,
842
- value: parseProfile(user.legacy, user.is_blue_verified)
934
+ value: parseProfile(legacy, user.is_blue_verified)
843
935
  };
844
936
  }
845
937
  const idCache = /* @__PURE__ */ new Map();