@the-convocation/twitter-scraper 0.16.1 → 0.16.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  'use strict';
2
2
 
3
+ var debug = require('debug');
3
4
  var toughCookie = require('tough-cookie');
4
5
  var setCookie = require('set-cookie-parser');
5
6
  var headersPolyfill = require('headers-polyfill');
@@ -33,7 +34,7 @@ class ApiError extends Error {
33
34
  super(
34
35
  `Response status: ${response.status} | headers: ${JSON.stringify(
35
36
  headersToString(response.headers)
36
- )} | data: ${data}`
37
+ )} | data: ${typeof data === "string" ? data : JSON.stringify(data)}`
37
38
  );
38
39
  this.response = response;
39
40
  this.data = data;
@@ -69,10 +70,15 @@ class AuthenticationError extends Error {
69
70
  }
70
71
  }
71
72
 
73
+ const log$2 = debug("twitter-scraper:rate-limit");
72
74
  class WaitingRateLimitStrategy {
73
75
  async onRateLimit({ response: res }) {
76
+ const xRateLimitLimit = res.headers.get("x-rate-limit-limit");
74
77
  const xRateLimitRemaining = res.headers.get("x-rate-limit-remaining");
75
78
  const xRateLimitReset = res.headers.get("x-rate-limit-reset");
79
+ log$2(
80
+ `Rate limit event: limit=${xRateLimitLimit}, remaining=${xRateLimitRemaining}, reset=${xRateLimitReset}`
81
+ );
76
82
  if (xRateLimitRemaining == "0" && xRateLimitReset) {
77
83
  const currentTime = (/* @__PURE__ */ new Date()).valueOf() / 1e3;
78
84
  const timeDeltaMs = 1e3 * (parseInt(xRateLimitReset) - currentTime);
@@ -123,12 +129,14 @@ async function updateCookieJar(cookieJar, headers) {
123
129
  }
124
130
  }
125
131
 
132
+ const log$1 = debug("twitter-scraper:api");
126
133
  const bearerToken = "AAAAAAAAAAAAAAAAAAAAAFQODgEAAAAAVHTp76lzh3rFzcHbmHVvQxYYpTw%3DckAlMINMjmCwxUcaXbAN4XqJVdgMJaHqNOFgPMK0zN1qLqLQCF";
127
134
  async function jitter(maxMs) {
128
135
  const jitter2 = Math.random() * maxMs;
129
136
  await new Promise((resolve) => setTimeout(resolve, jitter2));
130
137
  }
131
138
  async function requestApi(url, auth, method = "GET", platform = new Platform()) {
139
+ log$1(`Making ${method} request to ${url}`);
132
140
  const headers = new headersPolyfill.Headers();
133
141
  await auth.installTo(headers, url);
134
142
  await platform.randomizeCiphers();
@@ -155,6 +163,7 @@ async function requestApi(url, auth, method = "GET", platform = new Platform())
155
163
  }
156
164
  await updateCookieJar(auth.cookieJar(), res.headers);
157
165
  if (res.status === 429) {
166
+ log$1("Rate limit hit, waiting for retry...");
158
167
  await auth.onRateLimit({
159
168
  fetchParameters,
160
169
  response: res
@@ -313,11 +322,17 @@ class TwitterGuestAuth {
313
322
  }
314
323
  headers.set("cookie", await this.getCookieString());
315
324
  }
316
- getCookies() {
317
- return this.jar.getCookies(this.getCookieJarUrl());
318
- }
319
- getCookieString() {
320
- return this.jar.getCookieString(this.getCookieJarUrl());
325
+ async getCookies() {
326
+ const cookies = await Promise.all([
327
+ this.jar.getCookies(this.getCookieJarUrl()),
328
+ this.jar.getCookies("https://twitter.com"),
329
+ this.jar.getCookies("https://x.com")
330
+ ]);
331
+ return cookies.flat();
332
+ }
333
+ async getCookieString() {
334
+ const cookies = await this.getCookies();
335
+ return cookies.map((cookie) => `${cookie.key}=${cookie.value}`).join("; ");
321
336
  }
322
337
  async removeCookie(key) {
323
338
  const store = this.jar.store;
@@ -337,7 +352,7 @@ class TwitterGuestAuth {
337
352
  * Updates the authentication state with a new guest token from the Twitter API.
338
353
  */
339
354
  async updateGuestToken() {
340
- const guestActivateUrl = "https://api.twitter.com/1.1/guest/activate.json";
355
+ const guestActivateUrl = "https://api.x.com/1.1/guest/activate.json";
341
356
  const headers = new headersPolyfill.Headers({
342
357
  Authorization: `Bearer ${this.bearerToken}`,
343
358
  Cookie: await this.getCookieString()
@@ -371,6 +386,7 @@ class TwitterGuestAuth {
371
386
  }
372
387
  }
373
388
 
389
+ const log = debug("twitter-scraper:auth-user");
374
390
  const TwitterUserAuthSubtask = typebox.Type.Object({
375
391
  subtask_id: typebox.Type.String(),
376
392
  enter_text: typebox.Type.Optional(typebox.Type.Object({}))
@@ -422,7 +438,7 @@ class TwitterUserAuth extends TwitterGuestAuth {
422
438
  }
423
439
  async isLoggedIn() {
424
440
  const res = await requestApi(
425
- "https://api.twitter.com/1.1/account/verify_credentials.json",
441
+ "https://api.x.com/1.1/account/verify_credentials.json",
426
442
  this
427
443
  );
428
444
  if (!res.success) {
@@ -466,7 +482,7 @@ class TwitterUserAuth extends TwitterGuestAuth {
466
482
  }
467
483
  try {
468
484
  await requestApi(
469
- "https://api.twitter.com/1.1/account/logout.json",
485
+ "https://api.x.com/1.1/account/logout.json",
470
486
  this,
471
487
  "POST"
472
488
  );
@@ -502,15 +518,59 @@ class TwitterUserAuth extends TwitterGuestAuth {
502
518
  this.removeCookie("external_referer=");
503
519
  this.removeCookie("ct0=");
504
520
  this.removeCookie("aa_u=");
521
+ this.removeCookie("__cf_bm=");
505
522
  return await this.executeFlowTask({
506
523
  flow_name: "login",
507
524
  input_flow_data: {
508
525
  flow_context: {
509
526
  debug_overrides: {},
510
527
  start_location: {
511
- location: "splash_screen"
528
+ location: "unknown"
512
529
  }
513
530
  }
531
+ },
532
+ subtask_versions: {
533
+ action_list: 2,
534
+ alert_dialog: 1,
535
+ app_download_cta: 1,
536
+ check_logged_in_account: 1,
537
+ choice_selection: 3,
538
+ contacts_live_sync_permission_prompt: 0,
539
+ cta: 7,
540
+ email_verification: 2,
541
+ end_flow: 1,
542
+ enter_date: 1,
543
+ enter_email: 2,
544
+ enter_password: 5,
545
+ enter_phone: 2,
546
+ enter_recaptcha: 1,
547
+ enter_text: 5,
548
+ enter_username: 2,
549
+ generic_urt: 3,
550
+ in_app_notification: 1,
551
+ interest_picker: 3,
552
+ js_instrumentation: 1,
553
+ menu_dialog: 1,
554
+ notifications_permission_prompt: 2,
555
+ open_account: 2,
556
+ open_home_timeline: 1,
557
+ open_link: 1,
558
+ phone_verification: 4,
559
+ privacy_options: 1,
560
+ security_key: 3,
561
+ select_avatar: 4,
562
+ select_banner: 2,
563
+ settings_list: 7,
564
+ show_code: 1,
565
+ sign_up: 2,
566
+ sign_up_review: 4,
567
+ tweet_selection_urt: 1,
568
+ update_users: 1,
569
+ upload_media: 1,
570
+ user_recommendations_list: 4,
571
+ user_recommendations_urt: 1,
572
+ wait_spinner: 3,
573
+ web_modal: 1
514
574
  }
515
575
  });
516
576
  }
@@ -643,7 +703,10 @@ class TwitterUserAuth extends TwitterGuestAuth {
643
703
  });
644
704
  }
645
705
  async executeFlowTask(data) {
646
- const onboardingTaskUrl = "https://api.twitter.com/1.1/onboarding/task.json";
706
+ let onboardingTaskUrl = "https://api.x.com/1.1/onboarding/task.json";
707
+ if ("flow_name" in data) {
708
+ onboardingTaskUrl = `https://api.x.com/1.1/onboarding/task.json?flow_name=${data.flow_name}`;
709
+ }
647
710
  const token = this.guestToken;
648
711
  if (token == null) {
649
712
  throw new AuthenticationError(
@@ -661,13 +724,37 @@ class TwitterUserAuth extends TwitterGuestAuth {
661
724
  "x-twitter-client-language": "en"
662
725
  });
663
726
  await this.installCsrfToken(headers);
664
- const res = await this.fetch(onboardingTaskUrl, {
665
- credentials: "include",
666
- method: "POST",
667
- headers,
668
- body: JSON.stringify(data)
669
- });
670
- await updateCookieJar(this.jar, res.headers);
727
+ let res;
728
+ do {
729
+ const fetchParameters = [
730
+ onboardingTaskUrl,
731
+ {
732
+ credentials: "include",
733
+ method: "POST",
734
+ headers,
735
+ body: JSON.stringify(data)
736
+ }
737
+ ];
738
+ try {
739
+ res = await this.fetch(...fetchParameters);
740
+ } catch (err) {
741
+ if (!(err instanceof Error)) {
742
+ throw err;
743
+ }
744
+ return {
745
+ status: "error",
746
+ err: new Error("Failed to perform request.")
747
+ };
748
+ }
749
+ await updateCookieJar(this.jar, res.headers);
750
+ if (res.status === 429) {
751
+ log("Rate limit hit, waiting before retrying...");
752
+ await this.onRateLimit({
753
+ fetchParameters,
754
+ response: res
755
+ });
756
+ }
757
+ } while (res.status === 429);
671
758
  if (!res.ok) {
672
759
  return { status: "error", err: await ApiError.fromResponse(res) };
673
760
  }
@@ -767,33 +854,33 @@ const apiRequestFactory = createApiRequestFactory(endpoints);
767
854
  function getAvatarOriginalSizeUrl(avatarUrl) {
768
855
  return avatarUrl ? avatarUrl.replace("_normal", "") : void 0;
769
856
  }
770
- function parseProfile(user, isBlueVerified) {
857
+ function parseProfile(legacy, isBlueVerified) {
771
858
  const profile = {
772
- avatar: getAvatarOriginalSizeUrl(user.profile_image_url_https),
773
- banner: user.profile_banner_url,
774
- biography: user.description,
775
- followersCount: user.followers_count,
776
- followingCount: user.friends_count,
777
- friendsCount: user.friends_count,
778
- mediaCount: user.media_count,
779
- isPrivate: user.protected ?? false,
780
- isVerified: user.verified,
781
- likesCount: user.favourites_count,
782
- listedCount: user.listed_count,
783
- location: user.location,
784
- name: user.name,
785
- pinnedTweetIds: user.pinned_tweet_ids_str,
786
- tweetsCount: user.statuses_count,
787
- url: `https://twitter.com/${user.screen_name}`,
788
- userId: user.id_str,
789
- username: user.screen_name,
859
+ avatar: getAvatarOriginalSizeUrl(legacy.profile_image_url_https),
860
+ banner: legacy.profile_banner_url,
861
+ biography: legacy.description,
862
+ followersCount: legacy.followers_count,
863
+ followingCount: legacy.friends_count,
864
+ friendsCount: legacy.friends_count,
865
+ mediaCount: legacy.media_count,
866
+ isPrivate: legacy.protected ?? false,
867
+ isVerified: legacy.verified,
868
+ likesCount: legacy.favourites_count,
869
+ listedCount: legacy.listed_count,
870
+ location: legacy.location,
871
+ name: legacy.name,
872
+ pinnedTweetIds: legacy.pinned_tweet_ids_str,
873
+ tweetsCount: legacy.statuses_count,
874
+ url: `https://twitter.com/${legacy.screen_name}`,
875
+ userId: legacy.id_str,
876
+ username: legacy.screen_name,
790
877
  isBlueVerified: isBlueVerified ?? false,
791
- canDm: user.can_dm
878
+ canDm: legacy.can_dm
792
879
  };
793
- if (user.created_at != null) {
794
- profile.joined = new Date(Date.parse(user.created_at));
880
+ if (legacy.created_at != null) {
881
+ profile.joined = new Date(Date.parse(legacy.created_at));
795
882
  }
796
- const urls = user.entities?.url?.urls;
883
+ const urls = legacy.entities?.url?.urls;
797
884
  if (urls?.length != null && urls?.length > 0) {
798
885
  profile.website = urls[0].expanded_url;
799
886
  }
@@ -832,15 +919,20 @@ async function getProfile(username, auth) {
832
919
  };
833
920
  }
834
921
  legacy.id_str = user.rest_id;
922
+ legacy.screen_name ?? (legacy.screen_name = user.core?.screen_name);
923
+ legacy.profile_image_url_https ?? (legacy.profile_image_url_https = user.avatar?.image_url);
924
+ legacy.created_at ?? (legacy.created_at = user.core?.created_at);
925
+ legacy.location ?? (legacy.location = user.location?.location);
926
+ legacy.name ?? (legacy.name = user.core?.name);
835
927
  if (legacy.screen_name == null || legacy.screen_name.length === 0) {
836
928
  return {
837
929
  success: false,
838
- err: new Error(`Either ${username} does not exist or is private.`)
930
+ err: new Error(`User ${username} does not exist or is private.`)
839
931
  };
840
932
  }
841
933
  return {
842
934
  success: true,
843
- value: parseProfile(user.legacy, user.is_blue_verified)
935
+ value: parseProfile(legacy, user.is_blue_verified)
844
936
  };
845
937
  }
846
938
  const idCache = /* @__PURE__ */ new Map();