@the-convocation/twitter-scraper 0.19.1 → 0.20.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/default/cjs/index.js +441 -196
- package/dist/default/cjs/index.js.map +1 -1
- package/dist/default/esm/index.mjs +441 -196
- package/dist/default/esm/index.mjs.map +1 -1
- package/dist/node/cjs/index.cjs +438 -193
- package/dist/node/cjs/index.cjs.map +1 -1
- package/dist/node/esm/index.mjs +438 -193
- package/dist/node/esm/index.mjs.map +1 -1
- package/dist/types/index.d.ts +13 -0
- package/examples/node-integration/package.json +2 -1
- package/package.json +6 -4
package/dist/node/cjs/index.cjs
CHANGED
|
@@ -72,13 +72,13 @@ class AuthenticationError extends Error {
|
|
|
72
72
|
}
|
|
73
73
|
}
|
|
74
74
|
|
|
75
|
-
const log$
|
|
75
|
+
const log$6 = debug("twitter-scraper:rate-limit");
|
|
76
76
|
class WaitingRateLimitStrategy {
|
|
77
77
|
async onRateLimit({ response: res }) {
|
|
78
78
|
const xRateLimitLimit = res.headers.get("x-rate-limit-limit");
|
|
79
79
|
const xRateLimitRemaining = res.headers.get("x-rate-limit-remaining");
|
|
80
80
|
const xRateLimitReset = res.headers.get("x-rate-limit-reset");
|
|
81
|
-
log$
|
|
81
|
+
log$6(
|
|
82
82
|
`Rate limit event: limit=${xRateLimitLimit}, remaining=${xRateLimitRemaining}, reset=${xRateLimitReset}`
|
|
83
83
|
);
|
|
84
84
|
if (xRateLimitRemaining == "0" && xRateLimitReset) {
|
|
@@ -94,20 +94,7 @@ class ErrorRateLimitStrategy {
|
|
|
94
94
|
}
|
|
95
95
|
}
|
|
96
96
|
|
|
97
|
-
|
|
98
|
-
async randomizeCiphers() {
|
|
99
|
-
const platform = await Platform.importPlatform();
|
|
100
|
-
await platform?.randomizeCiphers();
|
|
101
|
-
}
|
|
102
|
-
static async importPlatform() {
|
|
103
|
-
{
|
|
104
|
-
const { platform } = await Promise.resolve().then(function () { return index; });
|
|
105
|
-
return platform;
|
|
106
|
-
}
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
const log$3 = debug("twitter-scraper:requests");
|
|
97
|
+
const log$5 = debug("twitter-scraper:requests");
|
|
111
98
|
async function updateCookieJar(cookieJar, headers) {
|
|
112
99
|
let setCookieHeaders = [];
|
|
113
100
|
if (typeof headers.getSetCookie === "function") {
|
|
@@ -122,12 +109,12 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
122
109
|
for (const cookieStr of setCookieHeaders) {
|
|
123
110
|
const cookie = toughCookie.Cookie.parse(cookieStr);
|
|
124
111
|
if (!cookie) {
|
|
125
|
-
log$
|
|
112
|
+
log$5(`Failed to parse cookie: ${cookieStr.substring(0, 100)}`);
|
|
126
113
|
continue;
|
|
127
114
|
}
|
|
128
115
|
if (cookie.maxAge === 0 || cookie.expires && cookie.expires < /* @__PURE__ */ new Date()) {
|
|
129
116
|
if (cookie.key === "ct0") {
|
|
130
|
-
log$
|
|
117
|
+
log$5(`Skipping deletion of ct0 cookie (Max-Age=0)`);
|
|
131
118
|
}
|
|
132
119
|
continue;
|
|
133
120
|
}
|
|
@@ -135,7 +122,7 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
135
122
|
const url = `${cookie.secure ? "https" : "http"}://${cookie.domain}${cookie.path}`;
|
|
136
123
|
await cookieJar.setCookie(cookie, url);
|
|
137
124
|
if (cookie.key === "ct0") {
|
|
138
|
-
log$
|
|
125
|
+
log$5(
|
|
139
126
|
`Successfully set ct0 cookie with value: ${cookie.value.substring(
|
|
140
127
|
0,
|
|
141
128
|
20
|
|
@@ -143,9 +130,9 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
143
130
|
);
|
|
144
131
|
}
|
|
145
132
|
} catch (err) {
|
|
146
|
-
log$
|
|
133
|
+
log$5(`Failed to set cookie ${cookie.key}: ${err}`);
|
|
147
134
|
if (cookie.key === "ct0") {
|
|
148
|
-
log$
|
|
135
|
+
log$5(`FAILED to set ct0 cookie! Error: ${err}`);
|
|
149
136
|
}
|
|
150
137
|
}
|
|
151
138
|
}
|
|
@@ -159,141 +146,84 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
159
146
|
}
|
|
160
147
|
}
|
|
161
148
|
|
|
162
|
-
const log$
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
149
|
+
const log$4 = debug("twitter-scraper:xpff");
|
|
150
|
+
let isoCrypto = null;
|
|
151
|
+
function getCrypto() {
|
|
152
|
+
if (isoCrypto != null) {
|
|
153
|
+
return isoCrypto;
|
|
154
|
+
}
|
|
155
|
+
if (typeof crypto === "undefined") {
|
|
156
|
+
log$4("Global crypto is undefined, importing from crypto module...");
|
|
157
|
+
const { webcrypto } = require("crypto");
|
|
158
|
+
isoCrypto = webcrypto;
|
|
159
|
+
return webcrypto;
|
|
160
|
+
}
|
|
161
|
+
isoCrypto = crypto;
|
|
162
|
+
return crypto;
|
|
163
|
+
}
|
|
164
|
+
async function sha256(message) {
|
|
165
|
+
const msgBuffer = new TextEncoder().encode(message);
|
|
166
|
+
const hashBuffer = await getCrypto().subtle.digest("SHA-256", msgBuffer);
|
|
167
|
+
return new Uint8Array(hashBuffer);
|
|
168
|
+
}
|
|
169
|
+
function buf2hex(buffer) {
|
|
170
|
+
return [...new Uint8Array(buffer)].map((x) => x.toString(16).padStart(2, "0")).join("");
|
|
171
|
+
}
|
|
172
|
+
class XPFFHeaderGenerator {
|
|
173
|
+
constructor(seed) {
|
|
174
|
+
this.seed = seed;
|
|
175
|
+
}
|
|
176
|
+
async deriveKey(guestId) {
|
|
177
|
+
const combined = `${this.seed}${guestId}`;
|
|
178
|
+
const result = await sha256(combined);
|
|
179
|
+
return result;
|
|
180
|
+
}
|
|
181
|
+
async generateHeader(plaintext, guestId) {
|
|
182
|
+
log$4(`Generating XPFF key for guest ID: ${guestId}`);
|
|
183
|
+
const key = await this.deriveKey(guestId);
|
|
184
|
+
const nonce = getCrypto().getRandomValues(new Uint8Array(12));
|
|
185
|
+
const cipher = await getCrypto().subtle.importKey(
|
|
186
|
+
"raw",
|
|
187
|
+
key,
|
|
188
|
+
{ name: "AES-GCM" },
|
|
189
|
+
false,
|
|
190
|
+
["encrypt"]
|
|
191
|
+
);
|
|
192
|
+
const encrypted = await getCrypto().subtle.encrypt(
|
|
176
193
|
{
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
}
|
|
202
|
-
if (!res.ok) {
|
|
203
|
-
return {
|
|
204
|
-
success: false,
|
|
205
|
-
err: await ApiError.fromResponse(res)
|
|
206
|
-
};
|
|
207
|
-
}
|
|
208
|
-
const value = await flexParseJson(res);
|
|
209
|
-
if (res.headers.get("x-rate-limit-incoming") == "0") {
|
|
210
|
-
auth.deleteToken();
|
|
211
|
-
return { success: true, value };
|
|
212
|
-
} else {
|
|
213
|
-
return { success: true, value };
|
|
214
|
-
}
|
|
215
|
-
}
|
|
216
|
-
async function flexParseJson(res) {
|
|
217
|
-
try {
|
|
218
|
-
return await res.json();
|
|
219
|
-
} catch {
|
|
220
|
-
log$2("Failed to parse response as JSON, trying text parse...");
|
|
221
|
-
const text = await res.text();
|
|
222
|
-
log$2("Response text:", text);
|
|
223
|
-
return JSON.parse(text);
|
|
224
|
-
}
|
|
225
|
-
}
|
|
226
|
-
function addApiFeatures(o) {
|
|
227
|
-
return {
|
|
228
|
-
...o,
|
|
229
|
-
rweb_lists_timeline_redesign_enabled: true,
|
|
230
|
-
responsive_web_graphql_exclude_directive_enabled: true,
|
|
231
|
-
verified_phone_label_enabled: false,
|
|
232
|
-
creator_subscriptions_tweet_preview_api_enabled: true,
|
|
233
|
-
responsive_web_graphql_timeline_navigation_enabled: true,
|
|
234
|
-
responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
|
|
235
|
-
tweetypie_unmention_optimization_enabled: true,
|
|
236
|
-
responsive_web_edit_tweet_api_enabled: true,
|
|
237
|
-
graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
|
|
238
|
-
view_counts_everywhere_api_enabled: true,
|
|
239
|
-
longform_notetweets_consumption_enabled: true,
|
|
240
|
-
tweet_awards_web_tipping_enabled: false,
|
|
241
|
-
freedom_of_speech_not_reach_fetch_enabled: true,
|
|
242
|
-
standardized_nudges_misinfo: true,
|
|
243
|
-
longform_notetweets_rich_text_read_enabled: true,
|
|
244
|
-
responsive_web_enhance_cards_enabled: false,
|
|
245
|
-
subscriptions_verification_info_enabled: true,
|
|
246
|
-
subscriptions_verification_info_reason_enabled: true,
|
|
247
|
-
subscriptions_verification_info_verified_since_enabled: true,
|
|
248
|
-
super_follow_badge_privacy_enabled: false,
|
|
249
|
-
super_follow_exclusive_tweet_notifications_enabled: false,
|
|
250
|
-
super_follow_tweet_api_enabled: false,
|
|
251
|
-
super_follow_user_api_enabled: false,
|
|
252
|
-
android_graphql_skip_api_media_color_palette: false,
|
|
253
|
-
creator_subscriptions_subscription_count_enabled: false,
|
|
254
|
-
blue_business_profile_image_shape_enabled: false,
|
|
255
|
-
unified_cards_ad_metadata_container_dynamic_card_content_query_enabled: false
|
|
256
|
-
};
|
|
194
|
+
name: "AES-GCM",
|
|
195
|
+
iv: nonce
|
|
196
|
+
},
|
|
197
|
+
cipher,
|
|
198
|
+
new TextEncoder().encode(plaintext)
|
|
199
|
+
);
|
|
200
|
+
const combined = new Uint8Array(nonce.length + encrypted.byteLength);
|
|
201
|
+
combined.set(nonce);
|
|
202
|
+
combined.set(new Uint8Array(encrypted), nonce.length);
|
|
203
|
+
const result = buf2hex(combined);
|
|
204
|
+
log$4(`XPFF header generated for guest ID ${guestId}: ${result}`);
|
|
205
|
+
return result;
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
const xpffBaseKey = "0e6be1f1e21ffc33590b888fd4dc81b19713e570e805d4e5df80a493c9571a05";
|
|
209
|
+
function xpffPlain() {
|
|
210
|
+
const timestamp = Date.now();
|
|
211
|
+
return JSON.stringify({
|
|
212
|
+
navigator_properties: {
|
|
213
|
+
hasBeenActive: "true",
|
|
214
|
+
userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
|
|
215
|
+
webdriver: "false"
|
|
216
|
+
},
|
|
217
|
+
created_at: timestamp
|
|
218
|
+
});
|
|
257
219
|
}
|
|
258
|
-
function
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
params.set("include_followed_by", "1");
|
|
263
|
-
params.set("include_want_retweets", "1");
|
|
264
|
-
params.set("include_mute_edge", "1");
|
|
265
|
-
params.set("include_can_dm", "1");
|
|
266
|
-
params.set("include_can_media_tag", "1");
|
|
267
|
-
params.set("include_ext_has_nft_avatar", "1");
|
|
268
|
-
params.set("include_ext_is_blue_verified", "1");
|
|
269
|
-
params.set("include_ext_verified_type", "1");
|
|
270
|
-
params.set("skip_status", "1");
|
|
271
|
-
params.set("cards_platform", "Web-12");
|
|
272
|
-
params.set("include_cards", "1");
|
|
273
|
-
params.set("include_ext_alt_text", "true");
|
|
274
|
-
params.set("include_ext_limited_action_results", "false");
|
|
275
|
-
params.set("include_quote_count", "true");
|
|
276
|
-
params.set("include_reply_count", "1");
|
|
277
|
-
params.set("tweet_mode", "extended");
|
|
278
|
-
params.set("include_ext_collab_control", "true");
|
|
279
|
-
params.set("include_ext_views", "true");
|
|
280
|
-
params.set("include_entities", "true");
|
|
281
|
-
params.set("include_user_entities", "true");
|
|
282
|
-
params.set("include_ext_media_color", "true");
|
|
283
|
-
params.set("include_ext_media_availability", "true");
|
|
284
|
-
params.set("include_ext_sensitive_media_warning", "true");
|
|
285
|
-
params.set("include_ext_trusted_friends_metadata", "true");
|
|
286
|
-
params.set("send_error_codes", "true");
|
|
287
|
-
params.set("simple_quoted_tweet", "true");
|
|
288
|
-
params.set("include_tweet_replies", `${includeTweetReplies}`);
|
|
289
|
-
params.set(
|
|
290
|
-
"ext",
|
|
291
|
-
"mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe"
|
|
292
|
-
);
|
|
293
|
-
return params;
|
|
220
|
+
async function generateXPFFHeader(guestId) {
|
|
221
|
+
const generator = new XPFFHeaderGenerator(xpffBaseKey);
|
|
222
|
+
const plaintext = xpffPlain();
|
|
223
|
+
return generator.generateHeader(plaintext, guestId);
|
|
294
224
|
}
|
|
295
225
|
|
|
296
|
-
const log$
|
|
226
|
+
const log$3 = debug("twitter-scraper:auth");
|
|
297
227
|
function withTransform(fetchFn, transform) {
|
|
298
228
|
return async (input, init) => {
|
|
299
229
|
const fetchArgs = await transform?.request?.(input, init) ?? [
|
|
@@ -343,28 +273,37 @@ class TwitterGuestAuth {
|
|
|
343
273
|
}
|
|
344
274
|
return new Date(this.guestCreatedAt);
|
|
345
275
|
}
|
|
346
|
-
async installTo(headers) {
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
"
|
|
354
|
-
|
|
276
|
+
async installTo(headers, _url, bearerTokenOverride) {
|
|
277
|
+
const tokenToUse = bearerTokenOverride ?? this.bearerToken;
|
|
278
|
+
if (!bearerTokenOverride) {
|
|
279
|
+
if (this.shouldUpdate()) {
|
|
280
|
+
await this.updateGuestToken();
|
|
281
|
+
}
|
|
282
|
+
if (this.guestToken) {
|
|
283
|
+
headers.set("x-guest-token", this.guestToken);
|
|
284
|
+
}
|
|
355
285
|
}
|
|
356
|
-
headers.set("authorization", `Bearer ${
|
|
357
|
-
headers.set("x-guest-token", token);
|
|
286
|
+
headers.set("authorization", `Bearer ${tokenToUse}`);
|
|
358
287
|
headers.set(
|
|
359
288
|
"user-agent",
|
|
360
289
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
361
290
|
);
|
|
291
|
+
await this.installCsrfToken(headers);
|
|
292
|
+
if (this.options?.experimental?.xpff) {
|
|
293
|
+
const guestId = await this.guestId();
|
|
294
|
+
if (guestId != null) {
|
|
295
|
+
const xpffHeader = await generateXPFFHeader(guestId);
|
|
296
|
+
headers.set("x-xp-forwarded-for", xpffHeader);
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
headers.set("cookie", await this.getCookieString());
|
|
300
|
+
}
|
|
301
|
+
async installCsrfToken(headers) {
|
|
362
302
|
const cookies = await this.getCookies();
|
|
363
303
|
const xCsrfToken = cookies.find((cookie) => cookie.key === "ct0");
|
|
364
304
|
if (xCsrfToken) {
|
|
365
305
|
headers.set("x-csrf-token", xCsrfToken.value);
|
|
366
306
|
}
|
|
367
|
-
headers.set("cookie", await this.getCookieString());
|
|
368
307
|
}
|
|
369
308
|
async setCookie(key, value) {
|
|
370
309
|
const cookie = toughCookie.Cookie.parse(`${key}=${value}`);
|
|
@@ -397,16 +336,28 @@ class TwitterGuestAuth {
|
|
|
397
336
|
getCookieJarUrl() {
|
|
398
337
|
return typeof document !== "undefined" ? document.location.toString() : "https://x.com";
|
|
399
338
|
}
|
|
339
|
+
async guestId() {
|
|
340
|
+
const cookies = await this.getCookies();
|
|
341
|
+
const guestIdCookie = cookies.find((cookie) => cookie.key === "guest_id");
|
|
342
|
+
return guestIdCookie ? guestIdCookie.value : null;
|
|
343
|
+
}
|
|
400
344
|
/**
|
|
401
345
|
* Updates the authentication state with a new guest token from the Twitter API.
|
|
402
346
|
*/
|
|
403
347
|
async updateGuestToken() {
|
|
348
|
+
try {
|
|
349
|
+
await this.updateGuestTokenCore();
|
|
350
|
+
} catch (err) {
|
|
351
|
+
log$3("Failed to update guest token; this may cause issues:", err);
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
async updateGuestTokenCore() {
|
|
404
355
|
const guestActivateUrl = "https://api.x.com/1.1/guest/activate.json";
|
|
405
356
|
const headers = new headersPolyfill.Headers({
|
|
406
357
|
Authorization: `Bearer ${this.bearerToken}`,
|
|
407
358
|
Cookie: await this.getCookieString()
|
|
408
359
|
});
|
|
409
|
-
log$
|
|
360
|
+
log$3(`Making POST request to ${guestActivateUrl}`);
|
|
410
361
|
const res = await this.fetch(guestActivateUrl, {
|
|
411
362
|
method: "POST",
|
|
412
363
|
headers,
|
|
@@ -427,7 +378,7 @@ class TwitterGuestAuth {
|
|
|
427
378
|
this.guestToken = newGuestToken;
|
|
428
379
|
this.guestCreatedAt = /* @__PURE__ */ new Date();
|
|
429
380
|
await this.setCookie("gt", newGuestToken);
|
|
430
|
-
log$
|
|
381
|
+
log$3(`Updated guest token: ${newGuestToken}`);
|
|
431
382
|
}
|
|
432
383
|
/**
|
|
433
384
|
* Returns if the authentication token needs to be updated or not.
|
|
@@ -438,6 +389,278 @@ class TwitterGuestAuth {
|
|
|
438
389
|
}
|
|
439
390
|
}
|
|
440
391
|
|
|
392
|
+
class Platform {
|
|
393
|
+
async randomizeCiphers() {
|
|
394
|
+
const platform = await Platform.importPlatform();
|
|
395
|
+
await platform?.randomizeCiphers();
|
|
396
|
+
}
|
|
397
|
+
static async importPlatform() {
|
|
398
|
+
{
|
|
399
|
+
const { platform } = await Promise.resolve().then(function () { return index; });
|
|
400
|
+
return platform;
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
const log$2 = debug("twitter-scraper:xctxid");
|
|
406
|
+
let linkedom = null;
|
|
407
|
+
function linkedomImport() {
|
|
408
|
+
if (!linkedom) {
|
|
409
|
+
const mod = require("linkedom");
|
|
410
|
+
linkedom = mod;
|
|
411
|
+
return mod;
|
|
412
|
+
}
|
|
413
|
+
return linkedom;
|
|
414
|
+
}
|
|
415
|
+
async function parseHTML(html) {
|
|
416
|
+
if (typeof window !== "undefined") {
|
|
417
|
+
const { defaultView } = new DOMParser().parseFromString(html, "text/html");
|
|
418
|
+
if (!defaultView) {
|
|
419
|
+
throw new Error("Failed to get defaultView from parsed HTML.");
|
|
420
|
+
}
|
|
421
|
+
return defaultView;
|
|
422
|
+
} else {
|
|
423
|
+
const { DOMParser: DOMParser2 } = linkedomImport();
|
|
424
|
+
return new DOMParser2().parseFromString(html, "text/html").defaultView;
|
|
425
|
+
}
|
|
426
|
+
}
|
|
427
|
+
async function handleXMigration(fetchFn) {
|
|
428
|
+
const headers = {
|
|
429
|
+
accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
|
430
|
+
"accept-language": "ja",
|
|
431
|
+
"cache-control": "no-cache",
|
|
432
|
+
pragma: "no-cache",
|
|
433
|
+
priority: "u=0, i",
|
|
434
|
+
"sec-ch-ua": '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
|
|
435
|
+
"sec-ch-ua-mobile": "?0",
|
|
436
|
+
"sec-ch-ua-platform": '"Windows"',
|
|
437
|
+
"sec-fetch-dest": "document",
|
|
438
|
+
"sec-fetch-mode": "navigate",
|
|
439
|
+
"sec-fetch-site": "none",
|
|
440
|
+
"sec-fetch-user": "?1",
|
|
441
|
+
"upgrade-insecure-requests": "1",
|
|
442
|
+
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
443
|
+
};
|
|
444
|
+
const response = await fetchFn("https://x.com", {
|
|
445
|
+
headers
|
|
446
|
+
});
|
|
447
|
+
if (!response.ok) {
|
|
448
|
+
throw new Error(`Failed to fetch X homepage: ${response.statusText}`);
|
|
449
|
+
}
|
|
450
|
+
const htmlText = await response.text();
|
|
451
|
+
let dom = await parseHTML(htmlText);
|
|
452
|
+
let document = dom.window.document;
|
|
453
|
+
const migrationRedirectionRegex = new RegExp(
|
|
454
|
+
"(http(?:s)?://(?:www\\.)?(twitter|x){1}\\.com(/x)?/migrate([/?])?tok=[a-zA-Z0-9%\\-_]+)+",
|
|
455
|
+
"i"
|
|
456
|
+
);
|
|
457
|
+
const metaRefresh = document.querySelector("meta[http-equiv='refresh']");
|
|
458
|
+
const metaContent = metaRefresh ? metaRefresh.getAttribute("content") || "" : "";
|
|
459
|
+
const migrationRedirectionUrl = migrationRedirectionRegex.exec(metaContent) || migrationRedirectionRegex.exec(htmlText);
|
|
460
|
+
if (migrationRedirectionUrl) {
|
|
461
|
+
const redirectResponse = await fetch(migrationRedirectionUrl[0]);
|
|
462
|
+
if (!redirectResponse.ok) {
|
|
463
|
+
throw new Error(
|
|
464
|
+
`Failed to follow migration redirection: ${redirectResponse.statusText}`
|
|
465
|
+
);
|
|
466
|
+
}
|
|
467
|
+
const redirectHtml = await redirectResponse.text();
|
|
468
|
+
dom = await parseHTML(redirectHtml);
|
|
469
|
+
document = dom.window.document;
|
|
470
|
+
}
|
|
471
|
+
const migrationForm = document.querySelector("form[name='f']") || document.querySelector("form[action='https://x.com/x/migrate']");
|
|
472
|
+
if (migrationForm) {
|
|
473
|
+
const url = migrationForm.getAttribute("action") || "https://x.com/x/migrate";
|
|
474
|
+
const method = migrationForm.getAttribute("method") || "POST";
|
|
475
|
+
const requestPayload = new FormData();
|
|
476
|
+
const inputFields = migrationForm.querySelectorAll("input");
|
|
477
|
+
for (const element of Array.from(inputFields)) {
|
|
478
|
+
const name = element.getAttribute("name");
|
|
479
|
+
const value = element.getAttribute("value");
|
|
480
|
+
if (name && value) {
|
|
481
|
+
requestPayload.append(name, value);
|
|
482
|
+
}
|
|
483
|
+
}
|
|
484
|
+
const formResponse = await fetch(url, {
|
|
485
|
+
method,
|
|
486
|
+
body: requestPayload,
|
|
487
|
+
headers
|
|
488
|
+
});
|
|
489
|
+
if (!formResponse.ok) {
|
|
490
|
+
throw new Error(
|
|
491
|
+
`Failed to submit migration form: ${formResponse.statusText}`
|
|
492
|
+
);
|
|
493
|
+
}
|
|
494
|
+
const formHtml = await formResponse.text();
|
|
495
|
+
dom = await parseHTML(formHtml);
|
|
496
|
+
document = dom.window.document;
|
|
497
|
+
}
|
|
498
|
+
return document;
|
|
499
|
+
}
|
|
500
|
+
let ClientTransaction = null;
|
|
501
|
+
function clientTransaction() {
|
|
502
|
+
if (!ClientTransaction) {
|
|
503
|
+
const mod = require("x-client-transaction-id");
|
|
504
|
+
const ctx = mod.ClientTransaction;
|
|
505
|
+
ClientTransaction = ctx;
|
|
506
|
+
return ctx;
|
|
507
|
+
}
|
|
508
|
+
return ClientTransaction;
|
|
509
|
+
}
|
|
510
|
+
async function generateTransactionId(url, fetchFn, method) {
|
|
511
|
+
const parsedUrl = new URL(url);
|
|
512
|
+
const path = parsedUrl.pathname;
|
|
513
|
+
log$2(`Generating transaction ID for ${method} ${path}`);
|
|
514
|
+
const document = await handleXMigration(fetchFn);
|
|
515
|
+
const transaction = await clientTransaction().create(document);
|
|
516
|
+
const transactionId = await transaction.generateTransactionId(method, path);
|
|
517
|
+
log$2(`Transaction ID: ${transactionId}`);
|
|
518
|
+
return transactionId;
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
const log$1 = debug("twitter-scraper:api");
|
|
522
|
+
const bearerToken = "AAAAAAAAAAAAAAAAAAAAAFQODgEAAAAAVHTp76lzh3rFzcHbmHVvQxYYpTw%3DckAlMINMjmCwxUcaXbAN4XqJVdgMJaHqNOFgPMK0zN1qLqLQCF";
|
|
523
|
+
const bearerToken2 = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA";
|
|
524
|
+
async function jitter(maxMs) {
|
|
525
|
+
const jitter2 = Math.random() * maxMs;
|
|
526
|
+
await new Promise((resolve) => setTimeout(resolve, jitter2));
|
|
527
|
+
}
|
|
528
|
+
async function requestApi(url, auth, method = "GET", platform = new Platform(), headers = new headersPolyfill.Headers(), bearerTokenOverride) {
|
|
529
|
+
log$1(`Making ${method} request to ${url}`);
|
|
530
|
+
await auth.installTo(headers, url, bearerTokenOverride);
|
|
531
|
+
await platform.randomizeCiphers();
|
|
532
|
+
if (auth instanceof TwitterGuestAuth && auth.options?.experimental?.xClientTransactionId) {
|
|
533
|
+
const transactionId = await generateTransactionId(
|
|
534
|
+
url,
|
|
535
|
+
auth.fetch.bind(auth),
|
|
536
|
+
method
|
|
537
|
+
);
|
|
538
|
+
headers.set("x-client-transaction-id", transactionId);
|
|
539
|
+
}
|
|
540
|
+
let res;
|
|
541
|
+
do {
|
|
542
|
+
const fetchParameters = [
|
|
543
|
+
url,
|
|
544
|
+
{
|
|
545
|
+
method,
|
|
546
|
+
headers,
|
|
547
|
+
credentials: "include"
|
|
548
|
+
}
|
|
549
|
+
];
|
|
550
|
+
try {
|
|
551
|
+
res = await auth.fetch(...fetchParameters);
|
|
552
|
+
} catch (err) {
|
|
553
|
+
if (!(err instanceof Error)) {
|
|
554
|
+
throw err;
|
|
555
|
+
}
|
|
556
|
+
return {
|
|
557
|
+
success: false,
|
|
558
|
+
err: new Error("Failed to perform request.")
|
|
559
|
+
};
|
|
560
|
+
}
|
|
561
|
+
await updateCookieJar(auth.cookieJar(), res.headers);
|
|
562
|
+
if (res.status === 429) {
|
|
563
|
+
log$1("Rate limit hit, waiting for retry...");
|
|
564
|
+
await auth.onRateLimit({
|
|
565
|
+
fetchParameters,
|
|
566
|
+
response: res
|
|
567
|
+
});
|
|
568
|
+
}
|
|
569
|
+
} while (res.status === 429);
|
|
570
|
+
if (!res.ok) {
|
|
571
|
+
return {
|
|
572
|
+
success: false,
|
|
573
|
+
err: await ApiError.fromResponse(res)
|
|
574
|
+
};
|
|
575
|
+
}
|
|
576
|
+
const value = await flexParseJson(res);
|
|
577
|
+
if (res.headers.get("x-rate-limit-incoming") == "0") {
|
|
578
|
+
auth.deleteToken();
|
|
579
|
+
return { success: true, value };
|
|
580
|
+
} else {
|
|
581
|
+
return { success: true, value };
|
|
582
|
+
}
|
|
583
|
+
}
|
|
584
|
+
async function flexParseJson(res) {
|
|
585
|
+
try {
|
|
586
|
+
return await res.json();
|
|
587
|
+
} catch {
|
|
588
|
+
log$1("Failed to parse response as JSON, trying text parse...");
|
|
589
|
+
const text = await res.text();
|
|
590
|
+
log$1("Response text:", text);
|
|
591
|
+
return JSON.parse(text);
|
|
592
|
+
}
|
|
593
|
+
}
|
|
594
|
+
function addApiFeatures(o) {
|
|
595
|
+
return {
|
|
596
|
+
...o,
|
|
597
|
+
rweb_lists_timeline_redesign_enabled: true,
|
|
598
|
+
responsive_web_graphql_exclude_directive_enabled: true,
|
|
599
|
+
verified_phone_label_enabled: false,
|
|
600
|
+
creator_subscriptions_tweet_preview_api_enabled: true,
|
|
601
|
+
responsive_web_graphql_timeline_navigation_enabled: true,
|
|
602
|
+
responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
|
|
603
|
+
tweetypie_unmention_optimization_enabled: true,
|
|
604
|
+
responsive_web_edit_tweet_api_enabled: true,
|
|
605
|
+
graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
|
|
606
|
+
view_counts_everywhere_api_enabled: true,
|
|
607
|
+
longform_notetweets_consumption_enabled: true,
|
|
608
|
+
tweet_awards_web_tipping_enabled: false,
|
|
609
|
+
freedom_of_speech_not_reach_fetch_enabled: true,
|
|
610
|
+
standardized_nudges_misinfo: true,
|
|
611
|
+
longform_notetweets_rich_text_read_enabled: true,
|
|
612
|
+
responsive_web_enhance_cards_enabled: false,
|
|
613
|
+
subscriptions_verification_info_enabled: true,
|
|
614
|
+
subscriptions_verification_info_reason_enabled: true,
|
|
615
|
+
subscriptions_verification_info_verified_since_enabled: true,
|
|
616
|
+
super_follow_badge_privacy_enabled: false,
|
|
617
|
+
super_follow_exclusive_tweet_notifications_enabled: false,
|
|
618
|
+
super_follow_tweet_api_enabled: false,
|
|
619
|
+
super_follow_user_api_enabled: false,
|
|
620
|
+
android_graphql_skip_api_media_color_palette: false,
|
|
621
|
+
creator_subscriptions_subscription_count_enabled: false,
|
|
622
|
+
blue_business_profile_image_shape_enabled: false,
|
|
623
|
+
unified_cards_ad_metadata_container_dynamic_card_content_query_enabled: false
|
|
624
|
+
};
|
|
625
|
+
}
|
|
626
|
+
function addApiParams(params, includeTweetReplies) {
|
|
627
|
+
params.set("include_profile_interstitial_type", "1");
|
|
628
|
+
params.set("include_blocking", "1");
|
|
629
|
+
params.set("include_blocked_by", "1");
|
|
630
|
+
params.set("include_followed_by", "1");
|
|
631
|
+
params.set("include_want_retweets", "1");
|
|
632
|
+
params.set("include_mute_edge", "1");
|
|
633
|
+
params.set("include_can_dm", "1");
|
|
634
|
+
params.set("include_can_media_tag", "1");
|
|
635
|
+
params.set("include_ext_has_nft_avatar", "1");
|
|
636
|
+
params.set("include_ext_is_blue_verified", "1");
|
|
637
|
+
params.set("include_ext_verified_type", "1");
|
|
638
|
+
params.set("skip_status", "1");
|
|
639
|
+
params.set("cards_platform", "Web-12");
|
|
640
|
+
params.set("include_cards", "1");
|
|
641
|
+
params.set("include_ext_alt_text", "true");
|
|
642
|
+
params.set("include_ext_limited_action_results", "false");
|
|
643
|
+
params.set("include_quote_count", "true");
|
|
644
|
+
params.set("include_reply_count", "1");
|
|
645
|
+
params.set("tweet_mode", "extended");
|
|
646
|
+
params.set("include_ext_collab_control", "true");
|
|
647
|
+
params.set("include_ext_views", "true");
|
|
648
|
+
params.set("include_entities", "true");
|
|
649
|
+
params.set("include_user_entities", "true");
|
|
650
|
+
params.set("include_ext_media_color", "true");
|
|
651
|
+
params.set("include_ext_media_availability", "true");
|
|
652
|
+
params.set("include_ext_sensitive_media_warning", "true");
|
|
653
|
+
params.set("include_ext_trusted_friends_metadata", "true");
|
|
654
|
+
params.set("send_error_codes", "true");
|
|
655
|
+
params.set("simple_quoted_tweet", "true");
|
|
656
|
+
params.set("include_tweet_replies", `${includeTweetReplies}`);
|
|
657
|
+
params.set(
|
|
658
|
+
"ext",
|
|
659
|
+
"mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe"
|
|
660
|
+
);
|
|
661
|
+
return params;
|
|
662
|
+
}
|
|
663
|
+
|
|
441
664
|
const log = debug("twitter-scraper:auth-user");
|
|
442
665
|
const TwitterUserAuthSubtask = typebox.Type.Object({
|
|
443
666
|
subtask_id: typebox.Type.String(),
|
|
@@ -545,25 +768,26 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
545
768
|
this.jar = new toughCookie.CookieJar();
|
|
546
769
|
}
|
|
547
770
|
}
|
|
548
|
-
async
|
|
549
|
-
const
|
|
550
|
-
|
|
551
|
-
if (xCsrfToken) {
|
|
552
|
-
headers.set("x-csrf-token", xCsrfToken.value);
|
|
553
|
-
}
|
|
554
|
-
}
|
|
555
|
-
async installTo(headers) {
|
|
556
|
-
headers.set("authorization", `Bearer ${this.bearerToken}`);
|
|
557
|
-
const cookie = await this.getCookieString();
|
|
558
|
-
headers.set("cookie", cookie);
|
|
559
|
-
if (this.guestToken) {
|
|
560
|
-
headers.set("x-guest-token", this.guestToken);
|
|
561
|
-
}
|
|
771
|
+
async installTo(headers, _url, bearerTokenOverride) {
|
|
772
|
+
const tokenToUse = bearerTokenOverride ?? this.bearerToken;
|
|
773
|
+
headers.set("authorization", `Bearer ${tokenToUse}`);
|
|
562
774
|
headers.set(
|
|
563
775
|
"user-agent",
|
|
564
776
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
565
777
|
);
|
|
778
|
+
if (this.guestToken) {
|
|
779
|
+
headers.set("x-guest-token", this.guestToken);
|
|
780
|
+
}
|
|
566
781
|
await this.installCsrfToken(headers);
|
|
782
|
+
if (this.options?.experimental?.xpff) {
|
|
783
|
+
const guestId = await this.guestId();
|
|
784
|
+
if (guestId != null) {
|
|
785
|
+
const xpffHeader = await generateXPFFHeader(guestId);
|
|
786
|
+
headers.set("x-xp-forwarded-for", xpffHeader);
|
|
787
|
+
}
|
|
788
|
+
}
|
|
789
|
+
const cookie = await this.getCookieString();
|
|
790
|
+
headers.set("cookie", cookie);
|
|
567
791
|
}
|
|
568
792
|
async initLogin() {
|
|
569
793
|
this.removeCookie("twitter_ads_id=");
|
|
@@ -768,12 +992,6 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
768
992
|
onboardingTaskUrl = `https://api.x.com/1.1/onboarding/task.json?flow_name=${data.flow_name}`;
|
|
769
993
|
}
|
|
770
994
|
log(`Making POST request to ${onboardingTaskUrl}`);
|
|
771
|
-
const token = this.guestToken;
|
|
772
|
-
if (token == null) {
|
|
773
|
-
throw new AuthenticationError(
|
|
774
|
-
"Authentication token is null or undefined."
|
|
775
|
-
);
|
|
776
|
-
}
|
|
777
995
|
const headers = new headersPolyfill.Headers({
|
|
778
996
|
accept: "*/*",
|
|
779
997
|
"accept-language": "en-US,en;q=0.9",
|
|
@@ -790,12 +1008,19 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
790
1008
|
"sec-fetch-mode": "cors",
|
|
791
1009
|
"sec-fetch-site": "same-origin",
|
|
792
1010
|
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
|
|
793
|
-
"x-guest-token": token,
|
|
794
1011
|
"x-twitter-auth-type": "OAuth2Client",
|
|
795
1012
|
"x-twitter-active-user": "yes",
|
|
796
1013
|
"x-twitter-client-language": "en"
|
|
797
1014
|
});
|
|
798
|
-
await this.installTo(headers);
|
|
1015
|
+
await this.installTo(headers, onboardingTaskUrl);
|
|
1016
|
+
if (this.options?.experimental?.xClientTransactionId) {
|
|
1017
|
+
const transactionId = await generateTransactionId(
|
|
1018
|
+
onboardingTaskUrl,
|
|
1019
|
+
this.fetch.bind(this),
|
|
1020
|
+
"POST"
|
|
1021
|
+
);
|
|
1022
|
+
headers.set("x-client-transaction-id", transactionId);
|
|
1023
|
+
}
|
|
799
1024
|
let res;
|
|
800
1025
|
do {
|
|
801
1026
|
const fetchParameters = [
|
|
@@ -1818,7 +2043,11 @@ async function getTrends(auth) {
|
|
|
1818
2043
|
params.set("entity_tokens", "false");
|
|
1819
2044
|
const res = await requestApi(
|
|
1820
2045
|
`https://api.x.com/2/guide.json?${params.toString()}`,
|
|
1821
|
-
auth
|
|
2046
|
+
auth,
|
|
2047
|
+
"GET",
|
|
2048
|
+
void 0,
|
|
2049
|
+
void 0,
|
|
2050
|
+
bearerToken2
|
|
1822
2051
|
);
|
|
1823
2052
|
if (!res.success) {
|
|
1824
2053
|
throw res.err;
|
|
@@ -1901,7 +2130,11 @@ async function fetchTweets(userId, maxTweets, cursor, auth) {
|
|
|
1901
2130
|
}
|
|
1902
2131
|
const res = await requestApi(
|
|
1903
2132
|
userTweetsRequest.toRequestUrl(),
|
|
1904
|
-
auth
|
|
2133
|
+
auth,
|
|
2134
|
+
"GET",
|
|
2135
|
+
void 0,
|
|
2136
|
+
void 0,
|
|
2137
|
+
bearerToken2
|
|
1905
2138
|
);
|
|
1906
2139
|
if (!res.success) {
|
|
1907
2140
|
throw res.err;
|
|
@@ -2047,7 +2280,11 @@ async function getTweet(id, auth) {
|
|
|
2047
2280
|
tweetDetailRequest.variables.focalTweetId = id;
|
|
2048
2281
|
const res = await requestApi(
|
|
2049
2282
|
tweetDetailRequest.toRequestUrl(),
|
|
2050
|
-
auth
|
|
2283
|
+
auth,
|
|
2284
|
+
"GET",
|
|
2285
|
+
void 0,
|
|
2286
|
+
void 0,
|
|
2287
|
+
bearerToken2
|
|
2051
2288
|
);
|
|
2052
2289
|
if (!res.success) {
|
|
2053
2290
|
throw res.err;
|
|
@@ -2063,7 +2300,11 @@ async function getTweetAnonymous(id, auth) {
|
|
|
2063
2300
|
tweetResultByRestIdRequest.variables.tweetId = id;
|
|
2064
2301
|
const res = await requestApi(
|
|
2065
2302
|
tweetResultByRestIdRequest.toRequestUrl(),
|
|
2066
|
-
auth
|
|
2303
|
+
auth,
|
|
2304
|
+
"GET",
|
|
2305
|
+
void 0,
|
|
2306
|
+
void 0,
|
|
2307
|
+
bearerToken2
|
|
2067
2308
|
);
|
|
2068
2309
|
if (!res.success) {
|
|
2069
2310
|
throw res.err;
|
|
@@ -2620,7 +2861,11 @@ class Scraper {
|
|
|
2620
2861
|
return {
|
|
2621
2862
|
fetch: this.options?.fetch,
|
|
2622
2863
|
transform: this.options?.transform,
|
|
2623
|
-
rateLimitStrategy: this.options?.rateLimitStrategy
|
|
2864
|
+
rateLimitStrategy: this.options?.rateLimitStrategy,
|
|
2865
|
+
experimental: {
|
|
2866
|
+
xClientTransactionId: this.options?.experimental?.xClientTransactionId,
|
|
2867
|
+
xpff: this.options?.experimental?.xpff
|
|
2868
|
+
}
|
|
2624
2869
|
};
|
|
2625
2870
|
}
|
|
2626
2871
|
handleResponse(res) {
|