@the-convocation/twitter-scraper 0.19.1 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/default/cjs/index.js +408 -184
- package/dist/default/cjs/index.js.map +1 -1
- package/dist/default/esm/index.mjs +408 -184
- package/dist/default/esm/index.mjs.map +1 -1
- package/dist/node/cjs/index.cjs +405 -181
- package/dist/node/cjs/index.cjs.map +1 -1
- package/dist/node/esm/index.mjs +405 -181
- package/dist/node/esm/index.mjs.map +1 -1
- package/dist/types/index.d.ts +13 -0
- package/examples/node-integration/package.json +2 -1
- package/package.json +6 -4
|
@@ -70,13 +70,13 @@ class AuthenticationError extends Error {
|
|
|
70
70
|
}
|
|
71
71
|
}
|
|
72
72
|
|
|
73
|
-
const log$
|
|
73
|
+
const log$6 = debug("twitter-scraper:rate-limit");
|
|
74
74
|
class WaitingRateLimitStrategy {
|
|
75
75
|
async onRateLimit({ response: res }) {
|
|
76
76
|
const xRateLimitLimit = res.headers.get("x-rate-limit-limit");
|
|
77
77
|
const xRateLimitRemaining = res.headers.get("x-rate-limit-remaining");
|
|
78
78
|
const xRateLimitReset = res.headers.get("x-rate-limit-reset");
|
|
79
|
-
log$
|
|
79
|
+
log$6(
|
|
80
80
|
`Rate limit event: limit=${xRateLimitLimit}, remaining=${xRateLimitRemaining}, reset=${xRateLimitReset}`
|
|
81
81
|
);
|
|
82
82
|
if (xRateLimitRemaining == "0" && xRateLimitReset) {
|
|
@@ -92,23 +92,7 @@ class ErrorRateLimitStrategy {
|
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
94
|
|
|
95
|
-
const
|
|
96
|
-
randomizeCiphers() {
|
|
97
|
-
return Promise.resolve();
|
|
98
|
-
}
|
|
99
|
-
}();
|
|
100
|
-
|
|
101
|
-
class Platform {
|
|
102
|
-
async randomizeCiphers() {
|
|
103
|
-
const platform = await Platform.importPlatform();
|
|
104
|
-
await platform?.randomizeCiphers();
|
|
105
|
-
}
|
|
106
|
-
static async importPlatform() {
|
|
107
|
-
return genericPlatform;
|
|
108
|
-
}
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
const log$3 = debug("twitter-scraper:requests");
|
|
95
|
+
const log$5 = debug("twitter-scraper:requests");
|
|
112
96
|
async function updateCookieJar(cookieJar, headers) {
|
|
113
97
|
let setCookieHeaders = [];
|
|
114
98
|
if (typeof headers.getSetCookie === "function") {
|
|
@@ -123,12 +107,12 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
123
107
|
for (const cookieStr of setCookieHeaders) {
|
|
124
108
|
const cookie = toughCookie.Cookie.parse(cookieStr);
|
|
125
109
|
if (!cookie) {
|
|
126
|
-
log$
|
|
110
|
+
log$5(`Failed to parse cookie: ${cookieStr.substring(0, 100)}`);
|
|
127
111
|
continue;
|
|
128
112
|
}
|
|
129
113
|
if (cookie.maxAge === 0 || cookie.expires && cookie.expires < /* @__PURE__ */ new Date()) {
|
|
130
114
|
if (cookie.key === "ct0") {
|
|
131
|
-
log$
|
|
115
|
+
log$5(`Skipping deletion of ct0 cookie (Max-Age=0)`);
|
|
132
116
|
}
|
|
133
117
|
continue;
|
|
134
118
|
}
|
|
@@ -136,7 +120,7 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
136
120
|
const url = `${cookie.secure ? "https" : "http"}://${cookie.domain}${cookie.path}`;
|
|
137
121
|
await cookieJar.setCookie(cookie, url);
|
|
138
122
|
if (cookie.key === "ct0") {
|
|
139
|
-
log$
|
|
123
|
+
log$5(
|
|
140
124
|
`Successfully set ct0 cookie with value: ${cookie.value.substring(
|
|
141
125
|
0,
|
|
142
126
|
20
|
|
@@ -144,9 +128,9 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
144
128
|
);
|
|
145
129
|
}
|
|
146
130
|
} catch (err) {
|
|
147
|
-
log$
|
|
131
|
+
log$5(`Failed to set cookie ${cookie.key}: ${err}`);
|
|
148
132
|
if (cookie.key === "ct0") {
|
|
149
|
-
log$
|
|
133
|
+
log$5(`FAILED to set ct0 cookie! Error: ${err}`);
|
|
150
134
|
}
|
|
151
135
|
}
|
|
152
136
|
}
|
|
@@ -160,141 +144,84 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
160
144
|
}
|
|
161
145
|
}
|
|
162
146
|
|
|
163
|
-
const log$
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
147
|
+
const log$4 = debug("twitter-scraper:xpff");
|
|
148
|
+
let isoCrypto = null;
|
|
149
|
+
function getCrypto() {
|
|
150
|
+
if (isoCrypto != null) {
|
|
151
|
+
return isoCrypto;
|
|
152
|
+
}
|
|
153
|
+
if (typeof crypto === "undefined") {
|
|
154
|
+
log$4("Global crypto is undefined, importing from crypto module...");
|
|
155
|
+
const { webcrypto } = require("crypto");
|
|
156
|
+
isoCrypto = webcrypto;
|
|
157
|
+
return webcrypto;
|
|
158
|
+
}
|
|
159
|
+
isoCrypto = crypto;
|
|
160
|
+
return crypto;
|
|
161
|
+
}
|
|
162
|
+
async function sha256(message) {
|
|
163
|
+
const msgBuffer = new TextEncoder().encode(message);
|
|
164
|
+
const hashBuffer = await getCrypto().subtle.digest("SHA-256", msgBuffer);
|
|
165
|
+
return new Uint8Array(hashBuffer);
|
|
166
|
+
}
|
|
167
|
+
function buf2hex(buffer) {
|
|
168
|
+
return [...new Uint8Array(buffer)].map((x) => x.toString(16).padStart(2, "0")).join("");
|
|
169
|
+
}
|
|
170
|
+
class XPFFHeaderGenerator {
|
|
171
|
+
constructor(seed) {
|
|
172
|
+
this.seed = seed;
|
|
173
|
+
}
|
|
174
|
+
async deriveKey(guestId) {
|
|
175
|
+
const combined = `${this.seed}${guestId}`;
|
|
176
|
+
const result = await sha256(combined);
|
|
177
|
+
return result;
|
|
178
|
+
}
|
|
179
|
+
async generateHeader(plaintext, guestId) {
|
|
180
|
+
log$4(`Generating XPFF key for guest ID: ${guestId}`);
|
|
181
|
+
const key = await this.deriveKey(guestId);
|
|
182
|
+
const nonce = getCrypto().getRandomValues(new Uint8Array(12));
|
|
183
|
+
const cipher = await getCrypto().subtle.importKey(
|
|
184
|
+
"raw",
|
|
185
|
+
key,
|
|
186
|
+
{ name: "AES-GCM" },
|
|
187
|
+
false,
|
|
188
|
+
["encrypt"]
|
|
189
|
+
);
|
|
190
|
+
const encrypted = await getCrypto().subtle.encrypt(
|
|
177
191
|
{
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
}
|
|
203
|
-
if (!res.ok) {
|
|
204
|
-
return {
|
|
205
|
-
success: false,
|
|
206
|
-
err: await ApiError.fromResponse(res)
|
|
207
|
-
};
|
|
208
|
-
}
|
|
209
|
-
const value = await flexParseJson(res);
|
|
210
|
-
if (res.headers.get("x-rate-limit-incoming") == "0") {
|
|
211
|
-
auth.deleteToken();
|
|
212
|
-
return { success: true, value };
|
|
213
|
-
} else {
|
|
214
|
-
return { success: true, value };
|
|
215
|
-
}
|
|
216
|
-
}
|
|
217
|
-
async function flexParseJson(res) {
|
|
218
|
-
try {
|
|
219
|
-
return await res.json();
|
|
220
|
-
} catch {
|
|
221
|
-
log$2("Failed to parse response as JSON, trying text parse...");
|
|
222
|
-
const text = await res.text();
|
|
223
|
-
log$2("Response text:", text);
|
|
224
|
-
return JSON.parse(text);
|
|
225
|
-
}
|
|
226
|
-
}
|
|
227
|
-
function addApiFeatures(o) {
|
|
228
|
-
return {
|
|
229
|
-
...o,
|
|
230
|
-
rweb_lists_timeline_redesign_enabled: true,
|
|
231
|
-
responsive_web_graphql_exclude_directive_enabled: true,
|
|
232
|
-
verified_phone_label_enabled: false,
|
|
233
|
-
creator_subscriptions_tweet_preview_api_enabled: true,
|
|
234
|
-
responsive_web_graphql_timeline_navigation_enabled: true,
|
|
235
|
-
responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
|
|
236
|
-
tweetypie_unmention_optimization_enabled: true,
|
|
237
|
-
responsive_web_edit_tweet_api_enabled: true,
|
|
238
|
-
graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
|
|
239
|
-
view_counts_everywhere_api_enabled: true,
|
|
240
|
-
longform_notetweets_consumption_enabled: true,
|
|
241
|
-
tweet_awards_web_tipping_enabled: false,
|
|
242
|
-
freedom_of_speech_not_reach_fetch_enabled: true,
|
|
243
|
-
standardized_nudges_misinfo: true,
|
|
244
|
-
longform_notetweets_rich_text_read_enabled: true,
|
|
245
|
-
responsive_web_enhance_cards_enabled: false,
|
|
246
|
-
subscriptions_verification_info_enabled: true,
|
|
247
|
-
subscriptions_verification_info_reason_enabled: true,
|
|
248
|
-
subscriptions_verification_info_verified_since_enabled: true,
|
|
249
|
-
super_follow_badge_privacy_enabled: false,
|
|
250
|
-
super_follow_exclusive_tweet_notifications_enabled: false,
|
|
251
|
-
super_follow_tweet_api_enabled: false,
|
|
252
|
-
super_follow_user_api_enabled: false,
|
|
253
|
-
android_graphql_skip_api_media_color_palette: false,
|
|
254
|
-
creator_subscriptions_subscription_count_enabled: false,
|
|
255
|
-
blue_business_profile_image_shape_enabled: false,
|
|
256
|
-
unified_cards_ad_metadata_container_dynamic_card_content_query_enabled: false
|
|
257
|
-
};
|
|
192
|
+
name: "AES-GCM",
|
|
193
|
+
iv: nonce
|
|
194
|
+
},
|
|
195
|
+
cipher,
|
|
196
|
+
new TextEncoder().encode(plaintext)
|
|
197
|
+
);
|
|
198
|
+
const combined = new Uint8Array(nonce.length + encrypted.byteLength);
|
|
199
|
+
combined.set(nonce);
|
|
200
|
+
combined.set(new Uint8Array(encrypted), nonce.length);
|
|
201
|
+
const result = buf2hex(combined);
|
|
202
|
+
log$4(`XPFF header generated for guest ID ${guestId}: ${result}`);
|
|
203
|
+
return result;
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
const xpffBaseKey = "0e6be1f1e21ffc33590b888fd4dc81b19713e570e805d4e5df80a493c9571a05";
|
|
207
|
+
function xpffPlain() {
|
|
208
|
+
const timestamp = Date.now();
|
|
209
|
+
return JSON.stringify({
|
|
210
|
+
navigator_properties: {
|
|
211
|
+
hasBeenActive: "true",
|
|
212
|
+
userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
|
|
213
|
+
webdriver: "false"
|
|
214
|
+
},
|
|
215
|
+
created_at: timestamp
|
|
216
|
+
});
|
|
258
217
|
}
|
|
259
|
-
function
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
params.set("include_followed_by", "1");
|
|
264
|
-
params.set("include_want_retweets", "1");
|
|
265
|
-
params.set("include_mute_edge", "1");
|
|
266
|
-
params.set("include_can_dm", "1");
|
|
267
|
-
params.set("include_can_media_tag", "1");
|
|
268
|
-
params.set("include_ext_has_nft_avatar", "1");
|
|
269
|
-
params.set("include_ext_is_blue_verified", "1");
|
|
270
|
-
params.set("include_ext_verified_type", "1");
|
|
271
|
-
params.set("skip_status", "1");
|
|
272
|
-
params.set("cards_platform", "Web-12");
|
|
273
|
-
params.set("include_cards", "1");
|
|
274
|
-
params.set("include_ext_alt_text", "true");
|
|
275
|
-
params.set("include_ext_limited_action_results", "false");
|
|
276
|
-
params.set("include_quote_count", "true");
|
|
277
|
-
params.set("include_reply_count", "1");
|
|
278
|
-
params.set("tweet_mode", "extended");
|
|
279
|
-
params.set("include_ext_collab_control", "true");
|
|
280
|
-
params.set("include_ext_views", "true");
|
|
281
|
-
params.set("include_entities", "true");
|
|
282
|
-
params.set("include_user_entities", "true");
|
|
283
|
-
params.set("include_ext_media_color", "true");
|
|
284
|
-
params.set("include_ext_media_availability", "true");
|
|
285
|
-
params.set("include_ext_sensitive_media_warning", "true");
|
|
286
|
-
params.set("include_ext_trusted_friends_metadata", "true");
|
|
287
|
-
params.set("send_error_codes", "true");
|
|
288
|
-
params.set("simple_quoted_tweet", "true");
|
|
289
|
-
params.set("include_tweet_replies", `${includeTweetReplies}`);
|
|
290
|
-
params.set(
|
|
291
|
-
"ext",
|
|
292
|
-
"mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe"
|
|
293
|
-
);
|
|
294
|
-
return params;
|
|
218
|
+
async function generateXPFFHeader(guestId) {
|
|
219
|
+
const generator = new XPFFHeaderGenerator(xpffBaseKey);
|
|
220
|
+
const plaintext = xpffPlain();
|
|
221
|
+
return generator.generateHeader(plaintext, guestId);
|
|
295
222
|
}
|
|
296
223
|
|
|
297
|
-
const log$
|
|
224
|
+
const log$3 = debug("twitter-scraper:auth");
|
|
298
225
|
function withTransform(fetchFn, transform) {
|
|
299
226
|
return async (input, init) => {
|
|
300
227
|
const fetchArgs = await transform?.request?.(input, init) ?? [
|
|
@@ -348,24 +275,30 @@ class TwitterGuestAuth {
|
|
|
348
275
|
if (this.shouldUpdate()) {
|
|
349
276
|
await this.updateGuestToken();
|
|
350
277
|
}
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
throw new AuthenticationError(
|
|
354
|
-
"Authentication token is null or undefined."
|
|
355
|
-
);
|
|
278
|
+
if (this.guestToken) {
|
|
279
|
+
headers.set("x-guest-token", this.guestToken);
|
|
356
280
|
}
|
|
357
281
|
headers.set("authorization", `Bearer ${this.bearerToken}`);
|
|
358
|
-
headers.set("x-guest-token", token);
|
|
359
282
|
headers.set(
|
|
360
283
|
"user-agent",
|
|
361
284
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
362
285
|
);
|
|
286
|
+
await this.installCsrfToken(headers);
|
|
287
|
+
if (this.options?.experimental?.xpff) {
|
|
288
|
+
const guestId = await this.guestId();
|
|
289
|
+
if (guestId != null) {
|
|
290
|
+
const xpffHeader = await generateXPFFHeader(guestId);
|
|
291
|
+
headers.set("x-xp-forwarded-for", xpffHeader);
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
headers.set("cookie", await this.getCookieString());
|
|
295
|
+
}
|
|
296
|
+
async installCsrfToken(headers) {
|
|
363
297
|
const cookies = await this.getCookies();
|
|
364
298
|
const xCsrfToken = cookies.find((cookie) => cookie.key === "ct0");
|
|
365
299
|
if (xCsrfToken) {
|
|
366
300
|
headers.set("x-csrf-token", xCsrfToken.value);
|
|
367
301
|
}
|
|
368
|
-
headers.set("cookie", await this.getCookieString());
|
|
369
302
|
}
|
|
370
303
|
async setCookie(key, value) {
|
|
371
304
|
const cookie = toughCookie.Cookie.parse(`${key}=${value}`);
|
|
@@ -398,16 +331,28 @@ class TwitterGuestAuth {
|
|
|
398
331
|
getCookieJarUrl() {
|
|
399
332
|
return typeof document !== "undefined" ? document.location.toString() : "https://x.com";
|
|
400
333
|
}
|
|
334
|
+
async guestId() {
|
|
335
|
+
const cookies = await this.getCookies();
|
|
336
|
+
const guestIdCookie = cookies.find((cookie) => cookie.key === "guest_id");
|
|
337
|
+
return guestIdCookie ? guestIdCookie.value : null;
|
|
338
|
+
}
|
|
401
339
|
/**
|
|
402
340
|
* Updates the authentication state with a new guest token from the Twitter API.
|
|
403
341
|
*/
|
|
404
342
|
async updateGuestToken() {
|
|
343
|
+
try {
|
|
344
|
+
await this.updateGuestTokenCore();
|
|
345
|
+
} catch (err) {
|
|
346
|
+
log$3("Failed to update guest token; this may cause issues:", err);
|
|
347
|
+
}
|
|
348
|
+
}
|
|
349
|
+
async updateGuestTokenCore() {
|
|
405
350
|
const guestActivateUrl = "https://api.x.com/1.1/guest/activate.json";
|
|
406
351
|
const headers = new headersPolyfill.Headers({
|
|
407
352
|
Authorization: `Bearer ${this.bearerToken}`,
|
|
408
353
|
Cookie: await this.getCookieString()
|
|
409
354
|
});
|
|
410
|
-
log$
|
|
355
|
+
log$3(`Making POST request to ${guestActivateUrl}`);
|
|
411
356
|
const res = await this.fetch(guestActivateUrl, {
|
|
412
357
|
method: "POST",
|
|
413
358
|
headers,
|
|
@@ -428,7 +373,7 @@ class TwitterGuestAuth {
|
|
|
428
373
|
this.guestToken = newGuestToken;
|
|
429
374
|
this.guestCreatedAt = /* @__PURE__ */ new Date();
|
|
430
375
|
await this.setCookie("gt", newGuestToken);
|
|
431
|
-
log$
|
|
376
|
+
log$3(`Updated guest token: ${newGuestToken}`);
|
|
432
377
|
}
|
|
433
378
|
/**
|
|
434
379
|
* Returns if the authentication token needs to be updated or not.
|
|
@@ -439,6 +384,280 @@ class TwitterGuestAuth {
|
|
|
439
384
|
}
|
|
440
385
|
}
|
|
441
386
|
|
|
387
|
+
const genericPlatform = new class {
|
|
388
|
+
randomizeCiphers() {
|
|
389
|
+
return Promise.resolve();
|
|
390
|
+
}
|
|
391
|
+
}();
|
|
392
|
+
|
|
393
|
+
class Platform {
|
|
394
|
+
async randomizeCiphers() {
|
|
395
|
+
const platform = await Platform.importPlatform();
|
|
396
|
+
await platform?.randomizeCiphers();
|
|
397
|
+
}
|
|
398
|
+
static async importPlatform() {
|
|
399
|
+
return genericPlatform;
|
|
400
|
+
}
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
const log$2 = debug("twitter-scraper:xctxid");
|
|
404
|
+
let linkedom = null;
|
|
405
|
+
function linkedomImport() {
|
|
406
|
+
if (!linkedom) {
|
|
407
|
+
const mod = require("linkedom");
|
|
408
|
+
linkedom = mod;
|
|
409
|
+
return mod;
|
|
410
|
+
}
|
|
411
|
+
return linkedom;
|
|
412
|
+
}
|
|
413
|
+
async function parseHTML(html) {
|
|
414
|
+
if (typeof window !== "undefined") {
|
|
415
|
+
const { defaultView } = new DOMParser().parseFromString(html, "text/html");
|
|
416
|
+
if (!defaultView) {
|
|
417
|
+
throw new Error("Failed to get defaultView from parsed HTML.");
|
|
418
|
+
}
|
|
419
|
+
return defaultView;
|
|
420
|
+
} else {
|
|
421
|
+
const { DOMParser: DOMParser2 } = linkedomImport();
|
|
422
|
+
return new DOMParser2().parseFromString(html, "text/html").defaultView;
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
async function handleXMigration(fetchFn) {
|
|
426
|
+
const headers = {
|
|
427
|
+
accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
|
428
|
+
"accept-language": "ja",
|
|
429
|
+
"cache-control": "no-cache",
|
|
430
|
+
pragma: "no-cache",
|
|
431
|
+
priority: "u=0, i",
|
|
432
|
+
"sec-ch-ua": '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
|
|
433
|
+
"sec-ch-ua-mobile": "?0",
|
|
434
|
+
"sec-ch-ua-platform": '"Windows"',
|
|
435
|
+
"sec-fetch-dest": "document",
|
|
436
|
+
"sec-fetch-mode": "navigate",
|
|
437
|
+
"sec-fetch-site": "none",
|
|
438
|
+
"sec-fetch-user": "?1",
|
|
439
|
+
"upgrade-insecure-requests": "1",
|
|
440
|
+
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
441
|
+
};
|
|
442
|
+
const response = await fetchFn("https://x.com", {
|
|
443
|
+
headers
|
|
444
|
+
});
|
|
445
|
+
if (!response.ok) {
|
|
446
|
+
throw new Error(`Failed to fetch X homepage: ${response.statusText}`);
|
|
447
|
+
}
|
|
448
|
+
const htmlText = await response.text();
|
|
449
|
+
let dom = await parseHTML(htmlText);
|
|
450
|
+
let document = dom.window.document;
|
|
451
|
+
const migrationRedirectionRegex = new RegExp(
|
|
452
|
+
"(http(?:s)?://(?:www\\.)?(twitter|x){1}\\.com(/x)?/migrate([/?])?tok=[a-zA-Z0-9%\\-_]+)+",
|
|
453
|
+
"i"
|
|
454
|
+
);
|
|
455
|
+
const metaRefresh = document.querySelector("meta[http-equiv='refresh']");
|
|
456
|
+
const metaContent = metaRefresh ? metaRefresh.getAttribute("content") || "" : "";
|
|
457
|
+
const migrationRedirectionUrl = migrationRedirectionRegex.exec(metaContent) || migrationRedirectionRegex.exec(htmlText);
|
|
458
|
+
if (migrationRedirectionUrl) {
|
|
459
|
+
const redirectResponse = await fetch(migrationRedirectionUrl[0]);
|
|
460
|
+
if (!redirectResponse.ok) {
|
|
461
|
+
throw new Error(
|
|
462
|
+
`Failed to follow migration redirection: ${redirectResponse.statusText}`
|
|
463
|
+
);
|
|
464
|
+
}
|
|
465
|
+
const redirectHtml = await redirectResponse.text();
|
|
466
|
+
dom = await parseHTML(redirectHtml);
|
|
467
|
+
document = dom.window.document;
|
|
468
|
+
}
|
|
469
|
+
const migrationForm = document.querySelector("form[name='f']") || document.querySelector("form[action='https://x.com/x/migrate']");
|
|
470
|
+
if (migrationForm) {
|
|
471
|
+
const url = migrationForm.getAttribute("action") || "https://x.com/x/migrate";
|
|
472
|
+
const method = migrationForm.getAttribute("method") || "POST";
|
|
473
|
+
const requestPayload = new FormData();
|
|
474
|
+
const inputFields = migrationForm.querySelectorAll("input");
|
|
475
|
+
for (const element of Array.from(inputFields)) {
|
|
476
|
+
const name = element.getAttribute("name");
|
|
477
|
+
const value = element.getAttribute("value");
|
|
478
|
+
if (name && value) {
|
|
479
|
+
requestPayload.append(name, value);
|
|
480
|
+
}
|
|
481
|
+
}
|
|
482
|
+
const formResponse = await fetch(url, {
|
|
483
|
+
method,
|
|
484
|
+
body: requestPayload,
|
|
485
|
+
headers
|
|
486
|
+
});
|
|
487
|
+
if (!formResponse.ok) {
|
|
488
|
+
throw new Error(
|
|
489
|
+
`Failed to submit migration form: ${formResponse.statusText}`
|
|
490
|
+
);
|
|
491
|
+
}
|
|
492
|
+
const formHtml = await formResponse.text();
|
|
493
|
+
dom = await parseHTML(formHtml);
|
|
494
|
+
document = dom.window.document;
|
|
495
|
+
}
|
|
496
|
+
return document;
|
|
497
|
+
}
|
|
498
|
+
let ClientTransaction = null;
|
|
499
|
+
function clientTransaction() {
|
|
500
|
+
if (!ClientTransaction) {
|
|
501
|
+
const mod = require("x-client-transaction-id");
|
|
502
|
+
const ctx = mod.ClientTransaction;
|
|
503
|
+
ClientTransaction = ctx;
|
|
504
|
+
return ctx;
|
|
505
|
+
}
|
|
506
|
+
return ClientTransaction;
|
|
507
|
+
}
|
|
508
|
+
async function generateTransactionId(url, fetchFn, method) {
|
|
509
|
+
const parsedUrl = new URL(url);
|
|
510
|
+
const path = parsedUrl.pathname;
|
|
511
|
+
log$2(`Generating transaction ID for ${method} ${path}`);
|
|
512
|
+
const document = await handleXMigration(fetchFn);
|
|
513
|
+
const transaction = await clientTransaction().create(document);
|
|
514
|
+
const transactionId = await transaction.generateTransactionId(method, path);
|
|
515
|
+
log$2(`Transaction ID: ${transactionId}`);
|
|
516
|
+
return transactionId;
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
const log$1 = debug("twitter-scraper:api");
|
|
520
|
+
const bearerToken = "AAAAAAAAAAAAAAAAAAAAAFQODgEAAAAAVHTp76lzh3rFzcHbmHVvQxYYpTw%3DckAlMINMjmCwxUcaXbAN4XqJVdgMJaHqNOFgPMK0zN1qLqLQCF";
|
|
521
|
+
async function jitter(maxMs) {
|
|
522
|
+
const jitter2 = Math.random() * maxMs;
|
|
523
|
+
await new Promise((resolve) => setTimeout(resolve, jitter2));
|
|
524
|
+
}
|
|
525
|
+
async function requestApi(url, auth, method = "GET", platform = new Platform(), headers = new headersPolyfill.Headers()) {
|
|
526
|
+
log$1(`Making ${method} request to ${url}`);
|
|
527
|
+
await auth.installTo(headers, url);
|
|
528
|
+
await platform.randomizeCiphers();
|
|
529
|
+
if (auth instanceof TwitterGuestAuth && auth.options?.experimental?.xClientTransactionId) {
|
|
530
|
+
const transactionId = await generateTransactionId(
|
|
531
|
+
url,
|
|
532
|
+
auth.fetch.bind(auth),
|
|
533
|
+
method
|
|
534
|
+
);
|
|
535
|
+
headers.set("x-client-transaction-id", transactionId);
|
|
536
|
+
}
|
|
537
|
+
let res;
|
|
538
|
+
do {
|
|
539
|
+
const fetchParameters = [
|
|
540
|
+
url,
|
|
541
|
+
{
|
|
542
|
+
method,
|
|
543
|
+
headers,
|
|
544
|
+
credentials: "include"
|
|
545
|
+
}
|
|
546
|
+
];
|
|
547
|
+
try {
|
|
548
|
+
res = await auth.fetch(...fetchParameters);
|
|
549
|
+
} catch (err) {
|
|
550
|
+
if (!(err instanceof Error)) {
|
|
551
|
+
throw err;
|
|
552
|
+
}
|
|
553
|
+
return {
|
|
554
|
+
success: false,
|
|
555
|
+
err: new Error("Failed to perform request.")
|
|
556
|
+
};
|
|
557
|
+
}
|
|
558
|
+
await updateCookieJar(auth.cookieJar(), res.headers);
|
|
559
|
+
if (res.status === 429) {
|
|
560
|
+
log$1("Rate limit hit, waiting for retry...");
|
|
561
|
+
await auth.onRateLimit({
|
|
562
|
+
fetchParameters,
|
|
563
|
+
response: res
|
|
564
|
+
});
|
|
565
|
+
}
|
|
566
|
+
} while (res.status === 429);
|
|
567
|
+
if (!res.ok) {
|
|
568
|
+
return {
|
|
569
|
+
success: false,
|
|
570
|
+
err: await ApiError.fromResponse(res)
|
|
571
|
+
};
|
|
572
|
+
}
|
|
573
|
+
const value = await flexParseJson(res);
|
|
574
|
+
if (res.headers.get("x-rate-limit-incoming") == "0") {
|
|
575
|
+
auth.deleteToken();
|
|
576
|
+
return { success: true, value };
|
|
577
|
+
} else {
|
|
578
|
+
return { success: true, value };
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
async function flexParseJson(res) {
|
|
582
|
+
try {
|
|
583
|
+
return await res.json();
|
|
584
|
+
} catch {
|
|
585
|
+
log$1("Failed to parse response as JSON, trying text parse...");
|
|
586
|
+
const text = await res.text();
|
|
587
|
+
log$1("Response text:", text);
|
|
588
|
+
return JSON.parse(text);
|
|
589
|
+
}
|
|
590
|
+
}
|
|
591
|
+
function addApiFeatures(o) {
|
|
592
|
+
return {
|
|
593
|
+
...o,
|
|
594
|
+
rweb_lists_timeline_redesign_enabled: true,
|
|
595
|
+
responsive_web_graphql_exclude_directive_enabled: true,
|
|
596
|
+
verified_phone_label_enabled: false,
|
|
597
|
+
creator_subscriptions_tweet_preview_api_enabled: true,
|
|
598
|
+
responsive_web_graphql_timeline_navigation_enabled: true,
|
|
599
|
+
responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
|
|
600
|
+
tweetypie_unmention_optimization_enabled: true,
|
|
601
|
+
responsive_web_edit_tweet_api_enabled: true,
|
|
602
|
+
graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
|
|
603
|
+
view_counts_everywhere_api_enabled: true,
|
|
604
|
+
longform_notetweets_consumption_enabled: true,
|
|
605
|
+
tweet_awards_web_tipping_enabled: false,
|
|
606
|
+
freedom_of_speech_not_reach_fetch_enabled: true,
|
|
607
|
+
standardized_nudges_misinfo: true,
|
|
608
|
+
longform_notetweets_rich_text_read_enabled: true,
|
|
609
|
+
responsive_web_enhance_cards_enabled: false,
|
|
610
|
+
subscriptions_verification_info_enabled: true,
|
|
611
|
+
subscriptions_verification_info_reason_enabled: true,
|
|
612
|
+
subscriptions_verification_info_verified_since_enabled: true,
|
|
613
|
+
super_follow_badge_privacy_enabled: false,
|
|
614
|
+
super_follow_exclusive_tweet_notifications_enabled: false,
|
|
615
|
+
super_follow_tweet_api_enabled: false,
|
|
616
|
+
super_follow_user_api_enabled: false,
|
|
617
|
+
android_graphql_skip_api_media_color_palette: false,
|
|
618
|
+
creator_subscriptions_subscription_count_enabled: false,
|
|
619
|
+
blue_business_profile_image_shape_enabled: false,
|
|
620
|
+
unified_cards_ad_metadata_container_dynamic_card_content_query_enabled: false
|
|
621
|
+
};
|
|
622
|
+
}
|
|
623
|
+
function addApiParams(params, includeTweetReplies) {
|
|
624
|
+
params.set("include_profile_interstitial_type", "1");
|
|
625
|
+
params.set("include_blocking", "1");
|
|
626
|
+
params.set("include_blocked_by", "1");
|
|
627
|
+
params.set("include_followed_by", "1");
|
|
628
|
+
params.set("include_want_retweets", "1");
|
|
629
|
+
params.set("include_mute_edge", "1");
|
|
630
|
+
params.set("include_can_dm", "1");
|
|
631
|
+
params.set("include_can_media_tag", "1");
|
|
632
|
+
params.set("include_ext_has_nft_avatar", "1");
|
|
633
|
+
params.set("include_ext_is_blue_verified", "1");
|
|
634
|
+
params.set("include_ext_verified_type", "1");
|
|
635
|
+
params.set("skip_status", "1");
|
|
636
|
+
params.set("cards_platform", "Web-12");
|
|
637
|
+
params.set("include_cards", "1");
|
|
638
|
+
params.set("include_ext_alt_text", "true");
|
|
639
|
+
params.set("include_ext_limited_action_results", "false");
|
|
640
|
+
params.set("include_quote_count", "true");
|
|
641
|
+
params.set("include_reply_count", "1");
|
|
642
|
+
params.set("tweet_mode", "extended");
|
|
643
|
+
params.set("include_ext_collab_control", "true");
|
|
644
|
+
params.set("include_ext_views", "true");
|
|
645
|
+
params.set("include_entities", "true");
|
|
646
|
+
params.set("include_user_entities", "true");
|
|
647
|
+
params.set("include_ext_media_color", "true");
|
|
648
|
+
params.set("include_ext_media_availability", "true");
|
|
649
|
+
params.set("include_ext_sensitive_media_warning", "true");
|
|
650
|
+
params.set("include_ext_trusted_friends_metadata", "true");
|
|
651
|
+
params.set("send_error_codes", "true");
|
|
652
|
+
params.set("simple_quoted_tweet", "true");
|
|
653
|
+
params.set("include_tweet_replies", `${includeTweetReplies}`);
|
|
654
|
+
params.set(
|
|
655
|
+
"ext",
|
|
656
|
+
"mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe"
|
|
657
|
+
);
|
|
658
|
+
return params;
|
|
659
|
+
}
|
|
660
|
+
|
|
442
661
|
const log = debug("twitter-scraper:auth-user");
|
|
443
662
|
const TwitterUserAuthSubtask = typebox.Type.Object({
|
|
444
663
|
subtask_id: typebox.Type.String(),
|
|
@@ -546,25 +765,25 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
546
765
|
this.jar = new toughCookie.CookieJar();
|
|
547
766
|
}
|
|
548
767
|
}
|
|
549
|
-
async installCsrfToken(headers) {
|
|
550
|
-
const cookies = await this.getCookies();
|
|
551
|
-
const xCsrfToken = cookies.find((cookie) => cookie.key === "ct0");
|
|
552
|
-
if (xCsrfToken) {
|
|
553
|
-
headers.set("x-csrf-token", xCsrfToken.value);
|
|
554
|
-
}
|
|
555
|
-
}
|
|
556
768
|
async installTo(headers) {
|
|
557
769
|
headers.set("authorization", `Bearer ${this.bearerToken}`);
|
|
558
|
-
const cookie = await this.getCookieString();
|
|
559
|
-
headers.set("cookie", cookie);
|
|
560
|
-
if (this.guestToken) {
|
|
561
|
-
headers.set("x-guest-token", this.guestToken);
|
|
562
|
-
}
|
|
563
770
|
headers.set(
|
|
564
771
|
"user-agent",
|
|
565
772
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
566
773
|
);
|
|
774
|
+
if (this.guestToken) {
|
|
775
|
+
headers.set("x-guest-token", this.guestToken);
|
|
776
|
+
}
|
|
567
777
|
await this.installCsrfToken(headers);
|
|
778
|
+
if (this.options?.experimental?.xpff) {
|
|
779
|
+
const guestId = await this.guestId();
|
|
780
|
+
if (guestId != null) {
|
|
781
|
+
const xpffHeader = await generateXPFFHeader(guestId);
|
|
782
|
+
headers.set("x-xp-forwarded-for", xpffHeader);
|
|
783
|
+
}
|
|
784
|
+
}
|
|
785
|
+
const cookie = await this.getCookieString();
|
|
786
|
+
headers.set("cookie", cookie);
|
|
568
787
|
}
|
|
569
788
|
async initLogin() {
|
|
570
789
|
this.removeCookie("twitter_ads_id=");
|
|
@@ -769,12 +988,6 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
769
988
|
onboardingTaskUrl = `https://api.x.com/1.1/onboarding/task.json?flow_name=${data.flow_name}`;
|
|
770
989
|
}
|
|
771
990
|
log(`Making POST request to ${onboardingTaskUrl}`);
|
|
772
|
-
const token = this.guestToken;
|
|
773
|
-
if (token == null) {
|
|
774
|
-
throw new AuthenticationError(
|
|
775
|
-
"Authentication token is null or undefined."
|
|
776
|
-
);
|
|
777
|
-
}
|
|
778
991
|
const headers = new headersPolyfill.Headers({
|
|
779
992
|
accept: "*/*",
|
|
780
993
|
"accept-language": "en-US,en;q=0.9",
|
|
@@ -791,12 +1004,19 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
791
1004
|
"sec-fetch-mode": "cors",
|
|
792
1005
|
"sec-fetch-site": "same-origin",
|
|
793
1006
|
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
|
|
794
|
-
"x-guest-token": token,
|
|
795
1007
|
"x-twitter-auth-type": "OAuth2Client",
|
|
796
1008
|
"x-twitter-active-user": "yes",
|
|
797
1009
|
"x-twitter-client-language": "en"
|
|
798
1010
|
});
|
|
799
1011
|
await this.installTo(headers);
|
|
1012
|
+
if (this.options?.experimental?.xClientTransactionId) {
|
|
1013
|
+
const transactionId = await generateTransactionId(
|
|
1014
|
+
onboardingTaskUrl,
|
|
1015
|
+
this.fetch.bind(this),
|
|
1016
|
+
"POST"
|
|
1017
|
+
);
|
|
1018
|
+
headers.set("x-client-transaction-id", transactionId);
|
|
1019
|
+
}
|
|
800
1020
|
let res;
|
|
801
1021
|
do {
|
|
802
1022
|
const fetchParameters = [
|
|
@@ -2621,7 +2841,11 @@ class Scraper {
|
|
|
2621
2841
|
return {
|
|
2622
2842
|
fetch: this.options?.fetch,
|
|
2623
2843
|
transform: this.options?.transform,
|
|
2624
|
-
rateLimitStrategy: this.options?.rateLimitStrategy
|
|
2844
|
+
rateLimitStrategy: this.options?.rateLimitStrategy,
|
|
2845
|
+
experimental: {
|
|
2846
|
+
xClientTransactionId: this.options?.experimental?.xClientTransactionId,
|
|
2847
|
+
xpff: this.options?.experimental?.xpff
|
|
2848
|
+
}
|
|
2625
2849
|
};
|
|
2626
2850
|
}
|
|
2627
2851
|
handleResponse(res) {
|