@the-convocation/twitter-scraper 0.19.1 → 0.20.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/default/cjs/index.js +441 -196
- package/dist/default/cjs/index.js.map +1 -1
- package/dist/default/esm/index.mjs +441 -196
- package/dist/default/esm/index.mjs.map +1 -1
- package/dist/node/cjs/index.cjs +438 -193
- package/dist/node/cjs/index.cjs.map +1 -1
- package/dist/node/esm/index.mjs +438 -193
- package/dist/node/esm/index.mjs.map +1 -1
- package/dist/types/index.d.ts +13 -0
- package/examples/node-integration/package.json +2 -1
- package/package.json +6 -4
|
@@ -70,13 +70,13 @@ class AuthenticationError extends Error {
|
|
|
70
70
|
}
|
|
71
71
|
}
|
|
72
72
|
|
|
73
|
-
const log$
|
|
73
|
+
const log$6 = debug("twitter-scraper:rate-limit");
|
|
74
74
|
class WaitingRateLimitStrategy {
|
|
75
75
|
async onRateLimit({ response: res }) {
|
|
76
76
|
const xRateLimitLimit = res.headers.get("x-rate-limit-limit");
|
|
77
77
|
const xRateLimitRemaining = res.headers.get("x-rate-limit-remaining");
|
|
78
78
|
const xRateLimitReset = res.headers.get("x-rate-limit-reset");
|
|
79
|
-
log$
|
|
79
|
+
log$6(
|
|
80
80
|
`Rate limit event: limit=${xRateLimitLimit}, remaining=${xRateLimitRemaining}, reset=${xRateLimitReset}`
|
|
81
81
|
);
|
|
82
82
|
if (xRateLimitRemaining == "0" && xRateLimitReset) {
|
|
@@ -92,23 +92,7 @@ class ErrorRateLimitStrategy {
|
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
94
|
|
|
95
|
-
const
|
|
96
|
-
randomizeCiphers() {
|
|
97
|
-
return Promise.resolve();
|
|
98
|
-
}
|
|
99
|
-
}();
|
|
100
|
-
|
|
101
|
-
class Platform {
|
|
102
|
-
async randomizeCiphers() {
|
|
103
|
-
const platform = await Platform.importPlatform();
|
|
104
|
-
await platform?.randomizeCiphers();
|
|
105
|
-
}
|
|
106
|
-
static async importPlatform() {
|
|
107
|
-
return genericPlatform;
|
|
108
|
-
}
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
const log$3 = debug("twitter-scraper:requests");
|
|
95
|
+
const log$5 = debug("twitter-scraper:requests");
|
|
112
96
|
async function updateCookieJar(cookieJar, headers) {
|
|
113
97
|
let setCookieHeaders = [];
|
|
114
98
|
if (typeof headers.getSetCookie === "function") {
|
|
@@ -123,12 +107,12 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
123
107
|
for (const cookieStr of setCookieHeaders) {
|
|
124
108
|
const cookie = toughCookie.Cookie.parse(cookieStr);
|
|
125
109
|
if (!cookie) {
|
|
126
|
-
log$
|
|
110
|
+
log$5(`Failed to parse cookie: ${cookieStr.substring(0, 100)}`);
|
|
127
111
|
continue;
|
|
128
112
|
}
|
|
129
113
|
if (cookie.maxAge === 0 || cookie.expires && cookie.expires < /* @__PURE__ */ new Date()) {
|
|
130
114
|
if (cookie.key === "ct0") {
|
|
131
|
-
log$
|
|
115
|
+
log$5(`Skipping deletion of ct0 cookie (Max-Age=0)`);
|
|
132
116
|
}
|
|
133
117
|
continue;
|
|
134
118
|
}
|
|
@@ -136,7 +120,7 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
136
120
|
const url = `${cookie.secure ? "https" : "http"}://${cookie.domain}${cookie.path}`;
|
|
137
121
|
await cookieJar.setCookie(cookie, url);
|
|
138
122
|
if (cookie.key === "ct0") {
|
|
139
|
-
log$
|
|
123
|
+
log$5(
|
|
140
124
|
`Successfully set ct0 cookie with value: ${cookie.value.substring(
|
|
141
125
|
0,
|
|
142
126
|
20
|
|
@@ -144,9 +128,9 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
144
128
|
);
|
|
145
129
|
}
|
|
146
130
|
} catch (err) {
|
|
147
|
-
log$
|
|
131
|
+
log$5(`Failed to set cookie ${cookie.key}: ${err}`);
|
|
148
132
|
if (cookie.key === "ct0") {
|
|
149
|
-
log$
|
|
133
|
+
log$5(`FAILED to set ct0 cookie! Error: ${err}`);
|
|
150
134
|
}
|
|
151
135
|
}
|
|
152
136
|
}
|
|
@@ -160,141 +144,84 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
160
144
|
}
|
|
161
145
|
}
|
|
162
146
|
|
|
163
|
-
const log$
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
147
|
+
const log$4 = debug("twitter-scraper:xpff");
|
|
148
|
+
let isoCrypto = null;
|
|
149
|
+
function getCrypto() {
|
|
150
|
+
if (isoCrypto != null) {
|
|
151
|
+
return isoCrypto;
|
|
152
|
+
}
|
|
153
|
+
if (typeof crypto === "undefined") {
|
|
154
|
+
log$4("Global crypto is undefined, importing from crypto module...");
|
|
155
|
+
const { webcrypto } = require("crypto");
|
|
156
|
+
isoCrypto = webcrypto;
|
|
157
|
+
return webcrypto;
|
|
158
|
+
}
|
|
159
|
+
isoCrypto = crypto;
|
|
160
|
+
return crypto;
|
|
161
|
+
}
|
|
162
|
+
async function sha256(message) {
|
|
163
|
+
const msgBuffer = new TextEncoder().encode(message);
|
|
164
|
+
const hashBuffer = await getCrypto().subtle.digest("SHA-256", msgBuffer);
|
|
165
|
+
return new Uint8Array(hashBuffer);
|
|
166
|
+
}
|
|
167
|
+
function buf2hex(buffer) {
|
|
168
|
+
return [...new Uint8Array(buffer)].map((x) => x.toString(16).padStart(2, "0")).join("");
|
|
169
|
+
}
|
|
170
|
+
class XPFFHeaderGenerator {
|
|
171
|
+
constructor(seed) {
|
|
172
|
+
this.seed = seed;
|
|
173
|
+
}
|
|
174
|
+
async deriveKey(guestId) {
|
|
175
|
+
const combined = `${this.seed}${guestId}`;
|
|
176
|
+
const result = await sha256(combined);
|
|
177
|
+
return result;
|
|
178
|
+
}
|
|
179
|
+
async generateHeader(plaintext, guestId) {
|
|
180
|
+
log$4(`Generating XPFF key for guest ID: ${guestId}`);
|
|
181
|
+
const key = await this.deriveKey(guestId);
|
|
182
|
+
const nonce = getCrypto().getRandomValues(new Uint8Array(12));
|
|
183
|
+
const cipher = await getCrypto().subtle.importKey(
|
|
184
|
+
"raw",
|
|
185
|
+
key,
|
|
186
|
+
{ name: "AES-GCM" },
|
|
187
|
+
false,
|
|
188
|
+
["encrypt"]
|
|
189
|
+
);
|
|
190
|
+
const encrypted = await getCrypto().subtle.encrypt(
|
|
177
191
|
{
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
}
|
|
203
|
-
if (!res.ok) {
|
|
204
|
-
return {
|
|
205
|
-
success: false,
|
|
206
|
-
err: await ApiError.fromResponse(res)
|
|
207
|
-
};
|
|
208
|
-
}
|
|
209
|
-
const value = await flexParseJson(res);
|
|
210
|
-
if (res.headers.get("x-rate-limit-incoming") == "0") {
|
|
211
|
-
auth.deleteToken();
|
|
212
|
-
return { success: true, value };
|
|
213
|
-
} else {
|
|
214
|
-
return { success: true, value };
|
|
215
|
-
}
|
|
216
|
-
}
|
|
217
|
-
async function flexParseJson(res) {
|
|
218
|
-
try {
|
|
219
|
-
return await res.json();
|
|
220
|
-
} catch {
|
|
221
|
-
log$2("Failed to parse response as JSON, trying text parse...");
|
|
222
|
-
const text = await res.text();
|
|
223
|
-
log$2("Response text:", text);
|
|
224
|
-
return JSON.parse(text);
|
|
225
|
-
}
|
|
226
|
-
}
|
|
227
|
-
function addApiFeatures(o) {
|
|
228
|
-
return {
|
|
229
|
-
...o,
|
|
230
|
-
rweb_lists_timeline_redesign_enabled: true,
|
|
231
|
-
responsive_web_graphql_exclude_directive_enabled: true,
|
|
232
|
-
verified_phone_label_enabled: false,
|
|
233
|
-
creator_subscriptions_tweet_preview_api_enabled: true,
|
|
234
|
-
responsive_web_graphql_timeline_navigation_enabled: true,
|
|
235
|
-
responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
|
|
236
|
-
tweetypie_unmention_optimization_enabled: true,
|
|
237
|
-
responsive_web_edit_tweet_api_enabled: true,
|
|
238
|
-
graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
|
|
239
|
-
view_counts_everywhere_api_enabled: true,
|
|
240
|
-
longform_notetweets_consumption_enabled: true,
|
|
241
|
-
tweet_awards_web_tipping_enabled: false,
|
|
242
|
-
freedom_of_speech_not_reach_fetch_enabled: true,
|
|
243
|
-
standardized_nudges_misinfo: true,
|
|
244
|
-
longform_notetweets_rich_text_read_enabled: true,
|
|
245
|
-
responsive_web_enhance_cards_enabled: false,
|
|
246
|
-
subscriptions_verification_info_enabled: true,
|
|
247
|
-
subscriptions_verification_info_reason_enabled: true,
|
|
248
|
-
subscriptions_verification_info_verified_since_enabled: true,
|
|
249
|
-
super_follow_badge_privacy_enabled: false,
|
|
250
|
-
super_follow_exclusive_tweet_notifications_enabled: false,
|
|
251
|
-
super_follow_tweet_api_enabled: false,
|
|
252
|
-
super_follow_user_api_enabled: false,
|
|
253
|
-
android_graphql_skip_api_media_color_palette: false,
|
|
254
|
-
creator_subscriptions_subscription_count_enabled: false,
|
|
255
|
-
blue_business_profile_image_shape_enabled: false,
|
|
256
|
-
unified_cards_ad_metadata_container_dynamic_card_content_query_enabled: false
|
|
257
|
-
};
|
|
192
|
+
name: "AES-GCM",
|
|
193
|
+
iv: nonce
|
|
194
|
+
},
|
|
195
|
+
cipher,
|
|
196
|
+
new TextEncoder().encode(plaintext)
|
|
197
|
+
);
|
|
198
|
+
const combined = new Uint8Array(nonce.length + encrypted.byteLength);
|
|
199
|
+
combined.set(nonce);
|
|
200
|
+
combined.set(new Uint8Array(encrypted), nonce.length);
|
|
201
|
+
const result = buf2hex(combined);
|
|
202
|
+
log$4(`XPFF header generated for guest ID ${guestId}: ${result}`);
|
|
203
|
+
return result;
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
const xpffBaseKey = "0e6be1f1e21ffc33590b888fd4dc81b19713e570e805d4e5df80a493c9571a05";
|
|
207
|
+
function xpffPlain() {
|
|
208
|
+
const timestamp = Date.now();
|
|
209
|
+
return JSON.stringify({
|
|
210
|
+
navigator_properties: {
|
|
211
|
+
hasBeenActive: "true",
|
|
212
|
+
userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
|
|
213
|
+
webdriver: "false"
|
|
214
|
+
},
|
|
215
|
+
created_at: timestamp
|
|
216
|
+
});
|
|
258
217
|
}
|
|
259
|
-
function
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
params.set("include_followed_by", "1");
|
|
264
|
-
params.set("include_want_retweets", "1");
|
|
265
|
-
params.set("include_mute_edge", "1");
|
|
266
|
-
params.set("include_can_dm", "1");
|
|
267
|
-
params.set("include_can_media_tag", "1");
|
|
268
|
-
params.set("include_ext_has_nft_avatar", "1");
|
|
269
|
-
params.set("include_ext_is_blue_verified", "1");
|
|
270
|
-
params.set("include_ext_verified_type", "1");
|
|
271
|
-
params.set("skip_status", "1");
|
|
272
|
-
params.set("cards_platform", "Web-12");
|
|
273
|
-
params.set("include_cards", "1");
|
|
274
|
-
params.set("include_ext_alt_text", "true");
|
|
275
|
-
params.set("include_ext_limited_action_results", "false");
|
|
276
|
-
params.set("include_quote_count", "true");
|
|
277
|
-
params.set("include_reply_count", "1");
|
|
278
|
-
params.set("tweet_mode", "extended");
|
|
279
|
-
params.set("include_ext_collab_control", "true");
|
|
280
|
-
params.set("include_ext_views", "true");
|
|
281
|
-
params.set("include_entities", "true");
|
|
282
|
-
params.set("include_user_entities", "true");
|
|
283
|
-
params.set("include_ext_media_color", "true");
|
|
284
|
-
params.set("include_ext_media_availability", "true");
|
|
285
|
-
params.set("include_ext_sensitive_media_warning", "true");
|
|
286
|
-
params.set("include_ext_trusted_friends_metadata", "true");
|
|
287
|
-
params.set("send_error_codes", "true");
|
|
288
|
-
params.set("simple_quoted_tweet", "true");
|
|
289
|
-
params.set("include_tweet_replies", `${includeTweetReplies}`);
|
|
290
|
-
params.set(
|
|
291
|
-
"ext",
|
|
292
|
-
"mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe"
|
|
293
|
-
);
|
|
294
|
-
return params;
|
|
218
|
+
async function generateXPFFHeader(guestId) {
|
|
219
|
+
const generator = new XPFFHeaderGenerator(xpffBaseKey);
|
|
220
|
+
const plaintext = xpffPlain();
|
|
221
|
+
return generator.generateHeader(plaintext, guestId);
|
|
295
222
|
}
|
|
296
223
|
|
|
297
|
-
const log$
|
|
224
|
+
const log$3 = debug("twitter-scraper:auth");
|
|
298
225
|
function withTransform(fetchFn, transform) {
|
|
299
226
|
return async (input, init) => {
|
|
300
227
|
const fetchArgs = await transform?.request?.(input, init) ?? [
|
|
@@ -344,28 +271,37 @@ class TwitterGuestAuth {
|
|
|
344
271
|
}
|
|
345
272
|
return new Date(this.guestCreatedAt);
|
|
346
273
|
}
|
|
347
|
-
async installTo(headers) {
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
"
|
|
355
|
-
|
|
274
|
+
async installTo(headers, _url, bearerTokenOverride) {
|
|
275
|
+
const tokenToUse = bearerTokenOverride ?? this.bearerToken;
|
|
276
|
+
if (!bearerTokenOverride) {
|
|
277
|
+
if (this.shouldUpdate()) {
|
|
278
|
+
await this.updateGuestToken();
|
|
279
|
+
}
|
|
280
|
+
if (this.guestToken) {
|
|
281
|
+
headers.set("x-guest-token", this.guestToken);
|
|
282
|
+
}
|
|
356
283
|
}
|
|
357
|
-
headers.set("authorization", `Bearer ${
|
|
358
|
-
headers.set("x-guest-token", token);
|
|
284
|
+
headers.set("authorization", `Bearer ${tokenToUse}`);
|
|
359
285
|
headers.set(
|
|
360
286
|
"user-agent",
|
|
361
287
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
362
288
|
);
|
|
289
|
+
await this.installCsrfToken(headers);
|
|
290
|
+
if (this.options?.experimental?.xpff) {
|
|
291
|
+
const guestId = await this.guestId();
|
|
292
|
+
if (guestId != null) {
|
|
293
|
+
const xpffHeader = await generateXPFFHeader(guestId);
|
|
294
|
+
headers.set("x-xp-forwarded-for", xpffHeader);
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
headers.set("cookie", await this.getCookieString());
|
|
298
|
+
}
|
|
299
|
+
async installCsrfToken(headers) {
|
|
363
300
|
const cookies = await this.getCookies();
|
|
364
301
|
const xCsrfToken = cookies.find((cookie) => cookie.key === "ct0");
|
|
365
302
|
if (xCsrfToken) {
|
|
366
303
|
headers.set("x-csrf-token", xCsrfToken.value);
|
|
367
304
|
}
|
|
368
|
-
headers.set("cookie", await this.getCookieString());
|
|
369
305
|
}
|
|
370
306
|
async setCookie(key, value) {
|
|
371
307
|
const cookie = toughCookie.Cookie.parse(`${key}=${value}`);
|
|
@@ -398,16 +334,28 @@ class TwitterGuestAuth {
|
|
|
398
334
|
getCookieJarUrl() {
|
|
399
335
|
return typeof document !== "undefined" ? document.location.toString() : "https://x.com";
|
|
400
336
|
}
|
|
337
|
+
async guestId() {
|
|
338
|
+
const cookies = await this.getCookies();
|
|
339
|
+
const guestIdCookie = cookies.find((cookie) => cookie.key === "guest_id");
|
|
340
|
+
return guestIdCookie ? guestIdCookie.value : null;
|
|
341
|
+
}
|
|
401
342
|
/**
|
|
402
343
|
* Updates the authentication state with a new guest token from the Twitter API.
|
|
403
344
|
*/
|
|
404
345
|
async updateGuestToken() {
|
|
346
|
+
try {
|
|
347
|
+
await this.updateGuestTokenCore();
|
|
348
|
+
} catch (err) {
|
|
349
|
+
log$3("Failed to update guest token; this may cause issues:", err);
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
async updateGuestTokenCore() {
|
|
405
353
|
const guestActivateUrl = "https://api.x.com/1.1/guest/activate.json";
|
|
406
354
|
const headers = new headersPolyfill.Headers({
|
|
407
355
|
Authorization: `Bearer ${this.bearerToken}`,
|
|
408
356
|
Cookie: await this.getCookieString()
|
|
409
357
|
});
|
|
410
|
-
log$
|
|
358
|
+
log$3(`Making POST request to ${guestActivateUrl}`);
|
|
411
359
|
const res = await this.fetch(guestActivateUrl, {
|
|
412
360
|
method: "POST",
|
|
413
361
|
headers,
|
|
@@ -428,7 +376,7 @@ class TwitterGuestAuth {
|
|
|
428
376
|
this.guestToken = newGuestToken;
|
|
429
377
|
this.guestCreatedAt = /* @__PURE__ */ new Date();
|
|
430
378
|
await this.setCookie("gt", newGuestToken);
|
|
431
|
-
log$
|
|
379
|
+
log$3(`Updated guest token: ${newGuestToken}`);
|
|
432
380
|
}
|
|
433
381
|
/**
|
|
434
382
|
* Returns if the authentication token needs to be updated or not.
|
|
@@ -439,6 +387,281 @@ class TwitterGuestAuth {
|
|
|
439
387
|
}
|
|
440
388
|
}
|
|
441
389
|
|
|
390
|
+
const genericPlatform = new class {
|
|
391
|
+
randomizeCiphers() {
|
|
392
|
+
return Promise.resolve();
|
|
393
|
+
}
|
|
394
|
+
}();
|
|
395
|
+
|
|
396
|
+
class Platform {
|
|
397
|
+
async randomizeCiphers() {
|
|
398
|
+
const platform = await Platform.importPlatform();
|
|
399
|
+
await platform?.randomizeCiphers();
|
|
400
|
+
}
|
|
401
|
+
static async importPlatform() {
|
|
402
|
+
return genericPlatform;
|
|
403
|
+
}
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
const log$2 = debug("twitter-scraper:xctxid");
|
|
407
|
+
let linkedom = null;
|
|
408
|
+
function linkedomImport() {
|
|
409
|
+
if (!linkedom) {
|
|
410
|
+
const mod = require("linkedom");
|
|
411
|
+
linkedom = mod;
|
|
412
|
+
return mod;
|
|
413
|
+
}
|
|
414
|
+
return linkedom;
|
|
415
|
+
}
|
|
416
|
+
async function parseHTML(html) {
|
|
417
|
+
if (typeof window !== "undefined") {
|
|
418
|
+
const { defaultView } = new DOMParser().parseFromString(html, "text/html");
|
|
419
|
+
if (!defaultView) {
|
|
420
|
+
throw new Error("Failed to get defaultView from parsed HTML.");
|
|
421
|
+
}
|
|
422
|
+
return defaultView;
|
|
423
|
+
} else {
|
|
424
|
+
const { DOMParser: DOMParser2 } = linkedomImport();
|
|
425
|
+
return new DOMParser2().parseFromString(html, "text/html").defaultView;
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
async function handleXMigration(fetchFn) {
|
|
429
|
+
const headers = {
|
|
430
|
+
accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
|
431
|
+
"accept-language": "ja",
|
|
432
|
+
"cache-control": "no-cache",
|
|
433
|
+
pragma: "no-cache",
|
|
434
|
+
priority: "u=0, i",
|
|
435
|
+
"sec-ch-ua": '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
|
|
436
|
+
"sec-ch-ua-mobile": "?0",
|
|
437
|
+
"sec-ch-ua-platform": '"Windows"',
|
|
438
|
+
"sec-fetch-dest": "document",
|
|
439
|
+
"sec-fetch-mode": "navigate",
|
|
440
|
+
"sec-fetch-site": "none",
|
|
441
|
+
"sec-fetch-user": "?1",
|
|
442
|
+
"upgrade-insecure-requests": "1",
|
|
443
|
+
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
444
|
+
};
|
|
445
|
+
const response = await fetchFn("https://x.com", {
|
|
446
|
+
headers
|
|
447
|
+
});
|
|
448
|
+
if (!response.ok) {
|
|
449
|
+
throw new Error(`Failed to fetch X homepage: ${response.statusText}`);
|
|
450
|
+
}
|
|
451
|
+
const htmlText = await response.text();
|
|
452
|
+
let dom = await parseHTML(htmlText);
|
|
453
|
+
let document = dom.window.document;
|
|
454
|
+
const migrationRedirectionRegex = new RegExp(
|
|
455
|
+
"(http(?:s)?://(?:www\\.)?(twitter|x){1}\\.com(/x)?/migrate([/?])?tok=[a-zA-Z0-9%\\-_]+)+",
|
|
456
|
+
"i"
|
|
457
|
+
);
|
|
458
|
+
const metaRefresh = document.querySelector("meta[http-equiv='refresh']");
|
|
459
|
+
const metaContent = metaRefresh ? metaRefresh.getAttribute("content") || "" : "";
|
|
460
|
+
const migrationRedirectionUrl = migrationRedirectionRegex.exec(metaContent) || migrationRedirectionRegex.exec(htmlText);
|
|
461
|
+
if (migrationRedirectionUrl) {
|
|
462
|
+
const redirectResponse = await fetch(migrationRedirectionUrl[0]);
|
|
463
|
+
if (!redirectResponse.ok) {
|
|
464
|
+
throw new Error(
|
|
465
|
+
`Failed to follow migration redirection: ${redirectResponse.statusText}`
|
|
466
|
+
);
|
|
467
|
+
}
|
|
468
|
+
const redirectHtml = await redirectResponse.text();
|
|
469
|
+
dom = await parseHTML(redirectHtml);
|
|
470
|
+
document = dom.window.document;
|
|
471
|
+
}
|
|
472
|
+
const migrationForm = document.querySelector("form[name='f']") || document.querySelector("form[action='https://x.com/x/migrate']");
|
|
473
|
+
if (migrationForm) {
|
|
474
|
+
const url = migrationForm.getAttribute("action") || "https://x.com/x/migrate";
|
|
475
|
+
const method = migrationForm.getAttribute("method") || "POST";
|
|
476
|
+
const requestPayload = new FormData();
|
|
477
|
+
const inputFields = migrationForm.querySelectorAll("input");
|
|
478
|
+
for (const element of Array.from(inputFields)) {
|
|
479
|
+
const name = element.getAttribute("name");
|
|
480
|
+
const value = element.getAttribute("value");
|
|
481
|
+
if (name && value) {
|
|
482
|
+
requestPayload.append(name, value);
|
|
483
|
+
}
|
|
484
|
+
}
|
|
485
|
+
const formResponse = await fetch(url, {
|
|
486
|
+
method,
|
|
487
|
+
body: requestPayload,
|
|
488
|
+
headers
|
|
489
|
+
});
|
|
490
|
+
if (!formResponse.ok) {
|
|
491
|
+
throw new Error(
|
|
492
|
+
`Failed to submit migration form: ${formResponse.statusText}`
|
|
493
|
+
);
|
|
494
|
+
}
|
|
495
|
+
const formHtml = await formResponse.text();
|
|
496
|
+
dom = await parseHTML(formHtml);
|
|
497
|
+
document = dom.window.document;
|
|
498
|
+
}
|
|
499
|
+
return document;
|
|
500
|
+
}
|
|
501
|
+
let ClientTransaction = null;
|
|
502
|
+
function clientTransaction() {
|
|
503
|
+
if (!ClientTransaction) {
|
|
504
|
+
const mod = require("x-client-transaction-id");
|
|
505
|
+
const ctx = mod.ClientTransaction;
|
|
506
|
+
ClientTransaction = ctx;
|
|
507
|
+
return ctx;
|
|
508
|
+
}
|
|
509
|
+
return ClientTransaction;
|
|
510
|
+
}
|
|
511
|
+
async function generateTransactionId(url, fetchFn, method) {
|
|
512
|
+
const parsedUrl = new URL(url);
|
|
513
|
+
const path = parsedUrl.pathname;
|
|
514
|
+
log$2(`Generating transaction ID for ${method} ${path}`);
|
|
515
|
+
const document = await handleXMigration(fetchFn);
|
|
516
|
+
const transaction = await clientTransaction().create(document);
|
|
517
|
+
const transactionId = await transaction.generateTransactionId(method, path);
|
|
518
|
+
log$2(`Transaction ID: ${transactionId}`);
|
|
519
|
+
return transactionId;
|
|
520
|
+
}
|
|
521
|
+
|
|
522
|
+
const log$1 = debug("twitter-scraper:api");
|
|
523
|
+
const bearerToken = "AAAAAAAAAAAAAAAAAAAAAFQODgEAAAAAVHTp76lzh3rFzcHbmHVvQxYYpTw%3DckAlMINMjmCwxUcaXbAN4XqJVdgMJaHqNOFgPMK0zN1qLqLQCF";
|
|
524
|
+
const bearerToken2 = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA";
|
|
525
|
+
async function jitter(maxMs) {
|
|
526
|
+
const jitter2 = Math.random() * maxMs;
|
|
527
|
+
await new Promise((resolve) => setTimeout(resolve, jitter2));
|
|
528
|
+
}
|
|
529
|
+
async function requestApi(url, auth, method = "GET", platform = new Platform(), headers = new headersPolyfill.Headers(), bearerTokenOverride) {
|
|
530
|
+
log$1(`Making ${method} request to ${url}`);
|
|
531
|
+
await auth.installTo(headers, url, bearerTokenOverride);
|
|
532
|
+
await platform.randomizeCiphers();
|
|
533
|
+
if (auth instanceof TwitterGuestAuth && auth.options?.experimental?.xClientTransactionId) {
|
|
534
|
+
const transactionId = await generateTransactionId(
|
|
535
|
+
url,
|
|
536
|
+
auth.fetch.bind(auth),
|
|
537
|
+
method
|
|
538
|
+
);
|
|
539
|
+
headers.set("x-client-transaction-id", transactionId);
|
|
540
|
+
}
|
|
541
|
+
let res;
|
|
542
|
+
do {
|
|
543
|
+
const fetchParameters = [
|
|
544
|
+
url,
|
|
545
|
+
{
|
|
546
|
+
method,
|
|
547
|
+
headers,
|
|
548
|
+
credentials: "include"
|
|
549
|
+
}
|
|
550
|
+
];
|
|
551
|
+
try {
|
|
552
|
+
res = await auth.fetch(...fetchParameters);
|
|
553
|
+
} catch (err) {
|
|
554
|
+
if (!(err instanceof Error)) {
|
|
555
|
+
throw err;
|
|
556
|
+
}
|
|
557
|
+
return {
|
|
558
|
+
success: false,
|
|
559
|
+
err: new Error("Failed to perform request.")
|
|
560
|
+
};
|
|
561
|
+
}
|
|
562
|
+
await updateCookieJar(auth.cookieJar(), res.headers);
|
|
563
|
+
if (res.status === 429) {
|
|
564
|
+
log$1("Rate limit hit, waiting for retry...");
|
|
565
|
+
await auth.onRateLimit({
|
|
566
|
+
fetchParameters,
|
|
567
|
+
response: res
|
|
568
|
+
});
|
|
569
|
+
}
|
|
570
|
+
} while (res.status === 429);
|
|
571
|
+
if (!res.ok) {
|
|
572
|
+
return {
|
|
573
|
+
success: false,
|
|
574
|
+
err: await ApiError.fromResponse(res)
|
|
575
|
+
};
|
|
576
|
+
}
|
|
577
|
+
const value = await flexParseJson(res);
|
|
578
|
+
if (res.headers.get("x-rate-limit-incoming") == "0") {
|
|
579
|
+
auth.deleteToken();
|
|
580
|
+
return { success: true, value };
|
|
581
|
+
} else {
|
|
582
|
+
return { success: true, value };
|
|
583
|
+
}
|
|
584
|
+
}
|
|
585
|
+
async function flexParseJson(res) {
|
|
586
|
+
try {
|
|
587
|
+
return await res.json();
|
|
588
|
+
} catch {
|
|
589
|
+
log$1("Failed to parse response as JSON, trying text parse...");
|
|
590
|
+
const text = await res.text();
|
|
591
|
+
log$1("Response text:", text);
|
|
592
|
+
return JSON.parse(text);
|
|
593
|
+
}
|
|
594
|
+
}
|
|
595
|
+
function addApiFeatures(o) {
|
|
596
|
+
return {
|
|
597
|
+
...o,
|
|
598
|
+
rweb_lists_timeline_redesign_enabled: true,
|
|
599
|
+
responsive_web_graphql_exclude_directive_enabled: true,
|
|
600
|
+
verified_phone_label_enabled: false,
|
|
601
|
+
creator_subscriptions_tweet_preview_api_enabled: true,
|
|
602
|
+
responsive_web_graphql_timeline_navigation_enabled: true,
|
|
603
|
+
responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
|
|
604
|
+
tweetypie_unmention_optimization_enabled: true,
|
|
605
|
+
responsive_web_edit_tweet_api_enabled: true,
|
|
606
|
+
graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
|
|
607
|
+
view_counts_everywhere_api_enabled: true,
|
|
608
|
+
longform_notetweets_consumption_enabled: true,
|
|
609
|
+
tweet_awards_web_tipping_enabled: false,
|
|
610
|
+
freedom_of_speech_not_reach_fetch_enabled: true,
|
|
611
|
+
standardized_nudges_misinfo: true,
|
|
612
|
+
longform_notetweets_rich_text_read_enabled: true,
|
|
613
|
+
responsive_web_enhance_cards_enabled: false,
|
|
614
|
+
subscriptions_verification_info_enabled: true,
|
|
615
|
+
subscriptions_verification_info_reason_enabled: true,
|
|
616
|
+
subscriptions_verification_info_verified_since_enabled: true,
|
|
617
|
+
super_follow_badge_privacy_enabled: false,
|
|
618
|
+
super_follow_exclusive_tweet_notifications_enabled: false,
|
|
619
|
+
super_follow_tweet_api_enabled: false,
|
|
620
|
+
super_follow_user_api_enabled: false,
|
|
621
|
+
android_graphql_skip_api_media_color_palette: false,
|
|
622
|
+
creator_subscriptions_subscription_count_enabled: false,
|
|
623
|
+
blue_business_profile_image_shape_enabled: false,
|
|
624
|
+
unified_cards_ad_metadata_container_dynamic_card_content_query_enabled: false
|
|
625
|
+
};
|
|
626
|
+
}
|
|
627
|
+
function addApiParams(params, includeTweetReplies) {
|
|
628
|
+
params.set("include_profile_interstitial_type", "1");
|
|
629
|
+
params.set("include_blocking", "1");
|
|
630
|
+
params.set("include_blocked_by", "1");
|
|
631
|
+
params.set("include_followed_by", "1");
|
|
632
|
+
params.set("include_want_retweets", "1");
|
|
633
|
+
params.set("include_mute_edge", "1");
|
|
634
|
+
params.set("include_can_dm", "1");
|
|
635
|
+
params.set("include_can_media_tag", "1");
|
|
636
|
+
params.set("include_ext_has_nft_avatar", "1");
|
|
637
|
+
params.set("include_ext_is_blue_verified", "1");
|
|
638
|
+
params.set("include_ext_verified_type", "1");
|
|
639
|
+
params.set("skip_status", "1");
|
|
640
|
+
params.set("cards_platform", "Web-12");
|
|
641
|
+
params.set("include_cards", "1");
|
|
642
|
+
params.set("include_ext_alt_text", "true");
|
|
643
|
+
params.set("include_ext_limited_action_results", "false");
|
|
644
|
+
params.set("include_quote_count", "true");
|
|
645
|
+
params.set("include_reply_count", "1");
|
|
646
|
+
params.set("tweet_mode", "extended");
|
|
647
|
+
params.set("include_ext_collab_control", "true");
|
|
648
|
+
params.set("include_ext_views", "true");
|
|
649
|
+
params.set("include_entities", "true");
|
|
650
|
+
params.set("include_user_entities", "true");
|
|
651
|
+
params.set("include_ext_media_color", "true");
|
|
652
|
+
params.set("include_ext_media_availability", "true");
|
|
653
|
+
params.set("include_ext_sensitive_media_warning", "true");
|
|
654
|
+
params.set("include_ext_trusted_friends_metadata", "true");
|
|
655
|
+
params.set("send_error_codes", "true");
|
|
656
|
+
params.set("simple_quoted_tweet", "true");
|
|
657
|
+
params.set("include_tweet_replies", `${includeTweetReplies}`);
|
|
658
|
+
params.set(
|
|
659
|
+
"ext",
|
|
660
|
+
"mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe"
|
|
661
|
+
);
|
|
662
|
+
return params;
|
|
663
|
+
}
|
|
664
|
+
|
|
442
665
|
const log = debug("twitter-scraper:auth-user");
|
|
443
666
|
const TwitterUserAuthSubtask = typebox.Type.Object({
|
|
444
667
|
subtask_id: typebox.Type.String(),
|
|
@@ -546,25 +769,26 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
546
769
|
this.jar = new toughCookie.CookieJar();
|
|
547
770
|
}
|
|
548
771
|
}
|
|
549
|
-
async
|
|
550
|
-
const
|
|
551
|
-
|
|
552
|
-
if (xCsrfToken) {
|
|
553
|
-
headers.set("x-csrf-token", xCsrfToken.value);
|
|
554
|
-
}
|
|
555
|
-
}
|
|
556
|
-
async installTo(headers) {
|
|
557
|
-
headers.set("authorization", `Bearer ${this.bearerToken}`);
|
|
558
|
-
const cookie = await this.getCookieString();
|
|
559
|
-
headers.set("cookie", cookie);
|
|
560
|
-
if (this.guestToken) {
|
|
561
|
-
headers.set("x-guest-token", this.guestToken);
|
|
562
|
-
}
|
|
772
|
+
async installTo(headers, _url, bearerTokenOverride) {
|
|
773
|
+
const tokenToUse = bearerTokenOverride ?? this.bearerToken;
|
|
774
|
+
headers.set("authorization", `Bearer ${tokenToUse}`);
|
|
563
775
|
headers.set(
|
|
564
776
|
"user-agent",
|
|
565
777
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
566
778
|
);
|
|
779
|
+
if (this.guestToken) {
|
|
780
|
+
headers.set("x-guest-token", this.guestToken);
|
|
781
|
+
}
|
|
567
782
|
await this.installCsrfToken(headers);
|
|
783
|
+
if (this.options?.experimental?.xpff) {
|
|
784
|
+
const guestId = await this.guestId();
|
|
785
|
+
if (guestId != null) {
|
|
786
|
+
const xpffHeader = await generateXPFFHeader(guestId);
|
|
787
|
+
headers.set("x-xp-forwarded-for", xpffHeader);
|
|
788
|
+
}
|
|
789
|
+
}
|
|
790
|
+
const cookie = await this.getCookieString();
|
|
791
|
+
headers.set("cookie", cookie);
|
|
568
792
|
}
|
|
569
793
|
async initLogin() {
|
|
570
794
|
this.removeCookie("twitter_ads_id=");
|
|
@@ -769,12 +993,6 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
769
993
|
onboardingTaskUrl = `https://api.x.com/1.1/onboarding/task.json?flow_name=${data.flow_name}`;
|
|
770
994
|
}
|
|
771
995
|
log(`Making POST request to ${onboardingTaskUrl}`);
|
|
772
|
-
const token = this.guestToken;
|
|
773
|
-
if (token == null) {
|
|
774
|
-
throw new AuthenticationError(
|
|
775
|
-
"Authentication token is null or undefined."
|
|
776
|
-
);
|
|
777
|
-
}
|
|
778
996
|
const headers = new headersPolyfill.Headers({
|
|
779
997
|
accept: "*/*",
|
|
780
998
|
"accept-language": "en-US,en;q=0.9",
|
|
@@ -791,12 +1009,19 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
791
1009
|
"sec-fetch-mode": "cors",
|
|
792
1010
|
"sec-fetch-site": "same-origin",
|
|
793
1011
|
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
|
|
794
|
-
"x-guest-token": token,
|
|
795
1012
|
"x-twitter-auth-type": "OAuth2Client",
|
|
796
1013
|
"x-twitter-active-user": "yes",
|
|
797
1014
|
"x-twitter-client-language": "en"
|
|
798
1015
|
});
|
|
799
|
-
await this.installTo(headers);
|
|
1016
|
+
await this.installTo(headers, onboardingTaskUrl);
|
|
1017
|
+
if (this.options?.experimental?.xClientTransactionId) {
|
|
1018
|
+
const transactionId = await generateTransactionId(
|
|
1019
|
+
onboardingTaskUrl,
|
|
1020
|
+
this.fetch.bind(this),
|
|
1021
|
+
"POST"
|
|
1022
|
+
);
|
|
1023
|
+
headers.set("x-client-transaction-id", transactionId);
|
|
1024
|
+
}
|
|
800
1025
|
let res;
|
|
801
1026
|
do {
|
|
802
1027
|
const fetchParameters = [
|
|
@@ -1819,7 +2044,11 @@ async function getTrends(auth) {
|
|
|
1819
2044
|
params.set("entity_tokens", "false");
|
|
1820
2045
|
const res = await requestApi(
|
|
1821
2046
|
`https://api.x.com/2/guide.json?${params.toString()}`,
|
|
1822
|
-
auth
|
|
2047
|
+
auth,
|
|
2048
|
+
"GET",
|
|
2049
|
+
void 0,
|
|
2050
|
+
void 0,
|
|
2051
|
+
bearerToken2
|
|
1823
2052
|
);
|
|
1824
2053
|
if (!res.success) {
|
|
1825
2054
|
throw res.err;
|
|
@@ -1902,7 +2131,11 @@ async function fetchTweets(userId, maxTweets, cursor, auth) {
|
|
|
1902
2131
|
}
|
|
1903
2132
|
const res = await requestApi(
|
|
1904
2133
|
userTweetsRequest.toRequestUrl(),
|
|
1905
|
-
auth
|
|
2134
|
+
auth,
|
|
2135
|
+
"GET",
|
|
2136
|
+
void 0,
|
|
2137
|
+
void 0,
|
|
2138
|
+
bearerToken2
|
|
1906
2139
|
);
|
|
1907
2140
|
if (!res.success) {
|
|
1908
2141
|
throw res.err;
|
|
@@ -2048,7 +2281,11 @@ async function getTweet(id, auth) {
|
|
|
2048
2281
|
tweetDetailRequest.variables.focalTweetId = id;
|
|
2049
2282
|
const res = await requestApi(
|
|
2050
2283
|
tweetDetailRequest.toRequestUrl(),
|
|
2051
|
-
auth
|
|
2284
|
+
auth,
|
|
2285
|
+
"GET",
|
|
2286
|
+
void 0,
|
|
2287
|
+
void 0,
|
|
2288
|
+
bearerToken2
|
|
2052
2289
|
);
|
|
2053
2290
|
if (!res.success) {
|
|
2054
2291
|
throw res.err;
|
|
@@ -2064,7 +2301,11 @@ async function getTweetAnonymous(id, auth) {
|
|
|
2064
2301
|
tweetResultByRestIdRequest.variables.tweetId = id;
|
|
2065
2302
|
const res = await requestApi(
|
|
2066
2303
|
tweetResultByRestIdRequest.toRequestUrl(),
|
|
2067
|
-
auth
|
|
2304
|
+
auth,
|
|
2305
|
+
"GET",
|
|
2306
|
+
void 0,
|
|
2307
|
+
void 0,
|
|
2308
|
+
bearerToken2
|
|
2068
2309
|
);
|
|
2069
2310
|
if (!res.success) {
|
|
2070
2311
|
throw res.err;
|
|
@@ -2621,7 +2862,11 @@ class Scraper {
|
|
|
2621
2862
|
return {
|
|
2622
2863
|
fetch: this.options?.fetch,
|
|
2623
2864
|
transform: this.options?.transform,
|
|
2624
|
-
rateLimitStrategy: this.options?.rateLimitStrategy
|
|
2865
|
+
rateLimitStrategy: this.options?.rateLimitStrategy,
|
|
2866
|
+
experimental: {
|
|
2867
|
+
xClientTransactionId: this.options?.experimental?.xClientTransactionId,
|
|
2868
|
+
xpff: this.options?.experimental?.xpff
|
|
2869
|
+
}
|
|
2625
2870
|
};
|
|
2626
2871
|
}
|
|
2627
2872
|
handleResponse(res) {
|