@the-convocation/twitter-scraper 0.19.1 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/default/cjs/index.js +408 -184
- package/dist/default/cjs/index.js.map +1 -1
- package/dist/default/esm/index.mjs +408 -184
- package/dist/default/esm/index.mjs.map +1 -1
- package/dist/node/cjs/index.cjs +405 -181
- package/dist/node/cjs/index.cjs.map +1 -1
- package/dist/node/esm/index.mjs +405 -181
- package/dist/node/esm/index.mjs.map +1 -1
- package/dist/types/index.d.ts +13 -0
- package/examples/node-integration/package.json +2 -1
- package/package.json +6 -4
package/dist/node/cjs/index.cjs
CHANGED
|
@@ -72,13 +72,13 @@ class AuthenticationError extends Error {
|
|
|
72
72
|
}
|
|
73
73
|
}
|
|
74
74
|
|
|
75
|
-
const log$4 = debug("twitter-scraper:rate-limit");
|
|
75
|
+
const log$6 = debug("twitter-scraper:rate-limit");
|
|
76
76
|
class WaitingRateLimitStrategy {
|
|
77
77
|
async onRateLimit({ response: res }) {
|
|
78
78
|
const xRateLimitLimit = res.headers.get("x-rate-limit-limit");
|
|
79
79
|
const xRateLimitRemaining = res.headers.get("x-rate-limit-remaining");
|
|
80
80
|
const xRateLimitReset = res.headers.get("x-rate-limit-reset");
|
|
81
|
-
log$4(
|
|
81
|
+
log$6(
|
|
82
82
|
`Rate limit event: limit=${xRateLimitLimit}, remaining=${xRateLimitRemaining}, reset=${xRateLimitReset}`
|
|
83
83
|
);
|
|
84
84
|
if (xRateLimitRemaining == "0" && xRateLimitReset) {
|
|
@@ -94,20 +94,7 @@ class ErrorRateLimitStrategy {
|
|
|
94
94
|
}
|
|
95
95
|
}
|
|
96
96
|
|
|
97
|
-
|
|
98
|
-
async randomizeCiphers() {
|
|
99
|
-
const platform = await Platform.importPlatform();
|
|
100
|
-
await platform?.randomizeCiphers();
|
|
101
|
-
}
|
|
102
|
-
static async importPlatform() {
|
|
103
|
-
{
|
|
104
|
-
const { platform } = await Promise.resolve().then(function () { return index; });
|
|
105
|
-
return platform;
|
|
106
|
-
}
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
const log$3 = debug("twitter-scraper:requests");
|
|
97
|
+
const log$5 = debug("twitter-scraper:requests");
|
|
111
98
|
async function updateCookieJar(cookieJar, headers) {
|
|
112
99
|
let setCookieHeaders = [];
|
|
113
100
|
if (typeof headers.getSetCookie === "function") {
|
|
@@ -122,12 +109,12 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
122
109
|
for (const cookieStr of setCookieHeaders) {
|
|
123
110
|
const cookie = toughCookie.Cookie.parse(cookieStr);
|
|
124
111
|
if (!cookie) {
|
|
125
|
-
log$3(`Failed to parse cookie: ${cookieStr.substring(0, 100)}`);
|
|
112
|
+
log$5(`Failed to parse cookie: ${cookieStr.substring(0, 100)}`);
|
|
126
113
|
continue;
|
|
127
114
|
}
|
|
128
115
|
if (cookie.maxAge === 0 || cookie.expires && cookie.expires < /* @__PURE__ */ new Date()) {
|
|
129
116
|
if (cookie.key === "ct0") {
|
|
130
|
-
log$3(`Skipping deletion of ct0 cookie (Max-Age=0)`);
|
|
117
|
+
log$5(`Skipping deletion of ct0 cookie (Max-Age=0)`);
|
|
131
118
|
}
|
|
132
119
|
continue;
|
|
133
120
|
}
|
|
@@ -135,7 +122,7 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
135
122
|
const url = `${cookie.secure ? "https" : "http"}://${cookie.domain}${cookie.path}`;
|
|
136
123
|
await cookieJar.setCookie(cookie, url);
|
|
137
124
|
if (cookie.key === "ct0") {
|
|
138
|
-
log$3(
|
|
125
|
+
log$5(
|
|
139
126
|
`Successfully set ct0 cookie with value: ${cookie.value.substring(
|
|
140
127
|
0,
|
|
141
128
|
20
|
|
@@ -143,9 +130,9 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
143
130
|
);
|
|
144
131
|
}
|
|
145
132
|
} catch (err) {
|
|
146
|
-
log$3(`Failed to set cookie ${cookie.key}: ${err}`);
|
|
133
|
+
log$5(`Failed to set cookie ${cookie.key}: ${err}`);
|
|
147
134
|
if (cookie.key === "ct0") {
|
|
148
|
-
log$3(`FAILED to set ct0 cookie! Error: ${err}`);
|
|
135
|
+
log$5(`FAILED to set ct0 cookie! Error: ${err}`);
|
|
149
136
|
}
|
|
150
137
|
}
|
|
151
138
|
}
|
|
@@ -159,141 +146,84 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
159
146
|
}
|
|
160
147
|
}
|
|
161
148
|
|
|
162
|
-
const log$2 = debug("twitter-scraper:api");
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
149
|
+
const log$4 = debug("twitter-scraper:xpff");
|
|
150
|
+
let isoCrypto = null;
|
|
151
|
+
function getCrypto() {
|
|
152
|
+
if (isoCrypto != null) {
|
|
153
|
+
return isoCrypto;
|
|
154
|
+
}
|
|
155
|
+
if (typeof crypto === "undefined") {
|
|
156
|
+
log$4("Global crypto is undefined, importing from crypto module...");
|
|
157
|
+
const { webcrypto } = require("crypto");
|
|
158
|
+
isoCrypto = webcrypto;
|
|
159
|
+
return webcrypto;
|
|
160
|
+
}
|
|
161
|
+
isoCrypto = crypto;
|
|
162
|
+
return crypto;
|
|
163
|
+
}
|
|
164
|
+
async function sha256(message) {
|
|
165
|
+
const msgBuffer = new TextEncoder().encode(message);
|
|
166
|
+
const hashBuffer = await getCrypto().subtle.digest("SHA-256", msgBuffer);
|
|
167
|
+
return new Uint8Array(hashBuffer);
|
|
168
|
+
}
|
|
169
|
+
function buf2hex(buffer) {
|
|
170
|
+
return [...new Uint8Array(buffer)].map((x) => x.toString(16).padStart(2, "0")).join("");
|
|
171
|
+
}
|
|
172
|
+
class XPFFHeaderGenerator {
|
|
173
|
+
constructor(seed) {
|
|
174
|
+
this.seed = seed;
|
|
175
|
+
}
|
|
176
|
+
async deriveKey(guestId) {
|
|
177
|
+
const combined = `${this.seed}${guestId}`;
|
|
178
|
+
const result = await sha256(combined);
|
|
179
|
+
return result;
|
|
180
|
+
}
|
|
181
|
+
async generateHeader(plaintext, guestId) {
|
|
182
|
+
log$4(`Generating XPFF key for guest ID: ${guestId}`);
|
|
183
|
+
const key = await this.deriveKey(guestId);
|
|
184
|
+
const nonce = getCrypto().getRandomValues(new Uint8Array(12));
|
|
185
|
+
const cipher = await getCrypto().subtle.importKey(
|
|
186
|
+
"raw",
|
|
187
|
+
key,
|
|
188
|
+
{ name: "AES-GCM" },
|
|
189
|
+
false,
|
|
190
|
+
["encrypt"]
|
|
191
|
+
);
|
|
192
|
+
const encrypted = await getCrypto().subtle.encrypt(
|
|
176
193
|
{
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
}
|
|
202
|
-
if (!res.ok) {
|
|
203
|
-
return {
|
|
204
|
-
success: false,
|
|
205
|
-
err: await ApiError.fromResponse(res)
|
|
206
|
-
};
|
|
207
|
-
}
|
|
208
|
-
const value = await flexParseJson(res);
|
|
209
|
-
if (res.headers.get("x-rate-limit-incoming") == "0") {
|
|
210
|
-
auth.deleteToken();
|
|
211
|
-
return { success: true, value };
|
|
212
|
-
} else {
|
|
213
|
-
return { success: true, value };
|
|
214
|
-
}
|
|
215
|
-
}
|
|
216
|
-
async function flexParseJson(res) {
|
|
217
|
-
try {
|
|
218
|
-
return await res.json();
|
|
219
|
-
} catch {
|
|
220
|
-
log$2("Failed to parse response as JSON, trying text parse...");
|
|
221
|
-
const text = await res.text();
|
|
222
|
-
log$2("Response text:", text);
|
|
223
|
-
return JSON.parse(text);
|
|
224
|
-
}
|
|
225
|
-
}
|
|
226
|
-
function addApiFeatures(o) {
|
|
227
|
-
return {
|
|
228
|
-
...o,
|
|
229
|
-
rweb_lists_timeline_redesign_enabled: true,
|
|
230
|
-
responsive_web_graphql_exclude_directive_enabled: true,
|
|
231
|
-
verified_phone_label_enabled: false,
|
|
232
|
-
creator_subscriptions_tweet_preview_api_enabled: true,
|
|
233
|
-
responsive_web_graphql_timeline_navigation_enabled: true,
|
|
234
|
-
responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
|
|
235
|
-
tweetypie_unmention_optimization_enabled: true,
|
|
236
|
-
responsive_web_edit_tweet_api_enabled: true,
|
|
237
|
-
graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
|
|
238
|
-
view_counts_everywhere_api_enabled: true,
|
|
239
|
-
longform_notetweets_consumption_enabled: true,
|
|
240
|
-
tweet_awards_web_tipping_enabled: false,
|
|
241
|
-
freedom_of_speech_not_reach_fetch_enabled: true,
|
|
242
|
-
standardized_nudges_misinfo: true,
|
|
243
|
-
longform_notetweets_rich_text_read_enabled: true,
|
|
244
|
-
responsive_web_enhance_cards_enabled: false,
|
|
245
|
-
subscriptions_verification_info_enabled: true,
|
|
246
|
-
subscriptions_verification_info_reason_enabled: true,
|
|
247
|
-
subscriptions_verification_info_verified_since_enabled: true,
|
|
248
|
-
super_follow_badge_privacy_enabled: false,
|
|
249
|
-
super_follow_exclusive_tweet_notifications_enabled: false,
|
|
250
|
-
super_follow_tweet_api_enabled: false,
|
|
251
|
-
super_follow_user_api_enabled: false,
|
|
252
|
-
android_graphql_skip_api_media_color_palette: false,
|
|
253
|
-
creator_subscriptions_subscription_count_enabled: false,
|
|
254
|
-
blue_business_profile_image_shape_enabled: false,
|
|
255
|
-
unified_cards_ad_metadata_container_dynamic_card_content_query_enabled: false
|
|
256
|
-
};
|
|
194
|
+
name: "AES-GCM",
|
|
195
|
+
iv: nonce
|
|
196
|
+
},
|
|
197
|
+
cipher,
|
|
198
|
+
new TextEncoder().encode(plaintext)
|
|
199
|
+
);
|
|
200
|
+
const combined = new Uint8Array(nonce.length + encrypted.byteLength);
|
|
201
|
+
combined.set(nonce);
|
|
202
|
+
combined.set(new Uint8Array(encrypted), nonce.length);
|
|
203
|
+
const result = buf2hex(combined);
|
|
204
|
+
log$4(`XPFF header generated for guest ID ${guestId}: ${result}`);
|
|
205
|
+
return result;
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
const xpffBaseKey = "0e6be1f1e21ffc33590b888fd4dc81b19713e570e805d4e5df80a493c9571a05";
|
|
209
|
+
function xpffPlain() {
|
|
210
|
+
const timestamp = Date.now();
|
|
211
|
+
return JSON.stringify({
|
|
212
|
+
navigator_properties: {
|
|
213
|
+
hasBeenActive: "true",
|
|
214
|
+
userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
|
|
215
|
+
webdriver: "false"
|
|
216
|
+
},
|
|
217
|
+
created_at: timestamp
|
|
218
|
+
});
|
|
257
219
|
}
|
|
258
|
-
function addApiParams(params, includeTweetReplies) {
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
params.set("include_followed_by", "1");
|
|
263
|
-
params.set("include_want_retweets", "1");
|
|
264
|
-
params.set("include_mute_edge", "1");
|
|
265
|
-
params.set("include_can_dm", "1");
|
|
266
|
-
params.set("include_can_media_tag", "1");
|
|
267
|
-
params.set("include_ext_has_nft_avatar", "1");
|
|
268
|
-
params.set("include_ext_is_blue_verified", "1");
|
|
269
|
-
params.set("include_ext_verified_type", "1");
|
|
270
|
-
params.set("skip_status", "1");
|
|
271
|
-
params.set("cards_platform", "Web-12");
|
|
272
|
-
params.set("include_cards", "1");
|
|
273
|
-
params.set("include_ext_alt_text", "true");
|
|
274
|
-
params.set("include_ext_limited_action_results", "false");
|
|
275
|
-
params.set("include_quote_count", "true");
|
|
276
|
-
params.set("include_reply_count", "1");
|
|
277
|
-
params.set("tweet_mode", "extended");
|
|
278
|
-
params.set("include_ext_collab_control", "true");
|
|
279
|
-
params.set("include_ext_views", "true");
|
|
280
|
-
params.set("include_entities", "true");
|
|
281
|
-
params.set("include_user_entities", "true");
|
|
282
|
-
params.set("include_ext_media_color", "true");
|
|
283
|
-
params.set("include_ext_media_availability", "true");
|
|
284
|
-
params.set("include_ext_sensitive_media_warning", "true");
|
|
285
|
-
params.set("include_ext_trusted_friends_metadata", "true");
|
|
286
|
-
params.set("send_error_codes", "true");
|
|
287
|
-
params.set("simple_quoted_tweet", "true");
|
|
288
|
-
params.set("include_tweet_replies", `${includeTweetReplies}`);
|
|
289
|
-
params.set(
|
|
290
|
-
"ext",
|
|
291
|
-
"mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe"
|
|
292
|
-
);
|
|
293
|
-
return params;
|
|
220
|
+
async function generateXPFFHeader(guestId) {
|
|
221
|
+
const generator = new XPFFHeaderGenerator(xpffBaseKey);
|
|
222
|
+
const plaintext = xpffPlain();
|
|
223
|
+
return generator.generateHeader(plaintext, guestId);
|
|
294
224
|
}
|
|
295
225
|
|
|
296
|
-
const log$1 = debug("twitter-scraper:auth");
|
|
226
|
+
const log$3 = debug("twitter-scraper:auth");
|
|
297
227
|
function withTransform(fetchFn, transform) {
|
|
298
228
|
return async (input, init) => {
|
|
299
229
|
const fetchArgs = await transform?.request?.(input, init) ?? [
|
|
@@ -347,24 +277,30 @@ class TwitterGuestAuth {
|
|
|
347
277
|
if (this.shouldUpdate()) {
|
|
348
278
|
await this.updateGuestToken();
|
|
349
279
|
}
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
throw new AuthenticationError(
|
|
353
|
-
"Authentication token is null or undefined."
|
|
354
|
-
);
|
|
280
|
+
if (this.guestToken) {
|
|
281
|
+
headers.set("x-guest-token", this.guestToken);
|
|
355
282
|
}
|
|
356
283
|
headers.set("authorization", `Bearer ${this.bearerToken}`);
|
|
357
|
-
headers.set("x-guest-token", token);
|
|
358
284
|
headers.set(
|
|
359
285
|
"user-agent",
|
|
360
286
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
361
287
|
);
|
|
288
|
+
await this.installCsrfToken(headers);
|
|
289
|
+
if (this.options?.experimental?.xpff) {
|
|
290
|
+
const guestId = await this.guestId();
|
|
291
|
+
if (guestId != null) {
|
|
292
|
+
const xpffHeader = await generateXPFFHeader(guestId);
|
|
293
|
+
headers.set("x-xp-forwarded-for", xpffHeader);
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
headers.set("cookie", await this.getCookieString());
|
|
297
|
+
}
|
|
298
|
+
async installCsrfToken(headers) {
|
|
362
299
|
const cookies = await this.getCookies();
|
|
363
300
|
const xCsrfToken = cookies.find((cookie) => cookie.key === "ct0");
|
|
364
301
|
if (xCsrfToken) {
|
|
365
302
|
headers.set("x-csrf-token", xCsrfToken.value);
|
|
366
303
|
}
|
|
367
|
-
headers.set("cookie", await this.getCookieString());
|
|
368
304
|
}
|
|
369
305
|
async setCookie(key, value) {
|
|
370
306
|
const cookie = toughCookie.Cookie.parse(`${key}=${value}`);
|
|
@@ -397,16 +333,28 @@ class TwitterGuestAuth {
|
|
|
397
333
|
getCookieJarUrl() {
|
|
398
334
|
return typeof document !== "undefined" ? document.location.toString() : "https://x.com";
|
|
399
335
|
}
|
|
336
|
+
async guestId() {
|
|
337
|
+
const cookies = await this.getCookies();
|
|
338
|
+
const guestIdCookie = cookies.find((cookie) => cookie.key === "guest_id");
|
|
339
|
+
return guestIdCookie ? guestIdCookie.value : null;
|
|
340
|
+
}
|
|
400
341
|
/**
|
|
401
342
|
* Updates the authentication state with a new guest token from the Twitter API.
|
|
402
343
|
*/
|
|
403
344
|
async updateGuestToken() {
|
|
345
|
+
try {
|
|
346
|
+
await this.updateGuestTokenCore();
|
|
347
|
+
} catch (err) {
|
|
348
|
+
log$3("Failed to update guest token; this may cause issues:", err);
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
async updateGuestTokenCore() {
|
|
404
352
|
const guestActivateUrl = "https://api.x.com/1.1/guest/activate.json";
|
|
405
353
|
const headers = new headersPolyfill.Headers({
|
|
406
354
|
Authorization: `Bearer ${this.bearerToken}`,
|
|
407
355
|
Cookie: await this.getCookieString()
|
|
408
356
|
});
|
|
409
|
-
log$1(`Making POST request to ${guestActivateUrl}`);
|
|
357
|
+
log$3(`Making POST request to ${guestActivateUrl}`);
|
|
410
358
|
const res = await this.fetch(guestActivateUrl, {
|
|
411
359
|
method: "POST",
|
|
412
360
|
headers,
|
|
@@ -427,7 +375,7 @@ class TwitterGuestAuth {
|
|
|
427
375
|
this.guestToken = newGuestToken;
|
|
428
376
|
this.guestCreatedAt = /* @__PURE__ */ new Date();
|
|
429
377
|
await this.setCookie("gt", newGuestToken);
|
|
430
|
-
log$1(`Updated guest token: ${newGuestToken}`);
|
|
378
|
+
log$3(`Updated guest token: ${newGuestToken}`);
|
|
431
379
|
}
|
|
432
380
|
/**
|
|
433
381
|
* Returns if the authentication token needs to be updated or not.
|
|
@@ -438,6 +386,277 @@ class TwitterGuestAuth {
|
|
|
438
386
|
}
|
|
439
387
|
}
|
|
440
388
|
|
|
389
|
+
class Platform {
|
|
390
|
+
async randomizeCiphers() {
|
|
391
|
+
const platform = await Platform.importPlatform();
|
|
392
|
+
await platform?.randomizeCiphers();
|
|
393
|
+
}
|
|
394
|
+
static async importPlatform() {
|
|
395
|
+
{
|
|
396
|
+
const { platform } = await Promise.resolve().then(function () { return index; });
|
|
397
|
+
return platform;
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
const log$2 = debug("twitter-scraper:xctxid");
|
|
403
|
+
let linkedom = null;
|
|
404
|
+
function linkedomImport() {
|
|
405
|
+
if (!linkedom) {
|
|
406
|
+
const mod = require("linkedom");
|
|
407
|
+
linkedom = mod;
|
|
408
|
+
return mod;
|
|
409
|
+
}
|
|
410
|
+
return linkedom;
|
|
411
|
+
}
|
|
412
|
+
async function parseHTML(html) {
|
|
413
|
+
if (typeof window !== "undefined") {
|
|
414
|
+
const { defaultView } = new DOMParser().parseFromString(html, "text/html");
|
|
415
|
+
if (!defaultView) {
|
|
416
|
+
throw new Error("Failed to get defaultView from parsed HTML.");
|
|
417
|
+
}
|
|
418
|
+
return defaultView;
|
|
419
|
+
} else {
|
|
420
|
+
const { DOMParser: DOMParser2 } = linkedomImport();
|
|
421
|
+
return new DOMParser2().parseFromString(html, "text/html").defaultView;
|
|
422
|
+
}
|
|
423
|
+
}
|
|
424
|
+
async function handleXMigration(fetchFn) {
|
|
425
|
+
const headers = {
|
|
426
|
+
accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
|
427
|
+
"accept-language": "ja",
|
|
428
|
+
"cache-control": "no-cache",
|
|
429
|
+
pragma: "no-cache",
|
|
430
|
+
priority: "u=0, i",
|
|
431
|
+
"sec-ch-ua": '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
|
|
432
|
+
"sec-ch-ua-mobile": "?0",
|
|
433
|
+
"sec-ch-ua-platform": '"Windows"',
|
|
434
|
+
"sec-fetch-dest": "document",
|
|
435
|
+
"sec-fetch-mode": "navigate",
|
|
436
|
+
"sec-fetch-site": "none",
|
|
437
|
+
"sec-fetch-user": "?1",
|
|
438
|
+
"upgrade-insecure-requests": "1",
|
|
439
|
+
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
440
|
+
};
|
|
441
|
+
const response = await fetchFn("https://x.com", {
|
|
442
|
+
headers
|
|
443
|
+
});
|
|
444
|
+
if (!response.ok) {
|
|
445
|
+
throw new Error(`Failed to fetch X homepage: ${response.statusText}`);
|
|
446
|
+
}
|
|
447
|
+
const htmlText = await response.text();
|
|
448
|
+
let dom = await parseHTML(htmlText);
|
|
449
|
+
let document = dom.window.document;
|
|
450
|
+
const migrationRedirectionRegex = new RegExp(
|
|
451
|
+
"(http(?:s)?://(?:www\\.)?(twitter|x){1}\\.com(/x)?/migrate([/?])?tok=[a-zA-Z0-9%\\-_]+)+",
|
|
452
|
+
"i"
|
|
453
|
+
);
|
|
454
|
+
const metaRefresh = document.querySelector("meta[http-equiv='refresh']");
|
|
455
|
+
const metaContent = metaRefresh ? metaRefresh.getAttribute("content") || "" : "";
|
|
456
|
+
const migrationRedirectionUrl = migrationRedirectionRegex.exec(metaContent) || migrationRedirectionRegex.exec(htmlText);
|
|
457
|
+
if (migrationRedirectionUrl) {
|
|
458
|
+
const redirectResponse = await fetch(migrationRedirectionUrl[0]);
|
|
459
|
+
if (!redirectResponse.ok) {
|
|
460
|
+
throw new Error(
|
|
461
|
+
`Failed to follow migration redirection: ${redirectResponse.statusText}`
|
|
462
|
+
);
|
|
463
|
+
}
|
|
464
|
+
const redirectHtml = await redirectResponse.text();
|
|
465
|
+
dom = await parseHTML(redirectHtml);
|
|
466
|
+
document = dom.window.document;
|
|
467
|
+
}
|
|
468
|
+
const migrationForm = document.querySelector("form[name='f']") || document.querySelector("form[action='https://x.com/x/migrate']");
|
|
469
|
+
if (migrationForm) {
|
|
470
|
+
const url = migrationForm.getAttribute("action") || "https://x.com/x/migrate";
|
|
471
|
+
const method = migrationForm.getAttribute("method") || "POST";
|
|
472
|
+
const requestPayload = new FormData();
|
|
473
|
+
const inputFields = migrationForm.querySelectorAll("input");
|
|
474
|
+
for (const element of Array.from(inputFields)) {
|
|
475
|
+
const name = element.getAttribute("name");
|
|
476
|
+
const value = element.getAttribute("value");
|
|
477
|
+
if (name && value) {
|
|
478
|
+
requestPayload.append(name, value);
|
|
479
|
+
}
|
|
480
|
+
}
|
|
481
|
+
const formResponse = await fetch(url, {
|
|
482
|
+
method,
|
|
483
|
+
body: requestPayload,
|
|
484
|
+
headers
|
|
485
|
+
});
|
|
486
|
+
if (!formResponse.ok) {
|
|
487
|
+
throw new Error(
|
|
488
|
+
`Failed to submit migration form: ${formResponse.statusText}`
|
|
489
|
+
);
|
|
490
|
+
}
|
|
491
|
+
const formHtml = await formResponse.text();
|
|
492
|
+
dom = await parseHTML(formHtml);
|
|
493
|
+
document = dom.window.document;
|
|
494
|
+
}
|
|
495
|
+
return document;
|
|
496
|
+
}
|
|
497
|
+
let ClientTransaction = null;
|
|
498
|
+
function clientTransaction() {
|
|
499
|
+
if (!ClientTransaction) {
|
|
500
|
+
const mod = require("x-client-transaction-id");
|
|
501
|
+
const ctx = mod.ClientTransaction;
|
|
502
|
+
ClientTransaction = ctx;
|
|
503
|
+
return ctx;
|
|
504
|
+
}
|
|
505
|
+
return ClientTransaction;
|
|
506
|
+
}
|
|
507
|
+
async function generateTransactionId(url, fetchFn, method) {
|
|
508
|
+
const parsedUrl = new URL(url);
|
|
509
|
+
const path = parsedUrl.pathname;
|
|
510
|
+
log$2(`Generating transaction ID for ${method} ${path}`);
|
|
511
|
+
const document = await handleXMigration(fetchFn);
|
|
512
|
+
const transaction = await clientTransaction().create(document);
|
|
513
|
+
const transactionId = await transaction.generateTransactionId(method, path);
|
|
514
|
+
log$2(`Transaction ID: ${transactionId}`);
|
|
515
|
+
return transactionId;
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
const log$1 = debug("twitter-scraper:api");
|
|
519
|
+
const bearerToken = "AAAAAAAAAAAAAAAAAAAAAFQODgEAAAAAVHTp76lzh3rFzcHbmHVvQxYYpTw%3DckAlMINMjmCwxUcaXbAN4XqJVdgMJaHqNOFgPMK0zN1qLqLQCF";
|
|
520
|
+
async function jitter(maxMs) {
|
|
521
|
+
const jitter2 = Math.random() * maxMs;
|
|
522
|
+
await new Promise((resolve) => setTimeout(resolve, jitter2));
|
|
523
|
+
}
|
|
524
|
+
async function requestApi(url, auth, method = "GET", platform = new Platform(), headers = new headersPolyfill.Headers()) {
|
|
525
|
+
log$1(`Making ${method} request to ${url}`);
|
|
526
|
+
await auth.installTo(headers, url);
|
|
527
|
+
await platform.randomizeCiphers();
|
|
528
|
+
if (auth instanceof TwitterGuestAuth && auth.options?.experimental?.xClientTransactionId) {
|
|
529
|
+
const transactionId = await generateTransactionId(
|
|
530
|
+
url,
|
|
531
|
+
auth.fetch.bind(auth),
|
|
532
|
+
method
|
|
533
|
+
);
|
|
534
|
+
headers.set("x-client-transaction-id", transactionId);
|
|
535
|
+
}
|
|
536
|
+
let res;
|
|
537
|
+
do {
|
|
538
|
+
const fetchParameters = [
|
|
539
|
+
url,
|
|
540
|
+
{
|
|
541
|
+
method,
|
|
542
|
+
headers,
|
|
543
|
+
credentials: "include"
|
|
544
|
+
}
|
|
545
|
+
];
|
|
546
|
+
try {
|
|
547
|
+
res = await auth.fetch(...fetchParameters);
|
|
548
|
+
} catch (err) {
|
|
549
|
+
if (!(err instanceof Error)) {
|
|
550
|
+
throw err;
|
|
551
|
+
}
|
|
552
|
+
return {
|
|
553
|
+
success: false,
|
|
554
|
+
err: new Error("Failed to perform request.")
|
|
555
|
+
};
|
|
556
|
+
}
|
|
557
|
+
await updateCookieJar(auth.cookieJar(), res.headers);
|
|
558
|
+
if (res.status === 429) {
|
|
559
|
+
log$1("Rate limit hit, waiting for retry...");
|
|
560
|
+
await auth.onRateLimit({
|
|
561
|
+
fetchParameters,
|
|
562
|
+
response: res
|
|
563
|
+
});
|
|
564
|
+
}
|
|
565
|
+
} while (res.status === 429);
|
|
566
|
+
if (!res.ok) {
|
|
567
|
+
return {
|
|
568
|
+
success: false,
|
|
569
|
+
err: await ApiError.fromResponse(res)
|
|
570
|
+
};
|
|
571
|
+
}
|
|
572
|
+
const value = await flexParseJson(res);
|
|
573
|
+
if (res.headers.get("x-rate-limit-incoming") == "0") {
|
|
574
|
+
auth.deleteToken();
|
|
575
|
+
return { success: true, value };
|
|
576
|
+
} else {
|
|
577
|
+
return { success: true, value };
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
async function flexParseJson(res) {
|
|
581
|
+
try {
|
|
582
|
+
return await res.json();
|
|
583
|
+
} catch {
|
|
584
|
+
log$1("Failed to parse response as JSON, trying text parse...");
|
|
585
|
+
const text = await res.text();
|
|
586
|
+
log$1("Response text:", text);
|
|
587
|
+
return JSON.parse(text);
|
|
588
|
+
}
|
|
589
|
+
}
|
|
590
|
+
function addApiFeatures(o) {
|
|
591
|
+
return {
|
|
592
|
+
...o,
|
|
593
|
+
rweb_lists_timeline_redesign_enabled: true,
|
|
594
|
+
responsive_web_graphql_exclude_directive_enabled: true,
|
|
595
|
+
verified_phone_label_enabled: false,
|
|
596
|
+
creator_subscriptions_tweet_preview_api_enabled: true,
|
|
597
|
+
responsive_web_graphql_timeline_navigation_enabled: true,
|
|
598
|
+
responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
|
|
599
|
+
tweetypie_unmention_optimization_enabled: true,
|
|
600
|
+
responsive_web_edit_tweet_api_enabled: true,
|
|
601
|
+
graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
|
|
602
|
+
view_counts_everywhere_api_enabled: true,
|
|
603
|
+
longform_notetweets_consumption_enabled: true,
|
|
604
|
+
tweet_awards_web_tipping_enabled: false,
|
|
605
|
+
freedom_of_speech_not_reach_fetch_enabled: true,
|
|
606
|
+
standardized_nudges_misinfo: true,
|
|
607
|
+
longform_notetweets_rich_text_read_enabled: true,
|
|
608
|
+
responsive_web_enhance_cards_enabled: false,
|
|
609
|
+
subscriptions_verification_info_enabled: true,
|
|
610
|
+
subscriptions_verification_info_reason_enabled: true,
|
|
611
|
+
subscriptions_verification_info_verified_since_enabled: true,
|
|
612
|
+
super_follow_badge_privacy_enabled: false,
|
|
613
|
+
super_follow_exclusive_tweet_notifications_enabled: false,
|
|
614
|
+
super_follow_tweet_api_enabled: false,
|
|
615
|
+
super_follow_user_api_enabled: false,
|
|
616
|
+
android_graphql_skip_api_media_color_palette: false,
|
|
617
|
+
creator_subscriptions_subscription_count_enabled: false,
|
|
618
|
+
blue_business_profile_image_shape_enabled: false,
|
|
619
|
+
unified_cards_ad_metadata_container_dynamic_card_content_query_enabled: false
|
|
620
|
+
};
|
|
621
|
+
}
|
|
622
|
+
function addApiParams(params, includeTweetReplies) {
|
|
623
|
+
params.set("include_profile_interstitial_type", "1");
|
|
624
|
+
params.set("include_blocking", "1");
|
|
625
|
+
params.set("include_blocked_by", "1");
|
|
626
|
+
params.set("include_followed_by", "1");
|
|
627
|
+
params.set("include_want_retweets", "1");
|
|
628
|
+
params.set("include_mute_edge", "1");
|
|
629
|
+
params.set("include_can_dm", "1");
|
|
630
|
+
params.set("include_can_media_tag", "1");
|
|
631
|
+
params.set("include_ext_has_nft_avatar", "1");
|
|
632
|
+
params.set("include_ext_is_blue_verified", "1");
|
|
633
|
+
params.set("include_ext_verified_type", "1");
|
|
634
|
+
params.set("skip_status", "1");
|
|
635
|
+
params.set("cards_platform", "Web-12");
|
|
636
|
+
params.set("include_cards", "1");
|
|
637
|
+
params.set("include_ext_alt_text", "true");
|
|
638
|
+
params.set("include_ext_limited_action_results", "false");
|
|
639
|
+
params.set("include_quote_count", "true");
|
|
640
|
+
params.set("include_reply_count", "1");
|
|
641
|
+
params.set("tweet_mode", "extended");
|
|
642
|
+
params.set("include_ext_collab_control", "true");
|
|
643
|
+
params.set("include_ext_views", "true");
|
|
644
|
+
params.set("include_entities", "true");
|
|
645
|
+
params.set("include_user_entities", "true");
|
|
646
|
+
params.set("include_ext_media_color", "true");
|
|
647
|
+
params.set("include_ext_media_availability", "true");
|
|
648
|
+
params.set("include_ext_sensitive_media_warning", "true");
|
|
649
|
+
params.set("include_ext_trusted_friends_metadata", "true");
|
|
650
|
+
params.set("send_error_codes", "true");
|
|
651
|
+
params.set("simple_quoted_tweet", "true");
|
|
652
|
+
params.set("include_tweet_replies", `${includeTweetReplies}`);
|
|
653
|
+
params.set(
|
|
654
|
+
"ext",
|
|
655
|
+
"mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe"
|
|
656
|
+
);
|
|
657
|
+
return params;
|
|
658
|
+
}
|
|
659
|
+
|
|
441
660
|
const log = debug("twitter-scraper:auth-user");
|
|
442
661
|
const TwitterUserAuthSubtask = typebox.Type.Object({
|
|
443
662
|
subtask_id: typebox.Type.String(),
|
|
@@ -545,25 +764,25 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
545
764
|
this.jar = new toughCookie.CookieJar();
|
|
546
765
|
}
|
|
547
766
|
}
|
|
548
|
-
async installCsrfToken(headers) {
|
|
549
|
-
const cookies = await this.getCookies();
|
|
550
|
-
const xCsrfToken = cookies.find((cookie) => cookie.key === "ct0");
|
|
551
|
-
if (xCsrfToken) {
|
|
552
|
-
headers.set("x-csrf-token", xCsrfToken.value);
|
|
553
|
-
}
|
|
554
|
-
}
|
|
555
767
|
async installTo(headers) {
|
|
556
768
|
headers.set("authorization", `Bearer ${this.bearerToken}`);
|
|
557
|
-
const cookie = await this.getCookieString();
|
|
558
|
-
headers.set("cookie", cookie);
|
|
559
|
-
if (this.guestToken) {
|
|
560
|
-
headers.set("x-guest-token", this.guestToken);
|
|
561
|
-
}
|
|
562
769
|
headers.set(
|
|
563
770
|
"user-agent",
|
|
564
771
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
565
772
|
);
|
|
773
|
+
if (this.guestToken) {
|
|
774
|
+
headers.set("x-guest-token", this.guestToken);
|
|
775
|
+
}
|
|
566
776
|
await this.installCsrfToken(headers);
|
|
777
|
+
if (this.options?.experimental?.xpff) {
|
|
778
|
+
const guestId = await this.guestId();
|
|
779
|
+
if (guestId != null) {
|
|
780
|
+
const xpffHeader = await generateXPFFHeader(guestId);
|
|
781
|
+
headers.set("x-xp-forwarded-for", xpffHeader);
|
|
782
|
+
}
|
|
783
|
+
}
|
|
784
|
+
const cookie = await this.getCookieString();
|
|
785
|
+
headers.set("cookie", cookie);
|
|
567
786
|
}
|
|
568
787
|
async initLogin() {
|
|
569
788
|
this.removeCookie("twitter_ads_id=");
|
|
@@ -768,12 +987,6 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
768
987
|
onboardingTaskUrl = `https://api.x.com/1.1/onboarding/task.json?flow_name=${data.flow_name}`;
|
|
769
988
|
}
|
|
770
989
|
log(`Making POST request to ${onboardingTaskUrl}`);
|
|
771
|
-
const token = this.guestToken;
|
|
772
|
-
if (token == null) {
|
|
773
|
-
throw new AuthenticationError(
|
|
774
|
-
"Authentication token is null or undefined."
|
|
775
|
-
);
|
|
776
|
-
}
|
|
777
990
|
const headers = new headersPolyfill.Headers({
|
|
778
991
|
accept: "*/*",
|
|
779
992
|
"accept-language": "en-US,en;q=0.9",
|
|
@@ -790,12 +1003,19 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
790
1003
|
"sec-fetch-mode": "cors",
|
|
791
1004
|
"sec-fetch-site": "same-origin",
|
|
792
1005
|
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
|
|
793
|
-
"x-guest-token": token,
|
|
794
1006
|
"x-twitter-auth-type": "OAuth2Client",
|
|
795
1007
|
"x-twitter-active-user": "yes",
|
|
796
1008
|
"x-twitter-client-language": "en"
|
|
797
1009
|
});
|
|
798
1010
|
await this.installTo(headers);
|
|
1011
|
+
if (this.options?.experimental?.xClientTransactionId) {
|
|
1012
|
+
const transactionId = await generateTransactionId(
|
|
1013
|
+
onboardingTaskUrl,
|
|
1014
|
+
this.fetch.bind(this),
|
|
1015
|
+
"POST"
|
|
1016
|
+
);
|
|
1017
|
+
headers.set("x-client-transaction-id", transactionId);
|
|
1018
|
+
}
|
|
799
1019
|
let res;
|
|
800
1020
|
do {
|
|
801
1021
|
const fetchParameters = [
|
|
@@ -2620,7 +2840,11 @@ class Scraper {
|
|
|
2620
2840
|
return {
|
|
2621
2841
|
fetch: this.options?.fetch,
|
|
2622
2842
|
transform: this.options?.transform,
|
|
2623
|
-
rateLimitStrategy: this.options?.rateLimitStrategy
|
|
2843
|
+
rateLimitStrategy: this.options?.rateLimitStrategy,
|
|
2844
|
+
experimental: {
|
|
2845
|
+
xClientTransactionId: this.options?.experimental?.xClientTransactionId,
|
|
2846
|
+
xpff: this.options?.experimental?.xpff
|
|
2847
|
+
}
|
|
2624
2848
|
};
|
|
2625
2849
|
}
|
|
2626
2850
|
handleResponse(res) {
|