@the-convocation/twitter-scraper 0.19.1 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/default/cjs/index.js +408 -184
- package/dist/default/cjs/index.js.map +1 -1
- package/dist/default/esm/index.mjs +408 -184
- package/dist/default/esm/index.mjs.map +1 -1
- package/dist/node/cjs/index.cjs +405 -181
- package/dist/node/cjs/index.cjs.map +1 -1
- package/dist/node/esm/index.mjs +405 -181
- package/dist/node/esm/index.mjs.map +1 -1
- package/dist/types/index.d.ts +13 -0
- package/examples/node-integration/package.json +2 -1
- package/package.json +6 -4
|
@@ -49,13 +49,13 @@ class AuthenticationError extends Error {
|
|
|
49
49
|
}
|
|
50
50
|
}
|
|
51
51
|
|
|
52
|
-
const log$
|
|
52
|
+
const log$6 = debug("twitter-scraper:rate-limit");
|
|
53
53
|
class WaitingRateLimitStrategy {
|
|
54
54
|
async onRateLimit({ response: res }) {
|
|
55
55
|
const xRateLimitLimit = res.headers.get("x-rate-limit-limit");
|
|
56
56
|
const xRateLimitRemaining = res.headers.get("x-rate-limit-remaining");
|
|
57
57
|
const xRateLimitReset = res.headers.get("x-rate-limit-reset");
|
|
58
|
-
log$
|
|
58
|
+
log$6(
|
|
59
59
|
`Rate limit event: limit=${xRateLimitLimit}, remaining=${xRateLimitRemaining}, reset=${xRateLimitReset}`
|
|
60
60
|
);
|
|
61
61
|
if (xRateLimitRemaining == "0" && xRateLimitReset) {
|
|
@@ -71,23 +71,7 @@ class ErrorRateLimitStrategy {
|
|
|
71
71
|
}
|
|
72
72
|
}
|
|
73
73
|
|
|
74
|
-
const
|
|
75
|
-
randomizeCiphers() {
|
|
76
|
-
return Promise.resolve();
|
|
77
|
-
}
|
|
78
|
-
}();
|
|
79
|
-
|
|
80
|
-
class Platform {
|
|
81
|
-
async randomizeCiphers() {
|
|
82
|
-
const platform = await Platform.importPlatform();
|
|
83
|
-
await platform?.randomizeCiphers();
|
|
84
|
-
}
|
|
85
|
-
static async importPlatform() {
|
|
86
|
-
return genericPlatform;
|
|
87
|
-
}
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
const log$3 = debug("twitter-scraper:requests");
|
|
74
|
+
const log$5 = debug("twitter-scraper:requests");
|
|
91
75
|
async function updateCookieJar(cookieJar, headers) {
|
|
92
76
|
let setCookieHeaders = [];
|
|
93
77
|
if (typeof headers.getSetCookie === "function") {
|
|
@@ -102,12 +86,12 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
102
86
|
for (const cookieStr of setCookieHeaders) {
|
|
103
87
|
const cookie = Cookie.parse(cookieStr);
|
|
104
88
|
if (!cookie) {
|
|
105
|
-
log$
|
|
89
|
+
log$5(`Failed to parse cookie: ${cookieStr.substring(0, 100)}`);
|
|
106
90
|
continue;
|
|
107
91
|
}
|
|
108
92
|
if (cookie.maxAge === 0 || cookie.expires && cookie.expires < /* @__PURE__ */ new Date()) {
|
|
109
93
|
if (cookie.key === "ct0") {
|
|
110
|
-
log$
|
|
94
|
+
log$5(`Skipping deletion of ct0 cookie (Max-Age=0)`);
|
|
111
95
|
}
|
|
112
96
|
continue;
|
|
113
97
|
}
|
|
@@ -115,7 +99,7 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
115
99
|
const url = `${cookie.secure ? "https" : "http"}://${cookie.domain}${cookie.path}`;
|
|
116
100
|
await cookieJar.setCookie(cookie, url);
|
|
117
101
|
if (cookie.key === "ct0") {
|
|
118
|
-
log$
|
|
102
|
+
log$5(
|
|
119
103
|
`Successfully set ct0 cookie with value: ${cookie.value.substring(
|
|
120
104
|
0,
|
|
121
105
|
20
|
|
@@ -123,9 +107,9 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
123
107
|
);
|
|
124
108
|
}
|
|
125
109
|
} catch (err) {
|
|
126
|
-
log$
|
|
110
|
+
log$5(`Failed to set cookie ${cookie.key}: ${err}`);
|
|
127
111
|
if (cookie.key === "ct0") {
|
|
128
|
-
log$
|
|
112
|
+
log$5(`FAILED to set ct0 cookie! Error: ${err}`);
|
|
129
113
|
}
|
|
130
114
|
}
|
|
131
115
|
}
|
|
@@ -139,141 +123,84 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
139
123
|
}
|
|
140
124
|
}
|
|
141
125
|
|
|
142
|
-
const log$
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
126
|
+
const log$4 = debug("twitter-scraper:xpff");
|
|
127
|
+
// Cached Web Crypto object; stays null until getCrypto() has resolved one.
let isoCrypto = null;

/**
 * Returns the Web Crypto implementation to use, resolving it on first use.
 *
 * Prefers the global `crypto` object. When that global is not defined, the
 * `webcrypto` export of the "crypto" module is loaded instead. The resolved
 * object is stored in `isoCrypto` and reused by later calls.
 *
 * @returns {Crypto} The Web Crypto object.
 */
function getCrypto() {
  if (isoCrypto == null) {
    if (typeof crypto !== "undefined") {
      isoCrypto = crypto;
    } else {
      log$4("Global crypto is undefined, importing from crypto module...");
      isoCrypto = require("crypto").webcrypto;
    }
  }
  return isoCrypto;
}
|
|
141
|
+
/**
 * Computes the SHA-256 digest of a string.
 *
 * The string is encoded with TextEncoder before hashing.
 *
 * @param {string} message Text to hash.
 * @returns {Promise<Uint8Array>} The digest bytes.
 */
async function sha256(message) {
  const encoded = new TextEncoder().encode(message);
  const { subtle } = getCrypto();
  return new Uint8Array(await subtle.digest("SHA-256", encoded));
}
|
|
146
|
+
/**
 * Renders bytes as a lowercase hexadecimal string, two digits per byte.
 *
 * @param {ArrayBuffer|ArrayLike<number>} buffer Source passed to `new Uint8Array(...)`.
 * @returns {string} Hex string; empty when there are no bytes.
 */
function buf2hex(buffer) {
  let hex = "";
  for (const byte of new Uint8Array(buffer)) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
|
|
149
|
+
class XPFFHeaderGenerator {
|
|
150
|
+
constructor(seed) {
|
|
151
|
+
this.seed = seed;
|
|
152
|
+
}
|
|
153
|
+
async deriveKey(guestId) {
|
|
154
|
+
const combined = `${this.seed}${guestId}`;
|
|
155
|
+
const result = await sha256(combined);
|
|
156
|
+
return result;
|
|
157
|
+
}
|
|
158
|
+
async generateHeader(plaintext, guestId) {
|
|
159
|
+
log$4(`Generating XPFF key for guest ID: ${guestId}`);
|
|
160
|
+
const key = await this.deriveKey(guestId);
|
|
161
|
+
const nonce = getCrypto().getRandomValues(new Uint8Array(12));
|
|
162
|
+
const cipher = await getCrypto().subtle.importKey(
|
|
163
|
+
"raw",
|
|
164
|
+
key,
|
|
165
|
+
{ name: "AES-GCM" },
|
|
166
|
+
false,
|
|
167
|
+
["encrypt"]
|
|
168
|
+
);
|
|
169
|
+
const encrypted = await getCrypto().subtle.encrypt(
|
|
156
170
|
{
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
}
|
|
182
|
-
if (!res.ok) {
|
|
183
|
-
return {
|
|
184
|
-
success: false,
|
|
185
|
-
err: await ApiError.fromResponse(res)
|
|
186
|
-
};
|
|
187
|
-
}
|
|
188
|
-
const value = await flexParseJson(res);
|
|
189
|
-
if (res.headers.get("x-rate-limit-incoming") == "0") {
|
|
190
|
-
auth.deleteToken();
|
|
191
|
-
return { success: true, value };
|
|
192
|
-
} else {
|
|
193
|
-
return { success: true, value };
|
|
194
|
-
}
|
|
195
|
-
}
|
|
196
|
-
async function flexParseJson(res) {
|
|
197
|
-
try {
|
|
198
|
-
return await res.json();
|
|
199
|
-
} catch {
|
|
200
|
-
log$2("Failed to parse response as JSON, trying text parse...");
|
|
201
|
-
const text = await res.text();
|
|
202
|
-
log$2("Response text:", text);
|
|
203
|
-
return JSON.parse(text);
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
function addApiFeatures(o) {
|
|
207
|
-
return {
|
|
208
|
-
...o,
|
|
209
|
-
rweb_lists_timeline_redesign_enabled: true,
|
|
210
|
-
responsive_web_graphql_exclude_directive_enabled: true,
|
|
211
|
-
verified_phone_label_enabled: false,
|
|
212
|
-
creator_subscriptions_tweet_preview_api_enabled: true,
|
|
213
|
-
responsive_web_graphql_timeline_navigation_enabled: true,
|
|
214
|
-
responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
|
|
215
|
-
tweetypie_unmention_optimization_enabled: true,
|
|
216
|
-
responsive_web_edit_tweet_api_enabled: true,
|
|
217
|
-
graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
|
|
218
|
-
view_counts_everywhere_api_enabled: true,
|
|
219
|
-
longform_notetweets_consumption_enabled: true,
|
|
220
|
-
tweet_awards_web_tipping_enabled: false,
|
|
221
|
-
freedom_of_speech_not_reach_fetch_enabled: true,
|
|
222
|
-
standardized_nudges_misinfo: true,
|
|
223
|
-
longform_notetweets_rich_text_read_enabled: true,
|
|
224
|
-
responsive_web_enhance_cards_enabled: false,
|
|
225
|
-
subscriptions_verification_info_enabled: true,
|
|
226
|
-
subscriptions_verification_info_reason_enabled: true,
|
|
227
|
-
subscriptions_verification_info_verified_since_enabled: true,
|
|
228
|
-
super_follow_badge_privacy_enabled: false,
|
|
229
|
-
super_follow_exclusive_tweet_notifications_enabled: false,
|
|
230
|
-
super_follow_tweet_api_enabled: false,
|
|
231
|
-
super_follow_user_api_enabled: false,
|
|
232
|
-
android_graphql_skip_api_media_color_palette: false,
|
|
233
|
-
creator_subscriptions_subscription_count_enabled: false,
|
|
234
|
-
blue_business_profile_image_shape_enabled: false,
|
|
235
|
-
unified_cards_ad_metadata_container_dynamic_card_content_query_enabled: false
|
|
236
|
-
};
|
|
171
|
+
name: "AES-GCM",
|
|
172
|
+
iv: nonce
|
|
173
|
+
},
|
|
174
|
+
cipher,
|
|
175
|
+
new TextEncoder().encode(plaintext)
|
|
176
|
+
);
|
|
177
|
+
const combined = new Uint8Array(nonce.length + encrypted.byteLength);
|
|
178
|
+
combined.set(nonce);
|
|
179
|
+
combined.set(new Uint8Array(encrypted), nonce.length);
|
|
180
|
+
const result = buf2hex(combined);
|
|
181
|
+
log$4(`XPFF header generated for guest ID ${guestId}: ${result}`);
|
|
182
|
+
return result;
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
const xpffBaseKey = "0e6be1f1e21ffc33590b888fd4dc81b19713e570e805d4e5df80a493c9571a05";
|
|
186
|
+
/**
 * Builds the JSON plaintext that is later encrypted into the XPFF header.
 *
 * The payload holds a fixed set of navigator properties and the current
 * time in milliseconds (`Date.now()`) as `created_at`.
 *
 * @returns {string} The serialized payload.
 */
function xpffPlain() {
  const createdAt = Date.now();
  const navigatorProperties = {
    hasBeenActive: "true",
    userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
    webdriver: "false"
  };
  const payload = {
    navigator_properties: navigatorProperties,
    created_at: createdAt
  };
  return JSON.stringify(payload);
}
|
|
238
|
-
function
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
params.set("include_followed_by", "1");
|
|
243
|
-
params.set("include_want_retweets", "1");
|
|
244
|
-
params.set("include_mute_edge", "1");
|
|
245
|
-
params.set("include_can_dm", "1");
|
|
246
|
-
params.set("include_can_media_tag", "1");
|
|
247
|
-
params.set("include_ext_has_nft_avatar", "1");
|
|
248
|
-
params.set("include_ext_is_blue_verified", "1");
|
|
249
|
-
params.set("include_ext_verified_type", "1");
|
|
250
|
-
params.set("skip_status", "1");
|
|
251
|
-
params.set("cards_platform", "Web-12");
|
|
252
|
-
params.set("include_cards", "1");
|
|
253
|
-
params.set("include_ext_alt_text", "true");
|
|
254
|
-
params.set("include_ext_limited_action_results", "false");
|
|
255
|
-
params.set("include_quote_count", "true");
|
|
256
|
-
params.set("include_reply_count", "1");
|
|
257
|
-
params.set("tweet_mode", "extended");
|
|
258
|
-
params.set("include_ext_collab_control", "true");
|
|
259
|
-
params.set("include_ext_views", "true");
|
|
260
|
-
params.set("include_entities", "true");
|
|
261
|
-
params.set("include_user_entities", "true");
|
|
262
|
-
params.set("include_ext_media_color", "true");
|
|
263
|
-
params.set("include_ext_media_availability", "true");
|
|
264
|
-
params.set("include_ext_sensitive_media_warning", "true");
|
|
265
|
-
params.set("include_ext_trusted_friends_metadata", "true");
|
|
266
|
-
params.set("send_error_codes", "true");
|
|
267
|
-
params.set("simple_quoted_tweet", "true");
|
|
268
|
-
params.set("include_tweet_replies", `${includeTweetReplies}`);
|
|
269
|
-
params.set(
|
|
270
|
-
"ext",
|
|
271
|
-
"mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe"
|
|
272
|
-
);
|
|
273
|
-
return params;
|
|
197
|
+
/**
 * Produces an XPFF header value for the given guest ID.
 *
 * Creates an XPFFHeaderGenerator seeded with `xpffBaseKey` and has it
 * encrypt a fresh `xpffPlain()` payload.
 *
 * @param {string} guestId Guest ID forwarded to the generator.
 * @returns {Promise<string>} Whatever `generateHeader` resolves to.
 */
async function generateXPFFHeader(guestId) {
  return new XPFFHeaderGenerator(xpffBaseKey).generateHeader(xpffPlain(), guestId);
}
|
|
275
202
|
|
|
276
|
-
const log$
|
|
203
|
+
const log$3 = debug("twitter-scraper:auth");
|
|
277
204
|
function withTransform(fetchFn, transform) {
|
|
278
205
|
return async (input, init) => {
|
|
279
206
|
const fetchArgs = await transform?.request?.(input, init) ?? [
|
|
@@ -327,24 +254,30 @@ class TwitterGuestAuth {
|
|
|
327
254
|
if (this.shouldUpdate()) {
|
|
328
255
|
await this.updateGuestToken();
|
|
329
256
|
}
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
throw new AuthenticationError(
|
|
333
|
-
"Authentication token is null or undefined."
|
|
334
|
-
);
|
|
257
|
+
if (this.guestToken) {
|
|
258
|
+
headers.set("x-guest-token", this.guestToken);
|
|
335
259
|
}
|
|
336
260
|
headers.set("authorization", `Bearer ${this.bearerToken}`);
|
|
337
|
-
headers.set("x-guest-token", token);
|
|
338
261
|
headers.set(
|
|
339
262
|
"user-agent",
|
|
340
263
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
341
264
|
);
|
|
265
|
+
await this.installCsrfToken(headers);
|
|
266
|
+
if (this.options?.experimental?.xpff) {
|
|
267
|
+
const guestId = await this.guestId();
|
|
268
|
+
if (guestId != null) {
|
|
269
|
+
const xpffHeader = await generateXPFFHeader(guestId);
|
|
270
|
+
headers.set("x-xp-forwarded-for", xpffHeader);
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
headers.set("cookie", await this.getCookieString());
|
|
274
|
+
}
|
|
275
|
+
async installCsrfToken(headers) {
|
|
342
276
|
const cookies = await this.getCookies();
|
|
343
277
|
const xCsrfToken = cookies.find((cookie) => cookie.key === "ct0");
|
|
344
278
|
if (xCsrfToken) {
|
|
345
279
|
headers.set("x-csrf-token", xCsrfToken.value);
|
|
346
280
|
}
|
|
347
|
-
headers.set("cookie", await this.getCookieString());
|
|
348
281
|
}
|
|
349
282
|
async setCookie(key, value) {
|
|
350
283
|
const cookie = Cookie.parse(`${key}=${value}`);
|
|
@@ -377,16 +310,28 @@ class TwitterGuestAuth {
|
|
|
377
310
|
getCookieJarUrl() {
|
|
378
311
|
return typeof document !== "undefined" ? document.location.toString() : "https://x.com";
|
|
379
312
|
}
|
|
313
|
+
async guestId() {
|
|
314
|
+
const cookies = await this.getCookies();
|
|
315
|
+
const guestIdCookie = cookies.find((cookie) => cookie.key === "guest_id");
|
|
316
|
+
return guestIdCookie ? guestIdCookie.value : null;
|
|
317
|
+
}
|
|
380
318
|
/**
|
|
381
319
|
* Updates the authentication state with a new guest token from the Twitter API.
|
|
382
320
|
*/
|
|
383
321
|
async updateGuestToken() {
|
|
322
|
+
try {
|
|
323
|
+
await this.updateGuestTokenCore();
|
|
324
|
+
} catch (err) {
|
|
325
|
+
log$3("Failed to update guest token; this may cause issues:", err);
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
async updateGuestTokenCore() {
|
|
384
329
|
const guestActivateUrl = "https://api.x.com/1.1/guest/activate.json";
|
|
385
330
|
const headers = new Headers({
|
|
386
331
|
Authorization: `Bearer ${this.bearerToken}`,
|
|
387
332
|
Cookie: await this.getCookieString()
|
|
388
333
|
});
|
|
389
|
-
log$
|
|
334
|
+
log$3(`Making POST request to ${guestActivateUrl}`);
|
|
390
335
|
const res = await this.fetch(guestActivateUrl, {
|
|
391
336
|
method: "POST",
|
|
392
337
|
headers,
|
|
@@ -407,7 +352,7 @@ class TwitterGuestAuth {
|
|
|
407
352
|
this.guestToken = newGuestToken;
|
|
408
353
|
this.guestCreatedAt = /* @__PURE__ */ new Date();
|
|
409
354
|
await this.setCookie("gt", newGuestToken);
|
|
410
|
-
log$
|
|
355
|
+
log$3(`Updated guest token: ${newGuestToken}`);
|
|
411
356
|
}
|
|
412
357
|
/**
|
|
413
358
|
* Returns if the authentication token needs to be updated or not.
|
|
@@ -418,6 +363,280 @@ class TwitterGuestAuth {
|
|
|
418
363
|
}
|
|
419
364
|
}
|
|
420
365
|
|
|
366
|
+
/**
 * Platform implementation used by this build: it has no ciphers to
 * randomize, so the hook resolves immediately.
 */
const genericPlatform = new (class {
  /** @returns {Promise<void>} An already-resolved promise. */
  randomizeCiphers() {
    return Promise.resolve();
  }
})();

/**
 * Facade that forwards platform-specific hooks to the object returned by
 * `Platform.importPlatform()`.
 */
class Platform {
  /**
   * Resolves the platform implementation.
   *
   * @returns {Promise<object>} Resolves to `genericPlatform`.
   */
  static async importPlatform() {
    return genericPlatform;
  }

  /**
   * Runs the implementation's `randomizeCiphers` hook; nothing is called
   * when no implementation is resolved.
   *
   * @returns {Promise<void>}
   */
  async randomizeCiphers() {
    const implementation = await Platform.importPlatform();
    await implementation?.randomizeCiphers();
  }
}
|
|
381
|
+
|
|
382
|
+
const log$2 = debug("twitter-scraper:xctxid");
|
|
383
|
+
let linkedom = null;
|
|
384
|
+
function linkedomImport() {
|
|
385
|
+
if (!linkedom) {
|
|
386
|
+
const mod = require("linkedom");
|
|
387
|
+
linkedom = mod;
|
|
388
|
+
return mod;
|
|
389
|
+
}
|
|
390
|
+
return linkedom;
|
|
391
|
+
}
|
|
392
|
+
/**
 * Parses an HTML string and returns the parsed document's `defaultView`.
 *
 * When a global `window` exists the global DOMParser is used, and a missing
 * `defaultView` is reported as an error. Otherwise the DOMParser exported
 * by linkedom is used and its `defaultView` is returned as-is.
 *
 * @param {string} html Markup to parse.
 * @returns {Promise<object>} The `defaultView` of the parsed document.
 * @throws {Error} On the global-DOMParser path, when `defaultView` is falsy.
 */
async function parseHTML(html) {
  if (typeof window === "undefined") {
    const { DOMParser: LinkedomParser } = linkedomImport();
    const parsed = new LinkedomParser().parseFromString(html, "text/html");
    return parsed.defaultView;
  }
  const view = new DOMParser().parseFromString(html, "text/html").defaultView;
  if (!view) {
    throw new Error("Failed to get defaultView from parsed HTML.");
  }
  return view;
}
|
|
404
|
+
/**
 * Fetches the X home page and follows its optional "migration" flow,
 * returning the document that is left once the flow is finished.
 *
 * Steps:
 *   1. GET https://x.com with a fixed set of browser-like headers.
 *   2. If the page (its meta refresh tag first, then the raw HTML) contains
 *      a `/migrate?tok=...` URL, fetch that URL and continue with its HTML.
 *   3. If the current document contains the migration form, submit the
 *      form's named inputs and continue with the response HTML.
 *
 * Every request goes through `fetchFn`, so the caller-supplied fetch
 * implementation is used for all hops rather than only for the first one.
 *
 * @param {typeof fetch} fetchFn Fetch implementation used for all requests.
 * @returns {Promise<Document>} The document of the last page that was loaded.
 * @throws {Error} When any of the requests answers with a non-OK status.
 */
async function handleXMigration(fetchFn) {
  const headers = {
    accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "ja",
    "cache-control": "no-cache",
    pragma: "no-cache",
    priority: "u=0, i",
    "sec-ch-ua": '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "none",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
  };
  const response = await fetchFn("https://x.com", {
    headers
  });
  if (!response.ok) {
    throw new Error(`Failed to fetch X homepage: ${response.statusText}`);
  }
  const htmlText = await response.text();
  let dom = await parseHTML(htmlText);
  let document = dom.window.document;
  const migrationRedirectionRegex = new RegExp(
    "(http(?:s)?://(?:www\\.)?(twitter|x){1}\\.com(/x)?/migrate([/?])?tok=[a-zA-Z0-9%\\-_]+)+",
    "i"
  );
  const metaRefresh = document.querySelector("meta[http-equiv='refresh']");
  const metaContent = metaRefresh ? metaRefresh.getAttribute("content") || "" : "";
  // Prefer the URL from the meta refresh tag; fall back to scanning the raw HTML.
  const migrationRedirectionUrl = migrationRedirectionRegex.exec(metaContent) || migrationRedirectionRegex.exec(htmlText);
  if (migrationRedirectionUrl) {
    // Use the injected fetch here as well, not the global one.
    const redirectResponse = await fetchFn(migrationRedirectionUrl[0]);
    if (!redirectResponse.ok) {
      throw new Error(
        `Failed to follow migration redirection: ${redirectResponse.statusText}`
      );
    }
    const redirectHtml = await redirectResponse.text();
    dom = await parseHTML(redirectHtml);
    document = dom.window.document;
  }
  const migrationForm = document.querySelector("form[name='f']") || document.querySelector("form[action='https://x.com/x/migrate']");
  if (migrationForm) {
    const url = migrationForm.getAttribute("action") || "https://x.com/x/migrate";
    const method = migrationForm.getAttribute("method") || "POST";
    const requestPayload = new FormData();
    const inputFields = migrationForm.querySelectorAll("input");
    for (const element of Array.from(inputFields)) {
      const name = element.getAttribute("name");
      const value = element.getAttribute("value");
      // Only inputs that carry both a name and a non-empty value are submitted.
      if (name && value) {
        requestPayload.append(name, value);
      }
    }
    // Use the injected fetch here as well, not the global one.
    const formResponse = await fetchFn(url, {
      method,
      body: requestPayload,
      headers
    });
    if (!formResponse.ok) {
      throw new Error(
        `Failed to submit migration form: ${formResponse.statusText}`
      );
    }
    const formHtml = await formResponse.text();
    dom = await parseHTML(formHtml);
    document = dom.window.document;
  }
  return document;
}
|
|
477
|
+
let ClientTransaction = null;
|
|
478
|
+
function clientTransaction() {
|
|
479
|
+
if (!ClientTransaction) {
|
|
480
|
+
const mod = require("x-client-transaction-id");
|
|
481
|
+
const ctx = mod.ClientTransaction;
|
|
482
|
+
ClientTransaction = ctx;
|
|
483
|
+
return ctx;
|
|
484
|
+
}
|
|
485
|
+
return ClientTransaction;
|
|
486
|
+
}
|
|
487
|
+
/**
 * Generates a transaction ID for a request.
 *
 * Loads the X home document through `handleXMigration`, creates a client
 * transaction from it, and asks that transaction for an ID bound to the
 * HTTP method and the path component of `url`.
 *
 * @param {string} url Request URL; only its pathname is used.
 * @param {typeof fetch} fetchFn Fetch implementation passed to `handleXMigration`.
 * @param {string} method HTTP method of the request.
 * @returns {Promise<string>} The generated transaction ID.
 */
async function generateTransactionId(url, fetchFn, method) {
  const { pathname } = new URL(url);
  log$2(`Generating transaction ID for ${method} ${pathname}`);
  const homeDocument = await handleXMigration(fetchFn);
  const transaction = await clientTransaction().create(homeDocument);
  const transactionId = await transaction.generateTransactionId(method, pathname);
  log$2(`Transaction ID: ${transactionId}`);
  return transactionId;
}
|
|
497
|
+
|
|
498
|
+
const log$1 = debug("twitter-scraper:api");
|
|
499
|
+
const bearerToken = "AAAAAAAAAAAAAAAAAAAAAFQODgEAAAAAVHTp76lzh3rFzcHbmHVvQxYYpTw%3DckAlMINMjmCwxUcaXbAN4XqJVdgMJaHqNOFgPMK0zN1qLqLQCF";
|
|
500
|
+
/**
 * Waits for a random duration of `Math.random() * maxMs` milliseconds.
 *
 * @param {number} maxMs Upper bound of the delay, in milliseconds.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
async function jitter(maxMs) {
  const delayMs = Math.random() * maxMs;
  await new Promise((done) => setTimeout(done, delayMs));
}
|
|
504
|
+
/**
 * Performs an authenticated API request and parses the JSON response.
 *
 * The auth object installs its headers first. For a TwitterGuestAuth with
 * the experimental `xClientTransactionId` option enabled, an
 * `x-client-transaction-id` header is added as well. The request is then
 * sent, and re-sent for as long as the response status is 429;
 * `auth.onRateLimit` is awaited between attempts.
 *
 * @param {string} url Request URL.
 * @param {object} auth Auth object; this function uses its `installTo`,
 *   `fetch`, `cookieJar`, `onRateLimit` and `deleteToken` members.
 * @param {string} [method="GET"] HTTP method.
 * @param {object} [platform] Object exposing `randomizeCiphers()`.
 * @param {Headers} [headers] Headers to send; mutated in place.
 * @returns {Promise<{success: true, value: any} | {success: false, err: Error}>}
 *   The parsed body on success. On failure, `err` is either an ApiError
 *   built from the non-OK response or a generic Error whose `cause` is the
 *   error thrown by `auth.fetch`.
 * @throws Re-throws anything thrown by `auth.fetch` that is not an Error instance.
 */
async function requestApi(url, auth, method = "GET", platform = new Platform(), headers = new Headers()) {
  log$1(`Making ${method} request to ${url}`);
  await auth.installTo(headers, url);
  await platform.randomizeCiphers();
  if (auth instanceof TwitterGuestAuth && auth.options?.experimental?.xClientTransactionId) {
    const transactionId = await generateTransactionId(
      url,
      auth.fetch.bind(auth),
      method
    );
    headers.set("x-client-transaction-id", transactionId);
  }
  let res;
  do {
    const fetchParameters = [
      url,
      {
        method,
        headers,
        credentials: "include"
      }
    ];
    try {
      res = await auth.fetch(...fetchParameters);
    } catch (err) {
      if (!(err instanceof Error)) {
        throw err;
      }
      // Keep the public message stable, but preserve the underlying failure
      // for debugging instead of discarding it.
      log$1(`Request failed: ${err.message}`);
      return {
        success: false,
        err: new Error("Failed to perform request.", { cause: err })
      };
    }
    await updateCookieJar(auth.cookieJar(), res.headers);
    if (res.status === 429) {
      log$1("Rate limit hit, waiting for retry...");
      await auth.onRateLimit({
        fetchParameters,
        response: res
      });
    }
  } while (res.status === 429);
  if (!res.ok) {
    return {
      success: false,
      err: await ApiError.fromResponse(res)
    };
  }
  const value = await flexParseJson(res);
  // A value of "0" in this header leads to the current token being dropped;
  // the parsed body is returned either way.
  if (res.headers.get("x-rate-limit-incoming") === "0") {
    auth.deleteToken();
  }
  return { success: true, value };
}
|
|
560
|
+
/**
 * Parses a response body as JSON.
 *
 * The body is read once as text and then parsed. A response body can only
 * be consumed a single time, so calling `res.json()` first and `res.text()`
 * after a failure cannot work: the second read is rejected because the body
 * was already used. Reading the text up front keeps it available for the
 * debug log when parsing fails.
 *
 * @param {Response} res Response whose body has not been consumed yet.
 * @returns {Promise<any>} The parsed JSON value.
 * @throws {SyntaxError} When the body is not valid JSON.
 */
async function flexParseJson(res) {
  const text = await res.text();
  try {
    return JSON.parse(text);
  } catch (err) {
    log$1("Failed to parse response as JSON.");
    log$1("Response text:", text);
    throw err;
  }
}
|
|
570
|
+
/**
 * Returns a copy of `o` extended with the fixed set of API feature flags.
 *
 * The flags are applied after the properties of `o`, so a flag listed here
 * overrides a property of the same name in `o`. `o` itself is not modified.
 *
 * @param {object} o Base object to extend.
 * @returns {object} New object containing `o`'s properties plus the flags.
 */
function addApiFeatures(o) {
  const featureFlags = {
    rweb_lists_timeline_redesign_enabled: true,
    responsive_web_graphql_exclude_directive_enabled: true,
    verified_phone_label_enabled: false,
    creator_subscriptions_tweet_preview_api_enabled: true,
    responsive_web_graphql_timeline_navigation_enabled: true,
    responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
    tweetypie_unmention_optimization_enabled: true,
    responsive_web_edit_tweet_api_enabled: true,
    graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
    view_counts_everywhere_api_enabled: true,
    longform_notetweets_consumption_enabled: true,
    tweet_awards_web_tipping_enabled: false,
    freedom_of_speech_not_reach_fetch_enabled: true,
    standardized_nudges_misinfo: true,
    longform_notetweets_rich_text_read_enabled: true,
    responsive_web_enhance_cards_enabled: false,
    subscriptions_verification_info_enabled: true,
    subscriptions_verification_info_reason_enabled: true,
    subscriptions_verification_info_verified_since_enabled: true,
    super_follow_badge_privacy_enabled: false,
    super_follow_exclusive_tweet_notifications_enabled: false,
    super_follow_tweet_api_enabled: false,
    super_follow_user_api_enabled: false,
    android_graphql_skip_api_media_color_palette: false,
    creator_subscriptions_subscription_count_enabled: false,
    blue_business_profile_image_shape_enabled: false,
    unified_cards_ad_metadata_container_dynamic_card_content_query_enabled: false
  };
  return { ...o, ...featureFlags };
}
|
|
602
|
+
/**
 * Sets the standard API query parameters on `params`.
 *
 * Existing entries with the same name are overwritten; other entries are
 * left alone. The object is modified in place and also returned.
 *
 * @param {URLSearchParams} params Parameter collection to fill.
 * @param {boolean} includeTweetReplies Stringified into `include_tweet_replies`.
 * @returns {URLSearchParams} The same `params` object.
 */
function addApiParams(params, includeTweetReplies) {
  // Fixed name/value pairs, applied in this order.
  const fixedPairs = [
    ["include_profile_interstitial_type", "1"],
    ["include_blocking", "1"],
    ["include_blocked_by", "1"],
    ["include_followed_by", "1"],
    ["include_want_retweets", "1"],
    ["include_mute_edge", "1"],
    ["include_can_dm", "1"],
    ["include_can_media_tag", "1"],
    ["include_ext_has_nft_avatar", "1"],
    ["include_ext_is_blue_verified", "1"],
    ["include_ext_verified_type", "1"],
    ["skip_status", "1"],
    ["cards_platform", "Web-12"],
    ["include_cards", "1"],
    ["include_ext_alt_text", "true"],
    ["include_ext_limited_action_results", "false"],
    ["include_quote_count", "true"],
    ["include_reply_count", "1"],
    ["tweet_mode", "extended"],
    ["include_ext_collab_control", "true"],
    ["include_ext_views", "true"],
    ["include_entities", "true"],
    ["include_user_entities", "true"],
    ["include_ext_media_color", "true"],
    ["include_ext_media_availability", "true"],
    ["include_ext_sensitive_media_warning", "true"],
    ["include_ext_trusted_friends_metadata", "true"],
    ["send_error_codes", "true"],
    ["simple_quoted_tweet", "true"]
  ];
  for (const [name, value] of fixedPairs) {
    params.set(name, value);
  }
  params.set("include_tweet_replies", `${includeTweetReplies}`);
  params.set(
    "ext",
    "mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe"
  );
  return params;
}
|
|
639
|
+
|
|
421
640
|
const log = debug("twitter-scraper:auth-user");
|
|
422
641
|
const TwitterUserAuthSubtask = Type.Object({
|
|
423
642
|
subtask_id: Type.String(),
|
|
@@ -525,25 +744,25 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
525
744
|
this.jar = new CookieJar();
|
|
526
745
|
}
|
|
527
746
|
}
|
|
528
|
-
async installCsrfToken(headers) {
|
|
529
|
-
const cookies = await this.getCookies();
|
|
530
|
-
const xCsrfToken = cookies.find((cookie) => cookie.key === "ct0");
|
|
531
|
-
if (xCsrfToken) {
|
|
532
|
-
headers.set("x-csrf-token", xCsrfToken.value);
|
|
533
|
-
}
|
|
534
|
-
}
|
|
535
747
|
async installTo(headers) {
|
|
536
748
|
headers.set("authorization", `Bearer ${this.bearerToken}`);
|
|
537
|
-
const cookie = await this.getCookieString();
|
|
538
|
-
headers.set("cookie", cookie);
|
|
539
|
-
if (this.guestToken) {
|
|
540
|
-
headers.set("x-guest-token", this.guestToken);
|
|
541
|
-
}
|
|
542
749
|
headers.set(
|
|
543
750
|
"user-agent",
|
|
544
751
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
545
752
|
);
|
|
753
|
+
if (this.guestToken) {
|
|
754
|
+
headers.set("x-guest-token", this.guestToken);
|
|
755
|
+
}
|
|
546
756
|
await this.installCsrfToken(headers);
|
|
757
|
+
if (this.options?.experimental?.xpff) {
|
|
758
|
+
const guestId = await this.guestId();
|
|
759
|
+
if (guestId != null) {
|
|
760
|
+
const xpffHeader = await generateXPFFHeader(guestId);
|
|
761
|
+
headers.set("x-xp-forwarded-for", xpffHeader);
|
|
762
|
+
}
|
|
763
|
+
}
|
|
764
|
+
const cookie = await this.getCookieString();
|
|
765
|
+
headers.set("cookie", cookie);
|
|
547
766
|
}
|
|
548
767
|
async initLogin() {
|
|
549
768
|
this.removeCookie("twitter_ads_id=");
|
|
@@ -748,12 +967,6 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
748
967
|
onboardingTaskUrl = `https://api.x.com/1.1/onboarding/task.json?flow_name=${data.flow_name}`;
|
|
749
968
|
}
|
|
750
969
|
log(`Making POST request to ${onboardingTaskUrl}`);
|
|
751
|
-
const token = this.guestToken;
|
|
752
|
-
if (token == null) {
|
|
753
|
-
throw new AuthenticationError(
|
|
754
|
-
"Authentication token is null or undefined."
|
|
755
|
-
);
|
|
756
|
-
}
|
|
757
970
|
const headers = new Headers({
|
|
758
971
|
accept: "*/*",
|
|
759
972
|
"accept-language": "en-US,en;q=0.9",
|
|
@@ -770,12 +983,19 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
770
983
|
"sec-fetch-mode": "cors",
|
|
771
984
|
"sec-fetch-site": "same-origin",
|
|
772
985
|
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
|
|
773
|
-
"x-guest-token": token,
|
|
774
986
|
"x-twitter-auth-type": "OAuth2Client",
|
|
775
987
|
"x-twitter-active-user": "yes",
|
|
776
988
|
"x-twitter-client-language": "en"
|
|
777
989
|
});
|
|
778
990
|
await this.installTo(headers);
|
|
991
|
+
if (this.options?.experimental?.xClientTransactionId) {
|
|
992
|
+
const transactionId = await generateTransactionId(
|
|
993
|
+
onboardingTaskUrl,
|
|
994
|
+
this.fetch.bind(this),
|
|
995
|
+
"POST"
|
|
996
|
+
);
|
|
997
|
+
headers.set("x-client-transaction-id", transactionId);
|
|
998
|
+
}
|
|
779
999
|
let res;
|
|
780
1000
|
do {
|
|
781
1001
|
const fetchParameters = [
|
|
@@ -2600,7 +2820,11 @@ class Scraper {
|
|
|
2600
2820
|
return {
|
|
2601
2821
|
fetch: this.options?.fetch,
|
|
2602
2822
|
transform: this.options?.transform,
|
|
2603
|
-
rateLimitStrategy: this.options?.rateLimitStrategy
|
|
2823
|
+
rateLimitStrategy: this.options?.rateLimitStrategy,
|
|
2824
|
+
experimental: {
|
|
2825
|
+
xClientTransactionId: this.options?.experimental?.xClientTransactionId,
|
|
2826
|
+
xpff: this.options?.experimental?.xpff
|
|
2827
|
+
}
|
|
2604
2828
|
};
|
|
2605
2829
|
}
|
|
2606
2830
|
handleResponse(res) {
|