@the-convocation/twitter-scraper 0.19.1 → 0.20.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/default/cjs/index.js +441 -196
- package/dist/default/cjs/index.js.map +1 -1
- package/dist/default/esm/index.mjs +441 -196
- package/dist/default/esm/index.mjs.map +1 -1
- package/dist/node/cjs/index.cjs +438 -193
- package/dist/node/cjs/index.cjs.map +1 -1
- package/dist/node/esm/index.mjs +438 -193
- package/dist/node/esm/index.mjs.map +1 -1
- package/dist/types/index.d.ts +13 -0
- package/examples/node-integration/package.json +2 -1
- package/package.json +6 -4
|
@@ -49,13 +49,13 @@ class AuthenticationError extends Error {
|
|
|
49
49
|
}
|
|
50
50
|
}
|
|
51
51
|
|
|
52
|
-
const log$
|
|
52
|
+
const log$6 = debug("twitter-scraper:rate-limit");
|
|
53
53
|
class WaitingRateLimitStrategy {
|
|
54
54
|
async onRateLimit({ response: res }) {
|
|
55
55
|
const xRateLimitLimit = res.headers.get("x-rate-limit-limit");
|
|
56
56
|
const xRateLimitRemaining = res.headers.get("x-rate-limit-remaining");
|
|
57
57
|
const xRateLimitReset = res.headers.get("x-rate-limit-reset");
|
|
58
|
-
log$
|
|
58
|
+
log$6(
|
|
59
59
|
`Rate limit event: limit=${xRateLimitLimit}, remaining=${xRateLimitRemaining}, reset=${xRateLimitReset}`
|
|
60
60
|
);
|
|
61
61
|
if (xRateLimitRemaining == "0" && xRateLimitReset) {
|
|
@@ -71,23 +71,7 @@ class ErrorRateLimitStrategy {
|
|
|
71
71
|
}
|
|
72
72
|
}
|
|
73
73
|
|
|
74
|
-
const
|
|
75
|
-
randomizeCiphers() {
|
|
76
|
-
return Promise.resolve();
|
|
77
|
-
}
|
|
78
|
-
}();
|
|
79
|
-
|
|
80
|
-
class Platform {
|
|
81
|
-
async randomizeCiphers() {
|
|
82
|
-
const platform = await Platform.importPlatform();
|
|
83
|
-
await platform?.randomizeCiphers();
|
|
84
|
-
}
|
|
85
|
-
static async importPlatform() {
|
|
86
|
-
return genericPlatform;
|
|
87
|
-
}
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
const log$3 = debug("twitter-scraper:requests");
|
|
74
|
+
const log$5 = debug("twitter-scraper:requests");
|
|
91
75
|
async function updateCookieJar(cookieJar, headers) {
|
|
92
76
|
let setCookieHeaders = [];
|
|
93
77
|
if (typeof headers.getSetCookie === "function") {
|
|
@@ -102,12 +86,12 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
102
86
|
for (const cookieStr of setCookieHeaders) {
|
|
103
87
|
const cookie = Cookie.parse(cookieStr);
|
|
104
88
|
if (!cookie) {
|
|
105
|
-
log$
|
|
89
|
+
log$5(`Failed to parse cookie: ${cookieStr.substring(0, 100)}`);
|
|
106
90
|
continue;
|
|
107
91
|
}
|
|
108
92
|
if (cookie.maxAge === 0 || cookie.expires && cookie.expires < /* @__PURE__ */ new Date()) {
|
|
109
93
|
if (cookie.key === "ct0") {
|
|
110
|
-
log$
|
|
94
|
+
log$5(`Skipping deletion of ct0 cookie (Max-Age=0)`);
|
|
111
95
|
}
|
|
112
96
|
continue;
|
|
113
97
|
}
|
|
@@ -115,7 +99,7 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
115
99
|
const url = `${cookie.secure ? "https" : "http"}://${cookie.domain}${cookie.path}`;
|
|
116
100
|
await cookieJar.setCookie(cookie, url);
|
|
117
101
|
if (cookie.key === "ct0") {
|
|
118
|
-
log$
|
|
102
|
+
log$5(
|
|
119
103
|
`Successfully set ct0 cookie with value: ${cookie.value.substring(
|
|
120
104
|
0,
|
|
121
105
|
20
|
|
@@ -123,9 +107,9 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
123
107
|
);
|
|
124
108
|
}
|
|
125
109
|
} catch (err) {
|
|
126
|
-
log$
|
|
110
|
+
log$5(`Failed to set cookie ${cookie.key}: ${err}`);
|
|
127
111
|
if (cookie.key === "ct0") {
|
|
128
|
-
log$
|
|
112
|
+
log$5(`FAILED to set ct0 cookie! Error: ${err}`);
|
|
129
113
|
}
|
|
130
114
|
}
|
|
131
115
|
}
|
|
@@ -139,141 +123,84 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
139
123
|
}
|
|
140
124
|
}
|
|
141
125
|
|
|
142
|
-
const log$
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
126
|
+
const log$4 = debug("twitter-scraper:xpff");
|
|
127
|
+
let isoCrypto = null;
|
|
128
|
+
function getCrypto() {
|
|
129
|
+
if (isoCrypto != null) {
|
|
130
|
+
return isoCrypto;
|
|
131
|
+
}
|
|
132
|
+
if (typeof crypto === "undefined") {
|
|
133
|
+
log$4("Global crypto is undefined, importing from crypto module...");
|
|
134
|
+
const { webcrypto } = require("crypto");
|
|
135
|
+
isoCrypto = webcrypto;
|
|
136
|
+
return webcrypto;
|
|
137
|
+
}
|
|
138
|
+
isoCrypto = crypto;
|
|
139
|
+
return crypto;
|
|
140
|
+
}
|
|
141
|
+
async function sha256(message) {
|
|
142
|
+
const msgBuffer = new TextEncoder().encode(message);
|
|
143
|
+
const hashBuffer = await getCrypto().subtle.digest("SHA-256", msgBuffer);
|
|
144
|
+
return new Uint8Array(hashBuffer);
|
|
145
|
+
}
|
|
146
|
+
function buf2hex(buffer) {
|
|
147
|
+
return [...new Uint8Array(buffer)].map((x) => x.toString(16).padStart(2, "0")).join("");
|
|
148
|
+
}
|
|
149
|
+
class XPFFHeaderGenerator {
|
|
150
|
+
constructor(seed) {
|
|
151
|
+
this.seed = seed;
|
|
152
|
+
}
|
|
153
|
+
async deriveKey(guestId) {
|
|
154
|
+
const combined = `${this.seed}${guestId}`;
|
|
155
|
+
const result = await sha256(combined);
|
|
156
|
+
return result;
|
|
157
|
+
}
|
|
158
|
+
async generateHeader(plaintext, guestId) {
|
|
159
|
+
log$4(`Generating XPFF key for guest ID: ${guestId}`);
|
|
160
|
+
const key = await this.deriveKey(guestId);
|
|
161
|
+
const nonce = getCrypto().getRandomValues(new Uint8Array(12));
|
|
162
|
+
const cipher = await getCrypto().subtle.importKey(
|
|
163
|
+
"raw",
|
|
164
|
+
key,
|
|
165
|
+
{ name: "AES-GCM" },
|
|
166
|
+
false,
|
|
167
|
+
["encrypt"]
|
|
168
|
+
);
|
|
169
|
+
const encrypted = await getCrypto().subtle.encrypt(
|
|
156
170
|
{
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
}
|
|
182
|
-
if (!res.ok) {
|
|
183
|
-
return {
|
|
184
|
-
success: false,
|
|
185
|
-
err: await ApiError.fromResponse(res)
|
|
186
|
-
};
|
|
187
|
-
}
|
|
188
|
-
const value = await flexParseJson(res);
|
|
189
|
-
if (res.headers.get("x-rate-limit-incoming") == "0") {
|
|
190
|
-
auth.deleteToken();
|
|
191
|
-
return { success: true, value };
|
|
192
|
-
} else {
|
|
193
|
-
return { success: true, value };
|
|
194
|
-
}
|
|
195
|
-
}
|
|
196
|
-
async function flexParseJson(res) {
|
|
197
|
-
try {
|
|
198
|
-
return await res.json();
|
|
199
|
-
} catch {
|
|
200
|
-
log$2("Failed to parse response as JSON, trying text parse...");
|
|
201
|
-
const text = await res.text();
|
|
202
|
-
log$2("Response text:", text);
|
|
203
|
-
return JSON.parse(text);
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
function addApiFeatures(o) {
|
|
207
|
-
return {
|
|
208
|
-
...o,
|
|
209
|
-
rweb_lists_timeline_redesign_enabled: true,
|
|
210
|
-
responsive_web_graphql_exclude_directive_enabled: true,
|
|
211
|
-
verified_phone_label_enabled: false,
|
|
212
|
-
creator_subscriptions_tweet_preview_api_enabled: true,
|
|
213
|
-
responsive_web_graphql_timeline_navigation_enabled: true,
|
|
214
|
-
responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
|
|
215
|
-
tweetypie_unmention_optimization_enabled: true,
|
|
216
|
-
responsive_web_edit_tweet_api_enabled: true,
|
|
217
|
-
graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
|
|
218
|
-
view_counts_everywhere_api_enabled: true,
|
|
219
|
-
longform_notetweets_consumption_enabled: true,
|
|
220
|
-
tweet_awards_web_tipping_enabled: false,
|
|
221
|
-
freedom_of_speech_not_reach_fetch_enabled: true,
|
|
222
|
-
standardized_nudges_misinfo: true,
|
|
223
|
-
longform_notetweets_rich_text_read_enabled: true,
|
|
224
|
-
responsive_web_enhance_cards_enabled: false,
|
|
225
|
-
subscriptions_verification_info_enabled: true,
|
|
226
|
-
subscriptions_verification_info_reason_enabled: true,
|
|
227
|
-
subscriptions_verification_info_verified_since_enabled: true,
|
|
228
|
-
super_follow_badge_privacy_enabled: false,
|
|
229
|
-
super_follow_exclusive_tweet_notifications_enabled: false,
|
|
230
|
-
super_follow_tweet_api_enabled: false,
|
|
231
|
-
super_follow_user_api_enabled: false,
|
|
232
|
-
android_graphql_skip_api_media_color_palette: false,
|
|
233
|
-
creator_subscriptions_subscription_count_enabled: false,
|
|
234
|
-
blue_business_profile_image_shape_enabled: false,
|
|
235
|
-
unified_cards_ad_metadata_container_dynamic_card_content_query_enabled: false
|
|
236
|
-
};
|
|
171
|
+
name: "AES-GCM",
|
|
172
|
+
iv: nonce
|
|
173
|
+
},
|
|
174
|
+
cipher,
|
|
175
|
+
new TextEncoder().encode(plaintext)
|
|
176
|
+
);
|
|
177
|
+
const combined = new Uint8Array(nonce.length + encrypted.byteLength);
|
|
178
|
+
combined.set(nonce);
|
|
179
|
+
combined.set(new Uint8Array(encrypted), nonce.length);
|
|
180
|
+
const result = buf2hex(combined);
|
|
181
|
+
log$4(`XPFF header generated for guest ID ${guestId}: ${result}`);
|
|
182
|
+
return result;
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
const xpffBaseKey = "0e6be1f1e21ffc33590b888fd4dc81b19713e570e805d4e5df80a493c9571a05";
|
|
186
|
+
function xpffPlain() {
|
|
187
|
+
const timestamp = Date.now();
|
|
188
|
+
return JSON.stringify({
|
|
189
|
+
navigator_properties: {
|
|
190
|
+
hasBeenActive: "true",
|
|
191
|
+
userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
|
|
192
|
+
webdriver: "false"
|
|
193
|
+
},
|
|
194
|
+
created_at: timestamp
|
|
195
|
+
});
|
|
237
196
|
}
|
|
238
|
-
function
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
params.set("include_followed_by", "1");
|
|
243
|
-
params.set("include_want_retweets", "1");
|
|
244
|
-
params.set("include_mute_edge", "1");
|
|
245
|
-
params.set("include_can_dm", "1");
|
|
246
|
-
params.set("include_can_media_tag", "1");
|
|
247
|
-
params.set("include_ext_has_nft_avatar", "1");
|
|
248
|
-
params.set("include_ext_is_blue_verified", "1");
|
|
249
|
-
params.set("include_ext_verified_type", "1");
|
|
250
|
-
params.set("skip_status", "1");
|
|
251
|
-
params.set("cards_platform", "Web-12");
|
|
252
|
-
params.set("include_cards", "1");
|
|
253
|
-
params.set("include_ext_alt_text", "true");
|
|
254
|
-
params.set("include_ext_limited_action_results", "false");
|
|
255
|
-
params.set("include_quote_count", "true");
|
|
256
|
-
params.set("include_reply_count", "1");
|
|
257
|
-
params.set("tweet_mode", "extended");
|
|
258
|
-
params.set("include_ext_collab_control", "true");
|
|
259
|
-
params.set("include_ext_views", "true");
|
|
260
|
-
params.set("include_entities", "true");
|
|
261
|
-
params.set("include_user_entities", "true");
|
|
262
|
-
params.set("include_ext_media_color", "true");
|
|
263
|
-
params.set("include_ext_media_availability", "true");
|
|
264
|
-
params.set("include_ext_sensitive_media_warning", "true");
|
|
265
|
-
params.set("include_ext_trusted_friends_metadata", "true");
|
|
266
|
-
params.set("send_error_codes", "true");
|
|
267
|
-
params.set("simple_quoted_tweet", "true");
|
|
268
|
-
params.set("include_tweet_replies", `${includeTweetReplies}`);
|
|
269
|
-
params.set(
|
|
270
|
-
"ext",
|
|
271
|
-
"mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe"
|
|
272
|
-
);
|
|
273
|
-
return params;
|
|
197
|
+
async function generateXPFFHeader(guestId) {
|
|
198
|
+
const generator = new XPFFHeaderGenerator(xpffBaseKey);
|
|
199
|
+
const plaintext = xpffPlain();
|
|
200
|
+
return generator.generateHeader(plaintext, guestId);
|
|
274
201
|
}
|
|
275
202
|
|
|
276
|
-
const log$
|
|
203
|
+
const log$3 = debug("twitter-scraper:auth");
|
|
277
204
|
function withTransform(fetchFn, transform) {
|
|
278
205
|
return async (input, init) => {
|
|
279
206
|
const fetchArgs = await transform?.request?.(input, init) ?? [
|
|
@@ -323,28 +250,37 @@ class TwitterGuestAuth {
|
|
|
323
250
|
}
|
|
324
251
|
return new Date(this.guestCreatedAt);
|
|
325
252
|
}
|
|
326
|
-
async installTo(headers) {
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
"
|
|
334
|
-
|
|
253
|
+
async installTo(headers, _url, bearerTokenOverride) {
|
|
254
|
+
const tokenToUse = bearerTokenOverride ?? this.bearerToken;
|
|
255
|
+
if (!bearerTokenOverride) {
|
|
256
|
+
if (this.shouldUpdate()) {
|
|
257
|
+
await this.updateGuestToken();
|
|
258
|
+
}
|
|
259
|
+
if (this.guestToken) {
|
|
260
|
+
headers.set("x-guest-token", this.guestToken);
|
|
261
|
+
}
|
|
335
262
|
}
|
|
336
|
-
headers.set("authorization", `Bearer ${
|
|
337
|
-
headers.set("x-guest-token", token);
|
|
263
|
+
headers.set("authorization", `Bearer ${tokenToUse}`);
|
|
338
264
|
headers.set(
|
|
339
265
|
"user-agent",
|
|
340
266
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
341
267
|
);
|
|
268
|
+
await this.installCsrfToken(headers);
|
|
269
|
+
if (this.options?.experimental?.xpff) {
|
|
270
|
+
const guestId = await this.guestId();
|
|
271
|
+
if (guestId != null) {
|
|
272
|
+
const xpffHeader = await generateXPFFHeader(guestId);
|
|
273
|
+
headers.set("x-xp-forwarded-for", xpffHeader);
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
headers.set("cookie", await this.getCookieString());
|
|
277
|
+
}
|
|
278
|
+
async installCsrfToken(headers) {
|
|
342
279
|
const cookies = await this.getCookies();
|
|
343
280
|
const xCsrfToken = cookies.find((cookie) => cookie.key === "ct0");
|
|
344
281
|
if (xCsrfToken) {
|
|
345
282
|
headers.set("x-csrf-token", xCsrfToken.value);
|
|
346
283
|
}
|
|
347
|
-
headers.set("cookie", await this.getCookieString());
|
|
348
284
|
}
|
|
349
285
|
async setCookie(key, value) {
|
|
350
286
|
const cookie = Cookie.parse(`${key}=${value}`);
|
|
@@ -377,16 +313,28 @@ class TwitterGuestAuth {
|
|
|
377
313
|
getCookieJarUrl() {
|
|
378
314
|
return typeof document !== "undefined" ? document.location.toString() : "https://x.com";
|
|
379
315
|
}
|
|
316
|
+
async guestId() {
|
|
317
|
+
const cookies = await this.getCookies();
|
|
318
|
+
const guestIdCookie = cookies.find((cookie) => cookie.key === "guest_id");
|
|
319
|
+
return guestIdCookie ? guestIdCookie.value : null;
|
|
320
|
+
}
|
|
380
321
|
/**
|
|
381
322
|
* Updates the authentication state with a new guest token from the Twitter API.
|
|
382
323
|
*/
|
|
383
324
|
async updateGuestToken() {
|
|
325
|
+
try {
|
|
326
|
+
await this.updateGuestTokenCore();
|
|
327
|
+
} catch (err) {
|
|
328
|
+
log$3("Failed to update guest token; this may cause issues:", err);
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
async updateGuestTokenCore() {
|
|
384
332
|
const guestActivateUrl = "https://api.x.com/1.1/guest/activate.json";
|
|
385
333
|
const headers = new Headers({
|
|
386
334
|
Authorization: `Bearer ${this.bearerToken}`,
|
|
387
335
|
Cookie: await this.getCookieString()
|
|
388
336
|
});
|
|
389
|
-
log$
|
|
337
|
+
log$3(`Making POST request to ${guestActivateUrl}`);
|
|
390
338
|
const res = await this.fetch(guestActivateUrl, {
|
|
391
339
|
method: "POST",
|
|
392
340
|
headers,
|
|
@@ -407,7 +355,7 @@ class TwitterGuestAuth {
|
|
|
407
355
|
this.guestToken = newGuestToken;
|
|
408
356
|
this.guestCreatedAt = /* @__PURE__ */ new Date();
|
|
409
357
|
await this.setCookie("gt", newGuestToken);
|
|
410
|
-
log$
|
|
358
|
+
log$3(`Updated guest token: ${newGuestToken}`);
|
|
411
359
|
}
|
|
412
360
|
/**
|
|
413
361
|
* Returns if the authentication token needs to be updated or not.
|
|
@@ -418,6 +366,281 @@ class TwitterGuestAuth {
|
|
|
418
366
|
}
|
|
419
367
|
}
|
|
420
368
|
|
|
369
|
+
const genericPlatform = new class {
|
|
370
|
+
randomizeCiphers() {
|
|
371
|
+
return Promise.resolve();
|
|
372
|
+
}
|
|
373
|
+
}();
|
|
374
|
+
|
|
375
|
+
class Platform {
|
|
376
|
+
async randomizeCiphers() {
|
|
377
|
+
const platform = await Platform.importPlatform();
|
|
378
|
+
await platform?.randomizeCiphers();
|
|
379
|
+
}
|
|
380
|
+
static async importPlatform() {
|
|
381
|
+
return genericPlatform;
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
const log$2 = debug("twitter-scraper:xctxid");
|
|
386
|
+
let linkedom = null;
|
|
387
|
+
function linkedomImport() {
|
|
388
|
+
if (!linkedom) {
|
|
389
|
+
const mod = require("linkedom");
|
|
390
|
+
linkedom = mod;
|
|
391
|
+
return mod;
|
|
392
|
+
}
|
|
393
|
+
return linkedom;
|
|
394
|
+
}
|
|
395
|
+
async function parseHTML(html) {
|
|
396
|
+
if (typeof window !== "undefined") {
|
|
397
|
+
const { defaultView } = new DOMParser().parseFromString(html, "text/html");
|
|
398
|
+
if (!defaultView) {
|
|
399
|
+
throw new Error("Failed to get defaultView from parsed HTML.");
|
|
400
|
+
}
|
|
401
|
+
return defaultView;
|
|
402
|
+
} else {
|
|
403
|
+
const { DOMParser: DOMParser2 } = linkedomImport();
|
|
404
|
+
return new DOMParser2().parseFromString(html, "text/html").defaultView;
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
async function handleXMigration(fetchFn) {
|
|
408
|
+
const headers = {
|
|
409
|
+
accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
|
410
|
+
"accept-language": "ja",
|
|
411
|
+
"cache-control": "no-cache",
|
|
412
|
+
pragma: "no-cache",
|
|
413
|
+
priority: "u=0, i",
|
|
414
|
+
"sec-ch-ua": '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
|
|
415
|
+
"sec-ch-ua-mobile": "?0",
|
|
416
|
+
"sec-ch-ua-platform": '"Windows"',
|
|
417
|
+
"sec-fetch-dest": "document",
|
|
418
|
+
"sec-fetch-mode": "navigate",
|
|
419
|
+
"sec-fetch-site": "none",
|
|
420
|
+
"sec-fetch-user": "?1",
|
|
421
|
+
"upgrade-insecure-requests": "1",
|
|
422
|
+
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
423
|
+
};
|
|
424
|
+
const response = await fetchFn("https://x.com", {
|
|
425
|
+
headers
|
|
426
|
+
});
|
|
427
|
+
if (!response.ok) {
|
|
428
|
+
throw new Error(`Failed to fetch X homepage: ${response.statusText}`);
|
|
429
|
+
}
|
|
430
|
+
const htmlText = await response.text();
|
|
431
|
+
let dom = await parseHTML(htmlText);
|
|
432
|
+
let document = dom.window.document;
|
|
433
|
+
const migrationRedirectionRegex = new RegExp(
|
|
434
|
+
"(http(?:s)?://(?:www\\.)?(twitter|x){1}\\.com(/x)?/migrate([/?])?tok=[a-zA-Z0-9%\\-_]+)+",
|
|
435
|
+
"i"
|
|
436
|
+
);
|
|
437
|
+
const metaRefresh = document.querySelector("meta[http-equiv='refresh']");
|
|
438
|
+
const metaContent = metaRefresh ? metaRefresh.getAttribute("content") || "" : "";
|
|
439
|
+
const migrationRedirectionUrl = migrationRedirectionRegex.exec(metaContent) || migrationRedirectionRegex.exec(htmlText);
|
|
440
|
+
if (migrationRedirectionUrl) {
|
|
441
|
+
const redirectResponse = await fetch(migrationRedirectionUrl[0]);
|
|
442
|
+
if (!redirectResponse.ok) {
|
|
443
|
+
throw new Error(
|
|
444
|
+
`Failed to follow migration redirection: ${redirectResponse.statusText}`
|
|
445
|
+
);
|
|
446
|
+
}
|
|
447
|
+
const redirectHtml = await redirectResponse.text();
|
|
448
|
+
dom = await parseHTML(redirectHtml);
|
|
449
|
+
document = dom.window.document;
|
|
450
|
+
}
|
|
451
|
+
const migrationForm = document.querySelector("form[name='f']") || document.querySelector("form[action='https://x.com/x/migrate']");
|
|
452
|
+
if (migrationForm) {
|
|
453
|
+
const url = migrationForm.getAttribute("action") || "https://x.com/x/migrate";
|
|
454
|
+
const method = migrationForm.getAttribute("method") || "POST";
|
|
455
|
+
const requestPayload = new FormData();
|
|
456
|
+
const inputFields = migrationForm.querySelectorAll("input");
|
|
457
|
+
for (const element of Array.from(inputFields)) {
|
|
458
|
+
const name = element.getAttribute("name");
|
|
459
|
+
const value = element.getAttribute("value");
|
|
460
|
+
if (name && value) {
|
|
461
|
+
requestPayload.append(name, value);
|
|
462
|
+
}
|
|
463
|
+
}
|
|
464
|
+
const formResponse = await fetch(url, {
|
|
465
|
+
method,
|
|
466
|
+
body: requestPayload,
|
|
467
|
+
headers
|
|
468
|
+
});
|
|
469
|
+
if (!formResponse.ok) {
|
|
470
|
+
throw new Error(
|
|
471
|
+
`Failed to submit migration form: ${formResponse.statusText}`
|
|
472
|
+
);
|
|
473
|
+
}
|
|
474
|
+
const formHtml = await formResponse.text();
|
|
475
|
+
dom = await parseHTML(formHtml);
|
|
476
|
+
document = dom.window.document;
|
|
477
|
+
}
|
|
478
|
+
return document;
|
|
479
|
+
}
|
|
480
|
+
let ClientTransaction = null;
|
|
481
|
+
function clientTransaction() {
|
|
482
|
+
if (!ClientTransaction) {
|
|
483
|
+
const mod = require("x-client-transaction-id");
|
|
484
|
+
const ctx = mod.ClientTransaction;
|
|
485
|
+
ClientTransaction = ctx;
|
|
486
|
+
return ctx;
|
|
487
|
+
}
|
|
488
|
+
return ClientTransaction;
|
|
489
|
+
}
|
|
490
|
+
async function generateTransactionId(url, fetchFn, method) {
|
|
491
|
+
const parsedUrl = new URL(url);
|
|
492
|
+
const path = parsedUrl.pathname;
|
|
493
|
+
log$2(`Generating transaction ID for ${method} ${path}`);
|
|
494
|
+
const document = await handleXMigration(fetchFn);
|
|
495
|
+
const transaction = await clientTransaction().create(document);
|
|
496
|
+
const transactionId = await transaction.generateTransactionId(method, path);
|
|
497
|
+
log$2(`Transaction ID: ${transactionId}`);
|
|
498
|
+
return transactionId;
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
const log$1 = debug("twitter-scraper:api");
|
|
502
|
+
const bearerToken = "AAAAAAAAAAAAAAAAAAAAAFQODgEAAAAAVHTp76lzh3rFzcHbmHVvQxYYpTw%3DckAlMINMjmCwxUcaXbAN4XqJVdgMJaHqNOFgPMK0zN1qLqLQCF";
|
|
503
|
+
const bearerToken2 = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA";
|
|
504
|
+
async function jitter(maxMs) {
|
|
505
|
+
const jitter2 = Math.random() * maxMs;
|
|
506
|
+
await new Promise((resolve) => setTimeout(resolve, jitter2));
|
|
507
|
+
}
|
|
508
|
+
async function requestApi(url, auth, method = "GET", platform = new Platform(), headers = new Headers(), bearerTokenOverride) {
|
|
509
|
+
log$1(`Making ${method} request to ${url}`);
|
|
510
|
+
await auth.installTo(headers, url, bearerTokenOverride);
|
|
511
|
+
await platform.randomizeCiphers();
|
|
512
|
+
if (auth instanceof TwitterGuestAuth && auth.options?.experimental?.xClientTransactionId) {
|
|
513
|
+
const transactionId = await generateTransactionId(
|
|
514
|
+
url,
|
|
515
|
+
auth.fetch.bind(auth),
|
|
516
|
+
method
|
|
517
|
+
);
|
|
518
|
+
headers.set("x-client-transaction-id", transactionId);
|
|
519
|
+
}
|
|
520
|
+
let res;
|
|
521
|
+
do {
|
|
522
|
+
const fetchParameters = [
|
|
523
|
+
url,
|
|
524
|
+
{
|
|
525
|
+
method,
|
|
526
|
+
headers,
|
|
527
|
+
credentials: "include"
|
|
528
|
+
}
|
|
529
|
+
];
|
|
530
|
+
try {
|
|
531
|
+
res = await auth.fetch(...fetchParameters);
|
|
532
|
+
} catch (err) {
|
|
533
|
+
if (!(err instanceof Error)) {
|
|
534
|
+
throw err;
|
|
535
|
+
}
|
|
536
|
+
return {
|
|
537
|
+
success: false,
|
|
538
|
+
err: new Error("Failed to perform request.")
|
|
539
|
+
};
|
|
540
|
+
}
|
|
541
|
+
await updateCookieJar(auth.cookieJar(), res.headers);
|
|
542
|
+
if (res.status === 429) {
|
|
543
|
+
log$1("Rate limit hit, waiting for retry...");
|
|
544
|
+
await auth.onRateLimit({
|
|
545
|
+
fetchParameters,
|
|
546
|
+
response: res
|
|
547
|
+
});
|
|
548
|
+
}
|
|
549
|
+
} while (res.status === 429);
|
|
550
|
+
if (!res.ok) {
|
|
551
|
+
return {
|
|
552
|
+
success: false,
|
|
553
|
+
err: await ApiError.fromResponse(res)
|
|
554
|
+
};
|
|
555
|
+
}
|
|
556
|
+
const value = await flexParseJson(res);
|
|
557
|
+
if (res.headers.get("x-rate-limit-incoming") == "0") {
|
|
558
|
+
auth.deleteToken();
|
|
559
|
+
return { success: true, value };
|
|
560
|
+
} else {
|
|
561
|
+
return { success: true, value };
|
|
562
|
+
}
|
|
563
|
+
}
|
|
564
|
+
async function flexParseJson(res) {
|
|
565
|
+
try {
|
|
566
|
+
return await res.json();
|
|
567
|
+
} catch {
|
|
568
|
+
log$1("Failed to parse response as JSON, trying text parse...");
|
|
569
|
+
const text = await res.text();
|
|
570
|
+
log$1("Response text:", text);
|
|
571
|
+
return JSON.parse(text);
|
|
572
|
+
}
|
|
573
|
+
}
|
|
574
|
+
function addApiFeatures(o) {
|
|
575
|
+
return {
|
|
576
|
+
...o,
|
|
577
|
+
rweb_lists_timeline_redesign_enabled: true,
|
|
578
|
+
responsive_web_graphql_exclude_directive_enabled: true,
|
|
579
|
+
verified_phone_label_enabled: false,
|
|
580
|
+
creator_subscriptions_tweet_preview_api_enabled: true,
|
|
581
|
+
responsive_web_graphql_timeline_navigation_enabled: true,
|
|
582
|
+
responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
|
|
583
|
+
tweetypie_unmention_optimization_enabled: true,
|
|
584
|
+
responsive_web_edit_tweet_api_enabled: true,
|
|
585
|
+
graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
|
|
586
|
+
view_counts_everywhere_api_enabled: true,
|
|
587
|
+
longform_notetweets_consumption_enabled: true,
|
|
588
|
+
tweet_awards_web_tipping_enabled: false,
|
|
589
|
+
freedom_of_speech_not_reach_fetch_enabled: true,
|
|
590
|
+
standardized_nudges_misinfo: true,
|
|
591
|
+
longform_notetweets_rich_text_read_enabled: true,
|
|
592
|
+
responsive_web_enhance_cards_enabled: false,
|
|
593
|
+
subscriptions_verification_info_enabled: true,
|
|
594
|
+
subscriptions_verification_info_reason_enabled: true,
|
|
595
|
+
subscriptions_verification_info_verified_since_enabled: true,
|
|
596
|
+
super_follow_badge_privacy_enabled: false,
|
|
597
|
+
super_follow_exclusive_tweet_notifications_enabled: false,
|
|
598
|
+
super_follow_tweet_api_enabled: false,
|
|
599
|
+
super_follow_user_api_enabled: false,
|
|
600
|
+
android_graphql_skip_api_media_color_palette: false,
|
|
601
|
+
creator_subscriptions_subscription_count_enabled: false,
|
|
602
|
+
blue_business_profile_image_shape_enabled: false,
|
|
603
|
+
unified_cards_ad_metadata_container_dynamic_card_content_query_enabled: false
|
|
604
|
+
};
|
|
605
|
+
}
|
|
606
|
+
function addApiParams(params, includeTweetReplies) {
|
|
607
|
+
params.set("include_profile_interstitial_type", "1");
|
|
608
|
+
params.set("include_blocking", "1");
|
|
609
|
+
params.set("include_blocked_by", "1");
|
|
610
|
+
params.set("include_followed_by", "1");
|
|
611
|
+
params.set("include_want_retweets", "1");
|
|
612
|
+
params.set("include_mute_edge", "1");
|
|
613
|
+
params.set("include_can_dm", "1");
|
|
614
|
+
params.set("include_can_media_tag", "1");
|
|
615
|
+
params.set("include_ext_has_nft_avatar", "1");
|
|
616
|
+
params.set("include_ext_is_blue_verified", "1");
|
|
617
|
+
params.set("include_ext_verified_type", "1");
|
|
618
|
+
params.set("skip_status", "1");
|
|
619
|
+
params.set("cards_platform", "Web-12");
|
|
620
|
+
params.set("include_cards", "1");
|
|
621
|
+
params.set("include_ext_alt_text", "true");
|
|
622
|
+
params.set("include_ext_limited_action_results", "false");
|
|
623
|
+
params.set("include_quote_count", "true");
|
|
624
|
+
params.set("include_reply_count", "1");
|
|
625
|
+
params.set("tweet_mode", "extended");
|
|
626
|
+
params.set("include_ext_collab_control", "true");
|
|
627
|
+
params.set("include_ext_views", "true");
|
|
628
|
+
params.set("include_entities", "true");
|
|
629
|
+
params.set("include_user_entities", "true");
|
|
630
|
+
params.set("include_ext_media_color", "true");
|
|
631
|
+
params.set("include_ext_media_availability", "true");
|
|
632
|
+
params.set("include_ext_sensitive_media_warning", "true");
|
|
633
|
+
params.set("include_ext_trusted_friends_metadata", "true");
|
|
634
|
+
params.set("send_error_codes", "true");
|
|
635
|
+
params.set("simple_quoted_tweet", "true");
|
|
636
|
+
params.set("include_tweet_replies", `${includeTweetReplies}`);
|
|
637
|
+
params.set(
|
|
638
|
+
"ext",
|
|
639
|
+
"mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe"
|
|
640
|
+
);
|
|
641
|
+
return params;
|
|
642
|
+
}
|
|
643
|
+
|
|
421
644
|
const log = debug("twitter-scraper:auth-user");
|
|
422
645
|
const TwitterUserAuthSubtask = Type.Object({
|
|
423
646
|
subtask_id: Type.String(),
|
|
@@ -525,25 +748,26 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
525
748
|
this.jar = new CookieJar();
|
|
526
749
|
}
|
|
527
750
|
}
|
|
528
|
-
async
|
|
529
|
-
const
|
|
530
|
-
|
|
531
|
-
if (xCsrfToken) {
|
|
532
|
-
headers.set("x-csrf-token", xCsrfToken.value);
|
|
533
|
-
}
|
|
534
|
-
}
|
|
535
|
-
async installTo(headers) {
|
|
536
|
-
headers.set("authorization", `Bearer ${this.bearerToken}`);
|
|
537
|
-
const cookie = await this.getCookieString();
|
|
538
|
-
headers.set("cookie", cookie);
|
|
539
|
-
if (this.guestToken) {
|
|
540
|
-
headers.set("x-guest-token", this.guestToken);
|
|
541
|
-
}
|
|
751
|
+
async installTo(headers, _url, bearerTokenOverride) {
|
|
752
|
+
const tokenToUse = bearerTokenOverride ?? this.bearerToken;
|
|
753
|
+
headers.set("authorization", `Bearer ${tokenToUse}`);
|
|
542
754
|
headers.set(
|
|
543
755
|
"user-agent",
|
|
544
756
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
545
757
|
);
|
|
758
|
+
if (this.guestToken) {
|
|
759
|
+
headers.set("x-guest-token", this.guestToken);
|
|
760
|
+
}
|
|
546
761
|
await this.installCsrfToken(headers);
|
|
762
|
+
if (this.options?.experimental?.xpff) {
|
|
763
|
+
const guestId = await this.guestId();
|
|
764
|
+
if (guestId != null) {
|
|
765
|
+
const xpffHeader = await generateXPFFHeader(guestId);
|
|
766
|
+
headers.set("x-xp-forwarded-for", xpffHeader);
|
|
767
|
+
}
|
|
768
|
+
}
|
|
769
|
+
const cookie = await this.getCookieString();
|
|
770
|
+
headers.set("cookie", cookie);
|
|
547
771
|
}
|
|
548
772
|
async initLogin() {
|
|
549
773
|
this.removeCookie("twitter_ads_id=");
|
|
@@ -748,12 +972,6 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
748
972
|
onboardingTaskUrl = `https://api.x.com/1.1/onboarding/task.json?flow_name=${data.flow_name}`;
|
|
749
973
|
}
|
|
750
974
|
log(`Making POST request to ${onboardingTaskUrl}`);
|
|
751
|
-
const token = this.guestToken;
|
|
752
|
-
if (token == null) {
|
|
753
|
-
throw new AuthenticationError(
|
|
754
|
-
"Authentication token is null or undefined."
|
|
755
|
-
);
|
|
756
|
-
}
|
|
757
975
|
const headers = new Headers({
|
|
758
976
|
accept: "*/*",
|
|
759
977
|
"accept-language": "en-US,en;q=0.9",
|
|
@@ -770,12 +988,19 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
770
988
|
"sec-fetch-mode": "cors",
|
|
771
989
|
"sec-fetch-site": "same-origin",
|
|
772
990
|
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
|
|
773
|
-
"x-guest-token": token,
|
|
774
991
|
"x-twitter-auth-type": "OAuth2Client",
|
|
775
992
|
"x-twitter-active-user": "yes",
|
|
776
993
|
"x-twitter-client-language": "en"
|
|
777
994
|
});
|
|
778
|
-
await this.installTo(headers);
|
|
995
|
+
await this.installTo(headers, onboardingTaskUrl);
|
|
996
|
+
if (this.options?.experimental?.xClientTransactionId) {
|
|
997
|
+
const transactionId = await generateTransactionId(
|
|
998
|
+
onboardingTaskUrl,
|
|
999
|
+
this.fetch.bind(this),
|
|
1000
|
+
"POST"
|
|
1001
|
+
);
|
|
1002
|
+
headers.set("x-client-transaction-id", transactionId);
|
|
1003
|
+
}
|
|
779
1004
|
let res;
|
|
780
1005
|
do {
|
|
781
1006
|
const fetchParameters = [
|
|
@@ -1798,7 +2023,11 @@ async function getTrends(auth) {
|
|
|
1798
2023
|
params.set("entity_tokens", "false");
|
|
1799
2024
|
const res = await requestApi(
|
|
1800
2025
|
`https://api.x.com/2/guide.json?${params.toString()}`,
|
|
1801
|
-
auth
|
|
2026
|
+
auth,
|
|
2027
|
+
"GET",
|
|
2028
|
+
void 0,
|
|
2029
|
+
void 0,
|
|
2030
|
+
bearerToken2
|
|
1802
2031
|
);
|
|
1803
2032
|
if (!res.success) {
|
|
1804
2033
|
throw res.err;
|
|
@@ -1881,7 +2110,11 @@ async function fetchTweets(userId, maxTweets, cursor, auth) {
|
|
|
1881
2110
|
}
|
|
1882
2111
|
const res = await requestApi(
|
|
1883
2112
|
userTweetsRequest.toRequestUrl(),
|
|
1884
|
-
auth
|
|
2113
|
+
auth,
|
|
2114
|
+
"GET",
|
|
2115
|
+
void 0,
|
|
2116
|
+
void 0,
|
|
2117
|
+
bearerToken2
|
|
1885
2118
|
);
|
|
1886
2119
|
if (!res.success) {
|
|
1887
2120
|
throw res.err;
|
|
@@ -2027,7 +2260,11 @@ async function getTweet(id, auth) {
|
|
|
2027
2260
|
tweetDetailRequest.variables.focalTweetId = id;
|
|
2028
2261
|
const res = await requestApi(
|
|
2029
2262
|
tweetDetailRequest.toRequestUrl(),
|
|
2030
|
-
auth
|
|
2263
|
+
auth,
|
|
2264
|
+
"GET",
|
|
2265
|
+
void 0,
|
|
2266
|
+
void 0,
|
|
2267
|
+
bearerToken2
|
|
2031
2268
|
);
|
|
2032
2269
|
if (!res.success) {
|
|
2033
2270
|
throw res.err;
|
|
@@ -2043,7 +2280,11 @@ async function getTweetAnonymous(id, auth) {
|
|
|
2043
2280
|
tweetResultByRestIdRequest.variables.tweetId = id;
|
|
2044
2281
|
const res = await requestApi(
|
|
2045
2282
|
tweetResultByRestIdRequest.toRequestUrl(),
|
|
2046
|
-
auth
|
|
2283
|
+
auth,
|
|
2284
|
+
"GET",
|
|
2285
|
+
void 0,
|
|
2286
|
+
void 0,
|
|
2287
|
+
bearerToken2
|
|
2047
2288
|
);
|
|
2048
2289
|
if (!res.success) {
|
|
2049
2290
|
throw res.err;
|
|
@@ -2600,7 +2841,11 @@ class Scraper {
|
|
|
2600
2841
|
return {
|
|
2601
2842
|
fetch: this.options?.fetch,
|
|
2602
2843
|
transform: this.options?.transform,
|
|
2603
|
-
rateLimitStrategy: this.options?.rateLimitStrategy
|
|
2844
|
+
rateLimitStrategy: this.options?.rateLimitStrategy,
|
|
2845
|
+
experimental: {
|
|
2846
|
+
xClientTransactionId: this.options?.experimental?.xClientTransactionId,
|
|
2847
|
+
xpff: this.options?.experimental?.xpff
|
|
2848
|
+
}
|
|
2604
2849
|
};
|
|
2605
2850
|
}
|
|
2606
2851
|
handleResponse(res) {
|