@the-convocation/twitter-scraper 0.19.1 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/default/cjs/index.js +408 -184
- package/dist/default/cjs/index.js.map +1 -1
- package/dist/default/esm/index.mjs +408 -184
- package/dist/default/esm/index.mjs.map +1 -1
- package/dist/node/cjs/index.cjs +405 -181
- package/dist/node/cjs/index.cjs.map +1 -1
- package/dist/node/esm/index.mjs +405 -181
- package/dist/node/esm/index.mjs.map +1 -1
- package/dist/types/index.d.ts +13 -0
- package/examples/node-integration/package.json +2 -1
- package/package.json +6 -4
package/dist/node/esm/index.mjs
CHANGED
|
@@ -51,13 +51,13 @@ class AuthenticationError extends Error {
|
|
|
51
51
|
}
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
-
const log$
|
|
54
|
+
const log$6 = debug("twitter-scraper:rate-limit");
|
|
55
55
|
class WaitingRateLimitStrategy {
|
|
56
56
|
async onRateLimit({ response: res }) {
|
|
57
57
|
const xRateLimitLimit = res.headers.get("x-rate-limit-limit");
|
|
58
58
|
const xRateLimitRemaining = res.headers.get("x-rate-limit-remaining");
|
|
59
59
|
const xRateLimitReset = res.headers.get("x-rate-limit-reset");
|
|
60
|
-
log$
|
|
60
|
+
log$6(
|
|
61
61
|
`Rate limit event: limit=${xRateLimitLimit}, remaining=${xRateLimitRemaining}, reset=${xRateLimitReset}`
|
|
62
62
|
);
|
|
63
63
|
if (xRateLimitRemaining == "0" && xRateLimitReset) {
|
|
@@ -73,20 +73,7 @@ class ErrorRateLimitStrategy {
|
|
|
73
73
|
}
|
|
74
74
|
}
|
|
75
75
|
|
|
76
|
-
|
|
77
|
-
async randomizeCiphers() {
|
|
78
|
-
const platform = await Platform.importPlatform();
|
|
79
|
-
await platform?.randomizeCiphers();
|
|
80
|
-
}
|
|
81
|
-
static async importPlatform() {
|
|
82
|
-
{
|
|
83
|
-
const { platform } = await Promise.resolve().then(function () { return index; });
|
|
84
|
-
return platform;
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
const log$3 = debug("twitter-scraper:requests");
|
|
76
|
+
const log$5 = debug("twitter-scraper:requests");
|
|
90
77
|
async function updateCookieJar(cookieJar, headers) {
|
|
91
78
|
let setCookieHeaders = [];
|
|
92
79
|
if (typeof headers.getSetCookie === "function") {
|
|
@@ -101,12 +88,12 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
101
88
|
for (const cookieStr of setCookieHeaders) {
|
|
102
89
|
const cookie = Cookie.parse(cookieStr);
|
|
103
90
|
if (!cookie) {
|
|
104
|
-
log$
|
|
91
|
+
log$5(`Failed to parse cookie: ${cookieStr.substring(0, 100)}`);
|
|
105
92
|
continue;
|
|
106
93
|
}
|
|
107
94
|
if (cookie.maxAge === 0 || cookie.expires && cookie.expires < /* @__PURE__ */ new Date()) {
|
|
108
95
|
if (cookie.key === "ct0") {
|
|
109
|
-
log$
|
|
96
|
+
log$5(`Skipping deletion of ct0 cookie (Max-Age=0)`);
|
|
110
97
|
}
|
|
111
98
|
continue;
|
|
112
99
|
}
|
|
@@ -114,7 +101,7 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
114
101
|
const url = `${cookie.secure ? "https" : "http"}://${cookie.domain}${cookie.path}`;
|
|
115
102
|
await cookieJar.setCookie(cookie, url);
|
|
116
103
|
if (cookie.key === "ct0") {
|
|
117
|
-
log$
|
|
104
|
+
log$5(
|
|
118
105
|
`Successfully set ct0 cookie with value: ${cookie.value.substring(
|
|
119
106
|
0,
|
|
120
107
|
20
|
|
@@ -122,9 +109,9 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
122
109
|
);
|
|
123
110
|
}
|
|
124
111
|
} catch (err) {
|
|
125
|
-
log$
|
|
112
|
+
log$5(`Failed to set cookie ${cookie.key}: ${err}`);
|
|
126
113
|
if (cookie.key === "ct0") {
|
|
127
|
-
log$
|
|
114
|
+
log$5(`FAILED to set ct0 cookie! Error: ${err}`);
|
|
128
115
|
}
|
|
129
116
|
}
|
|
130
117
|
}
|
|
@@ -138,141 +125,84 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
138
125
|
}
|
|
139
126
|
}
|
|
140
127
|
|
|
141
|
-
const log$
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
128
|
+
const log$4 = debug("twitter-scraper:xpff");
|
|
129
|
+
let isoCrypto = null;
|
|
130
|
+
function getCrypto() {
|
|
131
|
+
if (isoCrypto != null) {
|
|
132
|
+
return isoCrypto;
|
|
133
|
+
}
|
|
134
|
+
if (typeof crypto === "undefined") {
|
|
135
|
+
log$4("Global crypto is undefined, importing from crypto module...");
|
|
136
|
+
const { webcrypto } = require("crypto");
|
|
137
|
+
isoCrypto = webcrypto;
|
|
138
|
+
return webcrypto;
|
|
139
|
+
}
|
|
140
|
+
isoCrypto = crypto;
|
|
141
|
+
return crypto;
|
|
142
|
+
}
|
|
143
|
+
async function sha256(message) {
|
|
144
|
+
const msgBuffer = new TextEncoder().encode(message);
|
|
145
|
+
const hashBuffer = await getCrypto().subtle.digest("SHA-256", msgBuffer);
|
|
146
|
+
return new Uint8Array(hashBuffer);
|
|
147
|
+
}
|
|
148
|
+
function buf2hex(buffer) {
|
|
149
|
+
return [...new Uint8Array(buffer)].map((x) => x.toString(16).padStart(2, "0")).join("");
|
|
150
|
+
}
|
|
151
|
+
class XPFFHeaderGenerator {
|
|
152
|
+
constructor(seed) {
|
|
153
|
+
this.seed = seed;
|
|
154
|
+
}
|
|
155
|
+
async deriveKey(guestId) {
|
|
156
|
+
const combined = `${this.seed}${guestId}`;
|
|
157
|
+
const result = await sha256(combined);
|
|
158
|
+
return result;
|
|
159
|
+
}
|
|
160
|
+
async generateHeader(plaintext, guestId) {
|
|
161
|
+
log$4(`Generating XPFF key for guest ID: ${guestId}`);
|
|
162
|
+
const key = await this.deriveKey(guestId);
|
|
163
|
+
const nonce = getCrypto().getRandomValues(new Uint8Array(12));
|
|
164
|
+
const cipher = await getCrypto().subtle.importKey(
|
|
165
|
+
"raw",
|
|
166
|
+
key,
|
|
167
|
+
{ name: "AES-GCM" },
|
|
168
|
+
false,
|
|
169
|
+
["encrypt"]
|
|
170
|
+
);
|
|
171
|
+
const encrypted = await getCrypto().subtle.encrypt(
|
|
155
172
|
{
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
}
|
|
181
|
-
if (!res.ok) {
|
|
182
|
-
return {
|
|
183
|
-
success: false,
|
|
184
|
-
err: await ApiError.fromResponse(res)
|
|
185
|
-
};
|
|
186
|
-
}
|
|
187
|
-
const value = await flexParseJson(res);
|
|
188
|
-
if (res.headers.get("x-rate-limit-incoming") == "0") {
|
|
189
|
-
auth.deleteToken();
|
|
190
|
-
return { success: true, value };
|
|
191
|
-
} else {
|
|
192
|
-
return { success: true, value };
|
|
193
|
-
}
|
|
194
|
-
}
|
|
195
|
-
async function flexParseJson(res) {
|
|
196
|
-
try {
|
|
197
|
-
return await res.json();
|
|
198
|
-
} catch {
|
|
199
|
-
log$2("Failed to parse response as JSON, trying text parse...");
|
|
200
|
-
const text = await res.text();
|
|
201
|
-
log$2("Response text:", text);
|
|
202
|
-
return JSON.parse(text);
|
|
203
|
-
}
|
|
204
|
-
}
|
|
205
|
-
function addApiFeatures(o) {
|
|
206
|
-
return {
|
|
207
|
-
...o,
|
|
208
|
-
rweb_lists_timeline_redesign_enabled: true,
|
|
209
|
-
responsive_web_graphql_exclude_directive_enabled: true,
|
|
210
|
-
verified_phone_label_enabled: false,
|
|
211
|
-
creator_subscriptions_tweet_preview_api_enabled: true,
|
|
212
|
-
responsive_web_graphql_timeline_navigation_enabled: true,
|
|
213
|
-
responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
|
|
214
|
-
tweetypie_unmention_optimization_enabled: true,
|
|
215
|
-
responsive_web_edit_tweet_api_enabled: true,
|
|
216
|
-
graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
|
|
217
|
-
view_counts_everywhere_api_enabled: true,
|
|
218
|
-
longform_notetweets_consumption_enabled: true,
|
|
219
|
-
tweet_awards_web_tipping_enabled: false,
|
|
220
|
-
freedom_of_speech_not_reach_fetch_enabled: true,
|
|
221
|
-
standardized_nudges_misinfo: true,
|
|
222
|
-
longform_notetweets_rich_text_read_enabled: true,
|
|
223
|
-
responsive_web_enhance_cards_enabled: false,
|
|
224
|
-
subscriptions_verification_info_enabled: true,
|
|
225
|
-
subscriptions_verification_info_reason_enabled: true,
|
|
226
|
-
subscriptions_verification_info_verified_since_enabled: true,
|
|
227
|
-
super_follow_badge_privacy_enabled: false,
|
|
228
|
-
super_follow_exclusive_tweet_notifications_enabled: false,
|
|
229
|
-
super_follow_tweet_api_enabled: false,
|
|
230
|
-
super_follow_user_api_enabled: false,
|
|
231
|
-
android_graphql_skip_api_media_color_palette: false,
|
|
232
|
-
creator_subscriptions_subscription_count_enabled: false,
|
|
233
|
-
blue_business_profile_image_shape_enabled: false,
|
|
234
|
-
unified_cards_ad_metadata_container_dynamic_card_content_query_enabled: false
|
|
235
|
-
};
|
|
173
|
+
name: "AES-GCM",
|
|
174
|
+
iv: nonce
|
|
175
|
+
},
|
|
176
|
+
cipher,
|
|
177
|
+
new TextEncoder().encode(plaintext)
|
|
178
|
+
);
|
|
179
|
+
const combined = new Uint8Array(nonce.length + encrypted.byteLength);
|
|
180
|
+
combined.set(nonce);
|
|
181
|
+
combined.set(new Uint8Array(encrypted), nonce.length);
|
|
182
|
+
const result = buf2hex(combined);
|
|
183
|
+
log$4(`XPFF header generated for guest ID ${guestId}: ${result}`);
|
|
184
|
+
return result;
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
const xpffBaseKey = "0e6be1f1e21ffc33590b888fd4dc81b19713e570e805d4e5df80a493c9571a05";
|
|
188
|
+
function xpffPlain() {
|
|
189
|
+
const timestamp = Date.now();
|
|
190
|
+
return JSON.stringify({
|
|
191
|
+
navigator_properties: {
|
|
192
|
+
hasBeenActive: "true",
|
|
193
|
+
userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
|
|
194
|
+
webdriver: "false"
|
|
195
|
+
},
|
|
196
|
+
created_at: timestamp
|
|
197
|
+
});
|
|
236
198
|
}
|
|
237
|
-
function
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
params.set("include_followed_by", "1");
|
|
242
|
-
params.set("include_want_retweets", "1");
|
|
243
|
-
params.set("include_mute_edge", "1");
|
|
244
|
-
params.set("include_can_dm", "1");
|
|
245
|
-
params.set("include_can_media_tag", "1");
|
|
246
|
-
params.set("include_ext_has_nft_avatar", "1");
|
|
247
|
-
params.set("include_ext_is_blue_verified", "1");
|
|
248
|
-
params.set("include_ext_verified_type", "1");
|
|
249
|
-
params.set("skip_status", "1");
|
|
250
|
-
params.set("cards_platform", "Web-12");
|
|
251
|
-
params.set("include_cards", "1");
|
|
252
|
-
params.set("include_ext_alt_text", "true");
|
|
253
|
-
params.set("include_ext_limited_action_results", "false");
|
|
254
|
-
params.set("include_quote_count", "true");
|
|
255
|
-
params.set("include_reply_count", "1");
|
|
256
|
-
params.set("tweet_mode", "extended");
|
|
257
|
-
params.set("include_ext_collab_control", "true");
|
|
258
|
-
params.set("include_ext_views", "true");
|
|
259
|
-
params.set("include_entities", "true");
|
|
260
|
-
params.set("include_user_entities", "true");
|
|
261
|
-
params.set("include_ext_media_color", "true");
|
|
262
|
-
params.set("include_ext_media_availability", "true");
|
|
263
|
-
params.set("include_ext_sensitive_media_warning", "true");
|
|
264
|
-
params.set("include_ext_trusted_friends_metadata", "true");
|
|
265
|
-
params.set("send_error_codes", "true");
|
|
266
|
-
params.set("simple_quoted_tweet", "true");
|
|
267
|
-
params.set("include_tweet_replies", `${includeTweetReplies}`);
|
|
268
|
-
params.set(
|
|
269
|
-
"ext",
|
|
270
|
-
"mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe"
|
|
271
|
-
);
|
|
272
|
-
return params;
|
|
199
|
+
async function generateXPFFHeader(guestId) {
|
|
200
|
+
const generator = new XPFFHeaderGenerator(xpffBaseKey);
|
|
201
|
+
const plaintext = xpffPlain();
|
|
202
|
+
return generator.generateHeader(plaintext, guestId);
|
|
273
203
|
}
|
|
274
204
|
|
|
275
|
-
const log$
|
|
205
|
+
const log$3 = debug("twitter-scraper:auth");
|
|
276
206
|
function withTransform(fetchFn, transform) {
|
|
277
207
|
return async (input, init) => {
|
|
278
208
|
const fetchArgs = await transform?.request?.(input, init) ?? [
|
|
@@ -326,24 +256,30 @@ class TwitterGuestAuth {
|
|
|
326
256
|
if (this.shouldUpdate()) {
|
|
327
257
|
await this.updateGuestToken();
|
|
328
258
|
}
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
throw new AuthenticationError(
|
|
332
|
-
"Authentication token is null or undefined."
|
|
333
|
-
);
|
|
259
|
+
if (this.guestToken) {
|
|
260
|
+
headers.set("x-guest-token", this.guestToken);
|
|
334
261
|
}
|
|
335
262
|
headers.set("authorization", `Bearer ${this.bearerToken}`);
|
|
336
|
-
headers.set("x-guest-token", token);
|
|
337
263
|
headers.set(
|
|
338
264
|
"user-agent",
|
|
339
265
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
340
266
|
);
|
|
267
|
+
await this.installCsrfToken(headers);
|
|
268
|
+
if (this.options?.experimental?.xpff) {
|
|
269
|
+
const guestId = await this.guestId();
|
|
270
|
+
if (guestId != null) {
|
|
271
|
+
const xpffHeader = await generateXPFFHeader(guestId);
|
|
272
|
+
headers.set("x-xp-forwarded-for", xpffHeader);
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
headers.set("cookie", await this.getCookieString());
|
|
276
|
+
}
|
|
277
|
+
async installCsrfToken(headers) {
|
|
341
278
|
const cookies = await this.getCookies();
|
|
342
279
|
const xCsrfToken = cookies.find((cookie) => cookie.key === "ct0");
|
|
343
280
|
if (xCsrfToken) {
|
|
344
281
|
headers.set("x-csrf-token", xCsrfToken.value);
|
|
345
282
|
}
|
|
346
|
-
headers.set("cookie", await this.getCookieString());
|
|
347
283
|
}
|
|
348
284
|
async setCookie(key, value) {
|
|
349
285
|
const cookie = Cookie.parse(`${key}=${value}`);
|
|
@@ -376,16 +312,28 @@ class TwitterGuestAuth {
|
|
|
376
312
|
getCookieJarUrl() {
|
|
377
313
|
return typeof document !== "undefined" ? document.location.toString() : "https://x.com";
|
|
378
314
|
}
|
|
315
|
+
async guestId() {
|
|
316
|
+
const cookies = await this.getCookies();
|
|
317
|
+
const guestIdCookie = cookies.find((cookie) => cookie.key === "guest_id");
|
|
318
|
+
return guestIdCookie ? guestIdCookie.value : null;
|
|
319
|
+
}
|
|
379
320
|
/**
|
|
380
321
|
* Updates the authentication state with a new guest token from the Twitter API.
|
|
381
322
|
*/
|
|
382
323
|
async updateGuestToken() {
|
|
324
|
+
try {
|
|
325
|
+
await this.updateGuestTokenCore();
|
|
326
|
+
} catch (err) {
|
|
327
|
+
log$3("Failed to update guest token; this may cause issues:", err);
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
async updateGuestTokenCore() {
|
|
383
331
|
const guestActivateUrl = "https://api.x.com/1.1/guest/activate.json";
|
|
384
332
|
const headers = new Headers({
|
|
385
333
|
Authorization: `Bearer ${this.bearerToken}`,
|
|
386
334
|
Cookie: await this.getCookieString()
|
|
387
335
|
});
|
|
388
|
-
log$
|
|
336
|
+
log$3(`Making POST request to ${guestActivateUrl}`);
|
|
389
337
|
const res = await this.fetch(guestActivateUrl, {
|
|
390
338
|
method: "POST",
|
|
391
339
|
headers,
|
|
@@ -406,7 +354,7 @@ class TwitterGuestAuth {
|
|
|
406
354
|
this.guestToken = newGuestToken;
|
|
407
355
|
this.guestCreatedAt = /* @__PURE__ */ new Date();
|
|
408
356
|
await this.setCookie("gt", newGuestToken);
|
|
409
|
-
log$
|
|
357
|
+
log$3(`Updated guest token: ${newGuestToken}`);
|
|
410
358
|
}
|
|
411
359
|
/**
|
|
412
360
|
* Returns if the authentication token needs to be updated or not.
|
|
@@ -417,6 +365,277 @@ class TwitterGuestAuth {
|
|
|
417
365
|
}
|
|
418
366
|
}
|
|
419
367
|
|
|
368
|
+
class Platform {
|
|
369
|
+
async randomizeCiphers() {
|
|
370
|
+
const platform = await Platform.importPlatform();
|
|
371
|
+
await platform?.randomizeCiphers();
|
|
372
|
+
}
|
|
373
|
+
static async importPlatform() {
|
|
374
|
+
{
|
|
375
|
+
const { platform } = await Promise.resolve().then(function () { return index; });
|
|
376
|
+
return platform;
|
|
377
|
+
}
|
|
378
|
+
}
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
const log$2 = debug("twitter-scraper:xctxid");
|
|
382
|
+
let linkedom = null;
|
|
383
|
+
function linkedomImport() {
|
|
384
|
+
if (!linkedom) {
|
|
385
|
+
const mod = require("linkedom");
|
|
386
|
+
linkedom = mod;
|
|
387
|
+
return mod;
|
|
388
|
+
}
|
|
389
|
+
return linkedom;
|
|
390
|
+
}
|
|
391
|
+
async function parseHTML(html) {
|
|
392
|
+
if (typeof window !== "undefined") {
|
|
393
|
+
const { defaultView } = new DOMParser().parseFromString(html, "text/html");
|
|
394
|
+
if (!defaultView) {
|
|
395
|
+
throw new Error("Failed to get defaultView from parsed HTML.");
|
|
396
|
+
}
|
|
397
|
+
return defaultView;
|
|
398
|
+
} else {
|
|
399
|
+
const { DOMParser: DOMParser2 } = linkedomImport();
|
|
400
|
+
return new DOMParser2().parseFromString(html, "text/html").defaultView;
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
async function handleXMigration(fetchFn) {
|
|
404
|
+
const headers = {
|
|
405
|
+
accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
|
406
|
+
"accept-language": "ja",
|
|
407
|
+
"cache-control": "no-cache",
|
|
408
|
+
pragma: "no-cache",
|
|
409
|
+
priority: "u=0, i",
|
|
410
|
+
"sec-ch-ua": '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
|
|
411
|
+
"sec-ch-ua-mobile": "?0",
|
|
412
|
+
"sec-ch-ua-platform": '"Windows"',
|
|
413
|
+
"sec-fetch-dest": "document",
|
|
414
|
+
"sec-fetch-mode": "navigate",
|
|
415
|
+
"sec-fetch-site": "none",
|
|
416
|
+
"sec-fetch-user": "?1",
|
|
417
|
+
"upgrade-insecure-requests": "1",
|
|
418
|
+
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
419
|
+
};
|
|
420
|
+
const response = await fetchFn("https://x.com", {
|
|
421
|
+
headers
|
|
422
|
+
});
|
|
423
|
+
if (!response.ok) {
|
|
424
|
+
throw new Error(`Failed to fetch X homepage: ${response.statusText}`);
|
|
425
|
+
}
|
|
426
|
+
const htmlText = await response.text();
|
|
427
|
+
let dom = await parseHTML(htmlText);
|
|
428
|
+
let document = dom.window.document;
|
|
429
|
+
const migrationRedirectionRegex = new RegExp(
|
|
430
|
+
"(http(?:s)?://(?:www\\.)?(twitter|x){1}\\.com(/x)?/migrate([/?])?tok=[a-zA-Z0-9%\\-_]+)+",
|
|
431
|
+
"i"
|
|
432
|
+
);
|
|
433
|
+
const metaRefresh = document.querySelector("meta[http-equiv='refresh']");
|
|
434
|
+
const metaContent = metaRefresh ? metaRefresh.getAttribute("content") || "" : "";
|
|
435
|
+
const migrationRedirectionUrl = migrationRedirectionRegex.exec(metaContent) || migrationRedirectionRegex.exec(htmlText);
|
|
436
|
+
if (migrationRedirectionUrl) {
|
|
437
|
+
const redirectResponse = await fetch(migrationRedirectionUrl[0]);
|
|
438
|
+
if (!redirectResponse.ok) {
|
|
439
|
+
throw new Error(
|
|
440
|
+
`Failed to follow migration redirection: ${redirectResponse.statusText}`
|
|
441
|
+
);
|
|
442
|
+
}
|
|
443
|
+
const redirectHtml = await redirectResponse.text();
|
|
444
|
+
dom = await parseHTML(redirectHtml);
|
|
445
|
+
document = dom.window.document;
|
|
446
|
+
}
|
|
447
|
+
const migrationForm = document.querySelector("form[name='f']") || document.querySelector("form[action='https://x.com/x/migrate']");
|
|
448
|
+
if (migrationForm) {
|
|
449
|
+
const url = migrationForm.getAttribute("action") || "https://x.com/x/migrate";
|
|
450
|
+
const method = migrationForm.getAttribute("method") || "POST";
|
|
451
|
+
const requestPayload = new FormData();
|
|
452
|
+
const inputFields = migrationForm.querySelectorAll("input");
|
|
453
|
+
for (const element of Array.from(inputFields)) {
|
|
454
|
+
const name = element.getAttribute("name");
|
|
455
|
+
const value = element.getAttribute("value");
|
|
456
|
+
if (name && value) {
|
|
457
|
+
requestPayload.append(name, value);
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
const formResponse = await fetch(url, {
|
|
461
|
+
method,
|
|
462
|
+
body: requestPayload,
|
|
463
|
+
headers
|
|
464
|
+
});
|
|
465
|
+
if (!formResponse.ok) {
|
|
466
|
+
throw new Error(
|
|
467
|
+
`Failed to submit migration form: ${formResponse.statusText}`
|
|
468
|
+
);
|
|
469
|
+
}
|
|
470
|
+
const formHtml = await formResponse.text();
|
|
471
|
+
dom = await parseHTML(formHtml);
|
|
472
|
+
document = dom.window.document;
|
|
473
|
+
}
|
|
474
|
+
return document;
|
|
475
|
+
}
|
|
476
|
+
let ClientTransaction = null;
|
|
477
|
+
function clientTransaction() {
|
|
478
|
+
if (!ClientTransaction) {
|
|
479
|
+
const mod = require("x-client-transaction-id");
|
|
480
|
+
const ctx = mod.ClientTransaction;
|
|
481
|
+
ClientTransaction = ctx;
|
|
482
|
+
return ctx;
|
|
483
|
+
}
|
|
484
|
+
return ClientTransaction;
|
|
485
|
+
}
|
|
486
|
+
async function generateTransactionId(url, fetchFn, method) {
|
|
487
|
+
const parsedUrl = new URL(url);
|
|
488
|
+
const path = parsedUrl.pathname;
|
|
489
|
+
log$2(`Generating transaction ID for ${method} ${path}`);
|
|
490
|
+
const document = await handleXMigration(fetchFn);
|
|
491
|
+
const transaction = await clientTransaction().create(document);
|
|
492
|
+
const transactionId = await transaction.generateTransactionId(method, path);
|
|
493
|
+
log$2(`Transaction ID: ${transactionId}`);
|
|
494
|
+
return transactionId;
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
const log$1 = debug("twitter-scraper:api");
|
|
498
|
+
const bearerToken = "AAAAAAAAAAAAAAAAAAAAAFQODgEAAAAAVHTp76lzh3rFzcHbmHVvQxYYpTw%3DckAlMINMjmCwxUcaXbAN4XqJVdgMJaHqNOFgPMK0zN1qLqLQCF";
|
|
499
|
+
async function jitter(maxMs) {
|
|
500
|
+
const jitter2 = Math.random() * maxMs;
|
|
501
|
+
await new Promise((resolve) => setTimeout(resolve, jitter2));
|
|
502
|
+
}
|
|
503
|
+
async function requestApi(url, auth, method = "GET", platform = new Platform(), headers = new Headers()) {
|
|
504
|
+
log$1(`Making ${method} request to ${url}`);
|
|
505
|
+
await auth.installTo(headers, url);
|
|
506
|
+
await platform.randomizeCiphers();
|
|
507
|
+
if (auth instanceof TwitterGuestAuth && auth.options?.experimental?.xClientTransactionId) {
|
|
508
|
+
const transactionId = await generateTransactionId(
|
|
509
|
+
url,
|
|
510
|
+
auth.fetch.bind(auth),
|
|
511
|
+
method
|
|
512
|
+
);
|
|
513
|
+
headers.set("x-client-transaction-id", transactionId);
|
|
514
|
+
}
|
|
515
|
+
let res;
|
|
516
|
+
do {
|
|
517
|
+
const fetchParameters = [
|
|
518
|
+
url,
|
|
519
|
+
{
|
|
520
|
+
method,
|
|
521
|
+
headers,
|
|
522
|
+
credentials: "include"
|
|
523
|
+
}
|
|
524
|
+
];
|
|
525
|
+
try {
|
|
526
|
+
res = await auth.fetch(...fetchParameters);
|
|
527
|
+
} catch (err) {
|
|
528
|
+
if (!(err instanceof Error)) {
|
|
529
|
+
throw err;
|
|
530
|
+
}
|
|
531
|
+
return {
|
|
532
|
+
success: false,
|
|
533
|
+
err: new Error("Failed to perform request.")
|
|
534
|
+
};
|
|
535
|
+
}
|
|
536
|
+
await updateCookieJar(auth.cookieJar(), res.headers);
|
|
537
|
+
if (res.status === 429) {
|
|
538
|
+
log$1("Rate limit hit, waiting for retry...");
|
|
539
|
+
await auth.onRateLimit({
|
|
540
|
+
fetchParameters,
|
|
541
|
+
response: res
|
|
542
|
+
});
|
|
543
|
+
}
|
|
544
|
+
} while (res.status === 429);
|
|
545
|
+
if (!res.ok) {
|
|
546
|
+
return {
|
|
547
|
+
success: false,
|
|
548
|
+
err: await ApiError.fromResponse(res)
|
|
549
|
+
};
|
|
550
|
+
}
|
|
551
|
+
const value = await flexParseJson(res);
|
|
552
|
+
if (res.headers.get("x-rate-limit-incoming") == "0") {
|
|
553
|
+
auth.deleteToken();
|
|
554
|
+
return { success: true, value };
|
|
555
|
+
} else {
|
|
556
|
+
return { success: true, value };
|
|
557
|
+
}
|
|
558
|
+
}
|
|
559
|
+
async function flexParseJson(res) {
|
|
560
|
+
try {
|
|
561
|
+
return await res.json();
|
|
562
|
+
} catch {
|
|
563
|
+
log$1("Failed to parse response as JSON, trying text parse...");
|
|
564
|
+
const text = await res.text();
|
|
565
|
+
log$1("Response text:", text);
|
|
566
|
+
return JSON.parse(text);
|
|
567
|
+
}
|
|
568
|
+
}
|
|
569
|
+
function addApiFeatures(o) {
|
|
570
|
+
return {
|
|
571
|
+
...o,
|
|
572
|
+
rweb_lists_timeline_redesign_enabled: true,
|
|
573
|
+
responsive_web_graphql_exclude_directive_enabled: true,
|
|
574
|
+
verified_phone_label_enabled: false,
|
|
575
|
+
creator_subscriptions_tweet_preview_api_enabled: true,
|
|
576
|
+
responsive_web_graphql_timeline_navigation_enabled: true,
|
|
577
|
+
responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
|
|
578
|
+
tweetypie_unmention_optimization_enabled: true,
|
|
579
|
+
responsive_web_edit_tweet_api_enabled: true,
|
|
580
|
+
graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
|
|
581
|
+
view_counts_everywhere_api_enabled: true,
|
|
582
|
+
longform_notetweets_consumption_enabled: true,
|
|
583
|
+
tweet_awards_web_tipping_enabled: false,
|
|
584
|
+
freedom_of_speech_not_reach_fetch_enabled: true,
|
|
585
|
+
standardized_nudges_misinfo: true,
|
|
586
|
+
longform_notetweets_rich_text_read_enabled: true,
|
|
587
|
+
responsive_web_enhance_cards_enabled: false,
|
|
588
|
+
subscriptions_verification_info_enabled: true,
|
|
589
|
+
subscriptions_verification_info_reason_enabled: true,
|
|
590
|
+
subscriptions_verification_info_verified_since_enabled: true,
|
|
591
|
+
super_follow_badge_privacy_enabled: false,
|
|
592
|
+
super_follow_exclusive_tweet_notifications_enabled: false,
|
|
593
|
+
super_follow_tweet_api_enabled: false,
|
|
594
|
+
super_follow_user_api_enabled: false,
|
|
595
|
+
android_graphql_skip_api_media_color_palette: false,
|
|
596
|
+
creator_subscriptions_subscription_count_enabled: false,
|
|
597
|
+
blue_business_profile_image_shape_enabled: false,
|
|
598
|
+
unified_cards_ad_metadata_container_dynamic_card_content_query_enabled: false
|
|
599
|
+
};
|
|
600
|
+
}
|
|
601
|
+
function addApiParams(params, includeTweetReplies) {
|
|
602
|
+
params.set("include_profile_interstitial_type", "1");
|
|
603
|
+
params.set("include_blocking", "1");
|
|
604
|
+
params.set("include_blocked_by", "1");
|
|
605
|
+
params.set("include_followed_by", "1");
|
|
606
|
+
params.set("include_want_retweets", "1");
|
|
607
|
+
params.set("include_mute_edge", "1");
|
|
608
|
+
params.set("include_can_dm", "1");
|
|
609
|
+
params.set("include_can_media_tag", "1");
|
|
610
|
+
params.set("include_ext_has_nft_avatar", "1");
|
|
611
|
+
params.set("include_ext_is_blue_verified", "1");
|
|
612
|
+
params.set("include_ext_verified_type", "1");
|
|
613
|
+
params.set("skip_status", "1");
|
|
614
|
+
params.set("cards_platform", "Web-12");
|
|
615
|
+
params.set("include_cards", "1");
|
|
616
|
+
params.set("include_ext_alt_text", "true");
|
|
617
|
+
params.set("include_ext_limited_action_results", "false");
|
|
618
|
+
params.set("include_quote_count", "true");
|
|
619
|
+
params.set("include_reply_count", "1");
|
|
620
|
+
params.set("tweet_mode", "extended");
|
|
621
|
+
params.set("include_ext_collab_control", "true");
|
|
622
|
+
params.set("include_ext_views", "true");
|
|
623
|
+
params.set("include_entities", "true");
|
|
624
|
+
params.set("include_user_entities", "true");
|
|
625
|
+
params.set("include_ext_media_color", "true");
|
|
626
|
+
params.set("include_ext_media_availability", "true");
|
|
627
|
+
params.set("include_ext_sensitive_media_warning", "true");
|
|
628
|
+
params.set("include_ext_trusted_friends_metadata", "true");
|
|
629
|
+
params.set("send_error_codes", "true");
|
|
630
|
+
params.set("simple_quoted_tweet", "true");
|
|
631
|
+
params.set("include_tweet_replies", `${includeTweetReplies}`);
|
|
632
|
+
params.set(
|
|
633
|
+
"ext",
|
|
634
|
+
"mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe"
|
|
635
|
+
);
|
|
636
|
+
return params;
|
|
637
|
+
}
|
|
638
|
+
|
|
420
639
|
const log = debug("twitter-scraper:auth-user");
|
|
421
640
|
const TwitterUserAuthSubtask = Type.Object({
|
|
422
641
|
subtask_id: Type.String(),
|
|
@@ -524,25 +743,25 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
524
743
|
this.jar = new CookieJar();
|
|
525
744
|
}
|
|
526
745
|
}
|
|
527
|
-
async installCsrfToken(headers) {
|
|
528
|
-
const cookies = await this.getCookies();
|
|
529
|
-
const xCsrfToken = cookies.find((cookie) => cookie.key === "ct0");
|
|
530
|
-
if (xCsrfToken) {
|
|
531
|
-
headers.set("x-csrf-token", xCsrfToken.value);
|
|
532
|
-
}
|
|
533
|
-
}
|
|
534
746
|
async installTo(headers) {
|
|
535
747
|
headers.set("authorization", `Bearer ${this.bearerToken}`);
|
|
536
|
-
const cookie = await this.getCookieString();
|
|
537
|
-
headers.set("cookie", cookie);
|
|
538
|
-
if (this.guestToken) {
|
|
539
|
-
headers.set("x-guest-token", this.guestToken);
|
|
540
|
-
}
|
|
541
748
|
headers.set(
|
|
542
749
|
"user-agent",
|
|
543
750
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
544
751
|
);
|
|
752
|
+
if (this.guestToken) {
|
|
753
|
+
headers.set("x-guest-token", this.guestToken);
|
|
754
|
+
}
|
|
545
755
|
await this.installCsrfToken(headers);
|
|
756
|
+
if (this.options?.experimental?.xpff) {
|
|
757
|
+
const guestId = await this.guestId();
|
|
758
|
+
if (guestId != null) {
|
|
759
|
+
const xpffHeader = await generateXPFFHeader(guestId);
|
|
760
|
+
headers.set("x-xp-forwarded-for", xpffHeader);
|
|
761
|
+
}
|
|
762
|
+
}
|
|
763
|
+
const cookie = await this.getCookieString();
|
|
764
|
+
headers.set("cookie", cookie);
|
|
546
765
|
}
|
|
547
766
|
async initLogin() {
|
|
548
767
|
this.removeCookie("twitter_ads_id=");
|
|
@@ -747,12 +966,6 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
747
966
|
onboardingTaskUrl = `https://api.x.com/1.1/onboarding/task.json?flow_name=${data.flow_name}`;
|
|
748
967
|
}
|
|
749
968
|
log(`Making POST request to ${onboardingTaskUrl}`);
|
|
750
|
-
const token = this.guestToken;
|
|
751
|
-
if (token == null) {
|
|
752
|
-
throw new AuthenticationError(
|
|
753
|
-
"Authentication token is null or undefined."
|
|
754
|
-
);
|
|
755
|
-
}
|
|
756
969
|
const headers = new Headers({
|
|
757
970
|
accept: "*/*",
|
|
758
971
|
"accept-language": "en-US,en;q=0.9",
|
|
@@ -769,12 +982,19 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
769
982
|
"sec-fetch-mode": "cors",
|
|
770
983
|
"sec-fetch-site": "same-origin",
|
|
771
984
|
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
|
|
772
|
-
"x-guest-token": token,
|
|
773
985
|
"x-twitter-auth-type": "OAuth2Client",
|
|
774
986
|
"x-twitter-active-user": "yes",
|
|
775
987
|
"x-twitter-client-language": "en"
|
|
776
988
|
});
|
|
777
989
|
await this.installTo(headers);
|
|
990
|
+
if (this.options?.experimental?.xClientTransactionId) {
|
|
991
|
+
const transactionId = await generateTransactionId(
|
|
992
|
+
onboardingTaskUrl,
|
|
993
|
+
this.fetch.bind(this),
|
|
994
|
+
"POST"
|
|
995
|
+
);
|
|
996
|
+
headers.set("x-client-transaction-id", transactionId);
|
|
997
|
+
}
|
|
778
998
|
let res;
|
|
779
999
|
do {
|
|
780
1000
|
const fetchParameters = [
|
|
@@ -2599,7 +2819,11 @@ class Scraper {
|
|
|
2599
2819
|
return {
|
|
2600
2820
|
fetch: this.options?.fetch,
|
|
2601
2821
|
transform: this.options?.transform,
|
|
2602
|
-
rateLimitStrategy: this.options?.rateLimitStrategy
|
|
2822
|
+
rateLimitStrategy: this.options?.rateLimitStrategy,
|
|
2823
|
+
experimental: {
|
|
2824
|
+
xClientTransactionId: this.options?.experimental?.xClientTransactionId,
|
|
2825
|
+
xpff: this.options?.experimental?.xpff
|
|
2826
|
+
}
|
|
2603
2827
|
};
|
|
2604
2828
|
}
|
|
2605
2829
|
handleResponse(res) {
|