@the-convocation/twitter-scraper 0.19.1 → 0.20.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/default/cjs/index.js +441 -196
- package/dist/default/cjs/index.js.map +1 -1
- package/dist/default/esm/index.mjs +441 -196
- package/dist/default/esm/index.mjs.map +1 -1
- package/dist/node/cjs/index.cjs +438 -193
- package/dist/node/cjs/index.cjs.map +1 -1
- package/dist/node/esm/index.mjs +438 -193
- package/dist/node/esm/index.mjs.map +1 -1
- package/dist/types/index.d.ts +13 -0
- package/examples/node-integration/package.json +2 -1
- package/package.json +6 -4
package/dist/node/esm/index.mjs
CHANGED
|
@@ -51,13 +51,13 @@ class AuthenticationError extends Error {
|
|
|
51
51
|
}
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
-
const log$
|
|
54
|
+
const log$6 = debug("twitter-scraper:rate-limit");
|
|
55
55
|
class WaitingRateLimitStrategy {
|
|
56
56
|
async onRateLimit({ response: res }) {
|
|
57
57
|
const xRateLimitLimit = res.headers.get("x-rate-limit-limit");
|
|
58
58
|
const xRateLimitRemaining = res.headers.get("x-rate-limit-remaining");
|
|
59
59
|
const xRateLimitReset = res.headers.get("x-rate-limit-reset");
|
|
60
|
-
log$
|
|
60
|
+
log$6(
|
|
61
61
|
`Rate limit event: limit=${xRateLimitLimit}, remaining=${xRateLimitRemaining}, reset=${xRateLimitReset}`
|
|
62
62
|
);
|
|
63
63
|
if (xRateLimitRemaining == "0" && xRateLimitReset) {
|
|
@@ -73,20 +73,7 @@ class ErrorRateLimitStrategy {
|
|
|
73
73
|
}
|
|
74
74
|
}
|
|
75
75
|
|
|
76
|
-
|
|
77
|
-
async randomizeCiphers() {
|
|
78
|
-
const platform = await Platform.importPlatform();
|
|
79
|
-
await platform?.randomizeCiphers();
|
|
80
|
-
}
|
|
81
|
-
static async importPlatform() {
|
|
82
|
-
{
|
|
83
|
-
const { platform } = await Promise.resolve().then(function () { return index; });
|
|
84
|
-
return platform;
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
const log$3 = debug("twitter-scraper:requests");
|
|
76
|
+
const log$5 = debug("twitter-scraper:requests");
|
|
90
77
|
async function updateCookieJar(cookieJar, headers) {
|
|
91
78
|
let setCookieHeaders = [];
|
|
92
79
|
if (typeof headers.getSetCookie === "function") {
|
|
@@ -101,12 +88,12 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
101
88
|
for (const cookieStr of setCookieHeaders) {
|
|
102
89
|
const cookie = Cookie.parse(cookieStr);
|
|
103
90
|
if (!cookie) {
|
|
104
|
-
log$
|
|
91
|
+
log$5(`Failed to parse cookie: ${cookieStr.substring(0, 100)}`);
|
|
105
92
|
continue;
|
|
106
93
|
}
|
|
107
94
|
if (cookie.maxAge === 0 || cookie.expires && cookie.expires < /* @__PURE__ */ new Date()) {
|
|
108
95
|
if (cookie.key === "ct0") {
|
|
109
|
-
log$
|
|
96
|
+
log$5(`Skipping deletion of ct0 cookie (Max-Age=0)`);
|
|
110
97
|
}
|
|
111
98
|
continue;
|
|
112
99
|
}
|
|
@@ -114,7 +101,7 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
114
101
|
const url = `${cookie.secure ? "https" : "http"}://${cookie.domain}${cookie.path}`;
|
|
115
102
|
await cookieJar.setCookie(cookie, url);
|
|
116
103
|
if (cookie.key === "ct0") {
|
|
117
|
-
log$
|
|
104
|
+
log$5(
|
|
118
105
|
`Successfully set ct0 cookie with value: ${cookie.value.substring(
|
|
119
106
|
0,
|
|
120
107
|
20
|
|
@@ -122,9 +109,9 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
122
109
|
);
|
|
123
110
|
}
|
|
124
111
|
} catch (err) {
|
|
125
|
-
log$
|
|
112
|
+
log$5(`Failed to set cookie ${cookie.key}: ${err}`);
|
|
126
113
|
if (cookie.key === "ct0") {
|
|
127
|
-
log$
|
|
114
|
+
log$5(`FAILED to set ct0 cookie! Error: ${err}`);
|
|
128
115
|
}
|
|
129
116
|
}
|
|
130
117
|
}
|
|
@@ -138,141 +125,84 @@ async function updateCookieJar(cookieJar, headers) {
|
|
|
138
125
|
}
|
|
139
126
|
}
|
|
140
127
|
|
|
141
|
-
const log$
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
128
|
+
const log$4 = debug("twitter-scraper:xpff");
|
|
129
|
+
let isoCrypto = null;
|
|
130
|
+
function getCrypto() {
|
|
131
|
+
if (isoCrypto != null) {
|
|
132
|
+
return isoCrypto;
|
|
133
|
+
}
|
|
134
|
+
if (typeof crypto === "undefined") {
|
|
135
|
+
log$4("Global crypto is undefined, importing from crypto module...");
|
|
136
|
+
const { webcrypto } = require("crypto");
|
|
137
|
+
isoCrypto = webcrypto;
|
|
138
|
+
return webcrypto;
|
|
139
|
+
}
|
|
140
|
+
isoCrypto = crypto;
|
|
141
|
+
return crypto;
|
|
142
|
+
}
|
|
143
|
+
async function sha256(message) {
|
|
144
|
+
const msgBuffer = new TextEncoder().encode(message);
|
|
145
|
+
const hashBuffer = await getCrypto().subtle.digest("SHA-256", msgBuffer);
|
|
146
|
+
return new Uint8Array(hashBuffer);
|
|
147
|
+
}
|
|
148
|
+
/**
 * Renders a byte buffer as a lowercase hexadecimal string, two digits per byte.
 *
 * @param {ArrayBuffer|Uint8Array} buffer Bytes to render; wrapped in a Uint8Array.
 * @returns {string} The hex string (empty for an empty buffer).
 */
function buf2hex(buffer) {
  const bytes = new Uint8Array(buffer);
  let hex = "";
  for (const byte of bytes) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
|
|
151
|
+
class XPFFHeaderGenerator {
|
|
152
|
+
constructor(seed) {
|
|
153
|
+
this.seed = seed;
|
|
154
|
+
}
|
|
155
|
+
async deriveKey(guestId) {
|
|
156
|
+
const combined = `${this.seed}${guestId}`;
|
|
157
|
+
const result = await sha256(combined);
|
|
158
|
+
return result;
|
|
159
|
+
}
|
|
160
|
+
async generateHeader(plaintext, guestId) {
|
|
161
|
+
log$4(`Generating XPFF key for guest ID: ${guestId}`);
|
|
162
|
+
const key = await this.deriveKey(guestId);
|
|
163
|
+
const nonce = getCrypto().getRandomValues(new Uint8Array(12));
|
|
164
|
+
const cipher = await getCrypto().subtle.importKey(
|
|
165
|
+
"raw",
|
|
166
|
+
key,
|
|
167
|
+
{ name: "AES-GCM" },
|
|
168
|
+
false,
|
|
169
|
+
["encrypt"]
|
|
170
|
+
);
|
|
171
|
+
const encrypted = await getCrypto().subtle.encrypt(
|
|
155
172
|
{
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
}
|
|
181
|
-
if (!res.ok) {
|
|
182
|
-
return {
|
|
183
|
-
success: false,
|
|
184
|
-
err: await ApiError.fromResponse(res)
|
|
185
|
-
};
|
|
186
|
-
}
|
|
187
|
-
const value = await flexParseJson(res);
|
|
188
|
-
if (res.headers.get("x-rate-limit-incoming") == "0") {
|
|
189
|
-
auth.deleteToken();
|
|
190
|
-
return { success: true, value };
|
|
191
|
-
} else {
|
|
192
|
-
return { success: true, value };
|
|
193
|
-
}
|
|
194
|
-
}
|
|
195
|
-
async function flexParseJson(res) {
|
|
196
|
-
try {
|
|
197
|
-
return await res.json();
|
|
198
|
-
} catch {
|
|
199
|
-
log$2("Failed to parse response as JSON, trying text parse...");
|
|
200
|
-
const text = await res.text();
|
|
201
|
-
log$2("Response text:", text);
|
|
202
|
-
return JSON.parse(text);
|
|
203
|
-
}
|
|
204
|
-
}
|
|
205
|
-
function addApiFeatures(o) {
|
|
206
|
-
return {
|
|
207
|
-
...o,
|
|
208
|
-
rweb_lists_timeline_redesign_enabled: true,
|
|
209
|
-
responsive_web_graphql_exclude_directive_enabled: true,
|
|
210
|
-
verified_phone_label_enabled: false,
|
|
211
|
-
creator_subscriptions_tweet_preview_api_enabled: true,
|
|
212
|
-
responsive_web_graphql_timeline_navigation_enabled: true,
|
|
213
|
-
responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
|
|
214
|
-
tweetypie_unmention_optimization_enabled: true,
|
|
215
|
-
responsive_web_edit_tweet_api_enabled: true,
|
|
216
|
-
graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
|
|
217
|
-
view_counts_everywhere_api_enabled: true,
|
|
218
|
-
longform_notetweets_consumption_enabled: true,
|
|
219
|
-
tweet_awards_web_tipping_enabled: false,
|
|
220
|
-
freedom_of_speech_not_reach_fetch_enabled: true,
|
|
221
|
-
standardized_nudges_misinfo: true,
|
|
222
|
-
longform_notetweets_rich_text_read_enabled: true,
|
|
223
|
-
responsive_web_enhance_cards_enabled: false,
|
|
224
|
-
subscriptions_verification_info_enabled: true,
|
|
225
|
-
subscriptions_verification_info_reason_enabled: true,
|
|
226
|
-
subscriptions_verification_info_verified_since_enabled: true,
|
|
227
|
-
super_follow_badge_privacy_enabled: false,
|
|
228
|
-
super_follow_exclusive_tweet_notifications_enabled: false,
|
|
229
|
-
super_follow_tweet_api_enabled: false,
|
|
230
|
-
super_follow_user_api_enabled: false,
|
|
231
|
-
android_graphql_skip_api_media_color_palette: false,
|
|
232
|
-
creator_subscriptions_subscription_count_enabled: false,
|
|
233
|
-
blue_business_profile_image_shape_enabled: false,
|
|
234
|
-
unified_cards_ad_metadata_container_dynamic_card_content_query_enabled: false
|
|
235
|
-
};
|
|
173
|
+
name: "AES-GCM",
|
|
174
|
+
iv: nonce
|
|
175
|
+
},
|
|
176
|
+
cipher,
|
|
177
|
+
new TextEncoder().encode(plaintext)
|
|
178
|
+
);
|
|
179
|
+
const combined = new Uint8Array(nonce.length + encrypted.byteLength);
|
|
180
|
+
combined.set(nonce);
|
|
181
|
+
combined.set(new Uint8Array(encrypted), nonce.length);
|
|
182
|
+
const result = buf2hex(combined);
|
|
183
|
+
log$4(`XPFF header generated for guest ID ${guestId}: ${result}`);
|
|
184
|
+
return result;
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
const xpffBaseKey = "0e6be1f1e21ffc33590b888fd4dc81b19713e570e805d4e5df80a493c9571a05";
|
|
188
|
+
/**
 * Builds the JSON plaintext that is later encrypted into the XPFF header.
 *
 * @returns {string} JSON with `navigator_properties` and a `created_at`
 *   timestamp taken from Date.now().
 */
function xpffPlain() {
  const createdAt = Date.now();
  // NOTE(review): this user agent reports Chrome/137 while the `user-agent`
  // request header set elsewhere in this file reports Chrome/135 — confirm
  // whether the mismatch is intentional.
  const navigatorProperties = {
    hasBeenActive: "true",
    userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
    webdriver: "false"
  };
  const payload = {
    navigator_properties: navigatorProperties,
    created_at: createdAt
  };
  return JSON.stringify(payload);
}
|
|
237
|
-
function
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
params.set("include_followed_by", "1");
|
|
242
|
-
params.set("include_want_retweets", "1");
|
|
243
|
-
params.set("include_mute_edge", "1");
|
|
244
|
-
params.set("include_can_dm", "1");
|
|
245
|
-
params.set("include_can_media_tag", "1");
|
|
246
|
-
params.set("include_ext_has_nft_avatar", "1");
|
|
247
|
-
params.set("include_ext_is_blue_verified", "1");
|
|
248
|
-
params.set("include_ext_verified_type", "1");
|
|
249
|
-
params.set("skip_status", "1");
|
|
250
|
-
params.set("cards_platform", "Web-12");
|
|
251
|
-
params.set("include_cards", "1");
|
|
252
|
-
params.set("include_ext_alt_text", "true");
|
|
253
|
-
params.set("include_ext_limited_action_results", "false");
|
|
254
|
-
params.set("include_quote_count", "true");
|
|
255
|
-
params.set("include_reply_count", "1");
|
|
256
|
-
params.set("tweet_mode", "extended");
|
|
257
|
-
params.set("include_ext_collab_control", "true");
|
|
258
|
-
params.set("include_ext_views", "true");
|
|
259
|
-
params.set("include_entities", "true");
|
|
260
|
-
params.set("include_user_entities", "true");
|
|
261
|
-
params.set("include_ext_media_color", "true");
|
|
262
|
-
params.set("include_ext_media_availability", "true");
|
|
263
|
-
params.set("include_ext_sensitive_media_warning", "true");
|
|
264
|
-
params.set("include_ext_trusted_friends_metadata", "true");
|
|
265
|
-
params.set("send_error_codes", "true");
|
|
266
|
-
params.set("simple_quoted_tweet", "true");
|
|
267
|
-
params.set("include_tweet_replies", `${includeTweetReplies}`);
|
|
268
|
-
params.set(
|
|
269
|
-
"ext",
|
|
270
|
-
"mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe"
|
|
271
|
-
);
|
|
272
|
-
return params;
|
|
199
|
+
/**
 * Generates an XPFF header value for a guest ID, using a generator seeded
 * with `xpffBaseKey` and the plaintext produced by xpffPlain().
 *
 * @param {string} guestId Guest identifier passed to the generator.
 * @returns {Promise<string>} The value returned by generateHeader().
 */
async function generateXPFFHeader(guestId) {
  return new XPFFHeaderGenerator(xpffBaseKey).generateHeader(xpffPlain(), guestId);
}
|
|
274
204
|
|
|
275
|
-
const log$
|
|
205
|
+
const log$3 = debug("twitter-scraper:auth");
|
|
276
206
|
function withTransform(fetchFn, transform) {
|
|
277
207
|
return async (input, init) => {
|
|
278
208
|
const fetchArgs = await transform?.request?.(input, init) ?? [
|
|
@@ -322,28 +252,37 @@ class TwitterGuestAuth {
|
|
|
322
252
|
}
|
|
323
253
|
return new Date(this.guestCreatedAt);
|
|
324
254
|
}
|
|
325
|
-
async installTo(headers) {
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
"
|
|
333
|
-
|
|
255
|
+
async installTo(headers, _url, bearerTokenOverride) {
|
|
256
|
+
const tokenToUse = bearerTokenOverride ?? this.bearerToken;
|
|
257
|
+
if (!bearerTokenOverride) {
|
|
258
|
+
if (this.shouldUpdate()) {
|
|
259
|
+
await this.updateGuestToken();
|
|
260
|
+
}
|
|
261
|
+
if (this.guestToken) {
|
|
262
|
+
headers.set("x-guest-token", this.guestToken);
|
|
263
|
+
}
|
|
334
264
|
}
|
|
335
|
-
headers.set("authorization", `Bearer ${
|
|
336
|
-
headers.set("x-guest-token", token);
|
|
265
|
+
headers.set("authorization", `Bearer ${tokenToUse}`);
|
|
337
266
|
headers.set(
|
|
338
267
|
"user-agent",
|
|
339
268
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
340
269
|
);
|
|
270
|
+
await this.installCsrfToken(headers);
|
|
271
|
+
if (this.options?.experimental?.xpff) {
|
|
272
|
+
const guestId = await this.guestId();
|
|
273
|
+
if (guestId != null) {
|
|
274
|
+
const xpffHeader = await generateXPFFHeader(guestId);
|
|
275
|
+
headers.set("x-xp-forwarded-for", xpffHeader);
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
headers.set("cookie", await this.getCookieString());
|
|
279
|
+
}
|
|
280
|
+
async installCsrfToken(headers) {
|
|
341
281
|
const cookies = await this.getCookies();
|
|
342
282
|
const xCsrfToken = cookies.find((cookie) => cookie.key === "ct0");
|
|
343
283
|
if (xCsrfToken) {
|
|
344
284
|
headers.set("x-csrf-token", xCsrfToken.value);
|
|
345
285
|
}
|
|
346
|
-
headers.set("cookie", await this.getCookieString());
|
|
347
286
|
}
|
|
348
287
|
async setCookie(key, value) {
|
|
349
288
|
const cookie = Cookie.parse(`${key}=${value}`);
|
|
@@ -376,16 +315,28 @@ class TwitterGuestAuth {
|
|
|
376
315
|
getCookieJarUrl() {
|
|
377
316
|
return typeof document !== "undefined" ? document.location.toString() : "https://x.com";
|
|
378
317
|
}
|
|
318
|
+
async guestId() {
|
|
319
|
+
const cookies = await this.getCookies();
|
|
320
|
+
const guestIdCookie = cookies.find((cookie) => cookie.key === "guest_id");
|
|
321
|
+
return guestIdCookie ? guestIdCookie.value : null;
|
|
322
|
+
}
|
|
379
323
|
/**
|
|
380
324
|
* Updates the authentication state with a new guest token from the Twitter API.
|
|
381
325
|
*/
|
|
382
326
|
async updateGuestToken() {
|
|
327
|
+
try {
|
|
328
|
+
await this.updateGuestTokenCore();
|
|
329
|
+
} catch (err) {
|
|
330
|
+
log$3("Failed to update guest token; this may cause issues:", err);
|
|
331
|
+
}
|
|
332
|
+
}
|
|
333
|
+
async updateGuestTokenCore() {
|
|
383
334
|
const guestActivateUrl = "https://api.x.com/1.1/guest/activate.json";
|
|
384
335
|
const headers = new Headers({
|
|
385
336
|
Authorization: `Bearer ${this.bearerToken}`,
|
|
386
337
|
Cookie: await this.getCookieString()
|
|
387
338
|
});
|
|
388
|
-
log$
|
|
339
|
+
log$3(`Making POST request to ${guestActivateUrl}`);
|
|
389
340
|
const res = await this.fetch(guestActivateUrl, {
|
|
390
341
|
method: "POST",
|
|
391
342
|
headers,
|
|
@@ -406,7 +357,7 @@ class TwitterGuestAuth {
|
|
|
406
357
|
this.guestToken = newGuestToken;
|
|
407
358
|
this.guestCreatedAt = /* @__PURE__ */ new Date();
|
|
408
359
|
await this.setCookie("gt", newGuestToken);
|
|
409
|
-
log$
|
|
360
|
+
log$3(`Updated guest token: ${newGuestToken}`);
|
|
410
361
|
}
|
|
411
362
|
/**
|
|
412
363
|
* Returns if the authentication token needs to be updated or not.
|
|
@@ -417,6 +368,278 @@ class TwitterGuestAuth {
|
|
|
417
368
|
}
|
|
418
369
|
}
|
|
419
370
|
|
|
371
|
+
/**
 * Thin wrapper that forwards to the `platform` object exported by the
 * bundled `index` namespace.
 */
class Platform {
  /**
   * Calls `randomizeCiphers()` on the imported platform, if one is present.
   *
   * @returns {Promise<void>}
   */
  async randomizeCiphers() {
    const impl = await Platform.importPlatform();
    await impl?.randomizeCiphers();
  }

  /**
   * Resolves the `platform` member of the `index` namespace asynchronously.
   *
   * @returns {Promise<object|undefined>} The `platform` export.
   */
  static async importPlatform() {
    // Kept as a deferred lookup so `index` is read on a later microtask.
    const namespace = await Promise.resolve().then(() => index);
    return namespace.platform;
  }
}
|
|
383
|
+
|
|
384
|
+
const log$2 = debug("twitter-scraper:xctxid");
|
|
385
|
+
/** Cached `linkedom` module; filled in by the first successful linkedomImport() call. */
let linkedom = null;

/**
 * Lazily loads the `linkedom` module through `require` and caches it.
 *
 * @returns {object} The `linkedom` module object.
 * @throws {Error} If `require` is not available (for example when this file
 *   is loaded as an ES module), or whatever `require("linkedom")` throws.
 */
function linkedomImport() {
  if (linkedom) {
    return linkedom;
  }
  // A bare `require` does not exist in ES module scope; without this guard the
  // call would fail with an opaque "require is not defined" ReferenceError.
  if (typeof require !== "function") {
    throw new Error(
      "Cannot load `linkedom`: `require` is not defined in this module context."
    );
  }
  linkedom = require("linkedom");
  return linkedom;
}
|
|
394
|
+
/**
 * Parses an HTML string and returns a window-like object whose
 * `window.document` is the parsed document.
 *
 * When a global `window` exists, the global `DOMParser` is used; otherwise
 * the `DOMParser` exported by `linkedom` is used.
 *
 * @param {string} html HTML source to parse.
 * @returns {Promise<object>} The parsed document's `defaultView`, or (global
 *   `DOMParser` path only) a minimal `{ window, document }` shim when the
 *   parsed document has no `defaultView`.
 */
async function parseHTML(html) {
  if (typeof window !== "undefined") {
    const parsed = new DOMParser().parseFromString(html, "text/html");
    if (parsed.defaultView) {
      return parsed.defaultView;
    }
    // Documents created by DOMParser have no browsing context, so their
    // `defaultView` is null. Previously this path always threw; return a
    // self-referential shim so `result.window.document` still resolves.
    const shim = { document: parsed };
    shim.window = shim;
    return shim;
  }
  const { DOMParser: DOMParser2 } = linkedomImport();
  return new DOMParser2().parseFromString(html, "text/html").defaultView;
}
|
|
406
|
+
/**
 * Fetches the X homepage and follows the twitter.com -> x.com migration flow
 * (meta-refresh / inline redirect URL, then the migration form) if present.
 *
 * Every request goes through `fetchFn`, so the caller's fetch implementation
 * is used for the redirect and form submission as well as the first request.
 *
 * @param {Function} fetchFn fetch-compatible function `(input, init) => Promise<Response>`.
 * @returns {Promise<object>} The document of the last page that was parsed.
 * @throws {Error} If any of the requests returns a non-OK response.
 */
async function handleXMigration(fetchFn) {
  const headers = {
    accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "ja",
    "cache-control": "no-cache",
    pragma: "no-cache",
    priority: "u=0, i",
    "sec-ch-ua": '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "none",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
  };
  const response = await fetchFn("https://x.com", {
    headers
  });
  if (!response.ok) {
    throw new Error(`Failed to fetch X homepage: ${response.statusText}`);
  }
  const htmlText = await response.text();
  let dom = await parseHTML(htmlText);
  let document = dom.window.document;

  // Step 1: look for a migration redirect URL, first in the meta-refresh tag,
  // then anywhere in the raw HTML.
  const migrationRedirectionRegex = new RegExp(
    "(http(?:s)?://(?:www\\.)?(twitter|x){1}\\.com(/x)?/migrate([/?])?tok=[a-zA-Z0-9%\\-_]+)+",
    "i"
  );
  const metaRefresh = document.querySelector("meta[http-equiv='refresh']");
  const metaContent = metaRefresh ? metaRefresh.getAttribute("content") || "" : "";
  const migrationRedirectionUrl = migrationRedirectionRegex.exec(metaContent) || migrationRedirectionRegex.exec(htmlText);
  if (migrationRedirectionUrl) {
    // Use the injected fetch (not the global one) so the follow-up request
    // goes through the same fetch implementation as the first request.
    const redirectResponse = await fetchFn(migrationRedirectionUrl[0]);
    if (!redirectResponse.ok) {
      throw new Error(
        `Failed to follow migration redirection: ${redirectResponse.statusText}`
      );
    }
    const redirectHtml = await redirectResponse.text();
    dom = await parseHTML(redirectHtml);
    document = dom.window.document;
  }

  // Step 2: if the page contains the migration form, submit it with the
  // form's own input values.
  const migrationForm = document.querySelector("form[name='f']") || document.querySelector("form[action='https://x.com/x/migrate']");
  if (migrationForm) {
    const url = migrationForm.getAttribute("action") || "https://x.com/x/migrate";
    const method = migrationForm.getAttribute("method") || "POST";
    const requestPayload = new FormData();
    const inputFields = migrationForm.querySelectorAll("input");
    for (const element of Array.from(inputFields)) {
      const name = element.getAttribute("name");
      const value = element.getAttribute("value");
      // Inputs without a name or with an empty/missing value are not submitted.
      if (name && value) {
        requestPayload.append(name, value);
      }
    }
    const formResponse = await fetchFn(url, {
      method,
      body: requestPayload,
      headers
    });
    if (!formResponse.ok) {
      throw new Error(
        `Failed to submit migration form: ${formResponse.statusText}`
      );
    }
    const formHtml = await formResponse.text();
    dom = await parseHTML(formHtml);
    document = dom.window.document;
  }
  return document;
}
|
|
479
|
+
/** Cached `ClientTransaction` export; filled in by the first successful clientTransaction() call. */
let ClientTransaction = null;

/**
 * Lazily loads the `ClientTransaction` export of the
 * `x-client-transaction-id` module through `require` and caches it.
 *
 * @returns {object} The `ClientTransaction` export.
 * @throws {Error} If `require` is not available (for example when this file
 *   is loaded as an ES module), or whatever the `require` call throws.
 */
function clientTransaction() {
  if (ClientTransaction) {
    return ClientTransaction;
  }
  // A bare `require` does not exist in ES module scope; without this guard the
  // call would fail with an opaque "require is not defined" ReferenceError.
  if (typeof require !== "function") {
    throw new Error(
      "Cannot load `x-client-transaction-id`: `require` is not defined in this module context."
    );
  }
  const mod = require("x-client-transaction-id");
  ClientTransaction = mod.ClientTransaction;
  return ClientTransaction;
}
|
|
489
|
+
/**
 * Generates a client transaction ID for a request.
 *
 * Fetches the X homepage document via handleXMigration(), creates a
 * transaction from it, and asks it for an ID for `method` and the URL's path.
 *
 * @param {string} url Request URL; only its pathname is used.
 * @param {Function} fetchFn fetch-compatible function passed to handleXMigration().
 * @param {string} method HTTP method of the request.
 * @returns {Promise<string>} The generated transaction ID.
 */
async function generateTransactionId(url, fetchFn, method) {
  const { pathname } = new URL(url);
  log$2(`Generating transaction ID for ${method} ${pathname}`);
  const homepage = await handleXMigration(fetchFn);
  const tx = await clientTransaction().create(homepage);
  const id = await tx.generateTransactionId(method, pathname);
  log$2(`Transaction ID: ${id}`);
  return id;
}
|
|
499
|
+
|
|
500
|
+
const log$1 = debug("twitter-scraper:api");
|
|
501
|
+
const bearerToken = "AAAAAAAAAAAAAAAAAAAAAFQODgEAAAAAVHTp76lzh3rFzcHbmHVvQxYYpTw%3DckAlMINMjmCwxUcaXbAN4XqJVdgMJaHqNOFgPMK0zN1qLqLQCF";
|
|
502
|
+
const bearerToken2 = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA";
|
|
503
|
+
/**
 * Waits for a random duration in the range [0, maxMs) milliseconds.
 *
 * @param {number} maxMs Upper bound of the delay, in milliseconds.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
async function jitter(maxMs) {
  const delayMs = Math.random() * maxMs;
  await new Promise((done) => {
    setTimeout(done, delayMs);
  });
}
|
|
507
|
+
/**
 * Performs an authenticated API request and parses the JSON response.
 *
 * Auth headers are installed via `auth.installTo`, ciphers are randomized via
 * `platform`, and (for guest-auth instances with the experimental
 * `xClientTransactionId` option) an `x-client-transaction-id` header is added.
 * Responses with status 429 are handed to `auth.onRateLimit` and retried.
 *
 * @param {string} url Request URL.
 * @param {object} auth Auth object providing installTo, fetch, cookieJar,
 *   onRateLimit and deleteToken.
 * @param {string} [method="GET"] HTTP method.
 * @param {Platform} [platform] Platform wrapper used to randomize ciphers.
 * @param {Headers} [headers] Headers to send; mutated by `auth.installTo`.
 * @param {string} [bearerTokenOverride] Bearer token forwarded to `auth.installTo`.
 * @returns {Promise<{success: true, value: any} | {success: false, err: Error}>}
 *   The parsed body on success; on failure, either an ApiError built from the
 *   response or a generic Error whose `cause` is the underlying fetch error.
 * @throws Rethrows anything thrown by fetch that is not an Error instance.
 */
async function requestApi(url, auth, method = "GET", platform = new Platform(), headers = new Headers(), bearerTokenOverride) {
  log$1(`Making ${method} request to ${url}`);
  await auth.installTo(headers, url, bearerTokenOverride);
  await platform.randomizeCiphers();
  if (auth instanceof TwitterGuestAuth && auth.options?.experimental?.xClientTransactionId) {
    const transactionId = await generateTransactionId(
      url,
      auth.fetch.bind(auth),
      method
    );
    headers.set("x-client-transaction-id", transactionId);
  }
  let res;
  do {
    const fetchParameters = [
      url,
      {
        method,
        headers,
        credentials: "include"
      }
    ];
    try {
      res = await auth.fetch(...fetchParameters);
    } catch (err) {
      if (!(err instanceof Error)) {
        throw err;
      }
      // Keep the original failure reachable through `cause` instead of
      // discarding it; the message callers may match on is unchanged.
      return {
        success: false,
        err: new Error("Failed to perform request.", { cause: err })
      };
    }
    await updateCookieJar(auth.cookieJar(), res.headers);
    if (res.status === 429) {
      log$1("Rate limit hit, waiting for retry...");
      await auth.onRateLimit({
        fetchParameters,
        response: res
      });
    }
  } while (res.status === 429);
  if (!res.ok) {
    return {
      success: false,
      err: await ApiError.fromResponse(res)
    };
  }
  const value = await flexParseJson(res);
  // The stored token is dropped when the response reports this header as "0".
  if (res.headers.get("x-rate-limit-incoming") === "0") {
    auth.deleteToken();
  }
  return { success: true, value };
}
|
|
563
|
+
/**
 * Parses a response body as JSON, falling back to reading the body as text
 * (and logging it) when `res.json()` fails.
 *
 * @param {Response} res Response whose body should be parsed.
 * @returns {Promise<any>} The parsed JSON value.
 * @throws {SyntaxError} If the body text is not valid JSON.
 */
async function flexParseJson(res) {
  // A body can only be read once: after `res.json()` fails, `res.text()` on the
  // same object rejects because the body is already consumed. Take a clone up
  // front so the fallback can still read (and log) the raw text. Objects
  // without `clone()` keep the previous behaviour.
  const fallback = typeof res.clone === "function" ? res.clone() : res;
  try {
    return await res.json();
  } catch {
    log$1("Failed to parse response as JSON, trying text parse...");
    const text = await fallback.text();
    log$1("Response text:", text);
    return JSON.parse(text);
  }
}
|
|
573
|
+
/**
 * Returns a copy of `o` extended with the fixed set of API feature flags.
 * Flags listed here take precedence over same-named keys in `o`.
 *
 * @param {object} o Base feature object; not mutated.
 * @returns {object} A new object containing `o`'s entries plus the flags.
 */
function addApiFeatures(o) {
  const featureFlags = {
    rweb_lists_timeline_redesign_enabled: true,
    responsive_web_graphql_exclude_directive_enabled: true,
    verified_phone_label_enabled: false,
    creator_subscriptions_tweet_preview_api_enabled: true,
    responsive_web_graphql_timeline_navigation_enabled: true,
    responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
    tweetypie_unmention_optimization_enabled: true,
    responsive_web_edit_tweet_api_enabled: true,
    graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
    view_counts_everywhere_api_enabled: true,
    longform_notetweets_consumption_enabled: true,
    tweet_awards_web_tipping_enabled: false,
    freedom_of_speech_not_reach_fetch_enabled: true,
    standardized_nudges_misinfo: true,
    longform_notetweets_rich_text_read_enabled: true,
    responsive_web_enhance_cards_enabled: false,
    subscriptions_verification_info_enabled: true,
    subscriptions_verification_info_reason_enabled: true,
    subscriptions_verification_info_verified_since_enabled: true,
    super_follow_badge_privacy_enabled: false,
    super_follow_exclusive_tweet_notifications_enabled: false,
    super_follow_tweet_api_enabled: false,
    super_follow_user_api_enabled: false,
    android_graphql_skip_api_media_color_palette: false,
    creator_subscriptions_subscription_count_enabled: false,
    blue_business_profile_image_shape_enabled: false,
    unified_cards_ad_metadata_container_dynamic_card_content_query_enabled: false
  };
  return { ...o, ...featureFlags };
}
|
|
605
|
+
/**
 * Sets the fixed set of API query parameters on `params`.
 *
 * @param {URLSearchParams} params Parameter collection to fill; mutated in place.
 * @param {boolean} includeTweetReplies Stringified into `include_tweet_replies`.
 * @returns {URLSearchParams} The same `params` object.
 */
function addApiParams(params, includeTweetReplies) {
  const fixedParams = [
    ["include_profile_interstitial_type", "1"],
    ["include_blocking", "1"],
    ["include_blocked_by", "1"],
    ["include_followed_by", "1"],
    ["include_want_retweets", "1"],
    ["include_mute_edge", "1"],
    ["include_can_dm", "1"],
    ["include_can_media_tag", "1"],
    ["include_ext_has_nft_avatar", "1"],
    ["include_ext_is_blue_verified", "1"],
    ["include_ext_verified_type", "1"],
    ["skip_status", "1"],
    ["cards_platform", "Web-12"],
    ["include_cards", "1"],
    ["include_ext_alt_text", "true"],
    ["include_ext_limited_action_results", "false"],
    ["include_quote_count", "true"],
    ["include_reply_count", "1"],
    ["tweet_mode", "extended"],
    ["include_ext_collab_control", "true"],
    ["include_ext_views", "true"],
    ["include_entities", "true"],
    ["include_user_entities", "true"],
    ["include_ext_media_color", "true"],
    ["include_ext_media_availability", "true"],
    ["include_ext_sensitive_media_warning", "true"],
    ["include_ext_trusted_friends_metadata", "true"],
    ["send_error_codes", "true"],
    ["simple_quoted_tweet", "true"]
  ];
  for (const [key, value] of fixedParams) {
    params.set(key, value);
  }
  // The two remaining parameters are set last, in the same order as before.
  params.set("include_tweet_replies", `${includeTweetReplies}`);
  params.set(
    "ext",
    "mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe"
  );
  return params;
}
|
|
642
|
+
|
|
420
643
|
const log = debug("twitter-scraper:auth-user");
|
|
421
644
|
const TwitterUserAuthSubtask = Type.Object({
|
|
422
645
|
subtask_id: Type.String(),
|
|
@@ -524,25 +747,26 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
524
747
|
this.jar = new CookieJar();
|
|
525
748
|
}
|
|
526
749
|
}
|
|
527
|
-
async
|
|
528
|
-
const
|
|
529
|
-
|
|
530
|
-
if (xCsrfToken) {
|
|
531
|
-
headers.set("x-csrf-token", xCsrfToken.value);
|
|
532
|
-
}
|
|
533
|
-
}
|
|
534
|
-
async installTo(headers) {
|
|
535
|
-
headers.set("authorization", `Bearer ${this.bearerToken}`);
|
|
536
|
-
const cookie = await this.getCookieString();
|
|
537
|
-
headers.set("cookie", cookie);
|
|
538
|
-
if (this.guestToken) {
|
|
539
|
-
headers.set("x-guest-token", this.guestToken);
|
|
540
|
-
}
|
|
750
|
+
async installTo(headers, _url, bearerTokenOverride) {
|
|
751
|
+
const tokenToUse = bearerTokenOverride ?? this.bearerToken;
|
|
752
|
+
headers.set("authorization", `Bearer ${tokenToUse}`);
|
|
541
753
|
headers.set(
|
|
542
754
|
"user-agent",
|
|
543
755
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
544
756
|
);
|
|
757
|
+
if (this.guestToken) {
|
|
758
|
+
headers.set("x-guest-token", this.guestToken);
|
|
759
|
+
}
|
|
545
760
|
await this.installCsrfToken(headers);
|
|
761
|
+
if (this.options?.experimental?.xpff) {
|
|
762
|
+
const guestId = await this.guestId();
|
|
763
|
+
if (guestId != null) {
|
|
764
|
+
const xpffHeader = await generateXPFFHeader(guestId);
|
|
765
|
+
headers.set("x-xp-forwarded-for", xpffHeader);
|
|
766
|
+
}
|
|
767
|
+
}
|
|
768
|
+
const cookie = await this.getCookieString();
|
|
769
|
+
headers.set("cookie", cookie);
|
|
546
770
|
}
|
|
547
771
|
async initLogin() {
|
|
548
772
|
this.removeCookie("twitter_ads_id=");
|
|
@@ -747,12 +971,6 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
747
971
|
onboardingTaskUrl = `https://api.x.com/1.1/onboarding/task.json?flow_name=${data.flow_name}`;
|
|
748
972
|
}
|
|
749
973
|
log(`Making POST request to ${onboardingTaskUrl}`);
|
|
750
|
-
const token = this.guestToken;
|
|
751
|
-
if (token == null) {
|
|
752
|
-
throw new AuthenticationError(
|
|
753
|
-
"Authentication token is null or undefined."
|
|
754
|
-
);
|
|
755
|
-
}
|
|
756
974
|
const headers = new Headers({
|
|
757
975
|
accept: "*/*",
|
|
758
976
|
"accept-language": "en-US,en;q=0.9",
|
|
@@ -769,12 +987,19 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
769
987
|
"sec-fetch-mode": "cors",
|
|
770
988
|
"sec-fetch-site": "same-origin",
|
|
771
989
|
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
|
|
772
|
-
"x-guest-token": token,
|
|
773
990
|
"x-twitter-auth-type": "OAuth2Client",
|
|
774
991
|
"x-twitter-active-user": "yes",
|
|
775
992
|
"x-twitter-client-language": "en"
|
|
776
993
|
});
|
|
777
|
-
await this.installTo(headers);
|
|
994
|
+
await this.installTo(headers, onboardingTaskUrl);
|
|
995
|
+
if (this.options?.experimental?.xClientTransactionId) {
|
|
996
|
+
const transactionId = await generateTransactionId(
|
|
997
|
+
onboardingTaskUrl,
|
|
998
|
+
this.fetch.bind(this),
|
|
999
|
+
"POST"
|
|
1000
|
+
);
|
|
1001
|
+
headers.set("x-client-transaction-id", transactionId);
|
|
1002
|
+
}
|
|
778
1003
|
let res;
|
|
779
1004
|
do {
|
|
780
1005
|
const fetchParameters = [
|
|
@@ -1797,7 +2022,11 @@ async function getTrends(auth) {
|
|
|
1797
2022
|
params.set("entity_tokens", "false");
|
|
1798
2023
|
const res = await requestApi(
|
|
1799
2024
|
`https://api.x.com/2/guide.json?${params.toString()}`,
|
|
1800
|
-
auth
|
|
2025
|
+
auth,
|
|
2026
|
+
"GET",
|
|
2027
|
+
void 0,
|
|
2028
|
+
void 0,
|
|
2029
|
+
bearerToken2
|
|
1801
2030
|
);
|
|
1802
2031
|
if (!res.success) {
|
|
1803
2032
|
throw res.err;
|
|
@@ -1880,7 +2109,11 @@ async function fetchTweets(userId, maxTweets, cursor, auth) {
|
|
|
1880
2109
|
}
|
|
1881
2110
|
const res = await requestApi(
|
|
1882
2111
|
userTweetsRequest.toRequestUrl(),
|
|
1883
|
-
auth
|
|
2112
|
+
auth,
|
|
2113
|
+
"GET",
|
|
2114
|
+
void 0,
|
|
2115
|
+
void 0,
|
|
2116
|
+
bearerToken2
|
|
1884
2117
|
);
|
|
1885
2118
|
if (!res.success) {
|
|
1886
2119
|
throw res.err;
|
|
@@ -2026,7 +2259,11 @@ async function getTweet(id, auth) {
|
|
|
2026
2259
|
tweetDetailRequest.variables.focalTweetId = id;
|
|
2027
2260
|
const res = await requestApi(
|
|
2028
2261
|
tweetDetailRequest.toRequestUrl(),
|
|
2029
|
-
auth
|
|
2262
|
+
auth,
|
|
2263
|
+
"GET",
|
|
2264
|
+
void 0,
|
|
2265
|
+
void 0,
|
|
2266
|
+
bearerToken2
|
|
2030
2267
|
);
|
|
2031
2268
|
if (!res.success) {
|
|
2032
2269
|
throw res.err;
|
|
@@ -2042,7 +2279,11 @@ async function getTweetAnonymous(id, auth) {
|
|
|
2042
2279
|
tweetResultByRestIdRequest.variables.tweetId = id;
|
|
2043
2280
|
const res = await requestApi(
|
|
2044
2281
|
tweetResultByRestIdRequest.toRequestUrl(),
|
|
2045
|
-
auth
|
|
2282
|
+
auth,
|
|
2283
|
+
"GET",
|
|
2284
|
+
void 0,
|
|
2285
|
+
void 0,
|
|
2286
|
+
bearerToken2
|
|
2046
2287
|
);
|
|
2047
2288
|
if (!res.success) {
|
|
2048
2289
|
throw res.err;
|
|
@@ -2599,7 +2840,11 @@ class Scraper {
|
|
|
2599
2840
|
return {
|
|
2600
2841
|
fetch: this.options?.fetch,
|
|
2601
2842
|
transform: this.options?.transform,
|
|
2602
|
-
rateLimitStrategy: this.options?.rateLimitStrategy
|
|
2843
|
+
rateLimitStrategy: this.options?.rateLimitStrategy,
|
|
2844
|
+
experimental: {
|
|
2845
|
+
xClientTransactionId: this.options?.experimental?.xClientTransactionId,
|
|
2846
|
+
xpff: this.options?.experimental?.xpff
|
|
2847
|
+
}
|
|
2603
2848
|
};
|
|
2604
2849
|
}
|
|
2605
2850
|
handleResponse(res) {
|