@the-convocation/twitter-scraper 0.18.3 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -0
- package/dist/cycletls/cjs/index.cjs +99 -0
- package/dist/cycletls/cjs/index.cjs.map +1 -0
- package/dist/cycletls/esm/index.mjs +96 -0
- package/dist/cycletls/esm/index.mjs.map +1 -0
- package/dist/cycletls/index.d.ts +11 -0
- package/dist/default/cjs/index.js +75 -18
- package/dist/default/cjs/index.js.map +1 -1
- package/dist/default/esm/index.mjs +75 -18
- package/dist/default/esm/index.mjs.map +1 -1
- package/dist/node/cjs/index.cjs +75 -18
- package/dist/node/cjs/index.cjs.map +1 -1
- package/dist/node/esm/index.mjs +75 -18
- package/dist/node/esm/index.mjs.map +1 -1
- package/examples/cycletls/README.md +54 -0
- package/examples/cycletls/package.json +12 -0
- package/package.json +25 -9
- package/rollup.config.mjs +34 -0
package/dist/node/cjs/index.cjs
CHANGED
|
@@ -72,13 +72,13 @@ class AuthenticationError extends Error {
|
|
|
72
72
|
}
|
|
73
73
|
}
|
|
74
74
|
|
|
75
|
-
const log$3 = debug("twitter-scraper:rate-limit");
|
|
75
|
+
const log$4 = debug("twitter-scraper:rate-limit");
|
|
76
76
|
class WaitingRateLimitStrategy {
|
|
77
77
|
async onRateLimit({ response: res }) {
|
|
78
78
|
const xRateLimitLimit = res.headers.get("x-rate-limit-limit");
|
|
79
79
|
const xRateLimitRemaining = res.headers.get("x-rate-limit-remaining");
|
|
80
80
|
const xRateLimitReset = res.headers.get("x-rate-limit-reset");
|
|
81
|
-
log$3(
|
|
81
|
+
log$4(
|
|
82
82
|
`Rate limit event: limit=${xRateLimitLimit}, remaining=${xRateLimitRemaining}, reset=${xRateLimitReset}`
|
|
83
83
|
);
|
|
84
84
|
if (xRateLimitRemaining == "0" && xRateLimitReset) {
|
|
@@ -107,16 +107,47 @@ class Platform {
|
|
|
107
107
|
}
|
|
108
108
|
}
|
|
109
109
|
|
|
110
|
+
const log$3 = debug("twitter-scraper:requests");
|
|
110
111
|
async function updateCookieJar(cookieJar, headers) {
|
|
111
|
-
|
|
112
|
-
if (
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
112
|
+
let setCookieHeaders = [];
|
|
113
|
+
if (typeof headers.getSetCookie === "function") {
|
|
114
|
+
setCookieHeaders = headers.getSetCookie();
|
|
115
|
+
} else {
|
|
116
|
+
const setCookieHeader = headers.get("set-cookie");
|
|
117
|
+
if (setCookieHeader) {
|
|
118
|
+
setCookieHeaders = setCookie.splitCookiesString(setCookieHeader);
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
if (setCookieHeaders.length > 0) {
|
|
122
|
+
for (const cookieStr of setCookieHeaders) {
|
|
123
|
+
const cookie = toughCookie.Cookie.parse(cookieStr);
|
|
124
|
+
if (!cookie) {
|
|
125
|
+
log$3(`Failed to parse cookie: ${cookieStr.substring(0, 100)}`);
|
|
126
|
+
continue;
|
|
127
|
+
}
|
|
128
|
+
if (cookie.maxAge === 0 || cookie.expires && cookie.expires < /* @__PURE__ */ new Date()) {
|
|
129
|
+
if (cookie.key === "ct0") {
|
|
130
|
+
log$3(`Skipping deletion of ct0 cookie (Max-Age=0)`);
|
|
131
|
+
}
|
|
132
|
+
continue;
|
|
133
|
+
}
|
|
134
|
+
try {
|
|
135
|
+
const url = `${cookie.secure ? "https" : "http"}://${cookie.domain}${cookie.path}`;
|
|
136
|
+
await cookieJar.setCookie(cookie, url);
|
|
137
|
+
if (cookie.key === "ct0") {
|
|
138
|
+
log$3(
|
|
139
|
+
`Successfully set ct0 cookie with value: ${cookie.value.substring(
|
|
140
|
+
0,
|
|
141
|
+
20
|
|
142
|
+
)}...`
|
|
143
|
+
);
|
|
144
|
+
}
|
|
145
|
+
} catch (err) {
|
|
146
|
+
log$3(`Failed to set cookie ${cookie.key}: ${err}`);
|
|
147
|
+
if (cookie.key === "ct0") {
|
|
148
|
+
log$3(`FAILED to set ct0 cookie! Error: ${err}`);
|
|
149
|
+
}
|
|
150
|
+
}
|
|
120
151
|
}
|
|
121
152
|
} else if (typeof document !== "undefined") {
|
|
122
153
|
for (const cookie of document.cookie.split(";")) {
|
|
@@ -134,9 +165,8 @@ async function jitter(maxMs) {
|
|
|
134
165
|
const jitter2 = Math.random() * maxMs;
|
|
135
166
|
await new Promise((resolve) => setTimeout(resolve, jitter2));
|
|
136
167
|
}
|
|
137
|
-
async function requestApi(url, auth, method = "GET", platform = new Platform()) {
|
|
168
|
+
async function requestApi(url, auth, method = "GET", platform = new Platform(), headers = new headersPolyfill.Headers()) {
|
|
138
169
|
log$2(`Making ${method} request to ${url}`);
|
|
139
|
-
const headers = new headersPolyfill.Headers();
|
|
140
170
|
await auth.installTo(headers, url);
|
|
141
171
|
await platform.randomizeCiphers();
|
|
142
172
|
let res;
|
|
@@ -322,6 +352,16 @@ class TwitterGuestAuth {
|
|
|
322
352
|
}
|
|
323
353
|
headers.set("cookie", await this.getCookieString());
|
|
324
354
|
}
|
|
355
|
+
async setCookie(key, value) {
|
|
356
|
+
const cookie = toughCookie.Cookie.parse(`${key}=${value}`);
|
|
357
|
+
if (!cookie) {
|
|
358
|
+
throw new Error("Failed to parse cookie.");
|
|
359
|
+
}
|
|
360
|
+
await this.jar.setCookie(cookie, this.getCookieJarUrl());
|
|
361
|
+
if (typeof document !== "undefined") {
|
|
362
|
+
document.cookie = cookie.toString();
|
|
363
|
+
}
|
|
364
|
+
}
|
|
325
365
|
async getCookies() {
|
|
326
366
|
return this.jar.getCookies(this.getCookieJarUrl());
|
|
327
367
|
}
|
|
@@ -372,6 +412,8 @@ class TwitterGuestAuth {
|
|
|
372
412
|
}
|
|
373
413
|
this.guestToken = newGuestToken;
|
|
374
414
|
this.guestCreatedAt = /* @__PURE__ */ new Date();
|
|
415
|
+
await this.setCookie("gt", newGuestToken);
|
|
416
|
+
log$1(`Updated guest token: ${newGuestToken}`);
|
|
375
417
|
}
|
|
376
418
|
/**
|
|
377
419
|
* Returns if the authentication token needs to be updated or not.
|
|
@@ -498,7 +540,11 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
498
540
|
}
|
|
499
541
|
async installTo(headers) {
|
|
500
542
|
headers.set("authorization", `Bearer ${this.bearerToken}`);
|
|
501
|
-
|
|
543
|
+
const cookie = await this.getCookieString();
|
|
544
|
+
headers.set("cookie", cookie);
|
|
545
|
+
if (this.guestToken) {
|
|
546
|
+
headers.set("x-guest-token", this.guestToken);
|
|
547
|
+
}
|
|
502
548
|
await this.installCsrfToken(headers);
|
|
503
549
|
}
|
|
504
550
|
async initLogin() {
|
|
@@ -711,16 +757,27 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
711
757
|
);
|
|
712
758
|
}
|
|
713
759
|
const headers = new headersPolyfill.Headers({
|
|
714
|
-
|
|
715
|
-
|
|
760
|
+
accept: "*/*",
|
|
761
|
+
"accept-language": "en-US,en;q=0.9",
|
|
716
762
|
"content-type": "application/json",
|
|
717
|
-
"
|
|
763
|
+
"cache-control": "no-cache",
|
|
764
|
+
origin: "https://x.com",
|
|
765
|
+
pragma: "no-cache",
|
|
766
|
+
priority: "u=1, i",
|
|
767
|
+
referer: "https://x.com/",
|
|
768
|
+
"sec-ch-ua": '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
|
|
769
|
+
"sec-ch-ua-mobile": "?0",
|
|
770
|
+
"sec-ch-ua-platform": '"Windows"',
|
|
771
|
+
"sec-fetch-dest": "empty",
|
|
772
|
+
"sec-fetch-mode": "cors",
|
|
773
|
+
"sec-fetch-site": "same-origin",
|
|
774
|
+
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
|
|
718
775
|
"x-guest-token": token,
|
|
719
776
|
"x-twitter-auth-type": "OAuth2Client",
|
|
720
777
|
"x-twitter-active-user": "yes",
|
|
721
778
|
"x-twitter-client-language": "en"
|
|
722
779
|
});
|
|
723
|
-
await this.
|
|
780
|
+
await this.installTo(headers);
|
|
724
781
|
let res;
|
|
725
782
|
do {
|
|
726
783
|
const fetchParameters = [
|