@the-convocation/twitter-scraper 0.18.3 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -0
- package/dist/cycletls/cjs/index.cjs +99 -0
- package/dist/cycletls/cjs/index.cjs.map +1 -0
- package/dist/cycletls/esm/index.mjs +96 -0
- package/dist/cycletls/esm/index.mjs.map +1 -0
- package/dist/cycletls/index.d.ts +11 -0
- package/dist/default/cjs/index.js +75 -18
- package/dist/default/cjs/index.js.map +1 -1
- package/dist/default/esm/index.mjs +75 -18
- package/dist/default/esm/index.mjs.map +1 -1
- package/dist/node/cjs/index.cjs +75 -18
- package/dist/node/cjs/index.cjs.map +1 -1
- package/dist/node/esm/index.mjs +75 -18
- package/dist/node/esm/index.mjs.map +1 -1
- package/examples/cycletls/README.md +54 -0
- package/examples/cycletls/package.json +12 -0
- package/package.json +25 -9
- package/rollup.config.mjs +34 -0
package/README.md
CHANGED
|
@@ -168,6 +168,41 @@ const scraper = new Scraper({
|
|
|
168
168
|
});
|
|
169
169
|
```
|
|
170
170
|
|
|
171
|
+
### Bypassing Cloudflare bot detection
|
|
172
|
+
|
|
173
|
+
In some cases, Twitter's authentication endpoints may be protected by Cloudflare's advanced bot detection, resulting in `403 Forbidden` errors during login. This typically happens because standard Node.js TLS fingerprints are detected as non-browser clients.
|
|
174
|
+
|
|
175
|
+
To bypass this protection, you can use the optional CycleTLS integration, which uses a Go-based TLS client to mimic the Chrome browser's TLS fingerprint:
|
|
176
|
+
|
|
177
|
+
**Installation:**
|
|
178
|
+
|
|
179
|
+
```sh
|
|
180
|
+
npm install cycletls
|
|
181
|
+
# or
|
|
182
|
+
yarn add cycletls
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
**Usage:**
|
|
186
|
+
|
|
187
|
+
```ts
|
|
188
|
+
import { Scraper } from '@the-convocation/twitter-scraper';
|
|
189
|
+
import { cycleTLSFetch, cycleTLSExit } from '@the-convocation/twitter-scraper/cycletls';
|
|
190
|
+
|
|
191
|
+
const scraper = new Scraper({
|
|
192
|
+
fetch: cycleTLSFetch,
|
|
193
|
+
});
|
|
194
|
+
|
|
195
|
+
// Use the scraper normally
|
|
196
|
+
await scraper.login(username, password, email);
|
|
197
|
+
|
|
198
|
+
// Important: cleanup CycleTLS resources when done
|
|
199
|
+
cycleTLSExit();
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
**Note:** The `/cycletls` entrypoint is Node.js only and will not work in browser environments. It's provided as a separate optional entrypoint to avoid bundling golang dependencies in environments where they cannot run.
|
|
203
|
+
|
|
204
|
+
See the [cycletls example](./examples/cycletls/) for a complete working example.
|
|
205
|
+
|
|
171
206
|
### Rate limiting
|
|
172
207
|
The Twitter API heavily rate-limits clients, requiring that the scraper has its own
|
|
173
208
|
rate-limit handling to behave predictably when rate-limiting occurs. By default, the
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var initCycleTLS = require('cycletls');
|
|
4
|
+
var headersPolyfill = require('headers-polyfill');
|
|
5
|
+
var debug = require('debug');
|
|
6
|
+
|
|
7
|
+
const log = debug("twitter-scraper:cycletls");
|
|
8
|
+
/**
 * Shared CycleTLS client, or `null` while no client is running.
 */
let cycleTLSInstance = null;

/**
 * Promise of an initialization that is still in flight, or `null`.
 * Concurrent callers await this promise instead of each starting a client.
 */
let cycleTLSInitPromise = null;

/**
 * Lazily creates the shared CycleTLS client and returns it.
 *
 * Calls made while the first initialization is still pending share that
 * initialization, so only one client is ever started at a time. If the
 * initialization fails, the rejection is propagated to every waiting caller
 * and the next call starts a fresh attempt.
 *
 * @returns {Promise<Function>} The shared CycleTLS client.
 */
async function initCycleTLSFetch() {
  if (cycleTLSInstance) {
    return cycleTLSInstance;
  }
  if (!cycleTLSInitPromise) {
    log("Initializing CycleTLS...");
    // Promise.resolve().then(...) keeps every callback asynchronous, so the
    // cleanup in finally() can never run before the assignment below finishes.
    cycleTLSInitPromise = Promise.resolve()
      .then(() => initCycleTLS())
      .then((instance) => {
        cycleTLSInstance = instance;
        log("CycleTLS initialized successfully");
        return instance;
      })
      .finally(() => {
        // Success: later calls use cycleTLSInstance. Failure: allow a retry.
        cycleTLSInitPromise = null;
      });
  }
  return cycleTLSInitPromise;
}

/**
 * Shuts down the shared CycleTLS client, if one is running.
 *
 * Does nothing when no client has finished initializing. The shared reference
 * is cleared before `exit()` is invoked, so a later `initCycleTLSFetch()` call
 * always starts a new client.
 *
 * @returns {void}
 */
function cycleTLSExit() {
  if (!cycleTLSInstance) {
    return;
  }
  log("Exiting CycleTLS...");
  const instance = cycleTLSInstance;
  cycleTLSInstance = null;
  const pendingExit = instance.exit();
  // exit() may hand back a promise; this function is synchronous, so a
  // rejection is logged here rather than left as an unhandled rejection.
  if (pendingExit && typeof pendingExit.catch === "function") {
    pendingExit.catch((error) => {
      log(`CycleTLS exit failed: ${error}`);
    });
  }
}
|
|
24
|
+
/**
 * fetch()-compatible wrapper that sends the request through the shared
 * CycleTLS client with a fixed JA3 fingerprint and converts the result into a
 * standard `Response`.
 *
 * Differences from a real `fetch` that callers should know about:
 * - The request body is always forwarded as a string. Binary bodies are
 *   decoded as UTF-8, which is lossy for non-text payloads.
 * - When `input` is a Request-like object its `method` and `headers` are used
 *   unless `init` overrides them; its body is NOT forwarded.
 * - `statusText` is always empty.
 *
 * @param {string | URL | Request} input Target URL or Request-like object.
 * @param {RequestInit} [init] Optional method, headers and body.
 * @returns {Promise<Response>} The response rebuilt as a fetch `Response`.
 * @throws Rethrows any error raised by the client or by `Response`
 *   construction, after logging it.
 */
async function cycleTLSFetch(input, init) {
  const instance = await initCycleTLSFetch();
  const isUrlLike = typeof input === "string" || input instanceof URL;
  const url = typeof input === "string" ? input : input instanceof URL ? input.toString() : input.url;
  const method = (init?.method || (isUrlLike ? undefined : input.method) || "GET").toUpperCase();
  log(`Making ${method} request to ${url}`);

  // Flatten the header container into a plain object. Anything exposing
  // forEach(value, key) is treated as Headers-like, so header objects that are
  // not instances of the polyfill class (e.g. native Headers) are not dropped.
  const headerSource = init?.headers || (isUrlLike ? undefined : input.headers);
  const headers = {};
  if (headerSource) {
    if (Array.isArray(headerSource)) {
      for (const [key, value] of headerSource) {
        headers[key] = value;
      }
    } else if (typeof headerSource.forEach === "function") {
      headerSource.forEach((value, key) => {
        headers[key] = value;
      });
    } else {
      Object.assign(headers, headerSource);
    }
  }

  // Serialize the body to a string; undefined when there is no body.
  let body;
  const rawBody = init?.body;
  if (rawBody) {
    if (typeof rawBody === "string") {
      body = rawBody;
    } else if (rawBody instanceof ArrayBuffer || ArrayBuffer.isView(rawBody)) {
      // toString() on binary data yields "[object ArrayBuffer]" or a
      // comma-joined byte list, so decode the bytes instead.
      body = new TextDecoder().decode(rawBody);
    } else {
      // Covers URLSearchParams and any other object with a useful toString().
      body = rawBody.toString();
    }
  }

  // Header names are case-insensitive; plain-object headers may use any casing.
  const userAgentKey = Object.keys(headers).find(
    (key) => key.toLowerCase() === "user-agent"
  );
  const options = {
    body,
    headers,
    // Chrome 120 on Windows 10
    // NOTE(review): the fallback user agent below advertises Chrome 135 while
    // this fingerprint is labelled Chrome 120 - confirm the pairing is intended.
    ja3: "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,0-23-65281-10-11-35-16-5-13-18-51-45-43-27-17513,29-23-24,0",
    userAgent: (userAgentKey && headers[userAgentKey]) || "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
  };

  try {
    const response = await instance(url, options, method.toLowerCase());

    // Multi-valued headers arrive as arrays and are appended one by one.
    const responseHeaders = new headersPolyfill.Headers();
    if (response.headers) {
      for (const [key, value] of Object.entries(response.headers)) {
        if (Array.isArray(value)) {
          for (const item of value) {
            responseHeaders.append(key, item);
          }
        } else if (typeof value === "string") {
          responseHeaders.set(key, value);
        }
      }
    }

    let responseBody = "";
    if (typeof response.text === "function") {
      responseBody = await response.text();
    } else if (response.body) {
      const rawResponseBody = response.body;
      if (typeof rawResponseBody === "string" || rawResponseBody instanceof ArrayBuffer || ArrayBuffer.isView(rawResponseBody)) {
        responseBody = rawResponseBody;
      } else {
        // Presumably an already-parsed JSON payload from clients without
        // text() - TODO confirm. Re-serialize instead of "[object Object]".
        responseBody = JSON.stringify(rawResponseBody);
      }
    }

    // The Response constructor throws when a body (even "") is combined with
    // a null-body status, so those statuses must be built with a null body.
    const isNullBodyStatus = [204, 205, 304].includes(response.status);
    return new Response(isNullBodyStatus ? null : responseBody, {
      status: response.status,
      // CycleTLS doesn't provide status text
      statusText: "",
      headers: responseHeaders
    });
  } catch (error) {
    log(`CycleTLS request failed: ${error}`);
    throw error;
  }
}
|
|
96
|
+
|
|
97
|
+
exports.cycleTLSExit = cycleTLSExit;
|
|
98
|
+
exports.cycleTLSFetch = cycleTLSFetch;
|
|
99
|
+
//# sourceMappingURL=index.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.cjs","sources":["../../../src/cycletls-fetch.ts"],"sourcesContent":["import initCycleTLS from 'cycletls';\nimport { Headers } from 'headers-polyfill';\nimport debug from 'debug';\n\nconst log = debug('twitter-scraper:cycletls');\n\nlet cycleTLSInstance: Awaited<ReturnType<typeof initCycleTLS>> | null = null;\n\n/**\n * Initialize the CycleTLS instance. This should be called once before using the fetch wrapper.\n */\nexport async function initCycleTLSFetch() {\n if (!cycleTLSInstance) {\n log('Initializing CycleTLS...');\n cycleTLSInstance = await initCycleTLS();\n log('CycleTLS initialized successfully');\n }\n return cycleTLSInstance;\n}\n\n/**\n * Cleanup the CycleTLS instance. Call this when you're done making requests.\n */\nexport function cycleTLSExit() {\n if (cycleTLSInstance) {\n log('Exiting CycleTLS...');\n cycleTLSInstance.exit();\n cycleTLSInstance = null;\n }\n}\n\n/**\n * A fetch-compatible wrapper around CycleTLS that mimics Chrome's TLS fingerprint\n * to bypass Cloudflare and other bot detection systems.\n */\nexport async function cycleTLSFetch(\n input: RequestInfo | URL,\n init?: RequestInit,\n): Promise<Response> {\n const instance = await initCycleTLSFetch();\n\n const url =\n typeof input === 'string'\n ? input\n : input instanceof URL\n ? 
input.toString()\n : input.url;\n const method = (init?.method || 'GET').toUpperCase();\n\n log(`Making ${method} request to ${url}`);\n\n // Extract headers from RequestInit\n const headers: Record<string, string> = {};\n if (init?.headers) {\n if (init.headers instanceof Headers) {\n init.headers.forEach((value, key) => {\n headers[key] = value;\n });\n } else if (Array.isArray(init.headers)) {\n init.headers.forEach(([key, value]) => {\n headers[key] = value;\n });\n } else {\n Object.assign(headers, init.headers);\n }\n }\n\n // Convert body to string if needed\n let body: string | undefined;\n if (init?.body) {\n if (typeof init.body === 'string') {\n body = init.body;\n } else if (init.body instanceof URLSearchParams) {\n body = init.body.toString();\n } else {\n body = init.body.toString();\n }\n }\n\n // Use Chrome 120 JA3 fingerprint for maximum compatibility\n const options = {\n body,\n headers,\n // Chrome 120 on Windows 10\n ja3: '771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,0-23-65281-10-11-35-16-5-13-18-51-45-43-27-17513,29-23-24,0',\n userAgent:\n headers['user-agent'] ||\n 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',\n };\n\n try {\n const response = await instance(\n url,\n options,\n method.toLowerCase() as\n | 'get'\n | 'post'\n | 'put'\n | 'delete'\n | 'patch'\n | 'head'\n | 'options',\n );\n\n // Convert CycleTLS response to fetch Response\n // CycleTLS returns headers as an object\n const responseHeaders = new Headers();\n if (response.headers) {\n Object.entries(response.headers).forEach(([key, value]) => {\n if (Array.isArray(value)) {\n value.forEach((v) => {\n responseHeaders.append(key, v);\n });\n } else if (typeof value === 'string') {\n responseHeaders.set(key, value);\n }\n });\n }\n\n // Get response body - cycletls provides helper methods, but we need the raw text\n // The response object has a text() method that returns the 
body as text\n let responseBody = '';\n if (typeof response.text === 'function') {\n responseBody = await response.text();\n } else if ((response as any).body) {\n responseBody = (response as any).body;\n }\n\n // Create a proper Response object using standard Response constructor\n const fetchResponse = new Response(responseBody, {\n status: response.status,\n statusText: '', // CycleTLS doesn't provide status text\n headers: responseHeaders,\n });\n\n return fetchResponse;\n } catch (error) {\n log(`CycleTLS request failed: ${error}`);\n throw error;\n }\n}\n"],"names":["Headers"],"mappings":";;;;;;AAIA,MAAM,GAAA,GAAM,MAAM,0BAA0B,CAAA,CAAA;AAE5C,IAAI,gBAAoE,GAAA,IAAA,CAAA;AAKxE,eAAsB,iBAAoB,GAAA;AACxC,EAAA,IAAI,CAAC,gBAAkB,EAAA;AACrB,IAAA,GAAA,CAAI,0BAA0B,CAAA,CAAA;AAC9B,IAAA,gBAAA,GAAmB,MAAM,YAAa,EAAA,CAAA;AACtC,IAAA,GAAA,CAAI,mCAAmC,CAAA,CAAA;AAAA,GACzC;AACA,EAAO,OAAA,gBAAA,CAAA;AACT,CAAA;AAKO,SAAS,YAAe,GAAA;AAC7B,EAAA,IAAI,gBAAkB,EAAA;AACpB,IAAA,GAAA,CAAI,qBAAqB,CAAA,CAAA;AACzB,IAAA,gBAAA,CAAiB,IAAK,EAAA,CAAA;AACtB,IAAmB,gBAAA,GAAA,IAAA,CAAA;AAAA,GACrB;AACF,CAAA;AAMsB,eAAA,aAAA,CACpB,OACA,IACmB,EAAA;AACnB,EAAM,MAAA,QAAA,GAAW,MAAM,iBAAkB,EAAA,CAAA;AAEzC,EAAM,MAAA,GAAA,GACJ,OAAO,KAAA,KAAU,QACb,GAAA,KAAA,GACA,iBAAiB,GACjB,GAAA,KAAA,CAAM,QAAS,EAAA,GACf,KAAM,CAAA,GAAA,CAAA;AACZ,EAAA,MAAM,MAAU,GAAA,CAAA,IAAA,EAAM,MAAU,IAAA,KAAA,EAAO,WAAY,EAAA,CAAA;AAEnD,EAAA,GAAA,CAAI,CAAU,OAAA,EAAA,MAAM,CAAe,YAAA,EAAA,GAAG,CAAE,CAAA,CAAA,CAAA;AAGxC,EAAA,MAAM,UAAkC,EAAC,CAAA;AACzC,EAAA,IAAI,MAAM,OAAS,EAAA;AACjB,IAAI,IAAA,IAAA,CAAK,mBAAmBA,uBAAS,EAAA;AACnC,MAAA,IAAA,CAAK,OAAQ,CAAA,OAAA,CAAQ,CAAC,KAAA,EAAO,GAAQ,KAAA;AACnC,QAAA,OAAA,CAAQ,GAAG,CAAI,GAAA,KAAA,CAAA;AAAA,OAChB,CAAA,CAAA;AAAA,KACQ,MAAA,IAAA,KAAA,CAAM,OAAQ,CAAA,IAAA,CAAK,OAAO,CAAG,EAAA;AACtC,MAAA,IAAA,CAAK,QAAQ,OAAQ,CAAA,CAAC,CAAC,GAAA,EAAK,KAAK,CAAM,KAAA;AACrC,QAAA,OAAA,CAAQ,GAAG,CAAI,GAAA,KAAA,CAAA;AAAA,OAChB,CAAA,CAAA;AAAA,KACI,MAAA;AACL,MAAO,MAAA,CAAA,MAAA,CAAO,OAAS,EAAA,IAAA,CAAK,OAAO,CAAA,CAAA;AAAA,KACrC;AAAA,GACF;AAGA,
EAAI,IAAA,IAAA,CAAA;AACJ,EAAA,IAAI,MAAM,IAAM,EAAA;AACd,IAAI,IAAA,OAAO,IAAK,CAAA,IAAA,KAAS,QAAU,EAAA;AACjC,MAAA,IAAA,GAAO,IAAK,CAAA,IAAA,CAAA;AAAA,KACd,MAAA,IAAW,IAAK,CAAA,IAAA,YAAgB,eAAiB,EAAA;AAC/C,MAAO,IAAA,GAAA,IAAA,CAAK,KAAK,QAAS,EAAA,CAAA;AAAA,KACrB,MAAA;AACL,MAAO,IAAA,GAAA,IAAA,CAAK,KAAK,QAAS,EAAA,CAAA;AAAA,KAC5B;AAAA,GACF;AAGA,EAAA,MAAM,OAAU,GAAA;AAAA,IACd,IAAA;AAAA,IACA,OAAA;AAAA;AAAA,IAEA,GAAK,EAAA,8IAAA;AAAA,IACL,SAAA,EACE,OAAQ,CAAA,YAAY,CACpB,IAAA,iHAAA;AAAA,GACJ,CAAA;AAEA,EAAI,IAAA;AACF,IAAA,MAAM,WAAW,MAAM,QAAA;AAAA,MACrB,GAAA;AAAA,MACA,OAAA;AAAA,MACA,OAAO,WAAY,EAAA;AAAA,KAQrB,CAAA;AAIA,IAAM,MAAA,eAAA,GAAkB,IAAIA,uBAAQ,EAAA,CAAA;AACpC,IAAA,IAAI,SAAS,OAAS,EAAA;AACpB,MAAO,MAAA,CAAA,OAAA,CAAQ,SAAS,OAAO,CAAA,CAAE,QAAQ,CAAC,CAAC,GAAK,EAAA,KAAK,CAAM,KAAA;AACzD,QAAI,IAAA,KAAA,CAAM,OAAQ,CAAA,KAAK,CAAG,EAAA;AACxB,UAAM,KAAA,CAAA,OAAA,CAAQ,CAAC,CAAM,KAAA;AACnB,YAAgB,eAAA,CAAA,MAAA,CAAO,KAAK,CAAC,CAAA,CAAA;AAAA,WAC9B,CAAA,CAAA;AAAA,SACH,MAAA,IAAW,OAAO,KAAA,KAAU,QAAU,EAAA;AACpC,UAAgB,eAAA,CAAA,GAAA,CAAI,KAAK,KAAK,CAAA,CAAA;AAAA,SAChC;AAAA,OACD,CAAA,CAAA;AAAA,KACH;AAIA,IAAA,IAAI,YAAe,GAAA,EAAA,CAAA;AACnB,IAAI,IAAA,OAAO,QAAS,CAAA,IAAA,KAAS,UAAY,EAAA;AACvC,MAAe,YAAA,GAAA,MAAM,SAAS,IAAK,EAAA,CAAA;AAAA,KACrC,MAAA,IAAY,SAAiB,IAAM,EAAA;AACjC,MAAA,YAAA,GAAgB,QAAiB,CAAA,IAAA,CAAA;AAAA,KACnC;AAGA,IAAM,MAAA,aAAA,GAAgB,IAAI,QAAA,CAAS,YAAc,EAAA;AAAA,MAC/C,QAAQ,QAAS,CAAA,MAAA;AAAA,MACjB,UAAY,EAAA,EAAA;AAAA;AAAA,MACZ,OAAS,EAAA,eAAA;AAAA,KACV,CAAA,CAAA;AAED,IAAO,OAAA,aAAA,CAAA;AAAA,WACA,KAAO,EAAA;AACd,IAAI,GAAA,CAAA,CAAA,yBAAA,EAA4B,KAAK,CAAE,CAAA,CAAA,CAAA;AACvC,IAAM,MAAA,KAAA,CAAA;AAAA,GACR;AACF;;;;;"}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import initCycleTLS from 'cycletls';
|
|
2
|
+
import { Headers } from 'headers-polyfill';
|
|
3
|
+
import debug from 'debug';
|
|
4
|
+
|
|
5
|
+
const log = debug("twitter-scraper:cycletls");
|
|
6
|
+
/**
 * Shared CycleTLS client, or `null` while no client is running.
 */
let cycleTLSInstance = null;

/**
 * Promise of an initialization that is still in flight, or `null`.
 * Concurrent callers await this promise instead of each starting a client.
 */
let cycleTLSInitPromise = null;

/**
 * Lazily creates the shared CycleTLS client and returns it.
 *
 * Calls made while the first initialization is still pending share that
 * initialization, so only one client is ever started at a time. If the
 * initialization fails, the rejection is propagated to every waiting caller
 * and the next call starts a fresh attempt.
 *
 * @returns {Promise<Function>} The shared CycleTLS client.
 */
async function initCycleTLSFetch() {
  if (cycleTLSInstance) {
    return cycleTLSInstance;
  }
  if (!cycleTLSInitPromise) {
    log("Initializing CycleTLS...");
    // Promise.resolve().then(...) keeps every callback asynchronous, so the
    // cleanup in finally() can never run before the assignment below finishes.
    cycleTLSInitPromise = Promise.resolve()
      .then(() => initCycleTLS())
      .then((instance) => {
        cycleTLSInstance = instance;
        log("CycleTLS initialized successfully");
        return instance;
      })
      .finally(() => {
        // Success: later calls use cycleTLSInstance. Failure: allow a retry.
        cycleTLSInitPromise = null;
      });
  }
  return cycleTLSInitPromise;
}

/**
 * Shuts down the shared CycleTLS client, if one is running.
 *
 * Does nothing when no client has finished initializing. The shared reference
 * is cleared before `exit()` is invoked, so a later `initCycleTLSFetch()` call
 * always starts a new client.
 *
 * @returns {void}
 */
function cycleTLSExit() {
  if (!cycleTLSInstance) {
    return;
  }
  log("Exiting CycleTLS...");
  const instance = cycleTLSInstance;
  cycleTLSInstance = null;
  const pendingExit = instance.exit();
  // exit() may hand back a promise; this function is synchronous, so a
  // rejection is logged here rather than left as an unhandled rejection.
  if (pendingExit && typeof pendingExit.catch === "function") {
    pendingExit.catch((error) => {
      log(`CycleTLS exit failed: ${error}`);
    });
  }
}
|
|
22
|
+
/**
 * fetch()-compatible wrapper that sends the request through the shared
 * CycleTLS client with a fixed JA3 fingerprint and converts the result into a
 * standard `Response`.
 *
 * Differences from a real `fetch` that callers should know about:
 * - The request body is always forwarded as a string. Binary bodies are
 *   decoded as UTF-8, which is lossy for non-text payloads.
 * - When `input` is a Request-like object its `method` and `headers` are used
 *   unless `init` overrides them; its body is NOT forwarded.
 * - `statusText` is always empty.
 *
 * @param {string | URL | Request} input Target URL or Request-like object.
 * @param {RequestInit} [init] Optional method, headers and body.
 * @returns {Promise<Response>} The response rebuilt as a fetch `Response`.
 * @throws Rethrows any error raised by the client or by `Response`
 *   construction, after logging it.
 */
async function cycleTLSFetch(input, init) {
  const instance = await initCycleTLSFetch();
  const isUrlLike = typeof input === "string" || input instanceof URL;
  const url = typeof input === "string" ? input : input instanceof URL ? input.toString() : input.url;
  const method = (init?.method || (isUrlLike ? undefined : input.method) || "GET").toUpperCase();
  log(`Making ${method} request to ${url}`);

  // Flatten the header container into a plain object. Anything exposing
  // forEach(value, key) is treated as Headers-like, so header objects that are
  // not instances of the imported Headers class are not dropped.
  const headerSource = init?.headers || (isUrlLike ? undefined : input.headers);
  const headers = {};
  if (headerSource) {
    if (Array.isArray(headerSource)) {
      for (const [key, value] of headerSource) {
        headers[key] = value;
      }
    } else if (typeof headerSource.forEach === "function") {
      headerSource.forEach((value, key) => {
        headers[key] = value;
      });
    } else {
      Object.assign(headers, headerSource);
    }
  }

  // Serialize the body to a string; undefined when there is no body.
  let body;
  const rawBody = init?.body;
  if (rawBody) {
    if (typeof rawBody === "string") {
      body = rawBody;
    } else if (rawBody instanceof ArrayBuffer || ArrayBuffer.isView(rawBody)) {
      // toString() on binary data yields "[object ArrayBuffer]" or a
      // comma-joined byte list, so decode the bytes instead.
      body = new TextDecoder().decode(rawBody);
    } else {
      // Covers URLSearchParams and any other object with a useful toString().
      body = rawBody.toString();
    }
  }

  // Header names are case-insensitive; plain-object headers may use any casing.
  const userAgentKey = Object.keys(headers).find(
    (key) => key.toLowerCase() === "user-agent"
  );
  const options = {
    body,
    headers,
    // Chrome 120 on Windows 10
    // NOTE(review): the fallback user agent below advertises Chrome 135 while
    // this fingerprint is labelled Chrome 120 - confirm the pairing is intended.
    ja3: "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,0-23-65281-10-11-35-16-5-13-18-51-45-43-27-17513,29-23-24,0",
    userAgent: (userAgentKey && headers[userAgentKey]) || "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
  };

  try {
    const response = await instance(url, options, method.toLowerCase());

    // Multi-valued headers arrive as arrays and are appended one by one.
    const responseHeaders = new Headers();
    if (response.headers) {
      for (const [key, value] of Object.entries(response.headers)) {
        if (Array.isArray(value)) {
          for (const item of value) {
            responseHeaders.append(key, item);
          }
        } else if (typeof value === "string") {
          responseHeaders.set(key, value);
        }
      }
    }

    let responseBody = "";
    if (typeof response.text === "function") {
      responseBody = await response.text();
    } else if (response.body) {
      const rawResponseBody = response.body;
      if (typeof rawResponseBody === "string" || rawResponseBody instanceof ArrayBuffer || ArrayBuffer.isView(rawResponseBody)) {
        responseBody = rawResponseBody;
      } else {
        // Presumably an already-parsed JSON payload from clients without
        // text() - TODO confirm. Re-serialize instead of "[object Object]".
        responseBody = JSON.stringify(rawResponseBody);
      }
    }

    // The Response constructor throws when a body (even "") is combined with
    // a null-body status, so those statuses must be built with a null body.
    const isNullBodyStatus = [204, 205, 304].includes(response.status);
    return new Response(isNullBodyStatus ? null : responseBody, {
      status: response.status,
      // CycleTLS doesn't provide status text
      statusText: "",
      headers: responseHeaders
    });
  } catch (error) {
    log(`CycleTLS request failed: ${error}`);
    throw error;
  }
}
|
|
94
|
+
|
|
95
|
+
export { cycleTLSExit, cycleTLSFetch };
|
|
96
|
+
//# sourceMappingURL=index.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.mjs","sources":["../../../src/cycletls-fetch.ts"],"sourcesContent":["import initCycleTLS from 'cycletls';\nimport { Headers } from 'headers-polyfill';\nimport debug from 'debug';\n\nconst log = debug('twitter-scraper:cycletls');\n\nlet cycleTLSInstance: Awaited<ReturnType<typeof initCycleTLS>> | null = null;\n\n/**\n * Initialize the CycleTLS instance. This should be called once before using the fetch wrapper.\n */\nexport async function initCycleTLSFetch() {\n if (!cycleTLSInstance) {\n log('Initializing CycleTLS...');\n cycleTLSInstance = await initCycleTLS();\n log('CycleTLS initialized successfully');\n }\n return cycleTLSInstance;\n}\n\n/**\n * Cleanup the CycleTLS instance. Call this when you're done making requests.\n */\nexport function cycleTLSExit() {\n if (cycleTLSInstance) {\n log('Exiting CycleTLS...');\n cycleTLSInstance.exit();\n cycleTLSInstance = null;\n }\n}\n\n/**\n * A fetch-compatible wrapper around CycleTLS that mimics Chrome's TLS fingerprint\n * to bypass Cloudflare and other bot detection systems.\n */\nexport async function cycleTLSFetch(\n input: RequestInfo | URL,\n init?: RequestInit,\n): Promise<Response> {\n const instance = await initCycleTLSFetch();\n\n const url =\n typeof input === 'string'\n ? input\n : input instanceof URL\n ? 
input.toString()\n : input.url;\n const method = (init?.method || 'GET').toUpperCase();\n\n log(`Making ${method} request to ${url}`);\n\n // Extract headers from RequestInit\n const headers: Record<string, string> = {};\n if (init?.headers) {\n if (init.headers instanceof Headers) {\n init.headers.forEach((value, key) => {\n headers[key] = value;\n });\n } else if (Array.isArray(init.headers)) {\n init.headers.forEach(([key, value]) => {\n headers[key] = value;\n });\n } else {\n Object.assign(headers, init.headers);\n }\n }\n\n // Convert body to string if needed\n let body: string | undefined;\n if (init?.body) {\n if (typeof init.body === 'string') {\n body = init.body;\n } else if (init.body instanceof URLSearchParams) {\n body = init.body.toString();\n } else {\n body = init.body.toString();\n }\n }\n\n // Use Chrome 120 JA3 fingerprint for maximum compatibility\n const options = {\n body,\n headers,\n // Chrome 120 on Windows 10\n ja3: '771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,0-23-65281-10-11-35-16-5-13-18-51-45-43-27-17513,29-23-24,0',\n userAgent:\n headers['user-agent'] ||\n 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',\n };\n\n try {\n const response = await instance(\n url,\n options,\n method.toLowerCase() as\n | 'get'\n | 'post'\n | 'put'\n | 'delete'\n | 'patch'\n | 'head'\n | 'options',\n );\n\n // Convert CycleTLS response to fetch Response\n // CycleTLS returns headers as an object\n const responseHeaders = new Headers();\n if (response.headers) {\n Object.entries(response.headers).forEach(([key, value]) => {\n if (Array.isArray(value)) {\n value.forEach((v) => {\n responseHeaders.append(key, v);\n });\n } else if (typeof value === 'string') {\n responseHeaders.set(key, value);\n }\n });\n }\n\n // Get response body - cycletls provides helper methods, but we need the raw text\n // The response object has a text() method that returns the 
body as text\n let responseBody = '';\n if (typeof response.text === 'function') {\n responseBody = await response.text();\n } else if ((response as any).body) {\n responseBody = (response as any).body;\n }\n\n // Create a proper Response object using standard Response constructor\n const fetchResponse = new Response(responseBody, {\n status: response.status,\n statusText: '', // CycleTLS doesn't provide status text\n headers: responseHeaders,\n });\n\n return fetchResponse;\n } catch (error) {\n log(`CycleTLS request failed: ${error}`);\n throw error;\n }\n}\n"],"names":[],"mappings":";;;;AAIA,MAAM,GAAA,GAAM,MAAM,0BAA0B,CAAA,CAAA;AAE5C,IAAI,gBAAoE,GAAA,IAAA,CAAA;AAKxE,eAAsB,iBAAoB,GAAA;AACxC,EAAA,IAAI,CAAC,gBAAkB,EAAA;AACrB,IAAA,GAAA,CAAI,0BAA0B,CAAA,CAAA;AAC9B,IAAA,gBAAA,GAAmB,MAAM,YAAa,EAAA,CAAA;AACtC,IAAA,GAAA,CAAI,mCAAmC,CAAA,CAAA;AAAA,GACzC;AACA,EAAO,OAAA,gBAAA,CAAA;AACT,CAAA;AAKO,SAAS,YAAe,GAAA;AAC7B,EAAA,IAAI,gBAAkB,EAAA;AACpB,IAAA,GAAA,CAAI,qBAAqB,CAAA,CAAA;AACzB,IAAA,gBAAA,CAAiB,IAAK,EAAA,CAAA;AACtB,IAAmB,gBAAA,GAAA,IAAA,CAAA;AAAA,GACrB;AACF,CAAA;AAMsB,eAAA,aAAA,CACpB,OACA,IACmB,EAAA;AACnB,EAAM,MAAA,QAAA,GAAW,MAAM,iBAAkB,EAAA,CAAA;AAEzC,EAAM,MAAA,GAAA,GACJ,OAAO,KAAA,KAAU,QACb,GAAA,KAAA,GACA,iBAAiB,GACjB,GAAA,KAAA,CAAM,QAAS,EAAA,GACf,KAAM,CAAA,GAAA,CAAA;AACZ,EAAA,MAAM,MAAU,GAAA,CAAA,IAAA,EAAM,MAAU,IAAA,KAAA,EAAO,WAAY,EAAA,CAAA;AAEnD,EAAA,GAAA,CAAI,CAAU,OAAA,EAAA,MAAM,CAAe,YAAA,EAAA,GAAG,CAAE,CAAA,CAAA,CAAA;AAGxC,EAAA,MAAM,UAAkC,EAAC,CAAA;AACzC,EAAA,IAAI,MAAM,OAAS,EAAA;AACjB,IAAI,IAAA,IAAA,CAAK,mBAAmB,OAAS,EAAA;AACnC,MAAA,IAAA,CAAK,OAAQ,CAAA,OAAA,CAAQ,CAAC,KAAA,EAAO,GAAQ,KAAA;AACnC,QAAA,OAAA,CAAQ,GAAG,CAAI,GAAA,KAAA,CAAA;AAAA,OAChB,CAAA,CAAA;AAAA,KACQ,MAAA,IAAA,KAAA,CAAM,OAAQ,CAAA,IAAA,CAAK,OAAO,CAAG,EAAA;AACtC,MAAA,IAAA,CAAK,QAAQ,OAAQ,CAAA,CAAC,CAAC,GAAA,EAAK,KAAK,CAAM,KAAA;AACrC,QAAA,OAAA,CAAQ,GAAG,CAAI,GAAA,KAAA,CAAA;AAAA,OAChB,CAAA,CAAA;AAAA,KACI,MAAA;AACL,MAAO,MAAA,CAAA,MAAA,CAAO,OAAS,EAAA,IAAA,CAAK,OAAO,CAAA,CAAA;AAAA,KACrC;AAAA,GACF;AAGA,EAAI,IAAA,IAA
A,CAAA;AACJ,EAAA,IAAI,MAAM,IAAM,EAAA;AACd,IAAI,IAAA,OAAO,IAAK,CAAA,IAAA,KAAS,QAAU,EAAA;AACjC,MAAA,IAAA,GAAO,IAAK,CAAA,IAAA,CAAA;AAAA,KACd,MAAA,IAAW,IAAK,CAAA,IAAA,YAAgB,eAAiB,EAAA;AAC/C,MAAO,IAAA,GAAA,IAAA,CAAK,KAAK,QAAS,EAAA,CAAA;AAAA,KACrB,MAAA;AACL,MAAO,IAAA,GAAA,IAAA,CAAK,KAAK,QAAS,EAAA,CAAA;AAAA,KAC5B;AAAA,GACF;AAGA,EAAA,MAAM,OAAU,GAAA;AAAA,IACd,IAAA;AAAA,IACA,OAAA;AAAA;AAAA,IAEA,GAAK,EAAA,8IAAA;AAAA,IACL,SAAA,EACE,OAAQ,CAAA,YAAY,CACpB,IAAA,iHAAA;AAAA,GACJ,CAAA;AAEA,EAAI,IAAA;AACF,IAAA,MAAM,WAAW,MAAM,QAAA;AAAA,MACrB,GAAA;AAAA,MACA,OAAA;AAAA,MACA,OAAO,WAAY,EAAA;AAAA,KAQrB,CAAA;AAIA,IAAM,MAAA,eAAA,GAAkB,IAAI,OAAQ,EAAA,CAAA;AACpC,IAAA,IAAI,SAAS,OAAS,EAAA;AACpB,MAAO,MAAA,CAAA,OAAA,CAAQ,SAAS,OAAO,CAAA,CAAE,QAAQ,CAAC,CAAC,GAAK,EAAA,KAAK,CAAM,KAAA;AACzD,QAAI,IAAA,KAAA,CAAM,OAAQ,CAAA,KAAK,CAAG,EAAA;AACxB,UAAM,KAAA,CAAA,OAAA,CAAQ,CAAC,CAAM,KAAA;AACnB,YAAgB,eAAA,CAAA,MAAA,CAAO,KAAK,CAAC,CAAA,CAAA;AAAA,WAC9B,CAAA,CAAA;AAAA,SACH,MAAA,IAAW,OAAO,KAAA,KAAU,QAAU,EAAA;AACpC,UAAgB,eAAA,CAAA,GAAA,CAAI,KAAK,KAAK,CAAA,CAAA;AAAA,SAChC;AAAA,OACD,CAAA,CAAA;AAAA,KACH;AAIA,IAAA,IAAI,YAAe,GAAA,EAAA,CAAA;AACnB,IAAI,IAAA,OAAO,QAAS,CAAA,IAAA,KAAS,UAAY,EAAA;AACvC,MAAe,YAAA,GAAA,MAAM,SAAS,IAAK,EAAA,CAAA;AAAA,KACrC,MAAA,IAAY,SAAiB,IAAM,EAAA;AACjC,MAAA,YAAA,GAAgB,QAAiB,CAAA,IAAA,CAAA;AAAA,KACnC;AAGA,IAAM,MAAA,aAAA,GAAgB,IAAI,QAAA,CAAS,YAAc,EAAA;AAAA,MAC/C,QAAQ,QAAS,CAAA,MAAA;AAAA,MACjB,UAAY,EAAA,EAAA;AAAA;AAAA,MACZ,OAAS,EAAA,eAAA;AAAA,KACV,CAAA,CAAA;AAED,IAAO,OAAA,aAAA,CAAA;AAAA,WACA,KAAO,EAAA;AACd,IAAI,GAAA,CAAA,CAAA,yBAAA,EAA4B,KAAK,CAAE,CAAA,CAAA,CAAA;AACvC,IAAM,MAAA,KAAA,CAAA;AAAA,GACR;AACF;;;;"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cleanup the CycleTLS instance. Call this when you're done making requests.
|
|
3
|
+
*/
|
|
4
|
+
declare function cycleTLSExit(): void;
|
|
5
|
+
/**
|
|
6
|
+
* A fetch-compatible wrapper around CycleTLS that mimics Chrome's TLS fingerprint
|
|
7
|
+
* to bypass Cloudflare and other bot detection systems.
|
|
8
|
+
*/
|
|
9
|
+
declare function cycleTLSFetch(input: RequestInfo | URL, init?: RequestInit): Promise<Response>;
|
|
10
|
+
|
|
11
|
+
export { cycleTLSExit, cycleTLSFetch };
|
|
@@ -70,13 +70,13 @@ class AuthenticationError extends Error {
|
|
|
70
70
|
}
|
|
71
71
|
}
|
|
72
72
|
|
|
73
|
-
const log$
|
|
73
|
+
const log$4 = debug("twitter-scraper:rate-limit");
|
|
74
74
|
class WaitingRateLimitStrategy {
|
|
75
75
|
async onRateLimit({ response: res }) {
|
|
76
76
|
const xRateLimitLimit = res.headers.get("x-rate-limit-limit");
|
|
77
77
|
const xRateLimitRemaining = res.headers.get("x-rate-limit-remaining");
|
|
78
78
|
const xRateLimitReset = res.headers.get("x-rate-limit-reset");
|
|
79
|
-
log$
|
|
79
|
+
log$4(
|
|
80
80
|
`Rate limit event: limit=${xRateLimitLimit}, remaining=${xRateLimitRemaining}, reset=${xRateLimitReset}`
|
|
81
81
|
);
|
|
82
82
|
if (xRateLimitRemaining == "0" && xRateLimitReset) {
|
|
@@ -108,16 +108,47 @@ class Platform {
|
|
|
108
108
|
}
|
|
109
109
|
}
|
|
110
110
|
|
|
111
|
+
const log$3 = debug("twitter-scraper:requests");
|
|
111
112
|
async function updateCookieJar(cookieJar, headers) {
|
|
112
|
-
|
|
113
|
-
if (
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
113
|
+
let setCookieHeaders = [];
|
|
114
|
+
if (typeof headers.getSetCookie === "function") {
|
|
115
|
+
setCookieHeaders = headers.getSetCookie();
|
|
116
|
+
} else {
|
|
117
|
+
const setCookieHeader = headers.get("set-cookie");
|
|
118
|
+
if (setCookieHeader) {
|
|
119
|
+
setCookieHeaders = setCookie.splitCookiesString(setCookieHeader);
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
if (setCookieHeaders.length > 0) {
|
|
123
|
+
for (const cookieStr of setCookieHeaders) {
|
|
124
|
+
const cookie = toughCookie.Cookie.parse(cookieStr);
|
|
125
|
+
if (!cookie) {
|
|
126
|
+
log$3(`Failed to parse cookie: ${cookieStr.substring(0, 100)}`);
|
|
127
|
+
continue;
|
|
128
|
+
}
|
|
129
|
+
if (cookie.maxAge === 0 || cookie.expires && cookie.expires < /* @__PURE__ */ new Date()) {
|
|
130
|
+
if (cookie.key === "ct0") {
|
|
131
|
+
log$3(`Skipping deletion of ct0 cookie (Max-Age=0)`);
|
|
132
|
+
}
|
|
133
|
+
continue;
|
|
134
|
+
}
|
|
135
|
+
try {
|
|
136
|
+
const url = `${cookie.secure ? "https" : "http"}://${cookie.domain}${cookie.path}`;
|
|
137
|
+
await cookieJar.setCookie(cookie, url);
|
|
138
|
+
if (cookie.key === "ct0") {
|
|
139
|
+
log$3(
|
|
140
|
+
`Successfully set ct0 cookie with value: ${cookie.value.substring(
|
|
141
|
+
0,
|
|
142
|
+
20
|
|
143
|
+
)}...`
|
|
144
|
+
);
|
|
145
|
+
}
|
|
146
|
+
} catch (err) {
|
|
147
|
+
log$3(`Failed to set cookie ${cookie.key}: ${err}`);
|
|
148
|
+
if (cookie.key === "ct0") {
|
|
149
|
+
log$3(`FAILED to set ct0 cookie! Error: ${err}`);
|
|
150
|
+
}
|
|
151
|
+
}
|
|
121
152
|
}
|
|
122
153
|
} else if (typeof document !== "undefined") {
|
|
123
154
|
for (const cookie of document.cookie.split(";")) {
|
|
@@ -135,9 +166,8 @@ async function jitter(maxMs) {
|
|
|
135
166
|
const jitter2 = Math.random() * maxMs;
|
|
136
167
|
await new Promise((resolve) => setTimeout(resolve, jitter2));
|
|
137
168
|
}
|
|
138
|
-
async function requestApi(url, auth, method = "GET", platform = new Platform()) {
|
|
169
|
+
async function requestApi(url, auth, method = "GET", platform = new Platform(), headers = new headersPolyfill.Headers()) {
|
|
139
170
|
log$2(`Making ${method} request to ${url}`);
|
|
140
|
-
const headers = new headersPolyfill.Headers();
|
|
141
171
|
await auth.installTo(headers, url);
|
|
142
172
|
await platform.randomizeCiphers();
|
|
143
173
|
let res;
|
|
@@ -323,6 +353,16 @@ class TwitterGuestAuth {
|
|
|
323
353
|
}
|
|
324
354
|
headers.set("cookie", await this.getCookieString());
|
|
325
355
|
}
|
|
356
|
+
async setCookie(key, value) {
|
|
357
|
+
const cookie = toughCookie.Cookie.parse(`${key}=${value}`);
|
|
358
|
+
if (!cookie) {
|
|
359
|
+
throw new Error("Failed to parse cookie.");
|
|
360
|
+
}
|
|
361
|
+
await this.jar.setCookie(cookie, this.getCookieJarUrl());
|
|
362
|
+
if (typeof document !== "undefined") {
|
|
363
|
+
document.cookie = cookie.toString();
|
|
364
|
+
}
|
|
365
|
+
}
|
|
326
366
|
async getCookies() {
|
|
327
367
|
return this.jar.getCookies(this.getCookieJarUrl());
|
|
328
368
|
}
|
|
@@ -373,6 +413,8 @@ class TwitterGuestAuth {
|
|
|
373
413
|
}
|
|
374
414
|
this.guestToken = newGuestToken;
|
|
375
415
|
this.guestCreatedAt = /* @__PURE__ */ new Date();
|
|
416
|
+
await this.setCookie("gt", newGuestToken);
|
|
417
|
+
log$1(`Updated guest token: ${newGuestToken}`);
|
|
376
418
|
}
|
|
377
419
|
/**
|
|
378
420
|
* Returns if the authentication token needs to be updated or not.
|
|
@@ -499,7 +541,11 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
499
541
|
}
|
|
500
542
|
async installTo(headers) {
|
|
501
543
|
headers.set("authorization", `Bearer ${this.bearerToken}`);
|
|
502
|
-
|
|
544
|
+
const cookie = await this.getCookieString();
|
|
545
|
+
headers.set("cookie", cookie);
|
|
546
|
+
if (this.guestToken) {
|
|
547
|
+
headers.set("x-guest-token", this.guestToken);
|
|
548
|
+
}
|
|
503
549
|
await this.installCsrfToken(headers);
|
|
504
550
|
}
|
|
505
551
|
async initLogin() {
|
|
@@ -712,16 +758,27 @@ class TwitterUserAuth extends TwitterGuestAuth {
|
|
|
712
758
|
);
|
|
713
759
|
}
|
|
714
760
|
const headers = new headersPolyfill.Headers({
|
|
715
|
-
|
|
716
|
-
|
|
761
|
+
accept: "*/*",
|
|
762
|
+
"accept-language": "en-US,en;q=0.9",
|
|
717
763
|
"content-type": "application/json",
|
|
718
|
-
"
|
|
764
|
+
"cache-control": "no-cache",
|
|
765
|
+
origin: "https://x.com",
|
|
766
|
+
pragma: "no-cache",
|
|
767
|
+
priority: "u=1, i",
|
|
768
|
+
referer: "https://x.com/",
|
|
769
|
+
"sec-ch-ua": '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
|
|
770
|
+
"sec-ch-ua-mobile": "?0",
|
|
771
|
+
"sec-ch-ua-platform": '"Windows"',
|
|
772
|
+
"sec-fetch-dest": "empty",
|
|
773
|
+
"sec-fetch-mode": "cors",
|
|
774
|
+
"sec-fetch-site": "same-origin",
|
|
775
|
+
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
|
|
719
776
|
"x-guest-token": token,
|
|
720
777
|
"x-twitter-auth-type": "OAuth2Client",
|
|
721
778
|
"x-twitter-active-user": "yes",
|
|
722
779
|
"x-twitter-client-language": "en"
|
|
723
780
|
});
|
|
724
|
-
await this.
|
|
781
|
+
await this.installTo(headers);
|
|
725
782
|
let res;
|
|
726
783
|
do {
|
|
727
784
|
const fetchParameters = [
|