@the-convocation/twitter-scraper 0.14.1 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -0
- package/dist/default/cjs/index.js +37 -11
- package/dist/default/cjs/index.js.map +1 -1
- package/dist/default/esm/index.mjs +35 -12
- package/dist/default/esm/index.mjs.map +1 -1
- package/dist/node/cjs/index.cjs +37 -11
- package/dist/node/cjs/index.cjs.map +1 -1
- package/dist/node/esm/index.mjs +35 -12
- package/dist/node/esm/index.mjs.map +1 -1
- package/dist/types/index.d.ts +66 -1
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -168,6 +168,38 @@ const scraper = new Scraper({
|
|
|
168
168
|
});
|
|
169
169
|
```
|
|
170
170
|
|
|
171
|
+
### Rate limiting
|
|
172
|
+
The Twitter API heavily rate-limits clients, requiring that the scraper has its own
|
|
173
|
+
rate-limit handling to behave predictably when rate-limiting occurs. By default, the
|
|
174
|
+
scraper uses a rate-limiting strategy that waits for the current rate-limiting period
|
|
175
|
+
to expire before resuming requests.
|
|
176
|
+
|
|
177
|
+
**This has been known to take a very long time in some cases (up to 13 minutes).**
|
|
178
|
+
|
|
179
|
+
You may want to change how rate-limiting events are handled, potentially by pooling
|
|
180
|
+
scrapers logged in to different accounts (approach currently out of scope for this
|
|
181
|
+
README). The rate-limit handling strategy can be configured by passing a custom
|
|
182
|
+
implementation to the `rateLimitStrategy` option in the scraper constructor:
|
|
183
|
+
|
|
184
|
+
```ts
|
|
185
|
+
import { Scraper, RateLimitEvent, RateLimitStrategy } from "@the-convocation/twitter-scraper";
|
|
186
|
+
|
|
187
|
+
class CustomRateLimitStrategy implements RateLimitStrategy {
|
|
188
|
+
async onRateLimit(event: RateLimitEvent): Promise<void> {
|
|
189
|
+
// your own logic...
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
const scraper = new Scraper({
|
|
194
|
+
rateLimitStrategy: new CustomRateLimitStrategy(),
|
|
195
|
+
});
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
More information on this interface can be found on the [`RateLimitStrategy`](https://the-convocation.github.io/twitter-scraper/interfaces/RateLimitStrategy.html)
|
|
199
|
+
page in the documentation. The library provides two pre-written implementations to choose from:
|
|
200
|
+
- `WaitingRateLimitStrategy`: The default, which waits for the limit to expire.
|
|
201
|
+
- `ErrorRateLimitStrategy`: A strategy that throws if any rate-limit event occurs.
|
|
202
|
+
|
|
171
203
|
## Contributing
|
|
172
204
|
|
|
173
205
|
### Setup
|
|
@@ -48,6 +48,23 @@ class ApiError extends Error {
|
|
|
48
48
|
}
|
|
49
49
|
}
|
|
50
50
|
|
|
51
|
+
class WaitingRateLimitStrategy {
|
|
52
|
+
async onRateLimit({ response: res }) {
|
|
53
|
+
const xRateLimitRemaining = res.headers.get("x-rate-limit-remaining");
|
|
54
|
+
const xRateLimitReset = res.headers.get("x-rate-limit-reset");
|
|
55
|
+
if (xRateLimitRemaining == "0" && xRateLimitReset) {
|
|
56
|
+
const currentTime = (/* @__PURE__ */ new Date()).valueOf() / 1e3;
|
|
57
|
+
const timeDeltaMs = 1e3 * (parseInt(xRateLimitReset) - currentTime);
|
|
58
|
+
await new Promise((resolve) => setTimeout(resolve, timeDeltaMs));
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
/**
 * Rate-limit strategy that fails fast: any rate-limit event is turned into a
 * thrown error built from the rate-limited response, with no waiting.
 */
class ErrorRateLimitStrategy {
  /**
   * Rejects with the error that `ApiError.fromResponse` builds for the
   * rate-limited response.
   *
   * The method is `async` so that it always returns a promise, like the
   * waiting strategy does, and the failure surfaces as a rejection.
   *
   * @param {{ response: unknown }} event The rate-limit event; only
   *   `response` is used and it is handed to `ApiError.fromResponse` as-is.
   * @returns {Promise<never>} Always rejects.
   */
  async onRateLimit({ response: res }) {
    // NOTE(review): `fromResponse` is not visible from here. If it builds the
    // error asynchronously, throwing its result directly would throw a Promise
    // instead of the error; `await` is correct in both cases.
    throw await ApiError.fromResponse(res);
  }
}
|
|
67
|
+
|
|
51
68
|
const genericPlatform = new class {
|
|
52
69
|
randomizeCiphers() {
|
|
53
70
|
return Promise.resolve();
|
|
@@ -92,12 +109,16 @@ async function requestApi(url, auth, method = "GET", platform = new Platform())
|
|
|
92
109
|
await platform.randomizeCiphers();
|
|
93
110
|
let res;
|
|
94
111
|
do {
|
|
95
|
-
|
|
96
|
-
|
|
112
|
+
const fetchParameters = [
|
|
113
|
+
url,
|
|
114
|
+
{
|
|
97
115
|
method,
|
|
98
116
|
headers,
|
|
99
117
|
credentials: "include"
|
|
100
|
-
}
|
|
118
|
+
}
|
|
119
|
+
];
|
|
120
|
+
try {
|
|
121
|
+
res = await auth.fetch(...fetchParameters);
|
|
101
122
|
} catch (err) {
|
|
102
123
|
if (!(err instanceof Error)) {
|
|
103
124
|
throw err;
|
|
@@ -109,13 +130,10 @@ async function requestApi(url, auth, method = "GET", platform = new Platform())
|
|
|
109
130
|
}
|
|
110
131
|
await updateCookieJar(auth.cookieJar(), res.headers);
|
|
111
132
|
if (res.status === 429) {
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
const timeDeltaMs = 1e3 * (parseInt(xRateLimitReset) - currentTime);
|
|
117
|
-
await new Promise((resolve) => setTimeout(resolve, timeDeltaMs));
|
|
118
|
-
}
|
|
133
|
+
await auth.onRateLimit({
|
|
134
|
+
fetchParameters,
|
|
135
|
+
response: res
|
|
136
|
+
});
|
|
119
137
|
}
|
|
120
138
|
} while (res.status === 429);
|
|
121
139
|
if (!res.ok) {
|
|
@@ -216,9 +234,13 @@ class TwitterGuestAuth {
|
|
|
216
234
|
constructor(bearerToken, options) {
|
|
217
235
|
this.options = options;
|
|
218
236
|
this.fetch = withTransform(options?.fetch ?? fetch, options?.transform);
|
|
237
|
+
this.rateLimitStrategy = options?.rateLimitStrategy ?? new WaitingRateLimitStrategy();
|
|
219
238
|
this.bearerToken = bearerToken;
|
|
220
239
|
this.jar = new toughCookie.CookieJar();
|
|
221
240
|
}
|
|
241
|
+
async onRateLimit(event) {
|
|
242
|
+
await this.rateLimitStrategy.onRateLimit(event);
|
|
243
|
+
}
|
|
222
244
|
cookieJar() {
|
|
223
245
|
return this.jar;
|
|
224
246
|
}
|
|
@@ -2110,7 +2132,8 @@ class Scraper {
|
|
|
2110
2132
|
getAuthOptions() {
|
|
2111
2133
|
return {
|
|
2112
2134
|
fetch: this.options?.fetch,
|
|
2113
|
-
transform: this.options?.transform
|
|
2135
|
+
transform: this.options?.transform,
|
|
2136
|
+
rateLimitStrategy: this.options?.rateLimitStrategy
|
|
2114
2137
|
};
|
|
2115
2138
|
}
|
|
2116
2139
|
handleResponse(res) {
|
|
@@ -2121,6 +2144,9 @@ class Scraper {
|
|
|
2121
2144
|
}
|
|
2122
2145
|
}
|
|
2123
2146
|
|
|
2147
|
+
exports.ApiError = ApiError;
|
|
2148
|
+
exports.ErrorRateLimitStrategy = ErrorRateLimitStrategy;
|
|
2124
2149
|
exports.Scraper = Scraper;
|
|
2125
2150
|
exports.SearchMode = SearchMode;
|
|
2151
|
+
exports.WaitingRateLimitStrategy = WaitingRateLimitStrategy;
|
|
2126
2152
|
//# sourceMappingURL=index.js.map
|