rezo 1.0.67 → 1.0.69
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/index.cjs +6 -6
- package/dist/cache/index.cjs +9 -9
- package/dist/crawler/index.cjs +40 -40
- package/dist/entries/crawler.cjs +4 -4
- package/dist/index.cjs +27 -27
- package/dist/internal/agents/index.cjs +10 -10
- package/dist/proxy/index.cjs +4 -4
- package/dist/queue/index.cjs +8 -8
- package/dist/responses/universal/index.cjs +11 -11
- package/dist/wget/asset-extractor.cjs +556 -0
- package/dist/wget/asset-extractor.js +553 -0
- package/dist/wget/asset-organizer.cjs +230 -0
- package/dist/wget/asset-organizer.js +227 -0
- package/dist/wget/download-cache.cjs +221 -0
- package/dist/wget/download-cache.js +218 -0
- package/dist/wget/downloader.cjs +607 -0
- package/dist/wget/downloader.js +604 -0
- package/dist/wget/file-writer.cjs +349 -0
- package/dist/wget/file-writer.js +346 -0
- package/dist/wget/filter-lists.cjs +1330 -0
- package/dist/wget/filter-lists.js +1330 -0
- package/dist/wget/index.cjs +633 -0
- package/dist/wget/index.d.ts +8494 -0
- package/dist/wget/index.js +614 -0
- package/dist/wget/link-converter.cjs +342 -0
- package/dist/wget/link-converter.js +339 -0
- package/dist/wget/progress.cjs +271 -0
- package/dist/wget/progress.js +266 -0
- package/dist/wget/resume.cjs +166 -0
- package/dist/wget/resume.js +163 -0
- package/dist/wget/robots.cjs +303 -0
- package/dist/wget/robots.js +300 -0
- package/dist/wget/types.cjs +200 -0
- package/dist/wget/types.js +197 -0
- package/dist/wget/url-filter.cjs +351 -0
- package/dist/wget/url-filter.js +348 -0
- package/package.json +6 -1
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
// Fallback user-agent string; RobotsHandler's constructor uses it when options.userAgent is falsy.
const DEFAULT_USER_AGENT = "Rezo-Wget";
|
|
2
|
+
|
|
3
|
+
/**
 * Fetches, parses, caches and evaluates robots.txt files, keyed by lower-cased
 * hostname.
 *
 * A parsed entry has the shape `{ rules, sitemaps, raw }`, where each rule is
 * `{ userAgent, allow, disallow, sitemaps, crawlDelay? }`. A cached `null`
 * means the fetch was attempted but produced nothing usable; such hosts are
 * treated as "everything allowed".
 */
export class RobotsHandler {
  options;
  userAgent;
  cache = new Map();
  pending = new Map();

  /**
   * @param {object} options Flat wget options; reads `userAgent`, `noRobots`
   *   and `robots`.
   */
  constructor(options) {
    this.options = options;
    this.userAgent = this.extractBotName(options.userAgent || DEFAULT_USER_AGENT);
  }

  /**
   * Reduces a full User-Agent header to its lower-cased product token
   * (everything before the first whitespace or "/").
   * @param {string} userAgent
   * @returns {string}
   */
  extractBotName(userAgent) {
    const match = userAgent.match(/^([^\s\/]+)/);
    return match ? match[1].toLowerCase() : userAgent.toLowerCase();
  }

  /**
   * Loads (or returns the cached) robots.txt for the host of `url`.
   * Concurrent calls for the same host share a single in-flight request.
   * @param {string} url Any URL on the target host.
   * @param {(robotsUrl: string) => Promise<string|null|undefined>} fetcher
   *   Callback that downloads the robots.txt body.
   * @returns {Promise<object|null>} Parsed robots data, or null when robots
   *   handling is disabled, the URL is invalid, or nothing could be fetched.
   */
  async fetch(url, fetcher) {
    if (this.options.noRobots || this.options.robots === false) {
      return null;
    }
    const domain = this.getDomain(url);
    if (!domain) {
      return null;
    }
    if (this.cache.has(domain)) {
      return this.cache.get(domain) || null;
    }
    if (this.pending.has(domain)) {
      await this.pending.get(domain);
      return this.cache.get(domain) || null;
    }
    const robotsUrl = this.getRobotsUrl(url);
    const fetchPromise = (async () => {
      try {
        const content = await fetcher(robotsUrl);
        this.cache.set(domain, content ? this.parse(content) : null);
      } catch {
        // Best effort by design: a failed fetch or parse is cached as
        // "no rules" so the crawl continues and the host is not retried.
        this.cache.set(domain, null);
      } finally {
        this.pending.delete(domain);
      }
    })();
    this.pending.set(domain, fetchPromise);
    await fetchPromise;
    return this.cache.get(domain) || null;
  }

  /**
   * Tells whether `url` may be fetched according to the cached robots.txt of
   * its host. Unknown hosts, unparsable URLs and disabled robots handling all
   * yield `true`.
   * @param {string} url
   * @returns {boolean}
   */
  isAllowed(url) {
    if (this.options.noRobots || this.options.robots === false) {
      return true;
    }
    const domain = this.getDomain(url);
    if (!domain) {
      return true;
    }
    const parsed = this.cache.get(domain);
    if (!parsed) {
      return true;
    }
    try {
      const parsedUrl = new URL(url);
      return this.isPathAllowed(parsedUrl.pathname + parsedUrl.search, parsed.rules);
    } catch {
      return true;
    }
  }

  /**
   * Applies longest-match precedence between the Allow and Disallow patterns
   * of the rules that apply to this bot.
   * @param {string} path Path plus query string.
   * @param {object[]} rules Parsed rule objects.
   * @returns {boolean}
   */
  isPathAllowed(path, rules) {
    const matchingRules = this.findMatchingRules(rules);
    if (matchingRules.length === 0) {
      return true;
    }
    let verdict = true; // no matching pattern means allowed
    let bestLength = -1;
    const consider = (patterns, allow) => {
      for (const pattern of patterns) {
        if (!this.pathMatches(path, pattern)) {
          continue;
        }
        const specificity = this.getSpecificity(pattern);
        if (specificity > bestLength) {
          bestLength = specificity;
          verdict = allow;
        }
      }
    };
    // Allow patterns go first and the comparison is strict, so on equal
    // specificity the Allow verdict is kept.
    consider(matchingRules.flatMap((rule) => rule.allow), true);
    consider(matchingRules.flatMap((rule) => rule.disallow), false);
    return verdict;
  }

  /**
   * Selects the rules that apply to this bot: exact user-agent matches first,
   * then substring matches in either direction, then the "*" wildcard rules.
   * @param {object[]} rules
   * @returns {object[]}
   */
  findMatchingRules(rules) {
    const exact = rules.filter((rule) => rule.userAgent.toLowerCase() === this.userAgent);
    if (exact.length > 0) {
      return exact;
    }
    const partial = rules.filter((rule) => {
      const ruleAgent = rule.userAgent.toLowerCase();
      // An empty agent name is a substring of every string; without this
      // guard a stray "User-agent:" line would capture every bot and hide
      // the "*" fallback.
      if (ruleAgent === "") {
        return false;
      }
      return this.userAgent.includes(ruleAgent) || ruleAgent.includes(this.userAgent);
    });
    if (partial.length > 0) {
      return partial;
    }
    return rules.filter((rule) => rule.userAgent === "*");
  }

  /**
   * Tests a path against one robots.txt pattern. `*` matches any run of
   * characters and a trailing `$` anchors the end; everything else is literal
   * and matched as a prefix.
   * @param {string} path
   * @param {string} pattern
   * @returns {boolean} Always false for an empty pattern.
   */
  pathMatches(path, pattern) {
    if (!pattern) {
      return false;
    }
    const hasEndAnchor = pattern.endsWith("$");
    if (hasEndAnchor) {
      pattern = pattern.slice(0, -1);
    }
    let regexStr = "^";
    // Runs of "*" are collapsed first: they match the same strings as a
    // single "*" but would otherwise emit stacked ".*" groups.
    for (const char of pattern.replace(/\*+/g, "*")) {
      if (char === "*") {
        regexStr += ".*";
      } else if (".+?^${}()|[]\\".includes(char)) {
        regexStr += "\\" + char;
      } else {
        regexStr += char;
      }
    }
    if (hasEndAnchor) {
      regexStr += "$";
    }
    try {
      return new RegExp(regexStr).test(path);
    } catch {
      // Fall back to a plain prefix comparison if the regex cannot be built.
      return path.startsWith(pattern.replace(/\*/g, ""));
    }
  }

  /**
   * Specificity of a pattern: its length with wildcards removed.
   * @param {string} pattern
   * @returns {number}
   */
  getSpecificity(pattern) {
    return pattern.replace(/\*/g, "").length;
  }

  /**
   * Normalises a URL or bare host name to the lower-cased cache key.
   * @param {string} urlOrDomain
   * @returns {string|null}
   */
  #domainOf(urlOrDomain) {
    if (urlOrDomain.includes("://")) {
      return this.getDomain(urlOrDomain);
    }
    // Cache keys are always lower-cased hostnames, so bare domains must be
    // normalised too or "Example.com" would never hit the cache.
    return urlOrDomain.toLowerCase();
  }

  /**
   * Crawl delay declared for this bot on the given host.
   * @param {string} urlOrDomain
   * @returns {number|null} The delay value, or null when none is known.
   */
  getCrawlDelay(urlOrDomain) {
    const domain = this.#domainOf(urlOrDomain);
    if (!domain) {
      return null;
    }
    const parsed = this.cache.get(domain);
    if (!parsed) {
      return null;
    }
    for (const rule of this.findMatchingRules(parsed.rules)) {
      if (rule.crawlDelay !== undefined) {
        return rule.crawlDelay;
      }
    }
    return null;
  }

  /**
   * Sitemap URLs listed in the cached robots.txt of the given host.
   * @param {string} urlOrDomain
   * @returns {string[]}
   */
  getSitemaps(urlOrDomain) {
    const domain = this.#domainOf(urlOrDomain);
    if (!domain) {
      return [];
    }
    const parsed = this.cache.get(domain);
    return parsed ? parsed.sitemaps : [];
  }

  /**
   * Cached parse result for the given host.
   * @param {string} urlOrDomain
   * @returns {object|null}
   */
  getParsed(urlOrDomain) {
    const domain = this.#domainOf(urlOrDomain);
    if (!domain) {
      return null;
    }
    return this.cache.get(domain) || null;
  }

  /**
   * Parses robots.txt text.
   *
   * Consecutive `User-agent` lines form one group and every rule line that
   * follows applies to each agent of that group. Groups that end up with no
   * Allow, Disallow or Crawl-delay entry are discarded.
   * @param {string} content Raw robots.txt body.
   * @returns {{rules: object[], sitemaps: string[], raw: string}}
   */
  parse(content) {
    const rules = [];
    const sitemaps = [];
    let group = [];
    // True while the most recent group directive was a User-agent line.
    let collectingAgents = false;
    const flushGroup = () => {
      for (const rule of group) {
        const hasPaths = rule.allow.length > 0 || rule.disallow.length > 0;
        if (hasPaths || rule.crawlDelay !== undefined) {
          rules.push(rule);
        }
      }
      group = [];
    };
    for (const rawLine of content.split(/\r\n|\r|\n/)) {
      const commentIndex = rawLine.indexOf("#");
      const line = (commentIndex === -1 ? rawLine : rawLine.substring(0, commentIndex)).trim();
      if (!line) {
        continue;
      }
      const colonIndex = line.indexOf(":");
      if (colonIndex === -1) {
        continue;
      }
      const directive = line.substring(0, colonIndex).trim().toLowerCase();
      const value = line.substring(colonIndex + 1).trim();
      switch (directive) {
        case "user-agent":
          if (!collectingAgents) {
            // A User-agent line after rule lines starts a new group.
            flushGroup();
            collectingAgents = true;
          }
          group.push({
            userAgent: value,
            disallow: [],
            allow: [],
            sitemaps: []
          });
          break;
        case "disallow":
          collectingAgents = false;
          for (const rule of group) {
            rule.disallow.push(value);
          }
          break;
        case "allow":
          collectingAgents = false;
          for (const rule of group) {
            rule.allow.push(value);
          }
          break;
        case "crawl-delay": {
          collectingAgents = false;
          const delay = parseFloat(value);
          // Reject NaN, negative and infinite delays.
          if (Number.isFinite(delay) && delay >= 0) {
            for (const rule of group) {
              rule.crawlDelay = delay;
            }
          }
          break;
        }
        case "sitemap":
          // Sitemaps are file-global and neither open nor close a group.
          sitemaps.push(value);
          break;
      }
    }
    flushGroup();
    return {
      rules,
      sitemaps,
      raw: content
    };
  }

  /**
   * Builds the robots.txt URL for the origin of `url`.
   * @param {string} url
   * @returns {string} Empty string when `url` cannot be parsed.
   */
  getRobotsUrl(url) {
    try {
      const parsed = new URL(url);
      return `${parsed.protocol}//${parsed.host}/robots.txt`;
    } catch {
      return "";
    }
  }

  /**
   * Lower-cased hostname of `url`.
   * @param {string} url
   * @returns {string|null} Null when `url` cannot be parsed.
   */
  getDomain(url) {
    try {
      return new URL(url).hostname.toLowerCase();
    } catch {
      return null;
    }
  }

  /**
   * Whether a fetch for the given host has completed (successfully or not).
   * @param {string} urlOrDomain
   * @returns {boolean}
   */
  hasFetched(urlOrDomain) {
    const domain = this.#domainOf(urlOrDomain);
    if (!domain) {
      return false;
    }
    return this.cache.has(domain);
  }

  /**
   * Drops the cached entry of one host, or the whole cache when called
   * without an argument.
   * @param {string} [urlOrDomain]
   */
  clearCache(urlOrDomain) {
    if (!urlOrDomain) {
      this.cache.clear();
      return;
    }
    const domain = this.#domainOf(urlOrDomain);
    if (domain) {
      this.cache.delete(domain);
    }
  }

  /**
   * Number of rules parsed for the given host.
   * @param {string} urlOrDomain
   * @returns {number} 0 when nothing is cached.
   */
  getRulesCount(urlOrDomain) {
    const domain = this.#domainOf(urlOrDomain);
    if (!domain) {
      return 0;
    }
    const parsed = this.cache.get(domain);
    return parsed?.rules.length || 0;
  }
}
|
|
300
|
+
// Also expose RobotsHandler as the module's default export.
export default RobotsHandler;
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
/**
 * Error type carrying the failure code, the URL involved, the HTTP status
 * (when there is one) and the underlying cause.
 */
class WgetError extends Error {
  // Codes reported as network failures by isNetworkError().
  static #networkCodes = new Set([
    "ENOTFOUND",
    "ECONNREFUSED",
    "ECONNRESET",
    "ETIMEDOUT",
    "ENETUNREACH",
    "EHOSTUNREACH",
    "ECONNABORTED",
    "EPIPE",
    "EAI_AGAIN"
  ]);

  // Codes reported as proxy failures by isProxyError().
  static #proxyCodes = new Set([
    "REZ_PROXY_CONNECTION_FAILED",
    "REZ_PROXY_AUTHENTICATION_FAILED",
    "REZ_PROXY_PROTOCOL_ERROR",
    "REZ_PROXY_TIMEOUT",
    "REZ_SOCKS_CONNECTION_FAILED",
    "REZ_SOCKS_AUTH_FAILED",
    "REZ_SOCKS_HANDSHAKE_FAILED",
    "REZ_SOCKS_PROTOCOL_ERROR",
    "ECONNREFUSED",
    "ECONNRESET"
  ]);

  code;
  url;
  statusCode;
  cause;

  /**
   * @param {string} message Human-readable description.
   * @param {string} code Failure code (e.g. "HTTP_404", "ECONNRESET").
   * @param {string} url URL the failure relates to.
   * @param {number} [statusCode] HTTP status, when applicable.
   * @param {*} [cause] Underlying error or rejection value.
   */
  constructor(message, code, url, statusCode, cause) {
    super(message);
    this.name = "WgetError";
    this.code = code;
    this.url = url;
    this.statusCode = statusCode;
    this.cause = cause;
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, WgetError);
    }
  }

  /** @returns {boolean} True when `code` is one of the known network codes. */
  isNetworkError() {
    return WgetError.#networkCodes.has(this.code);
  }

  /**
   * @returns {boolean} True when the code starts with "HTTP_" or the status
   *   code is 400 or above.
   */
  isHttpError() {
    // `code` may be missing or non-string when the error was constructed
    // directly; guard so this predicate never throws.
    const hasHttpCode = typeof this.code === "string" && this.code.startsWith("HTTP_");
    return hasHttpCode || (this.statusCode !== undefined && this.statusCode >= 400);
  }

  /**
   * @returns {boolean} True for network errors and for HTTP 408, 429 and 5xx.
   */
  isRetryable() {
    if (this.isNetworkError()) {
      return true;
    }
    if (this.statusCode) {
      return this.statusCode === 408 || this.statusCode === 429 || this.statusCode >= 500;
    }
    return false;
  }

  /**
   * @returns {boolean} True when the code is a known proxy code or the
   *   message mentions "proxy" or "socks" (case-insensitive).
   */
  isProxyError() {
    if (WgetError.#proxyCodes.has(this.code)) {
      return true;
    }
    const lowerMessage = this.message.toLowerCase();
    return lowerMessage.includes("proxy") || lowerMessage.includes("socks");
  }

  /**
   * Builds an error for an HTTP response status.
   * @param {string} url
   * @param {number} statusCode
   * @param {string} statusText
   * @returns {WgetError} Error with code `HTTP_<statusCode>`.
   */
  static fromHttpStatus(url, statusCode, statusText) {
    return new WgetError(`HTTP ${statusCode}: ${statusText}`, `HTTP_${statusCode}`, url, statusCode);
  }

  /**
   * Wraps a lower-level failure. Values that are not Error objects (strings,
   * null, ...) are accepted and stringified for the message.
   * @param {string} url
   * @param {*} error Underlying error or rejection value.
   * @returns {WgetError} Error whose code is `error.code` or "NETWORK_ERROR".
   */
  static fromNetworkError(url, error) {
    const code = error?.code || "NETWORK_ERROR";
    const message = error?.message ?? String(error);
    return new WgetError(message, code, url, undefined, error);
  }
}
|
|
68
|
+
/**
 * Converts the nested wget option object (`logging`, `download`,
 * `directories`, `http`, `recursive`, `filter`, `robots`, `proxy`, `network`,
 * `input`, `misc`) into a single flat object.
 *
 * Top-level `organizeAssets`, `assetFolders` and `cache` override the values
 * taken from the sections. Keys whose value is `undefined` are omitted.
 *
 * @param {object} [options] Nested options; null/undefined yields `{}`.
 * @returns {object} Flat options without undefined entries.
 */
function flattenWgetOptions(options) {
  const source = options ?? {};
  const flat = {};

  // Copies the listed fields of one section into `flat`. A field is either a
  // name kept as-is or a [sourceName, flatName] pair.
  const copyFields = (section, fields) => {
    if (!section) {
      return;
    }
    for (const field of fields) {
      const [from, to] = Array.isArray(field) ? field : [field, field];
      flat[to] = section[from];
    }
  };

  copyFields(source.logging, [
    "quiet", "verbose", "debug", "noVerbose", "reportSpeed",
    "progress", "showProgress", "logFile", "appendOutput"
  ]);
  copyFields(source.download, [
    "output", "outputDir", ["continue", "continueDownload"], "timestamping",
    "noClobber", "backups", "adjustExtension", "contentDisposition",
    "contentOnError", "tries", "retryConnrefused", "waitRetry",
    "maxProxyRetries", "retryProxyErrors", "timeout", "connectTimeout",
    "readTimeout", "dnsTimeout", "wait", "randomWait", "limitRate", "quota",
    "concurrency"
  ]);
  copyFields(source.directories, [
    "noDirectories", "forceDirectories", "cutDirs", "protocolDirectories",
    "noHostDirectories", "organizeAssets", "assetFolders"
  ]);
  copyFields(source.http, [
    "userAgent", "referer", "headers", "method", "maxRedirects",
    "noCheckCertificate", "postData", "postFile",
    ["user", "httpUser"], ["password", "httpPassword"]
  ]);
  copyFields(source.http?.cookies, [
    ["load", "loadCookies"], ["save", "saveCookies"],
    ["keepSession", "keepSessionCookies"], ["jar", "cookieJar"]
  ]);
  copyFields(source.recursive, [
    ["enabled", "recursive"], "depth", "deleteAfter", "convertLinks",
    "backupConverted", "mirror", "pageRequisites", "strictComments"
  ]);
  copyFields(source.filter, [
    "accept", "reject", "acceptRegex", "rejectRegex", "domains",
    "excludeDomains", "followTags", "ignoreTags", "followFTP", "spanHosts",
    "relativeOnly", "noParent", "includeDirectories", "excludeDirectories",
    "excludeExtensions", "excludeMimeTypes", "acceptAssetTypes",
    "rejectAssetTypes", "maxFileSize", "minFileSize"
  ]);
  if (source.robots) {
    flat.robots = source.robots.enabled;
    // noRobots is set only by an explicit `enabled: false`.
    flat.noRobots = source.robots.enabled === false;
  }
  if (source.proxy) {
    flat.proxy = source.proxy;
  }
  copyFields(source.network, ["inet4Only", "inet6Only", "preferFamily"]);
  copyFields(source.input, [["file", "inputFile"], ["base", "baseUrl"]]);
  copyFields(source.misc, ["background", "execute", "restrictFileNames", "signal"]);

  // Top-level shortcuts win over the values copied from the sections.
  for (const key of ["organizeAssets", "assetFolders", "cache"]) {
    if (source[key] !== undefined) {
      flat[key] = source[key];
    }
  }
  return Object.fromEntries(Object.entries(flat).filter(([, value]) => value !== undefined));
}
|
|
198
|
+
|
|
199
|
+
// CommonJS export of the WgetError class.
exports.WgetError = WgetError;
|
|
200
|
+
// CommonJS export of the flattenWgetOptions function.
exports.flattenWgetOptions = flattenWgetOptions;
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
/**
 * Error type carrying the failure code, the URL involved, the HTTP status
 * (when there is one) and the underlying cause.
 */
export class WgetError extends Error {
  // Codes reported as network failures by isNetworkError().
  static #networkCodes = new Set([
    "ENOTFOUND",
    "ECONNREFUSED",
    "ECONNRESET",
    "ETIMEDOUT",
    "ENETUNREACH",
    "EHOSTUNREACH",
    "ECONNABORTED",
    "EPIPE",
    "EAI_AGAIN"
  ]);

  // Codes reported as proxy failures by isProxyError().
  static #proxyCodes = new Set([
    "REZ_PROXY_CONNECTION_FAILED",
    "REZ_PROXY_AUTHENTICATION_FAILED",
    "REZ_PROXY_PROTOCOL_ERROR",
    "REZ_PROXY_TIMEOUT",
    "REZ_SOCKS_CONNECTION_FAILED",
    "REZ_SOCKS_AUTH_FAILED",
    "REZ_SOCKS_HANDSHAKE_FAILED",
    "REZ_SOCKS_PROTOCOL_ERROR",
    "ECONNREFUSED",
    "ECONNRESET"
  ]);

  code;
  url;
  statusCode;
  cause;

  /**
   * @param {string} message Human-readable description.
   * @param {string} code Failure code (e.g. "HTTP_404", "ECONNRESET").
   * @param {string} url URL the failure relates to.
   * @param {number} [statusCode] HTTP status, when applicable.
   * @param {*} [cause] Underlying error or rejection value.
   */
  constructor(message, code, url, statusCode, cause) {
    super(message);
    this.name = "WgetError";
    this.code = code;
    this.url = url;
    this.statusCode = statusCode;
    this.cause = cause;
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, WgetError);
    }
  }

  /** @returns {boolean} True when `code` is one of the known network codes. */
  isNetworkError() {
    return WgetError.#networkCodes.has(this.code);
  }

  /**
   * @returns {boolean} True when the code starts with "HTTP_" or the status
   *   code is 400 or above.
   */
  isHttpError() {
    // `code` may be missing or non-string when the error was constructed
    // directly; guard so this predicate never throws.
    const hasHttpCode = typeof this.code === "string" && this.code.startsWith("HTTP_");
    return hasHttpCode || (this.statusCode !== undefined && this.statusCode >= 400);
  }

  /**
   * @returns {boolean} True for network errors and for HTTP 408, 429 and 5xx.
   */
  isRetryable() {
    if (this.isNetworkError()) {
      return true;
    }
    if (this.statusCode) {
      return this.statusCode === 408 || this.statusCode === 429 || this.statusCode >= 500;
    }
    return false;
  }

  /**
   * @returns {boolean} True when the code is a known proxy code or the
   *   message mentions "proxy" or "socks" (case-insensitive).
   */
  isProxyError() {
    if (WgetError.#proxyCodes.has(this.code)) {
      return true;
    }
    const lowerMessage = this.message.toLowerCase();
    return lowerMessage.includes("proxy") || lowerMessage.includes("socks");
  }

  /**
   * Builds an error for an HTTP response status.
   * @param {string} url
   * @param {number} statusCode
   * @param {string} statusText
   * @returns {WgetError} Error with code `HTTP_<statusCode>`.
   */
  static fromHttpStatus(url, statusCode, statusText) {
    return new WgetError(`HTTP ${statusCode}: ${statusText}`, `HTTP_${statusCode}`, url, statusCode);
  }

  /**
   * Wraps a lower-level failure. Values that are not Error objects (strings,
   * null, ...) are accepted and stringified for the message.
   * @param {string} url
   * @param {*} error Underlying error or rejection value.
   * @returns {WgetError} Error whose code is `error.code` or "NETWORK_ERROR".
   */
  static fromNetworkError(url, error) {
    const code = error?.code || "NETWORK_ERROR";
    const message = error?.message ?? String(error);
    return new WgetError(message, code, url, undefined, error);
  }
}
|
|
68
|
+
/**
 * Converts the nested wget option object (`logging`, `download`,
 * `directories`, `http`, `recursive`, `filter`, `robots`, `proxy`, `network`,
 * `input`, `misc`) into a single flat object.
 *
 * Top-level `organizeAssets`, `assetFolders` and `cache` override the values
 * taken from the sections. Keys whose value is `undefined` are omitted.
 *
 * @param {object} [options] Nested options; null/undefined yields `{}`.
 * @returns {object} Flat options without undefined entries.
 */
export function flattenWgetOptions(options) {
  const source = options ?? {};
  const flat = {};

  // Copies the listed fields of one section into `flat`. A field is either a
  // name kept as-is or a [sourceName, flatName] pair.
  const copyFields = (section, fields) => {
    if (!section) {
      return;
    }
    for (const field of fields) {
      const [from, to] = Array.isArray(field) ? field : [field, field];
      flat[to] = section[from];
    }
  };

  copyFields(source.logging, [
    "quiet", "verbose", "debug", "noVerbose", "reportSpeed",
    "progress", "showProgress", "logFile", "appendOutput"
  ]);
  copyFields(source.download, [
    "output", "outputDir", ["continue", "continueDownload"], "timestamping",
    "noClobber", "backups", "adjustExtension", "contentDisposition",
    "contentOnError", "tries", "retryConnrefused", "waitRetry",
    "maxProxyRetries", "retryProxyErrors", "timeout", "connectTimeout",
    "readTimeout", "dnsTimeout", "wait", "randomWait", "limitRate", "quota",
    "concurrency"
  ]);
  copyFields(source.directories, [
    "noDirectories", "forceDirectories", "cutDirs", "protocolDirectories",
    "noHostDirectories", "organizeAssets", "assetFolders"
  ]);
  copyFields(source.http, [
    "userAgent", "referer", "headers", "method", "maxRedirects",
    "noCheckCertificate", "postData", "postFile",
    ["user", "httpUser"], ["password", "httpPassword"]
  ]);
  copyFields(source.http?.cookies, [
    ["load", "loadCookies"], ["save", "saveCookies"],
    ["keepSession", "keepSessionCookies"], ["jar", "cookieJar"]
  ]);
  copyFields(source.recursive, [
    ["enabled", "recursive"], "depth", "deleteAfter", "convertLinks",
    "backupConverted", "mirror", "pageRequisites", "strictComments"
  ]);
  copyFields(source.filter, [
    "accept", "reject", "acceptRegex", "rejectRegex", "domains",
    "excludeDomains", "followTags", "ignoreTags", "followFTP", "spanHosts",
    "relativeOnly", "noParent", "includeDirectories", "excludeDirectories",
    "excludeExtensions", "excludeMimeTypes", "acceptAssetTypes",
    "rejectAssetTypes", "maxFileSize", "minFileSize"
  ]);
  if (source.robots) {
    flat.robots = source.robots.enabled;
    // noRobots is set only by an explicit `enabled: false`.
    flat.noRobots = source.robots.enabled === false;
  }
  if (source.proxy) {
    flat.proxy = source.proxy;
  }
  copyFields(source.network, ["inet4Only", "inet6Only", "preferFamily"]);
  copyFields(source.input, [["file", "inputFile"], ["base", "baseUrl"]]);
  copyFields(source.misc, ["background", "execute", "restrictFileNames", "signal"]);

  // Top-level shortcuts win over the values copied from the sections.
  for (const key of ["organizeAssets", "assetFolders", "cache"]) {
    if (source[key] !== undefined) {
      flat[key] = source[key];
    }
  }
  return Object.fromEntries(Object.entries(flat).filter(([, value]) => value !== undefined));
}
|