rezo 1.0.67 → 1.0.68
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/index.cjs +6 -6
- package/dist/cache/index.cjs +9 -9
- package/dist/crawler/index.cjs +40 -40
- package/dist/entries/crawler.cjs +4 -4
- package/dist/index.cjs +27 -27
- package/dist/internal/agents/index.cjs +10 -10
- package/dist/proxy/index.cjs +4 -4
- package/dist/queue/index.cjs +8 -8
- package/dist/responses/universal/index.cjs +11 -11
- package/dist/wget/asset-extractor.cjs +556 -0
- package/dist/wget/asset-extractor.js +553 -0
- package/dist/wget/asset-organizer.cjs +230 -0
- package/dist/wget/asset-organizer.js +227 -0
- package/dist/wget/download-cache.cjs +221 -0
- package/dist/wget/download-cache.js +218 -0
- package/dist/wget/downloader.cjs +607 -0
- package/dist/wget/downloader.js +604 -0
- package/dist/wget/file-writer.cjs +349 -0
- package/dist/wget/file-writer.js +346 -0
- package/dist/wget/filter-lists.cjs +1330 -0
- package/dist/wget/filter-lists.js +1330 -0
- package/dist/wget/index.cjs +633 -0
- package/dist/wget/index.d.ts +8486 -0
- package/dist/wget/index.js +614 -0
- package/dist/wget/link-converter.cjs +297 -0
- package/dist/wget/link-converter.js +294 -0
- package/dist/wget/progress.cjs +271 -0
- package/dist/wget/progress.js +266 -0
- package/dist/wget/resume.cjs +166 -0
- package/dist/wget/resume.js +163 -0
- package/dist/wget/robots.cjs +303 -0
- package/dist/wget/robots.js +300 -0
- package/dist/wget/types.cjs +200 -0
- package/dist/wget/types.js +197 -0
- package/dist/wget/url-filter.cjs +351 -0
- package/dist/wget/url-filter.js +348 -0
- package/package.json +6 -1
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
import { promises as fs } from "node:fs";
|
|
2
|
+
import { createHash } from "node:crypto";
|
|
3
|
+
import { join, dirname } from "node:path";
|
|
4
|
+
import { cwd } from "node:process";
|
|
5
|
+
|
|
6
|
+
/**
 * JSON-file-backed index of downloaded URLs.
 *
 * Each entry is keyed by the MD5 hex digest of its URL and records, at least,
 * the list of `filenames` (resolved against `outputDir`) and the expected
 * `totalBytes` of the download. The index is stored in
 * `<cwd>/.rezo-wget/<hash>.json`, where `<hash>` is derived from `baseUrl`.
 * Mutations are written back lazily through a 1 second debounce timer.
 */
export class DownloadCache {
  /** Directory that cached filenames are resolved against. */
  outputDir;
  /** Base URL; its hash names the cache file. */
  baseUrl;
  /** Directory holding the cache file (under the current working directory). */
  cacheDir;
  /** Full path of the JSON cache file. */
  cacheFile;
  /** In-memory cache document, or `null` until loaded/created. */
  data = null;
  /** True when `data` has changes that have not been written to disk. */
  dirty = false;
  /** Handle of the pending debounced save, or `null`. */
  saveTimeout = null;
  /** Schema version; a file with a different version is discarded on load. */
  static VERSION = 1;
  /** Name of the cache directory created under the working directory. */
  static CACHE_DIR = ".rezo-wget";

  /**
   * @param {string} outputDir - Directory the downloaded files live in.
   * @param {string} baseUrl - Base URL used to derive the cache file name.
   */
  constructor(outputDir, baseUrl) {
    this.outputDir = outputDir;
    this.baseUrl = baseUrl;
    this.cacheDir = join(cwd(), DownloadCache.CACHE_DIR);
    const hash = this.generateCacheHash();
    this.cacheFile = join(this.cacheDir, `${hash}.json`);
  }

  /**
   * @returns {string} First 12 hex characters of the MD5 digest of `baseUrl`.
   */
  generateCacheHash() {
    return createHash("md5").update(this.baseUrl).digest("hex").slice(0, 12);
  }

  /**
   * @param {string} url
   * @returns {string} Full MD5 hex digest of `url`, used as the entry key.
   */
  urlHash(url) {
    return createHash("md5").update(url).digest("hex");
  }

  /**
   * Reads the cache file into memory. Falls back to an empty cache when the
   * file cannot be read or parsed, has a different version, or lacks a usable
   * `entries` object. Never rejects.
   *
   * @returns {Promise<void>}
   */
  async load() {
    try {
      await fs.mkdir(this.cacheDir, { recursive: true });
      const content = await fs.readFile(this.cacheFile, "utf-8");
      const parsed = JSON.parse(content);
      const usable =
        parsed !== null &&
        typeof parsed === "object" &&
        parsed.version === DownloadCache.VERSION &&
        parsed.entries !== null &&
        typeof parsed.entries === "object";
      // A structurally invalid document would make later lookups throw, so it
      // is treated the same as a version mismatch.
      this.data = usable ? parsed : this.createEmptyCache();
    } catch (error) {
      // Best effort: a missing or corrupt cache file simply means "no cache".
      this.data = this.createEmptyCache();
    }
  }

  /**
   * @returns {object} A fresh cache document with no entries.
   */
  createEmptyCache() {
    return {
      version: DownloadCache.VERSION,
      created: Date.now(),
      updated: Date.now(),
      configHash: this.generateCacheHash(),
      baseUrl: this.baseUrl,
      entries: {}
    };
  }

  /**
   * Checks whether `url` has a cache entry backed by a file of the expected
   * size. Loads the cache first if it is not in memory yet.
   *
   * @param {string} url
   * @returns {Promise<object>} `{ cached: true, entry, filename }` for the
   *   first recorded file whose size equals `entry.totalBytes`; otherwise
   *   `{ cached: false, reason, entry? }` where `reason` is `"not-found"` (no
   *   entry), `"size-mismatch"` (a recorded file exists but with a different
   *   size) or `"file-missing"` (no recorded file could be stat'ed).
   */
  async check(url) {
    if (!this.data) {
      await this.load();
    }
    const entry = this.data.entries[this.urlHash(url)];
    if (!entry) {
      return { cached: false, reason: "not-found" };
    }
    const filenames = Array.isArray(entry.filenames) ? entry.filenames : [];
    let foundOnDisk = false;
    for (const filename of filenames) {
      try {
        const stat = await fs.stat(join(this.outputDir, filename));
        if (stat.size === entry.totalBytes) {
          return { cached: true, entry, filename };
        }
        foundOnDisk = true;
      } catch (error) {
        // A file that cannot be stat'ed counts as missing; try the next one.
      }
    }
    // Distinguish "present but wrong size" from "nothing on disk" based on
    // what was actually observed, not on whether filenames were recorded.
    const reason = foundOnDisk ? "size-mismatch" : "file-missing";
    return { cached: false, reason, entry };
  }

  /**
   * @param {string} url
   * @returns {object|undefined} The in-memory entry for `url`, if any.
   */
  get(url) {
    if (!this.data) {
      return;
    }
    return this.data.entries[this.urlHash(url)];
  }

  /**
   * Stores `entry` for `url`, merging its filenames with those of an existing
   * entry, and schedules a save. Creates an empty cache if none is loaded.
   *
   * @param {string} url
   * @param {object} entry - Entry fields; `filenames` is treated as an
   *   optional array of paths relative to `outputDir`.
   */
  set(url, entry) {
    if (!this.data) {
      this.data = this.createEmptyCache();
    }
    const key = this.urlHash(url);
    const previous = this.data.entries[key]?.filenames ?? [];
    const incoming = entry.filenames ?? [];
    this.data.entries[key] = {
      url,
      ...entry,
      // Always store a fresh, de-duplicated array so later addFilename()
      // calls never mutate an array owned by the caller.
      filenames: [...new Set([...previous, ...incoming])]
    };
    this.markChanged();
  }

  /**
   * Adds `filename` to the existing entry for `url` (no-op when the cache is
   * not loaded, the entry is absent, or the filename is already recorded).
   *
   * @param {string} url
   * @param {string} filename
   */
  addFilename(url, filename) {
    if (!this.data) {
      return;
    }
    const entry = this.data.entries[this.urlHash(url)];
    if (entry && !entry.filenames.includes(filename)) {
      entry.filenames.push(filename);
      this.markChanged();
    }
  }

  /**
   * Removes the entry for `url`, if present, and schedules a save.
   *
   * @param {string} url
   */
  delete(url) {
    if (!this.data) {
      return;
    }
    const key = this.urlHash(url);
    if (this.data.entries[key]) {
      delete this.data.entries[key];
      this.markChanged();
    }
  }

  /**
   * @param {string} url
   * @returns {boolean} Whether the in-memory cache has an entry for `url`.
   */
  has(url) {
    if (!this.data) {
      return false;
    }
    return this.urlHash(url) in this.data.entries;
  }

  /**
   * @returns {string[]} The `url` field of every in-memory entry.
   */
  urls() {
    if (!this.data) {
      return [];
    }
    return Object.values(this.data.entries).map((e) => e.url);
  }

  /**
   * @returns {{entries: number, totalBytes: number, filesCount: number}}
   *   Entry count, sum of `totalBytes`, and total number of recorded filenames.
   */
  stats() {
    if (!this.data) {
      return { entries: 0, totalBytes: 0, filesCount: 0 };
    }
    const entries = Object.values(this.data.entries);
    return {
      entries: entries.length,
      totalBytes: entries.reduce((sum, e) => sum + e.totalBytes, 0),
      filesCount: entries.reduce((sum, e) => sum + e.filenames.length, 0)
    };
  }

  /**
   * Internal helper: stamps the update time, flags the cache as dirty and
   * (re)starts the debounced save.
   */
  markChanged() {
    this.data.updated = Date.now();
    this.dirty = true;
    this.scheduleSave();
  }

  /**
   * Restarts the 1 second debounce timer that triggers `save()`.
   */
  scheduleSave() {
    if (this.saveTimeout) {
      clearTimeout(this.saveTimeout);
    }
    this.saveTimeout = setTimeout(() => this.save(), 1000);
  }

  /**
   * Writes the cache to `cacheFile` if there are unsaved changes. Failures
   * are logged with `console.error` and leave the cache marked dirty; the
   * returned promise never rejects.
   *
   * @returns {Promise<void>}
   */
  async save() {
    if (!this.data || !this.dirty) {
      return;
    }
    if (this.saveTimeout) {
      clearTimeout(this.saveTimeout);
      this.saveTimeout = null;
    }
    try {
      const payload = JSON.stringify(this.data, null, 2);
      // Clear the flag before awaiting so a mutation made while the write is
      // in flight re-marks the cache dirty instead of being wiped afterwards.
      this.dirty = false;
      await fs.mkdir(dirname(this.cacheFile), { recursive: true });
      await fs.writeFile(this.cacheFile, payload, "utf-8");
    } catch (error) {
      this.dirty = true;
      console.error("Failed to save download cache:", error);
    }
  }

  /**
   * Drops every entry from the loaded cache and schedules a save.
   */
  clear() {
    if (this.data) {
      this.data.entries = {};
      this.markChanged();
    }
  }

  /**
   * Reconciles the cache with the file system: filenames that can no longer
   * be stat'ed are dropped, and entries left with no files are deleted. Any
   * change (deletion or pruning) is saved immediately.
   *
   * @returns {Promise<number>} Number of entries deleted.
   */
  async cleanup() {
    const data = this.data;
    if (!data) {
      return 0;
    }
    let removed = 0;
    let changed = false;
    for (const [key, entry] of Object.entries(data.entries)) {
      const surviving = [];
      for (const filename of entry.filenames) {
        try {
          await fs.stat(join(this.outputDir, filename));
          surviving.push(filename);
        } catch (error) {
          // Not stat-able: the file is considered gone.
        }
      }
      if (surviving.length === 0) {
        delete data.entries[key];
        removed++;
        changed = true;
      } else if (surviving.length !== entry.filenames.length) {
        entry.filenames = surviving;
        // Pruned filenames must be persisted too, not only deleted entries.
        changed = true;
      }
    }
    if (changed) {
      data.updated = Date.now();
      this.dirty = true;
      await this.save();
    }
    return removed;
  }

  /**
   * Cancels any pending debounced save, flushes unsaved changes, and releases
   * the in-memory cache.
   *
   * @returns {Promise<void>}
   */
  async destroy() {
    if (this.saveTimeout) {
      clearTimeout(this.saveTimeout);
      this.saveTimeout = null;
    }
    await this.save();
    this.data = null;
  }

  /** @returns {string} Path of the JSON cache file. */
  get filePath() {
    return this.cacheFile;
  }

  /** @returns {string} Path of the directory containing the cache file. */
  get dirPath() {
    return this.cacheDir;
  }
}

export default DownloadCache;
|