html-to-gutenberg 4.2.9 → 4.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +20 -3
- package/.github/workflows/sync-npm.yml +154 -0
- package/.vscode/extensions.json +7 -0
- package/.vscode/launch.json +50 -0
- package/.vscode/settings.json +23 -0
- package/.vscode/spellright.dict +2 -0
- package/.vscode/tasks.json +33 -0
- package/fetch-page-assets.test.ts +448 -0
- package/index.d.ts +173 -0
- package/index.js +570 -224
- package/index.test.ts +633 -4
- package/index.ts +168 -63
- package/package.json +25 -24
- package/public/fonts/README.md +24 -0
- package/public/fonts/fa-brands-400.eot +0 -0
- package/public/fonts/fa-brands-400.svg +3570 -0
- package/public/fonts/fa-brands-400.ttf +0 -0
- package/public/fonts/fa-brands-400.woff +0 -0
- package/public/fonts/fa-brands-400.woff2 +0 -0
- package/public/fonts/fa-regular-400.eot +0 -0
- package/public/fonts/fa-regular-400.svg +803 -0
- package/public/fonts/fa-regular-400.ttf +0 -0
- package/public/fonts/fa-regular-400.woff +0 -0
- package/public/fonts/fa-regular-400.woff2 +0 -0
- package/public/fonts/fa-solid-400.woff2 +1 -0
- package/public/fonts/fa-solid-900.eot +0 -0
- package/public/fonts/fa-solid-900.svg +4938 -0
- package/public/fonts/fa-solid-900.ttf +0 -0
- package/public/fonts/fa-solid-900.woff +0 -0
- package/public/fonts/fa-solid-900.woff2 +0 -0
- package/r2.js +163 -0
- package/readme.md +122 -88
- package/scripts/patch-fetch-page-assets.mjs +13 -0
- package/scripts/sync-from-npm.mjs +115 -0
- package/tsconfig.json +17 -2
- package/vendor/fetch-page-assets/LICENSE.MD +21 -0
- package/vendor/fetch-page-assets/README.md +117 -0
- package/vendor/fetch-page-assets/index.js +362 -0
- package/vendor/fetch-page-assets/package.json +48 -0
- package/.env +0 -1
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# fetch-page-assets
|
|
2
|
+
|
|
3
|
+
Download page assets, rewrite the HTML to point at the fetched assets, and optionally upload those downloads directly to Cloudflare R2 instead of writing them to disk.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install fetch-page-assets
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Environment
|
|
12
|
+
|
|
13
|
+
Keep real secrets in `.env` and never commit them.
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
cp .env.example .env
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Required for R2 uploads:
|
|
20
|
+
|
|
21
|
+
- `CLOUDFLARE_R2_ACCOUNT_ID`
|
|
22
|
+
- `CLOUDFLARE_R2_BUCKET`
|
|
23
|
+
- `CLOUDFLARE_R2_ACCESS_KEY_ID`
|
|
24
|
+
- `CLOUDFLARE_R2_SECRET_ACCESS_KEY`
|
|
25
|
+
- `CLOUDFLARE_R2_PUBLIC_BASE_URL`
|
|
26
|
+
|
|
27
|
+
Optional:
|
|
28
|
+
|
|
29
|
+
- `CLOUDFLARE_API_TOKEN`
|
|
30
|
+
- `CLOUDFLARE_R2_ENDPOINT`
|
|
31
|
+
|
|
32
|
+
## Getting and rotating Cloudflare credentials
|
|
33
|
+
|
|
34
|
+
1. Open the Cloudflare dashboard.
|
|
35
|
+
2. Create or rotate the R2 access keys for the bucket you want to use.
|
|
36
|
+
3. Update `.env` with the new key values.
|
|
37
|
+
4. If you use a Cloudflare API token for verification or other account workflows, rotate it in the API Tokens section and update `.env`.
|
|
38
|
+
5. Restart the service after updating `.env`.
|
|
39
|
+
6. Revoke the old token or key after the new one is confirmed working.
|
|
40
|
+
|
|
41
|
+
To verify a token without exposing it in source code:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
curl "https://api.cloudflare.com/client/v4/user/tokens/verify" \
|
|
45
|
+
-H "Authorization: Bearer $CLOUDFLARE_API_TOKEN"
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Usage
|
|
49
|
+
|
|
50
|
+
### Legacy local mode
|
|
51
|
+
|
|
52
|
+
```js
|
|
53
|
+
import extractAssets from 'fetch-page-assets';
|
|
54
|
+
|
|
55
|
+
const html = await extractAssets('<img src="/logo.png" />', {
|
|
56
|
+
source: 'https://example.com',
|
|
57
|
+
basePath: process.cwd(),
|
|
58
|
+
saveFile: true
|
|
59
|
+
});
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### R2 upload mode
|
|
63
|
+
|
|
64
|
+
```js
|
|
65
|
+
import extractAssets from 'fetch-page-assets';
|
|
66
|
+
|
|
67
|
+
const result = await extractAssets('<img src="/logo.png" />', {
|
|
68
|
+
source: 'https://example.com',
|
|
69
|
+
uploadToR2: true,
|
|
70
|
+
returnDetails: true,
|
|
71
|
+
jobId: 'conv_123',
|
|
72
|
+
r2Prefix: 'generated/conv_123/assets'
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
console.log(result);
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Example response:
|
|
79
|
+
|
|
80
|
+
```json
|
|
81
|
+
{
|
|
82
|
+
"html": "<img src=\"https://storage.example.com/generated/conv_123/assets/logo.png\">",
|
|
83
|
+
"assets": [
|
|
84
|
+
{
|
|
85
|
+
"id": "file_1",
|
|
86
|
+
"name": "logo.png",
|
|
87
|
+
"type": "image/png",
|
|
88
|
+
"size": 48211,
|
|
89
|
+
"path": "/generated/conv_123/assets/logo.png",
|
|
90
|
+
"url": "https://storage.example.com/generated/conv_123/assets/logo.png",
|
|
91
|
+
"kind": "asset"
|
|
92
|
+
}
|
|
93
|
+
]
|
|
94
|
+
}
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Options
|
|
98
|
+
|
|
99
|
+
| Option | Description | Type | Default |
|
|
100
|
+
| --- | --- | --- | --- |
|
|
101
|
+
| `source` | Base URL used to resolve relative asset paths. | `string` | `''` |
|
|
102
|
+
| `basePath` | Local base path used in legacy mode. | `string` | current directory |
|
|
103
|
+
| `saveFile` | Writes downloaded assets to disk in legacy mode. | `boolean` | `true` |
|
|
104
|
+
| `uploadToR2` | Uploads resolved assets to Cloudflare R2. | `boolean` | `false` |
|
|
105
|
+
| `returnDetails` | Returns `{ html, assets }` metadata instead of only the rewritten HTML string. | `boolean` | `false` |
|
|
106
|
+
| `jobId` | Stable conversion identifier used to build remote paths. | `string` | `conv_local` |
|
|
107
|
+
| `r2Prefix` | Remote storage prefix for uploaded assets. | `string` | derived from `jobId` |
|
|
108
|
+
| `concurrency` | Maximum number of simultaneous downloads. | `number` | `8` |
|
|
109
|
+
| `maxRetryAttempts` | Maximum attempts per asset. | `number` | `3` |
|
|
110
|
+
| `retryDelay` | Delay between attempts in milliseconds. | `number` | `1000` |
|
|
111
|
+
| `verbose` | Enables console logging. | `boolean` | `true` |
|
|
112
|
+
|
|
113
|
+
## Notes
|
|
114
|
+
|
|
115
|
+
- `returnDetails: true` is the recommended mode when using R2 because it gives you the uploaded asset metadata.
|
|
116
|
+
- Keep all Cloudflare credentials in `.env`.
|
|
117
|
+
- Do not hardcode tokens or access keys in code, documentation, tests, or shell history.
|
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
import https from "https";
|
|
2
|
+
import http from "http";
|
|
3
|
+
import fs from "fs";
|
|
4
|
+
import path from "path";
|
|
5
|
+
import beautify from "beautify";
|
|
6
|
+
import { decode } from "html-entities";
|
|
7
|
+
import mime from "mime";
|
|
8
|
+
import { createFileRecord, inferContentType, uploadBufferToR2 } from "../../r2.js";
|
|
9
|
+
/**
 * Reports an error message through `console.error`, prefixed with "[Error]".
 *
 * @param {*} t - Message to report; interpolated into the logged line.
 * @param {boolean} [i=true] - Logging switch; nothing is printed when falsy.
 * @returns {void}
 */
export const h = (t, i = true) => {
  if (!i) {
    return;
  }
  console.error(`[Error] ${t}`);
};
|
|
11
|
+
/**
 * Reports a success message through `console.log`, prefixed with "[Success]".
 *
 * @param {*} t - Message to report; interpolated into the logged line.
 * @param {boolean} [i=true] - Logging switch; nothing is printed when falsy.
 * @returns {void}
 */
export const p = (t, i = true) => {
  if (!i) {
    return;
  }
  console.log(`[Success] ${t}`);
};
|
|
13
|
+
/**
 * Tells whether a URL string is protocol-relative, i.e. begins with "//".
 *
 * @param {string} t - URL or path to inspect.
 * @returns {boolean} True when the first two characters are "//".
 */
export const d = (t) => {
  const leading = t.slice(0, 2);
  return leading === "//";
};
|
|
14
|
+
/**
 * Prefixes a protocol-relative URL with a scheme; any other value is returned untouched.
 *
 * @param {string} t - URL or path to normalise.
 * @param {string} [r="https"] - Scheme (without the colon) to prepend.
 * @returns {string} `${r}:${t}` when `t` is protocol-relative, otherwise `t`.
 */
export const m = (t, r = "https") => {
  if (!d(t)) {
    return t;
  }
  return `${r}:${t}`;
};
|
|
15
|
+
/**
 * Tells whether a string ends with a forward slash.
 *
 * @param {string} t - String to inspect.
 * @returns {boolean} True when the last character is "/".
 */
export const u = (t) => {
  const lastChar = t.charAt(t.length - 1);
  return lastChar === "/";
};
|
|
16
|
+
/**
 * Tells whether a reference should be treated as relative: it neither starts
 * with "http" nor is protocol-relative.
 *
 * @param {string} t - URL or path to classify.
 * @returns {boolean} True for relative references.
 */
export const w = (t) => {
  if (t.startsWith("http")) {
    return false;
  }
  return !d(t);
};
|
|
17
|
+
/**
 * Trims surrounding whitespace and removes one leading and/or one trailing
 * quote character (single or double).
 *
 * @param {string} e - Raw value, possibly wrapped in quotes.
 * @returns {string} The trimmed, unquoted value.
 */
export const $ = (e) => {
  const trimmed = e.trim();
  return trimmed.replace(/^['"]|['"]$/g, "");
};
|
|
18
|
+
/**
 * Resolves an asset reference to an absolute URL.
 *
 * - Absolute references (starting with "http") are returned unchanged.
 * - Protocol-relative references ("//host/path") get the `r` scheme prepended.
 * - Everything else is trimmed, unquoted and resolved against the base `a`.
 *
 * A quoted absolute or protocol-relative reference (e.g. `"https://x/a.png"`)
 * used to be pushed through `new URL(value, a)`, which throws when no base is
 * supplied, so the reference came back unresolved with its quotes intact.
 * That case is now handled without requiring a base; every input that
 * resolved successfully before resolves to the same value.
 *
 * @param {string} relativePath - Reference found in the document.
 * @param {string} [a=""] - Base URL used for relative references.
 * @param {string} [r="https"] - Scheme applied to protocol-relative references.
 * @returns {string} The resolved URL, or `relativePath` unchanged when
 *   resolution fails (the failure is reported through `h`).
 */
export const E = (relativePath, a = "", r = "https") => {
  const isProtocolRelative = (value) => value.startsWith("//");
  const isRelative = (value) => !value.startsWith("http") && !isProtocolRelative(value);
  const withProtocol = (value) => (isProtocolRelative(value) ? `${r}:${value}` : value);

  try {
    if (!isRelative(relativePath)) {
      return withProtocol(relativePath);
    }

    const cleanPath = relativePath.trim().replace(/^['"]|['"]$/g, "");

    // Once quotes/whitespace are removed the value may turn out to be absolute;
    // without a base, `new URL(cleanPath, a)` would throw, so short-circuit.
    if (!a && !isRelative(cleanPath)) {
      return withProtocol(cleanPath);
    }

    return new URL(cleanPath, a).href;
  } catch (err) {
    h(`Error resolving path: ${relativePath} — ${err.message}`);
    return relativePath;
  }
};
|
|
32
|
+
/**
 * Joins a list of path segments with `path.join`.
 *
 * @param {Iterable<string>} t - Segments to join, in order.
 * @returns {string} The joined, normalised path.
 */
export const g = (t) => {
  const segments = [...t];
  return path.join(...segments);
};
|
|
33
|
+
/**
 * Creates a directory (and any missing parents) and reports it through `p`.
 *
 * @param {string} t - Directory path to create.
 * @param {boolean} [i=true] - Forwarded to `p` as its logging switch.
 * @returns {void}
 */
export const U = (t, i = true) => {
  const mkdirOptions = { recursive: true };
  fs.mkdirSync(t, mkdirOptions);
  const message = `Directory ensured: ${t}`;
  p(message, i);
};
|
|
37
|
+
/**
 * Rewrites occurrences of an asset URL in the HTML to the asset's local path
 * (relative to `s`, using forward slashes), then writes the beautified result
 * to `index.html` inside `s`. Does nothing when `c` is falsy.
 *
 * `t.parsedUrl` is passed to `new URL(...)`, so it has to be an absolute URL;
 * a relative value makes this function throw.
 *
 * @param {{ parsedUrl: (string|URL), destinationFilePath: string }} t - Saved asset descriptor.
 * @param {string} l - HTML to rewrite.
 * @param {string} s - Base directory; `index.html` is written here.
 * @param {*} c - Enables the rewrite when truthy.
 * @param {boolean} [i=true] - Forwarded to `p` as its logging switch.
 * @returns {void}
 */
export const v = (t, l, s, c, i = true) => {
  if (!c) {
    return;
  }

  const e = t.parsedUrl;
  const a = t.destinationFilePath;
  const r = new URL(e).origin;
  const urlStr = typeof e === "string" ? e : e.toString();
  const relativeLocalPath = path.relative(s, a).split(path.sep).join("/");

  const rewrittenHtml = l
    .replaceAll(urlStr, relativeLocalPath)
    .replaceAll(`${r}${urlStr}`, relativeLocalPath);

  const outputFile = path.join(s, "index.html");
  fs.writeFileSync(outputFile, beautify(rewrittenHtml, { format: "html" }), "utf8");
  p(`Updated HTML with local asset path for ${urlStr} -> ${relativeLocalPath}`, i);
};
|
|
49
|
+
/**
 * Drops everything from the first "?" onward, then everything from the first
 * "#" onward in what remains.
 *
 * @param {string} t - URL or path.
 * @returns {string} The value without query string and fragment.
 */
export const A = (t) => {
  const [beforeQuery] = t.split("?");
  const [beforeFragment] = beforeQuery.split("#");
  return beforeFragment;
};
|
|
50
|
+
/**
 * Joins two path parts with `path.join` and strips any query string or
 * fragment from the result via `A`.
 *
 * @param {string} t - First path part.
 * @param {string} e - Second path part.
 * @returns {string} The joined path without "?…" / "#…" suffixes.
 */
export const F = (t, e) => {
  const joined = path.join(t, e);
  return A(joined);
};
|
|
51
|
+
/**
 * Splits a string on every "." character.
 *
 * @param {string} t - String to split (e.g. a file name).
 * @returns {string[]} The dot-separated parts.
 */
export const x = (t) => {
  const parts = t.split(".");
  return parts;
};
|
|
52
|
+
/**
 * Looks up a header by its given name, falling back to the lower-cased name
 * when the exact key is missing or holds a falsy value.
 *
 * @param {Object} t - Header map.
 * @param {string} e - Header name.
 * @returns {*} The header value, or `undefined` when neither key is set.
 */
export const P = (t, e) => {
  const exactMatch = t[e];
  if (exactMatch) {
    return exactMatch;
  }
  return t[e.toLowerCase()];
};
|
|
53
|
+
/**
 * Derives a filesystem-safe file name for a downloaded asset.
 *
 * The name comes from the `Content-Disposition` header when it carries a
 * `filename` parameter (quoted or unquoted token form), otherwise from
 * `fallback`. Query strings and fragments are dropped, every character outside
 * `[a-zA-Z0-9._-]` becomes "_", and when the name has no "." an extension is
 * derived from `Content-Type` via `mime.getExtension`.
 *
 * Unlike the previous implementation this never throws when neither the header
 * nor `fallback` yields a name, and never returns an empty name: "asset" is
 * used instead (the same default the downloader uses for its file-name guess).
 *
 * @param {Object} headers - Response headers; keys are looked up as given and lower-cased.
 * @param {string} [fallback] - Name to use when the header provides none.
 * @returns {string} The sanitised file name.
 */
export const R = (headers, fallback) => {
  const readHeader = (name) => headers?.[name] || headers?.[name.toLowerCase()];

  const disposition = readHeader("Content-Disposition");
  let headerFileName;
  if (typeof disposition === "string") {
    // Quoted form first (original behaviour), then the unquoted token form.
    headerFileName =
      disposition.match(/filename="(.+?)"/)?.[1] ??
      disposition.match(/filename=([^";\s]+)/)?.[1];
  }

  const candidate = String(headerFileName || fallback || "");
  const withoutQueryOrHash = candidate.split("?")[0].split("#")[0];
  let filename = (withoutQueryOrHash || "asset").replace(/[^a-zA-Z0-9.\-_]/g, "_");

  const contentType = readHeader("Content-Type");
  if (!filename.includes(".") && contentType) {
    const ext = mime.getExtension(contentType);
    if (ext) {
      filename = `${filename}.${ext}`;
    }
  }

  return filename;
};
|
|
67
|
+
/**
 * Logs download progress as a whole-number percentage.
 *
 * The percentage is `loaded / total * 100`, rounded; it is reported as 0 when
 * either value is falsy. Nothing is logged when the result is not a number.
 *
 * @param {{ loaded: number, total: number }} t - Progress event.
 * @returns {void}
 */
export const D = (t) => {
  const e = t.loaded;
  const s = t.total;

  let a = 0;
  if (e && s) {
    a = Math.round((e / s) * 100);
  }

  if (isNaN(a)) {
    return;
  }
  console.log(`Download progress: ${a}%`);
};
|
|
73
|
+
/**
 * Downloads the assets referenced by an HTML document and either writes them
 * to disk or uploads them to R2.
 *
 * `t` is either HTML markup or an http(s) URL whose body is fetched and used as
 * the markup. Stylesheet `href`s, `img`/`script`/`source` `src`s and CSS
 * `url(...)` references are collected, de-duplicated and fetched by a pool of
 * at most `concurrency` workers, each asset being attempted up to
 * `maxRetryAttempts` times. Failures are logged (when `verbose`) and skipped.
 *
 * The returned HTML is the input with `srcset`/`sizes` attributes removed and
 * every literal occurrence of `source` stripped.
 *
 * Changes from the previous implementation:
 * - `returnDetails: true` always yields `{ html, assets }`, including when the
 *   input is empty or could not be loaded (a bare "" used to be returned).
 * - Redirects are capped, and a malformed `Location` header rejects the
 *   request instead of throwing inside the response callback.
 * - `retryDelay: 0` is honoured instead of being replaced by 1000.
 * - Local directories are only created when a file is actually written
 *   (`saveFile` without `uploadToR2`).
 * - A fractional or non-numeric `maxRetryAttempts` is normalised to an integer.
 *
 * @param {string} t - HTML markup, or a URL to fetch the markup from.
 * @param {Object} [e={}] - Options.
 * @param {string} [e.basePath=process.cwd()] - Local root for saved assets.
 * @param {string} [e.source=""] - Base URL for relative asset paths; defaults to `t` when `t` is a URL.
 * @param {string} [e.protocol="https"] - Scheme applied to protocol-relative ("//…") URLs.
 * @param {number} [e.maxRetryAttempts=3] - Attempts per asset (at least 1).
 * @param {number} [e.retryDelay=1000] - Milliseconds to wait between attempts.
 * @param {boolean} [e.verbose=true] - Enables console logging.
 * @param {boolean} [e.saveFile=true] - Writes assets to disk when not uploading to R2.
 * @param {number} [e.concurrency=8] - Maximum simultaneous downloads.
 * @param {boolean} [e.uploadToR2=false] - Uploads assets through `uploadBufferToR2` instead of writing them.
 * @param {boolean} [e.returnDetails=false] - Return `{ html, assets }` instead of the HTML string.
 * @param {string} [e.jobId="conv_local"] - Used to build the default storage prefix.
 * @param {string} [e.r2Prefix] - Storage prefix; defaults to `generated/${jobId}/assets`.
 * @param {Map} [e._assetTaskCache] - Optional shared cache of in-flight/finished asset tasks, keyed by absolute URL.
 * @param {Set} [e._ensuredDirs] - Optional shared set of directories already created.
 * @returns {Promise<string|{html: string, assets: Object[]}>} The processed HTML,
 *   or the HTML plus the uploaded-asset records when `returnDetails` is set.
 */
const extractAssets = async (t, e = {}) => {
  let {
    basePath: s = process.cwd(),
    source: a = "",
    protocol: r = "https",
    maxRetryAttempts: o = 3,
    retryDelay: n = 1000,
    verbose: i = true,
    saveFile: c = true,
    concurrency: y = 8,
    uploadToR2: k = false,
    returnDetails: q = false,
    jobId: z = "conv_local",
    r2Prefix: G,
    _assetTaskCache: H,
    _ensuredDirs: B
  } = e ?? {};

  // Upper bound on redirect hops for a single request; prevents redirect loops.
  const MAX_REDIRECTS = 10;
  const REQUEST_HEADERS = { "User-Agent": "Mozilla/5.0", "Accept": "*/*" };

  a = a || "";
  r = r || "https";
  s = s || process.cwd();

  // Accept 0 as a real delay; only missing/non-numeric values fall back to 1000.
  const requestedDelay = typeof n === "string" && n.trim() !== "" ? Number(n) : n;
  n = typeof requestedDelay === "number" && Number.isFinite(requestedDelay)
    ? Math.max(0, requestedDelay)
    : 1000;

  // Always a whole number >= 1 so the "last attempt" check below can match.
  const requestedAttempts = Number(o);
  o = Number.isFinite(requestedAttempts) && requestedAttempts !== 0
    ? Math.max(1, Math.floor(requestedAttempts))
    : 3;

  y = Math.max(1, Number.isFinite(y) ? Math.floor(y) : 8);

  let l = "";
  const uploadedAssets = [];
  const assetTaskCache = H instanceof Map ? H : new Map();
  const ensuredDirs = B instanceof Set ? B : new Set();

  const h = (message) => {
    if (i) {
      console.error(`[Error] ${message}`);
    }
  };
  const p = (message) => {
    if (i) {
      console.log(`[Success] ${message}`);
    }
  };

  // Builds the return value in the shape selected by `returnDetails`.
  const buildResult = () => (q ? { html: l, assets: uploadedAssets } : l);

  const sleep = (delay) => new Promise((resolve) => setTimeout(resolve, Math.max(0, delay)));
  const escapeRegExp = (value) => String(value).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const isProtocolRelative = (value) => value.startsWith("//");
  const applyProtocol = (value) => (isProtocolRelative(value) ? `${r}:${value}` : value);
  const isRelativePath = (value) => !value.startsWith("http") && !isProtocolRelative(value);
  const stripQuotes = (value) => value.trim().replace(/^['"]|['"]$/g, "");

  // Creates a directory once per run (or per shared `_ensuredDirs` set).
  const ensureDir = (dirPath) => {
    if (!dirPath || ensuredDirs.has(dirPath)) {
      return;
    }
    fs.mkdirSync(dirPath, { recursive: true });
    ensuredDirs.add(dirPath);
    p(`Directory ensured: ${dirPath}`);
  };

  // Turns an asset reference into an absolute URL; returns it unchanged on failure.
  const resolveAssetUrl = (relativePath) => {
    try {
      if (isRelativePath(relativePath)) {
        const cleanPath = stripQuotes(relativePath);
        if (!a) {
          throw new Error("A source URL is required to resolve relative asset paths.");
        }
        return new URL(cleanPath, a).href;
      }
      return applyProtocol(relativePath);
    } catch (err) {
      h(`Error resolving path: ${relativePath} — ${err.message}`);
      return relativePath;
    }
  };

  const pickHeader = (headers, headerName) => headers[headerName] || headers[headerName.toLowerCase()];

  // Picks a sanitised file name from Content-Disposition, else the fallback,
  // appending an extension derived from Content-Type when the name has none.
  const getFileNameFromHeaders = (headers, fallback) => {
    let filename = pickHeader(headers, "Content-Disposition")?.match(/filename="(.+?)"/)?.[1] || fallback;
    filename = String(filename ?? "").split("?")[0].split("#")[0];
    filename = filename.replace(/[^a-zA-Z0-9._-]/g, "_");
    const contentType = pickHeader(headers, "Content-Type");
    if (!filename.includes(".") && contentType) {
      const ext = mime.getExtension(contentType);
      if (ext) {
        filename = `${filename}.${ext}`;
      }
    }
    return filename;
  };

  // Performs an HTTP(S) GET and resolves with the response body and headers.
  // Follows at most `redirectsLeft` redirects; rejects on status >= 400.
  const httpGet = (targetUrl, redirectsLeft = MAX_REDIRECTS) => new Promise((resolve, reject) => {
    const client = targetUrl.startsWith("https") ? https : http;
    client.get(targetUrl, { headers: REQUEST_HEADERS }, (res) => {
      const statusCode = res.statusCode || 0;
      const location = res.headers.location;

      if (statusCode >= 300 && statusCode < 400 && location) {
        res.resume();
        if (redirectsLeft <= 0) {
          reject(new Error(`Too many redirects while fetching ${targetUrl}`));
          return;
        }
        let nextUrl;
        try {
          nextUrl = new URL(location, targetUrl).href;
        } catch (err) {
          // A throw here would escape the promise and surface as an uncaught exception.
          reject(err);
          return;
        }
        resolve(httpGet(nextUrl, redirectsLeft - 1));
        return;
      }

      if (statusCode >= 400) {
        res.resume();
        reject(new Error(`HTTP error! Status: ${statusCode}`));
        return;
      }

      const chunks = [];
      res.on("data", (chunk) => {
        chunks.push(chunk);
      });
      res.on("end", () => {
        resolve({ body: Buffer.concat(chunks), headers: res.headers });
      });
      res.on("error", reject);
    }).on("error", reject);
  });

  // Fetches an asset as a Buffer; `file://` URLs are read from the local disk.
  const fetchBuffer = async (url, fallbackName) => {
    const decodedUrl = decode(url);
    if (decodedUrl.startsWith("file://")) {
      const localPath = decodedUrl.replace("file://", "");
      p(`Reading local file: ${localPath}`);
      const data = fs.readFileSync(localPath);
      return { data, fileName: path.basename(localPath) };
    }
    p(`Starting download for: ${decodedUrl}`);
    const { body, headers } = await httpGet(decodedUrl);
    return { data: body, fileName: getFileNameFromHeaders(headers, fallbackName) };
  };

  // Fetches a URL and returns its body decoded as UTF-8 text.
  const fetchText = async (url) => {
    const decodedUrl = decode(url);
    p(`Fetching content: ${decodedUrl}`);
    const { body } = await httpGet(decodedUrl);
    return body.toString("utf-8");
  };

  const isValidUrl = (value) => {
    try {
      return !!new URL(applyProtocol(value));
    } catch {
      return false;
    }
  };

  // Throws unless `value` is an http(s) URL with a hostname.
  const hasValidHttpProtocol = (value) => {
    const { protocol, hostname, href } = new URL(A(value));
    if (!protocol || !["http:", "https:"].includes(protocol)) {
      throw new Error("Invalid baseUrl. Only http and https are supported.");
    }
    if (!hostname) {
      throw new Error("Invalid baseUrl. Provide a valid URL with a hostname.");
    }
    return !!href;
  };

  // Fills `l` with the markup: fetched when `t` is a URL, otherwise `t` itself.
  const loadInputHtml = async () => {
    if (typeof t !== "string" || typeof s !== "string") {
      h("Invalid user input: source and basePath must be strings.");
      return;
    }
    if (isValidUrl(t)) {
      try {
        hasValidHttpProtocol(t);
        l = await fetchText(t);
        if (!a) {
          a = t;
        }
      } catch (err) {
        h(err.message || err);
      }
      return;
    }
    l = t;
  };

  // Downloads one asset (with retries) and saves or uploads it. Concurrent
  // requests for the same absolute URL share a single task via the cache.
  const saveResolvedAsset = async (asset) => {
    const absoluteAssetUrl = resolveAssetUrl(asset);
    if (assetTaskCache.has(absoluteAssetUrl)) {
      return assetTaskCache.get(absoluteAssetUrl);
    }

    const task = (async () => {
      try {
        const urlObj = new URL(absoluteAssetUrl);
        const urlPath = urlObj.pathname.replace(/^\//, "");
        const destinationPath = path.join(s, path.dirname(urlPath));
        const fileNameGuess = path.basename(urlPath).split("?")[0].split("#")[0] || "asset";

        for (let attempt = 0; attempt < o; attempt++) {
          try {
            const { data, fileName } = await fetchBuffer(absoluteAssetUrl, fileNameGuess);
            const fullPath = path.join(destinationPath, fileName);
            let uploadedFile = null;

            if (k) {
              const storageKey = path.posix.join(
                G || `generated/${z}/assets`,
                path.dirname(urlPath).split(path.sep).join("/"),
                fileName
              );
              const uploadResult = await uploadBufferToR2({
                storageKey,
                body: data,
                contentType: inferContentType(fileName)
              });
              uploadedFile = createFileRecord({
                id: `asset_${uploadedAssets.length + 1}`,
                name: fileName,
                kind: "asset",
                storageKey: uploadResult.storageKey,
                size: uploadResult.size,
                type: uploadResult.type,
                url: uploadResult.url
              });
              uploadedAssets.push({ ...uploadedFile, buffer: data });
              p(`Asset uploaded successfully to ${uploadResult.url}`);
            } else if (c) {
              // Only touch the local filesystem when a file is actually written.
              ensureDir(destinationPath);
              fs.writeFileSync(fullPath, data);
              p(`Asset saved successfully to ${fullPath}`);
            }

            return {
              parsedUrl: asset,
              absoluteAssetUrl,
              destinationPath,
              destinationFilePath: fullPath,
              fileName,
              uploadedFile
            };
          } catch (err) {
            const isLastAttempt = attempt === o - 1;
            if (isLastAttempt) {
              const { message, code } = err || {};
              if (["ECONNRESET", "ETIMEDOUT"].includes(code)) {
                h(`Network error occurred while downloading asset from ${absoluteAssetUrl}: ${message}.`);
              } else if (["EACCES", "EISDIR"].includes(code)) {
                h("Error saving asset. Permission denied or target path is a directory.");
              } else {
                h(`Error downloading asset from ${absoluteAssetUrl}: ${message || err}.`);
              }
              return null;
            }
            await sleep(n);
          }
        }
      } catch (err) {
        h(`Error downloading asset from ${absoluteAssetUrl}: ${err.message || err}.`);
      }
      return null;
    })();

    assetTaskCache.set(absoluteAssetUrl, task);
    return task;
  };

  await loadInputHtml();
  if (!l) {
    return buildResult();
  }

  l = l.replace(/srcset="(.*?)"/gi, "").replace(/sizes="(.*?)"/gi, "");
  if (a) {
    l = l.replace(new RegExp(escapeRegExp(a), "g"), "");
  }

  const regex = /(<link[^>]+rel=["']stylesheet["'][^>]+href=["'])([^"']+\.[^"']+)["']|<(img|script|source)[^>]+src=["']([^"']+\.(?!json)[^"']+)["']/gi;
  const matches = [
    ...[...l.matchAll(regex)].map((match) => match[2] || match[4] || ""),
    ...[...l.matchAll(/url\(["']?(.*?)["']?\)/gi)]
      .map((match) => match[1])
      .filter((url) => !/^#/.test(url))
  ].filter((match) => !!match && !match.startsWith("data:"));

  const uniqueMatches = [...new Set(matches)];
  const queue = [...uniqueMatches];

  // Fixed-size worker pool draining a shared queue.
  const workers = Array.from({ length: Math.min(y, queue.length || 1) }, async () => {
    while (queue.length > 0) {
      const asset = queue.shift();
      if (!asset) {
        return;
      }
      await saveResolvedAsset(asset);
    }
  });
  await Promise.all(workers);

  return buildResult();
};
|
|
362
|
+
export default extractAssets;
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "fetch-page-assets",
|
|
3
|
+
"version": "1.2.7",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "A versatile Node.js module for extracting assets (such as CSS files, JavaScript files, fonts, and images) from HTML content or URLs.",
|
|
6
|
+
"main": "index.js",
|
|
7
|
+
"files": [
|
|
8
|
+
"index.js",
|
|
9
|
+
"README.md",
|
|
10
|
+
"LICENSE.MD"
|
|
11
|
+
],
|
|
12
|
+
"scripts": {
|
|
13
|
+
"test": "node -e \"console.log('No vendored tests configured')\""
|
|
14
|
+
},
|
|
15
|
+
"repository": {
|
|
16
|
+
"type": "git",
|
|
17
|
+
"url": "git+https://github.com/DiogoAngelim/fetch-page-assets.git"
|
|
18
|
+
},
|
|
19
|
+
"keywords": [
|
|
20
|
+
"backend",
|
|
21
|
+
"asset",
|
|
22
|
+
"extractor",
|
|
23
|
+
"downloader",
|
|
24
|
+
"html",
|
|
25
|
+
"css",
|
|
26
|
+
"javascript",
|
|
27
|
+
"image",
|
|
28
|
+
"fetch",
|
|
29
|
+
"save"
|
|
30
|
+
],
|
|
31
|
+
"author": "Diogo Angelim",
|
|
32
|
+
"license": "MIT",
|
|
33
|
+
"funding": "https://www.paypal.com/donate/?hosted_button_id=XA5LN4XR39PMQ",
|
|
34
|
+
"bugs": {
|
|
35
|
+
"url": "https://github.com/DiogoAngelim/fetch-page-assets/issues"
|
|
36
|
+
},
|
|
37
|
+
"homepage": "https://github.com/DiogoAngelim/fetch-page-assets#readme",
|
|
38
|
+
"publishConfig": {
|
|
39
|
+
"access": "public"
|
|
40
|
+
},
|
|
41
|
+
"dependencies": {
|
|
42
|
+
"beautify": "^0.0.8",
|
|
43
|
+
"fs": "^0.0.1-security",
|
|
44
|
+
"html-entities": "^2.5.2",
|
|
45
|
+
"mime": "^4.0.3",
|
|
46
|
+
"path": "^0.12.7"
|
|
47
|
+
}
|
|
48
|
+
}
|
package/.env
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
SNAPAPI_KEY=sk_live_5b420427f2ec2509a0971c267b322300efec77a498c44ada
|