html-to-gutenberg 4.2.9 → 4.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,362 @@
1
+ import https from "https";
2
+ import http from "http";
3
+ import fs from "fs";
4
+ import path from "path";
5
+ import beautify from "beautify";
6
+ import { decode } from "html-entities";
7
+ import mime from "mime";
8
+ import { createFileRecord, inferContentType, uploadBufferToR2 } from "../../r2.js";
9
/**
 * Reports an error message through `console.error`, prefixed with "[Error]".
 *
 * @param {*} t - Message to report (interpolated into the log line).
 * @param {boolean} [i=true] - Verbosity switch; nothing is logged when falsy.
 */
export const h = (t, i = true) => {
  if (!i) {
    return;
  }
  console.error(`[Error] ${t}`);
};
11
/**
 * Reports a success message through `console.log`, prefixed with "[Success]".
 *
 * @param {*} t - Message to report (interpolated into the log line).
 * @param {boolean} [i=true] - Verbosity switch; nothing is logged when falsy.
 */
export const p = (t, i = true) => {
  if (!i) {
    return;
  }
  console.log(`[Success] ${t}`);
};
13
/**
 * Tells whether a URL string is protocol-relative, i.e. begins with "//".
 *
 * @param {string} t - URL or path to inspect.
 * @returns {boolean} `true` when `t` starts with "//".
 */
export const d = (t) => {
  const hasDoubleSlashPrefix = t.startsWith("//");
  return hasDoubleSlashPrefix;
};
14
/**
 * Prepends a protocol to a protocol-relative URL and leaves every other
 * value untouched.
 *
 * @param {string} t - URL or path to normalise.
 * @param {string} [r="https"] - Protocol name (without the colon) to prepend.
 * @returns {string} `${r}:${t}` when `t` starts with "//", otherwise `t`.
 */
export const m = (t, r = "https") => {
  if (!d(t)) {
    return t;
  }
  return `${r}:${t}`;
};
15
/**
 * Tells whether a string ends with a forward slash.
 *
 * @param {string} t - Path or URL to inspect.
 * @returns {boolean} `true` when the last character of `t` is "/".
 */
export const u = (t) => {
  const hasTrailingSlash = t.endsWith("/");
  return hasTrailingSlash;
};
16
/**
 * Tells whether a reference should be treated as a relative path: it neither
 * starts with the literal text "http" nor is protocol-relative ("//...").
 *
 * @param {string} t - URL or path to inspect.
 * @returns {boolean} `true` when `t` is considered relative.
 */
export const w = (t) => {
  const looksAbsolute = t.startsWith("http") || d(t);
  return !looksAbsolute;
};
17
/**
 * Trims surrounding whitespace and removes one leading and/or one trailing
 * quote character (single or double) from a string.
 *
 * @param {string} e - Raw value, e.g. the contents of a CSS `url(...)`.
 * @returns {string} The trimmed, unquoted value.
 */
export const $ = (e) => {
  const trimmed = e.trim();
  return trimmed.replace(/^['"]|['"]$/g, "");
};
18
/**
 * Turns an asset reference into an absolute URL.
 *
 * Relative references (as judged by `w`) are unquoted and resolved against
 * the base URL `a`; everything else only gets a protocol prepended when it is
 * protocol-relative. If resolution throws (for example because `a` is not a
 * valid base), the error is reported through `h` and the input is returned
 * unchanged.
 *
 * @param {string} relativePath - Asset reference as found in the document.
 * @param {string} [a=""] - Base URL used to resolve relative references.
 * @param {string} [r="https"] - Protocol applied to protocol-relative URLs.
 * @returns {string} The resolved URL, or `relativePath` on failure.
 */
export const E = (relativePath, a = "", r = "https") => {
  try {
    if (!w(relativePath)) {
      return m(relativePath, r);
    }
    const unquoted = $(relativePath);
    return new URL(unquoted, a).href;
  } catch (err) {
    h(`Error resolving path: ${relativePath} — ${err.message}`);
    return relativePath;
  }
};
32
/**
 * Joins a list of path segments with `path.join`.
 *
 * @param {Iterable<string>} t - Path segments, in order.
 * @returns {string} The joined, normalised path.
 */
export const g = (t) => {
  const segments = [...t];
  return path.join(...segments);
};
33
/**
 * Creates a directory (including missing parents) and logs that it exists.
 *
 * @param {string} t - Directory path to create.
 * @param {boolean} [i=true] - Verbosity switch forwarded to the success logger.
 */
export const U = (t, i = true) => {
  const mkdirOptions = { recursive: true };
  fs.mkdirSync(t, mkdirOptions);
  p(`Directory ensured: ${t}`, i);
};
37
/**
 * Rewrites references to a downloaded asset inside an HTML string so they
 * point at the local copy, then writes the beautified result to
 * `<s>/index.html`. Does nothing when `c` is falsy.
 *
 * @param {{parsedUrl: (string|URL), destinationFilePath: string}} t - Asset
 *   descriptor: the URL as referenced and where the asset was stored.
 * @param {string} l - HTML to rewrite.
 * @param {string} s - Output directory; local paths are made relative to it.
 * @param {boolean} c - Save switch; the function is a no-op when falsy.
 * @param {boolean} [i=true] - Verbosity switch forwarded to the success logger.
 */
export const v = (t, l, s, c, i = true) => {
  if (!c) {
    return;
  }
  const e = t.parsedUrl;
  const a = t.destinationFilePath;
  // `new URL(e)` throws when `e` is not an absolute URL.
  const r = new URL(e).origin;
  let urlStr;
  if (typeof e === "string") {
    urlStr = e;
  } else {
    urlStr = e.toString();
  }
  // Always emit forward slashes so the path is usable inside HTML on any OS.
  const relativeLocalPath = path.relative(s, a).split(path.sep).join("/");
  const withBareUrlReplaced = l.replaceAll(urlStr, relativeLocalPath);
  // NOTE(review): this second pass runs after the bare-URL pass above, so any
  // origin-prefixed occurrence has already had its URL part rewritten —
  // confirm the intended order of the two replacements.
  const rewritten = withBareUrlReplaced.replaceAll(`${r}${urlStr}`, relativeLocalPath);
  const outputFile = path.join(s, "index.html");
  fs.writeFileSync(outputFile, beautify(rewritten, { format: "html" }), "utf8");
  p(`Updated HTML with local asset path for ${urlStr} -> ${relativeLocalPath}`, i);
};
49
/**
 * Removes the query string and fragment from a URL or path.
 *
 * @param {string} t - URL or path, possibly containing "?" and/or "#".
 * @returns {string} Everything before the first "?" and the first "#".
 */
export const A = (t) => {
  const [withoutQuery] = t.split("?");
  const [withoutFragment] = withoutQuery.split("#");
  return withoutFragment;
};
50
/**
 * Joins two path segments and strips any query string or fragment from the
 * result.
 *
 * @param {string} t - Leading path segment (typically a directory).
 * @param {string} e - Trailing path segment (typically a file name).
 * @returns {string} The joined path without "?..." or "#..." suffixes.
 */
export const F = (t, e) => {
  const joined = path.join(t, e);
  return A(joined);
};
51
/**
 * Splits a string on every "." character.
 *
 * @param {string} t - Text to split, e.g. a file name.
 * @returns {string[]} The dot-separated parts, in order.
 */
export const x = (t) => {
  const parts = t.split(".");
  return parts;
};
52
/**
 * Looks up a header value by name, trying the name as given first and its
 * lower-cased form second.
 *
 * @param {Object<string, *>} t - Header map.
 * @param {string} e - Header name, e.g. "Content-Type".
 * @returns {*} The first truthy match, otherwise the lower-cased lookup result.
 */
export const P = (t, e) => {
  const exactMatch = t[e];
  if (exactMatch) {
    return exactMatch;
  }
  return t[e.toLowerCase()];
};
53
/**
 * Derives a filesystem-safe file name for a downloaded resource.
 *
 * The name is taken from the `Content-Disposition` header when it carries a
 * `filename` parameter (quoted or bare token), otherwise from `fallback`, and
 * finally defaults to "asset" so the function never fails on a missing name.
 * Query strings and fragments are dropped, every character outside
 * `[a-zA-Z0-9._-]` becomes "_", and when the result has no "." an extension
 * derived from the `Content-Type` header is appended if one is known.
 *
 * @param {Object<string, string>} headers - Response headers.
 * @param {string} [fallback] - Name to use when the headers provide none.
 * @returns {string} The sanitised file name.
 */
export const R = (headers, fallback) => {
  const disposition = P(headers, "Content-Disposition");
  // Accept both `filename="a.png"` and the unquoted token form `filename=a.png`.
  // `filename*=` (extended syntax) is intentionally not matched by this pattern.
  const dispositionMatch = typeof disposition === "string"
    ? disposition.match(/filename\s*=\s*(?:"([^"]+)"|([^";\s]+))/i)
    : null;
  const headerName = dispositionMatch?.[1] || dispositionMatch?.[2];
  // Previously a missing header plus a missing fallback crashed on `.replace`.
  const rawName = String(headerName || fallback || "asset");
  const safeName = rawName
    .split("?")[0]
    .split("#")[0]
    .replace(/[^a-zA-Z0-9.\-_]/g, "_");
  const contentType = P(headers, "Content-Type");
  if (safeName.includes(".") || !contentType) {
    return safeName;
  }
  const ext = mime.getExtension(contentType);
  return ext ? `${safeName}.${ext}` : safeName;
};
67
/**
 * Logs download progress as a whole-number percentage.
 *
 * The percentage is `loaded / total` rounded to the nearest integer, or 0
 * when either value is falsy. Nothing is logged when the result is NaN.
 *
 * @param {{loaded: number, total: number}} t - Progress event data.
 */
export const D = (t) => {
  const loaded = t.loaded;
  const total = t.total;
  let percent = 0;
  if (loaded && total) {
    percent = Math.round((loaded / total) * 100);
  }
  if (Number.isNaN(percent)) {
    return;
  }
  console.log(`Download progress: ${percent}%`);
};
73
/**
 * Finds the assets referenced by an HTML document and downloads each one,
 * either writing it under `basePath` or uploading it to R2.
 *
 * The input may be raw HTML or an http(s) URL whose body is fetched first.
 * Before scanning, `srcset`/`sizes` attributes are removed and every literal
 * occurrence of the source URL is stripped from the HTML. Assets are collected
 * from stylesheet `<link href>`, `<img|script|source src>` (excluding `.json`)
 * and CSS `url(...)` references, de-duplicated, and fetched by a pool of
 * concurrent workers with per-asset retries. Download failures are logged and
 * never reject the returned promise.
 *
 * @param {string} t - Raw HTML, or an http(s) URL to fetch the HTML from.
 * @param {object} [e] - Options.
 * @param {string} [e.basePath=process.cwd()] - Root directory for saved assets.
 * @param {string} [e.source=""] - Base URL for relative asset paths; defaults
 *   to `t` when `t` is a URL.
 * @param {string} [e.protocol="https"] - Protocol for protocol-relative URLs.
 * @param {number} [e.maxRetryAttempts=3] - Attempts per asset (minimum 1).
 * @param {number} [e.retryDelay=1000] - Milliseconds to wait between attempts.
 * @param {boolean} [e.verbose=true] - Enables console logging.
 * @param {boolean} [e.saveFile=true] - Write assets to disk (ignored when
 *   `uploadToR2` is set).
 * @param {number} [e.concurrency=8] - Number of parallel download workers.
 * @param {boolean} [e.uploadToR2=false] - Upload assets instead of saving them.
 * @param {boolean} [e.returnDetails=false] - Return `{ html, assets }` instead
 *   of the HTML string.
 * @param {string} [e.jobId="conv_local"] - Used in the default storage prefix.
 * @param {string} [e.r2Prefix] - Storage key prefix overriding the default.
 * @param {Map} [e._assetTaskCache] - Shared cache of in-flight asset tasks.
 * @param {Set} [e._ensuredDirs] - Shared set of already-created directories.
 * @returns {Promise<string|{html: string, assets: object[]}>} The processed
 *   HTML (an empty string when no HTML could be loaded), or the HTML together
 *   with the uploaded asset records when `returnDetails` is set.
 */
const extractAssets = async (t, e = {}) => {
  let {
    basePath: s = process.cwd(),
    source: a = "",
    protocol: r = "https",
    maxRetryAttempts: o = 3,
    retryDelay: n = 1000,
    verbose: i = true,
    saveFile: c = true,
    concurrency: y = 8,
    uploadToR2: k = false,
    returnDetails: q = false,
    jobId: z = "conv_local",
    r2Prefix: G,
    _assetTaskCache: H,
    _ensuredDirs: B,
  } = e;
  // Destructuring defaults only cover `undefined`; these also cover null/""/0.
  a = a || "";
  r = r || "https";
  n = n || 1000;
  s = s || process.cwd();
  o = Math.max(1, o || 3);
  y = Math.max(1, Number.isFinite(y) ? Math.floor(y) : 8);

  // Upper bound on redirect hops per request so redirect cycles terminate.
  const MAX_REDIRECTS = 10;

  let l = "";
  const uploadedAssets = [];
  const assetTaskCache = H instanceof Map ? H : new Map();
  const ensuredDirs = B instanceof Set ? B : new Set();

  // Loggers bound to this call's `verbose` flag.
  const h = (message) => {
    if (i) {
      console.error(`[Error] ${message}`);
    }
  };
  const p = (message) => {
    if (i) {
      console.log(`[Success] ${message}`);
    }
  };

  const sleep = (delay) => new Promise((resolve) => setTimeout(resolve, Math.max(0, delay)));
  const escapeRegExp = (value) => String(value).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const isProtocolRelative = (value) => value.startsWith("//");
  const applyProtocol = (value) => (isProtocolRelative(value) ? `${r}:${value}` : value);
  const isRelativePath = (value) => !value.startsWith("http") && !isProtocolRelative(value);
  const stripQuotes = (value) => value.trim().replace(/^['"]|['"]$/g, "");

  // Creates a directory once per run (or once per shared `_ensuredDirs` set).
  const ensureDir = (dirPath) => {
    if (!dirPath || ensuredDirs.has(dirPath)) {
      return;
    }
    fs.mkdirSync(dirPath, { recursive: true });
    ensuredDirs.add(dirPath);
    p(`Directory ensured: ${dirPath}`);
  };

  // Resolves an asset reference to an absolute URL; returns the input on failure.
  const resolveAssetUrl = (relativePath) => {
    try {
      if (isRelativePath(relativePath)) {
        const cleanPath = stripQuotes(relativePath);
        if (!a) {
          throw new Error("A source URL is required to resolve relative asset paths.");
        }
        return new URL(cleanPath, a).href;
      }
      return applyProtocol(relativePath);
    }
    catch (err) {
      h(`Error resolving path: ${relativePath} — ${err.message}`);
      return relativePath;
    }
  };

  const pickHeader = (headers, headerName) => headers[headerName] || headers[headerName.toLowerCase()];

  // Picks a sanitised file name from Content-Disposition or the fallback,
  // appending an extension derived from Content-Type when none is present.
  const getFileNameFromHeaders = (headers, fallback) => {
    let filename = pickHeader(headers, "Content-Disposition")?.match(/filename="(.+?)"/)?.[1] || fallback;
    filename = filename?.split("?")[0].split("#")[0];
    filename = filename.replace(/[^a-zA-Z0-9._-]/g, "_");
    const contentType = pickHeader(headers, "Content-Type");
    if (!filename.includes(".") && contentType) {
      const ext = mime.getExtension(contentType);
      if (ext) {
        filename = `${filename}.${ext}`;
      }
    }
    return filename;
  };

  // Validates one redirect hop and returns the absolute target URL.
  // Throws when the hop limit is reached, the Location header is malformed,
  // or the target is not http(s) (a remote server must not be able to steer
  // the downloader to `file:` or other local schemes).
  const nextRedirectUrl = (location, currentUrl, redirectCount) => {
    if (redirectCount >= MAX_REDIRECTS) {
      throw new Error(`Too many redirects (limit ${MAX_REDIRECTS}) while fetching ${currentUrl}`);
    }
    const target = new URL(location, currentUrl);
    if (target.protocol !== "http:" && target.protocol !== "https:") {
      throw new Error(`Refusing redirect to unsupported protocol "${target.protocol}" from ${currentUrl}`);
    }
    return target.href;
  };

  // Downloads a URL into memory and resolves `{ data, fileName }`.
  // `redirectCount` is internal bookkeeping for the redirect limit.
  const fetchBuffer = async (url, fallbackName, redirectCount = 0) => {
    const decodedUrl = decode(url);
    if (decodedUrl.startsWith("file://")) {
      // NOTE(security): a `file://` reference found in the processed HTML makes
      // this read a local file, which is then saved or uploaded like any other
      // asset. Kept for compatibility — review before feeding untrusted HTML.
      const localPath = decodedUrl.replace("file://", "");
      p(`Reading local file: ${localPath}`);
      const data = fs.readFileSync(localPath);
      return { data, fileName: path.basename(localPath) };
    }
    p(`Starting download for: ${decodedUrl}`);
    return new Promise((resolve, reject) => {
      const client = decodedUrl.startsWith("https") ? https : http;
      client.get(decodedUrl, {
        headers: {
          "User-Agent": "Mozilla/5.0",
          "Accept": "*/*"
        }
      }, (res) => {
        const statusCode = res.statusCode || 0;
        const location = res.headers.location;
        if (statusCode >= 300 && statusCode < 400 && location) {
          res.resume();
          // Errors here must become rejections: this callback runs outside the
          // promise executor, so a throw would otherwise be uncaught.
          try {
            const redirectedUrl = nextRedirectUrl(location, decodedUrl, redirectCount);
            resolve(fetchBuffer(redirectedUrl, fallbackName, redirectCount + 1));
          }
          catch (err) {
            reject(err);
          }
          return;
        }
        if (statusCode >= 400) {
          res.resume();
          reject(new Error(`HTTP error! Status: ${statusCode}`));
          return;
        }
        const chunks = [];
        res.on("data", (chunk) => {
          chunks.push(chunk);
        });
        res.on("end", () => {
          resolve({
            data: Buffer.concat(chunks),
            fileName: getFileNameFromHeaders(res.headers, fallbackName)
          });
        });
        res.on("error", reject);
      }).on("error", reject);
    });
  };

  // Downloads a URL and resolves its body decoded as UTF-8 text.
  // `redirectCount` is internal bookkeeping for the redirect limit.
  const fetchText = async (url, redirectCount = 0) => {
    const decodedUrl = decode(url);
    p(`Fetching content: ${decodedUrl}`);
    return new Promise((resolve, reject) => {
      const client = decodedUrl.startsWith("https") ? https : http;
      client.get(decodedUrl, {
        headers: {
          "User-Agent": "Mozilla/5.0",
          "Accept": "*/*"
        }
      }, (res) => {
        const statusCode = res.statusCode || 0;
        const location = res.headers.location;
        if (statusCode >= 300 && statusCode < 400 && location) {
          res.resume();
          try {
            const redirectedUrl = nextRedirectUrl(location, decodedUrl, redirectCount);
            resolve(fetchText(redirectedUrl, redirectCount + 1));
          }
          catch (err) {
            reject(err);
          }
          return;
        }
        if (statusCode >= 400) {
          res.resume();
          reject(new Error(`HTTP error! Status: ${statusCode}`));
          return;
        }
        const chunks = [];
        res.on("data", (chunk) => {
          chunks.push(chunk);
        });
        res.on("end", () => {
          resolve(Buffer.concat(chunks).toString("utf-8"));
        });
        res.on("error", reject);
      }).on("error", reject);
    });
  };

  const isValidUrl = (value) => {
    try {
      return !!new URL(applyProtocol(value));
    }
    catch {
      return false;
    }
  };

  // Throws unless `value` is an http(s) URL with a hostname.
  const hasValidHttpProtocol = (value) => {
    const { protocol, hostname, href } = new URL(A(value));
    if (!protocol || !["http:", "https:"].includes(protocol)) {
      throw new Error("Invalid baseUrl. Only http and https are supported.");
    }
    if (!hostname) {
      throw new Error("Invalid baseUrl. Provide a valid URL with a hostname.");
    }
    return !!href;
  };

  // Fills `l` with the HTML to process: fetched when `t` is a URL, else `t`.
  const loadInputHtml = async () => {
    if (typeof t !== "string" || typeof s !== "string") {
      h("Invalid user input: source and basePath must be strings.");
      return;
    }
    if (isValidUrl(t)) {
      try {
        hasValidHttpProtocol(t);
        l = await fetchText(t);
        if (!a) {
          a = t;
        }
      }
      catch (err) {
        h(err.message || err);
      }
      return;
    }
    l = t;
  };

  // Downloads one asset (with retries) and saves or uploads it. Resolves a
  // descriptor of the stored asset, or `null` when every attempt failed.
  // Tasks are cached per absolute URL so each asset is fetched at most once.
  const saveResolvedAsset = async (asset) => {
    const absoluteAssetUrl = resolveAssetUrl(asset);
    if (assetTaskCache.has(absoluteAssetUrl)) {
      return assetTaskCache.get(absoluteAssetUrl);
    }
    const task = (async () => {
      try {
        const urlObj = new URL(absoluteAssetUrl);
        const urlPath = urlObj.pathname.replace(/^\//, "");
        // Mirror the URL's directory structure under the base path.
        const destinationPath = path.join(s, path.dirname(urlPath));
        const fileNameGuess = path.basename(urlPath).split("?")[0].split("#")[0] || "asset";
        ensureDir(destinationPath);
        for (let attempt = 0; attempt < o; attempt++) {
          try {
            const { data, fileName } = await fetchBuffer(absoluteAssetUrl, fileNameGuess);
            const fullPath = path.join(destinationPath, fileName);
            let uploadedFile = null;
            if (k) {
              const storageKey = path.posix.join(G || `generated/${z}/assets`, path.dirname(urlPath).split(path.sep).join("/"), fileName);
              const uploadResult = await uploadBufferToR2({
                storageKey,
                body: data,
                contentType: inferContentType(fileName)
              });
              uploadedFile = createFileRecord({
                id: `asset_${uploadedAssets.length + 1}`,
                name: fileName,
                kind: "asset",
                storageKey: uploadResult.storageKey,
                size: uploadResult.size,
                type: uploadResult.type,
                url: uploadResult.url
              });
              uploadedAssets.push({ ...uploadedFile, buffer: data });
              p(`Asset uploaded successfully to ${uploadResult.url}`);
            }
            else if (c) {
              fs.writeFileSync(fullPath, data);
              p(`Asset saved successfully to ${fullPath}`);
            }
            return {
              parsedUrl: asset,
              absoluteAssetUrl,
              destinationPath,
              destinationFilePath: fullPath,
              fileName,
              uploadedFile
            };
          }
          catch (err) {
            const isLastAttempt = attempt === o - 1;
            if (isLastAttempt) {
              const { message, code } = err || {};
              if (["ECONNRESET", "ETIMEDOUT"].includes(code)) {
                h(`Network error occurred while downloading asset from ${absoluteAssetUrl}: ${message}.`);
              }
              else if (["EACCES", "EISDIR"].includes(code)) {
                h("Error saving asset. Permission denied or target path is a directory.");
              }
              else {
                h(`Error downloading asset from ${absoluteAssetUrl}: ${message || err}.`);
              }
              return null;
            }
            await sleep(n);
          }
        }
      }
      catch (err) {
        h(`Error downloading asset from ${absoluteAssetUrl}: ${err.message || err}.`);
      }
      return null;
    })();
    assetTaskCache.set(absoluteAssetUrl, task);
    return task;
  };

  await loadInputHtml();
  if (!l) {
    return l;
  }
  l = l.replace(/srcset="(.*?)"/gi, "").replace(/sizes="(.*?)"/gi, "");
  if (a) {
    l = l.replace(new RegExp(escapeRegExp(a), "g"), "");
  }
  const regex = /(<link[^>]+rel=["']stylesheet["'][^>]+href=["'])([^"']+\.[^"']+)["']|<(img|script|source)[^>]+src=["']([^"']+\.(?!json)[^"']+)["']/gi;
  const matches = [
    ...[...l.matchAll(regex)].map((match) => match[2] || match[4] || ""),
    ...[...l.matchAll(/url\(["']?(.*?)["']?\)/gi)]
      .map((match) => match[1])
      .filter((url) => !/^#/.test(url))
  ].filter((match) => !!match && !match.startsWith("data:"));
  const uniqueMatches = [...new Set(matches)];

  // Simple worker pool: each worker pulls from the shared queue until empty.
  const queue = [...uniqueMatches];
  const workers = Array.from({ length: Math.min(y, queue.length || 1) }, async () => {
    while (queue.length > 0) {
      const asset = queue.shift();
      if (!asset) {
        return;
      }
      await saveResolvedAsset(asset);
    }
  });
  await Promise.all(workers);
  if (q) {
    return { html: l, assets: uploadedAssets };
  }
  return l;
};
export default extractAssets;
@@ -0,0 +1,48 @@
1
+ {
2
+ "name": "fetch-page-assets",
3
+ "version": "1.2.7",
4
+ "type": "module",
5
+ "description": "A versatile Node.js module for extracting assets (such as CSS files, JavaScript files, fonts, and images) from HTML content or URLs.",
6
+ "main": "index.js",
7
+ "files": [
8
+ "index.js",
9
+ "README.md",
10
+ "LICENSE.MD"
11
+ ],
12
+ "scripts": {
13
+ "test": "node -e \"console.log('No vendored tests configured')\""
14
+ },
15
+ "repository": {
16
+ "type": "git",
17
+ "url": "git+https://github.com/DiogoAngelim/fetch-page-assets.git"
18
+ },
19
+ "keywords": [
20
+ "backend",
21
+ "asset",
22
+ "extractor",
23
+ "downloader",
24
+ "html",
25
+ "css",
26
+ "javascript",
27
+ "image",
28
+ "fetch",
29
+ "save"
30
+ ],
31
+ "author": "Diogo Angelim",
32
+ "license": "MIT",
33
+ "funding": "https://www.paypal.com/donate/?hosted_button_id=XA5LN4XR39PMQ",
34
+ "bugs": {
35
+ "url": "https://github.com/DiogoAngelim/fetch-page-assets/issues"
36
+ },
37
+ "homepage": "https://github.com/DiogoAngelim/fetch-page-assets#readme",
38
+ "publishConfig": {
39
+ "access": "public"
40
+ },
41
+ "dependencies": {
42
+ "beautify": "^0.0.8",
43
+ "fs": "^0.0.1-security",
44
+ "html-entities": "^2.5.2",
45
+ "mime": "^4.0.3",
46
+ "path": "^0.12.7"
47
+ }
48
+ }
package/.env DELETED
@@ -1 +0,0 @@
1
- SNAPAPI_KEY=sk_live_5b420427f2ec2509a0971c267b322300efec77a498c44ada