@pagepocket/lib 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,11 +33,11 @@ var __importStar = (this && this.__importStar) || (function () {
33
33
  };
34
34
  })();
35
35
  Object.defineProperty(exports, "__esModule", { value: true });
36
- exports.rewriteLinks = void 0;
36
+ exports.rewriteEntryHtml = exports.rewriteJsText = void 0;
37
+ const cheerio = __importStar(require("cheerio"));
37
38
  const css_rewrite_1 = require("./css-rewrite");
38
- const network_records_1 = require("./network-records");
39
- const resources_1 = require("./resources");
40
- const shouldSkipValue = (value, assetsDirName) => {
39
+ const hack_html_1 = require("./hack-html");
40
+ const shouldSkipValue = (value) => {
41
41
  const trimmed = value.trim();
42
42
  return (!trimmed ||
43
43
  trimmed.startsWith("data:") ||
@@ -45,219 +45,207 @@ const shouldSkipValue = (value, assetsDirName) => {
45
45
  trimmed.startsWith("mailto:") ||
46
46
  trimmed.startsWith("tel:") ||
47
47
  trimmed.startsWith("javascript:") ||
48
- trimmed.startsWith("#") ||
49
- trimmed.includes(assetsDirName));
48
+ trimmed.startsWith("#"));
50
49
  };
51
- const buildLinkBase = (baseUrl) => {
50
+ const resolveUrlValue = (value, baseUrl, resolve) => {
51
+ if (shouldSkipValue(value)) {
52
+ return null;
53
+ }
52
54
  try {
53
- const parsed = new URL(baseUrl);
54
- const baseOrigin = parsed.origin;
55
- const baseDir = new URL(".", parsed).toString().replace(/\/$/, "");
56
- return { baseOrigin, baseDir };
55
+ const absolute = new URL(value, baseUrl).toString();
56
+ return resolve(absolute);
57
57
  }
58
58
  catch {
59
- return { baseOrigin: "", baseDir: "" };
59
+ return null;
60
60
  }
61
61
  };
62
- const expandUrlVariants = (value, baseUrl, baseOrigin, baseDir) => {
63
- const variants = [];
64
- if (typeof value === "string") {
65
- variants.push(value);
66
- try {
67
- variants.push(new URL(value, baseUrl).toString());
68
- }
69
- catch {
70
- // ignore
71
- }
72
- if (baseOrigin && value.startsWith("/")) {
73
- variants.push(baseOrigin + value);
74
- if (baseDir) {
75
- variants.push(baseDir + value);
76
- }
77
- }
78
- else if (baseDir) {
79
- variants.push(baseDir + (value.startsWith("/") ? value : "/" + value));
80
- }
81
- try {
82
- const parsed = new URL(value, baseUrl);
83
- const pathWithSearch = (parsed.pathname || "") + (parsed.search || "");
84
- if (baseOrigin && parsed.origin !== baseOrigin) {
85
- variants.push(baseOrigin + pathWithSearch);
86
- if (baseDir) {
87
- const path = pathWithSearch.startsWith("/") ? pathWithSearch : "/" + pathWithSearch;
88
- variants.push(baseDir + path);
89
- }
90
- }
91
- }
92
- catch {
93
- // ignore
94
- }
95
- }
96
- return Array.from(new Set(variants.filter(Boolean)));
62
+ const rewriteSrcsetValue = (value, baseUrl, resolve) => {
63
+ const parts = value.split(",").map((part) => part.trim());
64
+ const rewritten = parts.map((part) => {
65
+ const [rawUrl, descriptor] = part.split(/\s+/, 2);
66
+ if (!rawUrl)
67
+ return part;
68
+ const resolved = resolveUrlValue(rawUrl, baseUrl, resolve);
69
+ if (!resolved)
70
+ return part;
71
+ return descriptor ? `${resolved} ${descriptor}` : resolved;
72
+ });
73
+ return rewritten.join(", ");
97
74
  };
98
- const buildNetworkLookup = (records) => {
99
- const networkRecordByUrl = new Map();
100
- for (const record of records) {
101
- if (record?.url && !networkRecordByUrl.has(record.url)) {
102
- networkRecordByUrl.set(record.url, record);
103
- }
75
+ const rewriteMetaRefresh = (content, baseUrl, resolve) => {
76
+ const parts = content.split(";");
77
+ if (parts.length < 2)
78
+ return content;
79
+ const urlPartIndex = parts.findIndex((part) => part.trim().toLowerCase().startsWith("url="));
80
+ if (urlPartIndex === -1)
81
+ return content;
82
+ const urlPart = parts[urlPartIndex];
83
+ const rawUrl = urlPart.split("=").slice(1).join("=").trim();
84
+ const resolved = resolveUrlValue(rawUrl, baseUrl, resolve);
85
+ if (!resolved)
86
+ return content;
87
+ const next = `url=${resolved}`;
88
+ const nextParts = parts.slice();
89
+ nextParts[urlPartIndex] = next;
90
+ return nextParts.join(";");
91
+ };
92
+ const rewriteJsText = async (source, resolve, baseUrl) => {
93
+ const replaceSpecifier = async (specifier) => {
94
+ const trimmed = specifier.trim();
95
+ if (shouldSkipValue(trimmed)) {
96
+ return specifier;
97
+ }
98
+ const resolved = resolveUrlValue(trimmed, baseUrl, resolve);
99
+ return resolved ?? specifier;
100
+ };
101
+ const importFromPattern = /(\bimport\s+[^'"]*?\sfrom\s+)(["'])([^"']+)\2/g;
102
+ const importSideEffectPattern = /(\bimport\s+)(["'])([^"']+)\2/g;
103
+ const dynamicImportPattern = /(\bimport\s*\(\s*)(["'])([^"']+)\2(\s*\))/g;
104
+ let replaced = "";
105
+ let lastIndex = 0;
106
+ for (const match of source.matchAll(importFromPattern)) {
107
+ const index = match.index ?? 0;
108
+ replaced += source.slice(lastIndex, index);
109
+ const prefix = match[1] || "";
110
+ const quote = match[2] || "";
111
+ const specifier = match[3] || "";
112
+ const next = await replaceSpecifier(specifier);
113
+ replaced += `${prefix}${quote}${next}${quote}`;
114
+ lastIndex = index + match[0].length;
115
+ }
116
+ replaced += source.slice(lastIndex);
117
+ let final = "";
118
+ lastIndex = 0;
119
+ for (const match of replaced.matchAll(importSideEffectPattern)) {
120
+ const index = match.index ?? 0;
121
+ final += replaced.slice(lastIndex, index);
122
+ const prefix = match[1] || "";
123
+ const quote = match[2] || "";
124
+ const specifier = match[3] || "";
125
+ const next = await replaceSpecifier(specifier);
126
+ final += `${prefix}${quote}${next}${quote}`;
127
+ lastIndex = index + match[0].length;
104
128
  }
105
- return networkRecordByUrl;
129
+ final += replaced.slice(lastIndex);
130
+ let dynamicFinal = "";
131
+ lastIndex = 0;
132
+ for (const match of final.matchAll(dynamicImportPattern)) {
133
+ const index = match.index ?? 0;
134
+ dynamicFinal += final.slice(lastIndex, index);
135
+ const prefix = match[1] || "";
136
+ const quote = match[2] || "";
137
+ const specifier = match[3] || "";
138
+ const suffix = match[4] || "";
139
+ const next = await replaceSpecifier(specifier);
140
+ dynamicFinal += `${prefix}${quote}${next}${quote}${suffix}`;
141
+ lastIndex = index + match[0].length;
142
+ }
143
+ dynamicFinal += final.slice(lastIndex);
144
+ return dynamicFinal;
106
145
  };
107
- const rewriteLinks = async (input) => {
108
- const { readAsURL } = await Promise.resolve().then(() => __importStar(require("@pagepocket/uni-fs")));
109
- const networkRecordByUrl = buildNetworkLookup(input.networkRecords);
110
- const { baseOrigin, baseDir } = buildLinkBase(input.baseUrl);
111
- const localUrlCache = new Map();
112
- const resolveLocalUrl = async (value) => {
113
- if (shouldSkipValue(value, input.assetsDirName)) {
114
- return null;
115
- }
116
- const variants = expandUrlVariants(value, input.baseUrl, baseOrigin, baseDir);
117
- for (const variant of variants) {
118
- const resource = input.resourceMap.get(variant);
119
- if (!resource) {
120
- continue;
121
- }
122
- const cacheKey = resource.extension
123
- ? `${resource.filename}.${resource.extension}`
124
- : resource.filename;
125
- if (localUrlCache.has(cacheKey)) {
126
- return localUrlCache.get(cacheKey) ?? null;
127
- }
128
- const localUrl = await readAsURL(`${input.assetsDirName}/${resource.filename}`, resource.extension);
129
- localUrlCache.set(cacheKey, localUrl);
130
- return localUrl;
131
- }
132
- for (const variant of variants) {
133
- const record = networkRecordByUrl.get(variant);
134
- if (record) {
135
- return (0, network_records_1.toDataUrlFromRecord)(record);
136
- }
137
- }
138
- for (const variant of variants) {
139
- try {
140
- const parsed = new URL(variant);
141
- const withoutQuery = parsed.origin + parsed.pathname;
142
- const record = networkRecordByUrl.get(withoutQuery);
143
- if (record) {
144
- return (0, network_records_1.toDataUrlFromRecord)(record);
145
- }
146
+ exports.rewriteJsText = rewriteJsText;
147
+ const rewriteEntryHtml = async (input) => {
148
+ const $ = cheerio.load(input.html);
149
+ const baseUrl = input.entryUrl;
150
+ const resolve = input.resolve;
151
+ const shouldRewriteLinks = input.rewriteLinks !== false;
152
+ const rewriteAttr = (selector, attr) => {
153
+ $(selector).each((_, element) => {
154
+ const value = $(element).attr(attr);
155
+ if (!value)
156
+ return;
157
+ const resolved = resolveUrlValue(value, baseUrl, resolve);
158
+ if (resolved) {
159
+ $(element).attr(attr, resolved);
146
160
  }
147
- catch {
148
- // ignore
149
- }
150
- }
151
- return null;
161
+ });
152
162
  };
153
- for (const resource of input.resourceUrls) {
154
- const rawValue = input.$(resource.element).attr(resource.attr);
155
- if (!rawValue) {
156
- continue;
157
- }
158
- const nextUrl = await resolveLocalUrl(rawValue);
159
- if (nextUrl) {
160
- input.$(resource.element).attr(resource.attr, nextUrl);
161
- }
163
+ const rewriteDataAttrs = (selector, attr) => rewriteAttr(selector, attr);
164
+ if (shouldRewriteLinks) {
165
+ rewriteAttr("script[src]", "src");
166
+ rewriteAttr("img[src]", "src");
167
+ rewriteAttr("source[src]", "src");
168
+ rewriteAttr("video[src]", "src");
169
+ rewriteAttr("audio[src]", "src");
170
+ rewriteAttr("track[src]", "src");
171
+ rewriteAttr("iframe[src]", "src");
172
+ rewriteAttr("embed[src]", "src");
173
+ rewriteAttr("object[data]", "data");
174
+ rewriteAttr("link[href]", "href");
175
+ rewriteAttr("[poster]", "poster");
176
+ rewriteDataAttrs("[data-src]", "data-src");
177
+ rewriteDataAttrs("[data-href]", "data-href");
178
+ rewriteDataAttrs("[data-poster]", "data-poster");
179
+ rewriteDataAttrs("[data-url]", "data-url");
180
+ $("img[srcset], source[srcset]").each((_, element) => {
181
+ const value = $(element).attr("srcset");
182
+ if (!value)
183
+ return;
184
+ const rewritten = rewriteSrcsetValue(value, baseUrl, resolve);
185
+ $(element).attr("srcset", rewritten);
186
+ });
187
+ $("meta[http-equiv]").each((_, element) => {
188
+ const httpEquiv = ($(element).attr("http-equiv") || "").toLowerCase();
189
+ if (httpEquiv !== "refresh")
190
+ return;
191
+ const content = $(element).attr("content");
192
+ if (!content)
193
+ return;
194
+ const rewritten = rewriteMetaRefresh(content, baseUrl, resolve);
195
+ $(element).attr("content", rewritten);
196
+ });
162
197
  }
163
- for (const item of input.srcsetItems) {
164
- const parts = item.value.split(",").map((part) => part.trim());
165
- const rewrittenParts = [];
166
- for (const part of parts) {
167
- const [rawUrl, descriptor] = part.split(/\s+/, 2);
168
- if (!rawUrl) {
169
- rewrittenParts.push(part);
198
+ if (shouldRewriteLinks) {
199
+ const inlineStyles = $("style").toArray();
200
+ for (const element of inlineStyles) {
201
+ const cssText = $(element).html();
202
+ if (!cssText)
170
203
  continue;
204
+ const rewritten = await (0, css_rewrite_1.rewriteCssText)({
205
+ cssText,
206
+ cssUrl: baseUrl,
207
+ resolveUrl: resolve
208
+ });
209
+ if (rewritten !== cssText) {
210
+ $(element).html(rewritten);
171
211
  }
172
- const nextUrl = await resolveLocalUrl(rawUrl);
173
- if (!nextUrl) {
174
- rewrittenParts.push(part);
212
+ }
213
+ const inlineStyleElements = $("[style]").toArray();
214
+ for (const element of inlineStyleElements) {
215
+ const styleText = $(element).attr("style");
216
+ if (!styleText)
175
217
  continue;
218
+ const rewritten = await (0, css_rewrite_1.rewriteCssText)({
219
+ cssText: styleText,
220
+ cssUrl: baseUrl,
221
+ resolveUrl: resolve
222
+ });
223
+ if (rewritten !== styleText) {
224
+ $(element).attr("style", rewritten);
176
225
  }
177
- rewrittenParts.push(descriptor ? `${nextUrl} ${descriptor}` : nextUrl);
178
226
  }
179
- input.$(item.element).attr("srcset", rewrittenParts.join(", "));
180
227
  }
181
- const rewriteModuleImports = async (source) => {
182
- const replaceSpecifier = async (specifier) => {
183
- const trimmed = specifier.trim();
184
- if (shouldSkipValue(trimmed, input.assetsDirName)) {
185
- return specifier;
186
- }
187
- const next = await resolveLocalUrl(trimmed);
188
- return next ?? specifier;
189
- };
190
- const importFromPattern = /(\bimport\s+[^'"]*?\sfrom\s+)(["'])([^"']+)\2/g;
191
- const importSideEffectPattern = /(\bimport\s+)(["'])([^"']+)\2/g;
192
- let replaced = "";
193
- let lastIndex = 0;
194
- for (const match of source.matchAll(importFromPattern)) {
195
- const index = match.index ?? 0;
196
- replaced += source.slice(lastIndex, index);
197
- const prefix = match[1] || "";
198
- const quote = match[2] || "";
199
- const specifier = match[3] || "";
200
- const next = await replaceSpecifier(specifier);
201
- replaced += `${prefix}${quote}${next}${quote}`;
202
- lastIndex = index + match[0].length;
203
- }
204
- replaced += source.slice(lastIndex);
205
- let final = "";
206
- lastIndex = 0;
207
- for (const match of replaced.matchAll(importSideEffectPattern)) {
208
- const index = match.index ?? 0;
209
- final += replaced.slice(lastIndex, index);
210
- const prefix = match[1] || "";
211
- const quote = match[2] || "";
212
- const specifier = match[3] || "";
213
- const next = await replaceSpecifier(specifier);
214
- final += `${prefix}${quote}${next}${quote}`;
215
- lastIndex = index + match[0].length;
216
- }
217
- final += replaced.slice(lastIndex);
218
- return final;
219
- };
220
- const rewritePromises = [];
221
- const moduleScripts = input.$('script[type="module"]').toArray();
222
- for (const element of moduleScripts) {
223
- const src = input.$(element).attr("src");
224
- if (src) {
225
- continue;
226
- }
227
- const original = input.$(element).html();
228
- if (!original) {
229
- continue;
230
- }
231
- rewritePromises.push(rewriteModuleImports(original).then((rewritten) => {
228
+ if (shouldRewriteLinks) {
229
+ const moduleScripts = $('script[type="module"]').toArray();
230
+ for (const element of moduleScripts) {
231
+ const src = $(element).attr("src");
232
+ if (src)
233
+ continue;
234
+ const original = $(element).html();
235
+ if (!original)
236
+ continue;
237
+ const rewritten = await (0, exports.rewriteJsText)(original, resolve, baseUrl);
232
238
  if (rewritten !== original) {
233
- input.$(element).html(rewritten);
239
+ $(element).html(rewritten);
234
240
  }
235
- }));
236
- }
237
- for (const resource of input.resourceMap.values()) {
238
- const isCss = (resource.contentType && resource.contentType.includes("text/css")) ||
239
- resource.extension.toLowerCase() === "css";
240
- if (!isCss) {
241
- continue;
242
241
  }
243
- const cssUrl = resource.url;
244
- rewritePromises.push((0, css_rewrite_1.rewriteCssUrls)({
245
- filename: `${input.assetsDirName}/${resource.filename}`,
246
- extension: resource.extension,
247
- cssUrl,
248
- resolveUrl: async (absoluteUrl) => {
249
- const direct = input.resourceMap.get(absoluteUrl);
250
- if (direct) {
251
- return readAsURL(`${input.assetsDirName}/${direct.filename}`, direct.extension);
252
- }
253
- const fallback = (0, resources_1.toAbsoluteUrl)(input.baseUrl, absoluteUrl);
254
- const record = networkRecordByUrl.get(fallback) || networkRecordByUrl.get(absoluteUrl);
255
- return record ? (0, network_records_1.toDataUrlFromRecord)(record) : null;
256
- }
257
- }).then(() => { }));
258
- }
259
- if (rewritePromises.length) {
260
- await Promise.all(rewritePromises);
261
242
  }
243
+ (0, hack_html_1.hackHtml)({
244
+ $,
245
+ baseUrl: baseUrl,
246
+ apiPath: input.apiPath
247
+ });
248
+ const title = $("title").first().text() || undefined;
249
+ return { html: $.html(), title };
262
250
  };
263
- exports.rewriteLinks = rewriteLinks;
251
+ exports.rewriteEntryHtml = rewriteEntryHtml;
@@ -0,0 +1,15 @@
1
+ import type { ContentStore, PageSnapshot, PathResolver } from "./types";
2
+ import type { ApiEntry, StoredResource } from "./network-store";
3
+ type BuildOptions = {
4
+ entryUrl: string;
5
+ createdAt: number;
6
+ resources: StoredResource[];
7
+ apiEntries: ApiEntry[];
8
+ contentStore: ContentStore;
9
+ pathResolver?: PathResolver;
10
+ rewriteEntry: boolean;
11
+ rewriteCSS: boolean;
12
+ warnings: string[];
13
+ };
14
+ export declare const buildSnapshot: (input: BuildOptions) => Promise<PageSnapshot>;
15
+ export {};