@pagepocket/lib 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/dist/content-type.d.ts +2 -0
  2. package/dist/content-type.js +36 -0
  3. package/dist/css-rewrite.d.ts +9 -0
  4. package/dist/css-rewrite.js +76 -0
  5. package/dist/download-resources.d.ts +25 -0
  6. package/dist/download-resources.js +163 -0
  7. package/dist/hack-html.d.ts +9 -0
  8. package/dist/hack-html.js +32 -0
  9. package/dist/hackers/index.d.ts +3 -0
  10. package/dist/hackers/index.js +22 -0
  11. package/dist/hackers/preload-fetch.d.ts +2 -0
  12. package/dist/hackers/preload-fetch.js +56 -0
  13. package/dist/hackers/preload-xhr.d.ts +2 -0
  14. package/dist/hackers/preload-xhr.js +59 -0
  15. package/dist/hackers/replay-beacon.d.ts +2 -0
  16. package/dist/hackers/replay-beacon.js +21 -0
  17. package/dist/hackers/replay-dom-rewrite.d.ts +2 -0
  18. package/dist/hackers/replay-dom-rewrite.js +295 -0
  19. package/dist/hackers/replay-eventsource.d.ts +2 -0
  20. package/dist/hackers/replay-eventsource.js +25 -0
  21. package/dist/hackers/replay-fetch.d.ts +2 -0
  22. package/dist/hackers/replay-fetch.js +33 -0
  23. package/dist/hackers/replay-svg-image.d.ts +2 -0
  24. package/dist/hackers/replay-svg-image.js +89 -0
  25. package/dist/hackers/replay-websocket.d.ts +2 -0
  26. package/dist/hackers/replay-websocket.js +26 -0
  27. package/dist/hackers/replay-xhr.d.ts +2 -0
  28. package/dist/hackers/replay-xhr.js +91 -0
  29. package/dist/hackers/types.d.ts +10 -0
  30. package/dist/hackers/types.js +2 -0
  31. package/dist/index.d.ts +6 -0
  32. package/dist/index.js +13 -0
  33. package/dist/network-records.d.ts +4 -0
  34. package/dist/network-records.js +83 -0
  35. package/dist/pagepocket.d.ts +18 -0
  36. package/dist/pagepocket.js +73 -0
  37. package/dist/preload.d.ts +1 -0
  38. package/dist/preload.js +60 -0
  39. package/dist/replay-script.d.ts +1 -0
  40. package/dist/replay-script.js +347 -0
  41. package/dist/resources.d.ts +16 -0
  42. package/dist/resources.js +82 -0
  43. package/dist/rewrite-links.d.ts +15 -0
  44. package/dist/rewrite-links.js +263 -0
  45. package/dist/types.d.ts +54 -0
  46. package/dist/types.js +2 -0
  47. package/package.json +29 -0
package/dist/rewrite-links.d.ts ADDED
@@ -0,0 +1,15 @@
1
+ import type { CheerioAPI } from "cheerio";
2
+ import { type ResourceReference, type SrcsetReference } from "./resources";
3
+ import type { NetworkRecord } from "./types";
4
+ import type { DownloadedResource } from "./download-resources";
5
+ type RewriteLinksInput = { // Everything rewriteLinks needs to point a parsed document at its saved assets.
6
+ $: CheerioAPI; // Cheerio handle for the document; attributes and inline module scripts are read and overwritten through it.
7
+ resourceUrls: ResourceReference[]; // Element/attribute pairs whose URL value should be rewritten.
8
+ srcsetItems: SrcsetReference[]; // Elements paired with a srcset value; the rewritten value is written to their "srcset" attribute.
9
+ baseUrl: string; // Base URL that relative references are resolved against.
10
+ assetsDirName: string; // Directory prefix used to build saved-asset paths; values that already contain it are left alone.
11
+ resourceMap: Map<string, DownloadedResource>; // Downloaded resources, looked up by URL.
12
+ networkRecords: NetworkRecord[]; // Fallback source: a record whose url matches is converted with toDataUrlFromRecord.
13
+ };
14
+ export declare const rewriteLinks: (input: RewriteLinksInput) => Promise<void>; // Mutates the document behind input.$ in place; resolves once every queued rewrite has finished.
15
+ export {};
package/dist/rewrite-links.js ADDED
@@ -0,0 +1,263 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // Compiler-emitted helper (an existing this.__createBinding is reused): exposes m[k] on o under the name k2.
3
+ if (k2 === undefined) k2 = k; // k2 defaults to the source key.
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } }; // Swap in an enumerable getter that reads m[k] on every access.
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) { // Variant used when Object.create is unavailable: plain value copy.
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { // Compiler-emitted helper (an existing this.__setModuleDefault is reused): stores v as o's "default" property.
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v }); // Defined without writable/configurable flags.
15
+ }) : function(o, v) { // Variant used when Object.create is unavailable: plain assignment.
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () { // Compiler-emitted helper (an existing this.__importStar is reused): wraps a required module as a namespace-style object.
19
+ var ownKeys = function(o) { // Replaces itself on first call with the key-listing function chosen below.
20
+ ownKeys = Object.getOwnPropertyNames || function (o) { // Falls back to a for-in scan filtered by hasOwnProperty.
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod; // Modules flagged __esModule are returned unchanged.
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); // Bind every own key except "default".
31
+ __setModuleDefault(result, mod); // The module object itself becomes result.default.
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.rewriteLinks = void 0;
37
+ const css_rewrite_1 = require("./css-rewrite");
38
+ const network_records_1 = require("./network-records");
39
+ const resources_1 = require("./resources");
/**
 * Decides whether an attribute value must be left untouched by the link rewriter.
 *
 * A value is skipped when it is blank, is a fragment reference (`#...`), uses one of
 * the `data:`, `blob:`, `mailto:`, `tel:` or `javascript:` schemes, or already
 * contains the assets directory name.
 *
 * @param {string} value - Raw attribute value (surrounding whitespace is ignored).
 * @param {string} assetsDirName - Name of the local assets directory.
 * @returns {boolean} `true` when the value should not be rewritten.
 */
const shouldSkipValue = (value, assetsDirName) => {
    const skippedSchemes = ["data:", "blob:", "mailto:", "tel:", "javascript:"];
    const trimmed = value.trim();
    if (!trimmed || trimmed.startsWith("#")) {
        return true;
    }
    // URL schemes are case-insensitive, so "DATA:" and "JavaScript:" must match too.
    const lowered = trimmed.toLowerCase();
    if (skippedSchemes.some((scheme) => lowered.startsWith(scheme))) {
        return true;
    }
    // Every string includes "", so an empty directory name must not mark all values as skipped.
    return Boolean(assetsDirName) && trimmed.includes(assetsDirName);
};
/**
 * Derives the two prefixes used to build candidate absolute URLs for a page.
 *
 * @param {string} baseUrl - Absolute URL of the page being rewritten.
 * @returns {{ baseOrigin: string, baseDir: string }} `baseOrigin` is the URL's origin
 *   and `baseDir` is the URL of its containing directory without a trailing slash.
 *   Both are `""` when `baseUrl` cannot be parsed; `baseOrigin` is also `""` when the
 *   URL has an opaque origin.
 */
const buildLinkBase = (baseUrl) => {
    try {
        const parsed = new URL(baseUrl);
        // Opaque origins (e.g. file: URLs) serialize as the literal string "null",
        // which is truthy and would otherwise be used as a URL prefix by callers.
        const baseOrigin = parsed.origin === "null" ? "" : parsed.origin;
        const baseDir = new URL(".", parsed).toString().replace(/\/$/, "");
        return { baseOrigin, baseDir };
    }
    catch {
        // Unparseable base: callers treat empty prefixes as "no base available".
        return { baseOrigin: "", baseDir: "" };
    }
};
/**
 * Lists the spellings under which a referenced URL might have been recorded.
 *
 * Variants are produced in this order, then de-duplicated (first occurrence wins)
 * with empty strings removed:
 *   1. the value as written;
 *   2. the value resolved against `baseUrl`;
 *   3. the value prefixed with `baseOrigin` and/or `baseDir`;
 *   4. for a URL on another origin, its path and query re-homed under `baseOrigin`
 *      and `baseDir`.
 *
 * @param {unknown} value - URL reference as found in the document.
 * @param {string} baseUrl - URL used to resolve relative references.
 * @param {string} baseOrigin - Origin prefix, or `""` when unavailable.
 * @param {string} baseDir - Directory prefix without trailing slash, or `""`.
 * @returns {string[]} Candidate lookup keys; empty when `value` is not a string.
 */
const expandUrlVariants = (value, baseUrl, baseOrigin, baseDir) => {
    if (typeof value !== "string") {
        return [];
    }
    // Parse once and reuse; a failed parse simply contributes no URL-based variants.
    let resolved = null;
    try {
        resolved = new URL(value, baseUrl);
    }
    catch {
        // ignore
    }
    const startsAtRoot = value.startsWith("/");
    const variants = [value];
    if (resolved) {
        variants.push(resolved.toString());
    }
    if (baseOrigin && startsAtRoot) {
        variants.push(baseOrigin + value);
        if (baseDir) {
            variants.push(baseDir + value);
        }
    }
    else if (baseDir) {
        variants.push(baseDir + (startsAtRoot ? value : "/" + value));
    }
    if (resolved && baseOrigin && resolved.origin !== baseOrigin) {
        const pathWithSearch = resolved.pathname + resolved.search;
        variants.push(baseOrigin + pathWithSearch);
        if (baseDir) {
            variants.push(baseDir + (pathWithSearch.startsWith("/") ? pathWithSearch : "/" + pathWithSearch));
        }
    }
    // Set iteration keeps insertion order, so earlier variants stay preferred.
    return Array.from(new Set(variants.filter(Boolean)));
};
/**
 * Indexes network records by their `url`.
 *
 * Entries that are nullish or have a falsy `url` are ignored, and when several
 * records share a URL the first one encountered is kept.
 *
 * @param {Iterable<object>} records - Records to index.
 * @returns {Map<string, object>} Map from URL to its first record.
 */
const buildNetworkLookup = (records) => {
    const lookup = new Map();
    for (const entry of records) {
        const key = entry?.url;
        if (!key || lookup.has(key)) {
            continue;
        }
        lookup.set(key, entry);
    }
    return lookup;
};
/**
 * Splits a `srcset` attribute value into its image candidates.
 *
 * The URL of a candidate is a run of non-whitespace characters, so commas inside a
 * URL (data URLs, CDN transform paths) do not end it; only trailing commas are
 * stripped. Descriptors run up to the next comma that is outside parentheses.
 *
 * @param {unknown} value - Raw `srcset` value.
 * @returns {{ url: string, descriptor: string }[]} Candidates in document order;
 *   `descriptor` is `""` when absent. Empty when `value` is not a string.
 */
const splitSrcsetCandidates = (value) => {
    const candidates = [];
    if (typeof value !== "string") {
        return candidates;
    }
    const whitespace = /\s/;
    let pos = 0;
    while (pos < value.length) {
        // Skip separators left over from the previous candidate.
        while (pos < value.length && (value[pos] === "," || whitespace.test(value[pos]))) {
            pos += 1;
        }
        if (pos >= value.length) {
            break;
        }
        const urlStart = pos;
        while (pos < value.length && !whitespace.test(value[pos])) {
            pos += 1;
        }
        let url = value.slice(urlStart, pos);
        let descriptor = "";
        if (url.endsWith(",")) {
            // A URL token ending in a comma closes the candidate with no descriptor.
            url = url.replace(/,+$/, "");
        }
        else {
            const descriptorStart = pos;
            let depth = 0;
            while (pos < value.length) {
                const ch = value[pos];
                if (ch === "(") {
                    depth += 1;
                }
                else if (ch === ")" && depth > 0) {
                    depth -= 1;
                }
                else if (ch === "," && depth === 0) {
                    break;
                }
                pos += 1;
            }
            descriptor = value.slice(descriptorStart, pos).trim().replace(/\s+/g, " ");
        }
        if (url) {
            candidates.push({ url, descriptor });
        }
    }
    return candidates;
};
/**
 * Asynchronous counterpart of `String.prototype.replace` for import patterns.
 *
 * @param {string} source - Script text to scan.
 * @param {RegExp} pattern - Global pattern whose groups are (1) the text before the
 *   opening quote, (2) the quote character and (3) the module specifier.
 * @param {(specifier: string) => Promise<string>} replaceSpecifier - Maps a specifier
 *   to its replacement.
 * @returns {Promise<string>} `source` with every matched specifier replaced.
 */
const replaceImportSpecifiers = async (source, pattern, replaceSpecifier) => {
    let output = "";
    let lastIndex = 0;
    for (const match of source.matchAll(pattern)) {
        const index = match.index ?? 0;
        const [whole, prefix = "", quote = "", specifier = ""] = match;
        output += source.slice(lastIndex, index);
        output += `${prefix}${quote}${await replaceSpecifier(specifier)}${quote}`;
        lastIndex = index + whole.length;
    }
    return output + source.slice(lastIndex);
};
/**
 * Rewrites the URLs in a captured document, and in its downloaded stylesheets, so
 * they point at locally saved assets.
 *
 * Steps, in order:
 *   1. every `resourceUrls` element/attribute pair is rewritten;
 *   2. every `srcsetItems` entry is rewritten candidate by candidate;
 *   3. import specifiers in inline `<script type="module">` elements are rewritten;
 *   4. each CSS resource in `resourceMap` is passed to `rewriteCssUrls`.
 * Steps 3 and 4 run concurrently and are awaited together at the end.
 *
 * A URL is resolved by trying all of its variants against `resourceMap` first, then
 * against `networkRecords` by exact URL, then against `networkRecords` with the query
 * string removed. URLs that resolve nowhere are left as they are.
 *
 * @param {object} input - See `RewriteLinksInput` in rewrite-links.d.ts.
 * @returns {Promise<void>} Resolves once all rewrites have completed; rejects if any
 *   of them fails.
 */
const rewriteLinks = async (input) => {
    // Loaded on demand, as in the compiled dynamic import this replaces.
    const { readAsURL } = await Promise.resolve().then(() => __importStar(require("uni-fs")));
    const networkRecordByUrl = buildNetworkLookup(input.networkRecords);
    const { baseOrigin, baseDir } = buildLinkBase(input.baseUrl);
    // Keyed by saved file name so each asset is passed to readAsURL only once.
    const localUrlCache = new Map();
    const resolveLocalUrl = async (value) => {
        if (shouldSkipValue(value, input.assetsDirName)) {
            return null;
        }
        const variants = expandUrlVariants(value, input.baseUrl, baseOrigin, baseDir);
        // 1) A downloaded resource wins over any recorded response.
        for (const variant of variants) {
            const resource = input.resourceMap.get(variant);
            if (!resource) {
                continue;
            }
            const cacheKey = resource.extension
                ? `${resource.filename}.${resource.extension}`
                : resource.filename;
            if (localUrlCache.has(cacheKey)) {
                return localUrlCache.get(cacheKey) ?? null;
            }
            const localUrl = await readAsURL(`${input.assetsDirName}/${resource.filename}`, resource.extension);
            localUrlCache.set(cacheKey, localUrl);
            return localUrl;
        }
        // 2) Recorded response with exactly this URL.
        for (const variant of variants) {
            const record = networkRecordByUrl.get(variant);
            if (record) {
                return (0, network_records_1.toDataUrlFromRecord)(record);
            }
        }
        // 3) Recorded response for the same URL without its query string.
        for (const variant of variants) {
            try {
                const parsed = new URL(variant);
                const record = networkRecordByUrl.get(parsed.origin + parsed.pathname);
                if (record) {
                    return (0, network_records_1.toDataUrlFromRecord)(record);
                }
            }
            catch {
                // Variant is not an absolute URL; nothing to look up.
            }
        }
        return null;
    };
    for (const resource of input.resourceUrls) {
        const rawValue = input.$(resource.element).attr(resource.attr);
        if (!rawValue) {
            continue;
        }
        const nextUrl = await resolveLocalUrl(rawValue);
        if (nextUrl) {
            input.$(resource.element).attr(resource.attr, nextUrl);
        }
    }
    for (const item of input.srcsetItems) {
        const rewrittenParts = [];
        for (const { url, descriptor } of splitSrcsetCandidates(item.value)) {
            // Unresolved candidates are kept as written.
            const nextUrl = (await resolveLocalUrl(url)) ?? url;
            rewrittenParts.push(descriptor ? `${nextUrl} ${descriptor}` : nextUrl);
        }
        input.$(item.element).attr("srcset", rewrittenParts.join(", "));
    }
    // resolveLocalUrl already returns null for values that must be skipped.
    const replaceSpecifier = async (specifier) => (await resolveLocalUrl(specifier.trim())) ?? specifier;
    const rewriteModuleImports = async (source) => {
        // `import ... from "x"` first, then bare `import "x"` on the result.
        const afterFromImports = await replaceImportSpecifiers(source, /(\bimport\s+[^'"]*?\sfrom\s+)(["'])([^"']+)\2/g, replaceSpecifier);
        return replaceImportSpecifiers(afterFromImports, /(\bimport\s+)(["'])([^"']+)\2/g, replaceSpecifier);
    };
    const rewriteInlineModule = async (element, original) => {
        const rewritten = await rewriteModuleImports(original);
        if (rewritten !== original) {
            input.$(element).html(rewritten);
        }
    };
    const rewritePromises = [];
    for (const element of input.$('script[type="module"]').toArray()) {
        // Only inline modules are rewritten here; scripts with a src are skipped.
        if (input.$(element).attr("src")) {
            continue;
        }
        const original = input.$(element).html();
        if (!original) {
            continue;
        }
        rewritePromises.push(rewriteInlineModule(element, original));
    }
    for (const resource of input.resourceMap.values()) {
        // Media types are case-insensitive, and either field may be missing.
        const contentType = (resource.contentType || "").toLowerCase();
        const extension = (resource.extension || "").toLowerCase();
        if (!contentType.includes("text/css") && extension !== "css") {
            continue;
        }
        rewritePromises.push((0, css_rewrite_1.rewriteCssUrls)({
            filename: `${input.assetsDirName}/${resource.filename}`,
            extension: resource.extension,
            cssUrl: resource.url,
            resolveUrl: async (absoluteUrl) => {
                const direct = input.resourceMap.get(absoluteUrl);
                if (direct) {
                    return readAsURL(`${input.assetsDirName}/${direct.filename}`, direct.extension);
                }
                const fallback = (0, resources_1.toAbsoluteUrl)(input.baseUrl, absoluteUrl);
                const record = networkRecordByUrl.get(fallback) || networkRecordByUrl.get(absoluteUrl);
                return record ? (0, network_records_1.toDataUrlFromRecord)(record) : null;
            }
        }));
    }
    await Promise.all(rewritePromises);
};
263
+ exports.rewriteLinks = rewriteLinks;
package/dist/types.d.ts ADDED
@@ -0,0 +1,54 @@
1
+ export type FetchRecord = { // One recorded request, tagged as coming from fetch or XHR.
2
+ kind: "fetch" | "xhr"; // Which of the two APIs the record belongs to.
3
+ url: string;
4
+ method: string;
5
+ requestBody?: string;
6
+ status?: number;
7
+ statusText?: string;
8
+ responseHeaders?: Record<string, string>;
9
+ responseBody?: string;
10
+ error?: string; // NOTE(review): presumably set instead of the response fields when the request failed — confirm.
11
+ timestamp: number; // NOTE(review): unit and epoch are not shown in this file — confirm.
12
+ };
13
+ export type NetworkRecord = { // One recorded request/response; rewrite-links.js indexes these by url.
14
+ url: string; // Lookup key: the first record seen for a URL is the one used.
15
+ method: string;
16
+ requestHeaders?: Record<string, string>;
17
+ requestBody?: string;
18
+ status?: number;
19
+ statusText?: string;
20
+ responseHeaders?: Record<string, string>;
21
+ responseBody?: string;
22
+ responseBodyBase64?: string;
23
+ responseEncoding?: "text" | "base64"; // NOTE(review): presumably says which of responseBody / responseBodyBase64 carries the payload — confirm.
24
+ error?: string;
25
+ timestamp: number; // NOTE(review): unit and epoch are not shown in this file — confirm.
26
+ };
27
+ export type LighterceptorResponseRecord = { // Response half of a LighterceptorNetworkRecord.
28
+ status: number;
29
+ statusText: string;
30
+ headers: Record<string, string>;
31
+ body: string;
32
+ bodyEncoding: "text" | "base64"; // Names the encoding of body.
33
+ };
34
+ export type LighterceptorNetworkRecord = { // One network record in the shape stored in SnapshotData.networkRecords.
35
+ url: string;
36
+ source?: string; // NOTE(review): meaning is not shown in this file — confirm against the producer.
37
+ method: string;
38
+ timestamp: number; // NOTE(review): unit and epoch are not shown in this file — confirm.
39
+ response?: LighterceptorResponseRecord; // Optional: a record may carry no response.
40
+ error?: string;
41
+ };
42
+ export type SnapshotData = { // Top-level snapshot: page identity plus the records and resources that go with it.
43
+ url: string;
44
+ title: string;
45
+ capturedAt: string; // NOTE(review): string format (ISO 8601?) is not shown in this file — confirm.
46
+ fetchXhrRecords: FetchRecord[];
47
+ networkRecords: LighterceptorNetworkRecord[];
48
+ resources: Array<{ // One entry per resource, pairing its url with a localPath.
49
+ url: string;
50
+ localPath: string;
51
+ contentType?: string | null; // May be omitted or explicitly null.
52
+ size?: number; // NOTE(review): presumably a byte count — confirm.
53
+ }>;
54
+ };
package/dist/types.js ADDED
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
package/package.json ADDED
@@ -0,0 +1,29 @@
1
+ {
2
+ "name": "@pagepocket/lib",
3
+ "version": "0.4.0",
4
+ "description": "",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "files": [
8
+ "dist"
9
+ ],
10
+ "keywords": [],
11
+ "author": "",
12
+ "license": "ISC",
13
+ "dependencies": {
14
+ "cheerio": "^1.0.0-rc.12",
15
+ "uni-fs": "npm:@pagepocket/uni-fs@0.4.0"
16
+ },
17
+ "devDependencies": {
18
+ "@types/node": "^20.11.30",
19
+ "prettier": "^3.7.4",
20
+ "tsx": "^4.19.3",
21
+ "typescript": "^5.4.5"
22
+ },
23
+ "scripts": {
24
+ "build": "tsc",
25
+ "format": "prettier --write .",
26
+ "format:check": "prettier --check .",
27
+ "test": "tsx --test specs/**/*.test.ts"
28
+ }
29
+ }