@pagepocket/lib 0.13.0 → 0.14.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/hackers/index.js +3 -1
- package/dist/hackers/replay-dom-rewrite/script-part-1.d.ts +1 -1
- package/dist/hackers/replay-dom-rewrite/script-part-1.js +40 -51
- package/dist/hackers/replay-dom-rewrite/script-part-2.d.ts +1 -1
- package/dist/hackers/replay-dom-rewrite/script-part-2.js +74 -44
- package/dist/hackers/replay-fetch.js +4 -0
- package/dist/hackers/replay-websocket.js +50 -8
- package/dist/hackers/replay-worker.d.ts +18 -0
- package/dist/hackers/replay-worker.js +242 -0
- package/dist/replay/match-api.js +103 -3
- package/dist/replay/templates/loader-template.d.ts +15 -0
- package/dist/replay/templates/loader-template.js +164 -0
- package/dist/replay/templates/match-api-source.d.ts +1 -1
- package/dist/replay/templates/match-api-source.js +86 -4
- package/dist/replay/templates/replay-script-template.part-2.js +24 -1
- package/dist/resource-filter.js +29 -3
- package/dist/snapshot-builder/build-snapshot.js +33 -3
- package/dist/units/runner.js +15 -0
- package/package.json +4 -4
|
@@ -107,12 +107,25 @@ export const matchAPISource = `function matchAPI(options) {
|
|
|
107
107
|
const recordPathSearch = stripTrailingSlash(toPathSearch(recordUrl));
|
|
108
108
|
if (inputPathSearch === recordPathSearch)
|
|
109
109
|
return true;
|
|
110
|
-
const inputPath = stripTrailingSlash(toPathname(inputUrl));
|
|
111
|
-
const recordPath = stripTrailingSlash(toPathname(recordUrl));
|
|
112
|
-
if (inputPath === recordPath)
|
|
113
|
-
return true;
|
|
114
110
|
return false;
|
|
115
111
|
};
|
|
112
|
+
const normalizePathLoose = (input) => {
|
|
113
|
+
const parsed = safeUrl(input);
|
|
114
|
+
if (parsed) {
|
|
115
|
+
return stripTrailingSlash(parsed.pathname);
|
|
116
|
+
}
|
|
117
|
+
const rawValue = String(input ?? "");
|
|
118
|
+
if (!rawValue) {
|
|
119
|
+
return "";
|
|
120
|
+
}
|
|
121
|
+
const valueWithoutHash = rawValue.split("#")[0] ?? "";
|
|
122
|
+
const valueWithoutQuery = valueWithoutHash.split("?")[0] ?? "";
|
|
123
|
+
const withoutOrigin = valueWithoutQuery.replace(/^([a-zA-Z][a-zA-Z\\d+\\-.]*:)?\\/\\/[^/]+/, "");
|
|
124
|
+
if (!withoutOrigin) {
|
|
125
|
+
return "/";
|
|
126
|
+
}
|
|
127
|
+
return stripTrailingSlash(withoutOrigin.startsWith("/") ? withoutOrigin : "/" + withoutOrigin);
|
|
128
|
+
};
|
|
116
129
|
const scanRecords = (keyMethod, keyBody) => {
|
|
117
130
|
for (const record of records || []) {
|
|
118
131
|
if (!record || !record.url || !record.method)
|
|
@@ -128,10 +141,79 @@ export const matchAPISource = `function matchAPI(options) {
|
|
|
128
141
|
}
|
|
129
142
|
return undefined;
|
|
130
143
|
};
|
|
144
|
+
// Loose fallback lookup: finds a record whose method (and body, when keyBody is
// given) matches and whose path equals the requested path, ignoring protocol,
// host and query string.
//
// - No candidate: returns undefined.
// - Exactly one candidate: returned, unless the request's query keys are a
//   strict subset of the record's query keys (the record was captured for a
//   more specific request), in which case undefined is returned.
// - Several candidates: an exact search-string match wins immediately;
//   otherwise the candidate with the best score is returned, where every
//   record param equal to the request's counts +1 and every other record
//   param counts -2.
const scanRecordsIgnoreQueryAndProtocol = (keyMethod, keyBody) => {
  const inputPathname = normalizePathLoose(url);
  if (!inputPathname) {
    return undefined;
  }

  const candidates = [];
  for (const record of records || []) {
    if (!record || !record.url || !record.method)
      continue;
    if (record.method.toUpperCase() !== keyMethod)
      continue;
    const recordPathname = normalizePathLoose(record.url);
    if (!recordPathname || recordPathname !== inputPathname)
      continue;
    const recordBody = record.requestBody || record.requestBodyBase64 || "";
    if (keyBody && recordBody !== keyBody)
      continue;
    candidates.push(record);
  }
  if (candidates.length === 0)
    return undefined;

  // Parsed once and shared by both branches below.
  const inputParsed = safeUrl(url);

  if (candidates.length === 1) {
    const candidate = candidates[0];
    const candidateParsed = safeUrl(candidate.url);
    if (inputParsed && candidateParsed) {
      // Compare UNIQUE key sets on both sides. searchParams.keys() yields a
      // repeated key once per occurrence, so counting raw input keys against
      // the record's unique keys would hide extra record keys whenever the
      // request repeats a parameter (e.g. ?tag=a&tag=b).
      const inputKeys = new Set(inputParsed.searchParams.keys());
      const candidateKeys = new Set(candidateParsed.searchParams.keys());
      const inputAllContained = inputKeys.size > 0 && [...inputKeys].every((k) => candidateKeys.has(k));
      const candidateHasExtra = candidateKeys.size > inputKeys.size;
      if (inputAllContained && candidateHasExtra)
        return undefined;
    }
    return candidate;
  }

  const inputSearch = inputParsed ? inputParsed.search : "";
  // Loop-invariant, so computed once instead of per candidate.
  const inputParams = inputParsed ? inputParsed.searchParams : new URLSearchParams();
  let bestRecord = undefined;
  let bestScore = -Infinity;
  for (const candidate of candidates) {
    const candidateParsed = safeUrl(candidate.url);
    const candidateSearch = candidateParsed ? candidateParsed.search : "";
    if (candidateSearch === inputSearch)
      return candidate;
    const candidateParams = candidateParsed ? candidateParsed.searchParams : new URLSearchParams();
    let shared = 0;
    let recordOnly = 0;
    for (const [key, value] of candidateParams.entries()) {
      if (inputParams.get(key) === value) {
        shared++;
      }
      else {
        recordOnly++;
      }
    }
    // Params present only on the record are penalised twice as hard as a
    // shared param is rewarded.
    const score = shared - recordOnly * 2;
    if (score > bestScore) {
      bestScore = score;
      bestRecord = candidate;
    }
  }
  return bestRecord;
};
|
|
131
208
|
for (const [keyMethod, keyBody] of matchOrder) {
|
|
132
209
|
const record = scanRecords(keyMethod, keyBody);
|
|
133
210
|
if (record)
|
|
134
211
|
return record;
|
|
135
212
|
}
|
|
213
|
+
for (const [keyMethod, keyBody] of matchOrder) {
|
|
214
|
+
const record = scanRecordsIgnoreQueryAndProtocol(keyMethod, keyBody);
|
|
215
|
+
if (record)
|
|
216
|
+
return record;
|
|
217
|
+
}
|
|
136
218
|
return undefined;
|
|
137
219
|
}`;
|
|
@@ -37,6 +37,25 @@ export const buildReplayInjectedScriptPart2 = (options) => {
|
|
|
37
37
|
return snapshot;
|
|
38
38
|
})();
|
|
39
39
|
|
|
40
|
+
|
|
41
|
+
// Synchronous fast-path: when the loader page has already set
|
|
42
|
+
// window.__pagepocketResourcesPath / window.__pagepocketApiSnapshot,
|
|
43
|
+
// build resourceIndex immediately so document.write URL rewriting
|
|
44
|
+
// works before the async ready promise resolves.
|
|
45
|
+
try {
|
|
46
|
+
var preloadedResources = window.__pagepocketResourcesPath;
|
|
47
|
+
if (preloadedResources && preloadedResources.version === "1.0" && Array.isArray(preloadedResources.items)) {
|
|
48
|
+
resourceIndex = __pagepocketResourceProxy.buildIndex(preloadedResources);
|
|
49
|
+
}
|
|
50
|
+
} catch (syncInitError) {}
|
|
51
|
+
|
|
52
|
+
try {
|
|
53
|
+
var preloadedApi = window.__pagepocketApiSnapshot;
|
|
54
|
+
if (preloadedApi && preloadedApi.version === "1.0" && Array.isArray(preloadedApi.records)) {
|
|
55
|
+
primeLookups(preloadedApi);
|
|
56
|
+
}
|
|
57
|
+
} catch (syncInitError) {}
|
|
58
|
+
|
|
40
59
|
const isLocalResource = (value) => {
|
|
41
60
|
if (!value) return false;
|
|
42
61
|
if (value.startsWith("data:") || value.startsWith("blob:")) return true;
|
|
@@ -202,7 +221,11 @@ export const buildReplayInjectedScriptPart2 = (options) => {
|
|
|
202
221
|
};
|
|
203
222
|
|
|
204
223
|
const responseFromRecord = (record) => {
|
|
205
|
-
|
|
224
|
+
var headers = new Headers();
|
|
225
|
+
var rawHeaders = record.responseHeaders || {};
|
|
226
|
+
for (var key in rawHeaders) {
|
|
227
|
+
try { headers.append(key, rawHeaders[key]); } catch (e) {}
|
|
228
|
+
}
|
|
206
229
|
if (record.responseEncoding === "base64" && record.responseBodyBase64) {
|
|
207
230
|
const bytes = decodeBase64(record.responseBodyBase64);
|
|
208
231
|
return new Response(bytes, {
|
package/dist/resource-filter.js
CHANGED
|
@@ -4,17 +4,43 @@ const isSkippableUrl = (url) => url.startsWith("data:") ||
|
|
|
4
4
|
url.startsWith("mailto:") ||
|
|
5
5
|
url.startsWith("tel:") ||
|
|
6
6
|
url.startsWith("javascript:");
|
|
7
|
+
/**
 * Checks whether a response mimeType represents a saveable binary asset.
 *
 * Some SPAs (e.g. Feishu, Notion) load images and fonts via `fetch()` or
 * `XMLHttpRequest` instead of `<img>` tags. Puppeteer marks those requests
 * as `resourceType: "fetch"` / `"xhr"`, but the response is still an image
 * or font that must be persisted for offline replay.
 *
 * We detect this by inspecting the response `mimeType`. Matching is
 * case-insensitive and ignores any `;`-separated parameters.
 *
 * Accepted: `image/*`, `font/*`, `audio/*`, `video/*`, the legacy font types
 * `application/font-*`, `application/x-font-*` and
 * `application/vnd.ms-fontobject`, plus `application/octet-stream`.
 *
 * @param {unknown} mimeType - MIME type string, optionally with parameters.
 * @returns {boolean} `true` when the type is one of the accepted asset types;
 *   `false` otherwise, including for non-string input.
 */
const isSaveableMimeType = (mimeType) => {
    // Guard against undefined/null/non-string values instead of throwing.
    if (typeof mimeType !== "string") {
        return false;
    }
    const essence = mimeType.toLowerCase().split(";")[0].trim();
    const saveablePrefixes = [
        "image/",
        "font/",
        "audio/",
        "video/",
        // Legacy font types still served by many hosts, e.g.
        // application/font-woff, application/font-sfnt, application/x-font-ttf.
        "application/font-",
        "application/x-font-",
    ];
    if (saveablePrefixes.some((prefix) => essence.startsWith(prefix))) {
        return true;
    }
    return essence === "application/vnd.ms-fontobject" || essence === "application/octet-stream";
};
|
|
7
33
|
export const createDefaultResourceFilter = () => ({
|
|
8
34
|
shouldSave(req, res) {
|
|
9
35
|
if (isSkippableUrl(req.url)) {
|
|
10
36
|
return false;
|
|
11
37
|
}
|
|
12
|
-
if (req.resourceType && (req.resourceType === "fetch" || req.resourceType === "xhr")) {
|
|
13
|
-
return false;
|
|
14
|
-
}
|
|
15
38
|
if (res && res.status >= 400) {
|
|
16
39
|
return false;
|
|
17
40
|
}
|
|
41
|
+
if (req.resourceType && (req.resourceType === "fetch" || req.resourceType === "xhr")) {
|
|
42
|
+
return res?.mimeType ? isSaveableMimeType(res.mimeType) : false;
|
|
43
|
+
}
|
|
18
44
|
if (req.resourceType) {
|
|
19
45
|
return DEFAULT_ALLOWED.has(req.resourceType);
|
|
20
46
|
}
|
|
@@ -8,6 +8,7 @@ import { responseMimeType } from "./http.js";
|
|
|
8
8
|
import { docDirFromUrl, resolveSnapshotPath } from "./path-map.js";
|
|
9
9
|
import { buildResourcesPathSnapshot } from "./resources-path.js";
|
|
10
10
|
import { maybeRewriteScript, maybeRewriteStylesheet } from "./rewrite-resource.js";
|
|
11
|
+
import { buildLoaderHtml } from "../replay/templates/loader-template.js";
|
|
11
12
|
export const buildSnapshot = async (input) => {
|
|
12
13
|
const warnings = input.warnings;
|
|
13
14
|
const contentStore = input.capture.contentStore;
|
|
@@ -46,10 +47,11 @@ export const buildSnapshot = async (input) => {
|
|
|
46
47
|
const apiPath = ensureLeadingSlash(multiDoc ? `${sanitizePosixPath(docDir)}/api.json` : "/api.json");
|
|
47
48
|
for (const resource of group.resources) {
|
|
48
49
|
if (resource.request.resourceType === "document") {
|
|
49
|
-
const
|
|
50
|
+
const originalPath = urlToPath.get(resource.request.url) ?? "/index.html";
|
|
51
|
+
const pagePath = toPagePath(originalPath);
|
|
50
52
|
const { file } = await emitDocumentFile({
|
|
51
53
|
resource,
|
|
52
|
-
path,
|
|
54
|
+
path: pagePath,
|
|
53
55
|
entryUrl: input.entryUrl,
|
|
54
56
|
groupUrl: group.url,
|
|
55
57
|
apiPath,
|
|
@@ -60,8 +62,23 @@ export const buildSnapshot = async (input) => {
|
|
|
60
62
|
snapshotEntryUrl: input.entryUrl
|
|
61
63
|
});
|
|
62
64
|
files.push(file);
|
|
65
|
+
const loaderHtml = buildLoaderHtml({
|
|
66
|
+
pageUrl: pagePath,
|
|
67
|
+
apiPath,
|
|
68
|
+
resourcesPathUrl: "/resources_path.json"
|
|
69
|
+
});
|
|
70
|
+
const loaderBytes = new TextEncoder().encode(loaderHtml);
|
|
71
|
+
const loaderRef = await contentStore.put({ kind: "buffer", data: loaderBytes }, { url: originalPath, mimeType: "text/html", sizeHint: loaderBytes.byteLength });
|
|
72
|
+
files.push({
|
|
73
|
+
path: originalPath,
|
|
74
|
+
mimeType: "text/html",
|
|
75
|
+
size: loaderBytes.byteLength,
|
|
76
|
+
source: loaderRef,
|
|
77
|
+
originalUrl: resource.request.url,
|
|
78
|
+
resourceType: "document"
|
|
79
|
+
});
|
|
63
80
|
if (resource.request.url === input.entryUrl || !entryPath) {
|
|
64
|
-
entryPath =
|
|
81
|
+
entryPath = originalPath;
|
|
65
82
|
}
|
|
66
83
|
continue;
|
|
67
84
|
}
|
|
@@ -135,3 +152,16 @@ export const buildSnapshot = async (input) => {
|
|
|
135
152
|
};
|
|
136
153
|
};
|
|
137
154
|
const resourceMimeType = (resource) => resource.mimeType || responseMimeType(resource.response) || undefined;
|
|
155
|
+
/**
 * Derive the "page" path from the original document path by prefixing the
 * final path segment with an underscore.
 * `/index.html` → `/_index.html`, `/foo/bar.html` → `/foo/_bar.html`,
 * `index.html` → `_index.html`.
 *
 * @param {string} originalPath - Snapshot path of the original document.
 * @returns {string} The same path with `_` inserted before the file name.
 */
const toPagePath = (originalPath) => {
    // lastIndexOf yields -1 when there is no slash, so the file name then
    // starts at index 0 and the directory part is empty; no special case needed.
    const fileNameStart = originalPath.lastIndexOf("/") + 1;
    const dir = originalPath.slice(0, fileNameStart);
    const filename = originalPath.slice(fileNameStart);
    return `${dir}_${filename}`;
};
|
package/dist/units/runner.js
CHANGED
|
@@ -39,6 +39,7 @@ export const runCapture = async (input) => {
|
|
|
39
39
|
const mergePatch = (_ctx, patch) => mergePatchIntoFreshContext(patch);
|
|
40
40
|
let ctx = { value: {} };
|
|
41
41
|
let result;
|
|
42
|
+
const unitErrors = [];
|
|
42
43
|
const totalUnits = input.units.length;
|
|
43
44
|
try {
|
|
44
45
|
for (let i = 0; i < input.units.length; i++) {
|
|
@@ -102,6 +103,7 @@ export const runCapture = async (input) => {
|
|
|
102
103
|
}
|
|
103
104
|
catch (err) {
|
|
104
105
|
debugLog(`[runner] unit "${unit.id}" threw:`, err);
|
|
106
|
+
unitErrors.push({ unitId: unit.id, error: err });
|
|
105
107
|
rt.publish(PROGRESS, {
|
|
106
108
|
type: "unit:end",
|
|
107
109
|
unitId: unit.id,
|
|
@@ -136,6 +138,19 @@ export const runCapture = async (input) => {
|
|
|
136
138
|
await rt._awaitDeferred();
|
|
137
139
|
}
|
|
138
140
|
if (!result) {
|
|
141
|
+
if (unitErrors.length > 0) {
|
|
142
|
+
const first = unitErrors[0];
|
|
143
|
+
const rootCause = first.error instanceof Error ? first.error : new Error(String(first.error));
|
|
144
|
+
if (unitErrors.length > 1) {
|
|
145
|
+
const summary = unitErrors
|
|
146
|
+
.map((e) => `${e.unitId}: ${e.error instanceof Error ? e.error.message : String(e.error)}`)
|
|
147
|
+
.join("; ");
|
|
148
|
+
const wrapped = new Error(`Pipeline failed: ${unitErrors.length} unit(s) threw errors. ${summary}`);
|
|
149
|
+
wrapped.cause = rootCause;
|
|
150
|
+
throw wrapped;
|
|
151
|
+
}
|
|
152
|
+
throw rootCause;
|
|
153
|
+
}
|
|
139
154
|
throw new Error("No terminal result. Ensure a terminal unit returns a CaptureResult or calls finish().");
|
|
140
155
|
}
|
|
141
156
|
return result;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pagepocket/lib",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.14.5",
|
|
4
4
|
"description": "Library for rewriting HTML snapshots and inlining local resources.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -20,9 +20,9 @@
|
|
|
20
20
|
"dependencies": {
|
|
21
21
|
"cheerio": "^1.0.0-rc.12",
|
|
22
22
|
"domhandler": "^5.0.3",
|
|
23
|
-
"@pagepocket/contracts": "0.
|
|
24
|
-
"@pagepocket/
|
|
25
|
-
"@pagepocket/
|
|
23
|
+
"@pagepocket/contracts": "0.14.5",
|
|
24
|
+
"@pagepocket/uni-fs": "0.14.5",
|
|
25
|
+
"@pagepocket/shared": "0.14.5"
|
|
26
26
|
},
|
|
27
27
|
"devDependencies": {
|
|
28
28
|
"@playwright/test": "^1.50.1",
|