@pagepocket/lib 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -6
- package/dist/build-snapshot-from-bundle.d.ts +23 -0
- package/dist/build-snapshot-from-bundle.js +68 -0
- package/dist/builtin-blacklist.js +3 -6
- package/dist/bundle/from-network-store.d.ts +10 -0
- package/dist/bundle/from-network-store.js +26 -0
- package/dist/bundle/types.d.ts +32 -0
- package/dist/bundle/types.js +2 -0
- package/dist/capture/index.d.ts +14 -0
- package/dist/capture/index.js +86 -0
- package/dist/capture/memory-content-store.d.ts +4 -0
- package/dist/capture/memory-content-store.js +42 -0
- package/dist/capture/types.d.ts +61 -0
- package/dist/capture/types.js +2 -0
- package/dist/content-store.js +3 -8
- package/dist/content-type.d.ts +1 -1
- package/dist/content-type.js +2 -28
- package/dist/core/_impl/completion.d.ts +4 -0
- package/dist/core/_impl/completion.js +29 -0
- package/dist/core/_impl/content-store.d.ts +21 -0
- package/dist/core/_impl/content-store.js +91 -0
- package/dist/core/_impl/debug.d.ts +1 -0
- package/dist/core/_impl/debug.js +16 -0
- package/dist/core/_impl/inflight-tracker.d.ts +19 -0
- package/dist/core/_impl/inflight-tracker.js +48 -0
- package/dist/core/_impl/pagepocket.d.ts +27 -0
- package/dist/core/_impl/pagepocket.js +155 -0
- package/dist/core/capture/_impl/memory-content-store.d.ts +4 -0
- package/dist/core/capture/_impl/memory-content-store.js +42 -0
- package/dist/core/capture/_impl/types.d.ts +61 -0
- package/dist/core/capture/_impl/types.js +2 -0
- package/dist/core/capture/internal/memory-content-store.d.ts +4 -0
- package/dist/core/capture/internal/memory-content-store.js +42 -0
- package/dist/core/capture/internal/types.d.ts +61 -0
- package/dist/core/capture/internal/types.js +2 -0
- package/dist/core/capture/memory-content-store.d.ts +4 -0
- package/dist/core/capture/memory-content-store.js +38 -0
- package/dist/core/capture/types.d.ts +61 -0
- package/dist/core/capture/types.js +1 -0
- package/dist/core/completion.d.ts +4 -0
- package/dist/core/completion.js +23 -0
- package/dist/core/content-store.d.ts +21 -0
- package/dist/core/content-store.js +54 -0
- package/dist/core/debug.d.ts +1 -0
- package/dist/core/debug.js +12 -0
- package/dist/core/file-tree-merge.d.ts +2 -0
- package/dist/core/file-tree-merge.js +27 -0
- package/dist/core/file-tree.d.ts +36 -0
- package/dist/core/file-tree.js +1 -0
- package/dist/core/inflight-tracker.d.ts +19 -0
- package/dist/core/inflight-tracker.js +44 -0
- package/dist/core/internal/completion.d.ts +4 -0
- package/dist/core/internal/completion.js +29 -0
- package/dist/core/internal/content-store.d.ts +21 -0
- package/dist/core/internal/content-store.js +91 -0
- package/dist/core/internal/debug.d.ts +1 -0
- package/dist/core/internal/debug.js +16 -0
- package/dist/core/internal/inflight-tracker.d.ts +19 -0
- package/dist/core/internal/inflight-tracker.js +48 -0
- package/dist/core/internal/pagepocket.d.ts +27 -0
- package/dist/core/internal/pagepocket.js +155 -0
- package/dist/core/pagepocket.d.ts +38 -0
- package/dist/core/pagepocket.js +57 -0
- package/dist/core/plugin/_impl/context.d.ts +47 -0
- package/dist/core/plugin/_impl/context.js +142 -0
- package/dist/core/plugin/_impl/runner.d.ts +12 -0
- package/dist/core/plugin/_impl/runner.js +232 -0
- package/dist/core/plugin/_impl/types.d.ts +108 -0
- package/dist/core/plugin/_impl/types.js +2 -0
- package/dist/core/plugin/context.d.ts +47 -0
- package/dist/core/plugin/context.js +205 -0
- package/dist/core/plugin/internal/context.d.ts +47 -0
- package/dist/core/plugin/internal/context.js +142 -0
- package/dist/core/plugin/internal/runner.d.ts +12 -0
- package/dist/core/plugin/internal/runner.js +232 -0
- package/dist/core/plugin/internal/types.d.ts +108 -0
- package/dist/core/plugin/internal/types.js +2 -0
- package/dist/core/plugin/runner-utils.d.ts +9 -0
- package/dist/core/plugin/runner-utils.js +29 -0
- package/dist/core/plugin/runner.d.ts +12 -0
- package/dist/core/plugin/runner.js +118 -0
- package/dist/core/plugin/types.d.ts +117 -0
- package/dist/core/plugin/types.js +1 -0
- package/dist/core/runtime/types.d.ts +14 -0
- package/dist/core/runtime/types.js +2 -0
- package/dist/css-rewrite.js +1 -5
- package/dist/debug.d.ts +0 -1
- package/dist/debug.js +3 -5
- package/dist/files/types.d.ts +41 -0
- package/dist/files/types.js +2 -0
- package/dist/hack-html.js +20 -13
- package/dist/hackers/index.d.ts +1 -1
- package/dist/hackers/index.js +24 -27
- package/dist/hackers/preload-fetch.d.ts +1 -1
- package/dist/hackers/preload-fetch.js +1 -4
- package/dist/hackers/preload-xhr.d.ts +1 -1
- package/dist/hackers/preload-xhr.js +1 -4
- package/dist/hackers/replay-beacon.d.ts +1 -1
- package/dist/hackers/replay-beacon.js +1 -4
- package/dist/hackers/replay-block-text-fragment.d.ts +1 -1
- package/dist/hackers/replay-block-text-fragment.js +1 -4
- package/dist/hackers/replay-css-proxy.d.ts +1 -1
- package/dist/hackers/replay-css-proxy.js +9 -12
- package/dist/hackers/replay-dom-rewrite.d.ts +1 -1
- package/dist/hackers/replay-dom-rewrite.js +165 -154
- package/dist/hackers/replay-eventsource.d.ts +1 -1
- package/dist/hackers/replay-eventsource.js +1 -4
- package/dist/hackers/replay-fetch.d.ts +1 -1
- package/dist/hackers/replay-fetch.js +1 -4
- package/dist/hackers/replay-history-path.d.ts +1 -1
- package/dist/hackers/replay-history-path.js +1 -4
- package/dist/hackers/replay-svg-image.d.ts +1 -1
- package/dist/hackers/replay-svg-image.js +1 -4
- package/dist/hackers/replay-websocket.d.ts +1 -1
- package/dist/hackers/replay-websocket.js +1 -4
- package/dist/hackers/replay-xhr.d.ts +1 -1
- package/dist/hackers/replay-xhr.js +1 -4
- package/dist/hackers/types.js +1 -2
- package/dist/index.d.ts +29 -13
- package/dist/index.js +23 -44
- package/dist/kind-map.d.ts +68 -0
- package/dist/kind-map.js +58 -0
- package/dist/network-store.js +12 -1
- package/dist/pagepocket.d.ts +19 -4
- package/dist/pagepocket.js +36 -102
- package/dist/path-resolver.d.ts +1 -2
- package/dist/path-resolver.js +9 -16
- package/dist/plugin/builtins/build-snapshot-plugin.d.ts +5 -0
- package/dist/plugin/builtins/build-snapshot-plugin.js +84 -0
- package/dist/plugin/builtins/replace-elements-plugin.d.ts +8 -0
- package/dist/plugin/builtins/replace-elements-plugin.js +13 -0
- package/dist/plugin/builtins/to-directory-plugin.d.ts +7 -0
- package/dist/plugin/builtins/to-directory-plugin.js +20 -0
- package/dist/plugin/builtins/to-zip-plugin.d.ts +5 -0
- package/dist/plugin/builtins/to-zip-plugin.js +19 -0
- package/dist/plugin/context.d.ts +47 -0
- package/dist/plugin/context.js +142 -0
- package/dist/plugin/runner.d.ts +12 -0
- package/dist/plugin/runner.js +232 -0
- package/dist/plugin/types.d.ts +108 -0
- package/dist/plugin/types.js +2 -0
- package/dist/plugins/build-files-from-capture.d.ts +5 -0
- package/dist/plugins/build-files-from-capture.js +85 -0
- package/dist/plugins/build-warc.d.ts +5 -0
- package/dist/plugins/build-warc.js +225 -0
- package/dist/plugins/builtins/manifest.d.ts +2 -0
- package/dist/plugins/builtins/manifest.js +42 -0
- package/dist/plugins/builtins/snapshot-directory.d.ts +2 -0
- package/dist/plugins/builtins/snapshot-directory.js +24 -0
- package/dist/plugins/builtins/snapshot-zip.d.ts +2 -0
- package/dist/plugins/builtins/snapshot-zip.js +25 -0
- package/dist/plugins/capture-http-lighterceptor.d.ts +5 -0
- package/dist/plugins/capture-http-lighterceptor.js +85 -0
- package/dist/plugins/capture-http-puppeteer.d.ts +5 -0
- package/dist/plugins/capture-http-puppeteer.js +85 -0
- package/dist/plugins/host.d.ts +37 -0
- package/dist/plugins/host.js +105 -0
- package/dist/plugins/index.d.ts +6 -0
- package/dist/plugins/index.js +11 -0
- package/dist/plugins/ordering.d.ts +2 -0
- package/dist/plugins/ordering.js +19 -0
- package/dist/plugins/types.d.ts +51 -0
- package/dist/plugins/types.js +2 -0
- package/dist/preload.js +3 -7
- package/dist/replace-elements/actions.d.ts +5 -0
- package/dist/replace-elements/actions.js +86 -0
- package/dist/replace-elements/match.d.ts +5 -0
- package/dist/replace-elements/match.js +46 -0
- package/dist/replace-elements/normalize.d.ts +21 -0
- package/dist/replace-elements/normalize.js +50 -0
- package/dist/replace-elements.d.ts +1 -1
- package/dist/replace-elements.js +5 -185
- package/dist/replay/match-api.d.ts +10 -0
- package/dist/replay/match-api.js +162 -0
- package/dist/replay/templates/match-api-source.d.ts +1 -0
- package/dist/replay/templates/match-api-source.js +137 -0
- package/dist/replay/templates/replay-script-template.d.ts +5 -0
- package/dist/replay/templates/replay-script-template.js +337 -0
- package/dist/replay/templates/resource-proxy-script.d.ts +1 -0
- package/dist/replay/templates/resource-proxy-script.js +274 -0
- package/dist/replay-script.d.ts +3 -10
- package/dist/replay-script.js +11 -625
- package/dist/resource-filter.d.ts +1 -1
- package/dist/resource-filter.js +1 -5
- package/dist/resource-proxy/escape-percent.d.ts +1 -0
- package/dist/resource-proxy/escape-percent.js +12 -0
- package/dist/resource-proxy/multimap.d.ts +3 -0
- package/dist/resource-proxy/multimap.js +18 -0
- package/dist/resource-proxy/pathname-variants.d.ts +3 -0
- package/dist/resource-proxy/pathname-variants.js +54 -0
- package/dist/resource-proxy.d.ts +4 -2
- package/dist/resource-proxy.js +48 -117
- package/dist/resources.js +4 -42
- package/dist/rewrite-links/js-imports.d.ts +3 -0
- package/dist/rewrite-links/js-imports.js +56 -0
- package/dist/rewrite-links/link-rel.d.ts +2 -0
- package/dist/rewrite-links/link-rel.js +10 -0
- package/dist/rewrite-links/meta-refresh.d.ts +3 -0
- package/dist/rewrite-links/meta-refresh.js +22 -0
- package/dist/rewrite-links/skip.d.ts +1 -0
- package/dist/rewrite-links/skip.js +10 -0
- package/dist/rewrite-links/srcset.d.ts +3 -0
- package/dist/rewrite-links/srcset.js +63 -0
- package/dist/rewrite-links/url-resolve.d.ts +3 -0
- package/dist/rewrite-links/url-resolve.js +13 -0
- package/dist/rewrite-links.d.ts +3 -3
- package/dist/rewrite-links.js +31 -240
- package/dist/snapshot-builder/api.d.ts +3 -0
- package/dist/snapshot-builder/api.js +6 -0
- package/dist/snapshot-builder/build-snapshot.d.ts +3 -0
- package/dist/snapshot-builder/build-snapshot.js +138 -0
- package/dist/snapshot-builder/capture-index/index-capture.d.ts +13 -0
- package/dist/snapshot-builder/capture-index/index-capture.js +168 -0
- package/dist/snapshot-builder/capture-index/index.d.ts +2 -0
- package/dist/snapshot-builder/capture-index/index.js +1 -0
- package/dist/snapshot-builder/capture-index/types.d.ts +12 -0
- package/dist/snapshot-builder/capture-index/types.js +1 -0
- package/dist/snapshot-builder/capture-index.d.ts +12 -0
- package/dist/snapshot-builder/capture-index.js +173 -0
- package/dist/snapshot-builder/emit-document.d.ts +24 -0
- package/dist/snapshot-builder/emit-document.js +50 -0
- package/dist/snapshot-builder/grouping.d.ts +8 -0
- package/dist/snapshot-builder/grouping.js +87 -0
- package/dist/snapshot-builder/http.d.ts +6 -0
- package/dist/snapshot-builder/http.js +28 -0
- package/dist/snapshot-builder/index.d.ts +4 -0
- package/dist/snapshot-builder/index.js +2 -0
- package/dist/snapshot-builder/path-map.d.ts +3 -0
- package/dist/snapshot-builder/path-map.js +35 -0
- package/dist/snapshot-builder/resources-path.d.ts +23 -0
- package/dist/snapshot-builder/resources-path.js +47 -0
- package/dist/snapshot-builder/rewrite-resource.d.ts +18 -0
- package/dist/snapshot-builder/rewrite-resource.js +52 -0
- package/dist/snapshot-builder/types.d.ts +37 -0
- package/dist/snapshot-builder/types.js +2 -0
- package/dist/snapshot-builder.d.ts +12 -8
- package/dist/snapshot-builder.js +252 -27
- package/dist/types.d.ts +122 -78
- package/dist/types.js +4 -2
- package/dist/units/contracts-bridge.d.ts +76 -0
- package/dist/units/contracts-bridge.js +6 -0
- package/dist/units/index.d.ts +4 -0
- package/dist/units/index.js +2 -0
- package/dist/units/runner.d.ts +11 -0
- package/dist/units/runner.js +270 -0
- package/dist/units/types.d.ts +39 -0
- package/dist/units/types.js +1 -0
- package/dist/utils/streams.d.ts +2 -0
- package/dist/utils/streams.js +29 -0
- package/dist/utils.d.ts +35 -1
- package/dist/utils.js +107 -29
- package/dist/v3/contracts-bridge.d.ts +69 -0
- package/dist/v3/contracts-bridge.js +5 -0
- package/dist/v3/index.d.ts +4 -0
- package/dist/v3/index.js +2 -0
- package/dist/v3/runner.d.ts +20 -0
- package/dist/v3/runner.js +245 -0
- package/dist/v3/types.d.ts +39 -0
- package/dist/v3/types.js +1 -0
- package/dist/writers.js +3 -1
- package/package.json +11 -3
package/dist/rewrite-links.js
CHANGED
|
@@ -1,238 +1,30 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
}));
|
|
13
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
-
}) : function(o, v) {
|
|
16
|
-
o["default"] = v;
|
|
17
|
-
});
|
|
18
|
-
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
-
var ownKeys = function(o) {
|
|
20
|
-
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
-
var ar = [];
|
|
22
|
-
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
-
return ar;
|
|
24
|
-
};
|
|
25
|
-
return ownKeys(o);
|
|
26
|
-
};
|
|
27
|
-
return function (mod) {
|
|
28
|
-
if (mod && mod.__esModule) return mod;
|
|
29
|
-
var result = {};
|
|
30
|
-
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
-
__setModuleDefault(result, mod);
|
|
32
|
-
return result;
|
|
33
|
-
};
|
|
34
|
-
})();
|
|
35
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
-
exports.rewriteEntryHtml = exports.rewriteJsText = void 0;
|
|
37
|
-
const cheerio = __importStar(require("cheerio"));
|
|
38
|
-
const css_rewrite_1 = require("./css-rewrite");
|
|
39
|
-
const hack_html_1 = require("./hack-html");
|
|
40
|
-
const replace_elements_1 = require("./replace-elements");
|
|
41
|
-
const shouldSkipValue = (value) => {
|
|
42
|
-
const trimmed = value.trim();
|
|
43
|
-
return (!trimmed ||
|
|
44
|
-
trimmed.startsWith("data:") ||
|
|
45
|
-
trimmed.startsWith("blob:") ||
|
|
46
|
-
trimmed.startsWith("mailto:") ||
|
|
47
|
-
trimmed.startsWith("tel:") ||
|
|
48
|
-
trimmed.startsWith("javascript:") ||
|
|
49
|
-
trimmed.startsWith("#"));
|
|
50
|
-
};
|
|
51
|
-
const resolveUrlValue = (value, baseUrl, resolve) => {
|
|
52
|
-
if (shouldSkipValue(value)) {
|
|
53
|
-
return null;
|
|
54
|
-
}
|
|
55
|
-
try {
|
|
56
|
-
const absolute = new URL(value, baseUrl).toString();
|
|
57
|
-
return resolve(absolute);
|
|
58
|
-
}
|
|
59
|
-
catch {
|
|
60
|
-
return null;
|
|
61
|
-
}
|
|
62
|
-
};
|
|
63
|
-
const isUnsafeSrcsetValue = (value) => {
|
|
64
|
-
const trimmed = value.trim();
|
|
65
|
-
if (!trimmed) {
|
|
66
|
-
return false;
|
|
67
|
-
}
|
|
68
|
-
// Some sites (notably Substack) emit image transform URLs that contain commas
|
|
69
|
-
// inside the URL itself (e.g. "/image/fetch/...,$w_40,$h_40,.../https%3A...").
|
|
70
|
-
//
|
|
71
|
-
// In the HTML srcset grammar, commas separate candidates, so unescaped commas
|
|
72
|
-
// inside a URL make the srcset invalid. Browsers will parse it into garbage
|
|
73
|
-
// URLs like "https%3A%2F%2F...png" and try to fetch them.
|
|
74
|
-
//
|
|
75
|
-
// For offline snapshots, it's better to drop srcset entirely and rely on
|
|
76
|
-
// the already-rewritten img[src].
|
|
77
|
-
const hasFetchTransform = trimmed.includes("/image/fetch/");
|
|
78
|
-
const hasEncodedUrlTail = trimmed.includes("https%3A%2F%2F");
|
|
79
|
-
const hasCommaTokens = trimmed.includes(",w_") ||
|
|
80
|
-
trimmed.includes(", w_") ||
|
|
81
|
-
trimmed.includes(",h_") ||
|
|
82
|
-
trimmed.includes(", h_") ||
|
|
83
|
-
trimmed.includes(",c_") ||
|
|
84
|
-
trimmed.includes(", c_");
|
|
85
|
-
return hasFetchTransform && hasEncodedUrlTail && hasCommaTokens;
|
|
86
|
-
};
|
|
87
|
-
const isDescriptorToken = (token) => {
|
|
88
|
-
const trimmed = token.trim();
|
|
89
|
-
if (!trimmed)
|
|
90
|
-
return false;
|
|
91
|
-
// Common srcset descriptors: 1x, 2x, 320w
|
|
92
|
-
return /^\d+(\.\d+)?x$/i.test(trimmed) || /^\d+w$/i.test(trimmed);
|
|
93
|
-
};
|
|
94
|
-
const parseSrcset = (input) => {
|
|
95
|
-
// Minimal srcset parser:
|
|
96
|
-
// - Candidates are separated by commas.
|
|
97
|
-
// - Each candidate is "<url> [descriptor]".
|
|
98
|
-
// - URLs may contain spaces/commas (e.g. CDN transform strings). To avoid
|
|
99
|
-
// breaking those, we locate the descriptor from the *end* of the candidate.
|
|
100
|
-
const rawCandidates = input
|
|
101
|
-
.split(",")
|
|
102
|
-
.map((c) => c.trim())
|
|
103
|
-
.filter(Boolean);
|
|
104
|
-
return rawCandidates.map((candidate) => {
|
|
105
|
-
const tokens = candidate.split(/\s+/).filter(Boolean);
|
|
106
|
-
if (tokens.length === 0) {
|
|
107
|
-
return { url: candidate };
|
|
108
|
-
}
|
|
109
|
-
const last = tokens[tokens.length - 1] ?? "";
|
|
110
|
-
if (tokens.length >= 2 && isDescriptorToken(last)) {
|
|
111
|
-
const descriptor = last;
|
|
112
|
-
const url = candidate.slice(0, candidate.lastIndexOf(descriptor)).trim();
|
|
113
|
-
return { url, descriptor };
|
|
114
|
-
}
|
|
115
|
-
return { url: candidate };
|
|
116
|
-
});
|
|
117
|
-
};
|
|
118
|
-
const stringifySrcset = (candidates) => {
|
|
119
|
-
return (candidates
|
|
120
|
-
.map((c) => {
|
|
121
|
-
const url = c.url.trim();
|
|
122
|
-
if (!c.descriptor)
|
|
123
|
-
return url;
|
|
124
|
-
return `${url} ${c.descriptor.trim()}`;
|
|
125
|
-
})
|
|
126
|
-
.filter(Boolean)
|
|
127
|
-
// Don't introduce spaces after commas inside URL tokens.
|
|
128
|
-
.join(","));
|
|
129
|
-
};
|
|
130
|
-
const rewriteSrcsetValue = (value, baseUrl, resolve) => {
|
|
131
|
-
if (isUnsafeSrcsetValue(value)) {
|
|
132
|
-
return "";
|
|
133
|
-
}
|
|
134
|
-
const candidates = parseSrcset(value);
|
|
135
|
-
const rewritten = candidates.map((c) => {
|
|
136
|
-
const resolved = resolveUrlValue(c.url, baseUrl, resolve);
|
|
137
|
-
return { url: resolved ?? c.url, descriptor: c.descriptor };
|
|
138
|
-
});
|
|
139
|
-
return stringifySrcset(rewritten);
|
|
140
|
-
};
|
|
141
|
-
const rewriteMetaRefresh = (content, baseUrl, resolve) => {
|
|
142
|
-
const parts = content.split(";");
|
|
143
|
-
if (parts.length < 2)
|
|
144
|
-
return content;
|
|
145
|
-
const urlPartIndex = parts.findIndex((part) => part.trim().toLowerCase().startsWith("url="));
|
|
146
|
-
if (urlPartIndex === -1)
|
|
147
|
-
return content;
|
|
148
|
-
const urlPart = parts[urlPartIndex];
|
|
149
|
-
let rawUrl = urlPart.split("=").slice(1).join("=").trim();
|
|
150
|
-
// Some pages quote the URL value (url="/next" or url='/next').
|
|
151
|
-
// Strip a single pair of surrounding quotes to improve rewrite coverage.
|
|
152
|
-
if ((rawUrl.startsWith('"') && rawUrl.endsWith('"')) ||
|
|
153
|
-
(rawUrl.startsWith("'") && rawUrl.endsWith("'"))) {
|
|
154
|
-
rawUrl = rawUrl.slice(1, -1).trim();
|
|
155
|
-
}
|
|
156
|
-
const resolved = resolveUrlValue(rawUrl, baseUrl, resolve);
|
|
157
|
-
if (!resolved)
|
|
158
|
-
return content;
|
|
159
|
-
const next = `url=${resolved}`;
|
|
160
|
-
const nextParts = parts.slice();
|
|
161
|
-
nextParts[urlPartIndex] = next;
|
|
162
|
-
return nextParts.join(";");
|
|
163
|
-
};
|
|
164
|
-
const shouldRewriteLinkHref = ($element) => {
|
|
165
|
-
const rel = ($element.attr("rel") || "").trim().toLowerCase();
|
|
166
|
-
if (!rel) {
|
|
167
|
-
return true;
|
|
168
|
-
}
|
|
169
|
-
// Only rewrite link rels that are expected to load a resource.
|
|
170
|
-
// Avoid rewriting navigational/SEO links like canonical, preconnect, etc.
|
|
171
|
-
return (rel.includes("stylesheet") ||
|
|
172
|
-
rel.includes("preload") ||
|
|
173
|
-
rel.includes("prefetch") ||
|
|
174
|
-
rel.includes("icon"));
|
|
175
|
-
};
|
|
176
|
-
const rewriteJsText = async (source, resolve, baseUrl) => {
|
|
177
|
-
const replaceSpecifier = async (specifier) => {
|
|
178
|
-
const trimmed = specifier.trim();
|
|
179
|
-
if (shouldSkipValue(trimmed)) {
|
|
180
|
-
return specifier;
|
|
181
|
-
}
|
|
182
|
-
const resolved = resolveUrlValue(trimmed, baseUrl, resolve);
|
|
183
|
-
return resolved ?? specifier;
|
|
184
|
-
};
|
|
185
|
-
const importFromPattern = /(\bimport\s+[^'"]*?\sfrom\s+)(["'])([^"']+)\2/g;
|
|
186
|
-
const importSideEffectPattern = /(\bimport\s+)(["'])([^"']+)\2/g;
|
|
187
|
-
const dynamicImportPattern = /(\bimport\s*\(\s*)(["'])([^"']+)\2(\s*\))/g;
|
|
188
|
-
let replaced = "";
|
|
189
|
-
let lastIndex = 0;
|
|
190
|
-
for (const match of source.matchAll(importFromPattern)) {
|
|
191
|
-
const index = match.index ?? 0;
|
|
192
|
-
replaced += source.slice(lastIndex, index);
|
|
193
|
-
const prefix = match[1] || "";
|
|
194
|
-
const quote = match[2] || "";
|
|
195
|
-
const specifier = match[3] || "";
|
|
196
|
-
const next = await replaceSpecifier(specifier);
|
|
197
|
-
replaced += `${prefix}${quote}${next}${quote}`;
|
|
198
|
-
lastIndex = index + match[0].length;
|
|
199
|
-
}
|
|
200
|
-
replaced += source.slice(lastIndex);
|
|
201
|
-
let final = "";
|
|
202
|
-
lastIndex = 0;
|
|
203
|
-
for (const match of replaced.matchAll(importSideEffectPattern)) {
|
|
204
|
-
const index = match.index ?? 0;
|
|
205
|
-
final += replaced.slice(lastIndex, index);
|
|
206
|
-
const prefix = match[1] || "";
|
|
207
|
-
const quote = match[2] || "";
|
|
208
|
-
const specifier = match[3] || "";
|
|
209
|
-
const next = await replaceSpecifier(specifier);
|
|
210
|
-
final += `${prefix}${quote}${next}${quote}`;
|
|
211
|
-
lastIndex = index + match[0].length;
|
|
212
|
-
}
|
|
213
|
-
final += replaced.slice(lastIndex);
|
|
214
|
-
let dynamicFinal = "";
|
|
215
|
-
lastIndex = 0;
|
|
216
|
-
for (const match of final.matchAll(dynamicImportPattern)) {
|
|
217
|
-
const index = match.index ?? 0;
|
|
218
|
-
dynamicFinal += final.slice(lastIndex, index);
|
|
219
|
-
const prefix = match[1] || "";
|
|
220
|
-
const quote = match[2] || "";
|
|
221
|
-
const specifier = match[3] || "";
|
|
222
|
-
const suffix = match[4] || "";
|
|
223
|
-
const next = await replaceSpecifier(specifier);
|
|
224
|
-
dynamicFinal += `${prefix}${quote}${next}${quote}${suffix}`;
|
|
225
|
-
lastIndex = index + match[0].length;
|
|
226
|
-
}
|
|
227
|
-
dynamicFinal += final.slice(lastIndex);
|
|
228
|
-
return dynamicFinal;
|
|
229
|
-
};
|
|
230
|
-
exports.rewriteJsText = rewriteJsText;
|
|
231
|
-
const rewriteEntryHtml = async (input) => {
|
|
232
|
-
const $ = cheerio.load(input.html);
|
|
1
|
+
import * as cheerio from "cheerio";
|
|
2
|
+
import { rewriteCssText } from "./css-rewrite.js";
|
|
3
|
+
import { hackHtml } from "./hack-html.js";
|
|
4
|
+
import { applyReplaceElements } from "./replace-elements.js";
|
|
5
|
+
import { rewriteJsText } from "./rewrite-links/js-imports.js";
|
|
6
|
+
import { shouldRewriteLinkHref } from "./rewrite-links/link-rel.js";
|
|
7
|
+
import { rewriteMetaRefresh } from "./rewrite-links/meta-refresh.js";
|
|
8
|
+
import { rewriteSrcsetValue } from "./rewrite-links/srcset.js";
|
|
9
|
+
import { resolveUrlValue } from "./rewrite-links/url-resolve.js";
|
|
10
|
+
export { rewriteJsText };
|
|
11
|
+
export const rewriteEntryHtml = async (input) => {
|
|
233
12
|
const baseUrl = input.entryUrl;
|
|
234
13
|
const resolve = input.resolve;
|
|
235
14
|
const shouldRewriteLinks = input.rewriteLinks !== false;
|
|
15
|
+
if (input.html.includes("__pagepocketPatched") ||
|
|
16
|
+
input.html.includes("__pagepocketOriginalFetch")) {
|
|
17
|
+
const $title = cheerio.load(input.html)("title").first().text() || undefined;
|
|
18
|
+
return { html: input.html, title: $title };
|
|
19
|
+
}
|
|
20
|
+
const hasPreloadMarker = input.html.includes("__pagepocketPatched");
|
|
21
|
+
const hasReplayMarker = input.html.includes("__pagepocketOriginalFetch");
|
|
22
|
+
const hasReplaceElements = Array.isArray(input.replaceElements) && input.replaceElements.length > 0;
|
|
23
|
+
if (!shouldRewriteLinks && !hasReplaceElements && hasPreloadMarker && hasReplayMarker) {
|
|
24
|
+
const $title = cheerio.load(input.html)("title").first().text() || undefined;
|
|
25
|
+
return { html: input.html, title: $title };
|
|
26
|
+
}
|
|
27
|
+
const $ = cheerio.load(input.html);
|
|
236
28
|
const rewriteAttr = (selector, attr) => {
|
|
237
29
|
$(selector).each((_, element) => {
|
|
238
30
|
const value = $(element).attr(attr);
|
|
@@ -304,7 +96,7 @@ const rewriteEntryHtml = async (input) => {
|
|
|
304
96
|
const cssText = $(element).html();
|
|
305
97
|
if (!cssText)
|
|
306
98
|
continue;
|
|
307
|
-
const rewritten = await
|
|
99
|
+
const rewritten = await rewriteCssText({
|
|
308
100
|
cssText,
|
|
309
101
|
cssUrl: baseUrl,
|
|
310
102
|
resolveUrl: resolve
|
|
@@ -318,7 +110,7 @@ const rewriteEntryHtml = async (input) => {
|
|
|
318
110
|
const styleText = $(element).attr("style");
|
|
319
111
|
if (!styleText)
|
|
320
112
|
continue;
|
|
321
|
-
const rewritten = await
|
|
113
|
+
const rewritten = await rewriteCssText({
|
|
322
114
|
cssText: styleText,
|
|
323
115
|
cssUrl: baseUrl,
|
|
324
116
|
resolveUrl: resolve
|
|
@@ -337,18 +129,18 @@ const rewriteEntryHtml = async (input) => {
|
|
|
337
129
|
const original = $(element).html();
|
|
338
130
|
if (!original)
|
|
339
131
|
continue;
|
|
340
|
-
const rewritten = await
|
|
132
|
+
const rewritten = await rewriteJsText(original, resolve, baseUrl);
|
|
341
133
|
if (rewritten !== original) {
|
|
342
134
|
$(element).html(rewritten);
|
|
343
135
|
}
|
|
344
136
|
}
|
|
345
137
|
}
|
|
346
|
-
|
|
138
|
+
hackHtml({
|
|
347
139
|
$,
|
|
348
|
-
baseUrl
|
|
140
|
+
baseUrl,
|
|
349
141
|
apiPath: input.apiPath
|
|
350
142
|
});
|
|
351
|
-
await
|
|
143
|
+
await applyReplaceElements({
|
|
352
144
|
$,
|
|
353
145
|
entryUrl: input.snapshotEntryUrl ?? baseUrl,
|
|
354
146
|
url: baseUrl,
|
|
@@ -356,6 +148,5 @@ const rewriteEntryHtml = async (input) => {
|
|
|
356
148
|
isEntryDocument: input.isEntryDocument ?? true
|
|
357
149
|
});
|
|
358
150
|
const title = $("title").first().text() || undefined;
|
|
359
|
-
return { html: $.html()
|
|
151
|
+
return { html: `${$.html()}\n`, title };
|
|
360
152
|
};
|
|
361
|
-
exports.rewriteEntryHtml = rewriteEntryHtml;
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import { createDefaultPathResolver, resolveCrossOrigin, withPrefixPathResolver } from "../path-resolver.js";
|
|
2
|
+
import { ensureLeadingSlash, sanitizePosixPath } from "../utils.js";
|
|
3
|
+
import { buildApiSnapshot } from "./api.js";
|
|
4
|
+
import { indexCapture } from "./capture-index/index.js";
|
|
5
|
+
import { emitDocumentFile } from "./emit-document.js";
|
|
6
|
+
import { groupResources } from "./grouping.js";
|
|
7
|
+
import { responseMimeType } from "./http.js";
|
|
8
|
+
import { docDirFromUrl, resolveSnapshotPath } from "./path-map.js";
|
|
9
|
+
import { buildResourcesPathSnapshot } from "./resources-path.js";
|
|
10
|
+
import { maybeRewriteScript, maybeRewriteStylesheet } from "./rewrite-resource.js";
|
|
11
|
+
// NOTE: helpers were extracted into snapshot-builder/* modules.
|
|
12
|
+
export const buildSnapshot = async (input) => {
|
|
13
|
+
const warnings = input.warnings;
|
|
14
|
+
const contentStore = input.capture.contentStore;
|
|
15
|
+
const indexed = await indexCapture({
|
|
16
|
+
capture: input.capture,
|
|
17
|
+
filter: input.filter,
|
|
18
|
+
limits: input.limits,
|
|
19
|
+
warnings
|
|
20
|
+
});
|
|
21
|
+
const groups = groupResources({
|
|
22
|
+
entryUrl: input.entryUrl,
|
|
23
|
+
resources: indexed.resources,
|
|
24
|
+
apiEntries: indexed.apiEntries,
|
|
25
|
+
warnings
|
|
26
|
+
});
|
|
27
|
+
const multiDoc = groups.length > 1;
|
|
28
|
+
const files = [];
|
|
29
|
+
let entryPath = "";
|
|
30
|
+
for (const group of groups) {
|
|
31
|
+
const docDir = multiDoc ? docDirFromUrl(group.url) : "";
|
|
32
|
+
const baseResolver = input.pathResolver ?? createDefaultPathResolver();
|
|
33
|
+
const resolver = multiDoc ? withPrefixPathResolver(baseResolver, docDir) : baseResolver;
|
|
34
|
+
const urlToPath = new Map();
|
|
35
|
+
for (const resource of group.resources) {
|
|
36
|
+
const path = resolver.resolve({
|
|
37
|
+
url: resource.request.url,
|
|
38
|
+
resourceType: resource.request.resourceType,
|
|
39
|
+
mimeType: resource.mimeType,
|
|
40
|
+
suggestedFilename: undefined,
|
|
41
|
+
isCrossOrigin: resolveCrossOrigin(resource.request.url, group.url),
|
|
42
|
+
entryUrl: group.url
|
|
43
|
+
});
|
|
44
|
+
urlToPath.set(resource.request.url, path);
|
|
45
|
+
}
|
|
46
|
+
const resolve = (absoluteUrl) => resolveSnapshotPath(urlToPath, absoluteUrl);
|
|
47
|
+
const apiPath = ensureLeadingSlash(multiDoc ? `${sanitizePosixPath(docDir)}/api.json` : "/api.json");
|
|
48
|
+
for (const resource of group.resources) {
|
|
49
|
+
if (resource.request.resourceType === "document") {
|
|
50
|
+
const path = urlToPath.get(resource.request.url) ?? "/index.html";
|
|
51
|
+
const { file } = await emitDocumentFile({
|
|
52
|
+
resource,
|
|
53
|
+
path,
|
|
54
|
+
entryUrl: input.entryUrl,
|
|
55
|
+
groupUrl: group.url,
|
|
56
|
+
apiPath,
|
|
57
|
+
resolve,
|
|
58
|
+
rewriteEntry: input.rewriteEntry,
|
|
59
|
+
replaceElements: input.replaceElements,
|
|
60
|
+
contentStore,
|
|
61
|
+
snapshotEntryUrl: input.entryUrl
|
|
62
|
+
});
|
|
63
|
+
files.push(file);
|
|
64
|
+
if (resource.request.url === input.entryUrl || !entryPath) {
|
|
65
|
+
entryPath = path;
|
|
66
|
+
}
|
|
67
|
+
continue;
|
|
68
|
+
}
|
|
69
|
+
const afterCss = await maybeRewriteStylesheet({
|
|
70
|
+
resource,
|
|
71
|
+
resolve,
|
|
72
|
+
contentStore,
|
|
73
|
+
rewriteCSS: input.rewriteCSS
|
|
74
|
+
});
|
|
75
|
+
const afterJs = await maybeRewriteScript({
|
|
76
|
+
resource: { ...resource, contentRef: afterCss.contentRef, size: afterCss.size },
|
|
77
|
+
resolve,
|
|
78
|
+
contentStore
|
|
79
|
+
});
|
|
80
|
+
const path = urlToPath.get(resource.request.url) ??
|
|
81
|
+
resolver.resolve({
|
|
82
|
+
url: resource.request.url,
|
|
83
|
+
resourceType: resource.request.resourceType,
|
|
84
|
+
mimeType: resourceMimeType(resource),
|
|
85
|
+
suggestedFilename: undefined,
|
|
86
|
+
isCrossOrigin: resolveCrossOrigin(resource.request.url, group.url),
|
|
87
|
+
entryUrl: group.url
|
|
88
|
+
});
|
|
89
|
+
files.push({
|
|
90
|
+
path,
|
|
91
|
+
mimeType: resourceMimeType(resource),
|
|
92
|
+
size: afterJs.size,
|
|
93
|
+
source: afterJs.contentRef,
|
|
94
|
+
originalUrl: resource.request.url,
|
|
95
|
+
resourceType: resource.request.resourceType,
|
|
96
|
+
headers: resource.response.headers
|
|
97
|
+
});
|
|
98
|
+
}
|
|
99
|
+
const apiSnapshot = buildApiSnapshot(group.url, input.createdAt, group.apiEntries);
|
|
100
|
+
const apiBytes = new TextEncoder().encode(`${JSON.stringify(apiSnapshot, null, 2)}\n`);
|
|
101
|
+
const apiRef = await contentStore.put({ kind: "buffer", data: apiBytes }, { url: apiPath, mimeType: "application/json", sizeHint: apiBytes.byteLength });
|
|
102
|
+
files.push({
|
|
103
|
+
path: apiPath,
|
|
104
|
+
mimeType: "application/json",
|
|
105
|
+
size: apiBytes.byteLength,
|
|
106
|
+
source: apiRef,
|
|
107
|
+
originalUrl: apiPath
|
|
108
|
+
});
|
|
109
|
+
}
|
|
110
|
+
{
|
|
111
|
+
const resourcesPath = buildResourcesPathSnapshot(input.createdAt, files);
|
|
112
|
+
const bytes = new TextEncoder().encode(`${JSON.stringify(resourcesPath, null, 2)}\n`);
|
|
113
|
+
files.push({
|
|
114
|
+
path: "/resources_path.json",
|
|
115
|
+
mimeType: "application/json",
|
|
116
|
+
size: bytes.byteLength,
|
|
117
|
+
source: { kind: "memory", data: bytes }
|
|
118
|
+
});
|
|
119
|
+
}
|
|
120
|
+
return {
|
|
121
|
+
root: {
|
|
122
|
+
kind: "directory",
|
|
123
|
+
path: "",
|
|
124
|
+
entries: files.map((file) => ({
|
|
125
|
+
kind: "file",
|
|
126
|
+
path: file.path,
|
|
127
|
+
source: { kind: "content-ref", ref: file.source }
|
|
128
|
+
}))
|
|
129
|
+
},
|
|
130
|
+
content: {
|
|
131
|
+
open: (ref) => contentStore.open(ref),
|
|
132
|
+
dispose: async () => {
|
|
133
|
+
await contentStore.dispose?.();
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
};
|
|
137
|
+
};
|
|
138
|
+
/**
 * Picks the MIME type to report for a stored resource.
 *
 * The `mimeType` recorded on the resource wins when it is truthy; otherwise
 * the value derived from the response via `responseMimeType` is used. Any
 * falsy outcome (empty string, null, ...) is normalised to `undefined`.
 */
const resourceMimeType = (resource) => {
    const recorded = resource.mimeType;
    if (recorded) {
        return recorded;
    }
    const fromResponse = responseMimeType(resource.response);
    return fromResponse ? fromResponse : undefined;
};
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { CaptureArtifacts } from "../../core/capture/types.js";
|
|
2
|
+
import type { ResourceFilter } from "../../types.js";
|
|
3
|
+
import type { BuildLimits } from "../types.js";
|
|
4
|
+
import type { ApiEntry, StoredResource } from "./types.js";
|
|
5
|
+
/**
 * Indexes the network events of a capture.
 *
 * @param input.capture  Capture artifacts: the recorded events plus the
 *                       content store holding the response bodies.
 * @param input.filter   Decides, per request/response pair, whether the
 *                       response is kept as a resource.
 * @param input.limits   Optional caps on resource count and byte sizes.
 * @param input.warnings Caller-owned list; a message is appended for every
 *                       resource that is skipped (missing body, limit hit).
 * @returns The resources to store and the API (fetch/XHR) entries recorded.
 */
export declare const indexCapture: (input: {
    capture: CaptureArtifacts;
    filter: ResourceFilter;
    limits?: BuildLimits;
    warnings: Array<string>;
}) => Promise<{
    resources: Array<StoredResource>;
    apiEntries: Array<ApiEntry>;
}>;
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
import { streamToUint8Array } from "../../utils/streams.js";
|
|
2
|
+
import { bodyToTextOrBase64 } from "../../utils.js";
|
|
3
|
+
import { headersListToRecord, parseContentLength, responseMimeType } from "../http.js";
|
|
4
|
+
/**
 * Tells whether a request was issued through `fetch` or XHR, i.e. whether it
 * counts as an API call. A missing request is treated as "not an API call".
 */
const isApiResource = (request) => {
    switch (request?.resourceType) {
        case "fetch":
        case "xhr":
            return true;
        default:
            return false;
    }
};
|
|
8
|
+
/**
 * Groups HTTP events by `requestId`.
 *
 * Each map value is a plain object with up to three slots - `request`,
 * `response` and `failed` - holding the most recent event of the matching
 * type. Events of any other type are ignored and never create an entry.
 *
 * @param events Iterable of capture events.
 * @returns Map from request id to its grouped events, in first-seen order.
 */
const buildByRequestId = (events) => {
    const grouped = new Map();
    // Translates an event type into the slot it fills; undefined = ignore.
    const slotFor = (type) => {
        switch (type) {
            case "http.request":
                return "request";
            case "http.response":
                return "response";
            case "http.failed":
                return "failed";
            default:
                return undefined;
        }
    };
    for (const event of events) {
        const slot = slotFor(event.type);
        if (slot === undefined) {
            continue;
        }
        let entry = grouped.get(event.requestId);
        if (!entry) {
            entry = {};
            grouped.set(event.requestId, entry);
        }
        // A later event of the same type overwrites the earlier one.
        entry[slot] = event;
    }
    return grouped;
};
|
|
33
|
+
/**
 * Indexes the network events of a capture into the resources to store and
 * the API (fetch/XHR) exchanges to record.
 *
 * For every request id that has a request event:
 *  - a failure event takes precedence over a response; failed API calls are
 *    recorded with their error text, other failed requests are dropped;
 *  - an API response is recorded together with its body (as text or base64);
 *  - a response accepted by `input.filter.shouldSave` becomes a resource,
 *    unless its body is missing or one of the limits in `input.limits` would
 *    be exceeded, in which case a message is appended to `input.warnings`.
 *
 * Limits are checked by truthiness, so an absent limit or a limit of `0`
 * means "no limit".
 *
 * @param input.capture  Capture artifacts (`events` and `contentStore`).
 * @param input.filter   Filter exposing `shouldSave(request, response)`.
 * @param input.limits   Optional `maxSingleResourceBytes`, `maxResources`
 *                       and `maxTotalBytes` caps.
 * @param input.warnings Caller-owned array that receives skip messages.
 * @returns `{ resources, apiEntries }`.
 */
export const indexCapture = async (input) => {
    const byId = buildByRequestId(input.capture.events);
    const resources = [];
    const apiEntries = [];
    const apiSeen = new Set();
    let totalBytes = 0;
    // Buffers the complete body stored behind a content ref.
    const readBody = async (bodyRef) => {
        const stream = await input.capture.contentStore.open(bodyRef);
        return streamToUint8Array(stream);
    };
    const recordApiFailure = (request, failed) => {
        if (apiSeen.has(request.requestId))
            return;
        apiSeen.add(request.requestId);
        apiEntries.push({
            request,
            record: {
                url: request.url,
                method: request.method,
                requestHeaders: request.headers,
                error: failed.errorText,
                timestamp: failed.timestamp
            }
        });
    };
    // Records an API response and hands back the body bytes it had to read
    // (undefined when nothing was read) so the caller does not read them again.
    const recordApiResponse = async (request, response, bodyRef) => {
        if (apiSeen.has(request.requestId))
            return undefined;
        apiSeen.add(request.requestId);
        const record = {
            url: request.url,
            method: request.method,
            requestHeaders: request.headers,
            status: response.status,
            statusText: response.statusText,
            responseHeaders: response.headers,
            timestamp: response.timestamp
        };
        let bytes;
        let bodyFields = {};
        if (bodyRef) {
            bytes = await readBody(bodyRef);
            // Empty bodies are recorded without any body/encoding fields.
            if (bytes.byteLength > 0) {
                const mimeType = responseMimeType(response);
                const decoded = bodyToTextOrBase64(bytes, mimeType);
                bodyFields = decoded.encoding === "text"
                    ? { responseBody: decoded.text, responseEncoding: "text" }
                    : { responseBodyBase64: decoded.base64, responseEncoding: "base64" };
            }
        }
        apiEntries.push({ request, record: { ...record, ...bodyFields } });
        return bytes;
    };
    for (const record of byId.values()) {
        if (!record.request || record.request.type !== "http.request") {
            continue;
        }
        const requestEvent = record.request;
        const request = {
            type: "request",
            requestId: requestEvent.requestId,
            url: requestEvent.url,
            method: requestEvent.method,
            headers: headersListToRecord(requestEvent.headers),
            timestamp: requestEvent.timestamp,
            frameId: requestEvent.frameId,
            resourceType: requestEvent.resourceType,
            initiator: requestEvent.initiator
        };
        // A failure wins over any response captured for the same request.
        if (record.failed && record.failed.type === "http.failed") {
            const failedEvent = record.failed;
            const failed = {
                type: "failed",
                requestId: failedEvent.requestId,
                url: failedEvent.url,
                errorText: failedEvent.errorText,
                timestamp: failedEvent.timestamp
            };
            if (isApiResource(request)) {
                recordApiFailure(request, failed);
            }
            continue;
        }
        if (!record.response || record.response.type !== "http.response") {
            continue;
        }
        const responseEvent = record.response;
        const response = {
            type: "response",
            requestId: responseEvent.requestId,
            url: responseEvent.url,
            status: responseEvent.status,
            statusText: responseEvent.statusText,
            headers: headersListToRecord(responseEvent.headers),
            timestamp: responseEvent.timestamp,
            mimeType: responseEvent.mimeType,
            fromDiskCache: responseEvent.fromDiskCache,
            fromServiceWorker: responseEvent.fromServiceWorker,
            body: undefined
        };
        const bodyRef = responseEvent.bodyRef;
        let apiBodyBytes;
        if (isApiResource(request)) {
            apiBodyBytes = await recordApiResponse(request, response, bodyRef);
        }
        if (!input.filter.shouldSave(request, response)) {
            continue;
        }
        if (!bodyRef) {
            input.warnings.push(`Missing body for ${request.url}`);
            continue;
        }
        // Reuse the bytes already buffered for the API record, if any, rather
        // than opening and draining the same body a second time.
        const bytes = apiBodyBytes ?? (await readBody(bodyRef));
        const byteLength = bytes.byteLength;
        if (input.limits?.maxSingleResourceBytes && byteLength > input.limits.maxSingleResourceBytes) {
            input.warnings.push(`Resource too large: ${request.url}`);
            continue;
        }
        if (input.limits?.maxResources && resources.length >= input.limits.maxResources) {
            input.warnings.push(`Resource limit reached at ${request.url}`);
            continue;
        }
        if (input.limits?.maxTotalBytes && totalBytes + byteLength > input.limits.maxTotalBytes) {
            input.warnings.push(`Total byte limit reached at ${request.url}`);
            continue;
        }
        totalBytes += byteLength;
        resources.push({
            request,
            response,
            contentRef: bodyRef,
            // Report the measured length of the stored body. The Content-Length
            // header can disagree with it (e.g. when a content coding was applied
            // in transit), and the limits above are enforced on the measured
            // length, so the header is not used here.
            size: byteLength,
            mimeType: responseMimeType(response)
        });
    }
    return { resources, apiEntries };
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { indexCapture } from "./index-capture.js";
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { ApiRecord, ContentRef, NetworkRequestEvent, NetworkResponseEvent } from "../../types.js";
|
|
2
|
+
/** A captured response kept as a snapshot resource, with its originating request. */
export type StoredResource = {
    /** The request that produced this resource. */
    request: NetworkRequestEvent;
    /** The response whose body is stored. */
    response: NetworkResponseEvent;
    /** Reference to the body inside the content store. */
    contentRef: ContentRef;
    /** Size recorded for the resource, in bytes. */
    size: number;
    /** MIME type of the resource, when one is known. */
    mimeType?: string;
};
|
|
9
|
+
/** One recorded API exchange paired with the request event it came from. */
export type ApiEntry = {
    /** The record describing the exchange. */
    record: ApiRecord;
    /** The originating request event. */
    request: NetworkRequestEvent;
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|