@pagepocket/lib 0.7.1 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. package/README.md +7 -6
  2. package/dist/build-snapshot-from-bundle.d.ts +23 -0
  3. package/dist/build-snapshot-from-bundle.js +68 -0
  4. package/dist/builtin-blacklist.js +3 -6
  5. package/dist/bundle/from-network-store.d.ts +10 -0
  6. package/dist/bundle/from-network-store.js +26 -0
  7. package/dist/bundle/types.d.ts +32 -0
  8. package/dist/bundle/types.js +2 -0
  9. package/dist/capture/index.d.ts +14 -0
  10. package/dist/capture/index.js +86 -0
  11. package/dist/capture/memory-content-store.d.ts +4 -0
  12. package/dist/capture/memory-content-store.js +42 -0
  13. package/dist/capture/types.d.ts +61 -0
  14. package/dist/capture/types.js +2 -0
  15. package/dist/content-store.js +3 -8
  16. package/dist/content-type.d.ts +1 -1
  17. package/dist/content-type.js +2 -28
  18. package/dist/core/_impl/completion.d.ts +4 -0
  19. package/dist/core/_impl/completion.js +29 -0
  20. package/dist/core/_impl/content-store.d.ts +21 -0
  21. package/dist/core/_impl/content-store.js +91 -0
  22. package/dist/core/_impl/debug.d.ts +1 -0
  23. package/dist/core/_impl/debug.js +16 -0
  24. package/dist/core/_impl/inflight-tracker.d.ts +19 -0
  25. package/dist/core/_impl/inflight-tracker.js +48 -0
  26. package/dist/core/_impl/pagepocket.d.ts +27 -0
  27. package/dist/core/_impl/pagepocket.js +155 -0
  28. package/dist/core/capture/_impl/memory-content-store.d.ts +4 -0
  29. package/dist/core/capture/_impl/memory-content-store.js +42 -0
  30. package/dist/core/capture/_impl/types.d.ts +61 -0
  31. package/dist/core/capture/_impl/types.js +2 -0
  32. package/dist/core/capture/internal/memory-content-store.d.ts +4 -0
  33. package/dist/core/capture/internal/memory-content-store.js +42 -0
  34. package/dist/core/capture/internal/types.d.ts +61 -0
  35. package/dist/core/capture/internal/types.js +2 -0
  36. package/dist/core/capture/memory-content-store.d.ts +4 -0
  37. package/dist/core/capture/memory-content-store.js +38 -0
  38. package/dist/core/capture/types.d.ts +61 -0
  39. package/dist/core/capture/types.js +1 -0
  40. package/dist/core/completion.d.ts +4 -0
  41. package/dist/core/completion.js +23 -0
  42. package/dist/core/content-store.d.ts +21 -0
  43. package/dist/core/content-store.js +54 -0
  44. package/dist/core/debug.d.ts +1 -0
  45. package/dist/core/debug.js +12 -0
  46. package/dist/core/file-tree-merge.d.ts +2 -0
  47. package/dist/core/file-tree-merge.js +27 -0
  48. package/dist/core/file-tree.d.ts +36 -0
  49. package/dist/core/file-tree.js +1 -0
  50. package/dist/core/inflight-tracker.d.ts +19 -0
  51. package/dist/core/inflight-tracker.js +44 -0
  52. package/dist/core/internal/completion.d.ts +4 -0
  53. package/dist/core/internal/completion.js +29 -0
  54. package/dist/core/internal/content-store.d.ts +21 -0
  55. package/dist/core/internal/content-store.js +91 -0
  56. package/dist/core/internal/debug.d.ts +1 -0
  57. package/dist/core/internal/debug.js +16 -0
  58. package/dist/core/internal/inflight-tracker.d.ts +19 -0
  59. package/dist/core/internal/inflight-tracker.js +48 -0
  60. package/dist/core/internal/pagepocket.d.ts +27 -0
  61. package/dist/core/internal/pagepocket.js +155 -0
  62. package/dist/core/pagepocket.d.ts +38 -0
  63. package/dist/core/pagepocket.js +57 -0
  64. package/dist/core/plugin/_impl/context.d.ts +47 -0
  65. package/dist/core/plugin/_impl/context.js +142 -0
  66. package/dist/core/plugin/_impl/runner.d.ts +12 -0
  67. package/dist/core/plugin/_impl/runner.js +232 -0
  68. package/dist/core/plugin/_impl/types.d.ts +108 -0
  69. package/dist/core/plugin/_impl/types.js +2 -0
  70. package/dist/core/plugin/context.d.ts +47 -0
  71. package/dist/core/plugin/context.js +205 -0
  72. package/dist/core/plugin/internal/context.d.ts +47 -0
  73. package/dist/core/plugin/internal/context.js +142 -0
  74. package/dist/core/plugin/internal/runner.d.ts +12 -0
  75. package/dist/core/plugin/internal/runner.js +232 -0
  76. package/dist/core/plugin/internal/types.d.ts +108 -0
  77. package/dist/core/plugin/internal/types.js +2 -0
  78. package/dist/core/plugin/runner-utils.d.ts +9 -0
  79. package/dist/core/plugin/runner-utils.js +29 -0
  80. package/dist/core/plugin/runner.d.ts +12 -0
  81. package/dist/core/plugin/runner.js +118 -0
  82. package/dist/core/plugin/types.d.ts +117 -0
  83. package/dist/core/plugin/types.js +1 -0
  84. package/dist/core/runtime/types.d.ts +14 -0
  85. package/dist/core/runtime/types.js +2 -0
  86. package/dist/css-rewrite.js +1 -5
  87. package/dist/debug.d.ts +0 -1
  88. package/dist/debug.js +3 -5
  89. package/dist/files/types.d.ts +41 -0
  90. package/dist/files/types.js +2 -0
  91. package/dist/hack-html.js +20 -13
  92. package/dist/hackers/index.d.ts +1 -1
  93. package/dist/hackers/index.js +24 -27
  94. package/dist/hackers/preload-fetch.d.ts +1 -1
  95. package/dist/hackers/preload-fetch.js +1 -4
  96. package/dist/hackers/preload-xhr.d.ts +1 -1
  97. package/dist/hackers/preload-xhr.js +1 -4
  98. package/dist/hackers/replay-beacon.d.ts +1 -1
  99. package/dist/hackers/replay-beacon.js +1 -4
  100. package/dist/hackers/replay-block-text-fragment.d.ts +1 -1
  101. package/dist/hackers/replay-block-text-fragment.js +1 -4
  102. package/dist/hackers/replay-css-proxy.d.ts +1 -1
  103. package/dist/hackers/replay-css-proxy.js +9 -12
  104. package/dist/hackers/replay-dom-rewrite.d.ts +1 -1
  105. package/dist/hackers/replay-dom-rewrite.js +165 -154
  106. package/dist/hackers/replay-eventsource.d.ts +1 -1
  107. package/dist/hackers/replay-eventsource.js +1 -4
  108. package/dist/hackers/replay-fetch.d.ts +1 -1
  109. package/dist/hackers/replay-fetch.js +1 -4
  110. package/dist/hackers/replay-history-path.d.ts +1 -1
  111. package/dist/hackers/replay-history-path.js +1 -4
  112. package/dist/hackers/replay-svg-image.d.ts +1 -1
  113. package/dist/hackers/replay-svg-image.js +1 -4
  114. package/dist/hackers/replay-websocket.d.ts +1 -1
  115. package/dist/hackers/replay-websocket.js +1 -4
  116. package/dist/hackers/replay-xhr.d.ts +1 -1
  117. package/dist/hackers/replay-xhr.js +1 -4
  118. package/dist/hackers/types.js +1 -2
  119. package/dist/index.d.ts +29 -13
  120. package/dist/index.js +23 -44
  121. package/dist/kind-map.d.ts +68 -0
  122. package/dist/kind-map.js +58 -0
  123. package/dist/network-store.js +12 -1
  124. package/dist/pagepocket.d.ts +19 -4
  125. package/dist/pagepocket.js +36 -102
  126. package/dist/path-resolver.d.ts +1 -2
  127. package/dist/path-resolver.js +9 -16
  128. package/dist/plugin/builtins/build-snapshot-plugin.d.ts +5 -0
  129. package/dist/plugin/builtins/build-snapshot-plugin.js +84 -0
  130. package/dist/plugin/builtins/replace-elements-plugin.d.ts +8 -0
  131. package/dist/plugin/builtins/replace-elements-plugin.js +13 -0
  132. package/dist/plugin/builtins/to-directory-plugin.d.ts +7 -0
  133. package/dist/plugin/builtins/to-directory-plugin.js +20 -0
  134. package/dist/plugin/builtins/to-zip-plugin.d.ts +5 -0
  135. package/dist/plugin/builtins/to-zip-plugin.js +19 -0
  136. package/dist/plugin/context.d.ts +47 -0
  137. package/dist/plugin/context.js +142 -0
  138. package/dist/plugin/runner.d.ts +12 -0
  139. package/dist/plugin/runner.js +232 -0
  140. package/dist/plugin/types.d.ts +108 -0
  141. package/dist/plugin/types.js +2 -0
  142. package/dist/plugins/build-files-from-capture.d.ts +5 -0
  143. package/dist/plugins/build-files-from-capture.js +85 -0
  144. package/dist/plugins/build-warc.d.ts +5 -0
  145. package/dist/plugins/build-warc.js +225 -0
  146. package/dist/plugins/builtins/manifest.d.ts +2 -0
  147. package/dist/plugins/builtins/manifest.js +42 -0
  148. package/dist/plugins/builtins/snapshot-directory.d.ts +2 -0
  149. package/dist/plugins/builtins/snapshot-directory.js +24 -0
  150. package/dist/plugins/builtins/snapshot-zip.d.ts +2 -0
  151. package/dist/plugins/builtins/snapshot-zip.js +25 -0
  152. package/dist/plugins/capture-http-lighterceptor.d.ts +5 -0
  153. package/dist/plugins/capture-http-lighterceptor.js +85 -0
  154. package/dist/plugins/capture-http-puppeteer.d.ts +5 -0
  155. package/dist/plugins/capture-http-puppeteer.js +85 -0
  156. package/dist/plugins/host.d.ts +37 -0
  157. package/dist/plugins/host.js +105 -0
  158. package/dist/plugins/index.d.ts +6 -0
  159. package/dist/plugins/index.js +11 -0
  160. package/dist/plugins/ordering.d.ts +2 -0
  161. package/dist/plugins/ordering.js +19 -0
  162. package/dist/plugins/types.d.ts +51 -0
  163. package/dist/plugins/types.js +2 -0
  164. package/dist/preload.js +3 -7
  165. package/dist/replace-elements/actions.d.ts +5 -0
  166. package/dist/replace-elements/actions.js +86 -0
  167. package/dist/replace-elements/match.d.ts +5 -0
  168. package/dist/replace-elements/match.js +46 -0
  169. package/dist/replace-elements/normalize.d.ts +21 -0
  170. package/dist/replace-elements/normalize.js +50 -0
  171. package/dist/replace-elements.d.ts +1 -1
  172. package/dist/replace-elements.js +5 -185
  173. package/dist/replay/match-api.d.ts +10 -0
  174. package/dist/replay/match-api.js +162 -0
  175. package/dist/replay/templates/match-api-source.d.ts +1 -0
  176. package/dist/replay/templates/match-api-source.js +137 -0
  177. package/dist/replay/templates/replay-script-template.d.ts +5 -0
  178. package/dist/replay/templates/replay-script-template.js +337 -0
  179. package/dist/replay/templates/resource-proxy-script.d.ts +1 -0
  180. package/dist/replay/templates/resource-proxy-script.js +274 -0
  181. package/dist/replay-script.d.ts +3 -10
  182. package/dist/replay-script.js +11 -625
  183. package/dist/resource-filter.d.ts +1 -1
  184. package/dist/resource-filter.js +1 -5
  185. package/dist/resource-proxy/escape-percent.d.ts +1 -0
  186. package/dist/resource-proxy/escape-percent.js +12 -0
  187. package/dist/resource-proxy/multimap.d.ts +3 -0
  188. package/dist/resource-proxy/multimap.js +18 -0
  189. package/dist/resource-proxy/pathname-variants.d.ts +3 -0
  190. package/dist/resource-proxy/pathname-variants.js +54 -0
  191. package/dist/resource-proxy.d.ts +4 -2
  192. package/dist/resource-proxy.js +48 -117
  193. package/dist/resources.js +4 -42
  194. package/dist/rewrite-links/js-imports.d.ts +3 -0
  195. package/dist/rewrite-links/js-imports.js +56 -0
  196. package/dist/rewrite-links/link-rel.d.ts +2 -0
  197. package/dist/rewrite-links/link-rel.js +10 -0
  198. package/dist/rewrite-links/meta-refresh.d.ts +3 -0
  199. package/dist/rewrite-links/meta-refresh.js +22 -0
  200. package/dist/rewrite-links/skip.d.ts +1 -0
  201. package/dist/rewrite-links/skip.js +10 -0
  202. package/dist/rewrite-links/srcset.d.ts +3 -0
  203. package/dist/rewrite-links/srcset.js +63 -0
  204. package/dist/rewrite-links/url-resolve.d.ts +3 -0
  205. package/dist/rewrite-links/url-resolve.js +13 -0
  206. package/dist/rewrite-links.d.ts +3 -3
  207. package/dist/rewrite-links.js +31 -240
  208. package/dist/snapshot-builder/api.d.ts +3 -0
  209. package/dist/snapshot-builder/api.js +6 -0
  210. package/dist/snapshot-builder/build-snapshot.d.ts +3 -0
  211. package/dist/snapshot-builder/build-snapshot.js +138 -0
  212. package/dist/snapshot-builder/capture-index/index-capture.d.ts +13 -0
  213. package/dist/snapshot-builder/capture-index/index-capture.js +168 -0
  214. package/dist/snapshot-builder/capture-index/index.d.ts +2 -0
  215. package/dist/snapshot-builder/capture-index/index.js +1 -0
  216. package/dist/snapshot-builder/capture-index/types.d.ts +12 -0
  217. package/dist/snapshot-builder/capture-index/types.js +1 -0
  218. package/dist/snapshot-builder/capture-index.d.ts +12 -0
  219. package/dist/snapshot-builder/capture-index.js +173 -0
  220. package/dist/snapshot-builder/emit-document.d.ts +24 -0
  221. package/dist/snapshot-builder/emit-document.js +50 -0
  222. package/dist/snapshot-builder/grouping.d.ts +8 -0
  223. package/dist/snapshot-builder/grouping.js +87 -0
  224. package/dist/snapshot-builder/http.d.ts +6 -0
  225. package/dist/snapshot-builder/http.js +28 -0
  226. package/dist/snapshot-builder/index.d.ts +4 -0
  227. package/dist/snapshot-builder/index.js +2 -0
  228. package/dist/snapshot-builder/path-map.d.ts +3 -0
  229. package/dist/snapshot-builder/path-map.js +35 -0
  230. package/dist/snapshot-builder/resources-path.d.ts +23 -0
  231. package/dist/snapshot-builder/resources-path.js +47 -0
  232. package/dist/snapshot-builder/rewrite-resource.d.ts +18 -0
  233. package/dist/snapshot-builder/rewrite-resource.js +52 -0
  234. package/dist/snapshot-builder/types.d.ts +37 -0
  235. package/dist/snapshot-builder/types.js +2 -0
  236. package/dist/snapshot-builder.d.ts +12 -8
  237. package/dist/snapshot-builder.js +252 -27
  238. package/dist/types.d.ts +122 -78
  239. package/dist/types.js +4 -2
  240. package/dist/units/contracts-bridge.d.ts +76 -0
  241. package/dist/units/contracts-bridge.js +6 -0
  242. package/dist/units/index.d.ts +4 -0
  243. package/dist/units/index.js +2 -0
  244. package/dist/units/runner.d.ts +11 -0
  245. package/dist/units/runner.js +270 -0
  246. package/dist/units/types.d.ts +39 -0
  247. package/dist/units/types.js +1 -0
  248. package/dist/utils/streams.d.ts +2 -0
  249. package/dist/utils/streams.js +29 -0
  250. package/dist/utils.d.ts +35 -1
  251. package/dist/utils.js +107 -29
  252. package/dist/v3/contracts-bridge.d.ts +69 -0
  253. package/dist/v3/contracts-bridge.js +5 -0
  254. package/dist/v3/index.d.ts +4 -0
  255. package/dist/v3/index.js +2 -0
  256. package/dist/v3/runner.d.ts +20 -0
  257. package/dist/v3/runner.js +245 -0
  258. package/dist/v3/types.d.ts +39 -0
  259. package/dist/v3/types.js +1 -0
  260. package/dist/writers.js +3 -1
  261. package/package.json +11 -3
@@ -0,0 +1,12 @@
1
+ import type { ResourceFilter } from "../types";
2
+ import type { ApiEntry, BuildLimits, StoredResource } from "./types";
3
+ import type { CaptureArtifacts } from "../capture/types";
4
+ export declare const indexCapture: (input: {
5
+ capture: CaptureArtifacts;
6
+ filter: ResourceFilter;
7
+ limits?: BuildLimits;
8
+ warnings: string[];
9
+ }) => Promise<{
10
+ resources: StoredResource[];
11
+ apiEntries: ApiEntry[];
12
+ }>;
@@ -0,0 +1,173 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.indexCapture = void 0;
4
+ const utils_1 = require("../utils");
5
+ const streams_1 = require("../utils/streams");
6
+ const http_1 = require("./http");
7
/**
 * Tells whether a request was issued through a programmatic API channel.
 *
 * Only the "fetch" and "xhr" resource types qualify. A missing request, a
 * request without a resource type, or any other resource type yields false.
 */
const isApiResource = (request) => ["fetch", "xhr"].includes(request?.resourceType);
11
+ const buildByRequestId = (events) => {
12
+ const byId = new Map();
13
+ const ensure = (requestId) => {
14
+ const existing = byId.get(requestId);
15
+ if (existing) {
16
+ return existing;
17
+ }
18
+ const created = {};
19
+ byId.set(requestId, created);
20
+ return created;
21
+ };
22
+ for (const event of events) {
23
+ if (event.type === "http.request") {
24
+ ensure(event.requestId).request = event;
25
+ continue;
26
+ }
27
+ if (event.type === "http.response") {
28
+ ensure(event.requestId).response = event;
29
+ continue;
30
+ }
31
+ if (event.type === "http.failed") {
32
+ ensure(event.requestId).failed = event;
33
+ }
34
+ }
35
+ return byId;
36
+ };
37
/**
 * Walks every captured HTTP exchange and sorts it into savable resources and
 * API entries.
 *
 * For each request id that has an "http.request" event:
 *  - if an "http.failed" event exists, nothing is saved; fetch/xhr requests
 *    get an API entry carrying the error text;
 *  - otherwise, if an "http.response" event exists, fetch/xhr requests get an
 *    API entry (with the decoded body when one is stored and non-empty), and
 *    the exchange is kept as a resource when `input.filter.shouldSave`
 *    accepts it, a body reference is present, and no limit is exceeded.
 *
 * Skipped resources are reported by pushing a message onto `input.warnings`.
 *
 * @param input `capture` (events + contentStore), `filter`, optional `limits`,
 *              and the mutable `warnings` array.
 * @returns `{ resources, apiEntries }`.
 */
const indexCapture = async (input) => {
    const exchanges = buildByRequestId(input.capture.events);
    const resources = [];
    const apiEntries = [];
    const recordedApiIds = new Set();
    let bytesKept = 0;

    // Reads a stored body fully into memory.
    const readBody = async (ref) => {
        const stream = await input.capture.contentStore.open(ref);
        return (0, streams_1.streamToUint8Array)(stream);
    };

    // Returns true the first time a request id is seen, false afterwards.
    const claimApiSlot = (requestId) => {
        if (recordedApiIds.has(requestId)) {
            return false;
        }
        recordedApiIds.add(requestId);
        return true;
    };

    const recordApiFailure = (request, failed) => {
        if (!claimApiSlot(request.requestId)) {
            return;
        }
        const { url, method, headers: requestHeaders } = request;
        apiEntries.push({
            request,
            record: {
                url,
                method,
                requestHeaders,
                error: failed.errorText,
                timestamp: failed.timestamp
            }
        });
    };

    const recordApiResponse = async (request, response, bodyRef) => {
        if (!claimApiSlot(request.requestId)) {
            return;
        }
        const record = {
            url: request.url,
            method: request.method,
            requestHeaders: request.headers,
            status: response.status,
            statusText: response.statusText,
            responseHeaders: response.headers,
            timestamp: response.timestamp
        };
        if (bodyRef) {
            const bytes = await readBody(bodyRef);
            if (bytes.byteLength > 0) {
                const decoded = (0, utils_1.bodyToTextOrBase64)(bytes, (0, http_1.responseMimeType)(response));
                // Body fields are only attached for non-empty bodies.
                Object.assign(record, decoded.encoding === "text"
                    ? { responseBody: decoded.text, responseEncoding: "text" }
                    : { responseBodyBase64: decoded.base64, responseEncoding: "base64" });
            }
        }
        apiEntries.push({ request, record });
    };

    // Returns the warning for the first limit this resource would break, or
    // null when it fits. Limits that are unset or zero are not enforced.
    const limitViolation = (request, byteLength) => {
        const limits = input.limits;
        if (limits?.maxSingleResourceBytes && byteLength > limits.maxSingleResourceBytes) {
            return `Resource too large: ${request.url}`;
        }
        if (limits?.maxResources && resources.length >= limits.maxResources) {
            return `Resource limit reached at ${request.url}`;
        }
        if (limits?.maxTotalBytes && bytesKept + byteLength > limits.maxTotalBytes) {
            return `Total byte limit reached at ${request.url}`;
        }
        return null;
    };

    for (const exchange of exchanges.values()) {
        const requestEvent = exchange.request;
        if (requestEvent?.type !== "http.request") {
            continue;
        }
        const request = {
            type: "request",
            requestId: requestEvent.requestId,
            url: requestEvent.url,
            method: requestEvent.method,
            headers: (0, http_1.headersListToRecord)(requestEvent.headers),
            timestamp: requestEvent.timestamp,
            frameId: requestEvent.frameId,
            resourceType: requestEvent.resourceType,
            initiator: requestEvent.initiator
        };
        const isApi = isApiResource(request);

        // A failure takes precedence over any response seen for the same id.
        const failedEvent = exchange.failed;
        if (failedEvent?.type === "http.failed") {
            if (isApi) {
                recordApiFailure(request, {
                    type: "failed",
                    requestId: failedEvent.requestId,
                    url: failedEvent.url,
                    errorText: failedEvent.errorText,
                    timestamp: failedEvent.timestamp
                });
            }
            continue;
        }

        const responseEvent = exchange.response;
        if (responseEvent?.type !== "http.response") {
            continue;
        }
        const response = {
            type: "response",
            requestId: responseEvent.requestId,
            url: responseEvent.url,
            status: responseEvent.status,
            statusText: responseEvent.statusText,
            headers: (0, http_1.headersListToRecord)(responseEvent.headers),
            timestamp: responseEvent.timestamp,
            mimeType: responseEvent.mimeType,
            fromDiskCache: responseEvent.fromDiskCache,
            fromServiceWorker: responseEvent.fromServiceWorker,
            body: undefined
        };
        const bodyRef = responseEvent.bodyRef;

        // API entries are recorded before (and independently of) the filter.
        if (isApi) {
            await recordApiResponse(request, response, bodyRef);
        }
        if (!input.filter.shouldSave(request, response)) {
            continue;
        }
        if (!bodyRef) {
            input.warnings.push(`Missing body for ${request.url}`);
            continue;
        }

        const bytes = await readBody(bodyRef);
        const byteLength = bytes.byteLength;
        const violation = limitViolation(request, byteLength);
        if (violation !== null) {
            input.warnings.push(violation);
            continue;
        }

        bytesKept += byteLength;
        resources.push({
            request,
            response,
            contentRef: bodyRef,
            size: (0, http_1.parseContentLength)(response.headers || {}) ?? byteLength,
            mimeType: (0, http_1.responseMimeType)(response)
        });
    }
    return { resources, apiEntries };
};
173
+ exports.indexCapture = indexCapture;
@@ -0,0 +1,24 @@
1
+ import type { BuildOptions, StoredResource } from "./types.js";
2
+ export declare const emitDocumentFile: (input: {
3
+ resource: StoredResource;
4
+ path: string;
5
+ entryUrl: string;
6
+ groupUrl: string;
7
+ apiPath: string;
8
+ resolve: (absoluteUrl: string) => string | null;
9
+ rewriteEntry: boolean;
10
+ replaceElements: BuildOptions["replaceElements"];
11
+ contentStore: BuildOptions["capture"]["contentStore"];
12
+ snapshotEntryUrl: string;
13
+ }) => Promise<{
14
+ file: {
15
+ path: string;
16
+ mimeType: string;
17
+ size: number;
18
+ source: import("../types.js").ContentRef;
19
+ originalUrl: string;
20
+ resourceType: import("../types.js").ResourceType | undefined;
21
+ headers: Record<string, string>;
22
+ };
23
+ title: string | undefined;
24
+ }>;
@@ -0,0 +1,50 @@
1
+ import { rewriteEntryHtml } from "../rewrite-links.js";
2
+ import { streamToUint8Array } from "../utils/streams.js";
3
+ import { decodeUtf8 } from "../utils.js";
4
/**
 * Produces the snapshot file entry for a captured document.
 *
 * The stored body is read and decoded with `decodeUtf8`. When decoding yields
 * null the original content reference is emitted untouched and no title is
 * reported. Otherwise the HTML goes through `rewriteEntryHtml`, the result is
 * stored as a new buffer in the content store, and the file entry points at
 * that new content.
 *
 * @param input Resource, target path, URLs, resolver, rewrite options and the
 *              content store to read from / write to.
 * @returns `{ file, title }` where `title` comes from the rewrite step.
 */
export const emitDocumentFile = async (input) => {
    const { resource, contentStore } = input;

    // Only the content source and its size differ between the two outcomes.
    const describeFile = (source, size) => ({
        path: input.path,
        mimeType: resource.mimeType ?? "text/html",
        size,
        source,
        originalUrl: resource.request.url,
        resourceType: resource.request.resourceType,
        headers: resource.response.headers
    });

    const originalStream = await contentStore.open(resource.contentRef);
    const originalBytes = await streamToUint8Array(originalStream);
    const html = decodeUtf8(originalBytes);
    if (html === null) {
        return {
            file: describeFile(resource.contentRef, originalBytes.byteLength),
            title: undefined
        };
    }

    const rewritten = await rewriteEntryHtml({
        html,
        entryUrl: input.groupUrl,
        apiPath: input.apiPath,
        resolve: input.resolve,
        rewriteLinks: input.rewriteEntry,
        replaceElements: input.replaceElements,
        isEntryDocument: input.groupUrl === input.entryUrl,
        snapshotEntryUrl: input.snapshotEntryUrl
    });
    const encoded = new TextEncoder().encode(rewritten.html);
    const rewrittenRef = await contentStore.put({ kind: "buffer", data: encoded }, {
        url: resource.request.url,
        mimeType: resource.mimeType,
        sizeHint: encoded.byteLength
    });
    return {
        file: describeFile(rewrittenRef, encoded.byteLength),
        title: rewritten.title
    };
};
@@ -0,0 +1,8 @@
1
+ import type { ApiEntry, DocumentGroup, StoredResource } from "./types.js";
2
+ export declare const docDirFromUrl: (url: string) => string;
3
+ export declare const groupResources: (input: {
4
+ entryUrl: string;
5
+ resources: StoredResource[];
6
+ apiEntries: ApiEntry[];
7
+ warnings: string[];
8
+ }) => DocumentGroup[];
@@ -0,0 +1,87 @@
1
+ import { sanitizePosixPath } from "../utils.js";
2
/**
 * Derives a directory name for a document from its URL.
 *
 * The URL's pathname is passed through `sanitizePosixPath`. An empty result,
 * an unparsable URL, or any error raised along the way yields "root".
 */
export const docDirFromUrl = (url) => {
    const fallback = "root";
    try {
        const { pathname } = new URL(url);
        return sanitizePosixPath(pathname || "") || fallback;
    }
    catch {
        return fallback;
    }
};
15
+ export const groupResources = (input) => {
16
+ const documents = input.resources.filter((resource) => resource.request.resourceType === "document");
17
+ const hasFrameId = input.resources.some((resource) => !!resource.request.frameId);
18
+ const primaryDoc = documents.find((doc) => doc.request.url === input.entryUrl) ?? documents[0];
19
+ if (!hasFrameId) {
20
+ if (documents.length > 1) {
21
+ input.warnings.push("Multiple documents captured without frameId; using the first document.");
22
+ }
23
+ const primaryGroup = {
24
+ id: primaryDoc?.request.requestId ?? "root",
25
+ url: primaryDoc?.request.url ?? input.entryUrl,
26
+ resources: [],
27
+ apiEntries: []
28
+ };
29
+ for (const resource of input.resources) {
30
+ if (resource.request.resourceType === "document" && resource !== primaryDoc) {
31
+ continue;
32
+ }
33
+ primaryGroup.resources.push(resource);
34
+ if (resource.request.resourceType === "document") {
35
+ primaryGroup.docResource = resource;
36
+ }
37
+ }
38
+ for (const apiEntry of input.apiEntries) {
39
+ primaryGroup.apiEntries.push(apiEntry);
40
+ }
41
+ return [primaryGroup];
42
+ }
43
+ const groups = new Map();
44
+ for (const doc of documents) {
45
+ const id = doc.request.frameId ?? doc.request.requestId;
46
+ groups.set(id, {
47
+ id,
48
+ url: doc.request.url,
49
+ resources: [doc],
50
+ apiEntries: [],
51
+ docResource: doc
52
+ });
53
+ }
54
+ const primaryGroup = primaryDoc
55
+ ? (groups.get(primaryDoc.request.frameId ?? primaryDoc.request.requestId) ?? null)
56
+ : null;
57
+ const groupByUrl = new Map();
58
+ for (const group of groups.values()) {
59
+ groupByUrl.set(group.url, group);
60
+ }
61
+ for (const resource of input.resources) {
62
+ if (resource.request.resourceType === "document") {
63
+ continue;
64
+ }
65
+ const frameId = resource.request.frameId;
66
+ const byFrame = frameId ? groups.get(frameId) : undefined;
67
+ const byInitiator = resource.request.initiator?.url
68
+ ? groupByUrl.get(resource.request.initiator.url)
69
+ : undefined;
70
+ const target = byFrame ?? byInitiator ?? primaryGroup ?? Array.from(groups.values())[0];
71
+ if (target) {
72
+ target.resources.push(resource);
73
+ }
74
+ }
75
+ for (const entry of input.apiEntries) {
76
+ const frameId = entry.request.frameId;
77
+ const byFrame = frameId ? groups.get(frameId) : undefined;
78
+ const byInitiator = entry.request.initiator?.url
79
+ ? groupByUrl.get(entry.request.initiator.url)
80
+ : undefined;
81
+ const target = byFrame ?? byInitiator ?? primaryGroup ?? Array.from(groups.values())[0];
82
+ if (target) {
83
+ target.apiEntries.push(entry);
84
+ }
85
+ }
86
+ return Array.from(groups.values());
87
+ };
@@ -0,0 +1,6 @@
1
+ import type { Header } from "../core/capture/types.js";
2
+ import type { NetworkResponseEvent } from "../types.js";
3
+ export declare const headersListToRecord: (headers: Header[]) => Record<string, string>;
4
+ export declare const getHeaderValue: (headers: Record<string, string>, name: string) => string | undefined;
5
+ export declare const parseContentLength: (headers: Record<string, string>) => number | undefined;
6
+ export declare const responseMimeType: (response: NetworkResponseEvent) => string | undefined;
@@ -0,0 +1,28 @@
1
/**
 * Flattens a list of `{ name, value }` headers into a plain object keyed by
 * header name.
 *
 * Names are used exactly as given (no case folding). When a name occurs more
 * than once, the last value wins.
 */
export const headersListToRecord = (headers) => {
    const record = {};
    for (const { name, value } of headers) {
        record[name] = value;
    }
    return record;
};
8
/**
 * Looks up a header by name, ignoring case.
 *
 * @param headers Header record to search.
 * @param name    Header name in any casing.
 * @returns The value of the first enumerable key whose lower-cased form
 *          equals the lower-cased `name`, or undefined when none matches.
 */
export const getHeaderValue = (headers, name) => {
    const wanted = name.toLowerCase();
    for (const candidate in headers) {
        if (candidate.toLowerCase() !== wanted) {
            continue;
        }
        return headers[candidate];
    }
    return undefined;
};
17
/**
 * Extracts a usable byte count from a Content-Length header.
 *
 * @param headers Header record; the lookup is case-insensitive.
 * @returns The declared length as a positive integer, or undefined when the
 *          header is absent, empty, zero, not a plain decimal number, or too
 *          large to be represented exactly.
 */
export const parseContentLength = (headers) => {
    const raw = getHeaderValue(headers, "content-length");
    if (!raw) {
        return undefined;
    }
    // Content-Length is a run of decimal digits. Number() on its own would
    // also accept hex ("0x10"), exponent ("1e3"), signed ("+5") and
    // fractional ("1.5") spellings and turn them into a bogus size, so the
    // shape is validated before converting.
    const digits = String(raw).trim();
    if (!/^\d+$/.test(digits)) {
        return undefined;
    }
    const parsed = Number(digits);
    // Beyond the safe-integer range the value can no longer be trusted as an
    // exact byte count; zero keeps meaning "no usable length".
    if (!Number.isSafeInteger(parsed) || parsed <= 0) {
        return undefined;
    }
    return parsed;
};
28
/**
 * Resolves the MIME type of a response: the response's own `mimeType` when it
 * is set (truthy), otherwise the value of its Content-Type header, which may
 * be undefined.
 */
export const responseMimeType = (response) => {
    const declared = response.mimeType;
    if (declared) {
        return declared;
    }
    return getHeaderValue(response.headers || {}, "content-type");
};
@@ -0,0 +1,4 @@
1
+ export type { BuildOptions } from "./types.js";
2
+ export type { ApiEntry, StoredResource } from "./capture-index/index.js";
3
+ export { indexCapture } from "./capture-index/index.js";
4
+ export { buildSnapshot } from "./build-snapshot.js";
@@ -0,0 +1,2 @@
1
+ export { indexCapture } from "./capture-index/index.js";
2
+ export { buildSnapshot } from "./build-snapshot.js";
@@ -0,0 +1,3 @@
1
+ export declare const escapePercentForStaticServers: (value: string) => string;
2
+ export declare const docDirFromUrl: (url: string) => string;
3
+ export declare const resolveSnapshotPath: (urlToPath: Map<string, string>, absoluteUrl: string) => string | null;
@@ -0,0 +1,35 @@
1
+ import { sanitizePosixPath } from "../utils.js";
2
/**
 * Rewrites every "%" in a snapshot path as "%25".
 *
 * Snapshot filenames can contain literal percent sequences such as "%2F".
 * Many static servers percent-decode the request path before mapping it to
 * the filesystem; decoding "%2F" to "/" would change the path structure and
 * miss the file. With "%" escaped, the decoded request path is the literal
 * filename on disk again.
 */
export const escapePercentForStaticServers = (value) => value.replace(/%/g, "%25");
14
/**
 * Maps a document URL to its snapshot directory name: the URL pathname run
 * through `sanitizePosixPath`, or "root" when that comes out empty, the URL
 * cannot be parsed, or sanitizing throws.
 */
export const docDirFromUrl = (url) => {
    let dir = "";
    try {
        dir = sanitizePosixPath(new URL(url).pathname || "");
    }
    catch {
        // Any failure leaves `dir` empty and falls through to the default.
    }
    return dir ? dir : "root";
};
24
/**
 * Resolves an absolute URL to the path it was stored under in the snapshot.
 *
 * @param urlToPath   Map from absolute URL to snapshot path.
 * @param absoluteUrl URL to look up.
 * @returns null when the URL has no (non-empty) mapping; otherwise the mapped
 *          path with "%" escaped for static servers. A mapped value that
 *          contains "://" is treated as an absolute URL and returned as is.
 */
export const resolveSnapshotPath = (urlToPath, absoluteUrl) => {
    const mapped = urlToPath.get(absoluteUrl);
    if (!mapped) {
        return null;
    }
    // Defensive: mapped values should be paths, but an unexpected absolute
    // URL must not be mangled by the percent escaping.
    const looksLikeUrl = mapped.includes("://");
    return looksLikeUrl ? mapped : escapePercentForStaticServers(mapped);
};
@@ -0,0 +1,23 @@
1
+ import type { ResourceType } from "../types.js";
2
+ export type ResourcesPathSnapshotItem = {
3
+ url: string;
4
+ path: string;
5
+ resourceType?: ResourceType;
6
+ mimeType?: string;
7
+ size?: number;
8
+ };
9
+ export type ResourcesPathSnapshot = {
10
+ version: "1.0";
11
+ createdAt: number;
12
+ items: ResourcesPathSnapshotItem[];
13
+ };
14
+ export declare const buildResourcesPathSnapshot: (createdAt: number, files: Array<{
15
+ path: string;
16
+ originalUrl?: string;
17
+ resourceType?: ResourceType;
18
+ mimeType?: string;
19
+ size?: number;
20
+ source?: {
21
+ kind: string;
22
+ };
23
+ }>) => ResourcesPathSnapshot;
@@ -0,0 +1,47 @@
1
+ export const buildResourcesPathSnapshot = (createdAt, files) => {
2
+ const items = [];
3
+ for (const file of files) {
4
+ const originalUrl = file.originalUrl;
5
+ if (!originalUrl) {
6
+ continue;
7
+ }
8
+ // Exclude HTML documents. We only want static resources.
9
+ if (file.resourceType === "document") {
10
+ continue;
11
+ }
12
+ // Skip snapshot-local pseudo URLs.
13
+ if (originalUrl.startsWith("/")) {
14
+ continue;
15
+ }
16
+ // Be defensive: only include valid absolute URLs.
17
+ try {
18
+ new URL(originalUrl);
19
+ }
20
+ catch {
21
+ continue;
22
+ }
23
+ items.push({
24
+ url: originalUrl,
25
+ path: file.path,
26
+ resourceType: file.resourceType,
27
+ mimeType: file.mimeType,
28
+ size: file.size
29
+ });
30
+ }
31
+ items.sort((left, right) => {
32
+ if (left.path < right.path)
33
+ return -1;
34
+ if (left.path > right.path)
35
+ return 1;
36
+ if (left.url < right.url)
37
+ return -1;
38
+ if (left.url > right.url)
39
+ return 1;
40
+ return 0;
41
+ });
42
+ return {
43
+ version: "1.0",
44
+ createdAt,
45
+ items
46
+ };
47
+ };
@@ -0,0 +1,18 @@
1
+ import type { BuildOptions, StoredResource } from "./types.js";
2
+ export declare const maybeRewriteStylesheet: (input: {
3
+ resource: StoredResource;
4
+ resolve: (absoluteUrl: string) => string | null;
5
+ contentStore: BuildOptions["capture"]["contentStore"];
6
+ rewriteCSS: boolean;
7
+ }) => Promise<{
8
+ contentRef: import("../types.js").ContentRef;
9
+ size: number;
10
+ }>;
11
+ export declare const maybeRewriteScript: (input: {
12
+ resource: StoredResource;
13
+ resolve: (absoluteUrl: string) => string | null;
14
+ contentStore: BuildOptions["capture"]["contentStore"];
15
+ }) => Promise<{
16
+ contentRef: import("../types.js").ContentRef;
17
+ size: number;
18
+ }>;
@@ -0,0 +1,52 @@
1
+ import { rewriteCssText } from "../css-rewrite.js";
2
+ import { rewriteJsText } from "../rewrite-links.js";
3
+ import { streamToUint8Array } from "../utils/streams.js";
4
+ import { decodeUtf8 } from "../utils.js";
/**
 * Shared pipeline behind the stylesheet and script rewriters.
 *
 * Opens the resource's stored content, decodes it, runs `rewrite` on the
 * text and, only if the text actually changed, stores the re-encoded result
 * as new content.
 *
 * @param resource Stored resource whose content may be rewritten.
 * @param contentStore Store used to open the current content and to put the
 *   rewritten bytes.
 * @param rewrite Receives the decoded text and returns (or resolves to) the
 *   rewritten text.
 * @returns The resource's existing `{ contentRef, size }` when the content
 *   could not be decoded (`decodeUtf8` returned `null`) or the rewrite was a
 *   no-op; otherwise the new content reference and its encoded byte length.
 */
const rewriteTextResource = async (resource, contentStore, rewrite) => {
    const keepOriginal = () => ({ contentRef: resource.contentRef, size: resource.size });
    const stream = await contentStore.open(resource.contentRef);
    const bytes = await streamToUint8Array(stream);
    const decoded = decodeUtf8(bytes);
    if (decoded === null) {
        return keepOriginal();
    }
    const rewritten = await rewrite(decoded);
    // Identical text means nothing to store; avoid writing a duplicate blob.
    if (rewritten === decoded) {
        return keepOriginal();
    }
    const encoded = new TextEncoder().encode(rewritten);
    const contentRef = await contentStore.put({ kind: "buffer", data: encoded }, {
        url: resource.request.url,
        mimeType: resource.mimeType,
        sizeHint: encoded.byteLength
    });
    return { contentRef, size: encoded.byteLength };
};
/**
 * Rewrites URLs inside a stylesheet resource via `rewriteCssText`.
 *
 * The resource is returned untouched (same `contentRef` and `size`) when it
 * is not a stylesheet or when `input.rewriteCSS` is false; the content store
 * is not accessed in that case.
 *
 * @param input `{ resource, resolve, contentStore, rewriteCSS }`.
 * @returns `{ contentRef, size }` for the content to use.
 */
export const maybeRewriteStylesheet = async (input) => {
    if (input.resource.request.resourceType !== "stylesheet" || !input.rewriteCSS) {
        return { contentRef: input.resource.contentRef, size: input.resource.size };
    }
    return rewriteTextResource(input.resource, input.contentStore, (cssText) => rewriteCssText({
        cssText,
        cssUrl: input.resource.request.url,
        resolveUrl: input.resolve
    }));
};
/**
 * Rewrites URLs inside a script resource via `rewriteJsText`.
 *
 * The resource is returned untouched (same `contentRef` and `size`) when it
 * is not a script; the content store is not accessed in that case.
 *
 * @param input `{ resource, resolve, contentStore }`.
 * @returns `{ contentRef, size }` for the content to use.
 */
export const maybeRewriteScript = async (input) => {
    if (input.resource.request.resourceType !== "script") {
        return { contentRef: input.resource.contentRef, size: input.resource.size };
    }
    return rewriteTextResource(input.resource, input.contentStore, (jsText) => rewriteJsText(jsText, input.resolve, input.resource.request.url));
};
@@ -0,0 +1,37 @@
1
+ import type { CaptureArtifacts } from "../core/capture/types.js";
2
+ import type { ApiRecord, ContentRef, NetworkRequestEvent, NetworkResponseEvent, PathResolver, ResourceFilter, ReplaceElementsConfig } from "../types.js";
/**
 * Optional numeric caps for a build. Every field may be omitted.
 * NOTE(review): names suggest byte/count ceilings; how they are enforced is
 * not visible in this declaration file.
 */
export type BuildLimits = {
    maxTotalBytes?: number;
    maxSingleResourceBytes?: number;
    maxResources?: number;
};

/** Input accepted by the snapshot build step. */
export type BuildOptions = {
    entryUrl: string;
    createdAt: number;
    capture: CaptureArtifacts;
    filter: ResourceFilter;
    pathResolver?: PathResolver;
    rewriteEntry: boolean;
    rewriteCSS: boolean;
    replaceElements?: ReplaceElementsConfig;
    limits?: BuildLimits;
    warnings: string[];
};

/** A request/response pair together with a reference to its stored content. */
export type StoredResource = {
    request: NetworkRequestEvent;
    response: NetworkResponseEvent;
    contentRef: ContentRef;
    size: number;
    mimeType?: string;
};

/** An API record paired with the request event it belongs to. */
export type ApiEntry = {
    record: ApiRecord;
    request: NetworkRequestEvent;
};

/** Resources and API entries grouped under one document. */
export type DocumentGroup = {
    id: string;
    url: string;
    resources: Array<StoredResource>;
    apiEntries: Array<ApiEntry>;
    docResource?: StoredResource;
};
@@ -0,0 +1,2 @@
1
+ export {};
2
+ // NOTE: ResourcesPathSnapshot types are defined in resources-path.ts.
@@ -1,16 +1,20 @@
1
- import type { ApiEntry, StoredResource } from "./network-store";
2
- import type { ContentStore, PageSnapshot, PathResolver, ReplaceElementsConfig } from "./types";
3
- type BuildOptions = {
1
+ import type { FileTree } from "./core/file-tree";
2
+ import type { CaptureArtifacts } from "./capture/types";
3
+ import type { PathResolver, ResourceFilter, ReplaceElementsConfig } from "./types";
4
+ export type BuildOptions = {
4
5
  entryUrl: string;
5
6
  createdAt: number;
6
- resources: StoredResource[];
7
- apiEntries: ApiEntry[];
8
- contentStore: ContentStore;
7
+ capture: CaptureArtifacts;
8
+ filter: ResourceFilter;
9
9
  pathResolver?: PathResolver;
10
10
  rewriteEntry: boolean;
11
11
  rewriteCSS: boolean;
12
12
  replaceElements?: ReplaceElementsConfig;
13
+ limits?: {
14
+ maxTotalBytes?: number;
15
+ maxSingleResourceBytes?: number;
16
+ maxResources?: number;
17
+ };
13
18
  warnings: string[];
14
19
  };
15
- export declare const buildSnapshot: (input: BuildOptions) => Promise<PageSnapshot>;
16
- export {};
20
+ export declare const buildSnapshot: (input: BuildOptions) => Promise<FileTree>;