@pagepocket/lib 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,159 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.NetworkStore = void 0;
4
+ const utils_1 = require("./utils");
5
/**
 * Tells whether a captured request was issued programmatically, i.e. its
 * `resourceType` is "fetch" or "xhr". A missing request yields `false`.
 */
const isApiResource = (request) => {
    const kind = request?.resourceType;
    switch (kind) {
        case "fetch":
        case "xhr":
            return true;
        default:
            return false;
    }
};
9
/**
 * Case-insensitive header lookup.
 *
 * Walks the enumerable keys of `headers` in enumeration order and returns the
 * value stored under the first key whose lower-cased form equals the
 * lower-cased `name`; returns `undefined` when nothing matches.
 */
const getHeaderValue = (headers, name) => {
    const wanted = name.toLowerCase();
    const headerNames = [];
    for (const headerName in headers) {
        headerNames.push(headerName);
    }
    const hit = headerNames.find((headerName) => headerName.toLowerCase() === wanted);
    return hit === undefined ? undefined : headers[hit];
};
18
/**
 * Picks the MIME type for a response: the response's own `mimeType` when it is
 * truthy, otherwise the value of its `content-type` header (matched
 * case-insensitively), otherwise `undefined`.
 */
const responseMimeType = (response) => {
    if (response.mimeType) {
        return response.mimeType;
    }
    const headers = response.headers || {};
    for (const headerName in headers) {
        if (headerName.toLowerCase() === "content-type") {
            return headers[headerName];
        }
    }
    return undefined;
};
19
/**
 * Accumulates the network traffic observed during a capture.
 *
 * Events are fed in through `handleEvent`. Request events are indexed by
 * `requestId`; response bodies accepted by the filter are written to the
 * content store (subject to the configured limits), and fetch/XHR traffic is
 * additionally recorded as API entries. Conditions that cause a resource to be
 * skipped are collected as warning strings instead of being thrown.
 */
class NetworkStore {
    /**
     * @param options.contentStore Object whose `put(content, meta)` persists a body and resolves to a content reference.
     * @param options.filter Object whose `shouldSave(request, response)` decides whether a response body is stored.
     * @param options.limits Optional `maxSingleResourceBytes`, `maxResources` and `maxTotalBytes`.
     *   A missing or falsy value (including 0) disables the corresponding limit.
     */
    constructor(options) {
        this.requests = new Map();
        this.storedResources = [];
        this.apiEntries = [];
        this.apiRecordIds = new Set();
        this.warnings = [];
        this.totalBytes = 0;
        // Slots/bytes claimed by writes that are still awaiting contentStore.put().
        // They count against the limits so overlapping handleEvent() calls cannot
        // all pass the checks before any of them is recorded.
        this.pendingResources = 0;
        this.pendingBytes = 0;
        this.contentStore = options.contentStore;
        this.filter = options.filter;
        this.limits = options.limits ?? {};
    }
    /** Returns a copy of the warnings collected so far. */
    getWarnings() {
        return this.warnings.slice();
    }
    /** Returns the byte total and file count of resources that finished storing. */
    getTotals() {
        return {
            totalBytes: this.totalBytes,
            totalFiles: this.storedResources.length
        };
    }
    /** Returns a copy of the stored-resource list. */
    getResources() {
        return this.storedResources.slice();
    }
    /** Returns only the `record` part of every API entry. */
    getApiRecords() {
        return this.apiEntries.map((entry) => entry.record);
    }
    /** Returns a copy of the API entry list (`{ record, request }` pairs). */
    getApiEntries() {
        return this.apiEntries.slice();
    }
    /** Returns a shallow copy of the requestId -> record map. */
    getRequestRecords() {
        return new Map(this.requests);
    }
    /**
     * Processes one network event.
     *
     * - "request": remembers the request under its `requestId`.
     * - "failed": attaches the failure to the known request and, for fetch/XHR,
     *   records an API failure entry.
     * - anything else is treated as the response for a known request.
     *
     * Events for unknown request ids are ignored. Rejects if decoding the body
     * or `contentStore.put` rejects.
     */
    async handleEvent(event) {
        if (event.type === "request") {
            this.requests.set(event.requestId, { request: event });
            return;
        }
        const record = this.requests.get(event.requestId);
        if (!record) {
            return;
        }
        if (event.type === "failed") {
            record.failed = event;
            if (isApiResource(record.request)) {
                this.recordApiFailure(record.request, event);
            }
            return;
        }
        record.response = event;
        const request = record.request;
        const response = event;
        const isApi = isApiResource(request);
        const shouldSave = this.filter.shouldSave(request, response);
        if (!isApi && !shouldSave) {
            // Nobody consumes the body, so do not pay for decoding it.
            return;
        }
        let bodyBytes = null;
        if (response.body) {
            bodyBytes = await (0, utils_1.toUint8Array)(response.body);
        }
        if (isApi) {
            await this.recordApiResponse(request, response, bodyBytes);
        }
        if (!shouldSave) {
            return;
        }
        if (!bodyBytes) {
            this.warnings.push(`Missing body for ${request.url}`);
            return;
        }
        const size = bodyBytes.byteLength;
        const { maxSingleResourceBytes, maxResources, maxTotalBytes } = this.limits;
        if (maxSingleResourceBytes && size > maxSingleResourceBytes) {
            this.warnings.push(`Resource too large: ${request.url}`);
            return;
        }
        if (maxResources && this.storedResources.length + this.pendingResources >= maxResources) {
            this.warnings.push(`Resource limit reached at ${request.url}`);
            return;
        }
        if (maxTotalBytes && this.totalBytes + this.pendingBytes + size > maxTotalBytes) {
            this.warnings.push(`Total byte limit reached at ${request.url}`);
            return;
        }
        const mimeType = responseMimeType(response);
        // Reserve synchronously, before the first await after the limit checks.
        this.pendingResources += 1;
        this.pendingBytes += size;
        try {
            const contentRef = await this.contentStore.put({ kind: "buffer", data: bodyBytes }, {
                url: request.url,
                mimeType,
                sizeHint: size
            });
            this.storedResources.push({
                request,
                response,
                contentRef,
                size,
                mimeType
            });
            this.totalBytes += size;
        }
        finally {
            // Release the reservation whether the write succeeded (it is now
            // counted in storedResources/totalBytes) or failed.
            this.pendingResources -= 1;
            this.pendingBytes -= size;
        }
    }
    /**
     * Records a failed fetch/XHR request as an API entry. Only the first
     * outcome recorded for a given `requestId` is kept.
     */
    recordApiFailure(request, failed) {
        if (this.apiRecordIds.has(request.requestId)) {
            return;
        }
        this.apiRecordIds.add(request.requestId);
        const record = {
            url: request.url,
            method: request.method,
            requestHeaders: request.headers,
            error: failed.errorText,
            timestamp: failed.timestamp
        };
        this.apiEntries.push({ record, request });
    }
    /**
     * Records a fetch/XHR response as an API entry. A non-empty body is attached
     * either as text or as base64, depending on what
     * `utils_1.bodyToTextOrBase64` reports. Only the first outcome recorded for
     * a given `requestId` is kept.
     */
    async recordApiResponse(request, response, bodyBytes) {
        if (this.apiRecordIds.has(request.requestId)) {
            return;
        }
        this.apiRecordIds.add(request.requestId);
        const record = {
            url: request.url,
            method: request.method,
            requestHeaders: request.headers,
            status: response.status,
            statusText: response.statusText,
            responseHeaders: response.headers,
            timestamp: response.timestamp
        };
        if (bodyBytes && bodyBytes.byteLength > 0) {
            const mimeType = responseMimeType(response);
            const decoded = (0, utils_1.bodyToTextOrBase64)(bodyBytes, mimeType);
            if (decoded.encoding === "text") {
                record.responseBody = decoded.text;
                record.responseEncoding = "text";
            }
            else {
                record.responseBodyBase64 = decoded.base64;
                record.responseEncoding = "base64";
            }
        }
        this.apiEntries.push({ record, request });
    }
}
159
+ exports.NetworkStore = NetworkStore;
@@ -1,23 +1,9 @@
1
- import type { NetworkInterceptorAdapter, SnapshotData } from "./types";
2
- export type PagePocketOptions = {
3
- assetsDirName?: string;
4
- baseUrl?: string;
5
- requestsPath?: string;
6
- };
7
- interface PageContent extends PagePocketOptions {
8
- content: string;
9
- title: string;
10
- }
11
- type RequestsInput = SnapshotData | string;
1
+ import type { CaptureOptions, InterceptTarget, PagePocketOptions, PageSnapshot } from "./types";
12
2
  // Public surface of PagePocket in 0.5.0. Lines marked "-" are the removed 0.4.2 API;
  // lines marked "+" are the members that replace it.
  export declare class PagePocket {
13
- private htmlString;
14
- private requestsJSON;
3
  // The capture target handed to the interceptor when capture() starts.
+ private target;
15
4
  private options;
16
- resources: SnapshotData["resources"];
17
- downloadedCount: number;
18
- failedCount: number;
19
- constructor(htmlString: string, requestsJSON: RequestsInput, options?: PagePocketOptions);
20
- static fromNetworkIntercetor(htmlString: string, url: string, interceptorAdapter: NetworkInterceptorAdapter, options?: PagePocketOptions): Promise<PagePocket>;
21
- put(): Promise<PageContent>;
5
  // Instances are obtained through fromURL() or fromTarget().
+ private constructor();
6
  // Builds an instance whose target is { kind: "url", url }.
+ static fromURL(url: string, options?: PagePocketOptions): PagePocket;
7
  // Builds an instance around an already constructed target.
+ static fromTarget(target: InterceptTarget, options?: PagePocketOptions): PagePocket;
8
  // Runs a capture; the implementation throws when options.interceptor is missing.
+ capture(options?: CaptureOptions): Promise<PageSnapshot>;
22
9
  }
23
- export {};
@@ -1,82 +1,108 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.PagePocket = void 0;
4
- const download_resources_1 = require("./download-resources");
5
- const hack_html_1 = require("./hack-html");
6
- const network_records_1 = require("./network-records");
7
- const resources_1 = require("./resources");
8
- const rewrite_links_1 = require("./rewrite-links");
9
- const safeFilename = (input) => {
10
- const trimmed = input.trim();
11
- if (!trimmed) {
12
- return "snapshot";
13
- }
14
- return (trimmed
15
- .replace(/[^a-zA-Z0-9._-]+/g, "_")
16
- .replace(/^_+|_+$/g, "")
17
- .slice(0, 120) || "snapshot");
18
- };
19
- const parseRequestsJson = (requestsJSON) => {
20
- const snapshot = typeof requestsJSON === "string" ? JSON.parse(requestsJSON) : requestsJSON;
21
- const rawNetworkRecords = (snapshot.networkRecords || []);
22
- const mappedNetworkRecords = (0, network_records_1.mapCapturedNetworkRecords)(rawNetworkRecords);
23
- return {
24
- snapshot,
25
- networkRecords: mappedNetworkRecords
26
- };
27
- };
4
+ const content_store_1 = require("./content-store");
5
+ const completion_1 = require("./completion");
6
+ const path_resolver_1 = require("./path-resolver");
7
+ const resource_filter_1 = require("./resource-filter");
8
+ const snapshot_builder_1 = require("./snapshot-builder");
9
+ const network_store_1 = require("./network-store");
28
10
  class PagePocket {
29
- constructor(htmlString, requestsJSON, options) {
30
- this.resources = [];
31
- this.downloadedCount = 0;
32
- this.failedCount = 0;
33
- this.htmlString = htmlString;
34
- this.requestsJSON = requestsJSON;
11
+ constructor(target, options) {
12
+ this.target = target;
35
13
  this.options = options ?? {};
36
14
  }
37
- static async fromNetworkIntercetor(htmlString, url, interceptorAdapter, options) {
38
- return new PagePocket(htmlString, await interceptorAdapter.run(url), options);
15
+ static fromURL(url, options) {
16
+ return new PagePocket({ kind: "url", url }, options);
39
17
  }
40
- async put() {
41
- const { snapshot, networkRecords } = parseRequestsJson(this.requestsJSON);
42
- const safeTitle = safeFilename(snapshot.title || "snapshot");
43
- const assetsDirName = this.options.assetsDirName ?? `${safeTitle}_files`;
44
- const baseUrl = this.options.baseUrl ?? snapshot.url ?? "";
45
- const requestsPath = this.options.requestsPath ?? `${safeTitle}.requests.json`;
46
- const { $, resourceUrls, srcsetItems } = (0, resources_1.extractResourceUrls)(this.htmlString, baseUrl);
47
- const downloadResult = await (0, download_resources_1.downloadResources)({
48
- baseUrl,
49
- assetsDirName,
50
- resourceUrls,
51
- srcsetItems,
52
- referer: baseUrl
53
- });
54
- this.resources = downloadResult.resourceMeta;
55
- this.downloadedCount = downloadResult.downloadedCount;
56
- this.failedCount = downloadResult.failedCount;
57
- await (0, rewrite_links_1.rewriteLinks)({
58
- $,
59
- resourceUrls,
60
- srcsetItems,
61
- baseUrl,
62
- assetsDirName,
63
- resourceMap: downloadResult.resourceMap,
64
- networkRecords
65
- });
66
- const faviconDataUrl = (0, network_records_1.findFaviconDataUrl)(networkRecords);
67
- (0, hack_html_1.hackHtml)({
68
- $,
69
- baseUrl,
70
- requestsPath,
71
- faviconDataUrl
18
+ static fromTarget(target, options) {
19
+ return new PagePocket(target, options);
20
+ }
21
+ async capture(options) {
22
+ if (!options?.interceptor) {
23
+ throw new Error("CaptureOptions.interceptor is required.");
24
+ }
25
+ const contentStore = options?.contentStore ?? new content_store_1.HybridContentStore();
26
+ const filter = options?.filter ?? (0, resource_filter_1.createDefaultResourceFilter)();
27
+ const pathResolver = options?.pathResolver ?? (0, path_resolver_1.createDefaultPathResolver)();
28
+ const rewriteEntry = options?.rewriteEntry ?? true;
29
+ const rewriteCSS = options?.rewriteCSS ?? true;
30
+ const limits = options?.limits;
31
+ const completionStrategies = (0, completion_1.normalizeCompletion)(options?.completion);
32
+ const completion = completionStrategies.length > 0
33
+ ? completionStrategies
34
+ : [(0, completion_1.networkIdle)(1000), (0, completion_1.timeout)(5000)];
35
+ const store = new network_store_1.NetworkStore({
36
+ contentStore,
37
+ filter,
38
+ limits
72
39
  });
73
- return {
74
- content: $.html(),
75
- title: safeTitle,
76
- assetsDirName,
77
- requestsPath,
78
- baseUrl
40
+ const inflight = new Set();
41
+ let inflightRequests = 0;
42
+ let lastNetworkTs = Date.now();
43
+ let totalRequests = 0;
44
+ const pendingEvents = new Set();
45
+ const onEvent = (event) => {
46
+ if (event?.timestamp) {
47
+ lastNetworkTs = event.timestamp;
48
+ }
49
+ else {
50
+ lastNetworkTs = Date.now();
51
+ }
52
+ if (event?.type === "request") {
53
+ totalRequests += 1;
54
+ if (!inflight.has(event.requestId)) {
55
+ inflight.add(event.requestId);
56
+ inflightRequests += 1;
57
+ }
58
+ }
59
+ if (event?.type === "response" || event?.type === "failed") {
60
+ if (inflight.delete(event.requestId)) {
61
+ inflightRequests = Math.max(0, inflightRequests - 1);
62
+ }
63
+ }
64
+ const task = store.handleEvent(event);
65
+ pendingEvents.add(task);
66
+ task.finally(() => pendingEvents.delete(task));
79
67
  };
68
+ const session = await options.interceptor.start(this.target, { onEvent });
69
+ if (this.target.kind === "url" && session?.navigate) {
70
+ await session.navigate(this.target.url);
71
+ }
72
+ if (completion.length === 1) {
73
+ await completion[0].wait({
74
+ now: () => Date.now(),
75
+ getStats: () => ({
76
+ inflightRequests,
77
+ lastNetworkTs,
78
+ totalRequests
79
+ })
80
+ });
81
+ }
82
+ else if (completion.length > 1) {
83
+ await Promise.race(completion.map((strategy) => strategy.wait({
84
+ now: () => Date.now(),
85
+ getStats: () => ({
86
+ inflightRequests,
87
+ lastNetworkTs,
88
+ totalRequests
89
+ })
90
+ })));
91
+ }
92
+ await session.stop();
93
+ await Promise.all(pendingEvents);
94
+ const entryUrl = this.target.kind === "url" ? this.target.url : "";
95
+ return (0, snapshot_builder_1.buildSnapshot)({
96
+ entryUrl,
97
+ createdAt: Date.now(),
98
+ resources: store.getResources(),
99
+ apiEntries: store.getApiEntries(),
100
+ contentStore,
101
+ pathResolver,
102
+ rewriteEntry,
103
+ rewriteCSS,
104
+ warnings: store.getWarnings()
105
+ });
80
106
  }
81
107
  }
82
108
  exports.PagePocket = PagePocket;
@@ -0,0 +1,5 @@
1
+ import type { PathResolver, ResourceType } from "./types";
2
// Creates a resolver whose resolve(input) maps a resource URL to a local path: document
// resources map to "/index.html", cross-origin resources are placed under
// "/external_resources", and a hash-based suffix is added for query/hash parts and for
// path collisions. Results are cached per URL.
+ export declare const createDefaultPathResolver: () => PathResolver;
3
// Returns true when the hostnames of `url` and `entryUrl` differ; returns false when
// either value cannot be parsed as a URL.
+ export declare const resolveCrossOrigin: (url: string, entryUrl: string) => boolean;
4
// Wraps `resolver` so every resolved path is placed under the sanitized `prefix`;
// returns `resolver` itself when the prefix sanitizes to an empty value.
+ export declare const withPrefixPathResolver: (resolver: PathResolver, prefix: string) => PathResolver;
5
// True only when `resourceType` is "document".
+ export declare const isDocumentType: (resourceType?: ResourceType) => boolean;
@@ -0,0 +1,92 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.isDocumentType = exports.withPrefixPathResolver = exports.resolveCrossOrigin = exports.createDefaultPathResolver = void 0;
4
+ const utils_1 = require("./utils");
5
/**
 * Turns a URL pathname into a rooted local path.
 *
 * An empty pathname is treated as "/". The value is passed through
 * `utils_1.sanitizePosixPath`, given a leading slash, and "index" is appended
 * when the result ends with "/".
 */
const normalizePathname = (pathname) => {
    const sanitized = (0, utils_1.sanitizePosixPath)(pathname || "/");
    const rooted = sanitized ? "/" + sanitized : "/";
    return rooted.endsWith("/") ? rooted + "index" : rooted;
};
14
/**
 * Inserts `suffix` in front of the file extension of `path`, or appends it
 * when the last path segment has no dot.
 */
const withSuffix = (path, suffix) => {
    const dotAt = path.lastIndexOf(".");
    const hasExtension = dotAt > path.lastIndexOf("/");
    if (!hasExtension) {
        return path + suffix;
    }
    const stem = path.slice(0, dotAt);
    const extension = path.slice(dotAt);
    return stem + suffix + extension;
};
22
/** True when both URL-like objects carry the same `hostname`. */
const sameDomain = (left, right) => {
    const { hostname: leftHost } = left;
    const { hostname: rightHost } = right;
    return leftHost === rightHost;
};
23
/**
 * Creates the default path resolver.
 *
 * The returned object's `resolve(input)` maps a resource (`url`,
 * `resourceType`, `isCrossOrigin`) to a local path:
 * - the result for a URL is cached, so resolving it again returns the same path;
 * - a "document" resource maps to "/index.html";
 * - other resources use their normalized URL pathname, prefixed with
 *   "/external_resources" when `isCrossOrigin` is set (an unparseable URL is
 *   treated as having the pathname "/");
 * - a query string and/or hash adds a `__ppq_<hash>` suffix before the extension;
 * - when the resulting path is already owned by a different URL, a
 *   `__ppc_<hash of url>` suffix is added as well.
 */
const createDefaultPathResolver = () => {
    const entryPath = "/index.html";
    const resolvedByUrl = new Map();
    const usedPaths = new Map();
    return {
        resolve(input) {
            const cached = resolvedByUrl.get(input.url);
            if (cached !== undefined) {
                return cached;
            }
            if (input.resourceType === "document") {
                // Claim the entry path so a later non-document resource that
                // happens to live at "/index.html" is detected as a collision
                // instead of silently sharing the entry's path.
                if (!usedPaths.has(entryPath)) {
                    usedPaths.set(entryPath, input.url);
                }
                resolvedByUrl.set(input.url, entryPath);
                return entryPath;
            }
            let parsed = null;
            try {
                parsed = new URL(input.url);
            }
            catch {
                parsed = null;
            }
            const pathname = normalizePathname(parsed?.pathname || "/");
            const queryHash = `${parsed?.search || ""}${parsed?.hash || ""}`;
            const suffix = queryHash ? `__ppq_${(0, utils_1.hashString)(queryHash)}` : "";
            const basePath = input.isCrossOrigin
                ? `/external_resources${pathname}`
                : pathname;
            let resolvedPath = suffix ? withSuffix(basePath, suffix) : basePath;
            const owner = usedPaths.get(resolvedPath);
            if (owner !== undefined && owner !== input.url) {
                const collisionSuffix = `__ppc_${(0, utils_1.hashString)(input.url)}`;
                resolvedPath = withSuffix(resolvedPath, collisionSuffix);
            }
            usedPaths.set(resolvedPath, input.url);
            resolvedByUrl.set(input.url, resolvedPath);
            return resolvedPath;
        }
    };
};
62
+ exports.createDefaultPathResolver = createDefaultPathResolver;
63
/**
 * Reports whether `url` is on a different hostname than `entryUrl`.
 * Only hostnames are compared (scheme and port are ignored). When either
 * value cannot be parsed as a URL the result is `false`.
 */
const resolveCrossOrigin = (url, entryUrl) => {
    let resourceHost;
    let entryHost;
    try {
        resourceHost = new URL(url).hostname;
        entryHost = new URL(entryUrl).hostname;
    }
    catch {
        return false;
    }
    return resourceHost !== entryHost;
};
73
+ exports.resolveCrossOrigin = resolveCrossOrigin;
74
/**
 * Wraps a path resolver so every path it produces is placed under `prefix`.
 *
 * The prefix is passed through `utils_1.sanitizePosixPath`; when nothing is
 * left, the original resolver is returned unchanged. Otherwise the wrapper
 * joins "/<prefix>" and the inner result with exactly one slash when the inner
 * result is relative, and concatenates them directly when it is rooted.
 */
const withPrefixPathResolver = (resolver, prefix) => {
    const cleanedPrefix = (0, utils_1.sanitizePosixPath)(prefix);
    if (!cleanedPrefix) {
        return resolver;
    }
    const root = "/" + cleanedPrefix;
    const prefixed = {
        resolve(input) {
            const inner = resolver.resolve(input);
            const separator = inner.startsWith("/") ? "" : "/";
            return root + separator + inner;
        }
    };
    return prefixed;
};
90
+ exports.withPrefixPathResolver = withPrefixPathResolver;
91
/** True only for the "document" resource type. */
const isDocumentType = (resourceType) => {
    return "document" === resourceType;
};
92
+ exports.isDocumentType = isDocumentType;
@@ -1 +1,11 @@
1
- export declare const buildReplayScript: (requestsPath: string, baseUrl: string) => string;
1
+ import type { ApiRecord } from "./types";
2
// Argument object accepted by matchAPI(). The matching logic itself is not part of this
// declaration file, so the field notes below are limited to what the types state.
+ export type MatchApiOptions = {
3
// Candidate API records.
+ records: ApiRecord[];
4
// Optional map from a string key to a record — NOTE(review): key format is not visible here; confirm against the implementation.
+ byKey?: Map<string, ApiRecord>;
5
+ baseUrl: string;
6
+ method: string;
7
+ url: string;
8
// Optional request body; typed as unknown.
+ body?: unknown;
9
+ };
10
// Returns an ApiRecord for the given options, or undefined. New in 0.5.0.
// NOTE(review): presumably undefined means "no record matched" — confirm against the implementation.
+ export declare function matchAPI(options: MatchApiOptions): ApiRecord | undefined;
11
// Returns a string built from `apiPath` and `baseUrl`. The first parameter was named
// `requestsPath` in 0.4.2; the parameter types and return type are unchanged.
+ export declare const buildReplayScript: (apiPath: string, baseUrl: string) => string;