@pagepocket/capture-http-puppeteer-unit 0.8.5 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,8 +2,9 @@ import { NETWORK } from "@pagepocket/contracts";
2
2
  import { Unit, createMemoryContentStore, InflightTracker, mapKind, networkIdle, normalizeCompletion, throwUnsupportedEntryKind, timeout } from "@pagepocket/lib";
3
3
  import { PuppeteerAdapter } from "./internal/puppeteer-adapter.js";
4
4
  const headersRecordToList = (headers) => {
5
- if (!headers)
5
+ if (!headers) {
6
6
  return [];
7
+ }
7
8
  return Object.keys(headers).map((name) => ({ name, value: headers[name] }));
8
9
  };
9
10
  const targetBuilders = {
@@ -0,0 +1,24 @@
1
+ import type { NetworkEventHandlers } from "@pagepocket/lib";
2
+ import type { HTTPRequest, HTTPResponse } from "puppeteer";
3
+ export declare const createRequestIdStore: () => {
4
+ getRequestId: (request: HTTPRequest) => string;
5
+ has: (request: HTTPRequest) => boolean;
6
+ };
7
+ export declare const createEmitRequest: (handlers: NetworkEventHandlers, getRequestId: (r: HTTPRequest) => string) => (request: HTTPRequest, timestamp: number) => void;
8
+ export declare const isExpectedMissingBody: (input: {
9
+ request: HTTPRequest;
10
+ response: HTTPResponse;
11
+ headers: Record<string, string>;
12
+ }) => boolean;
13
+ export declare const createOnResponse: (input: {
14
+ handlers: NetworkEventHandlers;
15
+ getRequestId: (r: HTTPRequest) => string;
16
+ hasRequestId: (r: HTTPRequest) => boolean;
17
+ emitRequest: (r: HTTPRequest, ts: number) => void;
18
+ }) => (response: HTTPResponse) => void;
19
+ export declare const createOnRequestFailed: (input: {
20
+ handlers: NetworkEventHandlers;
21
+ getRequestId: (r: HTTPRequest) => string;
22
+ hasRequestId: (r: HTTPRequest) => boolean;
23
+ emitRequest: (r: HTTPRequest, ts: number) => void;
24
+ }) => (request: HTTPRequest) => void;
@@ -0,0 +1,130 @@
1
+ import { isResponseBodyUnavailableError } from "../utils/errors.js";
2
+ import { getHeaderValue, normalizeHeaders } from "../utils/headers.js";
3
+ import { getFrameId, getInitiator } from "../utils/puppeteer-internals.js";
4
+ const toResourceType = (request) => {
5
+ const type = request.resourceType?.();
6
+ return type ? type : undefined;
7
+ };
8
+ export const createRequestIdStore = () => {
9
+ const requestIds = new WeakMap();
10
+ let requestSequence = 0;
11
+ const getRequestId = (request) => {
12
+ const existing = requestIds.get(request);
13
+ if (existing) {
14
+ return existing;
15
+ }
16
+ const requestId = `pptr-${Date.now()}-${requestSequence++}`;
17
+ requestIds.set(request, requestId);
18
+ return requestId;
19
+ };
20
+ const has = (request) => requestIds.has(request);
21
+ return { getRequestId, has };
22
+ };
23
+ export const createEmitRequest = (handlers, getRequestId) => {
24
+ return (request, timestamp) => {
25
+ const requestEvent = {
26
+ type: "request",
27
+ requestId: getRequestId(request),
28
+ url: request.url(),
29
+ method: request.method(),
30
+ headers: normalizeHeaders(request.headers()),
31
+ frameId: getFrameId(request),
32
+ resourceType: toResourceType(request),
33
+ initiator: getInitiator(request),
34
+ timestamp
35
+ };
36
+ handlers.onEvent(requestEvent);
37
+ };
38
+ };
39
+ export const isExpectedMissingBody = (input) => {
40
+ const { request, response, headers } = input;
41
+ const method = request.method().toUpperCase();
42
+ if (method === "OPTIONS" || method === "HEAD") {
43
+ return true;
44
+ }
45
+ const status = response.status();
46
+ if (status >= 100 && status < 200) {
47
+ return true;
48
+ }
49
+ if (status >= 300 && status < 400) {
50
+ return true;
51
+ }
52
+ if (status === 206) {
53
+ const contentRange = getHeaderValue(headers, "content-range");
54
+ const contentType = getHeaderValue(headers, "content-type") ?? "";
55
+ if (contentRange) {
56
+ return true;
57
+ }
58
+ if (/^video\//i.test(contentType) || /^audio\//i.test(contentType)) {
59
+ return true;
60
+ }
61
+ }
62
+ return status === 204 || status === 205 || status === 304;
63
+ };
64
+ export const createOnResponse = (input) => {
65
+ const { handlers, getRequestId, hasRequestId, emitRequest } = input;
66
+ return (response) => {
67
+ const timestamp = Date.now();
68
+ const request = response.request();
69
+ if (!hasRequestId(request)) {
70
+ emitRequest(request, timestamp);
71
+ }
72
+ const headers = normalizeHeaders(response.headers());
73
+ const expectedMissingBody = () => isExpectedMissingBody({ request, response, headers });
74
+ const responseEvent = {
75
+ type: "response",
76
+ requestId: getRequestId(request),
77
+ url: response.url(),
78
+ status: response.status(),
79
+ statusText: response.statusText(),
80
+ headers,
81
+ mimeType: getHeaderValue(headers, "content-type"),
82
+ fromDiskCache: response.fromCache(),
83
+ fromServiceWorker: response.fromServiceWorker(),
84
+ timestamp,
85
+ body: {
86
+ kind: "late",
87
+ read: async () => {
88
+ try {
89
+ const buffer = await response.buffer();
90
+ return new Uint8Array(buffer);
91
+ }
92
+ catch (error) {
93
+ if (isResponseBodyUnavailableError(error)) {
94
+ if (!expectedMissingBody()) {
95
+ const method = request.method();
96
+ const status = response.status();
97
+ const url = response.url();
98
+ handlers.onError?.(new Error(`Unexpected missing response body (method=${method} status=${status}) for ${url}`));
99
+ }
100
+ return new Uint8Array();
101
+ }
102
+ handlers.onError?.(error);
103
+ return new Uint8Array();
104
+ }
105
+ }
106
+ }
107
+ };
108
+ handlers.onEvent(responseEvent);
109
+ };
110
+ };
111
+ export const createOnRequestFailed = (input) => {
112
+ const { handlers, getRequestId, hasRequestId, emitRequest } = input;
113
+ return (request) => {
114
+ const timestamp = Date.now();
115
+ const failure = request.failure?.();
116
+ const hadRequest = hasRequestId(request);
117
+ const requestId = getRequestId(request);
118
+ if (!hadRequest) {
119
+ emitRequest(request, timestamp);
120
+ }
121
+ const failedEvent = {
122
+ type: "failed",
123
+ requestId,
124
+ url: request.url(),
125
+ errorText: failure?.errorText ?? "Request failed",
126
+ timestamp
127
+ };
128
+ handlers.onEvent(failedEvent);
129
+ };
130
+ };
@@ -0,0 +1,7 @@
1
+ import type { InterceptTarget } from "@pagepocket/lib";
2
+ import type { GoToOptions, Page } from "puppeteer";
3
+ export declare const buildNavigate: (page: Page, gotoOptions?: GoToOptions) => {
4
+ navigate: (url: string, options?: GoToOptions) => Promise<void>;
5
+ awaitLastNavigation: (fallbackUrl: string) => Promise<void>;
6
+ };
7
+ export declare const ensureHtmlTargetLoaded: (page: Page, target: InterceptTarget) => Promise<void>;
@@ -0,0 +1,25 @@
1
+ export const buildNavigate = (page, gotoOptions) => {
2
+ let navigationPromise;
3
+ const navigate = async (url, options) => {
4
+ navigationPromise = page.goto(url, {
5
+ ...gotoOptions,
6
+ ...options
7
+ });
8
+ await navigationPromise;
9
+ };
10
+ const awaitLastNavigation = async (fallbackUrl) => {
11
+ await (navigationPromise ?? navigate(fallbackUrl));
12
+ };
13
+ return { navigate, awaitLastNavigation };
14
+ };
15
+ export const ensureHtmlTargetLoaded = async (page, target) => {
16
+ if (target.kind !== "html") {
17
+ return;
18
+ }
19
+ const baseTag = `<base href="${target.baseUrl}">`;
20
+ const alreadyHasBase = /<base\s+/i.test(target.htmlString);
21
+ const htmlWithBase = alreadyHasBase
22
+ ? target.htmlString
23
+ : target.htmlString.replace(/<head(\s[^>]*)?>/i, (match) => `${match}${baseTag}`);
24
+ await page.setContent(htmlWithBase, { waitUntil: "domcontentloaded" });
25
+ };
@@ -1,14 +1,10 @@
1
1
  import puppeteer from "puppeteer";
2
+ import { createEmitRequest, createOnRequestFailed, createOnResponse, createRequestIdStore } from "./puppeteer-adapter/events.js";
3
+ import { buildNavigate, ensureHtmlTargetLoaded } from "./puppeteer-adapter/targets.js";
2
4
  import { readDomHtml } from "./utils/dom-html.js";
3
5
  import { getEnvString } from "./utils/env.js";
4
- import { buildMissingChromeHelp, isMissingChromeError, isResponseBodyUnavailableError } from "./utils/errors.js";
5
- import { getHeaderValue, normalizeHeaders } from "./utils/headers.js";
6
- import { getFrameId, getInitiator } from "./utils/puppeteer-internals.js";
6
+ import { buildMissingChromeHelp, getErrorMessage, isMissingChromeError } from "./utils/errors.js";
7
7
  import { runTriggerActions } from "./utils/trigger-actions.js";
8
- const toResourceType = (request) => {
9
- const type = request.resourceType?.();
10
- return type ? type : undefined;
11
- };
12
8
  /**
13
9
  * Internal adapter for the capture plugin.
14
10
  *
@@ -52,10 +48,7 @@ export class PuppeteerAdapter {
52
48
  if (!isMissingChromeError(error)) {
53
49
  throw error;
54
50
  }
55
- const message = error && typeof error.message === "string"
56
- ? error.message
57
- : String(error);
58
- throw new Error(`${message}\n\n${buildMissingChromeHelp()}`);
51
+ throw new Error(`${getErrorMessage(error)}\n\n${buildMissingChromeHelp()}`);
59
52
  }
60
53
  ownsBrowser = true;
61
54
  }
@@ -69,153 +62,35 @@ export class PuppeteerAdapter {
69
62
  if (!page || typeof page.on !== "function") {
70
63
  throw new Error("PuppeteerAdapter requires a Puppeteer Page instance.");
71
64
  }
72
- const requestIds = new WeakMap();
73
- let requestSequence = 0;
74
- const getRequestId = (request) => {
75
- const existing = requestIds.get(request);
76
- if (existing) {
77
- return existing;
78
- }
79
- const requestId = `pptr-${Date.now()}-${requestSequence++}`;
80
- requestIds.set(request, requestId);
81
- return requestId;
82
- };
83
- const emitRequest = (request, timestamp) => {
84
- const requestEvent = {
85
- type: "request",
86
- requestId: getRequestId(request),
87
- url: request.url(),
88
- method: request.method(),
89
- headers: normalizeHeaders(request.headers()),
90
- frameId: getFrameId(request),
91
- resourceType: toResourceType(request),
92
- initiator: getInitiator(request),
93
- timestamp
94
- };
95
- handlers.onEvent(requestEvent);
96
- };
97
- const onRequest = (request) => {
98
- const timestamp = Date.now();
99
- emitRequest(request, timestamp);
100
- };
101
- const onResponse = (response) => {
102
- const timestamp = Date.now();
103
- const request = response.request();
104
- if (!requestIds.has(request)) {
105
- emitRequest(request, timestamp);
106
- }
107
- const headers = normalizeHeaders(response.headers());
108
- const isExpectedMissingBody = () => {
109
- const method = request.method().toUpperCase();
110
- if (method === "OPTIONS" || method === "HEAD") {
111
- return true;
112
- }
113
- const status = response.status();
114
- if (status >= 100 && status < 200) {
115
- return true;
116
- }
117
- if (status >= 300 && status < 400) {
118
- return true;
119
- }
120
- if (status === 206) {
121
- // Range / streaming responses are frequently not retrievable via CDP.
122
- // Treat missing bodies as expected to avoid noisy adapter errors.
123
- const contentRange = getHeaderValue(headers, "content-range");
124
- const contentType = getHeaderValue(headers, "content-type") ?? "";
125
- if (contentRange) {
126
- return true;
127
- }
128
- if (/^video\//i.test(contentType) || /^audio\//i.test(contentType)) {
129
- return true;
130
- }
131
- }
132
- return status === 204 || status === 205 || status === 304;
133
- };
134
- const responseEvent = {
135
- type: "response",
136
- requestId: getRequestId(request),
137
- url: response.url(),
138
- status: response.status(),
139
- statusText: response.statusText(),
140
- headers,
141
- mimeType: getHeaderValue(headers, "content-type"),
142
- fromDiskCache: response.fromCache(),
143
- fromServiceWorker: response.fromServiceWorker(),
144
- timestamp,
145
- body: {
146
- kind: "late",
147
- read: async () => {
148
- try {
149
- const buffer = await response.buffer();
150
- return new Uint8Array(buffer);
151
- }
152
- catch (error) {
153
- if (isResponseBodyUnavailableError(error)) {
154
- if (!isExpectedMissingBody()) {
155
- const method = request.method();
156
- const status = response.status();
157
- const url = response.url();
158
- handlers.onError?.(new Error(`Unexpected missing response body (method=${method} status=${status}) for ${url}`));
159
- }
160
- return new Uint8Array();
161
- }
162
- handlers.onError?.(error);
163
- return new Uint8Array();
164
- }
165
- }
166
- }
167
- };
168
- handlers.onEvent(responseEvent);
169
- };
170
- const onRequestFailed = (request) => {
171
- const timestamp = Date.now();
172
- const failure = request.failure?.();
173
- const hadRequest = requestIds.has(request);
174
- const requestId = getRequestId(request);
175
- if (!hadRequest) {
176
- emitRequest(request, timestamp);
177
- }
178
- const failedEvent = {
179
- type: "failed",
180
- requestId,
181
- url: request.url(),
182
- errorText: failure?.errorText ?? "Request failed",
183
- timestamp
184
- };
185
- handlers.onEvent(failedEvent);
186
- };
65
+ const { getRequestId, has } = createRequestIdStore();
66
+ const emitRequest = createEmitRequest(handlers, getRequestId);
67
+ const onRequest = (request) => emitRequest(request, Date.now());
68
+ const onResponse = createOnResponse({
69
+ handlers,
70
+ getRequestId,
71
+ hasRequestId: has,
72
+ emitRequest
73
+ });
74
+ const onRequestFailed = createOnRequestFailed({
75
+ handlers,
76
+ getRequestId,
77
+ hasRequestId: has,
78
+ emitRequest
79
+ });
187
80
  page.on("request", onRequest);
188
81
  page.on("response", onResponse);
189
82
  page.on("requestfailed", onRequestFailed);
190
- let navigationPromise = null;
191
- const navigate = async (url, options) => {
192
- navigationPromise = page.goto(url, {
193
- ...this.options.gotoOptions,
194
- ...options
195
- });
196
- await navigationPromise;
197
- };
198
- const ensureHtmlTargetLoaded = async () => {
199
- if (target.kind !== "html") {
200
- return;
201
- }
202
- const baseTag = `<base href="${target.baseUrl}">`;
203
- const alreadyHasBase = /<base\s+/i.test(target.htmlString);
204
- const htmlWithBase = alreadyHasBase
205
- ? target.htmlString
206
- : target.htmlString.replace(/<head(\s[^>]*)?>/i, (match) => `${match}${baseTag}`);
207
- await page.setContent(htmlWithBase, { waitUntil: "domcontentloaded" });
208
- };
83
+ const { navigate, awaitLastNavigation } = buildNavigate(page, this.options.gotoOptions);
209
84
  let htmlLoaded = false;
210
85
  const whenHtmlLoaded = async () => {
211
86
  if (htmlLoaded) {
212
87
  return;
213
88
  }
214
89
  if (target.kind === "url") {
215
- await (navigationPromise ?? navigate(target.url));
90
+ await awaitLastNavigation(target.url);
216
91
  }
217
92
  else if (target.kind === "html") {
218
- await ensureHtmlTargetLoaded();
93
+ await ensureHtmlTargetLoaded(page, target);
219
94
  }
220
95
  htmlLoaded = true;
221
96
  };
@@ -1,4 +1,5 @@
1
1
  export declare const isMissingChromeError: (error: unknown) => boolean;
2
+ export declare const getErrorMessage: (error: unknown) => string;
2
3
  export declare const buildMissingChromeHelp: () => string;
3
4
  export declare const isNoDataForResourceError: (error: unknown) => boolean;
4
5
  /**
@@ -4,6 +4,18 @@ export const isMissingChromeError = (error) => {
4
4
  : "";
5
5
  return message.includes("Could not find Chrome") || message.includes("Could not find Chromium");
6
6
  };
7
+ export const getErrorMessage = (error) => {
8
+ if (error instanceof Error) {
9
+ return error.message;
10
+ }
11
+ if (error && typeof error === "object" && "message" in error) {
12
+ const msg = error.message;
13
+ if (typeof msg === "string") {
14
+ return msg;
15
+ }
16
+ }
17
+ return String(error);
18
+ };
7
19
  export const buildMissingChromeHelp = () => {
8
20
  return `Puppeteer could not find a compatible Chrome installation.
9
21
 
@@ -1,21 +1,26 @@
1
1
  export const getFrameId = (request) => {
2
2
  const frame = request.frame();
3
- if (!frame)
3
+ if (!frame) {
4
4
  return undefined;
5
+ }
5
6
  const frameRef = frame;
6
- if (frameRef._id)
7
+ if (frameRef._id) {
7
8
  return frameRef._id;
8
- if (frameRef._frameId)
9
+ }
10
+ if (frameRef._frameId) {
9
11
  return frameRef._frameId;
10
- if (typeof frameRef.id === "function")
12
+ }
13
+ if (typeof frameRef.id === "function") {
11
14
  return frameRef.id();
15
+ }
12
16
  return undefined;
13
17
  };
14
18
  export const getInitiator = (request) => {
15
19
  const requestRef = request;
16
20
  const initiator = requestRef.initiator?.();
17
- if (!initiator)
21
+ if (!initiator) {
18
22
  return undefined;
23
+ }
19
24
  return {
20
25
  type: initiator.type,
21
26
  url: initiator.url
@@ -1,6 +1,7 @@
1
1
  export const runTriggerActions = async (page, actions = []) => {
2
- if (actions.length === 0)
2
+ if (actions.length === 0) {
3
3
  return;
4
+ }
4
5
  await page.evaluate((actionsArg) => {
5
6
  const hoverAll = () => {
6
7
  const elements = Array.from(document.querySelectorAll("*"));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pagepocket/capture-http-puppeteer-unit",
3
- "version": "0.8.5",
3
+ "version": "0.9.0",
4
4
  "description": "PagePocket plugin: capture HTTP events (puppeteer adapter)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -11,8 +11,8 @@
11
11
  "license": "ISC",
12
12
  "dependencies": {
13
13
  "puppeteer": "^22.12.1",
14
- "@pagepocket/lib": "0.8.5",
15
- "@pagepocket/contracts": "0.8.5"
14
+ "@pagepocket/lib": "0.9.0",
15
+ "@pagepocket/contracts": "0.9.0"
16
16
  },
17
17
  "devDependencies": {
18
18
  "typescript": "^5.4.5"