@cellaware/utils 8.4.1 → 8.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,37 +1,3 @@
1
- import type { Browser } from "playwright-core";
2
- /**
3
- * Browserless (hosted) performance & reliability notes
4
- *
5
- * 1) Prefer connect-per-request (default)
6
- * - We use chromium.connectOverCDP() instead of launching a local browser.
7
- * - In serverless, connecting + closing per request is the safest and most cost-predictable option.
8
- * - Hosted Browserless usage is typically metered by session/connection time, so leaving connections
9
- * open while idle can increase cost.
10
- *
11
- * 2) Always isolate work in a fresh BrowserContext per PDF
12
- * - Never reuse BrowserContext or Page across requests.
13
- * - Context-per-request avoids cookie/storage leaks, cross-request interference, and memory growth.
14
- *
15
- * 3) Close deterministically (inner → outer)
16
- * - Always close Page, then Context, then Browser (the CDP connection).
17
- * - This releases remote resources quickly and prevents “dangling sessions” on the provider side.
18
- *
19
- * 4) Waiting strategy
20
- * - Default waitUntil: "load" to avoid hanging on long-lived network activity (analytics/beacons).
21
- * - If your HTML depends on external assets (remote CSS/fonts/images), consider "networkidle",
22
- * or better: emit an explicit readiness signal and waitForFunction() for deterministic completion.
23
- *
24
- * 5) Biggest speed wins usually come from reducing network work
25
- * - Inline critical CSS and avoid loading third-party scripts if possible.
26
- * - Host/inline fonts and images where feasible to reduce variability and speed up rendering.
27
- * - The less the page has to fetch, the faster and more consistent PDF generation becomes.
28
- *
29
- * 6) Optional burst optimization (only if needed)
30
- * - If you generate many PDFs back-to-back on the same warm instance, you can reuse the CDP
31
- * connection briefly (with a short idle timeout).
32
- * - Still keep Context-per-request. Avoid keeping the connection open indefinitely to prevent idle cost.
33
- */
34
- export declare function getBrowser(): Promise<Browser>;
35
1
  export interface PdfOptions {
36
2
  /**
37
3
  * Display header and footer. Defaults to `false`.
@@ -126,4 +92,4 @@ export interface PdfOptions {
126
92
  */
127
93
  width?: string | number;
128
94
  }
129
- export declare function generatePdf(html: string, pdfOptions?: PdfOptions): Promise<string>;
95
/**
 * Renders an HTML document to a PDF through the Browserless `/pdf` endpoint.
 *
 * @param html - The HTML markup to render.
 * @param pdfOptions - Optional PDF settings. When omitted, the implementation
 *   falls back to Letter format with `printBackground` and `preferCSSPageSize`
 *   enabled, so the parameter stays optional as it was in 8.4.1.
 * @returns The raw PDF bytes.
 * @throws If the `BROWSERLESS_TOKEN` environment variable is not set, or if
 *   the request still fails once the retry budget is used up.
 */
export declare function generatePdf(html: string, pdfOptions?: PdfOptions): Promise<Buffer>;
@@ -0,0 +1,132 @@
1
+ import { sleep } from "../util.js";
2
+ /*
3
+
4
+ In summary -- try to generate a PDF, and if Browserless is busy, politely wait and try again a few times — then give up cleanly.
5
+
6
+ Step by step details:
7
+
8
+ 1. Send HTML to Browserless
9
+ - We make an HTTP request to Browserless’s `/pdf` endpoint.
10
+ - Browserless runs a browser, renders the HTML, and sends back a PDF.
11
+
12
+ 2. If it works → we’re done
13
+ - We return the PDF immediately.
14
+
15
+ 3. If Browserless is too busy
16
+ - Browserless may say:
17
+ - “Too many requests” (429), or
18
+ - a gateway or availability error: “Bad Gateway” (502), “Service Unavailable” (503), or “Gateway Timeout” (504)
19
+ - This usually means you hit your concurrency limit, not that something is broken.
20
+
21
+ 4. When that happens
22
+ - We wait a bit
23
+ - Then try again
24
+ - But:
25
+ - We don’t retry forever
26
+ - We wait slightly longer each time
27
+ - We add a little randomness so many requests don’t retry at once
28
+
29
+ 5. If Browserless tells us how long to wait
30
+ - We listen (`Retry-After` header)
31
+ - That’s the best signal
32
+
33
+ 6. If the request hangs or the network glitches
34
+ - We cancel it after a timeout
35
+ - Then retry, using the same retry budget as above (up to 3 retries)
36
+
37
+ 7. If it keeps failing
38
+ - We stop retrying
39
+ - We throw a clear error explaining what happened
40
+ */
41
/** Browserless REST endpoint that receives the HTML and answers with a PDF. */
const ENDPOINT = "https://production-sfo.browserless.io/pdf";

/** Deadline for a single attempt; the request is aborted once it elapses. */
const TIMEOUT_MS = 60 * 1000;

/** Number of retries allowed on top of the first attempt. */
const RETRIES = 3;

/** Back-off before the first retry; it doubles for each further attempt. */
const BASE_DELAY_MS = 3 * 1000;

/** Ceiling applied to the exponential back-off, before jitter is added. */
const MAX_DELAY_MS = 15 * 1000;

/** Upper bound (inclusive) of the random jitter added to every wait. */
const JITTER_MS = 1000;
47
/**
 * Converts a `Retry-After` header value into a wait time in milliseconds.
 *
 * Two forms are understood: a non-negative number of seconds, or a date
 * string that `Date.parse` can read (converted to "milliseconds from now",
 * never negative).
 *
 * @param retryAfter - Raw header value, or null/undefined when absent.
 * @returns The delay in milliseconds, or `null` when the header is missing,
 *   blank, or not usable, so the caller can fall back to its own back-off.
 */
function parseRetryAfterMs(retryAfter) {
    if (!retryAfter)
        return null;
    const value = String(retryAfter).trim();
    // A whitespace-only header carries no information; `Number("  ")` would
    // otherwise turn it into a zero-second wait.
    if (value === "")
        return null;
    // Plain non-negative seconds (a fractional part is tolerated).
    if (/^\d+(\.\d+)?$/.test(value)) {
        const ms = Math.floor(Number(value) * 1000);
        return Number.isFinite(ms) ? ms : null;
    }
    // Anything else that still reads as a number (negative, hex, exponent,
    // Infinity) is not a valid delay and must not reach the lenient date
    // parser, which could accept it as a date in the past.
    if (!Number.isNaN(Number(value)))
        return null;
    const asDate = Date.parse(value);
    if (Number.isNaN(asDate))
        return null;
    return Math.max(0, asDate - Date.now());
}
60
/**
 * Computes how long to wait before the next try.
 *
 * The wait starts at BASE_DELAY_MS for attempt 1 and doubles with each later
 * attempt, is limited to MAX_DELAY_MS, and then gets a random whole number of
 * milliseconds between 0 and JITTER_MS (inclusive) added on top.
 *
 * @param attempt - 1-based number of the attempt that just failed.
 * @returns The wait in milliseconds.
 */
function backoffMs(attempt) {
    const doublings = Math.max(0, attempt - 1);
    const exponential = BASE_DELAY_MS * 2 ** doublings;
    const randomExtra = Math.floor(Math.random() * (JITTER_MS + 1));
    return Math.min(exponential, MAX_DELAY_MS) + randomExtra;
}
66
/**
 * Tells whether an HTTP status is one of the codes treated as retryable:
 * 429, 502, 503 or 504.
 *
 * @param status - HTTP status code of the response.
 * @returns `true` for a retryable status, otherwise `false`.
 */
function shouldRetryStatus(status) {
    switch (status) {
        case 429:
        case 502:
        case 503:
        case 504:
            return true;
        default:
            return false;
    }
}
69
/**
 * Reports whether a failed request looks like a timeout or a transient
 * network fault. The error code is looked up both on the error itself and on
 * its `cause`, using one shared list so the two checks cannot drift apart.
 */
function isTransientNetworkError(err) {
    if (err?.name === "AbortError")
        return true;
    const transientCodes = ["ECONNRESET", "ETIMEDOUT", "EAI_AGAIN"];
    return transientCodes.includes(err?.code) || transientCodes.includes(err?.cause?.code);
}

/**
 * Renders HTML to a PDF by POSTing it to the Browserless `/pdf` endpoint.
 *
 * Each attempt is aborted after TIMEOUT_MS. Responses with a retryable status
 * (see `shouldRetryStatus`) and transient network errors are retried up to
 * RETRIES times; the wait comes from the `Retry-After` header when it is
 * usable, otherwise from `backoffMs`.
 *
 * @param html - HTML markup to render.
 * @param pdfOptions - PDF options forwarded as `options`; when null or
 *   undefined, Letter format with `printBackground` and `preferCSSPageSize`
 *   is used.
 * @returns A Buffer holding the PDF bytes.
 * @throws Error when `BROWSERLESS_TOKEN` is not set, when the service answers
 *   with a non-retryable status, or when the last attempt still fails.
 */
export async function generatePdf(html, pdfOptions) {
    const token = process.env.BROWSERLESS_TOKEN;
    if (!token) {
        // Name the variable that is actually read above.
        throw new Error('PDF: `BROWSERLESS_TOKEN` environment variable is not set');
    }
    const url = new URL(ENDPOINT);
    url.searchParams.set("token", token);
    const totalAttempts = 1 + RETRIES;
    for (let attempt = 1; attempt <= totalAttempts; attempt++) {
        const controller = new AbortController();
        const timeout = setTimeout(() => controller.abort(), TIMEOUT_MS);
        let delay;
        try {
            const res = await fetch(url.toString(), {
                method: "POST",
                headers: {
                    "content-type": "application/json",
                    "accept": "application/pdf",
                },
                body: JSON.stringify({
                    html,
                    options: pdfOptions ??
                        {
                            format: "Letter",
                            printBackground: true,
                            preferCSSPageSize: true,
                        }
                }),
                signal: controller.signal,
            });
            if (res.ok) {
                return Buffer.from(await res.arrayBuffer());
            }
            const canRetry = shouldRetryStatus(res.status) && attempt < totalAttempts;
            if (!canRetry) {
                const bodyText = await res.text().catch(() => "");
                const preview = bodyText ? bodyText.slice(0, 1000) : "";
                throw new Error(`PDF: Browserless /pdf failed (${res.status} ${res.statusText})` + (preview ? ` — ${preview}` : ""));
            }
            // The body of a response we are about to discard is never read;
            // cancel it so the underlying connection is released right away.
            await res.body?.cancel().catch(() => { });
            const retryAfter = parseRetryAfterMs(res.headers.get("retry-after"));
            delay = retryAfter !== null
                ? retryAfter + Math.floor(Math.random() * (JITTER_MS + 1))
                : backoffMs(attempt);
        }
        catch (err) {
            // Also reached by the HTTP error thrown above, which is not
            // transient and is therefore rethrown unchanged.
            if (!(isTransientNetworkError(err) && attempt < totalAttempts)) {
                throw err;
            }
            delay = backoffMs(attempt);
        }
        finally {
            clearTimeout(timeout);
        }
        // Wait outside the try block so the abort timer is already cleared
        // while we sleep.
        await sleep(delay);
    }
    throw new Error("PDF: Browserless /pdf failed");
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cellaware/utils",
3
- "version": "8.4.1",
3
+ "version": "8.5.0",
4
4
  "description": "Cellaware Utilities for Node.js",
5
5
  "author": "Cellaware Technologies",
6
6
  "type": "module",
@@ -21,8 +21,7 @@
21
21
  "@azure/functions": "^4.5.1",
22
22
  "@azure/storage-blob": "^12.16.0",
23
23
  "dotenv": "^16.3.1",
24
- "langchain": "^0.2.19",
25
- "playwright-core": "^1.57.0"
24
+ "langchain": "^0.2.19"
26
25
  },
27
26
  "devDependencies": {
28
27
  "typescript": "^5.3.2"
package/dist/browser.js DELETED
@@ -1,70 +0,0 @@
1
- import { chromium } from "playwright-core";
2
- /**
3
- * Browserless (hosted) performance & reliability notes
4
- *
5
- * 1) Prefer connect-per-request (default)
6
- * - We use chromium.connectOverCDP() instead of launching a local browser.
7
- * - In serverless, connecting + closing per request is the safest and most cost-predictable option.
8
- * - Hosted Browserless usage is typically metered by session/connection time, so leaving connections
9
- * open while idle can increase cost.
10
- *
11
- * 2) Always isolate work in a fresh BrowserContext per PDF
12
- * - Never reuse BrowserContext or Page across requests.
13
- * - Context-per-request avoids cookie/storage leaks, cross-request interference, and memory growth.
14
- *
15
- * 3) Close deterministically (inner → outer)
16
- * - Always close Page, then Context, then Browser (the CDP connection).
17
- * - This releases remote resources quickly and prevents “dangling sessions” on the provider side.
18
- *
19
- * 4) Waiting strategy
20
- * - Default waitUntil: "load" to avoid hanging on long-lived network activity (analytics/beacons).
21
- * - If your HTML depends on external assets (remote CSS/fonts/images), consider "networkidle",
22
- * or better: emit an explicit readiness signal and waitForFunction() for deterministic completion.
23
- *
24
- * 5) Biggest speed wins usually come from reducing network work
25
- * - Inline critical CSS and avoid loading third-party scripts if possible.
26
- * - Host/inline fonts and images where feasible to reduce variability and speed up rendering.
27
- * - The less the page has to fetch, the faster and more consistent PDF generation becomes.
28
- *
29
- * 6) Optional burst optimization (only if needed)
30
- * - If you generate many PDFs back-to-back on the same warm instance, you can reuse the CDP
31
- * connection briefly (with a short idle timeout).
32
- * - Still keep Context-per-request. Avoid keeping the connection open indefinitely to prevent idle cost.
33
- */
34
- export async function getBrowser() {
35
- const token = process.env.BROWSERLESS_TOKEN;
36
- if (!token) {
37
- throw new Error('BROWSER: `BROWSER_TOKEN` environment variable is not set');
38
- }
39
- const wsEndpoint = `wss://chrome.browserless.io?token=${encodeURIComponent(token)}`;
40
- return await chromium.connectOverCDP(wsEndpoint);
41
- }
42
- export async function generatePdf(html, pdfOptions) {
43
- let browser;
44
- let context;
45
- let page;
46
- try {
47
- browser = await getBrowser();
48
- context = await browser.newContext();
49
- page = await context.newPage();
50
- await page.setContent(html, {
51
- waitUntil: "load",
52
- });
53
- const pdfBytes = await page.pdf(pdfOptions ??
54
- {
55
- format: "Letter",
56
- printBackground: true,
57
- preferCSSPageSize: true,
58
- });
59
- return pdfBytes.toString("base64");
60
- }
61
- catch (err) {
62
- console.log(`BROWSER: PDF generation error: ${err.message}`);
63
- return '';
64
- }
65
- finally {
66
- await page?.close().catch(() => { });
67
- await context?.close().catch(() => { });
68
- await browser?.close().catch(() => { });
69
- }
70
- }