@purepageio/fetch-engines 0.2.12 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -58
- package/dist/HybridEngine.d.ts +1 -0
- package/dist/HybridEngine.d.ts.map +1 -1
- package/dist/HybridEngine.js +14 -3
- package/dist/HybridEngine.js.map +1 -1
- package/dist/PlaywrightEngine.d.ts +28 -1
- package/dist/PlaywrightEngine.d.ts.map +1 -1
- package/dist/PlaywrightEngine.js +220 -128
- package/dist/PlaywrightEngine.js.map +1 -1
- package/dist/browser/PlaywrightBrowserPool.d.ts +3 -1
- package/dist/browser/PlaywrightBrowserPool.d.ts.map +1 -1
- package/dist/browser/PlaywrightBrowserPool.js +319 -190
- package/dist/browser/PlaywrightBrowserPool.js.map +1 -1
- package/dist/constants.d.ts +29 -0
- package/dist/constants.d.ts.map +1 -0
- package/dist/constants.js +31 -0
- package/dist/constants.js.map +1 -0
- package/dist/types.d.ts +14 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/utils/markdown-converter.d.ts +7 -0
- package/dist/utils/markdown-converter.d.ts.map +1 -1
- package/dist/utils/markdown-converter.js +155 -64
- package/dist/utils/markdown-converter.js.map +1 -1
- package/package.json +2 -1
|
@@ -1,30 +1,229 @@
|
|
|
1
1
|
// Import chromium directly from playwright
|
|
2
|
-
import { chromium as
|
|
2
|
+
import { chromium as playwrightChromiumLauncher, } from "playwright";
|
|
3
3
|
import UserAgent from "user-agents";
|
|
4
4
|
import { v4 as uuidv4 } from "uuid";
|
|
5
5
|
import PQueue from "p-queue";
|
|
6
6
|
// Import addExtra from playwright-extra
|
|
7
7
|
import { addExtra } from "playwright-extra";
|
|
8
|
-
|
|
9
|
-
let
|
|
10
|
-
let StealthPluginInstance; // Still need the stealth plugin instance
|
|
8
|
+
let augmentedLauncher;
|
|
9
|
+
let stealthPlugin;
|
|
11
10
|
// Asynchronous function to load dependencies (now mainly for stealth plugin)
|
|
12
11
|
async function loadDependencies() {
|
|
13
|
-
if (!
|
|
14
|
-
//
|
|
15
|
-
|
|
16
|
-
//
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
//
|
|
21
|
-
|
|
22
|
-
|
|
12
|
+
if (!augmentedLauncher) {
|
|
13
|
+
// addExtra takes the original launcher and returns an augmented version.
|
|
14
|
+
// The original playwrightChromiumLauncher is of type BrowserType<ChromiumBrowser>.
|
|
15
|
+
// addExtra itself doesn't change this base type in a way TS immediately understands for .use,
|
|
16
|
+
// so we cast after applying the plugin.
|
|
17
|
+
const tempLauncher = addExtra(playwrightChromiumLauncher);
|
|
18
|
+
stealthPlugin = (await import("puppeteer-extra-plugin-stealth")).default();
|
|
19
|
+
tempLauncher.use(stealthPlugin); // Apply plugin
|
|
20
|
+
augmentedLauncher = tempLauncher; // Cast to our augmented type
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
// Define structure for browser instance managed by this pool -- THIS INTERFACE IS NO LONGER USED AND CAN BE REMOVED
|
|
24
|
+
/*
|
|
25
|
+
interface PlaywrightBrowserInstance {
|
|
26
|
+
id: string;
|
|
27
|
+
browser: PlaywrightBrowserType;
|
|
28
|
+
context: BrowserContext;
|
|
29
|
+
pages: Set<Page>;
|
|
30
|
+
metrics: BrowserMetrics;
|
|
31
|
+
isHealthy: boolean;
|
|
32
|
+
disconnectedHandler: () => void;
|
|
33
|
+
}
|
|
34
|
+
*/
|
|
35
|
+
class ManagedBrowserInstance {
|
|
36
|
+
id;
|
|
37
|
+
browser;
|
|
38
|
+
context;
|
|
39
|
+
pages = new Set();
|
|
40
|
+
metrics;
|
|
41
|
+
isHealthy = true;
|
|
42
|
+
disconnectedHandler;
|
|
43
|
+
useHeadedMode;
|
|
44
|
+
blockedDomains;
|
|
45
|
+
blockedResourceTypes;
|
|
46
|
+
proxyConfig;
|
|
47
|
+
onDisconnect;
|
|
48
|
+
launchOptions;
|
|
49
|
+
constructor(config) {
|
|
50
|
+
this.id = uuidv4();
|
|
51
|
+
this.useHeadedMode = config.useHeadedMode;
|
|
52
|
+
this.blockedDomains = config.blockedDomains;
|
|
53
|
+
this.blockedResourceTypes = config.blockedResourceTypes;
|
|
54
|
+
this.proxyConfig = config.proxyConfig;
|
|
55
|
+
this.onDisconnect = config.onDisconnect;
|
|
56
|
+
this.launchOptions = config.launchOptions;
|
|
57
|
+
const now = new Date();
|
|
58
|
+
this.metrics = {
|
|
59
|
+
id: this.id,
|
|
60
|
+
pagesCreated: 0,
|
|
61
|
+
activePages: 0,
|
|
62
|
+
lastUsed: now,
|
|
63
|
+
errors: 0,
|
|
64
|
+
createdAt: now,
|
|
65
|
+
isHealthy: true,
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
async initialize() {
|
|
69
|
+
await loadDependencies(); // Ensure augmentedLauncher is ready
|
|
70
|
+
const defaultLaunchArgs = [
|
|
71
|
+
"--no-sandbox",
|
|
72
|
+
"--disable-setuid-sandbox",
|
|
73
|
+
"--disable-dev-shm-usage",
|
|
74
|
+
"--disable-accelerated-2d-canvas",
|
|
75
|
+
"--no-first-run",
|
|
76
|
+
"--no-zygote",
|
|
77
|
+
"--disable-gpu",
|
|
78
|
+
"--mute-audio",
|
|
79
|
+
"--disable-background-networking",
|
|
80
|
+
];
|
|
81
|
+
// Start with default headless state based on useHeadedMode, and default args
|
|
82
|
+
// Then merge with provided launchOptions, which can override headless and args.
|
|
83
|
+
const mergedLaunchOptions = {
|
|
84
|
+
headless: !this.useHeadedMode, // Default based on pool mode
|
|
85
|
+
args: [...defaultLaunchArgs], // Default args
|
|
86
|
+
proxy: this.proxyConfig, // Proxy from pool config (can be overridden by this.launchOptions.proxy)
|
|
87
|
+
...this.launchOptions, // User-provided options (can override headless, args, proxy)
|
|
88
|
+
};
|
|
89
|
+
// If user-provided launchOptions include args, ensure they are merged, not just replaced.
|
|
90
|
+
// User args should ideally be additive or replace specific conflicting args intelligently.
|
|
91
|
+
// For simplicity, we'll concatenate and de-duplicate, giving preference to user args for duplicates if any.
|
|
92
|
+
if (this.launchOptions && this.launchOptions.args) {
|
|
93
|
+
mergedLaunchOptions.args = Array.from(new Set([...defaultLaunchArgs, ...this.launchOptions.args]));
|
|
94
|
+
}
|
|
95
|
+
// Explicitly set headless from this.launchOptions if provided, otherwise default based on this.useHeadedMode
|
|
96
|
+
if (this.launchOptions && typeof this.launchOptions.headless === "boolean") {
|
|
97
|
+
mergedLaunchOptions.headless = this.launchOptions.headless;
|
|
98
|
+
}
|
|
99
|
+
this.browser = await augmentedLauncher.launch(mergedLaunchOptions);
|
|
100
|
+
this.context = await this.browser.newContext({
|
|
101
|
+
userAgent: new UserAgent().toString(),
|
|
102
|
+
viewport: {
|
|
103
|
+
width: 1280 + Math.floor(Math.random() * 120),
|
|
104
|
+
height: 720 + Math.floor(Math.random() * 80),
|
|
105
|
+
},
|
|
106
|
+
javaScriptEnabled: true,
|
|
107
|
+
ignoreHTTPSErrors: true,
|
|
108
|
+
});
|
|
109
|
+
await this.context.route("**/*", async (route) => {
|
|
110
|
+
const request = route.request();
|
|
111
|
+
const url = request.url();
|
|
112
|
+
const resourceType = request.resourceType();
|
|
113
|
+
try {
|
|
114
|
+
const hostname = new URL(url).hostname.toLowerCase();
|
|
115
|
+
if (this.blockedDomains.some((domain) => hostname.includes(domain)) ||
|
|
116
|
+
this.blockedResourceTypes.includes(resourceType)) {
|
|
117
|
+
await route.abort("aborted");
|
|
118
|
+
}
|
|
119
|
+
else {
|
|
120
|
+
await route.continue();
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
catch (routeError) {
|
|
124
|
+
console.debug(`Error in ManagedBrowserInstance (${this.id}) route interceptor for URL ${url}: ${routeError?.message}. Request continued.`, routeError);
|
|
125
|
+
await route.continue();
|
|
126
|
+
}
|
|
127
|
+
});
|
|
128
|
+
this.disconnectedHandler = () => {
|
|
129
|
+
if (this.isHealthy) {
|
|
130
|
+
this.isHealthy = false;
|
|
131
|
+
this.metrics.isHealthy = false;
|
|
132
|
+
console.warn(`ManagedBrowserInstance ${this.id} disconnected unexpectedly.`);
|
|
133
|
+
this.onDisconnect(this.id); // Notify pool
|
|
134
|
+
}
|
|
135
|
+
};
|
|
136
|
+
this.browser.on("disconnected", this.disconnectedHandler);
|
|
137
|
+
this.isHealthy = true; // Mark as healthy after successful initialization
|
|
138
|
+
}
|
|
139
|
+
canCreateMorePages(maxPagesPerContext) {
|
|
140
|
+
return this.isHealthy && this.pages.size < maxPagesPerContext;
|
|
141
|
+
}
|
|
142
|
+
async acquirePage() {
|
|
143
|
+
if (!this.isHealthy) {
|
|
144
|
+
throw new Error(`Browser instance ${this.id} is not healthy.`);
|
|
145
|
+
}
|
|
146
|
+
try {
|
|
147
|
+
const page = await this.context.newPage();
|
|
148
|
+
this.pages.add(page);
|
|
149
|
+
this.metrics.pagesCreated++;
|
|
150
|
+
this.metrics.activePages = this.pages.size;
|
|
151
|
+
this.metrics.lastUsed = new Date();
|
|
152
|
+
page.on("close", () => {
|
|
153
|
+
this.pages.delete(page);
|
|
154
|
+
this.metrics.activePages = this.pages.size;
|
|
155
|
+
this.metrics.lastUsed = new Date();
|
|
156
|
+
});
|
|
157
|
+
page.on("crash", () => {
|
|
158
|
+
console.warn(`Page crashed in instance ${this.id}, URL: ${page.url()}`);
|
|
159
|
+
this.metrics.errors++;
|
|
160
|
+
this.pages.delete(page); // Remove from active pages
|
|
161
|
+
this.metrics.activePages = this.pages.size;
|
|
162
|
+
this.isHealthy = false; // Mark instance as unhealthy due to page crash
|
|
163
|
+
this.metrics.isHealthy = false;
|
|
164
|
+
this.onDisconnect(this.id); // Trigger pool's handling for unhealthy instance
|
|
165
|
+
});
|
|
166
|
+
return page;
|
|
167
|
+
}
|
|
168
|
+
catch (error) {
|
|
169
|
+
console.error(`Failed to create new page in instance ${this.id}: ${error.message}`, error);
|
|
170
|
+
this.metrics.errors++;
|
|
171
|
+
this.isHealthy = false;
|
|
172
|
+
this.metrics.isHealthy = false;
|
|
173
|
+
this.onDisconnect(this.id);
|
|
174
|
+
throw new Error(`Failed to create new page in instance ${this.id}: ${error.message}`);
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
async releasePage(page) {
|
|
178
|
+
if (this.pages.has(page) && !page.isClosed()) {
|
|
179
|
+
try {
|
|
180
|
+
await page.close();
|
|
181
|
+
}
|
|
182
|
+
catch (error) {
|
|
183
|
+
console.warn(`Error closing page in instance ${this.id}: ${error.message}`, error);
|
|
184
|
+
this.metrics.errors++;
|
|
185
|
+
// If page close fails, instance might still be usable, but flag it as potentially problematic
|
|
186
|
+
// Consider if this should mark instance unhealthy immediately
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
// The page.on('close') handler will update metrics.pages and activePages
|
|
190
|
+
}
|
|
191
|
+
checkHealth(now, maxBrowserAgeMs, maxIdleTimeMs) {
|
|
192
|
+
if (!this.isHealthy) {
|
|
193
|
+
return { shouldRemove: true, reason: "already marked unhealthy" };
|
|
194
|
+
}
|
|
195
|
+
if (!this.browser.isConnected()) {
|
|
196
|
+
this.isHealthy = false;
|
|
197
|
+
this.metrics.isHealthy = false;
|
|
198
|
+
return { shouldRemove: true, reason: "browser disconnected" };
|
|
199
|
+
}
|
|
200
|
+
if (maxBrowserAgeMs > 0 && now.getTime() - this.metrics.createdAt.getTime() > maxBrowserAgeMs) {
|
|
201
|
+
return { shouldRemove: true, reason: "max age reached" };
|
|
202
|
+
}
|
|
203
|
+
if (this.pages.size === 0 && maxIdleTimeMs > 0 && now.getTime() - this.metrics.lastUsed.getTime() > maxIdleTimeMs) {
|
|
204
|
+
return { shouldRemove: true, reason: "idle timeout" };
|
|
205
|
+
}
|
|
206
|
+
return { shouldRemove: false, reason: "" };
|
|
207
|
+
}
|
|
208
|
+
async close(reason) {
|
|
209
|
+
this.isHealthy = false;
|
|
210
|
+
this.metrics.isHealthy = false;
|
|
211
|
+
console.log(`Closing browser instance ${this.id}, reason: ${reason || "cleanup"}`);
|
|
212
|
+
if (this.browser) {
|
|
213
|
+
this.browser.off("disconnected", this.disconnectedHandler); // Important to remove listener
|
|
214
|
+
try {
|
|
215
|
+
await this.context.close();
|
|
216
|
+
}
|
|
217
|
+
catch (error) {
|
|
218
|
+
console.warn(`Error closing context for instance ${this.id}: ${error.message}`, error);
|
|
219
|
+
}
|
|
220
|
+
try {
|
|
221
|
+
await this.browser.close();
|
|
222
|
+
}
|
|
223
|
+
catch (error) {
|
|
224
|
+
console.warn(`Error closing browser for instance ${this.id}: ${error.message}`, error);
|
|
225
|
+
}
|
|
23
226
|
}
|
|
24
|
-
// Get the plugin instance
|
|
25
|
-
StealthPluginInstance = stealthPluginFactory();
|
|
26
|
-
// Apply the plugin instance to the wrapped chromium object
|
|
27
|
-
chromiumWithExtras.use(StealthPluginInstance);
|
|
28
227
|
}
|
|
29
228
|
}
|
|
30
229
|
/**
|
|
@@ -43,6 +242,7 @@ export class PlaywrightBrowserPool {
|
|
|
43
242
|
blockedDomains;
|
|
44
243
|
blockedResourceTypes;
|
|
45
244
|
proxyConfig;
|
|
245
|
+
launchOptions;
|
|
46
246
|
static DEFAULT_BLOCKED_DOMAINS = [
|
|
47
247
|
"doubleclick.net",
|
|
48
248
|
"google-analytics.com",
|
|
@@ -68,6 +268,11 @@ export class PlaywrightBrowserPool {
|
|
|
68
268
|
"outbrain.com",
|
|
69
269
|
];
|
|
70
270
|
static DEFAULT_BLOCKED_RESOURCE_TYPES = ["image", "font", "media", "websocket"];
|
|
271
|
+
// The acquireQueue is used to serialize all page acquisition requests.
|
|
272
|
+
// With concurrency: 1, it ensures that operations for finding/creating browser instances
|
|
273
|
+
// and then acquiring a page from an instance are processed one at a time.
|
|
274
|
+
// This prevents race conditions when checking pool capacity, creating new browser instances,
|
|
275
|
+
// or selecting an instance from the pool, thus maintaining a consistent state for the pool.
|
|
71
276
|
acquireQueue = new PQueue({ concurrency: 1 });
|
|
72
277
|
constructor(config = {}) {
|
|
73
278
|
this.maxBrowsers = config.maxBrowsers ?? 2;
|
|
@@ -85,6 +290,7 @@ export class PlaywrightBrowserPool {
|
|
|
85
290
|
? config.blockedResourceTypes
|
|
86
291
|
: PlaywrightBrowserPool.DEFAULT_BLOCKED_RESOURCE_TYPES;
|
|
87
292
|
this.proxyConfig = config.proxy;
|
|
293
|
+
this.launchOptions = config.launchOptions;
|
|
88
294
|
}
|
|
89
295
|
async initialize() {
|
|
90
296
|
await loadDependencies(); // Load dependencies first
|
|
@@ -101,8 +307,8 @@ export class PlaywrightBrowserPool {
|
|
|
101
307
|
}
|
|
102
308
|
if (this.healthCheckInterval > 0) {
|
|
103
309
|
this.healthCheckTimer = setTimeout(() => {
|
|
104
|
-
this.healthCheck().catch((
|
|
105
|
-
|
|
310
|
+
this.healthCheck().catch((err) => {
|
|
311
|
+
console.warn(`Scheduled PlaywrightBrowserPool health check process encountered an error: ${err?.message}`, err);
|
|
106
312
|
});
|
|
107
313
|
}, this.healthCheckInterval);
|
|
108
314
|
}
|
|
@@ -121,78 +327,32 @@ export class PlaywrightBrowserPool {
|
|
|
121
327
|
}
|
|
122
328
|
async createBrowserInstance() {
|
|
123
329
|
await loadDependencies(); // Ensure dependencies are loaded
|
|
124
|
-
const
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
proxy: this.proxyConfig,
|
|
139
|
-
};
|
|
140
|
-
// Use the wrapped chromiumWithExtras object to launch
|
|
141
|
-
const browser = await chromiumWithExtras.launch(launchOptions);
|
|
142
|
-
const context = await browser.newContext({
|
|
143
|
-
userAgent: new UserAgent().toString(),
|
|
144
|
-
viewport: {
|
|
145
|
-
width: 1280 + Math.floor(Math.random() * 120),
|
|
146
|
-
height: 720 + Math.floor(Math.random() * 80),
|
|
147
|
-
},
|
|
148
|
-
javaScriptEnabled: true,
|
|
149
|
-
ignoreHTTPSErrors: true,
|
|
150
|
-
});
|
|
151
|
-
await context.route("**/*", async (route) => {
|
|
152
|
-
const request = route.request();
|
|
153
|
-
const url = request.url();
|
|
154
|
-
const resourceType = request.resourceType();
|
|
155
|
-
try {
|
|
156
|
-
const hostname = new URL(url).hostname.toLowerCase();
|
|
157
|
-
if (this.blockedDomains.some((domain) => hostname.includes(domain)) ||
|
|
158
|
-
this.blockedResourceTypes.includes(resourceType)) {
|
|
159
|
-
await route.abort("aborted");
|
|
330
|
+
const instance = new ManagedBrowserInstance({
|
|
331
|
+
useHeadedMode: this.useHeadedMode,
|
|
332
|
+
blockedDomains: this.blockedDomains,
|
|
333
|
+
blockedResourceTypes: this.blockedResourceTypes,
|
|
334
|
+
proxyConfig: this.proxyConfig,
|
|
335
|
+
launchOptions: this.launchOptions,
|
|
336
|
+
onDisconnect: (instanceId) => {
|
|
337
|
+
// Find the instance by ID and remove it from the pool
|
|
338
|
+
let instanceToRemove;
|
|
339
|
+
for (const inst of this.pool) {
|
|
340
|
+
if (inst.id === instanceId) {
|
|
341
|
+
instanceToRemove = inst;
|
|
342
|
+
break;
|
|
343
|
+
}
|
|
160
344
|
}
|
|
161
|
-
|
|
162
|
-
|
|
345
|
+
if (instanceToRemove) {
|
|
346
|
+
this.pool.delete(instanceToRemove);
|
|
347
|
+
console.warn(`Removed disconnected instance ${instanceId} from pool.`);
|
|
348
|
+
// Ensure minimum instances are maintained
|
|
349
|
+
this.ensureMinimumInstances().catch((err) => {
|
|
350
|
+
console.error(`Error ensuring minimum instances after removing disconnected instance ${instanceId}: ${err.message}`, err);
|
|
351
|
+
});
|
|
163
352
|
}
|
|
164
|
-
}
|
|
165
|
-
catch (_e) {
|
|
166
|
-
await route.continue();
|
|
167
|
-
}
|
|
353
|
+
},
|
|
168
354
|
});
|
|
169
|
-
|
|
170
|
-
const metrics = {
|
|
171
|
-
id,
|
|
172
|
-
pagesCreated: 0,
|
|
173
|
-
activePages: 0,
|
|
174
|
-
lastUsed: now,
|
|
175
|
-
errors: 0,
|
|
176
|
-
createdAt: now,
|
|
177
|
-
isHealthy: true,
|
|
178
|
-
};
|
|
179
|
-
const instance = {
|
|
180
|
-
id,
|
|
181
|
-
browser,
|
|
182
|
-
context,
|
|
183
|
-
pages: new Set(),
|
|
184
|
-
metrics,
|
|
185
|
-
isHealthy: true,
|
|
186
|
-
disconnectedHandler: () => { },
|
|
187
|
-
};
|
|
188
|
-
instance.disconnectedHandler = () => {
|
|
189
|
-
if (instance.isHealthy) {
|
|
190
|
-
instance.isHealthy = false;
|
|
191
|
-
instance.metrics.isHealthy = false;
|
|
192
|
-
this.healthCheck().catch((_err) => { });
|
|
193
|
-
}
|
|
194
|
-
};
|
|
195
|
-
browser.on("disconnected", instance.disconnectedHandler);
|
|
355
|
+
await instance.initialize();
|
|
196
356
|
this.pool.add(instance);
|
|
197
357
|
return instance;
|
|
198
358
|
}
|
|
@@ -202,62 +362,50 @@ export class PlaywrightBrowserPool {
|
|
|
202
362
|
throw new Error("Pool is shutting down.");
|
|
203
363
|
}
|
|
204
364
|
let bestInstance = null;
|
|
365
|
+
// Try to find an existing healthy instance that can create more pages
|
|
205
366
|
for (const instance of this.pool) {
|
|
206
|
-
if (instance.
|
|
367
|
+
if (instance.canCreateMorePages(this.maxPagesPerContext)) {
|
|
207
368
|
if (!bestInstance || instance.pages.size < bestInstance.pages.size) {
|
|
208
369
|
bestInstance = instance;
|
|
209
370
|
}
|
|
210
371
|
}
|
|
211
372
|
}
|
|
373
|
+
// If no suitable existing instance, and pool is not full, try to create a new one
|
|
212
374
|
if (!bestInstance && this.pool.size < this.maxBrowsers) {
|
|
213
375
|
try {
|
|
214
376
|
bestInstance = await this.createBrowserInstance();
|
|
215
377
|
}
|
|
216
378
|
catch (error) {
|
|
217
|
-
|
|
379
|
+
console.error(`Failed to create new browser instance during page acquisition: ${error.message}`, error);
|
|
380
|
+
// Don't re-throw immediately, try checking existing pool members again in case one became available
|
|
218
381
|
}
|
|
219
382
|
}
|
|
383
|
+
// If still no instance (either creation failed or pool was full and no suitable instance found), re-check pool
|
|
384
|
+
// This also covers the case where createBrowserInstance succeeded and bestInstance is now set.
|
|
220
385
|
if (!bestInstance) {
|
|
221
|
-
await this.ensureMinimumInstances(); // Try adding an instance if none suitable
|
|
222
386
|
for (const instance of this.pool) {
|
|
223
|
-
|
|
224
|
-
if (instance.isHealthy && instance.pages.size < this.maxPagesPerContext) {
|
|
387
|
+
if (instance.canCreateMorePages(this.maxPagesPerContext)) {
|
|
225
388
|
if (!bestInstance || instance.pages.size < bestInstance.pages.size) {
|
|
226
389
|
bestInstance = instance;
|
|
227
390
|
}
|
|
228
391
|
}
|
|
229
392
|
}
|
|
230
|
-
if (!bestInstance) {
|
|
231
|
-
// Still no instance?
|
|
232
|
-
throw new Error("Failed to acquire Playwright page: No available or creatable browser instance.");
|
|
233
|
-
}
|
|
234
393
|
}
|
|
394
|
+
if (!bestInstance) {
|
|
395
|
+
// After all attempts, if still no instance, then throw.
|
|
396
|
+
throw new Error("Failed to acquire Playwright page: No available or creatable healthy browser instance.");
|
|
397
|
+
}
|
|
398
|
+
// Now, bestInstance should be a valid ManagedBrowserInstance
|
|
235
399
|
try {
|
|
236
|
-
const page = await bestInstance.
|
|
237
|
-
|
|
238
|
-
bestInstance.metrics.pagesCreated++;
|
|
239
|
-
bestInstance.metrics.activePages = bestInstance.pages.size;
|
|
240
|
-
bestInstance.metrics.lastUsed = new Date();
|
|
241
|
-
page.on("close", () => {
|
|
242
|
-
bestInstance.pages.delete(page);
|
|
243
|
-
bestInstance.metrics.activePages = bestInstance.pages.size;
|
|
244
|
-
bestInstance.metrics.lastUsed = new Date();
|
|
245
|
-
});
|
|
246
|
-
page.on("crash", () => {
|
|
247
|
-
bestInstance.metrics.errors++;
|
|
248
|
-
bestInstance.pages.delete(page);
|
|
249
|
-
bestInstance.isHealthy = false;
|
|
250
|
-
bestInstance.metrics.isHealthy = false;
|
|
251
|
-
this.healthCheck().catch((_err) => { });
|
|
252
|
-
});
|
|
400
|
+
const page = await bestInstance.acquirePage();
|
|
401
|
+
// page.on('close') and page.on('crash') are handled within ManagedBrowserInstance.acquirePage()
|
|
253
402
|
return page;
|
|
254
403
|
}
|
|
255
404
|
catch (error) {
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
bestInstance.
|
|
259
|
-
|
|
260
|
-
throw new Error(`Failed to create new page: ${error.message}`);
|
|
405
|
+
// If page acquisition from the chosen instance fails, that instance would have marked itself unhealthy
|
|
406
|
+
// and called onDisconnect, which triggers the pool to re-evaluate. We should throw here.
|
|
407
|
+
console.error(`Failed to acquire page from instance ${bestInstance.id} (it might have become unhealthy): ${error.message}`, error);
|
|
408
|
+
throw new Error(`Failed to acquire page from instance ${bestInstance.id}: ${error.message}`); // Re-throw to signal failure to the caller
|
|
261
409
|
}
|
|
262
410
|
});
|
|
263
411
|
}
|
|
@@ -265,64 +413,41 @@ export class PlaywrightBrowserPool {
|
|
|
265
413
|
if (this.isCleaningUp)
|
|
266
414
|
return;
|
|
267
415
|
const now = new Date();
|
|
268
|
-
const
|
|
416
|
+
const instancesToRemove = [];
|
|
269
417
|
for (const instance of this.pool) {
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
instance.pages.size === 0 &&
|
|
289
|
-
this.maxIdleTime > 0 &&
|
|
290
|
-
now.getTime() - instance.metrics.lastUsed.getTime() > this.maxIdleTime) {
|
|
291
|
-
shouldRemove = true;
|
|
292
|
-
reason = "idle timeout";
|
|
293
|
-
}
|
|
294
|
-
if (shouldRemove) {
|
|
295
|
-
instance.isHealthy = false;
|
|
296
|
-
instance.metrics.isHealthy = false;
|
|
297
|
-
await this.closeAndRemoveInstance(instance, reason);
|
|
298
|
-
}
|
|
299
|
-
else {
|
|
300
|
-
instance.isHealthy = true;
|
|
301
|
-
instance.metrics.isHealthy = true;
|
|
302
|
-
}
|
|
303
|
-
})().catch((_err) => { }));
|
|
418
|
+
const healthStatus = instance.checkHealth(now, this.maxBrowserAge, this.maxIdleTime);
|
|
419
|
+
if (healthStatus.shouldRemove) {
|
|
420
|
+
// Mark for removal, but don't modify the set while iterating
|
|
421
|
+
instancesToRemove.push(instance);
|
|
422
|
+
console.log(`Instance ${instance.id} marked for removal due to health check: ${healthStatus.reason}`);
|
|
423
|
+
}
|
|
424
|
+
else {
|
|
425
|
+
// Ensure instance.isHealthy and metrics.isHealthy are up-to-date if checkHealth didn't mark for removal
|
|
426
|
+
// (e.g. if it was previously unhealthy but now browser.isConnected() is true again - unlikely but good to be robust)
|
|
427
|
+
instance.isHealthy = instance.browser.isConnected();
|
|
428
|
+
instance.metrics.isHealthy = instance.isHealthy;
|
|
429
|
+
}
|
|
430
|
+
}
|
|
431
|
+
// Close and remove unhealthy/aged/idle instances
|
|
432
|
+
if (instancesToRemove.length > 0) {
|
|
433
|
+
const removalPromises = instancesToRemove.map((instance) => this.closeAndRemoveInstance(instance, `health check: ${instance.metrics.id} failed`) // Using metrics.id in reason might be redundant
|
|
434
|
+
);
|
|
435
|
+
await Promise.allSettled(removalPromises);
|
|
304
436
|
}
|
|
305
437
|
try {
|
|
306
|
-
await
|
|
438
|
+
await this.ensureMinimumInstances(); // Ensure minimum instances after potential removals
|
|
307
439
|
}
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
this.scheduleHealthCheck();
|
|
440
|
+
catch (error) {
|
|
441
|
+
console.error(`Error ensuring minimum instances during health check: ${error.message}`, error);
|
|
311
442
|
}
|
|
443
|
+
this.scheduleHealthCheck(); // Reschedule the next health check
|
|
312
444
|
}
|
|
313
|
-
async closeAndRemoveInstance(instance,
|
|
445
|
+
async closeAndRemoveInstance(instance, reason) {
|
|
314
446
|
const removed = this.pool.delete(instance);
|
|
315
447
|
if (!removed)
|
|
316
|
-
return;
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
await instance.context.close();
|
|
320
|
-
}
|
|
321
|
-
catch (_error) { }
|
|
322
|
-
try {
|
|
323
|
-
await instance.browser.close();
|
|
324
|
-
}
|
|
325
|
-
catch (_error) { }
|
|
448
|
+
return; // Instance was not in the pool or already removed
|
|
449
|
+
// The ManagedBrowserInstance is responsible for its own internal cleanup, including listeners.
|
|
450
|
+
await instance.close(reason);
|
|
326
451
|
}
|
|
327
452
|
async releasePage(page) {
|
|
328
453
|
if (!page || page.isClosed())
|
|
@@ -334,21 +459,27 @@ export class PlaywrightBrowserPool {
|
|
|
334
459
|
break;
|
|
335
460
|
}
|
|
336
461
|
}
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
ownerInstance.
|
|
341
|
-
|
|
342
|
-
|
|
462
|
+
if (ownerInstance) {
|
|
463
|
+
try {
|
|
464
|
+
// ManagedBrowserInstance.releasePage will handle closing the page and updating its own metrics.
|
|
465
|
+
await ownerInstance.releasePage(page);
|
|
466
|
+
}
|
|
467
|
+
catch (error) {
|
|
468
|
+
// If releasePage in ManagedBrowserInstance itself throws (e.g., error during page.close()),
|
|
469
|
+
// that method should handle marking the instance as unhealthy if necessary.
|
|
470
|
+
// Log here for pool-level visibility.
|
|
471
|
+
console.warn(`Error while instance ${ownerInstance.id} was releasing page: ${error.message}`, error);
|
|
472
|
+
// The instance's own error handling (e.g. in acquirePage or crash handler) should trigger onDisconnect
|
|
473
|
+
// if the instance becomes critically unhealthy.
|
|
343
474
|
}
|
|
344
475
|
}
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
476
|
+
else {
|
|
477
|
+
// Page not found in any managed instance, try to close it as a orphaned page.
|
|
478
|
+
try {
|
|
479
|
+
await page.close();
|
|
480
|
+
}
|
|
481
|
+
catch (error) {
|
|
482
|
+
console.warn(`Error closing an orphaned page (not found in any pool instance): ${error.message}`, error);
|
|
352
483
|
}
|
|
353
484
|
}
|
|
354
485
|
}
|
|
@@ -362,17 +493,15 @@ export class PlaywrightBrowserPool {
|
|
|
362
493
|
}
|
|
363
494
|
this.acquireQueue.clear();
|
|
364
495
|
await this.acquireQueue.onIdle();
|
|
365
|
-
|
|
366
|
-
this.pool
|
|
496
|
+
// Create a copy of the pool to iterate over, as closeAndRemoveInstance modifies the original set.
|
|
497
|
+
const instancesToClose = Array.from(this.pool);
|
|
498
|
+
const closePromises = instancesToClose.map((instance) => this.closeAndRemoveInstance(instance, "pool cleanup"));
|
|
499
|
+
this.pool.clear(); // Clear the main pool set immediately
|
|
367
500
|
await Promise.allSettled(closePromises);
|
|
368
501
|
this.isCleaningUp = false;
|
|
369
502
|
}
|
|
370
503
|
getMetrics() {
|
|
371
|
-
return [...this.pool].map((instance) =>
|
|
372
|
-
...instance.metrics,
|
|
373
|
-
activePages: instance.pages.size,
|
|
374
|
-
isHealthy: instance.isHealthy,
|
|
375
|
-
}));
|
|
504
|
+
return [...this.pool].map((instance) => instance.metrics);
|
|
376
505
|
}
|
|
377
506
|
}
|
|
378
507
|
//# sourceMappingURL=PlaywrightBrowserPool.js.map
|