@pablovitasso/szkrabok 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +114 -0
  3. package/package.json +124 -0
  4. package/packages/runtime/config.js +173 -0
  5. package/packages/runtime/index.js +10 -0
  6. package/packages/runtime/launch.js +240 -0
  7. package/packages/runtime/logger.js +42 -0
  8. package/packages/runtime/mcp-client/adapters/szkrabok-session.js +69 -0
  9. package/packages/runtime/mcp-client/codegen/generate-mcp-tools.mjs +66 -0
  10. package/packages/runtime/mcp-client/codegen/render-tools.js +219 -0
  11. package/packages/runtime/mcp-client/codegen/schema-to-jsdoc.js +60 -0
  12. package/packages/runtime/mcp-client/mcp-tools.d.ts +92 -0
  13. package/packages/runtime/mcp-client/mcp-tools.js +99 -0
  14. package/packages/runtime/mcp-client/runtime/invoker.js +95 -0
  15. package/packages/runtime/mcp-client/runtime/logger.js +145 -0
  16. package/packages/runtime/mcp-client/runtime/transport.js +35 -0
  17. package/packages/runtime/package.json +25 -0
  18. package/packages/runtime/pool.js +59 -0
  19. package/packages/runtime/scripts/patch-playwright.js +736 -0
  20. package/packages/runtime/sessions.js +77 -0
  21. package/packages/runtime/stealth.js +232 -0
  22. package/packages/runtime/storage.js +64 -0
  23. package/scripts/detect_browsers.sh +147 -0
  24. package/scripts/patch-playwright.js +736 -0
  25. package/scripts/postinstall.js +47 -0
  26. package/scripts/release-publish.js +19 -0
  27. package/scripts/release-reminder.js +14 -0
  28. package/scripts/setup.js +17 -0
  29. package/src/cli.js +166 -0
  30. package/src/config.js +36 -0
  31. package/src/index.js +53 -0
  32. package/src/server.js +40 -0
  33. package/src/tools/registry.js +171 -0
  34. package/src/tools/scaffold.js +133 -0
  35. package/src/tools/szkrabok_browser.js +227 -0
  36. package/src/tools/szkrabok_session.js +174 -0
  37. package/src/tools/templates/automation/example.mcp.spec.js +54 -0
  38. package/src/tools/templates/automation/example.spec.js +29 -0
  39. package/src/tools/templates/automation/fixtures.js +59 -0
  40. package/src/tools/templates/playwright.config.js +10 -0
  41. package/src/tools/templates/szkrabok.config.local.toml.example +12 -0
  42. package/src/tools/workflow.js +45 -0
  43. package/src/utils/errors.js +36 -0
  44. package/src/utils/logger.js +64 -0
@@ -0,0 +1,736 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * patch-playwright.js
4
+ *
5
+ * ── Goal ──────────────────────────────────────────────────────────────────────
6
+ * Apply anti-bot detection fixes directly to compiled playwright-core lib/
7
+ * files in node_modules. Pattern-based string replacement — more resilient
8
+ * than line-number .patch files, independent of any third-party patcher.
9
+ *
10
+ * ── What it fixes ─────────────────────────────────────────────────────────────
11
+ *
12
+ * FIX A — Runtime.enable CDP leak (patches #2 #3 #4 #5 #6)
13
+ * Playwright calls CDP Runtime.enable on every frame/worker/service-worker.
14
+ * Anti-bot systems (Cloudflare, DataDome) detect this and flag the session.
15
+ * Fix: suppress Runtime.enable; obtain execution-context IDs on-demand via
16
+ * an addBinding round-trip (__re__getMainWorld) or Page.createIsolatedWorld
17
+ * (__re__getIsolatedWorld) instead.
18
+ *
19
+ * FIX B — sourceUrlLeak (patch #7)
20
+ * The rebrowser bot-detector overrides document.getElementById and inspects
21
+ * new Error().stack for the string "UtilityScript." (class.method notation).
22
+ * Playwright injects a compiled bundle with class UtilityScript into every
23
+ * page context; all page.evaluate() calls run through UtilityScript.evaluate().
24
+ * Fix: rename the class to __pwUs inside the compiled bundle. The export key
25
+ * "UtilityScript" stays unchanged so internal playwright code is unaffected.
26
+ * NOTE: rebrowser-patches does NOT include this fix for playwright (only for
27
+ * puppeteer). This is an original szkrabok addition.
28
+ *
29
+ * FIX C — userAgentData brands (patch #8)
30
+ * calculateUserAgentMetadata never sets `brands`; patch #8 appends generated brands (see inline comment on patch #8).
31
+ *
32
+ * ── The utility-world name bug and waitForSelector fix ─────────────────────────
33
+ * After suppressing Runtime.enable, playwright never receives automatic
34
+ * Runtime.executionContextCreated events. We emit them manually from
35
+ * __re__emitExecutionContext. The emitted contextPayload must include the
36
+ * exact utility world name that crPage.js checks when registering a context:
37
+ *
38
+ * crPage.js: this.utilityWorldName = `__playwright_utility_world_${this._page.guid}`
39
+ * crPage.js: if (contextPayload.name === this._crPage.utilityWorldName) worldName = "utility"
40
+ *
41
+ * The name is per-page (includes a GUID) and CANNOT be hardcoded.
42
+ * We pass it explicitly from frames.js patch #5b, where the frame has
43
+ * access to this._page.delegate.utilityWorldName (the CRPage instance).
44
+ * crConnection.js receives it as `callerUtilityWorldName` — it stays
45
+ * decoupled from CRPage internals so only frames.js needs updating if the
46
+ * property moves upstream.
47
+ * Without this fix: waitForSelector / locators / page.click all hang forever
48
+ * because the utility world context is never registered.
49
+ *
50
+ * ── Multiple playwright-core installs ─────────────────────────────────────────
51
+ * npm may install playwright-core in two locations:
52
+ * node_modules/playwright-core — used by the MCP server
53
+ * node_modules/playwright/node_modules/playwright-core — used by the
54
+ * test runner (browser.run_test spawns `npx playwright test` which
55
+ * resolves playwright-core through its own nested copy)
56
+ * Both must be patched. This script finds and patches all copies.
57
+ * Each patched install gets a `.szkrabok-patched` stamp file next to
58
+ * package.json so patches are visible at a glance.
59
+ * To re-patch after a version bump: rm -rf both dirs, npm install, re-run.
60
+ *
61
+ * ── Upstream merge survival ────────────────────────────────────────────────────
62
+ * Each patch uses a search string anchored to a stable code pattern.
63
+ * When a patch fails (pattern not found), update the search string to match
64
+ * the new compiled source and re-run. Per-patch fragility notes are inline.
65
+ * Key things to re-verify after any playwright-core version bump:
66
+ * - crPage.js still has `utilityWorldName` property (used by patch #5b)
67
+ * - crPage.js still matches context by name === utilityWorldName (patch #1)
68
+ * - Worker constructor signature (patches #3b, #6a)
69
+ * - PageBinding.dispatch still parses JSON payload (patch #6c)
70
+ * Reference: vendor/rebrowser-patches/patches/playwright-core/src.patch
71
+ * Reference: docs/rebrowser-patches-research.md
72
+ * Reference: docs/waitForSelector-bug.md
73
+ *
74
+ * ── Source of logic ───────────────────────────────────────────────────────────
75
+ * Derived from rebrowser-patches (https://github.com/rebrowser/rebrowser-patches)
76
+ * MIT licence. We re-implement as pattern-based replacements.
77
+ * Reference copy: vendor/rebrowser-patches/ (gitignored, update with git pull)
78
+ *
79
+ * ── Behaviour flags (env vars, same as rebrowser-patches) ─────────────────────
80
+ * REBROWSER_PATCHES_RUNTIME_FIX_MODE=addBinding (default — safest)
81
+ * REBROWSER_PATCHES_RUNTIME_FIX_MODE=alwaysIsolated
82
+ * REBROWSER_PATCHES_RUNTIME_FIX_MODE=enableDisable
83
+ * REBROWSER_PATCHES_RUNTIME_FIX_MODE=0 (disable all fixes)
84
+ * REBROWSER_PATCHES_DEBUG=1 (verbose logging)
85
+ *
86
+ * ── Atomicity / rollback ──────────────────────────────────────────────────────
87
+ * Before touching any file the script writes <file>.bak to disk.
88
+ * If ANY patch step fails, all modified files for that install are restored
89
+ * from .bak and the process exits non-zero with a diagnostic.
90
+ * On success .bak files are deleted.
91
+ */
92
+
93
+ import fs from 'fs'
94
+ import path from 'path'
95
+ import { execSync } from 'child_process'
96
+ import { createRequire } from 'module'
97
+
98
+ const require = createRequire(import.meta.url)
99
+
100
+ // ── locate all playwright-core installs ───────────────────────────────────────
101
+ // npm hoists one copy to node_modules/playwright-core but playwright itself
102
+ // may carry its own nested copy at node_modules/playwright/node_modules/playwright-core.
103
+ // Both must be patched — the MCP server uses the hoisted one, the test runner
104
+ // (spawned by browser.run_test) uses whichever playwright/test resolves.
105
+
106
/**
 * Locate every playwright-core install that must be patched.
 *
 * The search root is `<INIT_CWD or cwd>/node_modules`. The hoisted copy
 * (`node_modules/playwright-core`) is always listed first when present;
 * nested copies (e.g. `node_modules/playwright/node_modules/playwright-core`)
 * follow in directory order.
 *
 * The walk uses only `fs` — no shell — so it behaves the same on every
 * platform and is unaffected by quotes, `$` or backticks in the project path.
 * It keeps the reach of the previous `find -maxdepth 4` lookup: a
 * `playwright-core` directory is considered only when it sits at most three
 * levels below `node_modules`, and symlinked directories are not descended into.
 *
 * @returns {string[]} Absolute paths of playwright-core package roots
 *   (directories that contain a package.json), without duplicates.
 */
function findPkgRoots() {
  const nmDir = path.join(process.env.INIT_CWD || process.cwd(), 'node_modules')
  const roots = []

  // Register a candidate once, and only if it really is a package root.
  const addRoot = dir => {
    if (!roots.includes(dir) && fs.existsSync(path.join(dir, 'package.json'))) roots.push(dir)
  }

  // 1. top-level playwright-core
  addRoot(path.join(nmDir, 'playwright-core'))

  // 2. any nested playwright-core inside other packages
  const MAX_DIR_DEPTH = 3
  const walk = (dir, depth) => {
    let entries
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true })
    } catch {
      // Missing or unreadable directory: discovery is best-effort, keep going.
      return
    }
    for (const entry of entries) {
      // Dirent#isDirectory() is false for symlinks, so links are never followed.
      if (!entry.isDirectory()) continue
      const child = path.join(dir, entry.name)
      if (entry.name === 'playwright-core') addRoot(child)
      if (depth < MAX_DIR_DEPTH) walk(child, depth + 1)
    }
  }
  walk(nmDir, 1)

  return roots
}
129
+
130
+ const pkgRoots = findPkgRoots() // every playwright-core package root found under node_modules
130
+ if (!pkgRoots.length) { // nothing to patch: report and abort with a non-zero exit code
131
+ console.error('[patch-playwright] ERROR: playwright-core not found in node_modules.')
132
+ console.error(' Run `npm install` first.')
133
+ process.exit(1)
134
+ }
136
+
137
+ // ── helpers ───────────────────────────────────────────────────────────────────
138
+ // (read/write/backup/rollback/removeBaks are defined per-install inside the run loop)
139
+
140
/**
 * Apply a single named replacement to content.
 *
 * Only the first occurrence of `searchStr` is replaced. The replacement text
 * is inserted verbatim: it is supplied through a replacer function so that
 * `$&`, `$'`, `` $` `` and `$$` inside injected code are NOT expanded as
 * String.prototype.replace substitution patterns.
 *
 * @param {string} file - Short file name, used only in the error message.
 * @param {string} content - Current source text of the file being patched.
 * @param {string} searchStr - Exact text that must be present in `content`.
 * @param {string} replacement - Text substituted for `searchStr`.
 * @param {string} label - Human-readable patch name for diagnostics.
 * @returns {string} The patched content.
 * @throws {Error} With a descriptive message if the search string is not found.
 */
function replace(file, content, searchStr, replacement, label) {
  if (!content.includes(searchStr)) {
    throw new Error(
      `[patch-playwright] Pattern not found in ${file}\n` +
        ` patch: "${label}"\n` +
        ` searched for: ${searchStr.slice(0, 120).replace(/\n/g, '\\n')}\n\n` +
        ` Update the search string in scripts/patch-playwright.js to match the new source.\n` +
        ` Reference: vendor/rebrowser-patches/patches/playwright-core/src.patch`
    )
  }
  // A function replacer bypasses `$`-pattern expansion in the replacement text.
  return content.replace(searchStr, () => replacement)
}
156
+
157
+ // ── patch definitions ─────────────────────────────────────────────────────────
158
+
159
+ const patches = [
160
+ // ── 1. crConnection.js — inject __re__ helpers into CRSession ────────────────
161
+ // Adds three methods to CRSession (the CDP session class):
162
+ // __re__emitExecutionContext — top-level coordinator; emits the
163
+ // Runtime.executionContextCreated event that playwright needs
164
+ // __re__getMainWorld — gets the main-world context ID via
165
+ // Runtime.addBinding round-trip (avoids Runtime.enable)
166
+ // __re__getIsolatedWorld — gets an isolated-world context ID via
167
+ // Page.createIsolatedWorld
168
+ // NOTE(review): the injected code only handles fixMode 'addBinding' and 'alwaysIsolated'; for any other non-'0' value (e.g. 'enableDisable') getWorldPromise stays undefined — confirm intended.
169
+ // KEY DESIGN: callerUtilityWorldName is passed in from frames.js (patch #5b)
170
+ // rather than derived here from frame._page.delegate.utilityWorldName.
171
+ // Reason: crConnection.js should not know CRPage internals. If the property
172
+ // moves upstream, only frames.js patch #5b needs updating.
173
+ //
174
+ // UPSTREAM FRAGILITY:
175
+ // Anchor: end of CRSession class (this._callbacks.clear()) + start of
176
+ // CDPSession class. Stable — class boundaries rarely move.
177
+ // If it breaks: find the end of CRSession.dispose() and start of CDPSession.
178
+ {
179
+ file: 'server/chromium/crConnection.js',
180
+ steps: src => {
181
+ // Insert the three helper methods just before the closing brace of
182
+ // CRSession (right after this._callbacks.clear(); })
183
+ const anchor = ` this._callbacks.clear();
184
+ }
185
+ }
186
+ class CDPSession`
187
+
188
+ const injection = ` this._callbacks.clear();
189
+ }
190
+
191
+ // ── rebrowser Runtime.enable fix ──────────────────────────────────────────
192
+ // Obtains an execution-context ID for a given world without calling
193
+ // Runtime.enable, which is detectable by anti-bot systems.
194
+ async __re__emitExecutionContext({ world, targetId, frame = null, utilityWorldName: callerUtilityWorldName }) {
195
+ const fixMode = process.env['REBROWSER_PATCHES_RUNTIME_FIX_MODE'] || 'addBinding'
196
+ const utilityWorldName =
197
+ process.env['REBROWSER_PATCHES_UTILITY_WORLD_NAME'] !== '0'
198
+ ? (process.env['REBROWSER_PATCHES_UTILITY_WORLD_NAME'] || 'util')
199
+ : '__playwright_utility_world__'
200
+ if (process.env['REBROWSER_PATCHES_DEBUG'])
201
+ console.log(\`[rebrowser-patches][crSession] targetId=\${targetId} world=\${world} frame=\${frame ? 'Y' : 'N'} fixMode=\${fixMode}\`)
202
+
203
+ let getWorldPromise
204
+ if (fixMode === 'addBinding') {
205
+ if (world === 'utility') {
206
+ getWorldPromise = this.__re__getIsolatedWorld({ client: this, frameId: targetId, worldName: utilityWorldName })
207
+ .then(contextId => ({ id: contextId, name: callerUtilityWorldName || '__playwright_utility_world__', auxData: { frameId: targetId, isDefault: false } }))
208
+ } else if (world === 'main') {
209
+ getWorldPromise = this.__re__getMainWorld({ client: this, frameId: targetId, isWorker: frame === null })
210
+ .then(contextId => ({ id: contextId, name: '', auxData: { frameId: targetId, isDefault: true } }))
211
+ }
212
+ } else if (fixMode === 'alwaysIsolated') {
213
+ getWorldPromise = this.__re__getIsolatedWorld({ client: this, frameId: targetId, worldName: utilityWorldName })
214
+ .then(contextId => ({ id: contextId, name: '', auxData: { frameId: targetId, isDefault: true } }))
215
+ }
216
+
217
+ const contextPayload = await getWorldPromise
218
+ this.emit('Runtime.executionContextCreated', { context: contextPayload })
219
+ }
220
+
221
+ async __re__getMainWorld({ client, frameId, isWorker = false }) {
222
+ let contextId
223
+ const randomName = [...Array(Math.floor(Math.random() * 11) + 10)]
224
+ .map(() => Math.random().toString(36)[2]).join('')
225
+ if (process.env['REBROWSER_PATCHES_DEBUG'])
226
+ console.log(\`[rebrowser-patches][getMainWorld] binding=\${randomName}\`)
227
+
228
+ await client.send('Runtime.addBinding', { name: randomName })
229
+
230
+ const bindingCalledHandler = ({ name, payload, executionContextId }) => {
231
+ if (contextId > 0 || name !== randomName || payload !== frameId) return
232
+ contextId = executionContextId
233
+ client.off('Runtime.bindingCalled', bindingCalledHandler)
234
+ }
235
+ client.on('Runtime.bindingCalled', bindingCalledHandler)
236
+
237
+ if (isWorker) {
238
+ await client.send('Runtime.evaluate', { expression: \`this['\${randomName}']('\${frameId}')\` })
239
+ } else {
240
+ await client.send('Page.addScriptToEvaluateOnNewDocument', {
241
+ source: \`document.addEventListener('\${randomName}', (e) => self['\${randomName}'](e.detail.frameId))\`,
242
+ runImmediately: true,
243
+ })
244
+ const isolated = await client.send('Page.createIsolatedWorld', { frameId, worldName: randomName, grantUniveralAccess: true })
245
+ await client.send('Runtime.evaluate', {
246
+ expression: \`document.dispatchEvent(new CustomEvent('\${randomName}', { detail: { frameId: '\${frameId}' } }))\`,
247
+ contextId: isolated.executionContextId,
248
+ })
249
+ }
250
+ if (process.env['REBROWSER_PATCHES_DEBUG'])
251
+ console.log(\`[rebrowser-patches][getMainWorld] contextId=\${contextId}\`)
252
+ return contextId
253
+ }
254
+
255
+ async __re__getIsolatedWorld({ client, frameId, worldName }) {
256
+ const result = await client.send('Page.createIsolatedWorld', { frameId, worldName, grantUniveralAccess: true })
257
+ if (process.env['REBROWSER_PATCHES_DEBUG'])
258
+ console.log('[rebrowser-patches][getIsolatedWorld]', result)
259
+ return result.executionContextId
260
+ }
261
+ // ── end rebrowser fix ──────────────────────────────────────────────────────
262
+ }
263
+ class CDPSession`
264
+
265
+ return replace('crConnection.js', src, anchor, injection, 'inject __re__ helpers')
266
+ },
267
+ },
268
+
269
+ // ── 2. crDevTools.js — suppress Runtime.enable ────────────────────────────────
270
+ // crDevTools.js enables the runtime for DevTools protocol sessions.
271
+ // UPSTREAM FRAGILITY: anchor is the literal `session.send("Runtime.enable"),`
272
+ // inside a Promise.all([...]). If the surrounding code is refactored or
273
+ // the Promise.all is removed, update the search string.
274
+ {
275
+ file: 'server/chromium/crDevTools.js',
276
+ steps: src => {
277
+ return replace(
278
+ 'crDevTools.js',
279
+ src,
280
+ ` session.send("Runtime.enable"),`,
281
+ ` (() => { if (process.env['REBROWSER_PATCHES_RUNTIME_FIX_MODE'] === '0') return session.send('Runtime.enable', {}) })(),`,
282
+ 'suppress Runtime.enable in crDevTools'
283
+ )
284
+ },
285
+ },
286
+
287
+ // ── 3. crPage.js — suppress Runtime.enable (page + worker) ───────────────────
288
+ // Three changes:
289
+ // 3a. Suppress page-level Runtime.enable in the session setup Promise.all
290
+ // 3b. Pass targetId + session to the Worker constructor (needed by patch #6)
291
+ // 3c. Suppress worker-level Runtime.enable
292
+ // UPSTREAM FRAGILITY:
293
+ // 3a: anchor is `this._client.send("Runtime.enable", {})` inside Promise.all
294
+ // 3b: anchor is `new import_page.Worker(this._page, url)` — if the Worker
295
+ // constructor gains/loses args upstream, both this line AND patch #6a
296
+ // must be updated together
297
+ // 3c: anchor is `session._sendMayFail("Runtime.enable")` in the worker handler
298
+ {
299
+ file: 'server/chromium/crPage.js',
300
+ steps: src => {
301
+ // 3a. page-level Runtime.enable
302
+ src = replace(
303
+ 'crPage.js',
304
+ src,
305
+ ` this._client.send("Runtime.enable", {}),`,
306
+ ` (() => { if (process.env['REBROWSER_PATCHES_RUNTIME_FIX_MODE'] === '0') return this._client.send('Runtime.enable', {}) })(),`,
307
+ 'suppress Runtime.enable for page in crPage'
308
+ )
309
+ // 3b. pass targetId/session to the Worker constructor (consumed by patch #6a)
310
+ src = replace(
311
+ 'crPage.js',
312
+ src,
313
+ ` const worker = new import_page.Worker(this._page, url);`,
314
+ ` const worker = new import_page.Worker(this._page, url, event.targetInfo.targetId, session);`,
315
+ 'pass targetId+session to Worker constructor'
316
+ )
317
+ src = replace( // 3c. suppress worker-level Runtime.enable
318
+ 'crPage.js',
319
+ src,
320
+ ` session._sendMayFail("Runtime.enable");`,
321
+ ` if (process.env['REBROWSER_PATCHES_RUNTIME_FIX_MODE'] === '0') session._sendMayFail('Runtime.enable');`,
322
+ 'suppress Runtime.enable for worker in crPage'
323
+ )
324
+ return src
325
+ },
326
+ },
327
+
328
+ // ── 4. crServiceWorker.js — suppress Runtime.enable ──────────────────────────
329
+ // UPSTREAM FRAGILITY: anchor includes the .catch((e) => {}) pattern.
330
+ // If the catch block changes (e.g. adds a log line), update both lines.
331
+ {
332
+ file: 'server/chromium/crServiceWorker.js',
333
+ steps: src => {
334
+ return replace(
335
+ 'crServiceWorker.js',
336
+ src,
337
+ ` session.send("Runtime.enable", {}).catch((e) => {
338
+ });`,
339
+ ` if (process.env['REBROWSER_PATCHES_RUNTIME_FIX_MODE'] === '0') {
340
+ session.send('Runtime.enable', {}).catch((e) => {})
341
+ }`,
342
+ 'suppress Runtime.enable in crServiceWorker'
343
+ )
344
+ },
345
+ },
346
+
347
+ // ── 5. frames.js — emit executionContextsCleared + rewire _context() ─────────
348
+ // Two changes:
349
+ // 5a. After each frame commit (navigation), emit executionContextsCleared on
350
+ // the CRSession so existing context IDs are invalidated and re-acquired
351
+ // on next use. Without this, stale context IDs from before navigation
352
+ // are used and evaluate() calls fail silently.
353
+ // 5b. Rewire Frame._context() to lazily call __re__emitExecutionContext when
354
+ // the context hasn't been established yet (which is always, since we
355
+ // suppressed Runtime.enable in patches #2-#4).
356
+ //
357
+ // CRITICAL — utilityWorldName passing:
358
+ // This patch passes `this._page.delegate?.utilityWorldName` to
359
+ // __re__emitExecutionContext as `utilityWorldName`. This is the per-page
360
+ // GUID-suffixed name that crPage.js uses to register a context as the
361
+ // utility world (crPage.js: contextPayload.name === this._crPage.utilityWorldName).
362
+ // Without this, waitForSelector / locators / page.click hang forever —
363
+ // the utility world context is created but never registered. See:
364
+ // docs/waitForSelector-bug.md for full investigation.
365
+ // NOTE(review): the injected .catch() in 5b logs unrecognised errors and then resolves to undefined, so _context() callers may receive undefined — confirm intended.
366
+ // UPSTREAM FRAGILITY:
367
+ // 5a: anchor spans _recalculateNetworkIdle + _onLifecycleEvent("commit").
368
+ // If the frame commit lifecycle changes, update both lines.
369
+ // 5b: anchor is the entire _context(world) function body (4 lines).
370
+ // If playwright refactors _context() (e.g. adds parameters, renames),
371
+ // update the search string.
372
+ // Also: if crPage.js renames `utilityWorldName` property, update the
373
+ // `this._page.delegate?.utilityWorldName` reference in the replacement.
374
+ {
375
+ file: 'server/frames.js',
376
+ steps: src => {
377
+ // 5a. emit executionContextsCleared on commit so CRConnection knows
378
+ // to re-acquire context IDs after navigation
379
+ src = replace(
380
+ 'frames.js',
381
+ src,
382
+ ` this._page.mainFrame()._recalculateNetworkIdle(this);
383
+ this._onLifecycleEvent("commit");
384
+ }`,
385
+ ` this._page.mainFrame()._recalculateNetworkIdle(this);
386
+ this._onLifecycleEvent("commit");
387
+ const crSession = (this._page.delegate._sessions?.get(this._id) || this._page.delegate._mainFrameSession)?._client
388
+ if (crSession) crSession.emit('Runtime.executionContextsCleared')
389
+ }`,
390
+ 'emit executionContextsCleared on commit'
391
+ )
392
+ // 5b. rewire Frame._context() to use __re__emitExecutionContext instead
393
+ // of waiting on the contextPromise that never resolves without Runtime.enable
394
+ src = replace(
395
+ 'frames.js',
396
+ src,
397
+ ` _context(world) {
398
+ return this._contextData.get(world).contextPromise.then((contextOrDestroyedReason) => {
399
+ if (contextOrDestroyedReason instanceof js.ExecutionContext)
400
+ return contextOrDestroyedReason;
401
+ throw new Error(contextOrDestroyedReason.destroyedReason);
402
+ });
403
+ }`,
404
+ ` _context(world, useContextPromise = false) {
405
+ if (process.env['REBROWSER_PATCHES_RUNTIME_FIX_MODE'] === '0' || this._contextData.get(world).context || useContextPromise) {
406
+ return this._contextData.get(world).contextPromise.then((contextOrDestroyedReason) => {
407
+ if (contextOrDestroyedReason instanceof js.ExecutionContext)
408
+ return contextOrDestroyedReason;
409
+ throw new Error(contextOrDestroyedReason.destroyedReason);
410
+ });
411
+ }
412
+ const crSession = (this._page.delegate._sessions?.get(this._id) || this._page.delegate._mainFrameSession)?._client
413
+ return crSession.__re__emitExecutionContext({ world, targetId: this._id, frame: this, utilityWorldName: this._page.delegate?.utilityWorldName })
414
+ .then(() => this._context(world, true))
415
+ .catch(error => {
416
+ if (error.message.includes('No frame for given id found'))
417
+ return { destroyedReason: 'Frame was detached' }
418
+ console.error('[rebrowser-patches][frames._context] error:', error)
419
+ })
420
+ }`,
421
+ 'rewire Frame._context to use __re__emitExecutionContext'
422
+ )
423
+ return src
424
+ },
425
+ },
426
+
427
+ // ── 6. page.js — update Worker constructor + guard PageBinding.dispatch ───────
428
+ // Three changes:
429
+ // 6a. Worker constructor: accept the targetId + session args added by patch #3b
430
+ // and store them for use in getExecutionContext()
431
+ // 6b. Add getExecutionContext() to Worker; rewire evaluateExpression /
432
+ // evaluateExpressionHandle to use it. On first call, triggers
433
+ // __re__emitExecutionContext for the worker's main world.
434
+ // 6c. Guard PageBinding.dispatch against non-JSON payloads: the addBinding
435
+ // round-trip in __re__getMainWorld fires Runtime.bindingCalled with a
436
+ // raw string payload (not our JSON envelope), which would crash JSON.parse.
437
+ //
438
+ // UPSTREAM FRAGILITY:
439
+ // 6a: anchor is `constructor(parent, url)` + first two lines of Worker body.
440
+ // Must stay in sync with patch #3b (which adds the extra args at call site).
441
+ // If Worker constructor changes, update both #3b and #6a together.
442
+ // 6b: anchor spans both evaluateExpression + evaluateExpressionHandle.
443
+ // If either method signature changes, update the search string.
444
+ // 6c: anchor is `static async dispatch(page, payload, context)` + JSON.parse line.
445
+ // If dispatch is refactored, update search string.
446
+ {
447
+ file: 'server/page.js',
448
+ steps: src => {
449
+ // 6a. Worker constructor: accept targetId + session
450
+ src = replace(
451
+ 'page.js',
452
+ src,
453
+ ` constructor(parent, url) {
454
+ super(parent, "worker");
455
+ this._executionContextPromise = new import_manualPromise.ManualPromise();`,
456
+ ` constructor(parent, url, targetId, session) {
457
+ super(parent, "worker");
458
+ this._executionContextPromise = new import_manualPromise.ManualPromise();
459
+ this._targetId = targetId
460
+ this._session = session`,
461
+ 'Worker constructor accept targetId+session'
462
+ )
463
+ // 6b. evaluateExpression / evaluateExpressionHandle: use getExecutionContext()
464
+ src = replace(
465
+ 'page.js',
466
+ src,
467
+ ` async evaluateExpression(expression, isFunction, arg) {
468
+ return js.evaluateExpression(await this._executionContextPromise, expression, { returnByValue: true, isFunction }, arg);
469
+ }
470
+ async evaluateExpressionHandle(expression, isFunction, arg) {
471
+ return js.evaluateExpression(await this._executionContextPromise, expression, { returnByValue: false, isFunction }, arg);
472
+ }`,
473
+ ` async getExecutionContext() {
474
+ if (process.env['REBROWSER_PATCHES_RUNTIME_FIX_MODE'] !== '0' && !this.existingExecutionContext) {
475
+ await this._session.__re__emitExecutionContext({ world: 'main', targetId: this._targetId })
476
+ }
477
+ return this._executionContextPromise
478
+ }
479
+ async evaluateExpression(expression, isFunction, arg) {
480
+ return js.evaluateExpression(await this.getExecutionContext(), expression, { returnByValue: true, isFunction }, arg);
481
+ }
482
+ async evaluateExpressionHandle(expression, isFunction, arg) {
483
+ return js.evaluateExpression(await this.getExecutionContext(), expression, { returnByValue: false, isFunction }, arg);
484
+ }`,
485
+ 'Worker.evaluateExpression use getExecutionContext'
486
+ )
487
+ // 6c. PageBinding.dispatch: ignore binding calls that are not JSON
488
+ // (the addBinding helper emits raw strings, not our JSON payloads)
489
+ src = replace(
490
+ 'page.js',
491
+ src,
492
+ ` static async dispatch(page, payload, context) {
493
+ const { name, seq, serializedArgs } = JSON.parse(payload);`,
494
+ ` static async dispatch(page, payload, context) {
495
+ if (process.env['REBROWSER_PATCHES_RUNTIME_FIX_MODE'] !== '0' && !payload.includes('{')) return;
496
+ const { name, seq, serializedArgs } = JSON.parse(payload);`,
497
+ 'PageBinding.dispatch guard non-JSON payloads'
498
+ )
499
+ return src
500
+ },
501
+ },
502
+ // ── 8. crPage.js — inject greasy brands into calculateUserAgentMetadata ────────
503
+ // Problem: when browser.run_test connects via CDP, Playwright wraps the page via
504
+ // connectOverCDP. On frame init, crPage.js calls _updateUserAgent() which calls
505
+ // Emulation.setUserAgentOverride with calculateUserAgentMetadata(options). That
506
+ // function builds the metadata object but never sets `brands`, so Chrome reverts
507
+ // to its binary default (Chromium-only brands), clobbering the brands we set via
508
+ // Network.setUserAgentOverride in applyStealthToExistingPage.
509
+ // NOTE: this entry is listed before patch #7 in the array; it edits the same file as patch #3 (crPage.js).
510
+ // Fix: append brands generation (greasy brand algorithm) to the end of
511
+ // calculateUserAgentMetadata, just before `return metadata`. Brands are derived
512
+ // from the Chrome major version in the UA string, matching what
513
+ // applyStealthToExistingPage sets via Network.setUserAgentOverride.
514
+ //
515
+ // The greasy brand algorithm is the same as puppeteer-extra-plugin-stealth:
516
+ // rotates brand order by (seed % 6) to avoid a static fingerprint.
517
+ //
518
+ // UPSTREAM FRAGILITY:
519
+ // Anchor: exact closing lines of calculateUserAgentMetadata.
520
+ // If ua.includes("ARM") or the return line moves, update the search string.
521
+ {
522
+ file: 'server/chromium/crPage.js',
523
+ steps: src => {
524
+ src = replace(
525
+ 'crPage.js',
526
+ src,
527
+ ` if (ua.includes("ARM"))
528
+ metadata.architecture = "arm";
529
+ return metadata;
530
+ }`,
531
+ ` if (ua.includes("ARM"))
532
+ metadata.architecture = "arm";
533
+ // ── szkrabok: greasy brands ───────────────────────────────────────────────
534
+ // Generate navigator.userAgentData.brands from the Chrome major version so
535
+ // Playwright's own Emulation.setUserAgentOverride includes correct brands.
536
+ const chromeMatch = ua.match(/Chrome\\/(\\d+)/);
537
+ if (chromeMatch) {
538
+ const seed = parseInt(chromeMatch[1], 10);
539
+ const order = [[0,1,2],[0,2,1],[1,0,2],[1,2,0],[2,0,1],[2,1,0]][seed % 6];
540
+ const esc = [' ', ' ', ';'];
541
+ const grease = \`\${esc[order[0]]}Not\${esc[order[1]]}A\${esc[order[2]]}Brand\`;
542
+ const brands = [];
543
+ brands[order[0]] = { brand: grease, version: '99' };
544
+ brands[order[1]] = { brand: 'Chromium', version: String(seed) };
545
+ brands[order[2]] = { brand: 'Google Chrome', version: String(seed) };
546
+ metadata.brands = brands;
547
+ }
548
+ // ── end szkrabok greasy brands ────────────────────────────────────────────
549
+ return metadata;
550
+ }`,
551
+ 'inject greasy brands into calculateUserAgentMetadata'
552
+ )
553
+ return src
554
+ },
555
+ },
556
+ // ── 7. utilityScriptSource.js — rename UtilityScript class ───────────────────
557
+ // The rebrowser bot-detector overrides document.getElementById and inspects
558
+ // new Error().stack for the string "UtilityScript." (class.method notation).
559
+ // Playwright injects a compiled UtilityScript bundle into every page context;
560
+ // all page.evaluate() calls run through UtilityScript.evaluate().
561
+ // Renaming the class variable to __pwUs breaks the stack-trace string match
562
+ // without affecting functionality — the export key "UtilityScript" is kept so
563
+ // internal playwright code that references it by name is unaffected.
564
+ // This is an original szkrabok fix — rebrowser-patches only fixes this for
565
+ // puppeteer (pptr: sourceURL), not playwright.
566
+ //
567
+ // UPSTREAM FRAGILITY:
568
+ // The file is a large generated JS bundle (single-line string).
569
+ // Anchors `var UtilityScript = class {` and `UtilityScript: () => UtilityScript`
570
+ // are stable — they are part of the compiled output naming convention.
571
+ // If playwright renames the class in source, update both search strings.
572
+ {
573
+ file: 'generated/utilityScriptSource.js',
574
+ steps: src => {
575
+ // The source is a large single-line JS string. Inside it, the class is
576
+ // declared as "var UtilityScript = class {" and exported via the key
577
+ // "UtilityScript: () => UtilityScript". We rename the variable only
578
+ // (not the export key, which other playwright code references by name).
579
+ src = replace(
580
+ 'utilityScriptSource.js',
581
+ src,
582
+ `var UtilityScript = class {`,
583
+ `var __pwUs = class {`,
584
+ 'rename UtilityScript class variable'
585
+ )
586
+ // The export arrow also references the old name — update it
587
+ src = replace(
588
+ 'utilityScriptSource.js',
589
+ src,
590
+ `UtilityScript: () => UtilityScript`,
591
+ `UtilityScript: () => __pwUs`,
592
+ 'update UtilityScript export reference'
593
+ )
594
+ return src
595
+ },
596
+ },
597
+ ]
598
+
599
+ // ── run ───────────────────────────────────────────────────────────────────────
600
+
601
// Substrings whose presence identifies an install that has already been patched.
// An install counts as fully patched only when every marker is found in its file
// (paths are relative to playwright-core's lib/ directory).
const PATCH_MARKERS = [
  ['server/chromium/crConnection.js', '__re__emitExecutionContext'], // patch #1
  ['server/chromium/crPage.js', 'szkrabok: greasy brands'], // patch #8
].map(([file, marker]) => ({ file, marker }))

// Name of the stamp file written next to package.json, so that it is easy to
// see at a glance that patches are active.
const STAMP_FILE = '.szkrabok-patched'
609
+
610
/**
 * Report whether every entry in PATCH_MARKERS is present under `libDir`.
 *
 * @param {string} libDir - directory that the marker file paths are resolved against.
 * @returns {boolean} true only if each marker file can be read and contains its
 *   marker string; a file that cannot be read counts as "not patched".
 */
function isAlreadyPatched(libDir) {
  for (const { file, marker } of PATCH_MARKERS) {
    let contents
    try {
      contents = fs.readFileSync(path.join(libDir, file), 'utf8')
    } catch {
      // Missing/unreadable file: treat the install as unpatched.
      return false
    }
    if (!contents.includes(marker)) return false
  }
  return true
}
619
+
620
+ // ── patch each playwright-core install ────────────────────────────────────────
621
+
622
// Set when at least one install could not be patched; drives the exit code.
let anyFailed = false

for (const pkgRoot of pkgRoots) {
  const pwVersion = JSON.parse(fs.readFileSync(path.join(pkgRoot, 'package.json'), 'utf8')).version
  const lib = path.join(pkgRoot, 'lib')
  const stamp = path.join(pkgRoot, STAMP_FILE)

  console.log(`\n[patch-playwright] playwright-core ${pwVersion} at ${pkgRoot}`)

  if (isAlreadyPatched(lib)) {
    console.log(' Already patched — skipping.')
    continue
  }

  // Relative paths (under lib/) that currently have a .bak copy for this install.
  const backedUp = []

  const bakPath = rel => path.join(lib, rel) + '.bak'
  const read = rel => fs.readFileSync(path.join(lib, rel), 'utf8')
  const write = (rel, content) => fs.writeFileSync(path.join(lib, rel), content, 'utf8')

  // Snapshot a file before its FIRST modification in this run.
  // A file may be listed in more than one patch group; re-copying it on a later
  // group would overwrite the pristine .bak with already-patched content and
  // make rollback restore the wrong bytes, so later requests are no-ops.
  const backup = rel => {
    if (backedUp.includes(rel)) return
    fs.copyFileSync(path.join(lib, rel), bakPath(rel))
    backedUp.push(rel)
    console.log(` backed up ${rel}`)
  }

  // Restore every backed-up file from its .bak and delete the .bak.
  // Returns true only if every file was restored; failures are logged with the
  // location of the surviving backup so it can be recovered by hand.
  const rollback = () => {
    console.error(' Rolling back ...')
    let allRestored = true
    for (const rel of backedUp) {
      const bak = bakPath(rel)
      try {
        fs.copyFileSync(bak, path.join(lib, rel))
        fs.unlinkSync(bak)
        console.error(` restored ${rel}`)
      } catch (e) {
        allRestored = false
        console.error(` FAILED to restore ${rel}: ${e.message} — backup at ${bak}`)
      }
    }
    return allRestored
  }

  // Delete the .bak files after a fully successful run.
  const removeBaks = () => {
    for (const rel of backedUp) {
      try {
        fs.unlinkSync(bakPath(rel))
      } catch {
        // Best effort: a leftover .bak is harmless, so cleanup errors are ignored.
      }
    }
  }

  // Run one patch group: backup -> read -> transform -> write.
  // Returns true on success; on failure logs which stage broke and returns false.
  const applyGroup = ({ file, steps }) => {
    let stage = 'backing up'
    try {
      backup(file)
      stage = 'reading'
      const src = read(file)
      stage = 'patching'
      const patched = steps(src)
      stage = 'writing'
      write(file, patched)
      console.log(` patched ${file}`)
      return true
    } catch (e) {
      if (stage === 'patching') {
        // Errors thrown by a patch step already carry a full description;
        // only the script prefix is swapped for indentation.
        console.error(e.message.replace('[patch-playwright] ', ' '))
      } else {
        console.error(` ERROR ${stage} ${file}: ${e.message}`)
      }
      return false
    }
  }

  console.log(` Applying ${patches.length} patch groups ...`)
  // some() stops at the first failing group, so later groups are not attempted.
  const failed = patches.some(group => !applyGroup(group))

  if (failed) {
    const restored = rollback()
    console.error(
      restored
        ? `\n PATCH FAILED for playwright-core ${pwVersion} — files restored from .bak backups.`
        : `\n PATCH FAILED for playwright-core ${pwVersion} — some files could NOT be restored; see the .bak paths listed above.`
    )
    console.error(' What to do:')
    console.error(` 1. Check what changed in playwright-core ${pwVersion}.`)
    console.error(
      ' 2. Update the failing patch step search string in scripts/patch-playwright.js.'
    )
    console.error(' 3. Re-run: node scripts/patch-playwright.js')
    console.error(' Reference: vendor/rebrowser-patches/patches/playwright-core/src.patch')
    anyFailed = true
  } else {
    removeBaks()
    // write stamp file so patches are visible at a glance
    fs.writeFileSync(stamp, `szkrabok-patched playwright-core@${pwVersion}\n`)
    console.log(` All patches applied. Stamp: ${STAMP_FILE}`)
  }
}

if (anyFailed) process.exit(1)
console.log('\n[patch-playwright] Done.')