@wordbricks/playwright-mcp 0.1.18 → 0.1.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -44
- package/lib/hooks/antiBotDetectionHook.js +171 -0
- package/lib/hooks/eventConsumer.js +16 -3
- package/lib/hooks/networkFilters.js +9 -0
- package/lib/hooks/networkSetup.js +4 -1
- package/lib/hooks/networkTrackingHook.js +14 -2
- package/lib/hooks/registry.js +4 -1
- package/lib/hooks/schema.js +12 -0
- package/lib/tools/networkDetail.js +14 -16
- package/lib/tools/networkSearch.js +23 -3
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -142,49 +142,58 @@ Playwright MCP server supports following arguments. They can be provided in the
|
|
|
142
142
|
|
|
143
143
|
```
|
|
144
144
|
> npx @wordbricks/playwright-mcp@latest --help
|
|
145
|
-
--allowed-origins <origins>
|
|
146
|
-
|
|
147
|
-
--blocked-origins <origins>
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
--block-service-workers
|
|
153
|
-
--browser <browser>
|
|
154
|
-
|
|
155
|
-
--caps <caps>
|
|
156
|
-
|
|
157
|
-
--cdp-endpoint <endpoint>
|
|
158
|
-
--config <path>
|
|
159
|
-
--device <device>
|
|
160
|
-
--executable-path <path>
|
|
161
|
-
--headless
|
|
162
|
-
--host <host>
|
|
163
|
-
|
|
164
|
-
--ignore-https-errors
|
|
165
|
-
--
|
|
166
|
-
|
|
167
|
-
--
|
|
168
|
-
|
|
169
|
-
--
|
|
170
|
-
|
|
171
|
-
--
|
|
172
|
-
|
|
173
|
-
--
|
|
174
|
-
|
|
175
|
-
--proxy-
|
|
176
|
-
|
|
177
|
-
--
|
|
178
|
-
|
|
179
|
-
--save-
|
|
180
|
-
|
|
181
|
-
--
|
|
182
|
-
|
|
183
|
-
--
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
--
|
|
187
|
-
|
|
145
|
+
--allowed-origins <origins> semicolon-separated list of origins to allow the
|
|
146
|
+
browser to request. Default is to allow all.
|
|
147
|
+
--blocked-origins <origins> semicolon-separated list of origins to block the
|
|
148
|
+
browser from requesting. Blocklist is evaluated
|
|
149
|
+
before allowlist. If used without the allowlist,
|
|
150
|
+
requests not matching the blocklist are still
|
|
151
|
+
allowed.
|
|
152
|
+
--block-service-workers block service workers
|
|
153
|
+
--browser <browser> browser or chrome channel to use, possible
|
|
154
|
+
values: chrome, firefox, webkit, msedge.
|
|
155
|
+
--caps <caps> comma-separated list of additional capabilities
|
|
156
|
+
to enable, possible values: vision, pdf.
|
|
157
|
+
--cdp-endpoint <endpoint> CDP endpoint to connect to.
|
|
158
|
+
--config <path> path to the configuration file.
|
|
159
|
+
--device <device> device to emulate, for example: "iPhone 15"
|
|
160
|
+
--executable-path <path> path to the browser executable.
|
|
161
|
+
--headless run browser in headless mode, headed by default
|
|
162
|
+
--host <host> host to bind server to. Default is localhost.
|
|
163
|
+
Use 0.0.0.0 to bind to all interfaces.
|
|
164
|
+
--ignore-https-errors ignore https errors
|
|
165
|
+
--init-script <path> path to a JavaScript file to inject into all
|
|
166
|
+
pages using addInitScript.
|
|
167
|
+
--isolated keep the browser profile in memory, do not save
|
|
168
|
+
it to disk.
|
|
169
|
+
--image-responses <mode> whether to send image responses to the client.
|
|
170
|
+
Can be "allow" or "omit", Defaults to "allow".
|
|
171
|
+
--no-sandbox disable the sandbox for all process types that
|
|
172
|
+
are normally sandboxed.
|
|
173
|
+
--output-dir <path> path to the directory for output files.
|
|
174
|
+
--port <port> port to listen on for SSE transport.
|
|
175
|
+
--proxy-bypass <bypass> comma-separated domains to bypass proxy, for
|
|
176
|
+
example ".com,chromium.org,.domain.com"
|
|
177
|
+
--proxy-server <proxy> specify proxy server, for example
|
|
178
|
+
"http://myproxy:3128" or "socks5://myproxy:8080"
|
|
179
|
+
--save-session Whether to save the Playwright MCP session into
|
|
180
|
+
the output directory.
|
|
181
|
+
--save-trace Whether to save the Playwright Trace of the
|
|
182
|
+
session into the output directory.
|
|
183
|
+
--storage-state <path> path to the storage state file for isolated
|
|
184
|
+
sessions.
|
|
185
|
+
--user-agent <ua string> specify user agent string
|
|
186
|
+
--user-data-dir <path> path to the user data directory. If not
|
|
187
|
+
specified, a temporary directory will be
|
|
188
|
+
created.
|
|
189
|
+
--viewport-size <size> specify browser viewport size in pixels, for
|
|
190
|
+
example "1280, 720"
|
|
191
|
+
--window-position <x,y> specify Chrome window position in pixels, for
|
|
192
|
+
example "100,200"
|
|
193
|
+
--window-size <width,height> specify Chrome window size in pixels, for
|
|
194
|
+
example "1280,720"
|
|
195
|
+
--app <url> launch browser in app mode with the specified
|
|
196
|
+
URL
|
|
188
197
|
```
|
|
189
198
|
|
|
190
199
|
<!--- End of options generated section -->
|
|
@@ -397,6 +406,7 @@ http.createServer(async (req, res) => {
|
|
|
397
406
|
- `ref` (string): Exact target element reference from the page snapshot
|
|
398
407
|
- `doubleClick` (boolean, optional): Whether to perform a double click instead of a single click
|
|
399
408
|
- `button` (string, optional): Button to click, defaults to left
|
|
409
|
+
- `modifiers` (array, optional): Modifier keys to press
|
|
400
410
|
- Read-only: **false**
|
|
401
411
|
|
|
402
412
|
<!-- NOTE: This has been generated via update-readme.js -->
|
|
@@ -564,7 +574,7 @@ http.createServer(async (req, res) => {
|
|
|
564
574
|
- Description: Scroll the page using mouse wheel with human-like behavior
|
|
565
575
|
- Parameters:
|
|
566
576
|
- `amount` (number): Vertical scroll amount in pixels (positive scrolls down, negative up)
|
|
567
|
-
- Read-only: **
|
|
577
|
+
- Read-only: **true**
|
|
568
578
|
|
|
569
579
|
<!-- NOTE: This has been generated via update-readme.js -->
|
|
570
580
|
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import ms from 'ms';
|
|
2
|
+
import { Ok } from '../utils/result.js';
|
|
3
|
+
import { hookNameSchema } from './schema.js';
|
|
4
|
+
import { getEventsAfter, isEventType, trackEvent } from './events.js';
|
|
5
|
+
const RESOLUTION_WAIT_MS = ms('10s');
|
|
6
|
+
const lastProcessedEventIdByContext = new WeakMap();
|
|
7
|
+
const detectedProvidersByContext = new WeakMap();
|
|
8
|
+
/**
 * Heuristically decides whether an anti-bot challenge is no longer showing on the current tab.
 *
 * Returns `false` when:
 *  - the context has no tab/page,
 *  - the page itself is hosted on `challenges.cloudflare.com`,
 *  - the DOM contains any known challenge marker element,
 *  - the title or the first 2000 characters of the body text contain an interstitial phrase,
 *  - the in-page check could not be completed.
 *
 * @param {object} ctx - Hook context; only `ctx.tab.page` is read.
 * @returns {Promise<boolean>} `true` when none of the challenge indicators were found.
 */
const isLikelyResolved = async (ctx) => {
    const page = ctx.tab?.page;
    if (!page)
        return false;
    const pageUrl = page.url();
    // Test the host only: a path or query string that merely mentions the
    // challenge domain must not be mistaken for the challenge page itself.
    let hostname;
    try {
        hostname = new URL(pageUrl).hostname;
    }
    catch {
        // Not an absolute URL; fall back to inspecting the raw string.
        hostname = pageUrl;
    }
    if (hostname.includes('challenges.cloudflare.com'))
        return false;
    try {
        return await page.evaluate(() => {
            const challengeSelectors = [
                '#challenge-stage',
                '#cf-challenge-running',
                'iframe[src*="turnstile"]',
                'form[action*="/cdn-cgi/challenge-platform"]',
                '[data-cf-challenge]'
            ];
            const hasChallengeDom = challengeSelectors.some(selector => document.querySelector(selector));
            if (hasChallengeDom)
                return false;
            const title = document.title.toLowerCase();
            // Only the beginning of the visible text is inspected to keep the check cheap.
            const bodyText = (document.body?.innerText || '').slice(0, 2000).toLowerCase();
            if (title.includes('just a moment') || bodyText.includes('just a moment'))
                return false;
            if (title.includes('checking your browser') || bodyText.includes('checking your browser'))
                return false;
            return true;
        });
    }
    catch {
        // evaluate() can reject (presumably when the page navigates or closes while
        // the check runs - TODO confirm). A state that cannot be verified is
        // reported as unresolved instead of aborting the caller.
        return false;
    }
};
|
|
34
|
+
/**
 * Returns the set of provider names recorded for `context`, creating and
 * storing an empty set the first time a context is seen.
 *
 * @param {object} context - Key into the per-context WeakMap.
 * @returns {Set<string>} The (mutable) set kept for this context.
 */
const getDetectedProviders = (context) => {
    let providers = detectedProvidersByContext.get(context);
    if (!providers) {
        providers = new Set();
        detectedProvidersByContext.set(context, providers);
    }
    return providers;
};
|
|
42
|
+
/**
 * Remembers the id of the final event in `events` as the last one processed
 * for `context`. Does nothing when the list has no (truthy) last element.
 *
 * @param {object} context - Key into the per-context WeakMap.
 * @param {Array<{id: *}>} events - Events in the order they were read.
 */
const updateLastProcessedEventId = (context, events) => {
    const [newest] = events.slice(-1);
    if (newest)
        lastProcessedEventIdByContext.set(context, newest.id);
};
|
|
48
|
+
// True for HTTP statuses in the half-open range [200, 400), i.e. 2xx and 3xx.
const isStatusOk = (status) => {
    if (status < 200)
        return false;
    return status < 400;
};
|
|
49
|
+
// Matches a 2xx/3xx response whose URL mentions both the Cloudflare challenge
// host and the `/turnstile/` path segment.
const matchesCloudflareTurnstile = ({ data }) => {
    if (!isStatusOk(data.status))
        return false;
    if (!data.url.includes('challenges.cloudflare.com'))
        return false;
    return data.url.includes('/turnstile/');
};
// Matches a 2xx/3xx POST whose URL mentions both `.awswaf.com` and `/telemetry`.
const matchesAwsWafTelemetry = ({ data }) => {
    if (!isStatusOk(data.status))
        return false;
    if (data.method.toUpperCase() !== 'POST')
        return false;
    if (!data.url.includes('.awswaf.com'))
        return false;
    return data.url.includes('/telemetry');
};
/**
 * Known anti-bot providers, each paired with a predicate that recognises the
 * provider from a single network-request event.
 */
const providerConfigs = [
    { provider: 'cloudflare-turnstile', match: matchesCloudflareTurnstile },
    { provider: 'aws-waf', match: matchesAwsWafTelemetry },
];
/**
 * Exposes the provider table to other modules.
 * Note: the same array instance is returned on every call, not a copy.
 */
export const getAntiBotProviderConfigs = () => {
    return providerConfigs;
};
|
|
65
|
+
/**
 * Pauses for the fixed resolution window on the current page and then checks
 * once whether the challenge appears to be gone.
 *
 * @param {object} ctx - Hook context; `ctx.tab.page` is used for the wait.
 * @returns {Promise<{resolved: boolean, waitedMs: number}>} Outcome of the check
 *   and the elapsed wall-clock time; `{ resolved: false, waitedMs: 0 }` when
 *   there is no page to wait on.
 */
const waitForResolution = async (ctx) => {
    const startedAt = Date.now();
    const page = ctx.tab?.page;
    if (!page) {
        return { resolved: false, waitedMs: 0 };
    }
    await page.waitForTimeout(RESOLUTION_WAIT_MS);
    const resolved = await isLikelyResolved(ctx);
    const waitedMs = Date.now() - startedAt;
    return { resolved, waitedMs };
};
|
|
73
|
+
/**
 * Scans `events` for network requests that identify a known anti-bot provider.
 *
 * A provider already recorded for `ctx.context` is skipped; a newly matched
 * provider is recorded immediately, so it is reported at most once per context.
 * For each provider only the first matching network event is reported.
 *
 * @param {object} ctx - Hook context; `ctx.context` keys the per-context state.
 * @param {Array<object>} events - Events to inspect (any type; non-network ones are ignored).
 * @returns {{hits: Array<{provider: string, url: string, status: number}>}}
 */
const detectAntiBot = (ctx, events) => {
    const alreadyDetected = getDetectedProviders(ctx.context);
    const networkEvents = events.filter(isEventType('network-request'));
    const hits = [];
    for (const config of providerConfigs) {
        if (alreadyDetected.has(config.provider))
            continue;
        const firstMatch = networkEvents.find(config.match);
        if (!firstMatch)
            continue;
        alreadyDetected.add(config.provider);
        hits.push({
            provider: config.provider,
            url: firstMatch.data.url,
            status: firstMatch.data.status,
        });
    }
    return { hits };
};
|
|
95
|
+
/**
 * Pre-hook: establishes the starting event cursor for a context.
 *
 * The cursor is written only the first time a context is seen, and only when
 * the event store exposes a numeric `lastSeenEventId`; otherwise nothing is
 * stored. The handler always reports success.
 */
export const antiBotDetectionPreHook = {
    name: hookNameSchema.enum['anti-bot-detection-pre'],
    handler: async (ctx) => {
        const isFirstVisit = !lastProcessedEventIdByContext.has(ctx.context);
        if (isFirstVisit && typeof ctx.eventStore.lastSeenEventId === 'number') {
            lastProcessedEventIdByContext.set(ctx.context, ctx.eventStore.lastSeenEventId);
        }
        return Ok(undefined);
    },
};
|
|
105
|
+
// Emits one 'anti-bot' event per hit, sharing the same action and wait time.
const recordAntiBotHits = (context, hits, action, waitMs) => {
    for (const hit of hits) {
        trackEvent(context, {
            type: 'anti-bot',
            data: {
                provider: hit.provider,
                detectionMethod: 'network-request',
                url: hit.url,
                status: hit.status,
                action,
                waitMs,
            },
        });
    }
};
/**
 * Post-hook: inspects events recorded since the stored cursor for anti-bot activity.
 *
 * When at least one provider is newly detected it
 *  1. records a 'detected' event per hit (carrying the configured wait window),
 *  2. waits for the challenge to clear,
 *  3. records a 'resolved' or 'still-blocked' event per hit with the measured wait.
 * The cursor is then advanced to the last event that was read. The handler
 * returns success in every non-throwing path.
 */
export const antiBotDetectionPostHook = {
    name: hookNameSchema.enum['anti-bot-detection-post'],
    handler: async (ctx) => {
        const cursor = lastProcessedEventIdByContext.get(ctx.context);
        const newEvents = getEventsAfter(ctx.eventStore, cursor);
        if (newEvents.length === 0)
            return Ok(undefined);
        const { hits } = detectAntiBot(ctx, newEvents);
        if (hits.length > 0) {
            recordAntiBotHits(ctx.context, hits, 'detected', RESOLUTION_WAIT_MS);
            const waitResult = await waitForResolution(ctx);
            const outcome = waitResult.resolved ? 'resolved' : 'still-blocked';
            recordAntiBotHits(ctx.context, hits, outcome, waitResult.waitedMs);
        }
        updateLastProcessedEventId(ctx.context, newEvents);
        return Ok(undefined);
    },
};
// Pre/post pair in the shape used when registering hooks.
export const antiBotDetectionHooks = {
    pre: antiBotDetectionPreHook,
    post: antiBotDetectionPostHook,
};
|
|
149
|
+
/**
 * Renders an 'anti-bot' event as a one-line, human-readable message.
 *
 * The wording depends on `event.data.action` ('still-blocked', 'resolved', or
 * anything else, which is treated as a detection) and on `event.data.provider`.
 * Unrecognised providers get a generic message without wait or status details.
 * Wait times are rounded to whole seconds; a missing or sub-half-second wait is
 * shown as "<1".
 *
 * @param {{data: {action: string, provider: string, status?: number, waitMs?: number}}} event
 * @returns {string} The formatted message.
 */
export const formatAntiBotEvent = (event) => {
    const { action, provider, status, waitMs } = event.data;
    const waitSeconds = waitMs ? Math.round(waitMs / 1000) : 0;
    switch (action) {
        case 'still-blocked':
            switch (provider) {
                case 'cloudflare-turnstile':
                    return `Anti-bot still active: Cloudflare Turnstile after ${waitSeconds || '<1'}s wait`;
                case 'aws-waf':
                    return `Anti-bot still active: AWS WAF after ${waitSeconds || '<1'}s wait`;
                default:
                    return 'Anti-bot mechanism still active';
            }
        case 'resolved':
            switch (provider) {
                case 'cloudflare-turnstile':
                    return `Anti-bot resolved: Cloudflare Turnstile after ${waitSeconds || '<1'}s wait`;
                case 'aws-waf':
                    return `Anti-bot resolved: AWS WAF after ${waitSeconds || '<1'}s wait`;
                default:
                    return 'Anti-bot mechanism resolved';
            }
        default:
            switch (provider) {
                case 'cloudflare-turnstile':
                    return `Anti-bot detected: Cloudflare Turnstile (${status})`;
                case 'aws-waf':
                    return `Anti-bot detected: AWS WAF telemetry request (${status})`;
                default:
                    return 'Anti-bot mechanism detected';
            }
    }
};
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { getEventsAfter, updateLastSeenId } from './events.js';
|
|
1
|
+
import { getEventsAfter, isEventType, updateLastSeenId } from './events.js';
|
|
2
2
|
import { formatWaitEvent } from './waitHook.js';
|
|
3
3
|
import { formatPageHeightEvent } from './pageHeightHook.js';
|
|
4
4
|
import { formatNetworkEvent } from './networkTrackingHook.js';
|
|
@@ -6,6 +6,8 @@ import { planGroupedMessages } from './grouping.js';
|
|
|
6
6
|
import { formatToolCallEvent } from './formatToolCallEvent.js';
|
|
7
7
|
import { formatFrameworkStateEvent } from './frameworkStateHook.js';
|
|
8
8
|
import { formatJsonLdEvent } from './jsonLdDetectionHook.js';
|
|
9
|
+
import { formatAntiBotEvent, getAntiBotProviderConfigs } from './antiBotDetectionHook.js';
|
|
10
|
+
import { isAntiBotUrl } from './networkFilters.js';
|
|
9
11
|
const eventFormatters = {
|
|
10
12
|
'wait': formatWaitEvent,
|
|
11
13
|
'page-height-change': formatPageHeightEvent,
|
|
@@ -13,6 +15,7 @@ const eventFormatters = {
|
|
|
13
15
|
'tool-call': formatToolCallEvent,
|
|
14
16
|
'framework-state': formatFrameworkStateEvent,
|
|
15
17
|
'json-ld': formatJsonLdEvent,
|
|
18
|
+
'anti-bot': formatAntiBotEvent,
|
|
16
19
|
};
|
|
17
20
|
const formatEvent = (event) => {
|
|
18
21
|
const formatter = eventFormatters[event.type];
|
|
@@ -25,13 +28,23 @@ const consumeEvent = (event, response, plan) => {
|
|
|
25
28
|
const formattedMessage = replacement ?? formatEvent(event);
|
|
26
29
|
response.addEvent(`[${event.id}] ${formattedMessage}`);
|
|
27
30
|
};
|
|
31
|
+
/**
 * Decides whether an event is left out of the response.
 *
 * Only 'network-request' events can be hidden: those whose URL is classified
 * as anti-bot traffic, or that satisfy a 'cloudflare-turnstile' provider
 * matcher. Every other event type stays visible.
 *
 * @param {object} event - Event taken from the event store.
 * @returns {boolean} `true` when the event should be hidden.
 */
const shouldHideEvent = (event) => {
    if (!isEventType('network-request')(event))
        return false;
    if (isAntiBotUrl(event.data.url))
        return true;
    return getAntiBotProviderConfigs().some(
        config => config.provider === 'cloudflare-turnstile' && config.match(event));
};
|
|
28
40
|
export const consumeEvents = (context, eventStore, response) => {
|
|
29
41
|
const unconsumedEvents = getEventsAfter(eventStore, eventStore.lastSeenEventId);
|
|
30
42
|
if (unconsumedEvents.length === 0)
|
|
31
43
|
return;
|
|
32
|
-
const
|
|
44
|
+
const visibleEvents = unconsumedEvents.filter(event => !shouldHideEvent(event));
|
|
45
|
+
const plan = planGroupedMessages(visibleEvents);
|
|
33
46
|
// Consume all events in chronological order
|
|
34
|
-
for (const event of
|
|
47
|
+
for (const event of visibleEvents)
|
|
35
48
|
consumeEvent(event, response, plan);
|
|
36
49
|
// Update last seen event ID
|
|
37
50
|
const latestEvent = unconsumedEvents[unconsumedEvents.length - 1];
|
|
@@ -11,6 +11,13 @@ const hasExcludedExtension = (url) => {
|
|
|
11
11
|
return extRegex.test(url);
|
|
12
12
|
});
|
|
13
13
|
};
|
|
14
|
+
/**
 * Tells whether a request URL targets a known anti-bot service
 * (Cloudflare challenge host or an `awswaf.com` subdomain).
 *
 * The decision is made on the parsed hostname, so a URL that merely mentions
 * one of these domains in its path or query string is not classified as
 * anti-bot traffic, and host casing does not matter. Strings that cannot be
 * parsed as absolute URLs fall back to a plain substring test.
 *
 * @param {string} url - Request URL.
 * @returns {boolean} `true` when the URL belongs to an anti-bot service.
 */
export const isAntiBotUrl = (url) => {
    let hostname;
    try {
        hostname = new URL(url).hostname;
    }
    catch {
        // Relative or otherwise unparsable URL: keep the lenient substring check.
        return url.includes('challenges.cloudflare.com') || url.includes('.awswaf.com');
    }
    if (hostname === 'challenges.cloudflare.com' || hostname.endsWith('.challenges.cloudflare.com'))
        return true;
    // The leading dot is kept on purpose: only subdomains of awswaf.com qualify.
    if (hostname.endsWith('.awswaf.com'))
        return true;
    return false;
};
|
|
14
21
|
const isSuccessfulStatus = (status) => {
|
|
15
22
|
// Status 0 is for failed requests, which we want to capture
|
|
16
23
|
// 2xx status codes are successful
|
|
@@ -18,6 +25,8 @@ const isSuccessfulStatus = (status) => {
|
|
|
18
25
|
return status === 0 || (status >= 200 && status < 300 && status !== 204);
|
|
19
26
|
};
|
|
20
27
|
export const shouldCaptureRequest = (method, url, status, resourceType) => {
|
|
28
|
+
if (isAntiBotUrl(url))
|
|
29
|
+
return true;
|
|
21
30
|
return !hasExcludedExtension(url) &&
|
|
22
31
|
MEANINGFUL_RESOURCE_TYPES.includes(resourceType) &&
|
|
23
32
|
ALLOWED_METHODS.includes(method) &&
|
|
@@ -11,7 +11,7 @@ const getEventIdMap = (context) => {
|
|
|
11
11
|
};
|
|
12
12
|
export const getNetworkEventEntry = (context, id) => getEventIdMap(context).get(id);
|
|
13
13
|
export const setupNetworkTracking = (context, page) => {
|
|
14
|
-
page.on('response', response => {
|
|
14
|
+
page.on('response', async (response) => {
|
|
15
15
|
const request = response.request();
|
|
16
16
|
const method = request.method();
|
|
17
17
|
const url = request.url();
|
|
@@ -19,12 +19,15 @@ export const setupNetworkTracking = (context, page) => {
|
|
|
19
19
|
const resourceType = request.resourceType();
|
|
20
20
|
// Apply filters before saving the event
|
|
21
21
|
if (shouldCaptureRequest(method, url, status, resourceType)) {
|
|
22
|
+
const setCookies = await response.headerValues('set-cookie').catch(() => []);
|
|
23
|
+
const cookieValues = setCookies.length ? setCookies : undefined;
|
|
22
24
|
const networkData = {
|
|
23
25
|
method,
|
|
24
26
|
url,
|
|
25
27
|
status,
|
|
26
28
|
resourceType,
|
|
27
29
|
postData: request.postData() || undefined,
|
|
30
|
+
setCookies: cookieValues,
|
|
28
31
|
};
|
|
29
32
|
const id = trackEvent(context, {
|
|
30
33
|
type: 'network-request',
|
|
@@ -25,8 +25,20 @@ export const networkTrackingHooks = {
|
|
|
25
25
|
post: networkTrackingPostHook,
|
|
26
26
|
};
|
|
27
27
|
export const formatNetworkEvent = (event) => {
|
|
28
|
-
const { method, url, status, postData } = event.data;
|
|
29
|
-
|
|
28
|
+
const { method, url, status, postData, setCookies } = event.data;
|
|
29
|
+
const summary = formatNetworkSummaryLine({ method, url, status, postData });
|
|
30
|
+
if (!setCookies || setCookies.length === 0)
|
|
31
|
+
return summary;
|
|
32
|
+
const names = setCookies
|
|
33
|
+
.map(cookie => {
|
|
34
|
+
const firstPart = cookie.split(';', 1)[0];
|
|
35
|
+
const [name] = firstPart.split('=', 1);
|
|
36
|
+
return name?.trim();
|
|
37
|
+
})
|
|
38
|
+
.filter((name) => !!name);
|
|
39
|
+
if (!names.length)
|
|
40
|
+
return summary;
|
|
41
|
+
return `${summary} | Set-Cookie keys: ${names.join(', ')}`;
|
|
30
42
|
};
|
|
31
43
|
const computeNetworkGroupKey = (event) => {
|
|
32
44
|
const method = (event.data.method || '').toUpperCase();
|
package/lib/hooks/registry.js
CHANGED
|
@@ -9,18 +9,21 @@ import { toolNameSchema } from './schema.js';
|
|
|
9
9
|
import { requireTabHooks } from './requireTabHook.js';
|
|
10
10
|
import { registerGroupingRule } from './grouping.js';
|
|
11
11
|
import { networkGroupingRule } from './networkTrackingHook.js';
|
|
12
|
+
import { antiBotDetectionHooks } from './antiBotDetectionHook.js';
|
|
12
13
|
const COMMON_HOOKS = {
|
|
13
14
|
preHooks: [
|
|
14
15
|
requireTabHooks.pre,
|
|
15
16
|
networkTrackingHooks.pre,
|
|
17
|
+
antiBotDetectionHooks.pre,
|
|
16
18
|
pageHeightHooks.pre,
|
|
17
19
|
frameworkStateHooks.pre,
|
|
18
20
|
jsonLdDetectionHooks.pre
|
|
19
21
|
],
|
|
20
22
|
postHooks: [
|
|
23
|
+
networkTrackingHooks.post,
|
|
24
|
+
antiBotDetectionHooks.post,
|
|
21
25
|
frameworkStateHooks.post,
|
|
22
26
|
jsonLdDetectionHooks.post,
|
|
23
|
-
networkTrackingHooks.post,
|
|
24
27
|
pageHeightHooks.post,
|
|
25
28
|
waitHooks.post
|
|
26
29
|
],
|
package/lib/hooks/schema.js
CHANGED
|
@@ -10,6 +10,8 @@ export const hookNameSchema = z.enum([
|
|
|
10
10
|
'json-ld-detection-pre',
|
|
11
11
|
'json-ld-detection-post',
|
|
12
12
|
'require-tab-pre',
|
|
13
|
+
'anti-bot-detection-pre',
|
|
14
|
+
'anti-bot-detection-post',
|
|
13
15
|
]);
|
|
14
16
|
// Tool names enum - should match actual tool names in
|
|
15
17
|
export const toolNameSchema = z.enum([
|
|
@@ -37,6 +39,7 @@ export const EventTypeSchema = z.enum([
|
|
|
37
39
|
'tool-call',
|
|
38
40
|
'framework-state',
|
|
39
41
|
'json-ld',
|
|
42
|
+
'anti-bot',
|
|
40
43
|
]);
|
|
41
44
|
export const NetworkRequestEventDataSchema = z.object({
|
|
42
45
|
method: z.string(),
|
|
@@ -44,6 +47,7 @@ export const NetworkRequestEventDataSchema = z.object({
|
|
|
44
47
|
status: z.number(),
|
|
45
48
|
resourceType: z.string(),
|
|
46
49
|
postData: z.string().optional(),
|
|
50
|
+
setCookies: z.array(z.string()).optional(),
|
|
47
51
|
responseSize: z.number().optional(),
|
|
48
52
|
});
|
|
49
53
|
export const PageHeightChangeEventDataSchema = z.object({
|
|
@@ -75,3 +79,11 @@ export const JsonLdEventDataSchema = z.object({
|
|
|
75
79
|
changes: z.array(z.string()).optional(),
|
|
76
80
|
action: z.enum(['detected', 'changed']),
|
|
77
81
|
});
|
|
82
|
+
/**
 * Payload schema for 'anti-bot' events.
 */
export const AntiBotEventDataSchema = z.object({
    // Which anti-bot service was recognised.
    provider: z.enum(['cloudflare-turnstile', 'aws-waf']),
    // How it was recognised; only network-based detection is allowed.
    detectionMethod: z.literal('network-request'),
    // URL and HTTP status of the request that identified the provider.
    url: z.string(),
    status: z.number(),
    // Lifecycle stage this event reports.
    action: z.enum(['detected', 'resolved', 'still-blocked']),
    // Wait duration in milliseconds, when one applies.
    waitMs: z.number().optional(),
});
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
2
|
import ms from 'ms';
|
|
3
|
-
import * as cheerio from 'cheerio';
|
|
4
3
|
import { defineTabTool } from './tool.js';
|
|
5
|
-
import {
|
|
4
|
+
import { sanitizeHtml } from '../utils/sanitizeHtml.js';
|
|
6
5
|
import { formatTruncationLine } from '../utils/truncate.js';
|
|
7
6
|
import { withTimeout } from '../utils/withTimeout.js';
|
|
8
7
|
import { getNetworkEventEntry } from '../hooks/networkSetup.js';
|
|
@@ -148,6 +147,12 @@ const formatRequestDetail = async (ctx, { request, response, id, }) => {
|
|
|
148
147
|
headerParts.push(`content-length=${resCl}`);
|
|
149
148
|
if (headerParts.length)
|
|
150
149
|
lines.push(`\nResponse Headers: { ${headerParts.join(', ')} }`);
|
|
150
|
+
const setCookieHeaders = await response.headerValues('set-cookie').catch(() => []);
|
|
151
|
+
if (setCookieHeaders.length) {
|
|
152
|
+
lines.push(`\nSet-Cookie Headers (${setCookieHeaders.length}):`);
|
|
153
|
+
for (const cookie of setCookieHeaders)
|
|
154
|
+
lines.push(cookie);
|
|
155
|
+
}
|
|
151
156
|
}
|
|
152
157
|
// Fetch the full body each time
|
|
153
158
|
let buf;
|
|
@@ -174,24 +179,17 @@ const formatRequestDetail = async (ctx, { request, response, id, }) => {
|
|
|
174
179
|
}
|
|
175
180
|
catch { }
|
|
176
181
|
}
|
|
177
|
-
// Reset if finished in a prior call
|
|
178
|
-
if (state.cursor >= buf.length) {
|
|
179
|
-
state.cursor = 0;
|
|
180
|
-
state.headersShown = false; // allow headers again after a full pass
|
|
181
|
-
}
|
|
182
182
|
let bodyText = buf.toString('utf8');
|
|
183
183
|
const resHeadersForBody = normalizeHeaderKeys(await response.allHeaders());
|
|
184
184
|
const resCtForBody = resHeadersForBody['content-type'] || '';
|
|
185
185
|
const isHtml = resCtForBody.includes('html');
|
|
186
|
-
if (isHtml)
|
|
187
|
-
|
|
188
|
-
removeNonEssentialLinks($);
|
|
189
|
-
removeMetaTags($);
|
|
190
|
-
removeHtmlComments($);
|
|
191
|
-
stripSvgAttributes($);
|
|
192
|
-
bodyText = minifyHtml($.root().html() || '');
|
|
193
|
-
}
|
|
186
|
+
if (isHtml)
|
|
187
|
+
bodyText = sanitizeHtml(bodyText, { shouldRemoveScripts: false, shouldRemoveStyles: true });
|
|
194
188
|
const totalLength = bodyText.length;
|
|
189
|
+
if (state.cursor >= totalLength) {
|
|
190
|
+
state.cursor = 0;
|
|
191
|
+
state.headersShown = false; // allow headers again after a full pass
|
|
192
|
+
}
|
|
195
193
|
const start = state.cursor;
|
|
196
194
|
const end = Math.min(start + CHUNK_BYTES, totalLength);
|
|
197
195
|
const chunk = bodyText.slice(start, end);
|
|
@@ -201,7 +199,7 @@ const formatRequestDetail = async (ctx, { request, response, id, }) => {
|
|
|
201
199
|
lines.push(`\nBody Chunk (text): ${percent}%${more ? ' (more)' : ' (done)'}:`);
|
|
202
200
|
lines.push(chunk);
|
|
203
201
|
if (more)
|
|
204
|
-
lines.push(formatTruncationLine(nextCursor,
|
|
202
|
+
lines.push(formatTruncationLine(nextCursor, totalLength, { formatter: formatBytes }));
|
|
205
203
|
state.cursor = nextCursor;
|
|
206
204
|
state.headersShown = true;
|
|
207
205
|
return lines.join('\n');
|
|
@@ -8,7 +8,7 @@ import { toJsonPathNormalized, truncateStringTo } from '../utils/truncate.js';
|
|
|
8
8
|
import { searchInUrls } from './networkSearch/urlSearch.js';
|
|
9
9
|
import { searchInRequestBody, searchInResponseBody } from './networkSearch/bodySearch.js';
|
|
10
10
|
import { normalizePath, getDepth, mergeGroupedMatches } from './networkSearch/grouping.js';
|
|
11
|
-
import { parseKeywordParams } from './networkSearch/helpers.js';
|
|
11
|
+
import { parseKeywordParams, highlightMatch } from './networkSearch/helpers.js';
|
|
12
12
|
const MAX_SEARCH_RESULTS = 10;
|
|
13
13
|
const MAX_GROUPS_TO_SHOW = 3;
|
|
14
14
|
const networkSearchSchema = z.object({
|
|
@@ -19,6 +19,7 @@ const createSourceCounts = () => ({
|
|
|
19
19
|
requestBody: 0,
|
|
20
20
|
responseUrl: 0,
|
|
21
21
|
responseBody: 0,
|
|
22
|
+
responseHeaders: 0,
|
|
22
23
|
});
|
|
23
24
|
const accumulateSourceCounts = (records) => {
|
|
24
25
|
const counts = createSourceCounts();
|
|
@@ -31,6 +32,7 @@ const cloneSourceCounts = (counts) => ({
|
|
|
31
32
|
requestBody: counts.requestBody,
|
|
32
33
|
responseUrl: counts.responseUrl,
|
|
33
34
|
responseBody: counts.responseBody,
|
|
35
|
+
responseHeaders: counts.responseHeaders,
|
|
34
36
|
});
|
|
35
37
|
const computeEventScore = (match) => {
|
|
36
38
|
const ageMs = Math.max(0, Date.now() - match.timestamp);
|
|
@@ -40,7 +42,8 @@ const computeEventScore = (match) => {
|
|
|
40
42
|
const responseBonus = match.sourceCounts.responseBody * 2;
|
|
41
43
|
const requestBonus = match.sourceCounts.requestBody;
|
|
42
44
|
const urlBonus = (match.sourceCounts.requestUrl + match.sourceCounts.responseUrl) * 0.25;
|
|
43
|
-
|
|
45
|
+
const headerBonus = match.sourceCounts.responseHeaders;
|
|
46
|
+
return base + responseBonus + requestBonus + urlBonus + headerBonus + recencyBoost;
|
|
44
47
|
};
|
|
45
48
|
const formatGroupPath = (group) => {
|
|
46
49
|
const normalized = group.normalized;
|
|
@@ -74,6 +77,21 @@ const extractExamples = (group) => {
|
|
|
74
77
|
}
|
|
75
78
|
return items;
|
|
76
79
|
};
|
|
80
|
+
/**
 * Looks for `keyword` inside each Set-Cookie header of `response` and appends
 * one entry to `matches` for every header that contains it.
 *
 * The header text is lower-cased before comparison while `keyword` is used as
 * given (NOTE(review): presumably the caller passes it already lower-cased -
 * confirm). A failure to read the headers is treated as "no cookies".
 *
 * @param {object} response - Response exposing an async `headerValues(name)`.
 * @param {string} keyword - Text to search for.
 * @param {Array<object>} matches - Output list, mutated in place.
 * @returns {Promise<void>}
 */
const searchInResponseSetCookies = async (response, keyword, matches) => {
    const setCookieValues = await response.headerValues('set-cookie').catch(() => []);
    for (let index = 0; index < setCookieValues.length; index += 1) {
        const cookie = setCookieValues[index];
        if (cookie.toLowerCase().includes(keyword)) {
            matches.push({
                path: `response.headers.set-cookie[${index}]`,
                value: cookie,
                context: highlightMatch(cookie, keyword, 120),
                source: 'responseHeaders',
            });
        }
    }
};
|
|
77
95
|
const networkSearch = defineTool({
|
|
78
96
|
capability: 'core',
|
|
79
97
|
schema: {
|
|
@@ -103,8 +121,10 @@ const networkSearch = defineTool({
|
|
|
103
121
|
const matches = [];
|
|
104
122
|
searchInUrls(request, resp, keyword, keywordParams, matches);
|
|
105
123
|
searchInRequestBody(request, keyword, matches);
|
|
106
|
-
if (resp)
|
|
124
|
+
if (resp) {
|
|
107
125
|
await searchInResponseBody(resp, keyword, matches);
|
|
126
|
+
await searchInResponseSetCookies(resp, keyword, matches);
|
|
127
|
+
}
|
|
108
128
|
if (matches.length > 0) {
|
|
109
129
|
const groupMap = new Map();
|
|
110
130
|
for (const m of matches) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@wordbricks/playwright-mcp",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.20",
|
|
4
4
|
"description": "Playwright Tools for MCP",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"repository": {
|
|
@@ -49,7 +49,7 @@
|
|
|
49
49
|
}
|
|
50
50
|
},
|
|
51
51
|
"dependencies": {
|
|
52
|
-
"@fxts/core": "1.
|
|
52
|
+
"@fxts/core": "1.20.0",
|
|
53
53
|
"@ghostery/adblocker": "2.12.5",
|
|
54
54
|
"@modelcontextprotocol/sdk": "1.16.0",
|
|
55
55
|
"cheerio": "1.1.2",
|