@wordbricks/playwright-mcp 0.1.20 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli-wrapper.js +15 -14
- package/cli.js +1 -1
- package/config.d.ts +11 -6
- package/index.d.ts +7 -5
- package/index.js +1 -1
- package/lib/browserContextFactory.js +131 -58
- package/lib/browserServerBackend.js +14 -12
- package/lib/config.js +60 -46
- package/lib/context.js +41 -39
- package/lib/extension/cdpRelay.js +67 -61
- package/lib/extension/extensionContextFactory.js +10 -10
- package/lib/frameworkPatterns.js +21 -21
- package/lib/hooks/antiBotDetectionHook.js +59 -52
- package/lib/hooks/core.js +11 -10
- package/lib/hooks/eventConsumer.js +21 -21
- package/lib/hooks/events.js +3 -3
- package/lib/hooks/formatToolCallEvent.js +3 -7
- package/lib/hooks/frameworkStateHook.js +40 -40
- package/lib/hooks/grouping.js +3 -3
- package/lib/hooks/jsonLdDetectionHook.js +44 -37
- package/lib/hooks/networkFilters.js +17 -17
- package/lib/hooks/networkSetup.js +9 -7
- package/lib/hooks/networkTrackingHook.js +21 -21
- package/lib/hooks/pageHeightHook.js +9 -9
- package/lib/hooks/registry.js +15 -16
- package/lib/hooks/requireTabHook.js +3 -3
- package/lib/hooks/schema.js +38 -38
- package/lib/hooks/waitHook.js +7 -7
- package/lib/index.js +12 -10
- package/lib/mcp/inProcessTransport.js +3 -4
- package/lib/mcp/proxyBackend.js +43 -28
- package/lib/mcp/server.js +24 -19
- package/lib/mcp/tool.js +14 -8
- package/lib/mcp/transport.js +60 -53
- package/lib/playwrightTransformer.js +129 -106
- package/lib/program.js +54 -52
- package/lib/response.js +36 -30
- package/lib/sessionLog.js +19 -17
- package/lib/tab.js +41 -39
- package/lib/tools/common.js +19 -19
- package/lib/tools/console.js +11 -11
- package/lib/tools/dialogs.js +18 -15
- package/lib/tools/evaluate.js +26 -17
- package/lib/tools/extractFrameworkState.js +48 -37
- package/lib/tools/files.js +17 -14
- package/lib/tools/form.js +32 -23
- package/lib/tools/getSnapshot.js +14 -15
- package/lib/tools/getVisibleHtml.js +33 -17
- package/lib/tools/install.js +20 -20
- package/lib/tools/keyboard.js +29 -24
- package/lib/tools/mouse.js +29 -31
- package/lib/tools/navigate.js +19 -23
- package/lib/tools/network.js +12 -14
- package/lib/tools/networkDetail.js +58 -49
- package/lib/tools/networkSearch/bodySearch.js +46 -32
- package/lib/tools/networkSearch/grouping.js +15 -6
- package/lib/tools/networkSearch/helpers.js +4 -4
- package/lib/tools/networkSearch/searchHtml.js +25 -16
- package/lib/tools/networkSearch/urlSearch.js +56 -14
- package/lib/tools/networkSearch.js +46 -36
- package/lib/tools/pdf.js +13 -12
- package/lib/tools/repl.js +66 -54
- package/lib/tools/screenshot.js +57 -33
- package/lib/tools/scroll.js +29 -24
- package/lib/tools/snapshot.js +66 -49
- package/lib/tools/tabs.js +22 -19
- package/lib/tools/tool.js +5 -3
- package/lib/tools/utils.js +17 -13
- package/lib/tools/wait.js +24 -19
- package/lib/tools.js +21 -20
- package/lib/utils/adBlockFilter.js +29 -26
- package/lib/utils/codegen.js +20 -16
- package/lib/utils/extensionPath.js +4 -4
- package/lib/utils/fileUtils.js +17 -13
- package/lib/utils/graphql.js +69 -58
- package/lib/utils/guid.js +3 -3
- package/lib/utils/httpServer.js +9 -9
- package/lib/utils/log.js +3 -3
- package/lib/utils/manualPromise.js +7 -7
- package/lib/utils/networkFormat.js +7 -5
- package/lib/utils/package.js +4 -4
- package/lib/utils/sanitizeHtml.js +66 -34
- package/lib/utils/truncate.js +25 -25
- package/lib/utils/withTimeout.js +1 -1
- package/package.json +34 -57
- package/src/index.ts +27 -17
- package/LICENSE +0 -202
package/lib/hooks/core.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { reduce } from
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import { toolNameSchema } from
|
|
5
|
-
export {
|
|
1
|
+
import { reduce } from "@fxts/core";
|
|
2
|
+
import { consumeEvents } from "./eventConsumer.js";
|
|
3
|
+
import { getEventStore, trackEvent } from "./events.js";
|
|
4
|
+
import { toolNameSchema } from "./schema.js";
|
|
5
|
+
export { Err, Ok } from "../utils/result.js";
|
|
6
6
|
export const runHook = async (hook, ctx) => {
|
|
7
7
|
const result = await hook.handler(ctx);
|
|
8
8
|
if (!result.ok)
|
|
@@ -28,7 +28,8 @@ export const wrapToolWithHooks = (tool, registry) => {
|
|
|
28
28
|
const toolName = parsedName.data;
|
|
29
29
|
const toolHooks = getToolHooks(registry, toolName);
|
|
30
30
|
// Even if no hooks configured, we still need to consume events and track tool calls
|
|
31
|
-
if (!toolHooks ||
|
|
31
|
+
if (!toolHooks ||
|
|
32
|
+
(toolHooks.preHooks.length === 0 && toolHooks.postHooks.length === 0)) {
|
|
32
33
|
return {
|
|
33
34
|
...tool,
|
|
34
35
|
handle: async (context, params, response) => {
|
|
@@ -49,7 +50,7 @@ export const wrapToolWithHooks = (tool, registry) => {
|
|
|
49
50
|
// Record tool call completion
|
|
50
51
|
const executionTime = Date.now() - startTime;
|
|
51
52
|
trackEvent(context, {
|
|
52
|
-
type:
|
|
53
|
+
type: "tool-call",
|
|
53
54
|
data: {
|
|
54
55
|
toolName,
|
|
55
56
|
params: params,
|
|
@@ -102,7 +103,7 @@ export const wrapToolWithHooks = (tool, registry) => {
|
|
|
102
103
|
// Record tool call completion
|
|
103
104
|
const executionTime = Date.now() - startTime;
|
|
104
105
|
trackEvent(context, {
|
|
105
|
-
type:
|
|
106
|
+
type: "tool-call",
|
|
106
107
|
data: {
|
|
107
108
|
toolName,
|
|
108
109
|
params: params,
|
|
@@ -125,7 +126,7 @@ export const wrapToolWithHooks = (tool, registry) => {
|
|
|
125
126
|
await reduce(async (ctx, hook) => runHook(hook, await ctx), Promise.resolve(postHookContext), toolHooks.postHooks);
|
|
126
127
|
}
|
|
127
128
|
catch (error) {
|
|
128
|
-
response.addError(error instanceof Error ? error.message :
|
|
129
|
+
response.addError(error instanceof Error ? error.message : "Post-hook failed");
|
|
129
130
|
}
|
|
130
131
|
// Consume post-tool events
|
|
131
132
|
consumeEvents(context, eventStore, response);
|
|
@@ -140,5 +141,5 @@ export const applyHooksToTools = (tools, context) => {
|
|
|
140
141
|
const registry = getHookRegistry(context);
|
|
141
142
|
if (registry.tools.size === 0)
|
|
142
143
|
return tools;
|
|
143
|
-
return tools.map(tool => wrapToolWithHooks(tool, registry));
|
|
144
|
+
return tools.map((tool) => wrapToolWithHooks(tool, registry));
|
|
144
145
|
};
|
|
@@ -1,21 +1,21 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
import { planGroupedMessages } from
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
import {
|
|
1
|
+
import { formatAntiBotEvent, getAntiBotProviderConfigs, } from "./antiBotDetectionHook.js";
|
|
2
|
+
import { getEventsAfter, isEventType, updateLastSeenId } from "./events.js";
|
|
3
|
+
import { formatToolCallEvent } from "./formatToolCallEvent.js";
|
|
4
|
+
import { formatFrameworkStateEvent } from "./frameworkStateHook.js";
|
|
5
|
+
import { planGroupedMessages } from "./grouping.js";
|
|
6
|
+
import { formatJsonLdEvent } from "./jsonLdDetectionHook.js";
|
|
7
|
+
import { isAntiBotUrl } from "./networkFilters.js";
|
|
8
|
+
import { formatNetworkEvent } from "./networkTrackingHook.js";
|
|
9
|
+
import { formatPageHeightEvent } from "./pageHeightHook.js";
|
|
10
|
+
import { formatWaitEvent } from "./waitHook.js";
|
|
11
11
|
const eventFormatters = {
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
12
|
+
wait: formatWaitEvent,
|
|
13
|
+
"page-height-change": formatPageHeightEvent,
|
|
14
|
+
"network-request": formatNetworkEvent,
|
|
15
|
+
"tool-call": formatToolCallEvent,
|
|
16
|
+
"framework-state": formatFrameworkStateEvent,
|
|
17
|
+
"json-ld": formatJsonLdEvent,
|
|
18
|
+
"anti-bot": formatAntiBotEvent,
|
|
19
19
|
};
|
|
20
20
|
const formatEvent = (event) => {
|
|
21
21
|
const formatter = eventFormatters[event.type];
|
|
@@ -29,19 +29,19 @@ const consumeEvent = (event, response, plan) => {
|
|
|
29
29
|
response.addEvent(`[${event.id}] ${formattedMessage}`);
|
|
30
30
|
};
|
|
31
31
|
const shouldHideEvent = (event) => {
|
|
32
|
-
const isNetworkRequest = isEventType(
|
|
32
|
+
const isNetworkRequest = isEventType("network-request");
|
|
33
33
|
if (!isNetworkRequest(event))
|
|
34
34
|
return false;
|
|
35
35
|
if (isAntiBotUrl(event.data.url))
|
|
36
36
|
return true;
|
|
37
|
-
const configs = getAntiBotProviderConfigs().filter(config => config.provider ===
|
|
38
|
-
return configs.some(config => config.match(event));
|
|
37
|
+
const configs = getAntiBotProviderConfigs().filter((config) => config.provider === "cloudflare-turnstile");
|
|
38
|
+
return configs.some((config) => config.match(event));
|
|
39
39
|
};
|
|
40
40
|
export const consumeEvents = (context, eventStore, response) => {
|
|
41
41
|
const unconsumedEvents = getEventsAfter(eventStore, eventStore.lastSeenEventId);
|
|
42
42
|
if (unconsumedEvents.length === 0)
|
|
43
43
|
return;
|
|
44
|
-
const visibleEvents = unconsumedEvents.filter(event => !shouldHideEvent(event));
|
|
44
|
+
const visibleEvents = unconsumedEvents.filter((event) => !shouldHideEvent(event));
|
|
45
45
|
const plan = planGroupedMessages(visibleEvents);
|
|
46
46
|
// Consume all events in chronological order
|
|
47
47
|
for (const event of visibleEvents)
|
package/lib/hooks/events.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { filter, pipe, toArray } from "@fxts/core";
|
|
2
2
|
export const isEventType = (type) => (event) => event.type === type;
|
|
3
3
|
export const createEventStore = () => ({
|
|
4
4
|
events: new Map(),
|
|
@@ -12,7 +12,7 @@ export const trackEvent = (context, params) => {
|
|
|
12
12
|
id: eventId,
|
|
13
13
|
type: params.type,
|
|
14
14
|
data: params.data,
|
|
15
|
-
timestamp: params.timestamp ?? Date.now()
|
|
15
|
+
timestamp: params.timestamp ?? Date.now(),
|
|
16
16
|
};
|
|
17
17
|
store.events.set(eventId, event);
|
|
18
18
|
return eventId;
|
|
@@ -26,7 +26,7 @@ export const getEventsAfter = (store, afterEventId) => {
|
|
|
26
26
|
if (!afterEventId) {
|
|
27
27
|
return pipe(store.events.values(), toArray);
|
|
28
28
|
}
|
|
29
|
-
return pipe(store.events.values(), filter(event => event.id > afterEventId), toArray);
|
|
29
|
+
return pipe(store.events.values(), filter((event) => event.id > afterEventId), toArray);
|
|
30
30
|
};
|
|
31
31
|
const eventStoreMap = new WeakMap();
|
|
32
32
|
export const getEventStore = (context) => {
|
|
@@ -3,14 +3,10 @@ export const formatToolCallEvent = (event) => {
|
|
|
3
3
|
// Format parameters (truncate if too long)
|
|
4
4
|
const paramStr = params && Object.keys(params).length > 0
|
|
5
5
|
? ` with params: ${JSON.stringify(params, null, 0).slice(0, 100)}`
|
|
6
|
-
:
|
|
6
|
+
: "";
|
|
7
7
|
// Format execution time if available
|
|
8
|
-
const timeStr = executionTime !== undefined
|
|
9
|
-
? ` (${executionTime}ms)`
|
|
10
|
-
: '';
|
|
8
|
+
const timeStr = executionTime !== undefined ? ` (${executionTime}ms)` : "";
|
|
11
9
|
// Format success status if available
|
|
12
|
-
const statusStr = success !== undefined
|
|
13
|
-
? success ? ' ✓' : ' ✗'
|
|
14
|
-
: '';
|
|
10
|
+
const statusStr = success !== undefined ? (success ? " ✓" : " ✗") : "";
|
|
15
11
|
return `Tool ${toolName}${paramStr}${timeStr}${statusStr}`;
|
|
16
12
|
};
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import { trackEvent } from
|
|
4
|
-
import {
|
|
1
|
+
import { FRAMEWORK_STATE_PATTERNS, MAX_DISPLAY_ITEMS, } from "../frameworkPatterns.js";
|
|
2
|
+
import { Ok } from "../utils/result.js";
|
|
3
|
+
import { trackEvent } from "./events.js";
|
|
4
|
+
import { hookNameSchema } from "./schema.js";
|
|
5
5
|
const pageFrameworkStates = new WeakMap();
|
|
6
6
|
const seenFrameworkKeysByContext = new WeakMap();
|
|
7
7
|
const getSeenFrameworkKeys = (context) => {
|
|
@@ -13,7 +13,7 @@ const getSeenFrameworkKeys = (context) => {
|
|
|
13
13
|
return set;
|
|
14
14
|
};
|
|
15
15
|
export const frameworkStatePreHook = {
|
|
16
|
-
name: hookNameSchema.enum[
|
|
16
|
+
name: hookNameSchema.enum["framework-state-pre"],
|
|
17
17
|
handler: async (context) => {
|
|
18
18
|
const frameworkState = await detectFrameworkState(context);
|
|
19
19
|
if (frameworkState) {
|
|
@@ -21,27 +21,29 @@ export const frameworkStatePreHook = {
|
|
|
21
21
|
if (context.tab?.page)
|
|
22
22
|
pageFrameworkStates.set(context.tab.page, frameworkState);
|
|
23
23
|
// Track event for newly detected framework state
|
|
24
|
-
const newKeys = Object.keys(frameworkState).filter(key => !getSeenFrameworkKeys(context.context).has(key));
|
|
24
|
+
const newKeys = Object.keys(frameworkState).filter((key) => !getSeenFrameworkKeys(context.context).has(key));
|
|
25
25
|
if (newKeys.length > 0) {
|
|
26
26
|
trackEvent(context.context, {
|
|
27
|
-
type:
|
|
27
|
+
type: "framework-state",
|
|
28
28
|
data: {
|
|
29
29
|
state: frameworkState,
|
|
30
|
-
action:
|
|
30
|
+
action: "detected",
|
|
31
31
|
},
|
|
32
32
|
});
|
|
33
33
|
// Mark keys as seen
|
|
34
|
-
newKeys.forEach(key => getSeenFrameworkKeys(context.context).add(key));
|
|
34
|
+
newKeys.forEach((key) => getSeenFrameworkKeys(context.context).add(key));
|
|
35
35
|
}
|
|
36
36
|
}
|
|
37
37
|
return Ok(undefined);
|
|
38
38
|
},
|
|
39
39
|
};
|
|
40
40
|
export const frameworkStatePostHook = {
|
|
41
|
-
name: hookNameSchema.enum[
|
|
41
|
+
name: hookNameSchema.enum["framework-state-post"],
|
|
42
42
|
handler: async (context) => {
|
|
43
43
|
const newFrameworkState = await detectFrameworkState(context);
|
|
44
|
-
const initialState = context.tab?.page
|
|
44
|
+
const initialState = context.tab?.page
|
|
45
|
+
? pageFrameworkStates.get(context.tab.page)
|
|
46
|
+
: undefined;
|
|
45
47
|
if (newFrameworkState) {
|
|
46
48
|
const changes = [];
|
|
47
49
|
if (initialState) {
|
|
@@ -64,27 +66,27 @@ export const frameworkStatePostHook = {
|
|
|
64
66
|
}
|
|
65
67
|
if (changes.length > 0) {
|
|
66
68
|
trackEvent(context.context, {
|
|
67
|
-
type:
|
|
69
|
+
type: "framework-state",
|
|
68
70
|
data: {
|
|
69
71
|
state: newFrameworkState,
|
|
70
72
|
changes,
|
|
71
|
-
action:
|
|
73
|
+
action: "changed",
|
|
72
74
|
},
|
|
73
75
|
});
|
|
74
76
|
}
|
|
75
77
|
}
|
|
76
78
|
else {
|
|
77
79
|
// No initial state, but we have state now
|
|
78
|
-
const newKeys = Object.keys(newFrameworkState).filter(key => !getSeenFrameworkKeys(context.context).has(key));
|
|
80
|
+
const newKeys = Object.keys(newFrameworkState).filter((key) => !getSeenFrameworkKeys(context.context).has(key));
|
|
79
81
|
if (newKeys.length > 0) {
|
|
80
82
|
trackEvent(context.context, {
|
|
81
|
-
type:
|
|
83
|
+
type: "framework-state",
|
|
82
84
|
data: {
|
|
83
85
|
state: newFrameworkState,
|
|
84
|
-
action:
|
|
86
|
+
action: "detected",
|
|
85
87
|
},
|
|
86
88
|
});
|
|
87
|
-
newKeys.forEach(key => getSeenFrameworkKeys(context.context).add(key));
|
|
89
|
+
newKeys.forEach((key) => getSeenFrameworkKeys(context.context).add(key));
|
|
88
90
|
}
|
|
89
91
|
}
|
|
90
92
|
// Update stored state
|
|
@@ -107,11 +109,11 @@ async function detectFrameworkState(context) {
|
|
|
107
109
|
const value = window[pattern];
|
|
108
110
|
// Only capture if it's a non-empty object or has meaningful content
|
|
109
111
|
if (value &&
|
|
110
|
-
(typeof value ===
|
|
112
|
+
(typeof value === "object" || typeof value === "string")) {
|
|
111
113
|
state[pattern] =
|
|
112
|
-
typeof value ===
|
|
114
|
+
typeof value === "object"
|
|
113
115
|
? {
|
|
114
|
-
type:
|
|
116
|
+
type: "object",
|
|
115
117
|
keys: Object.keys(value).slice(0, MAX_ITEMS * 2),
|
|
116
118
|
}
|
|
117
119
|
: {
|
|
@@ -127,19 +129,19 @@ async function detectFrameworkState(context) {
|
|
|
127
129
|
}
|
|
128
130
|
// Also check for React Fiber internals
|
|
129
131
|
const reactRootSelectors = [
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
132
|
+
"#__next",
|
|
133
|
+
"#root",
|
|
134
|
+
"#app",
|
|
135
|
+
"[data-reactroot]",
|
|
134
136
|
];
|
|
135
137
|
for (const selector of reactRootSelectors) {
|
|
136
138
|
const element = document.querySelector(selector);
|
|
137
139
|
if (element) {
|
|
138
|
-
const fiberKey = Object.keys(element).find(key => key.startsWith(
|
|
139
|
-
key.startsWith(
|
|
140
|
-
key.startsWith(
|
|
140
|
+
const fiberKey = Object.keys(element).find((key) => key.startsWith("__reactInternalInstance") ||
|
|
141
|
+
key.startsWith("__reactFiber") ||
|
|
142
|
+
key.startsWith("_reactRootContainer"));
|
|
141
143
|
if (fiberKey) {
|
|
142
|
-
state[
|
|
144
|
+
state["React Fiber Root"] = { selector, fiberKey };
|
|
143
145
|
break;
|
|
144
146
|
}
|
|
145
147
|
}
|
|
@@ -149,32 +151,30 @@ async function detectFrameworkState(context) {
|
|
|
149
151
|
return result;
|
|
150
152
|
}
|
|
151
153
|
function formatValue(value) {
|
|
152
|
-
if (typeof value ===
|
|
153
|
-
if (value.type ===
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
return `
|
|
157
|
-
else if ('preview' in value && typeof value.preview === 'string')
|
|
158
|
-
return `"${value.preview}${value.preview.length >= 200 ? '...' : ''}"`;
|
|
154
|
+
if (typeof value === "object" && value !== null && "type" in value) {
|
|
155
|
+
if (value.type === "object" && "keys" in value && Array.isArray(value.keys))
|
|
156
|
+
return `{${value.keys.join(", ")}${value.keys.length >= MAX_DISPLAY_ITEMS * 2 ? ", ..." : ""}}`;
|
|
157
|
+
else if ("preview" in value && typeof value.preview === "string")
|
|
158
|
+
return `"${value.preview}${value.preview.length >= 200 ? "..." : ""}"`;
|
|
159
159
|
}
|
|
160
160
|
return JSON.stringify(value);
|
|
161
161
|
}
|
|
162
162
|
export const formatFrameworkStateEvent = (event) => {
|
|
163
163
|
const { state, changes, action } = event.data;
|
|
164
164
|
const messages = [];
|
|
165
|
-
if (action ===
|
|
166
|
-
messages.push(
|
|
165
|
+
if (action === "detected") {
|
|
166
|
+
messages.push("Framework state detected:");
|
|
167
167
|
const keys = Object.keys(state);
|
|
168
168
|
for (const key of keys) {
|
|
169
169
|
const value = state[key];
|
|
170
170
|
messages.push(` ${key}: ${formatValue(value)}`);
|
|
171
171
|
}
|
|
172
172
|
}
|
|
173
|
-
else if (action ===
|
|
174
|
-
messages.push(
|
|
173
|
+
else if (action === "changed" && changes) {
|
|
174
|
+
messages.push("Framework state changed:");
|
|
175
175
|
messages.push(...changes.map((change) => ` ${change}`));
|
|
176
176
|
}
|
|
177
|
-
return messages.join(
|
|
177
|
+
return messages.join("\n");
|
|
178
178
|
};
|
|
179
179
|
export const frameworkStateHooks = {
|
|
180
180
|
pre: frameworkStatePreHook,
|
package/lib/hooks/grouping.js
CHANGED
|
@@ -7,7 +7,7 @@ export const defineGroupingRule = (spec) => {
|
|
|
7
7
|
keyOf: (e) => {
|
|
8
8
|
// Planner guarantees keyOf is only called when match(e) is true
|
|
9
9
|
if (!match(e))
|
|
10
|
-
return
|
|
10
|
+
return "";
|
|
11
11
|
return spec.keyOf(e);
|
|
12
12
|
},
|
|
13
13
|
summaryOf: (first, run) => {
|
|
@@ -19,9 +19,9 @@ export const defineGroupingRule = (spec) => {
|
|
|
19
19
|
}
|
|
20
20
|
const typedFirst = match(first) ? first : typedRun[0];
|
|
21
21
|
if (!typedFirst)
|
|
22
|
-
return
|
|
22
|
+
return "";
|
|
23
23
|
return spec.summaryOf(typedFirst, typedRun);
|
|
24
|
-
}
|
|
24
|
+
},
|
|
25
25
|
};
|
|
26
26
|
};
|
|
27
27
|
export const registerGroupingRule = (type, rule) => {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { Ok } from
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
1
|
+
import { Ok } from "../utils/result.js";
|
|
2
|
+
import { trackEvent } from "./events.js";
|
|
3
|
+
import { hookNameSchema } from "./schema.js";
|
|
4
4
|
const MAX_DISPLAY_ITEMS = 5;
|
|
5
5
|
const pageJsonLdStates = new WeakMap();
|
|
6
6
|
const seenJsonLdTypesByContext = new WeakMap();
|
|
@@ -13,7 +13,7 @@ const getSeenJsonLdTypes = (context) => {
|
|
|
13
13
|
return set;
|
|
14
14
|
};
|
|
15
15
|
export const jsonLdDetectionPreHook = {
|
|
16
|
-
name: hookNameSchema.enum[
|
|
16
|
+
name: hookNameSchema.enum["json-ld-detection-pre"],
|
|
17
17
|
handler: async (context) => {
|
|
18
18
|
const jsonLdState = await detectJsonLdState(context);
|
|
19
19
|
if (jsonLdState) {
|
|
@@ -21,27 +21,29 @@ export const jsonLdDetectionPreHook = {
|
|
|
21
21
|
if (context.tab?.page)
|
|
22
22
|
pageJsonLdStates.set(context.tab.page, jsonLdState);
|
|
23
23
|
// Track event for newly detected JSON-LD types
|
|
24
|
-
const newTypes = Object.keys(jsonLdState).filter(type => !getSeenJsonLdTypes(context.context).has(type));
|
|
24
|
+
const newTypes = Object.keys(jsonLdState).filter((type) => !getSeenJsonLdTypes(context.context).has(type));
|
|
25
25
|
if (newTypes.length > 0) {
|
|
26
26
|
trackEvent(context.context, {
|
|
27
|
-
type:
|
|
27
|
+
type: "json-ld",
|
|
28
28
|
data: {
|
|
29
29
|
state: jsonLdState,
|
|
30
|
-
action:
|
|
30
|
+
action: "detected",
|
|
31
31
|
},
|
|
32
32
|
});
|
|
33
33
|
// Mark types as seen
|
|
34
|
-
newTypes.forEach(type => getSeenJsonLdTypes(context.context).add(type));
|
|
34
|
+
newTypes.forEach((type) => getSeenJsonLdTypes(context.context).add(type));
|
|
35
35
|
}
|
|
36
36
|
}
|
|
37
37
|
return Ok(undefined);
|
|
38
38
|
},
|
|
39
39
|
};
|
|
40
40
|
export const jsonLdDetectionPostHook = {
|
|
41
|
-
name: hookNameSchema.enum[
|
|
41
|
+
name: hookNameSchema.enum["json-ld-detection-post"],
|
|
42
42
|
handler: async (context) => {
|
|
43
43
|
const newJsonLdState = await detectJsonLdState(context);
|
|
44
|
-
const initialState = context.tab?.page
|
|
44
|
+
const initialState = context.tab?.page
|
|
45
|
+
? pageJsonLdStates.get(context.tab.page)
|
|
46
|
+
: undefined;
|
|
45
47
|
if (newJsonLdState || initialState) {
|
|
46
48
|
const changes = [];
|
|
47
49
|
if (initialState && newJsonLdState) {
|
|
@@ -54,37 +56,35 @@ export const jsonLdDetectionPostHook = {
|
|
|
54
56
|
const initInfo = initialState[type];
|
|
55
57
|
const currInfo = newJsonLdState[type];
|
|
56
58
|
if (!initInfo && currInfo)
|
|
57
|
-
changes.push(`+ ${type}${currInfo.count > 1 ? ` (${currInfo.count} instances)` :
|
|
59
|
+
changes.push(`+ ${type}${currInfo.count > 1 ? ` (${currInfo.count} instances)` : ""}`);
|
|
58
60
|
else if (initInfo && !currInfo)
|
|
59
|
-
changes.push(`- ${type}${initInfo.count > 1 ? ` (${initInfo.count} instances)` :
|
|
60
|
-
else if (initInfo &&
|
|
61
|
-
currInfo &&
|
|
62
|
-
initInfo.count !== currInfo.count)
|
|
61
|
+
changes.push(`- ${type}${initInfo.count > 1 ? ` (${initInfo.count} instances)` : ""}`);
|
|
62
|
+
else if (initInfo && currInfo && initInfo.count !== currInfo.count)
|
|
63
63
|
changes.push(`~ ${type}: ${initInfo.count} → ${currInfo.count} instances`);
|
|
64
64
|
}
|
|
65
65
|
if (changes.length > 0) {
|
|
66
66
|
trackEvent(context.context, {
|
|
67
|
-
type:
|
|
67
|
+
type: "json-ld",
|
|
68
68
|
data: {
|
|
69
69
|
state: newJsonLdState,
|
|
70
70
|
changes,
|
|
71
|
-
action:
|
|
71
|
+
action: "changed",
|
|
72
72
|
},
|
|
73
73
|
});
|
|
74
74
|
}
|
|
75
75
|
}
|
|
76
76
|
else if (newJsonLdState && !initialState) {
|
|
77
77
|
// No initial state, but we have state now
|
|
78
|
-
const newTypes = Object.keys(newJsonLdState).filter(type => !getSeenJsonLdTypes(context.context).has(type));
|
|
78
|
+
const newTypes = Object.keys(newJsonLdState).filter((type) => !getSeenJsonLdTypes(context.context).has(type));
|
|
79
79
|
if (newTypes.length > 0) {
|
|
80
80
|
trackEvent(context.context, {
|
|
81
|
-
type:
|
|
81
|
+
type: "json-ld",
|
|
82
82
|
data: {
|
|
83
83
|
state: newJsonLdState,
|
|
84
|
-
action:
|
|
84
|
+
action: "detected",
|
|
85
85
|
},
|
|
86
86
|
});
|
|
87
|
-
newTypes.forEach(type => getSeenJsonLdTypes(context.context).add(type));
|
|
87
|
+
newTypes.forEach((type) => getSeenJsonLdTypes(context.context).add(type));
|
|
88
88
|
}
|
|
89
89
|
}
|
|
90
90
|
// Update stored state
|
|
@@ -104,23 +104,27 @@ async function detectJsonLdState(context) {
|
|
|
104
104
|
scripts.forEach((script, index) => {
|
|
105
105
|
try {
|
|
106
106
|
// Parse JSON
|
|
107
|
-
const data = JSON.parse(script.textContent ||
|
|
107
|
+
const data = JSON.parse(script.textContent || "{}");
|
|
108
108
|
// Extract @type - handle both single and array types
|
|
109
109
|
let types = [];
|
|
110
|
-
if (data[
|
|
111
|
-
types = Array.isArray(data[
|
|
110
|
+
if (data["@type"]) {
|
|
111
|
+
types = Array.isArray(data["@type"])
|
|
112
|
+
? data["@type"]
|
|
113
|
+
: [data["@type"]];
|
|
112
114
|
}
|
|
113
|
-
else if (data[
|
|
115
|
+
else if (data["@graph"] && Array.isArray(data["@graph"])) {
|
|
114
116
|
// Handle @graph structures
|
|
115
|
-
data[
|
|
116
|
-
if (item[
|
|
117
|
-
const itemTypes = Array.isArray(item[
|
|
117
|
+
data["@graph"].forEach((item) => {
|
|
118
|
+
if (item["@type"]) {
|
|
119
|
+
const itemTypes = Array.isArray(item["@type"])
|
|
120
|
+
? item["@type"]
|
|
121
|
+
: [item["@type"]];
|
|
118
122
|
types.push(...itemTypes);
|
|
119
123
|
}
|
|
120
124
|
});
|
|
121
125
|
}
|
|
122
126
|
// Count occurrences of each type
|
|
123
|
-
types.forEach(type => {
|
|
127
|
+
types.forEach((type) => {
|
|
124
128
|
if (!state[type])
|
|
125
129
|
state[type] = { count: 0, indices: [] };
|
|
126
130
|
state[type].count++;
|
|
@@ -128,9 +132,12 @@ async function detectJsonLdState(context) {
|
|
|
128
132
|
});
|
|
129
133
|
}
|
|
130
134
|
catch (e) {
|
|
131
|
-
state[
|
|
132
|
-
|
|
133
|
-
|
|
135
|
+
state["InvalidJSON-LD"] = state["InvalidJSON-LD"] || {
|
|
136
|
+
count: 0,
|
|
137
|
+
indices: [],
|
|
138
|
+
};
|
|
139
|
+
state["InvalidJSON-LD"].count++;
|
|
140
|
+
state["InvalidJSON-LD"].indices.push(index);
|
|
134
141
|
}
|
|
135
142
|
});
|
|
136
143
|
return Object.keys(state).length > 0 ? state : null;
|
|
@@ -159,15 +166,15 @@ function buildStateMessages(state, types) {
|
|
|
159
166
|
export const formatJsonLdEvent = (event) => {
|
|
160
167
|
const { state, changes, action } = event.data;
|
|
161
168
|
const messages = [];
|
|
162
|
-
if (action ===
|
|
163
|
-
messages.push(
|
|
169
|
+
if (action === "detected") {
|
|
170
|
+
messages.push("New JSON-LD types detected:");
|
|
164
171
|
messages.push(...buildStateMessages(state));
|
|
165
172
|
}
|
|
166
|
-
else if (action ===
|
|
167
|
-
messages.push(
|
|
173
|
+
else if (action === "changed" && changes) {
|
|
174
|
+
messages.push("JSON-LD changes after action:");
|
|
168
175
|
messages.push(...changes.map((change) => ` ${change}`));
|
|
169
176
|
}
|
|
170
|
-
return messages.join(
|
|
177
|
+
return messages.join("\n");
|
|
171
178
|
};
|
|
172
179
|
export const jsonLdDetectionHooks = {
|
|
173
180
|
pre: jsonLdDetectionPreHook,
|
|
@@ -1,20 +1,20 @@
|
|
|
1
|
-
const MEANINGFUL_RESOURCE_TYPES = [
|
|
2
|
-
const ALLOWED_METHODS = [
|
|
1
|
+
const MEANINGFUL_RESOURCE_TYPES = ["document", "xhr", "fetch"];
|
|
2
|
+
const ALLOWED_METHODS = ["GET", "POST"];
|
|
3
3
|
const EXCLUDED_EXTENSIONS = [
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
4
|
+
".svg",
|
|
5
|
+
".css",
|
|
6
|
+
".map", // JS files and source maps
|
|
7
7
|
];
|
|
8
8
|
const hasExcludedExtension = (url) => {
|
|
9
|
-
return EXCLUDED_EXTENSIONS.some(ext => {
|
|
10
|
-
const extRegex = new RegExp(`${ext.replace(
|
|
9
|
+
return EXCLUDED_EXTENSIONS.some((ext) => {
|
|
10
|
+
const extRegex = new RegExp(`${ext.replace(".", "\\.")}(\\?|#|$)`, "i");
|
|
11
11
|
return extRegex.test(url);
|
|
12
12
|
});
|
|
13
13
|
};
|
|
14
14
|
export const isAntiBotUrl = (url) => {
|
|
15
|
-
if (url.includes(
|
|
15
|
+
if (url.includes("challenges.cloudflare.com"))
|
|
16
16
|
return true;
|
|
17
|
-
if (url.includes(
|
|
17
|
+
if (url.includes(".awswaf.com"))
|
|
18
18
|
return true;
|
|
19
19
|
return false;
|
|
20
20
|
};
|
|
@@ -27,10 +27,10 @@ const isSuccessfulStatus = (status) => {
|
|
|
27
27
|
export const shouldCaptureRequest = (method, url, status, resourceType) => {
|
|
28
28
|
if (isAntiBotUrl(url))
|
|
29
29
|
return true;
|
|
30
|
-
return !hasExcludedExtension(url) &&
|
|
30
|
+
return (!hasExcludedExtension(url) &&
|
|
31
31
|
MEANINGFUL_RESOURCE_TYPES.includes(resourceType) &&
|
|
32
32
|
ALLOWED_METHODS.includes(method) &&
|
|
33
|
-
isSuccessfulStatus(status);
|
|
33
|
+
isSuccessfulStatus(status));
|
|
34
34
|
};
|
|
35
35
|
/**
|
|
36
36
|
* Format URL with trimmed parameters
|
|
@@ -43,7 +43,7 @@ export const formatUrlWithTrimmedParams = (url) => {
|
|
|
43
43
|
const trimmedParams = new URLSearchParams();
|
|
44
44
|
params.forEach((value, key) => {
|
|
45
45
|
if (value.length > 5)
|
|
46
|
-
trimmedParams.set(key, value.substring(0, 5) +
|
|
46
|
+
trimmedParams.set(key, value.substring(0, 5) + "...");
|
|
47
47
|
else
|
|
48
48
|
trimmedParams.set(key, value);
|
|
49
49
|
});
|
|
@@ -61,10 +61,10 @@ export const formatUrlWithTrimmedParams = (url) => {
|
|
|
61
61
|
*/
|
|
62
62
|
export const normalizePathname = (pathname) => {
|
|
63
63
|
if (!pathname)
|
|
64
|
-
return
|
|
65
|
-
if (pathname ===
|
|
66
|
-
return
|
|
67
|
-
return pathname.endsWith(
|
|
64
|
+
return "/";
|
|
65
|
+
if (pathname === "/")
|
|
66
|
+
return "/";
|
|
67
|
+
return pathname.endsWith("/") ? pathname.slice(0, -1) : pathname;
|
|
68
68
|
};
|
|
69
69
|
/**
|
|
70
70
|
* Normalize URL for grouping by ignoring query/hash and trailing slash
|
|
@@ -76,7 +76,7 @@ export const normalizeUrlForGrouping = (url) => {
|
|
|
76
76
|
}
|
|
77
77
|
catch {
|
|
78
78
|
// Fallback for non-standard/relative URLs: strip query/hash and trailing slash
|
|
79
|
-
const base = url.split(/[?#]/)[0] ||
|
|
79
|
+
const base = url.split(/[?#]/)[0] || "/";
|
|
80
80
|
return normalizePathname(base);
|
|
81
81
|
}
|
|
82
82
|
};
|