@slashgear/gdpr-cookie-scanner 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/README.md +8 -0
- package/.changeset/config.json +11 -0
- package/.github/ISSUE_TEMPLATE/bug_report.yml +44 -0
- package/.github/ISSUE_TEMPLATE/feature_request.yml +26 -0
- package/.github/PULL_REQUEST_TEMPLATE.md +24 -0
- package/.github/workflows/ci.yml +38 -0
- package/.github/workflows/release.yml +57 -0
- package/.idea/gdpr-report.iml +8 -0
- package/.idea/modules.xml +8 -0
- package/.idea/vcs.xml +6 -0
- package/CHANGELOG.md +7 -0
- package/CLAUDE.md +75 -0
- package/CODE_OF_CONDUCT.md +41 -0
- package/CONTRIBUTING.md +79 -0
- package/LICENSE +21 -0
- package/README.md +127 -0
- package/SECURITY.md +15 -0
- package/dist/analyzers/compliance.d.ts +13 -0
- package/dist/analyzers/compliance.d.ts.map +1 -0
- package/dist/analyzers/compliance.js +171 -0
- package/dist/analyzers/compliance.js.map +1 -0
- package/dist/analyzers/wording.d.ts +13 -0
- package/dist/analyzers/wording.d.ts.map +1 -0
- package/dist/analyzers/wording.js +91 -0
- package/dist/analyzers/wording.js.map +1 -0
- package/dist/classifiers/cookie-classifier.d.ts +8 -0
- package/dist/classifiers/cookie-classifier.d.ts.map +1 -0
- package/dist/classifiers/cookie-classifier.js +108 -0
- package/dist/classifiers/cookie-classifier.js.map +1 -0
- package/dist/classifiers/network-classifier.d.ts +9 -0
- package/dist/classifiers/network-classifier.d.ts.map +1 -0
- package/dist/classifiers/network-classifier.js +51 -0
- package/dist/classifiers/network-classifier.js.map +1 -0
- package/dist/classifiers/tracker-list.d.ts +16 -0
- package/dist/classifiers/tracker-list.d.ts.map +1 -0
- package/dist/classifiers/tracker-list.js +86 -0
- package/dist/classifiers/tracker-list.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +110 -0
- package/dist/cli.js.map +1 -0
- package/dist/report/generator.d.ts +19 -0
- package/dist/report/generator.d.ts.map +1 -0
- package/dist/report/generator.js +552 -0
- package/dist/report/generator.js.map +1 -0
- package/dist/scanner/browser.d.ts +11 -0
- package/dist/scanner/browser.d.ts.map +1 -0
- package/dist/scanner/browser.js +38 -0
- package/dist/scanner/browser.js.map +1 -0
- package/dist/scanner/consent-modal.d.ts +5 -0
- package/dist/scanner/consent-modal.d.ts.map +1 -0
- package/dist/scanner/consent-modal.js +244 -0
- package/dist/scanner/consent-modal.js.map +1 -0
- package/dist/scanner/cookies.d.ts +11 -0
- package/dist/scanner/cookies.d.ts.map +1 -0
- package/dist/scanner/cookies.js +30 -0
- package/dist/scanner/cookies.js.map +1 -0
- package/dist/scanner/index.d.ts +9 -0
- package/dist/scanner/index.d.ts.map +1 -0
- package/dist/scanner/index.js +146 -0
- package/dist/scanner/index.js.map +1 -0
- package/dist/scanner/network.d.ts +8 -0
- package/dist/scanner/network.d.ts.map +1 -0
- package/dist/scanner/network.js +41 -0
- package/dist/scanner/network.js.map +1 -0
- package/dist/types.d.ts +105 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +52 -0
- package/renovate.json +17 -0
- package/src/analyzers/compliance.ts +203 -0
- package/src/analyzers/wording.ts +112 -0
- package/src/classifiers/cookie-classifier.ts +125 -0
- package/src/classifiers/network-classifier.ts +65 -0
- package/src/classifiers/tracker-list.ts +105 -0
- package/src/cli.ts +134 -0
- package/src/report/generator.ts +703 -0
- package/src/scanner/browser.ts +52 -0
- package/src/scanner/consent-modal.ts +276 -0
- package/src/scanner/cookies.ts +43 -0
- package/src/scanner/index.ts +163 -0
- package/src/scanner/network.ts +51 -0
- package/src/types.ts +134 -0
- package/tsconfig.json +18 -0
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { chromium, type Browser, type BrowserContext, type Page } from "playwright";
|
|
2
|
+
import type { ScanOptions } from "../types.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Bundle of the Playwright handles that make up one scan session.
 * `createBrowser` builds it and `closeBrowser` releases it.
 */
export interface BrowserSession {
|
|
5
|
+
/** Chromium instance obtained from `chromium.launch`. */
browser: Browser;
|
|
6
|
+
/** Context created on `browser` with the scan's locale, viewport and user agent. */
context: BrowserContext;
|
|
7
|
+
/** Page opened in `context`. */
page: Page;
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
/**
 * Launches headless Chromium and opens a fresh context and page for a scan.
 *
 * @param options - Scan options; `locale` and the optional `userAgent` are applied to the context.
 * @returns The browser, context and page handles. Release them with `closeBrowser`.
 * @throws Rethrows any error raised while creating the context, route or page,
 *   after closing the already-launched browser so the process is not leaked.
 */
export async function createBrowser(options: ScanOptions): Promise<BrowserSession> {
  const browser = await chromium.launch({
    headless: true,
    args: [
      // NOTE(review): the Chromium sandbox is disabled here — presumably so the
      // scanner can run as root inside containers/CI; confirm this is intended.
      "--no-sandbox",
      "--disable-setuid-sandbox",
      "--disable-dev-shm-usage",
      "--disable-blink-features=AutomationControlled",
    ],
  });

  try {
    const context = await browser.newContext({
      locale: options.locale,
      viewport: { width: 1280, height: 900 },
      userAgent:
        options.userAgent ??
        [
          "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
          "AppleWebKit/537.36 (KHTML, like Gecko)",
          "Chrome/131.0.0.0 Safari/537.36",
        ].join(" "),
      // No stored state is loaded, so the context starts without existing cookies.
      storageState: undefined,
    });

    // Abort font and icon requests: they are not needed for the scan and slow it down.
    await context.route("**/*.{woff,woff2,ttf,eot,ico}", (route) => route.abort());

    const page = await context.newPage();

    return { browser, context, page };
  } catch (err) {
    // Setup failed after launch: close the browser before surfacing the error.
    await browser.close().catch(() => null);
    throw err;
  }
}
|
|
42
|
+
|
|
43
|
+
/**
 * Resets a browser context by clearing its cookies, then its granted permissions.
 *
 * @param context - Playwright context to reset.
 */
export async function clearState(context: BrowserContext): Promise<void> {
  // Cookies first, permissions second; each step waits for the previous one.
  await context.clearCookies();

  await context.clearPermissions();
}
|
|
47
|
+
|
|
48
|
+
/**
 * Tears down a session: page first, then context, then browser.
 * A failure while closing one handle is ignored so the remaining handles are still closed.
 *
 * @param session - Session previously returned by `createBrowser`.
 */
export async function closeBrowser(session: BrowserSession): Promise<void> {
  const ignoreFailure = () => null;

  const pageClosed = session.page.close();
  await pageClosed.catch(ignoreFailure);

  const contextClosed = session.context.close();
  await contextClosed.catch(ignoreFailure);

  const browserClosed = session.browser.close();
  await browserClosed.catch(ignoreFailure);
}
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
import type { Page } from "playwright";
|
|
2
|
+
import type { ConsentModal, ConsentButton, ConsentCheckbox, ConsentButtonType } from "../types.js";
|
|
3
|
+
import { analyzeButtonWording } from "../analyzers/wording.js";
|
|
4
|
+
import type { ScanOptions } from "../types.js";
|
|
5
|
+
|
|
6
|
+
/** Root selectors of well-known consent management platforms and common cookie plugins. */
const KNOWN_CMP_SELECTORS: string[] = [
  "#axeptio_overlay",
  "#axeptio-root",
  "#CybotCookiebotDialog",
  "#onetrust-consent-sdk",
  "#onetrust-banner-sdk",
  ".didomi-popup-container",
  ".didomi-consent-popup",
  "#didomi-host",
  "#tarteaucitronRoot",
  "#tarteaucitron",
  "#usercentrics-root",
  "#sp-cc",
  "#gdpr-consent-tool-wrapper",
  ".cc-banner",
  ".cc-window",
  "#cookieConsent",
  "#cookie-consent",
  "#cookie-banner",
  "#cookie-notice",
  "#cookie-law-info-bar",
];

/** Heuristic selectors based on class, id, aria-label and role attributes. */
const GENERIC_MODAL_SELECTORS: string[] = [
  "[class*='cookie'][class*='banner']",
  "[class*='cookie'][class*='modal']",
  "[class*='cookie'][class*='popup']",
  "[class*='consent'][class*='banner']",
  "[class*='consent'][class*='modal']",
  "[id*='cookie'][id*='banner']",
  "[id*='cookie'][id*='modal']",
  "[id*='consent']",
  "[aria-label*='cookie' i]",
  "[aria-label*='consent' i]",
  "[aria-label*='cookies' i]",
  "[role='dialog'][aria-label*='cookie' i]",
  "[role='alertdialog']",
];

/**
 * CSS selectors tried, in order, when looking for a consent modal/banner.
 * Platform-specific selectors come first, generic heuristics last.
 */
const MODAL_SELECTORS = [...KNOWN_CMP_SELECTORS, ...GENERIC_MODAL_SELECTORS];
|
|
47
|
+
|
|
48
|
+
/** Button wording (French and English) treated as giving consent. */
const ACCEPT_PATTERNS: RegExp[] = [
  /\b(accept|accepter|acceptez|tout accepter|accept all|j'accepte|i accept|agree|ok\b|d'accord|continuer|continue|valider|confirmer)\b/i,
];

/** Button wording (French and English) treated as refusing consent. */
const REJECT_PATTERNS: RegExp[] = [
  /\b(refus|refuse|refuser|reject|deny|decline|tout refuser|reject all|non merci|no thanks|continuer sans accepter|skip)\b/i,
];

/** Button wording (French and English) treated as opening settings/preferences. */
const PREFERENCES_PATTERNS: RegExp[] = [
  /\b(param[eè]tres|pr[eé]f[eé]rences|personnaliser|customise|customize|manage|g[eé]rer|options|choose|choisir|configure)\b/i,
];
|
|
59
|
+
|
|
60
|
+
/**
 * Locates the cookie-consent modal/banner on a loaded page and describes its contents.
 *
 * Detection walks `MODAL_SELECTORS` in order and keeps the first selector whose
 * first matching element is visible. If none qualifies, it falls back to the first
 * fixed/sticky container wider than 200px whose text mentions a cookie/consent keyword.
 *
 * @param page - Playwright page to inspect.
 * @param options - Scan options (not read by this function).
 * @returns The modal description. When nothing is found, `detected` is false,
 *   `selector` is null and the collections are empty. `screenshotPath` is always
 *   null here.
 */
export async function detectConsentModal(page: Page, options: ScanOptions): Promise<ConsentModal> {
  let foundSelector: string | null = null;

  for (const selector of MODAL_SELECTORS) {
    try {
      const element = await page.$(selector);
      if (element && (await element.isVisible())) {
        foundSelector = selector;
        break;
      }
    } catch {
      // Best effort: a selector that cannot be evaluated is skipped.
    }
  }

  // Fallback: look for any large fixed/sticky element with cookie-related text.
  if (!foundSelector) {
    foundSelector = await page.evaluate(() => {
      const keywords = /cookie|consent|consentement|rgpd|gdpr|privacy|vie priv/i;

      for (const el of document.querySelectorAll("div, section, aside, dialog")) {
        const position = window.getComputedStyle(el).position;
        if (position !== "fixed" && position !== "sticky") continue;
        if (!keywords.test(el.textContent ?? "")) continue;
        if (el.getBoundingClientRect().width <= 200) continue;

        // Ids and class names may contain characters that are not valid in a
        // CSS selector (leading digit, ":", "/", ...), so they are escaped.
        if (el.id) return `#${CSS.escape(el.id)}`;
        const classes = Array.from(el.classList)
          .slice(0, 2)
          .map((name) => CSS.escape(name))
          .join(".");
        if (classes) return `${el.tagName.toLowerCase()}.${classes}`;
        // No id and no class: no selector can be built, try the next candidate.
      }
      return null;
    });
  }

  if (!foundSelector) {
    return {
      detected: false,
      selector: null,
      text: "",
      buttons: [],
      checkboxes: [],
      hasGranularControls: false,
      layerCount: 0,
      screenshotPath: null,
    };
  }

  // Raw text of the modal; falls back to "" if the element can no longer be read.
  const modalText = await page.$eval(foundSelector, (el) => el.textContent ?? "").catch(() => "");

  const buttons = await extractButtons(page, foundSelector);
  const checkboxes = await extractCheckboxes(page, foundSelector);

  // Checkboxes/toggles or a "preferences" button both count as granular controls.
  const hasGranularControls =
    checkboxes.length > 0 || buttons.some((b) => b.type === "preferences");

  return {
    detected: true,
    selector: foundSelector,
    text: modalText.trim().replace(/\s+/g, " "),
    buttons,
    checkboxes,
    hasGranularControls,
    // Granular controls are reported as a second layer.
    layerCount: hasGranularControls ? 2 : 1,
    screenshotPath: null,
  };
}
|
|
139
|
+
|
|
140
|
+
/**
 * Collects the clickable elements inside the consent modal and describes each one.
 *
 * Elements with empty text are skipped, and so is any element whose inspection
 * throws (for example because it was detached while being read).
 *
 * @param page - Playwright page containing the modal.
 * @param modalSelector - CSS selector of the modal root.
 * @returns One entry per button, `[role="button"]` element or `a[href="#"]` link
 *   with non-empty text, in the order returned by the query.
 */
async function extractButtons(page: Page, modalSelector: string): Promise<ConsentButton[]> {
  const buttonEls = await page.$$(
    `${modalSelector} button, ${modalSelector} [role="button"], ${modalSelector} a[href="#"]`,
  );

  const buttons: ConsentButton[] = [];

  for (const el of buttonEls) {
    try {
      const text = ((await el.textContent()) ?? "").trim();
      if (!text) continue;

      const isVisible = await el.isVisible();
      const box = await el.boundingBox();

      const computedStyle = await el.evaluate((node) => {
        const style = window.getComputedStyle(node as Element);
        return {
          fontSize: parseFloat(style.fontSize),
          backgroundColor: style.backgroundColor,
          color: style.color,
        };
      });

      const type = classifyButtonType(text);

      // Build a selector for this button: id, else tag + up to three classes,
      // else tag + text. Ids and classes are escaped because they may contain
      // characters that are not valid in a CSS selector (e.g. "md:px-4").
      const selector = await el.evaluate((node) => {
        const element = node as Element;
        if (element.id) return `#${CSS.escape(element.id)}`;

        const tag = element.tagName.toLowerCase();
        const classes = Array.from(element.classList)
          .slice(0, 3)
          .map((name) => CSS.escape(name))
          .join(".");
        if (classes) return `${tag}.${classes}`;

        // Text fallback: collapse whitespace (a raw newline is not allowed in a
        // quoted string) and escape quotes/backslashes so the selector stays valid.
        const label = (element.textContent ?? "")
          .replace(/\s+/g, " ")
          .trim()
          .substring(0, 30)
          .trim()
          .replace(/["\\]/g, "\\$&");
        return `${tag}:has-text("${label}")`;
      });

      const contrastRatio = computeContrastRatio(
        computedStyle.color,
        computedStyle.backgroundColor,
      );

      buttons.push({
        type,
        text,
        selector,
        isVisible,
        boundingBox: box,
        // NaN or 0 font sizes are reported as null.
        fontSize: computedStyle.fontSize || null,
        backgroundColor: computedStyle.backgroundColor,
        textColor: computedStyle.color,
        contrastRatio,
        clickDepth: 1,
      });
    } catch {
      // Best effort: an element that cannot be inspected is left out.
      continue;
    }
  }

  return buttons;
}
|
|
201
|
+
|
|
202
|
+
/**
 * Collects checkboxes, radios and ARIA switches/checkboxes found inside the consent modal.
 *
 * @param page - Playwright page containing the modal.
 * @param modalSelector - CSS selector of the modal root.
 * @returns One entry per control, with `category` set to "unknown". An empty array
 *   is returned when the modal is not found or the in-page evaluation fails.
 */
async function extractCheckboxes(page: Page, modalSelector: string): Promise<ConsentCheckbox[]> {
  return page
    .evaluate((selector) => {
      const modal = document.querySelector(selector);
      if (!modal) return [];

      const checkboxes: ConsentCheckbox[] = [];
      const inputs = modal.querySelectorAll(
        'input[type="checkbox"], input[type="radio"], [role="switch"], [role="checkbox"]',
      );

      for (const input of inputs) {
        const el = input as HTMLInputElement;

        // Label lookup: explicit <label for="…"> first, then the wrapping label
        // or the parent element. The id is escaped so that an id containing
        // quotes or other special characters cannot make the query throw, which
        // would otherwise discard every checkbox through the outer catch.
        let label = "";
        if (el.id) {
          const labelEl = document.querySelector(`label[for="${CSS.escape(el.id)}"]`);
          label = labelEl?.textContent?.trim() ?? "";
        }
        if (!label) {
          const parent = el.closest("label") ?? el.parentElement;
          label = parent?.textContent?.trim() ?? "";
        }

        checkboxes.push({
          name: el.name || el.id || "",
          label: label.substring(0, 100),
          // Native inputs expose `checked`; ARIA widgets expose `aria-checked`.
          isCheckedByDefault: el.checked || el.getAttribute("aria-checked") === "true",
          category: "unknown", // will be classified later
          selector: el.id ? `#${CSS.escape(el.id)}` : "",
        });
      }

      return checkboxes;
    }, modalSelector)
    .catch(() => [] as ConsentCheckbox[]);
}
|
|
239
|
+
|
|
240
|
+
/**
 * Wording that declines by negating an accept verb, e.g. "Continue without accepting"
 * or "Continuer sans accepter". Such labels also contain accept keywords, so they
 * must be recognised before the accept patterns are consulted.
 */
const DECLINE_BY_NEGATION_PATTERN = /\b(sans|without)\s+(accept|agree|consent)/i;

/**
 * Close-button wording: French "ferm…" stems (fermer, fermez), English "close",
 * or a bare cross symbol. The symbols are matched without `\b` because a word
 * boundary never exists around a lone non-word character.
 */
const CLOSE_PATTERN = /\bferm\w*|\bclose\b|[×✕✖]/i;

/**
 * Classifies a consent-modal button from its visible text.
 *
 * Reject wording is checked before accept wording: labels such as
 * "Continuer sans accepter" contain accept keywords but are a refusal.
 *
 * @param text - Trimmed text content of the button.
 * @returns "reject", "accept", "preferences" or "close" for the first category
 *   that matches in that order, otherwise "unknown".
 */
function classifyButtonType(text: string): ConsentButtonType {
  const matchesAny = (patterns: RegExp[]): boolean => patterns.some((p) => p.test(text));

  if (DECLINE_BY_NEGATION_PATTERN.test(text) || matchesAny(REJECT_PATTERNS)) return "reject";
  if (matchesAny(ACCEPT_PATTERNS)) return "accept";
  if (matchesAny(PREFERENCES_PATTERNS)) return "preferences";
  if (CLOSE_PATTERN.test(text)) return "close";
  return "unknown";
}
|
|
247
|
+
|
|
248
|
+
/**
 * Computes the contrast ratio between a text colour and a background colour,
 * both given as computed-style `rgb()` / `rgba()` strings.
 *
 * The ratio is `(lighter + 0.05) / (darker + 0.05)` on relative luminance,
 * rounded to two decimals.
 *
 * @param fg - Text colour.
 * @param bg - Background colour.
 * @returns The ratio, or null when either colour cannot be parsed or when the
 *   background is fully transparent. In the transparent case the colour actually
 *   painted behind the text belongs to another element, so no ratio can be
 *   derived from `bg`.
 */
function computeContrastRatio(fg: string, bg: string): number | null {
  // e.g. "transparent" or "rgba(0, 0, 0, 0)", the computed value of an unset background.
  const isFullyTransparent =
    /^\s*(?:transparent|rgba\([^)]*[,/]\s*0(?:\.0+)?%?\s*\))\s*$/i.test(bg);
  if (isFullyTransparent) return null;

  const fgRgb = parseRgb(fg);
  const bgRgb = parseRgb(bg);
  if (!fgRgb || !bgRgb) return null;

  const fgL = relativeLuminance(fgRgb);
  const bgL = relativeLuminance(bgRgb);
  const lighter = Math.max(fgL, bgL);
  const darker = Math.min(fgL, bgL);
  return parseFloat(((lighter + 0.05) / (darker + 0.05)).toFixed(2));
}
|
|
263
|
+
|
|
264
|
+
/**
 * Extracts the red, green and blue channels from an `rgb()` / `rgba()` colour string.
 *
 * Accepts comma-separated (`rgb(1, 2, 3)`, `rgba(1, 2, 3, 0.5)`) and
 * space-separated (`rgb(1 2 3 / 50%)`) notation, optional padding inside the
 * parentheses, and decimal channel values, which are rounded to the nearest
 * integer. Any alpha component is ignored.
 *
 * @param color - Colour string, typically a computed style value.
 * @returns `[r, g, b]`, or null when the string is not in `rgb()` / `rgba()` form.
 */
function parseRgb(color: string): [number, number, number] | null {
  const channel = "(\\d+(?:\\.\\d+)?)";
  const separator = "(?:\\s*,\\s*|\\s+)";
  const match = color.match(
    new RegExp(`rgba?\\(\\s*${channel}${separator}${channel}${separator}${channel}`, "i"),
  );
  if (!match) return null;

  const toChannel = (raw: string | undefined): number => Math.round(Number(raw));
  return [toChannel(match[1]), toChannel(match[2]), toChannel(match[3])];
}
|
|
269
|
+
|
|
270
|
+
/**
 * Computes the relative luminance of an RGB triple with 0-255 channels.
 *
 * Each channel is scaled to 0-1 and linearised (divided by 12.92 at or below
 * 0.04045, otherwise raised through the 2.4 power curve), then the channels are
 * combined with the weights 0.2126 / 0.7152 / 0.0722.
 *
 * @returns The weighted sum of the linearised channels.
 */
function relativeLuminance([r, g, b]: [number, number, number]): number {
  function linearize(channel: number): number {
    const scaled = channel / 255;
    if (scaled <= 0.04045) {
      return scaled / 12.92;
    }
    return Math.pow((scaled + 0.055) / 1.055, 2.4);
  }

  const redPart = 0.2126 * linearize(r);
  const greenPart = 0.7152 * linearize(g);
  const bluePart = 0.0722 * linearize(b);
  return redPart + greenPart + bluePart;
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import type { BrowserContext } from "playwright";
|
|
2
|
+
import type { ScannedCookie } from "../types.js";
|
|
3
|
+
import { classifyCookie } from "../classifiers/cookie-classifier.js";
|
|
4
|
+
|
|
5
|
+
/** Type of the `capturedAt` field of `ScannedCookie`; identifies when a cookie snapshot was taken. */
type CapturePhase = ScannedCookie["capturedAt"];
|
|
6
|
+
|
|
7
|
+
/**
 * Snapshots every cookie currently held by the browser context and classifies each one.
 *
 * @param context - Playwright context to read cookies from.
 * @param phase - Phase label stored on each returned cookie as `capturedAt`.
 * @returns One entry per cookie. Values are truncated to 100 characters, an
 *   `expires` of -1 becomes null, and a missing `sameSite` becomes null.
 */
export async function captureCookies(
  context: BrowserContext,
  phase: CapturePhase,
): Promise<ScannedCookie[]> {
  const scanned: ScannedCookie[] = [];

  for (const cookie of await context.cookies()) {
    const { name, domain, path, value, expires, httpOnly, secure, sameSite } = cookie;
    // Classification receives the full value; only the stored copy is truncated.
    const { category, requiresConsent } = classifyCookie(name, domain, value);

    scanned.push({
      name,
      domain,
      path,
      value: value.substring(0, 100),
      expires: expires === -1 ? null : expires,
      httpOnly,
      secure,
      sameSite: sameSite ?? null,
      category,
      requiresConsent,
      capturedAt: phase,
    });
  }

  return scanned;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Compares two cookie snapshots. Cookies are matched on `domain` plus `name`.
 *
 * @param before - Earlier snapshot.
 * @param after - Later snapshot.
 * @returns `added`: entries of `after` absent from `before`;
 *   `removed`: entries of `before` absent from `after`;
 *   `persisted`: entries of `after` that were already present in `before`.
 */
export function diffCookies(
  before: ScannedCookie[],
  after: ScannedCookie[],
): { added: ScannedCookie[]; removed: ScannedCookie[]; persisted: ScannedCookie[] } {
  const keyOf = (cookie: ScannedCookie): string => `${cookie.domain}|${cookie.name}`;

  const knownBefore = new Set<string>();
  for (const cookie of before) knownBefore.add(keyOf(cookie));

  const knownAfter = new Set<string>();
  for (const cookie of after) knownAfter.add(keyOf(cookie));

  const added: ScannedCookie[] = [];
  const persisted: ScannedCookie[] = [];
  for (const cookie of after) {
    if (knownBefore.has(keyOf(cookie))) {
      persisted.push(cookie);
    } else {
      added.push(cookie);
    }
  }

  const removed: ScannedCookie[] = [];
  for (const cookie of before) {
    if (!knownAfter.has(keyOf(cookie))) removed.push(cookie);
  }

  return { added, removed, persisted };
}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import { mkdir } from "fs/promises";
|
|
2
|
+
import { join } from "path";
|
|
3
|
+
import type { ScanOptions, ScanResult } from "../types.js";
|
|
4
|
+
import { createBrowser, clearState, closeBrowser } from "./browser.js";
|
|
5
|
+
import { captureCookies } from "./cookies.js";
|
|
6
|
+
import { createNetworkInterceptor } from "./network.js";
|
|
7
|
+
import { detectConsentModal } from "./consent-modal.js";
|
|
8
|
+
import { analyzeCompliance } from "../analyzers/compliance.js";
|
|
9
|
+
|
|
10
|
+
/** Progress callback; `Scanner.run` calls it with a human-readable message as each phase starts. */
type PhaseCallback = (message: string) => void;
|
|
11
|
+
|
|
12
|
+
/**
 * Runs the consent scan of one URL in four phases over two browser sessions:
 * load without interaction, analyse the consent modal, click reject (first
 * session), then click accept in a fresh session.
 */
export class Scanner {
  constructor(private readonly options: ScanOptions) {}

  /**
   * Executes the full scan.
   *
   * Navigation and click failures are recorded in the result's `errors` list
   * instead of aborting. Any other failure is thrown, after the browser session
   * in use has been closed.
   *
   * @param onPhase - Called with a progress message as each phase starts.
   * @returns Cookies and network requests captured per phase, the modal
   *   description, the compliance analysis, screenshot paths and collected errors.
   */
  async run(onPhase: PhaseCallback = () => {}): Promise<ScanResult> {
    const startTime = Date.now();
    const screenshotPaths: string[] = [];
    const errors: string[] = [];

    if (this.options.screenshots) {
      await mkdir(this.options.outputDir, { recursive: true });
    }

    const {
      modal,
      cookiesBeforeInteraction,
      networkBeforeInteraction,
      cookiesAfterReject,
      networkAfterReject,
    } = await this.runRejectSession(onPhase, screenshotPaths, errors);

    const { cookiesAfterAccept, networkAfterAccept } = await this.runAcceptSession(
      onPhase,
      cookiesBeforeInteraction,
      screenshotPaths,
      errors,
    );

    const compliance = analyzeCompliance({
      modal,
      cookiesBeforeInteraction,
      cookiesAfterAccept,
      cookiesAfterReject,
      networkBeforeInteraction,
      networkAfterAccept,
      networkAfterReject,
    });

    return {
      url: this.options.url,
      scanDate: new Date().toISOString(),
      duration: Date.now() - startTime,
      modal,
      cookiesBeforeInteraction,
      cookiesAfterAccept,
      cookiesAfterReject,
      networkBeforeInteraction,
      networkAfterAccept,
      networkAfterReject,
      compliance,
      screenshotPaths,
      errors,
    };
  }

  /**
   * Phases 1-3: load the page, capture the pre-interaction state, analyse the
   * modal, then click the reject button and capture the state again.
   * The session is always closed, including when a step throws.
   */
  private async runRejectSession(
    onPhase: PhaseCallback,
    screenshotPaths: string[],
    errors: string[],
  ) {
    // Phase 1 — Load page, capture state BEFORE any interaction
    onPhase("Phase 1/4 — Loading page (no interaction)...");
    const session = await createBrowser(this.options);

    try {
      const initialInterceptor = createNetworkInterceptor(session.page, "before-interaction");

      try {
        await session.page.goto(this.options.url, {
          waitUntil: "networkidle",
          timeout: this.options.timeout,
        });
      } catch (err) {
        errors.push(`Navigation timeout or error: ${String(err)}`);
      }

      // Give a moment for late-loading scripts
      await session.page.waitForTimeout(2000);

      const cookiesBeforeInteraction = await captureCookies(session.context, "before-interaction");
      const networkBeforeInteraction = initialInterceptor.getRequests();
      initialInterceptor.stop();

      // Phase 2 — Detect and analyze the consent modal
      onPhase("Phase 2/4 — Analyzing consent modal...");
      const modal = await detectConsentModal(session.page, this.options);

      if (this.options.screenshots && modal.detected) {
        const screenshotPath = join(this.options.outputDir, "modal-initial.png");
        await session.page.screenshot({ path: screenshotPath, fullPage: false });
        screenshotPaths.push(screenshotPath);
        modal.screenshotPath = screenshotPath;
      }

      // Phase 3 — Click REJECT, capture state after
      onPhase("Phase 3/4 — Testing reject button...");
      const rejectInterceptor = createNetworkInterceptor(session.page, "after-reject");

      // Defaults used when the reject flow cannot be exercised.
      let cookiesAfterReject = cookiesBeforeInteraction;
      let networkAfterReject: typeof networkBeforeInteraction = [];

      const rejectButton = modal.buttons.find((b) => b.type === "reject");
      if (rejectButton) {
        try {
          await session.page.click(rejectButton.selector, { timeout: 5000 });
          await session.page.waitForTimeout(2000);
          cookiesAfterReject = await captureCookies(session.context, "after-reject");
          networkAfterReject = rejectInterceptor.getRequests();
        } catch (err) {
          errors.push(`Could not click reject button: ${String(err)}`);
        }
      } else {
        errors.push("No reject button found — could not test rejection flow");
      }
      rejectInterceptor.stop();

      if (this.options.screenshots) {
        const screenshotPath = join(this.options.outputDir, "after-reject.png");
        await session.page.screenshot({ path: screenshotPath, fullPage: false });
        screenshotPaths.push(screenshotPath);
      }

      return {
        modal,
        cookiesBeforeInteraction,
        networkBeforeInteraction,
        cookiesAfterReject,
        networkAfterReject,
      };
    } finally {
      // Release Chromium even when a step above throws.
      await closeBrowser(session);
    }
  }

  /**
   * Phase 4: open a fresh session, reload the page, click the accept button and
   * capture the resulting state. The session is always closed, including when a
   * step throws.
   *
   * @param fallbackCookies - Returned as `cookiesAfterAccept` when the accept
   *   flow cannot be exercised.
   */
  private async runAcceptSession(
    onPhase: PhaseCallback,
    fallbackCookies: ScanResult["cookiesAfterAccept"],
    screenshotPaths: string[],
    errors: string[],
  ) {
    // Phase 4 — Fresh session, click ACCEPT, capture state after
    onPhase("Phase 4/4 — Testing accept button...");
    const session = await createBrowser(this.options);

    try {
      await clearState(session.context);
      const acceptInterceptor = createNetworkInterceptor(session.page, "after-accept");

      let cookiesAfterAccept = fallbackCookies;
      let networkAfterAccept: ScanResult["networkAfterAccept"] = [];

      try {
        await session.page.goto(this.options.url, {
          waitUntil: "networkidle",
          timeout: this.options.timeout,
        });
        await session.page.waitForTimeout(2000);

        const modal = await detectConsentModal(session.page, this.options);
        const acceptButton = modal.buttons.find((b) => b.type === "accept");

        if (acceptButton) {
          await session.page.click(acceptButton.selector, { timeout: 5000 });
          await session.page.waitForTimeout(3000);
          cookiesAfterAccept = await captureCookies(session.context, "after-accept");
          networkAfterAccept = acceptInterceptor.getRequests();
        } else {
          errors.push("No accept button found — could not test acceptance flow");
        }
      } catch (err) {
        errors.push(`Accept phase error: ${String(err)}`);
      }
      acceptInterceptor.stop();

      if (this.options.screenshots) {
        const screenshotPath = join(this.options.outputDir, "after-accept.png");
        await session.page.screenshot({ path: screenshotPath, fullPage: false });
        screenshotPaths.push(screenshotPath);
      }

      return { cookiesAfterAccept, networkAfterAccept };
    } finally {
      // Release Chromium even when a step above throws.
      await closeBrowser(session);
    }
  }
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import type { Page, Request, Response } from "playwright";
|
|
2
|
+
import type { NetworkRequest } from "../types.js";
|
|
3
|
+
import { classifyNetworkRequest } from "../classifiers/network-classifier.js";
|
|
4
|
+
|
|
5
|
+
/** Scan phase label stored as `capturedAt` on every request recorded by `createNetworkInterceptor`. */
export type NetworkPhase = "before-interaction" | "after-accept" | "after-reject";
|
|
6
|
+
|
|
7
|
+
/**
 * Starts recording the network requests issued by a page.
 *
 * Each request is classified and stored when it is issued. Its `responseStatus`
 * and `contentType` start as null and are filled in when the response for that
 * exact request object arrives, because the response does not exist yet when the
 * request event fires. Matching on the request object rather than the URL keeps
 * repeated requests to the same URL from sharing response data.
 *
 * @param page - Playwright page to observe.
 * @param phase - Label stored as `capturedAt` on every recorded request.
 * @returns `getRequests()` returns a copy of the requests recorded so far;
 *   `stop()` detaches the listeners, after which nothing more is recorded or updated.
 */
export function createNetworkInterceptor(page: Page, phase: NetworkPhase) {
  const captured: NetworkRequest[] = [];
  // Position in `captured` of each request still waiting for its response.
  const awaitingResponse = new Map<Request, number>();

  const onRequest = (request: Request) => {
    const url = request.url();

    // Skip data URIs and internal chrome requests
    if (url.startsWith("data:") || url.startsWith("chrome-extension:")) return;

    const resourceType = request.resourceType();
    const classification = classifyNetworkRequest(url, resourceType);

    awaitingResponse.set(request, captured.length);
    captured.push({
      url,
      method: request.method(),
      resourceType,
      initiator: null,
      isThirdParty: classification.isThirdParty,
      trackerCategory: classification.trackerCategory,
      trackerName: classification.trackerName,
      capturedAt: phase,
      responseStatus: null,
      contentType: null,
    });
  };

  const onResponse = (response: Response) => {
    const request = response.request();
    const index = awaitingResponse.get(request);
    if (index === undefined) return; // skipped request, or issued before recording started
    awaitingResponse.delete(request);

    const entry = captured[index];
    if (!entry) return;

    // Replace rather than mutate, so arrays already handed out keep their entries unchanged.
    captured[index] = {
      ...entry,
      responseStatus: response.status(),
      contentType: response.headers()["content-type"] ?? null,
    };
  };

  page.on("response", onResponse);
  page.on("request", onRequest);

  return {
    stop: () => {
      page.off("response", onResponse);
      page.off("request", onRequest);
      awaitingResponse.clear();
    },
    getRequests: () => [...captured],
  };
}
|