@slashgear/gdpr-cookie-scanner 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/.changeset/README.md +8 -0
  2. package/.changeset/config.json +11 -0
  3. package/.github/ISSUE_TEMPLATE/bug_report.yml +44 -0
  4. package/.github/ISSUE_TEMPLATE/feature_request.yml +26 -0
  5. package/.github/PULL_REQUEST_TEMPLATE.md +24 -0
  6. package/.github/workflows/ci.yml +38 -0
  7. package/.github/workflows/release.yml +57 -0
  8. package/.idea/gdpr-report.iml +8 -0
  9. package/.idea/modules.xml +8 -0
  10. package/.idea/vcs.xml +6 -0
  11. package/CHANGELOG.md +7 -0
  12. package/CLAUDE.md +75 -0
  13. package/CODE_OF_CONDUCT.md +41 -0
  14. package/CONTRIBUTING.md +79 -0
  15. package/LICENSE +21 -0
  16. package/README.md +127 -0
  17. package/SECURITY.md +15 -0
  18. package/dist/analyzers/compliance.d.ts +13 -0
  19. package/dist/analyzers/compliance.d.ts.map +1 -0
  20. package/dist/analyzers/compliance.js +171 -0
  21. package/dist/analyzers/compliance.js.map +1 -0
  22. package/dist/analyzers/wording.d.ts +13 -0
  23. package/dist/analyzers/wording.d.ts.map +1 -0
  24. package/dist/analyzers/wording.js +91 -0
  25. package/dist/analyzers/wording.js.map +1 -0
  26. package/dist/classifiers/cookie-classifier.d.ts +8 -0
  27. package/dist/classifiers/cookie-classifier.d.ts.map +1 -0
  28. package/dist/classifiers/cookie-classifier.js +108 -0
  29. package/dist/classifiers/cookie-classifier.js.map +1 -0
  30. package/dist/classifiers/network-classifier.d.ts +9 -0
  31. package/dist/classifiers/network-classifier.d.ts.map +1 -0
  32. package/dist/classifiers/network-classifier.js +51 -0
  33. package/dist/classifiers/network-classifier.js.map +1 -0
  34. package/dist/classifiers/tracker-list.d.ts +16 -0
  35. package/dist/classifiers/tracker-list.d.ts.map +1 -0
  36. package/dist/classifiers/tracker-list.js +86 -0
  37. package/dist/classifiers/tracker-list.js.map +1 -0
  38. package/dist/cli.d.ts +3 -0
  39. package/dist/cli.d.ts.map +1 -0
  40. package/dist/cli.js +110 -0
  41. package/dist/cli.js.map +1 -0
  42. package/dist/report/generator.d.ts +19 -0
  43. package/dist/report/generator.d.ts.map +1 -0
  44. package/dist/report/generator.js +552 -0
  45. package/dist/report/generator.js.map +1 -0
  46. package/dist/scanner/browser.d.ts +11 -0
  47. package/dist/scanner/browser.d.ts.map +1 -0
  48. package/dist/scanner/browser.js +38 -0
  49. package/dist/scanner/browser.js.map +1 -0
  50. package/dist/scanner/consent-modal.d.ts +5 -0
  51. package/dist/scanner/consent-modal.d.ts.map +1 -0
  52. package/dist/scanner/consent-modal.js +244 -0
  53. package/dist/scanner/consent-modal.js.map +1 -0
  54. package/dist/scanner/cookies.d.ts +11 -0
  55. package/dist/scanner/cookies.d.ts.map +1 -0
  56. package/dist/scanner/cookies.js +30 -0
  57. package/dist/scanner/cookies.js.map +1 -0
  58. package/dist/scanner/index.d.ts +9 -0
  59. package/dist/scanner/index.d.ts.map +1 -0
  60. package/dist/scanner/index.js +146 -0
  61. package/dist/scanner/index.js.map +1 -0
  62. package/dist/scanner/network.d.ts +8 -0
  63. package/dist/scanner/network.d.ts.map +1 -0
  64. package/dist/scanner/network.js +41 -0
  65. package/dist/scanner/network.js.map +1 -0
  66. package/dist/types.d.ts +105 -0
  67. package/dist/types.d.ts.map +1 -0
  68. package/dist/types.js +2 -0
  69. package/dist/types.js.map +1 -0
  70. package/package.json +52 -0
  71. package/renovate.json +17 -0
  72. package/src/analyzers/compliance.ts +203 -0
  73. package/src/analyzers/wording.ts +112 -0
  74. package/src/classifiers/cookie-classifier.ts +125 -0
  75. package/src/classifiers/network-classifier.ts +65 -0
  76. package/src/classifiers/tracker-list.ts +105 -0
  77. package/src/cli.ts +134 -0
  78. package/src/report/generator.ts +703 -0
  79. package/src/scanner/browser.ts +52 -0
  80. package/src/scanner/consent-modal.ts +276 -0
  81. package/src/scanner/cookies.ts +43 -0
  82. package/src/scanner/index.ts +163 -0
  83. package/src/scanner/network.ts +51 -0
  84. package/src/types.ts +134 -0
  85. package/tsconfig.json +18 -0
@@ -0,0 +1,65 @@
1
+ import { TRACKER_DB, PIXEL_PATTERNS } from "./tracker-list.js";
2
+ import type { TrackerCategory } from "../types.js";
3
+
4
/** Outcome of classifying a single network request. */
interface NetworkClassification {
  /** True when the request matched a tracker database entry or a pixel/beacon heuristic. */
  isThirdParty: boolean;
  /** Category of the matched tracker, or null when nothing matched. */
  trackerCategory: TrackerCategory | null;
  /** Display name of the matched tracker, or null when nothing matched. */
  trackerName: string | null;
}

/**
 * Classifies a request URL against the tracker database and the pixel heuristics.
 *
 * Checks run in this order and the first hit wins:
 *  1. tracker database lookup (domain, optionally narrowed by a path prefix),
 *  2. `PIXEL_PATTERNS` tested against the raw URL,
 *  3. the image-only heuristic (`isLikelyPixel`).
 *
 * Database keys are either a bare domain ("doubleclick.net") or a domain followed by a
 * path prefix ("facebook.com/tr"). A key matches when the request host equals the domain
 * or is a subdomain of it and, if a path prefix is present, the request path equals the
 * prefix or continues below it. When several keys match, the longest (most specific)
 * key is used, so "pagead2.googlesyndication.com" is preferred over "googlesyndication.com".
 *
 * @param url - Request URL. A value that `new URL()` cannot parse yields the
 *   "not a tracker" result instead of throwing.
 * @param resourceType - Resource type of the request; only the value "image" is inspected.
 * @returns The classification; all-null/false when nothing matched.
 */
export function classifyNetworkRequest(url: string, resourceType: string): NetworkClassification {
  let parsed: URL;

  try {
    parsed = new URL(url);
  } catch {
    return { isThirdParty: false, trackerCategory: null, trackerName: null };
  }

  // A leading "www." is dropped so "www.example.com" matches an "example.com" key exactly.
  const hostname = parsed.hostname.replace(/^www\./, "");
  const pathname = parsed.pathname;

  // Check tracker database, keeping the most specific (longest) matching key.
  let match: NetworkClassification | null = null;
  let matchedKeyLength = 0;

  for (const [key, entry] of Object.entries(TRACKER_DB)) {
    // Split "domain/path" keys; keys without a slash are domain-only.
    const slashIndex = key.indexOf("/");
    const domain = slashIndex === -1 ? key : key.slice(0, slashIndex);
    const pathPrefix = slashIndex === -1 ? "" : key.slice(slashIndex);

    const hostMatches = hostname === domain || hostname.endsWith(`.${domain}`);
    if (!hostMatches) {
      continue;
    }

    const pathMatches =
      pathPrefix === "" || pathname === pathPrefix || pathname.startsWith(`${pathPrefix}/`);
    if (!pathMatches) {
      continue;
    }

    if (key.length > matchedKeyLength) {
      matchedKeyLength = key.length;
      match = {
        isThirdParty: true,
        trackerCategory: entry.category,
        trackerName: entry.name,
      };
    }
  }

  if (match !== null) {
    return match;
  }

  // Check pixel/beacon patterns in URL
  if (PIXEL_PATTERNS.some((p) => p.test(url))) {
    return {
      isThirdParty: true,
      trackerCategory: "pixel",
      trackerName: "Tracking Pixel",
    };
  }

  // Resource type heuristics
  if (resourceType === "image" && isLikelyPixel(url)) {
    return {
      isThirdParty: true,
      trackerCategory: "pixel",
      trackerName: "Tracking Pixel (image)",
    };
  }

  return {
    isThirdParty: false,
    trackerCategory: null,
    trackerName: null,
  };
}
54
+
55
/**
 * Heuristic: image URL (.gif / .png) that carries a tracking-style query parameter.
 *
 * The extension is looked for only in the part of the URL before the query string, so an
 * image name passed as a parameter value ("?fallback=x.png") does not count. The query
 * must contain one of the short identifier parameters (uid, userid, sid, cid, vid, ts, t,
 * e, ev) as a complete parameter name, so unrelated names that merely start with one of
 * them ("video", "sidebar") are not treated as tracking parameters.
 *
 * @param url - Raw request URL.
 * @returns True when the URL looks like a tracking pixel.
 */
function isLikelyPixel(url: string): boolean {
  const queryStart = url.indexOf("?");
  if (queryStart === -1) {
    return false;
  }

  const beforeQuery = url.slice(0, queryStart).toLowerCase();
  const query = url.slice(queryStart);

  const looksLikeImageFile = beforeQuery.includes(".gif") || beforeQuery.includes(".png");
  return looksLikeImageFile && /[?&](uid|userid|sid|cid|vid|ts|t|e|ev)=/i.test(query);
}
@@ -0,0 +1,105 @@
1
+ import type { TrackerCategory } from "../types.js";
2
+
3
/** One row of the tracker table: a display name and the category it is reported under. */
interface TrackerEntry {
  name: string;
  category: TrackerCategory;
}

/**
 * Known tracker domains and their categories.
 * Based on open-source tracker databases (EasyPrivacy, Disconnect, DuckDuckGo Tracker Radar).
 *
 * Keys are normally bare domains. Two keys ("google.com/ads" and "facebook.com/tr") also
 * carry a path component; a consumer that compares keys against a hostname only will never
 * match them.
 * NOTE(review): confirm that every consumer of this table handles path-qualified keys.
 */
export const TRACKER_DB: Record<string, TrackerEntry> = {
  // ── Google ────────────────────────────────────────────────────
  "google-analytics.com": { name: "Google Analytics", category: "analytics" },
  "analytics.google.com": { name: "Google Analytics", category: "analytics" },
  "googletagmanager.com": { name: "Google Tag Manager", category: "analytics" },
  "googletagservices.com": { name: "Google Tag Services", category: "advertising" },
  "googlesyndication.com": { name: "Google AdSense", category: "advertising" },
  "doubleclick.net": { name: "Google DoubleClick", category: "advertising" },
  "adservice.google.com": { name: "Google Ad Services", category: "advertising" },
  // Path-qualified key (domain + "/ads"), not a plain hostname.
  "google.com/ads": { name: "Google Ads", category: "advertising" },
  "googleadservices.com": { name: "Google Ad Services", category: "advertising" },
  "pagead2.googlesyndication.com": { name: "Google PageAd", category: "advertising" },

  // ── Meta / Facebook ───────────────────────────────────────────
  "connect.facebook.net": { name: "Facebook SDK", category: "social" },
  "graph.facebook.com": { name: "Facebook Graph API", category: "social" },
  // Path-qualified key (domain + "/tr"), not a plain hostname.
  "facebook.com/tr": { name: "Meta Pixel", category: "advertising" },
  "fbcdn.net": { name: "Facebook CDN", category: "social" },

  // ── Microsoft ─────────────────────────────────────────────────
  "bat.bing.com": { name: "Bing Ads", category: "advertising" },
  "clarity.ms": { name: "Microsoft Clarity", category: "analytics" },
  "ads.microsoft.com": { name: "Microsoft Ads", category: "advertising" },
  "scorecardresearch.com": { name: "Scorecard Research", category: "analytics" },

  // ── Hotjar ────────────────────────────────────────────────────
  "hotjar.com": { name: "Hotjar", category: "analytics" },
  "static.hotjar.com": { name: "Hotjar", category: "analytics" },

  // ── LinkedIn ─────────────────────────────────────────────────
  "snap.licdn.com": { name: "LinkedIn Insight Tag", category: "advertising" },
  "platform.linkedin.com": { name: "LinkedIn", category: "social" },

  // ── Twitter / X ──────────────────────────────────────────────
  "static.ads-twitter.com": { name: "Twitter Ads", category: "advertising" },
  "analytics.twitter.com": { name: "Twitter Analytics", category: "analytics" },
  "t.co": { name: "Twitter URL shortener", category: "advertising" },

  // ── TikTok ───────────────────────────────────────────────────
  "analytics.tiktok.com": { name: "TikTok Analytics", category: "analytics" },
  "ads-api.tiktok.com": { name: "TikTok Ads", category: "advertising" },

  // ── Criteo ───────────────────────────────────────────────────
  "dis.us.criteo.com": { name: "Criteo", category: "advertising" },
  "rtax.criteo.com": { name: "Criteo Retargeting", category: "advertising" },
  "static.criteo.net": { name: "Criteo", category: "advertising" },

  // ── Segment / Amplitude / Mixpanel ───────────────────────────
  "api.segment.io": { name: "Segment", category: "analytics" },
  "cdn.segment.com": { name: "Segment", category: "analytics" },
  "api2.amplitude.com": { name: "Amplitude", category: "analytics" },
  "api.mixpanel.com": { name: "Mixpanel", category: "analytics" },

  // ── Intercom / Drift / HubSpot ────────────────────────────────
  "js.intercomcdn.com": { name: "Intercom", category: "analytics" },
  "widget.intercom.io": { name: "Intercom Widget", category: "analytics" },
  "hubspot.com": { name: "HubSpot", category: "analytics" },
  "js.hs-scripts.com": { name: "HubSpot", category: "analytics" },
  "drift.com": { name: "Drift", category: "analytics" },

  // ── Fingerprinting ───────────────────────────────────────────
  "fingerprintjs.com": { name: "FingerprintJS", category: "fingerprinting" },
  "fpnpmcdn.net": { name: "FingerprintJS CDN", category: "fingerprinting" },

  // ── Advertising networks ─────────────────────────────────────
  // Not every entry in this section is categorized "advertising": see the "cdn" and
  // "analytics" rows below.
  "amazon-adsystem.com": { name: "Amazon Ads", category: "advertising" },
  "pubmatic.com": { name: "PubMatic", category: "advertising" },
  "rubiconproject.com": { name: "Rubicon Project", category: "advertising" },
  "openx.net": { name: "OpenX", category: "advertising" },
  "casalemedia.com": { name: "Casale Media", category: "advertising" },
  // NOTE(review): categorized "cdn" yet still part of the tracker table — confirm intended.
  "akamaized.net": { name: "Akamai", category: "cdn" },
  "outbrain.com": { name: "Outbrain", category: "advertising" },
  "taboola.com": { name: "Taboola", category: "advertising" },
  "quantserve.com": { name: "Quantcast", category: "advertising" },
  "chartbeat.com": { name: "Chartbeat", category: "analytics" },

  // ── AB Testing ───────────────────────────────────────────────
  "optimizely.com": { name: "Optimizely", category: "analytics" },
  "vwo.com": { name: "VWO", category: "analytics" },
  "app.convert.com": { name: "Convert", category: "analytics" },
};
93
+
94
/**
 * Patterns for detecting tracking pixels and beacons by URL shape.
 *
 * The first six patterns match a final path segment (optionally followed by a query
 * string). The last one matches a query parameter whose full name is pixel, beacon,
 * track, event or hit; it is anchored to the start of the query or to a preceding "&" so
 * that names merely ending in one of those words ("soundtrack=", "exhibit=") do not match.
 */
export const PIXEL_PATTERNS: RegExp[] = [
  /\/pixel(\.gif|\.png|\.php)?(\?|$)/i,
  /\/beacon(\.gif|\.png|\.php)?(\?|$)/i,
  /\/track(ing)?(\.gif|\.png|\.php)?(\?|$)/i,
  /\/collect(\?|$)/i,
  /\/event(\?|$)/i,
  /\/(hit|ping|log)(\?|$)/i,
  /\?(?:.*&)?(?:pixel|beacon|track|event|hit)=/i,
];
package/src/cli.ts ADDED
@@ -0,0 +1,134 @@
1
+ #!/usr/bin/env node
2
+ import { Command } from "commander";
3
+ import chalk from "chalk";
4
+ import ora from "ora";
5
+ import { join, resolve } from "path";
6
+ import { Scanner } from "./scanner/index.js";
7
+ import { ReportGenerator } from "./report/generator.js";
8
+ import type { ScanOptions } from "./types.js";
9
+
10
// Root CLI program; the subcommands are registered on this instance.
const program = new Command();

program
  .name("gdpr-scan")
  .description("Scan a website for GDPR cookie consent compliance")
  // Must mirror the "version" field of package.json; the published package is 1.0.0.
  .version("1.0.0");
16
+
17
/**
 * `scan <url>` — runs the scanner against a site, writes the report and prints a summary.
 *
 * Exit codes: 0 on success, 1 when the compliance grade is "F", 2 when the input is
 * invalid or the scan fails.
 */
program
  .command("scan")
  .description("Scan a website and generate a GDPR compliance report")
  .argument("<url>", "URL of the website to scan")
  .option("-o, --output <dir>", "Output directory for the report", "./gdpr-reports")
  .option("-t, --timeout <ms>", "Navigation timeout in milliseconds", "30000")
  .option("--no-screenshots", "Disable screenshot capture")
  .option("-l, --locale <locale>", "Browser locale for language detection", "fr-FR")
  .option("-v, --verbose", "Show detailed output", false)
  .action(async (url: string, opts) => {
    console.log();
    console.log(chalk.bold.blue(" GDPR Cookie Scanner"));
    console.log(chalk.gray(" ─────────────────────────────────────"));

    // Validate user input before any work starts, so a bad URL or timeout produces the
    // same "Error: ..." + exit code 2 as a failed scan instead of an unhandled rejection.
    let normalizedUrl: string;
    let hostname: string;
    try {
      normalizedUrl = normalizeUrl(url);
      hostname = new URL(normalizedUrl).hostname;
    } catch {
      console.error(chalk.red(`\n Error: Invalid URL: ${url}`));
      process.exit(2);
    }

    const timeout = Number.parseInt(opts.timeout, 10);
    if (Number.isNaN(timeout) || timeout < 0) {
      console.error(chalk.red(`\n Error: Invalid timeout: ${opts.timeout}`));
      process.exit(2);
    }

    // Reports are grouped per scanned host under the chosen output directory.
    const outputDir = join(resolve(opts.output), hostname);

    console.log(chalk.gray(` Target : ${url}`));
    console.log(chalk.gray(` Output : ${outputDir}`));
    console.log();

    const options: ScanOptions = {
      url: normalizedUrl,
      outputDir,
      timeout,
      // "--no-screenshots" sets opts.screenshots to false; anything else means enabled.
      screenshots: opts.screenshots !== false,
      locale: opts.locale,
      verbose: opts.verbose,
    };

    const spinner = ora("Launching browser...").start();

    try {
      const scanner = new Scanner(options);

      spinner.text = "Loading page (before interaction)...";
      // The scanner reports each phase through the callback; mirror it on the spinner.
      const result = await scanner.run((phase) => {
        spinner.text = phase;
      });

      spinner.succeed("Scan complete");
      console.log();

      const generator = new ReportGenerator(options);
      const reportPath = await generator.generate(result);

      console.log(
        chalk.bold(
          ` Compliance score: ${formatScore(result.compliance.total)} ${result.compliance.grade}`,
        ),
      );
      console.log();

      if (result.compliance.issues.length > 0) {
        console.log(chalk.yellow(` ${result.compliance.issues.length} issue(s) detected:`));
        // Only the first five issues are printed; the rest are summarized as a count.
        for (const issue of result.compliance.issues.slice(0, 5)) {
          const icon = issue.severity === "critical" ? chalk.red("✗") : chalk.yellow("⚠");
          console.log(` ${icon} ${issue.description}`);
        }
        if (result.compliance.issues.length > 5) {
          console.log(
            chalk.gray(` ... and ${result.compliance.issues.length - 5} more (see report)`),
          );
        }
        console.log();
      }

      console.log(chalk.green(` Report saved: ${reportPath}`));
      console.log();

      process.exit(result.compliance.grade === "F" ? 1 : 0);
    } catch (err) {
      spinner.fail("Scan failed");
      console.error(chalk.red(`\n Error: ${err instanceof Error ? err.message : String(err)}`));
      if (opts.verbose && err instanceof Error && err.stack) {
        console.error(chalk.gray(err.stack));
      }
      process.exit(2);
    }
  });
97
+
98
/** `list-trackers` — prints how many tracker table entries exist per category. */
program
  .command("list-trackers")
  .description("Show the built-in tracker database summary")
  .action(async () => {
    // Imported on demand, inside the handler, rather than at module load.
    const trackerModule = await import("./classifiers/tracker-list.js");
    const db = trackerModule.TRACKER_DB;

    // Tally entries per category; a Map iterates in first-seen order when printed.
    const perCategory = Object.values(db).reduce((tally, { category }) => {
      tally.set(category, (tally.get(category) ?? 0) + 1);
      return tally;
    }, new Map<string, number>());

    console.log(chalk.bold("\n Built-in tracker database:"));
    perCategory.forEach((count, category) => {
      console.log(` ${chalk.cyan(category.padEnd(20))} ${count} domains`);
    });
    console.log(`\n Total: ${Object.keys(db).length} tracked domains\n`);
  });
114
+
115
// The command handlers are async, so parse with parseAsync: its promise settles only after
// the selected handler has finished, which lets a rejected handler be reported here with
// exit code 2 instead of surfacing as an unhandled promise rejection.
program.parseAsync(process.argv).catch((err: unknown) => {
  console.error(err instanceof Error ? err.message : String(err));
  process.exit(2);
});
116
+
117
+ function normalizeUrl(url: string): string {
118
+ if (!url.startsWith("http://") && !url.startsWith("https://")) {
119
+ return `https://${url}`;
120
+ }
121
+ return url;
122
+ }
123
+
124
/**
 * Renders a compliance score as "<score>/100" with the number colored by band:
 * green from 80, yellow from 60, orange (#FFA500) from 40, red below that.
 *
 * @param score - Compliance score to display.
 * @returns The colored score followed by "/100".
 */
function formatScore(score: number): string {
  let painted: string;

  if (score >= 80) {
    painted = chalk.green(score);
  } else if (score >= 60) {
    painted = chalk.yellow(score);
  } else if (score >= 40) {
    painted = chalk.hex("#FFA500")(score);
  } else {
    painted = chalk.red(score);
  }

  return `${painted}/100`;
}