@etalon/cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,61 @@
1
+ # ETALON Scanner CLI
2
+
3
+ **Scan websites for third-party trackers and GDPR compliance.**
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install -g etalon
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```bash
14
+ # Scan a website
15
+ etalon scan https://example.com
16
+
17
+ # JSON output
18
+ etalon scan https://example.com --format json
19
+
20
+ # SARIF for CI/CD (GitHub Code Scanning)
21
+ etalon scan https://example.com --format sarif
22
+
23
+ # Deep scan — scroll page, click consent dialogs
24
+ etalon scan https://example.com --deep
25
+
26
+ # Look up a single domain
27
+ etalon lookup google-analytics.com
28
+
29
+ # Registry stats
30
+ etalon info
31
+ ```
32
+
33
+ ## Options
34
+
35
+ | Option | Description | Default |
36
+ |--------|-------------|---------|
37
+ | `-f, --format` | `text`, `json`, `sarif` | `text` |
38
+ | `-d, --deep` | Scroll + consent interaction | `false` |
39
+ | `-t, --timeout` | Nav timeout in ms | `30000` |
40
+ | `--no-idle` | Skip network idle wait | `false` |
41
+ | `--config` | Path to `etalon.yaml` | auto-detect |
42
+
43
+ ## CI/CD
44
+
45
+ The `scan` command exits with code **1** if high-risk trackers are found.
46
+
47
+ ```yaml
48
+ # GitHub Actions
49
+ - run: npx etalon scan https://your-site.com --format sarif > results.sarif
50
+ - uses: github/codeql-action/upload-sarif@v3
51
+ with:
52
+ sarif_file: results.sarif
53
+ ```
54
+
55
+ ## Configuration
56
+
57
+ See the root [etalon.yaml](../../etalon.yaml.example) for a complete config example.
58
+
59
+ ## License
60
+
61
+ MIT
@@ -0,0 +1,189 @@
1
+ #!/usr/bin/env node
2
+
3
+ // src/scanner.ts
4
+ import { chromium } from "playwright";
5
+ import {
6
+ VendorRegistry,
7
+ extractDomain,
8
+ isFirstParty
9
+ } from "@etalon/core";
10
/** Version string reported as `meta.etalonVersion` in every scan result. */
const ETALON_VERSION = "1.0.0";

/**
 * Baseline scan settings. `scanSite` spreads the caller's options over this
 * object, so any key supplied by the caller replaces the value listed here.
 */
const DEFAULT_OPTIONS = {
  // Deep mode (extra scrolling and a consent-button click) is opt-in.
  deep: false,
  // Handed to page navigation as its timeout, in milliseconds.
  timeout: 3e4,
  // true -> navigation waits for "networkidle"; false -> "domcontentloaded".
  waitForNetworkIdle: true,
  userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
  viewport: { width: 1920, height: 1080 },
  // An empty path makes scanSite call VendorRegistry.load() with no argument.
  vendorDbPath: ""
};
19
/**
 * Opens `url` in headless Chromium, records every request the page issues,
 * and groups the third-party ones by registry vendor.
 *
 * @param url      Address of the page to scan.
 * @param options  Overrides spread over DEFAULT_OPTIONS (deep, timeout,
 *                 waitForNetworkIdle, userAgent, viewport, vendorDbPath).
 * @returns An object with `meta`, `summary`, `vendors` (sorted by descending
 *          risk score), `unknown` (third-party domains the registry does not
 *          know) and `recommendations`.
 * @throws Error "Invalid URL: ..." when extractDomain yields nothing for `url`.
 */
async function scanSite(url, options = {}) {
  const settings = { ...DEFAULT_OPTIONS, ...options };
  const startedAt = Date.now();

  let registry;
  if (settings.vendorDbPath) {
    registry = VendorRegistry.load(settings.vendorDbPath);
  } else {
    registry = VendorRegistry.load();
  }

  const siteDomain = extractDomain(url);
  if (!siteDomain) {
    throw new Error(`Invalid URL: ${url}`);
  }

  const seenRequests = [];
  let browser = null;
  try {
    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext({
      userAgent: settings.userAgent,
      viewport: settings.viewport,
      ignoreHTTPSErrors: true
    });
    const page = await context.newPage();

    // Record every request whose URL has an extractable domain.
    page.on("request", (request) => {
      const reqUrl = request.url();
      const domain = extractDomain(reqUrl);
      if (domain) {
        seenRequests.push({
          url: reqUrl,
          domain,
          method: request.method(),
          type: request.resourceType(),
          timestamp: new Date().toISOString()
        });
      }
    });

    const waitUntil = settings.waitForNetworkIdle ? "networkidle" : "domcontentloaded";
    await page.goto(url, { waitUntil, timeout: settings.timeout });
    if (settings.deep) {
      await performDeepScan(page, settings.timeout);
    }
    // Fixed one-second pause before the context is closed.
    await page.waitForTimeout(1e3);
    await context.close();
  } finally {
    // The browser is closed even when navigation or the deep scan throws.
    if (browser) {
      await browser.close();
    }
  }

  const scanDurationMs = Date.now() - startedAt;
  const externalRequests = seenRequests.filter(
    (entry) => !isFirstParty(entry.domain, siteDomain)
  );

  // Known vendors are keyed by vendor id, unknown traffic by raw domain.
  const knownById = new Map();
  const unknownByDomain = new Map();
  for (const entry of externalRequests) {
    const vendor = registry.lookupDomain(entry.domain);
    if (!vendor) {
      const bucket = unknownByDomain.get(entry.domain);
      if (bucket) {
        bucket.requests.push(entry);
      } else {
        unknownByDomain.set(entry.domain, {
          domain: entry.domain,
          requests: [entry],
          suggestedAction: "submit_for_review"
        });
      }
      continue;
    }
    const bucket = knownById.get(vendor.id);
    if (bucket) {
      bucket.requests.push(entry);
    } else {
      knownById.set(vendor.id, { vendor, requests: [entry] });
    }
  }

  const vendors = [...knownById.values()];
  const unknown = [...unknownByDomain.values()];
  vendors.sort((left, right) => right.vendor.risk_score - left.vendor.risk_score);
  const recommendations = generateRecommendations(vendors, unknown);

  // Risk bands: high >= 6, medium 3 up to (not including) 6, low < 3.
  let highRisk = 0;
  let mediumRisk = 0;
  let lowRisk = 0;
  for (const { vendor } of vendors) {
    const score = vendor.risk_score;
    if (score >= 6) {
      highRisk += 1;
    } else if (score >= 3) {
      mediumRisk += 1;
    } else if (score < 3) {
      lowRisk += 1;
    }
  }

  return {
    meta: {
      etalonVersion: ETALON_VERSION,
      scanDate: new Date().toISOString(),
      scanDurationMs,
      url,
      deep: settings.deep ?? false
    },
    summary: {
      totalRequests: seenRequests.length,
      thirdPartyRequests: externalRequests.length,
      knownVendors: vendors.length,
      unknownDomains: unknown.length,
      highRisk,
      mediumRisk,
      lowRisk
    },
    vendors,
    unknown,
    recommendations
  };
}
120
/**
 * Exercises the page so that lazily loaded requests fire: scrolls down ten
 * viewport heights, jumps back to the top, then clicks the first visible
 * element that looks like a consent "accept" control.
 *
 * @param page      Page object exposing evaluate, waitForTimeout and locator.
 * @param _timeout  Accepted for the caller's convenience; not used here.
 */
async function performDeepScan(page, _timeout) {
  // Ten scroll steps, each followed by a 500 ms pause.
  let stepsLeft = 10;
  while (stepsLeft > 0) {
    await page.evaluate(() => {
      window.scrollBy(0, window.innerHeight);
    });
    await page.waitForTimeout(500);
    stepsLeft -= 1;
  }

  await page.evaluate(() => window.scrollTo(0, 0));
  await page.waitForTimeout(2e3);

  // Tried in order; the first visible match is clicked and the search stops.
  const acceptCandidates = [
    'button[id*="accept"]',
    'button[id*="consent"]',
    'button[class*="accept"]',
    'button[class*="consent"]',
    'a[id*="accept"]',
    '[data-testid*="accept"]',
    '[data-testid*="consent"]'
  ];
  for (const candidate of acceptCandidates) {
    try {
      const target = page.locator(candidate).first();
      const shown = await target.isVisible({ timeout: 500 });
      if (!shown) {
        continue;
      }
      await target.click();
      await page.waitForTimeout(1e3);
      break;
    } catch {
      // Best effort: a failing selector just moves on to the next one.
    }
  }
}
152
/**
 * Turns grouped scan results into a flat list of advisory messages.
 *
 * Per vendor, in this order:
 *   - "high_risk_vendor" when risk_score >= 6 (alternatives appended if any);
 *   - "missing_dpa" when gdpr_compliant is set, dpa_url is empty and
 *     risk_score >= 3;
 *   - "consider_alternative" when risk_score >= 4 and alternatives exist.
 * A single "unknown_tracker" entry is appended when `unknown` is non-empty.
 *
 * @param vendors  Entries shaped `{ vendor, ... }`.
 * @param unknown  Unknown-domain entries; only the count is used.
 * @returns Array of `{ type, vendorId?, message }` objects.
 */
function generateRecommendations(vendors, unknown) {
  const advice = [];

  vendors.forEach(({ vendor }) => {
    const score = vendor.risk_score;
    const hasAlternatives = () => Boolean(vendor.alternatives?.length);
    const alternativesList = () => vendor.alternatives.join(", ");

    if (score >= 6) {
      let message = `${vendor.name} is a high-risk tracker (score: ${score}/10).`;
      if (hasAlternatives()) {
        message += ` Consider alternatives: ${alternativesList()}`;
      }
      advice.push({ type: "high_risk_vendor", vendorId: vendor.id, message });
    }

    const lacksDpa = vendor.gdpr_compliant && !vendor.dpa_url;
    if (lacksDpa && score >= 3) {
      advice.push({
        type: "missing_dpa",
        vendorId: vendor.id,
        message: `${vendor.name} is GDPR-compliant but no DPA URL is documented. Verify your Data Processing Agreement.`
      });
    }

    if (score >= 4 && hasAlternatives()) {
      advice.push({
        type: "consider_alternative",
        vendorId: vendor.id,
        message: `Consider privacy-friendly alternatives to ${vendor.name}: ${alternativesList()}`
      });
    }
  });

  if (unknown.length > 0) {
    advice.push({
      type: "unknown_tracker",
      message: `${unknown.length} unknown domain(s) detected. Review and submit to the ETALON registry if they are trackers.`
    });
  }

  return advice;
}
186
+
187
+ export {
188
+ scanSite
189
+ };
@@ -0,0 +1,199 @@
1
+ #!/usr/bin/env node
2
+
3
+ // src/consent-checker.ts
4
+ import { chromium } from "playwright";
5
+ import {
6
+ VendorRegistry,
7
+ extractDomain,
8
+ isFirstParty
9
+ } from "@etalon/core";
10
/**
 * Known consent-management platforms, checked in this order by detectBanner.
 * Each entry gives a CSS selector for the banner container and one for the
 * platform's reject/decline control.
 */
const CMP_SELECTORS = [
  // CookieBot
  { name: "Cookiebot", banner: "#CybotCookiebotDialog", reject: '#CybotCookiebotDialogBodyButtonDecline, .CybotCookiebotDialogBodyButton[id*="decline"]' },
  // OneTrust
  { name: "OneTrust", banner: "#onetrust-consent-sdk, .onetrust-pc-dark-filter", reject: "#onetrust-reject-all-handler, .ot-pc-refuse-all-handler" },
  // Didomi
  { name: "Didomi", banner: "#didomi-popup, .didomi-popup-container", reject: "#didomi-notice-disagree-button, .didomi-components-button--disagree" },
  // Quantcast
  { name: "Quantcast", banner: ".qc-cmp2-container, #qcCmpUi", reject: '.qc-cmp2-summary-buttons button[mode="secondary"], .qc-cmp2-footer button:first-child' },
  // Klaro
  { name: "Klaro", banner: ".klaro .cookie-notice, .klaro .cm-app", reject: ".klaro .cn-decline, .klaro .cm-btn-decline" },
  // TrustArc
  { name: "TrustArc", banner: "#truste-consent-track", reject: "#truste-consent-required" },
  // Generic selectors
  { name: "Generic", banner: '[class*="cookie-banner"], [class*="cookie-consent"], [id*="cookie-banner"], [id*="cookie-consent"], [class*="gdpr"], [id*="gdpr"]', reject: 'button[class*="reject"], button[class*="decline"], button[class*="deny"], a[class*="reject"], a[class*="decline"]' },
  // Text-based (last resort). Both selectors are empty, and detectBanner
  // skips any entry whose `banner` is empty, so this entry never matches there.
  { name: "Text-based", banner: "", reject: "" }
];

/**
 * Case-insensitive patterns tested against a button's trimmed text to decide
 * whether it is a "reject" control. None carries the `g` flag, so `.test()`
 * keeps no state between calls.
 */
const REJECT_TEXT_PATTERNS = [
  /reject\s*all/i,
  /decline\s*all/i,
  /refuse\s*all/i,
  /deny\s*all/i,
  /reject/i,
  /decline/i,
  /refuse/i,
  /nur\s*notwendige/i, // German: "only necessary"
  /refuser/i, // French
  /rifiuta/i, // Italian
  /rechazar/i // Spanish
];
45
/**
 * Loads `url` in headless Chromium, tries to reject the cookie banner, and
 * reports which third-party trackers were requested before any interaction
 * and which were still requested after the rejection.
 *
 * @param url      Address of the page to check.
 * @param options  `timeout`: navigation timeout in ms (15000 when nullish).
 * @returns `{ url, bannerDetected, bannerType, rejectButtonFound,
 *          rejectClicked, preConsentTrackers, postRejectTrackers, violations,
 *          pass }`; `pass` is true only when `violations` is empty.
 * @throws Error "Invalid URL: ..." when extractDomain yields nothing for `url`.
 */
async function checkConsent(url, options = {}) {
  const timeout = options.timeout ?? 15e3;
  const registry = VendorRegistry.load();
  const siteDomain = extractDomain(url);
  if (!siteDomain) {
    throw new Error(`Invalid URL: ${url}`);
  }

  // Third-party requests are filed under whichever phase is current.
  const requestsByPhase = { "pre-consent": [], "post-reject": [] };
  let phase = "pre-consent";

  // Only analytics and advertising trackers count as violations.
  const needsConsent = ({ category }) => category === "analytics" || category === "advertising";

  let browser = null;
  try {
    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext({
      userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
      viewport: { width: 1920, height: 1080 }
    });
    const page = await context.newPage();

    page.on("request", (request) => {
      const reqUrl = request.url();
      const domain = extractDomain(reqUrl);
      if (!domain || isFirstParty(domain, siteDomain)) {
        return;
      }
      requestsByPhase[phase].push({
        url: reqUrl,
        method: request.method(),
        resourceType: request.resourceType(),
        domain
      });
    });

    await page.goto(url, { waitUntil: "networkidle", timeout });
    await page.waitForTimeout(2e3);

    const { bannerDetected, bannerType, rejectSelector } = await detectBanner(page);
    let rejectButtonFound = false;
    let rejectClicked = false;

    // First choice: the reject control reported by banner detection.
    if (bannerDetected && rejectSelector) {
      rejectButtonFound = true;
      try {
        await page.click(rejectSelector, { timeout: 5e3 });
        rejectClicked = true;
      } catch {
        rejectClicked = false;
      }
    }

    // Fallback: look for a button whose text reads like a rejection.
    if (bannerDetected && !rejectClicked) {
      const { found, clicked } = await tryTextBasedReject(page);
      rejectButtonFound ||= found;
      rejectClicked = clicked;
    }

    // After a successful click, later requests land in the post-reject bucket.
    if (rejectClicked) {
      phase = "post-reject";
      await page.waitForTimeout(3e3);
    }

    const preConsentTrackers = classifyTrackers(requestsByPhase["pre-consent"], registry);
    const postRejectTrackers = classifyTrackers(requestsByPhase["post-reject"], registry);

    const toViolation = (tracker, phaseLabel, message) => ({
      vendor: tracker.name,
      vendorId: tracker.id,
      domain: tracker.matchedDomain,
      phase: phaseLabel,
      message
    });
    const violations = [
      ...preConsentTrackers.filter(needsConsent).map((tracker) =>
        toViolation(
          tracker,
          "before-interaction",
          `${tracker.name} loaded BEFORE any consent interaction \u2014 violates GDPR Art. 6(1)(a)`
        )
      ),
      ...postRejectTrackers.filter(needsConsent).map((tracker) =>
        toViolation(
          tracker,
          "after-reject",
          `${tracker.name} still active AFTER consent rejection \u2014 consent mechanism is broken`
        )
      )
    ];

    return {
      url,
      bannerDetected,
      bannerType,
      rejectButtonFound,
      rejectClicked,
      preConsentTrackers,
      postRejectTrackers,
      violations,
      pass: violations.length === 0
    };
  } finally {
    await browser?.close();
  }
}
140
/**
 * Walks CMP_SELECTORS in order and reports the first platform whose banner
 * element exists and is visible.
 *
 * @param page  Page object exposing `$(selector)`.
 * @returns `{ bannerDetected, bannerType, rejectSelector }`. `rejectSelector`
 *          is the platform's reject selector only when that element is
 *          present and visible; otherwise null. When nothing matches, the
 *          result is `{ false, null, null }`.
 */
async function detectBanner(page) {
  for (const { name, banner, reject } of CMP_SELECTORS) {
    // Entries with no banner selector cannot be probed.
    if (!banner) {
      continue;
    }
    try {
      const bannerEl = await page.$(banner);
      if (!bannerEl || !(await bannerEl.isVisible())) {
        continue;
      }

      let rejectSelector = null;
      if (reject) {
        const rejectEl = await page.$(reject);
        const rejectShown = rejectEl ? await rejectEl.isVisible() : false;
        if (rejectShown) {
          rejectSelector = reject;
        }
      }
      return { bannerDetected: true, bannerType: name, rejectSelector };
    } catch {
      // A failing lookup for one platform falls through to the next one.
    }
  }
  return { bannerDetected: false, bannerType: null, rejectSelector: null };
}
163
/**
 * Fallback for banners without a known reject selector: scans button-like
 * elements and clicks the first visible one whose trimmed text matches any
 * pattern in REJECT_TEXT_PATTERNS.
 *
 * Errors are handled per candidate, so one detached or unclickable element
 * does not end the search, and `found` stays true when a matching visible
 * button was located but could not be clicked.
 *
 * @param page  Page object exposing `$$(selector)`.
 * @returns `{ found, clicked }`: `found` is true once a visible matching
 *          button has been seen; `clicked` is true only after a successful
 *          click.
 */
async function tryTextBasedReject(page) {
  let candidates;
  try {
    candidates = await page.$$('button, a[role="button"], [class*="btn"]');
  } catch {
    // The page could not be queried at all; there is nothing to click.
    return { found: false, clicked: false };
  }

  let found = false;
  for (const candidate of candidates) {
    try {
      const label = (await candidate.textContent())?.trim();
      if (!label) {
        continue;
      }
      if (!REJECT_TEXT_PATTERNS.some((pattern) => pattern.test(label))) {
        continue;
      }
      if (!(await candidate.isVisible())) {
        continue;
      }
      found = true;
      // Bounded like the selector-based click in checkConsent, so a covered
      // button cannot stall the scan for the full default timeout.
      await candidate.click({ timeout: 5e3 });
      return { found: true, clicked: true };
    } catch {
      // This candidate failed (stale, covered, ...); try the next one.
    }
  }
  return { found, clicked: false };
}
179
/**
 * Maps captured requests to registry vendors, keeping one entry per vendor id
 * taken from the first request that resolved to that vendor.
 *
 * @param requests  Objects carrying at least `domain` and `url`.
 * @param registry  Object exposing `lookupDomain(domain)`.
 * @returns Array of `{ id, name, category, matchedDomain, firstSeenUrl }` in
 *          order of first appearance; requests with no vendor are dropped.
 */
function classifyTrackers(requests, registry) {
  // A Map keeps insertion order, so first-seen order is preserved.
  const firstHitByVendor = new Map();
  for (const request of requests) {
    const vendor = registry.lookupDomain(request.domain);
    if (!vendor || firstHitByVendor.has(vendor.id)) {
      continue;
    }
    firstHitByVendor.set(vendor.id, {
      id: vendor.id,
      name: vendor.name,
      category: vendor.category,
      matchedDomain: request.domain,
      firstSeenUrl: request.url
    });
  }
  return [...firstHitByVendor.values()];
}
197
+ export {
198
+ checkConsent
199
+ };