@etalon/cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,294 @@
1
+ #!/usr/bin/env node
2
+ import {
3
+ scanSite
4
+ } from "./chunk-Z6ZQZ5HI.js";
5
+
6
+ // src/policy-checker.ts
7
+ import { chromium } from "playwright";
8
+ import {
9
+ VendorRegistry,
10
+ extractDomain
11
+ } from "@etalon/core";
12
// URL pathname prefixes that indicate a privacy/cookie policy page.
// Each pattern is anchored at the start of the pathname and is case-insensitive;
// `[-_]` accepts either separator between words.
const POLICY_PATH_PATTERNS = [
  /^\/privacy[-_]?policy/i,
  /^\/privacy/i,
  /^\/datenschutz/i,
  /^\/politique[-_]de[-_]confidentialite/i,
  /^\/informativa[-_]privacy/i,
  /^\/politica[-_]de[-_]privacidad/i,
  /^\/legal\/privacy/i,
  /^\/about\/privacy/i,
  /^\/cookie[-_]?policy/i,
];

// Anchor-text patterns that indicate a link to a privacy/cookie policy.
// These are unanchored, so they match anywhere inside the link text.
const POLICY_LINK_TEXT_PATTERNS = [
  /privacy\s*policy/i,
  /privacy\s*notice/i,
  /privacy/i,
  /datenschutz/i,
  /cookie\s*policy/i,
  /data\s*protection/i,
  /politique\s*de\s*confidentialit[eé]/i,
];
32
/**
 * Try to locate the privacy-policy page of a site.
 *
 * Strategy:
 *  1. Probe a short list of conventional paths on the site's origin and accept
 *     the first one that answers with a 2xx/3xx status and more than 200
 *     characters of body text.
 *  2. Otherwise load `siteUrl`, collect every `<a href>` and return the first
 *     link whose pathname or anchor text looks like a policy link and whose
 *     host is either the site itself or the hosted-policy provider iubenda.
 *
 * Navigation failures are deliberately swallowed: this is a best-effort search
 * and "not found" is reported as `null`.
 *
 * @param {object} page - Browser page used for navigation (uses `goto`,
 *   `evaluate`, `waitForTimeout` and `$$eval`).
 * @param {string} siteUrl - Absolute URL of the site being audited.
 * @returns {Promise<string|null>} URL of the policy page, or `null` if none was found.
 */
async function findPolicyPage(page, siteUrl) {
  const siteDomain = extractDomain(siteUrl);
  if (!siteDomain) return null;
  const baseUrl = new URL(siteUrl);

  // Match the provider's real domain only. A plain substring test would also
  // accept look-alike hosts such as "iubenda.attacker.example".
  const isAllowedPolicyHost = (hostname) =>
    hostname === baseUrl.hostname ||
    hostname === "iubenda.com" ||
    hostname.endsWith(".iubenda.com");

  const commonPaths = [
    "/privacy-policy",
    "/privacy",
    "/legal/privacy",
    "/datenschutz",
    "/cookie-policy",
    "/about/privacy"
  ];
  for (const path of commonPaths) {
    try {
      const testUrl = `${baseUrl.origin}${path}`;
      const response = await page.goto(testUrl, { waitUntil: "domcontentloaded", timeout: 1e4 });
      if (response && response.status() >= 200 && response.status() < 400) {
        // Require some real content so that empty shells / soft-404 stubs are not accepted.
        const bodyText = await page.evaluate(() => globalThis.document.body?.innerText?.trim() ?? "");
        if (bodyText.length > 200) {
          return testUrl;
        }
      }
    } catch {
      // Best-effort probe: a timeout or navigation error just means "try the next path".
    }
  }

  try {
    await page.goto(siteUrl, { waitUntil: "domcontentloaded", timeout: 15e3 });
    // Short grace period so late-rendered links (e.g. footers) are present.
    await page.waitForTimeout(1e3);
    const links = await page.$$eval(
      "a[href]",
      (anchors) => anchors.map((a) => ({
        href: a.getAttribute("href") ?? "",
        text: a.textContent?.trim() ?? ""
      }))
    );
    for (const link of links) {
      try {
        const fullUrl = new URL(link.href, siteUrl);
        // Only web pages can be policy pages; skip mailto:, javascript:, tel:, ...
        if (fullUrl.protocol !== "http:" && fullUrl.protocol !== "https:") continue;
        const matchesPath = POLICY_PATH_PATTERNS.some((p) => p.test(fullUrl.pathname));
        const matchesText = POLICY_LINK_TEXT_PATTERNS.some((p) => p.test(link.text));
        if ((matchesPath || matchesText) && isAllowedPolicyHost(fullUrl.hostname)) {
          return fullUrl.href;
        }
      } catch {
        // Unparseable href: ignore this link and keep scanning.
      }
    }
  } catch {
    // Homepage could not be loaded or inspected; fall through to "not found".
  }
  return null;
}
84
/**
 * Load a policy page and return its visible text as a single line.
 *
 * Inside the page, script/style/nav/header/footer/iframe elements are removed
 * before `body.innerText` is read. All whitespace runs in the result are
 * collapsed to one space and the ends are trimmed.
 *
 * @param {object} page - Browser page (uses `goto`, `waitForTimeout`, `evaluate`).
 * @param {string} policyUrl - URL of the policy page to read.
 * @returns {Promise<string>} Whitespace-normalized page text.
 */
async function extractPolicyText(page, policyUrl) {
  await page.goto(policyUrl, { waitUntil: "networkidle", timeout: 2e4 });
  await page.waitForTimeout(1e3);

  // This callback runs in the page context, so it must stay self-contained.
  const rawText = await page.evaluate(() => {
    const { document } = globalThis;
    for (const node of document.querySelectorAll("script, style, nav, header, footer, iframe")) {
      node.remove();
    }
    return document.body?.innerText ?? "";
  });

  const collapsed = rawText.replace(/\s+/g, " ");
  return collapsed.trim();
}
95
/**
 * Find which registry vendors are mentioned in a privacy-policy text.
 *
 * A vendor counts as mentioned when the policy text contains, compared
 * case-insensitively as a plain substring, its name, its company name, or any
 * of its domains.
 *
 * Missing or blank identifiers are skipped: `"text".includes("")` is always
 * true, so an empty name/company/domain would otherwise mark the vendor as
 * mentioned in every policy, and a missing one would throw on `.toLowerCase()`.
 *
 * @param {string} policyText - Text of the privacy policy.
 * @param {object} registry - Vendor registry exposing `getAllVendors()`.
 * @returns {Set} Ids of the vendors mentioned in the text.
 */
function matchVendorsInPolicy(policyText, registry) {
  const mentionedIds = new Set();
  const normalizedText = policyText.toLowerCase();

  const isMentioned = (needle) =>
    typeof needle === "string" &&
    needle.trim() !== "" &&
    normalizedText.includes(needle.toLowerCase());

  for (const vendor of registry.getAllVendors()) {
    const identifiers = [vendor.name, vendor.company, ...(vendor.domains ?? [])];
    if (identifiers.some(isMentioned)) {
      mentionedIds.add(vendor.id);
    }
  }
  return mentionedIds;
}
116
/**
 * Rate how serious it is that a detected vendor is missing from the policy.
 *
 * Any vendor that is not GDPR compliant is "critical" regardless of category;
 * otherwise the severity is derived from the vendor category.
 *
 * @param {string} category - Vendor category key (e.g. "analytics").
 * @param {boolean} gdprCompliant - Whether the vendor is flagged GDPR compliant.
 * @returns {"critical"|"high"|"medium"|"low"} Severity level.
 */
function severityForUndisclosed(category, gdprCompliant) {
  if (!gdprCompliant) {
    return "critical";
  }
  switch (category) {
    case "advertising":
    case "social":
      return "critical";
    case "analytics":
    case "heatmaps":
    case "ab_testing":
      return "high";
    case "tag_manager":
    case "chat":
      return "medium";
    default:
      return "low";
  }
}
123
/**
 * Compare the vendors mentioned in the policy with the vendors detected on site.
 *
 * - `disclosed`:   detected and mentioned (severity "info").
 * - `undisclosed`: detected but not mentioned, excluding the cdn, consent,
 *                  security, payments and fonts categories; sorted from most
 *                  to least severe.
 * - `overclaimed`: mentioned but not detected (severity "info"); ids the
 *                  registry cannot resolve are dropped.
 *
 * @param {Set} mentionedVendorIds - Ids of vendors found in the policy text.
 * @param {Array} detectedVendors - Scan results, each carrying a `vendor` record.
 * @param {object} registry - Vendor registry exposing `getById(id)`.
 * @returns {{undisclosed: Array, overclaimed: Array, disclosed: Array}}
 */
function crossReference(mentionedVendorIds, detectedVendors, registry) {
  const exemptCategories = ["cdn", "consent", "security", "payments", "fonts"];
  const rank = { critical: 0, high: 1, medium: 2, low: 3, info: 4 };

  // Every bucket shares the same record shape; only severity and message vary.
  const toRecord = (vendor, severity, message) => ({
    vendorId: vendor.id,
    vendorName: vendor.name,
    category: vendor.category,
    gdprCompliant: vendor.gdpr_compliant,
    severity,
    message
  });

  const detectedIds = new Set(detectedVendors.map((entry) => entry.vendor.id));
  const disclosed = [];
  const undisclosed = [];
  const overclaimed = [];

  for (const { vendor } of detectedVendors) {
    if (mentionedVendorIds.has(vendor.id)) {
      disclosed.push(
        toRecord(vendor, "info", `${vendor.name} is both detected on site and disclosed in privacy policy`)
      );
      continue;
    }
    if (exemptCategories.includes(vendor.category)) {
      continue;
    }
    undisclosed.push(
      toRecord(
        vendor,
        severityForUndisclosed(vendor.category, vendor.gdpr_compliant),
        `${vendor.name} (${vendor.category}) detected on site but NOT disclosed in privacy policy`
      )
    );
  }

  for (const vendorId of mentionedVendorIds) {
    if (detectedIds.has(vendorId)) {
      continue;
    }
    const vendor = registry.getById(vendorId);
    if (!vendor) {
      continue;
    }
    overclaimed.push(
      toRecord(vendor, "info", `${vendor.name} is mentioned in privacy policy but was not detected during scan`)
    );
  }

  undisclosed.sort((left, right) => rank[left.severity] - rank[right.severity]);
  return { undisclosed, overclaimed, disclosed };
}
172
// Human-readable purpose phrase for each vendor category key.
// Used to fill in "We use <vendor> for <label>." in generated disclosure text.
const CATEGORY_LABELS = {
  analytics: "website analytics",
  advertising: "advertising and conversion tracking",
  social: "social media integration",
  tag_manager: "tag management",
  heatmaps: "heatmap and session recording",
  ab_testing: "A/B testing",
  error_tracking: "error monitoring",
  chat: "live chat and customer support",
  video: "video embedding",
  cdn: "content delivery",
  payments: "payment processing",
  consent: "cookie consent management",
  security: "security",
  fonts: "web fonts",
  other: "third-party services",
};
189
/**
 * Build ready-to-paste privacy-policy wording for each undisclosed vendor.
 *
 * Each mismatch is resolved against the registry; entries the registry cannot
 * resolve are skipped. The snippet always names the vendor, its purpose and the
 * data collected, and conditionally adds the provider company, retention
 * period, a data-transfer note for vendors not flagged GDPR compliant, and a
 * link to the vendor's own privacy policy.
 *
 * @param {Array} undisclosed - Mismatch records carrying a `vendorId`.
 * @param {object} registry - Vendor registry exposing `getById(id)`.
 * @returns {Array} One disclosure object per resolvable vendor, in input order.
 */
function generateDisclosures(undisclosed, registry) {
  return undisclosed.flatMap(({ vendorId }) => {
    const vendor = registry.getById(vendorId);
    if (!vendor) {
      return [];
    }

    const purpose = CATEGORY_LABELS[vendor.category] ?? vendor.category;
    const collected = vendor.data_collected.length > 0 ? vendor.data_collected.join(", ") : "usage data";
    const hasDistinctCompany = Boolean(vendor.company) && vendor.company !== vendor.name;

    // Fragments already carry their own leading space, so they are joined as-is.
    const fragments = [`We use ${vendor.name}`];
    if (hasDistinctCompany) {
      fragments.push(` (provided by ${vendor.company})`);
    }
    fragments.push(` for ${purpose}.`, ` This service may collect ${collected}.`);
    if (vendor.retention_period) {
      fragments.push(` Data is retained for ${vendor.retention_period}.`);
    }
    if (!vendor.gdpr_compliant) {
      fragments.push(` Please note that this service may transfer data outside the EU/EEA.`);
    }
    if (vendor.privacy_policy) {
      fragments.push(
        ` For more information, see ${vendor.company || vendor.name}'s privacy policy at ${vendor.privacy_policy}.`
      );
    }

    return [{
      vendorId: vendor.id,
      vendorName: vendor.name,
      company: vendor.company,
      category: vendor.category,
      snippet: fragments.join(""),
      dataCollected: vendor.data_collected,
      privacyPolicyUrl: vendor.privacy_policy,
      dpaUrl: vendor.dpa_url
    }];
  });
}
224
/**
 * Audit a site's privacy policy against the vendors actually detected on it.
 *
 * Runs a shallow site scan, launches headless Chromium, resolves the policy
 * page (from `options.policyUrl` or by searching the site), extracts its text
 * and cross-references the vendors it mentions with the detected ones. When no
 * policy page is found, every detected vendor is treated as unmentioned and the
 * check fails.
 *
 * @param {string} url - Site to audit.
 * @param {object} [options]
 * @param {number} [options.timeout=30000] - Timeout forwarded to the site scan.
 * @param {string} [options.policyUrl] - Explicit policy URL; skips discovery.
 * @returns {Promise<object>} Report with `url`, `policyUrl`, `policyFound`,
 *   `mentionedVendors`, `detectedVendors`, `undisclosed`, `overclaimed`,
 *   `disclosed`, `disclosures` and `pass`.
 */
async function checkPolicy(url, options = {}) {
  const timeout = options.timeout ?? 3e4;
  const registry = VendorRegistry.load();
  const { vendors: detectedVendors } = await scanSite(url, { timeout, deep: false });

  // Compact vendor summary used for both vendor lists in the report.
  const summarize = (vendor) => ({
    vendorId: vendor.id,
    vendorName: vendor.name,
    category: vendor.category
  });

  let browser = null;
  try {
    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext({
      userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
      viewport: { width: 1920, height: 1080 }
    });
    const page = await context.newPage();

    // An explicit policy URL wins; otherwise try to discover one on the site.
    const resolvedUrl = options.policyUrl || (await findPolicyPage(page, url));
    const policyFound = Boolean(resolvedUrl);

    // Without a policy nothing can be "mentioned", so compare against an empty set.
    const mentionedVendorIds = policyFound
      ? matchVendorsInPolicy(await extractPolicyText(page, resolvedUrl), registry)
      : new Set();

    const result = crossReference(mentionedVendorIds, detectedVendors, registry);
    const disclosures = generateDisclosures(result.undisclosed, registry);
    const mentionedVendors = [...mentionedVendorIds]
      .map((id) => registry.getById(id))
      .filter(Boolean)
      .map(summarize);

    return {
      url,
      policyUrl: policyFound ? resolvedUrl : null,
      policyFound,
      mentionedVendors,
      detectedVendors: detectedVendors.map((entry) => summarize(entry.vendor)),
      ...result,
      disclosures,
      // A missing policy is always a failure, even with nothing undisclosed.
      pass: policyFound && result.undisclosed.length === 0
    };
  } finally {
    await browser?.close();
  }
}
292
+ export {
293
+ checkPolicy
294
+ };
package/package.json ADDED
@@ -0,0 +1,43 @@
1
+ {
2
+ "name": "@etalon/cli",
3
+ "version": "1.0.0",
4
+ "description": "ETALON \u2014 Privacy audit tool for websites. Scan any site for trackers and GDPR compliance.",
5
+ "main": "./dist/index.js",
6
+ "types": "./dist/index.d.ts",
7
+ "type": "module",
8
+ "bin": {
9
+ "etalon": "./dist/index.js"
10
+ },
11
+ "scripts": {
12
+ "build": "tsup",
13
+ "clean": "rm -rf dist",
14
+ "postinstall": "npx playwright install chromium || true"
15
+ },
16
+ "files": [
17
+ "dist",
18
+ "README.md"
19
+ ],
20
+ "dependencies": {
21
+ "chalk": "^5.3.0",
22
+ "commander": "^12.0.0",
23
+ "ora": "^8.0.0",
24
+ "playwright": "^1.42.0",
25
+ "yaml": "^2.3.0",
26
+ "@etalon/core": "*"
27
+ },
28
+ "keywords": [
29
+ "privacy",
30
+ "gdpr",
31
+ "tracker",
32
+ "audit",
33
+ "scanner",
34
+ "compliance",
35
+ "cookies",
36
+ "mcp"
37
+ ],
38
+ "license": "MIT",
39
+ "repository": {
40
+ "type": "git",
41
+ "url": "https://github.com/NMA-vc/etalon"
42
+ }
43
+ }