@etalon/core 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,557 @@
1
+ interface Vendor { // One vendor record: element type of VendorDatabase.vendors and the result type of VendorRegistry.lookupDomain/getById.
2
+ id: string;
3
+ domains: string[]; // presumably the hostnames VendorRegistry.lookupDomain matches against — confirm
4
+ name: string;
5
+ company: string;
6
+ category: VendorCategory;
7
+ gdpr_compliant: boolean; // presumably the flag VendorRegistry.getCompliant() filters on — confirm
8
+ dpa_url?: string;
9
+ privacy_policy?: string;
10
+ purpose: string;
11
+ data_collected: string[];
12
+ retention_period?: string;
13
+ last_verified?: string; // NOTE(review): presumably a date string; format is not declared — confirm
14
+ risk_score: number; // NOTE(review): scale/range is not declared here — confirm before comparing scores
15
+ alternatives?: string[]; // NOTE(review): vendor ids or display names? not visible from the type — confirm
16
+ }
17
+ type VendorCategory = 'analytics' | 'advertising' | 'social' | 'cdn' | 'payments' | 'chat' | 'heatmaps' | 'ab_testing' | 'error_tracking' | 'tag_manager' | 'consent' | 'video' | 'fonts' | 'security' | 'push' | 'forms' | 'referral' | 'booking' | 'maps' | 'web3' | 'b2b_intelligence' | 'other'; // Closed set of vendor categories; type of Vendor.category and Category.id, and the argument of VendorRegistry.getByCategory.
18
+ interface Category { // Descriptor for one VendorCategory value; element type of VendorDatabase.categories and of VendorRegistry.getCategories().
19
+ id: VendorCategory;
20
+ name: string;
21
+ description: string;
22
+ risk_level: 'low' | 'medium' | 'high' | 'critical'; // same literal union as the RiskLevel alias declared in this file
23
+ }
24
+ interface VendorDatabase { // In-memory form of the vendor database; the argument type of VendorRegistry.fromDatabase.
25
+ version: string;
26
+ last_updated: string; // NOTE(review): presumably a date string; format is not declared — confirm
27
+ vendors: Vendor[];
28
+ categories: Category[];
29
+ }
30
+ interface ScanRequest { // Options for a single scan; only url is required. ScanConfig declares snake_case counterparts of waitForNetworkIdle, timeout, userAgent and viewport.
31
+ url: string;
32
+ deep?: boolean;
33
+ timeout?: number; // NOTE(review): unit (ms vs. s) is not declared — confirm
34
+ waitForNetworkIdle?: boolean;
35
+ userAgent?: string;
36
+ viewport?: {
37
+ width: number;
38
+ height: number;
39
+ };
40
+ }
41
+ interface NetworkRequest { // One observed request; element type of DetectedVendor.requests and UnknownDomain.requests.
42
+ url: string;
43
+ domain: string;
44
+ method: string;
45
+ type: string; // NOTE(review): presumably the resource type (script, image, …); allowed values are not declared — confirm
46
+ timestamp: string;
47
+ statusCode?: number;
48
+ contentType?: string;
49
+ size?: number; // NOTE(review): unit is not declared (presumably bytes) — confirm
50
+ }
51
+ interface DetectedVendor { // A Vendor together with a list of NetworkRequests; element type of ScanReport.vendors.
52
+ vendor: Vendor;
53
+ requests: NetworkRequest[]; // presumably the requests attributed to this vendor during the scan — confirm
54
+ }
55
+ interface UnknownDomain { // Requests grouped under one domain; element type of ScanReport.unknown. Presumably domains with no vendor match — confirm.
56
+ domain: string;
57
+ requests: NetworkRequest[];
58
+ suggestedAction: 'submit_for_review' | 'likely_benign' | 'investigate';
59
+ }
60
+ type RiskLevel = 'low' | 'medium' | 'high' | 'critical'; // NOTE(review): exported, but no other declaration in this file references it — Category.risk_level repeats the same union inline.
61
+ interface ScanSummary { // Aggregate counters for one scan; type of ScanReport.summary.
62
+ totalRequests: number;
63
+ thirdPartyRequests: number;
64
+ knownVendors: number;
65
+ unknownDomains: number;
66
+ highRisk: number; // NOTE(review): there is no separate counter for 'critical' although RiskLevel includes it — confirm how critical vendors are counted
67
+ mediumRisk: number;
68
+ lowRisk: number;
69
+ }
70
+ interface ScanReport { // Full result of one scan: metadata, summary counters, detected vendors, unknown domains and recommendations.
71
+ meta: {
72
+ etalonVersion: string;
73
+ scanDate: string; // NOTE(review): date format is not declared — confirm
74
+ scanDurationMs: number;
75
+ url: string;
76
+ deep: boolean;
77
+ };
78
+ summary: ScanSummary;
79
+ vendors: DetectedVendor[];
80
+ unknown: UnknownDomain[];
81
+ recommendations: Recommendation[]; // structured items here, whereas AuditReport.recommendations is a plain string[]
82
+ }
83
+ interface Recommendation { // One advisory item; element type of ScanReport.recommendations.
84
+ type: 'high_risk_vendor' | 'missing_dpa' | 'unknown_tracker' | 'consider_alternative';
85
+ vendorId?: string; // NOTE(review): vendorId and domain are both optional; which one is set presumably depends on type — not enforced by the type
86
+ domain?: string;
87
+ message: string;
88
+ }
89
+ interface OpticConfig { // Top-level configuration object; only version is required. NOTE(review): "Optic" differs from the package name (ETALON) — presumably a legacy name, confirm.
90
+ version: string;
91
+ sites?: SiteConfig[];
92
+ allowlist?: AllowlistEntry[];
93
+ notifications?: NotificationConfig;
94
+ scan?: ScanConfig;
95
+ }
96
+ interface SiteConfig { // One configured site; element type of OpticConfig.sites.
97
+ url: string;
98
+ name: string;
99
+ schedule?: 'daily' | 'weekly' | 'on-commit'; // optional; behavior when omitted is not visible here
100
+ }
101
+ interface AllowlistEntry { // One allowlist record; element type of OpticConfig.allowlist. Only reason is required.
102
+ vendor_id?: string; // NOTE(review): vendor_id and domain are both optional, so the type permits an entry that names no target — confirm how such entries are treated
103
+ domain?: string;
104
+ reason: string;
105
+ approved_by?: string;
106
+ approved_date?: string; // NOTE(review): date format is not declared — confirm
107
+ notes?: string;
108
+ }
109
+ interface NotificationConfig { // Notification settings; type of OpticConfig.notifications. All fields are optional.
110
+ email?: string;
111
+ slack_webhook?: string;
112
+ threshold?: 'low' | 'medium' | 'high'; // note: unlike RiskLevel, 'critical' is not an accepted value here
113
+ }
114
+ interface ScanConfig { // Scan options; type of OpticConfig.scan. Snake_case counterparts of ScanRequest's waitForNetworkIdle, timeout, userAgent and viewport.
115
+ wait_for_network_idle?: boolean;
116
+ timeout?: number; // NOTE(review): unit (ms vs. s) is not declared — confirm
117
+ user_agent?: string;
118
+ viewport?: {
119
+ width: number;
120
+ height: number;
121
+ };
122
+ }
123
+
124
+ /**
125
+ * VendorRegistry provides O(1) domain-to-vendor lookups, category filtering,
126
+ * and search capabilities over the ETALON vendor database. Obtain an instance via the static load() or fromDatabase() factories.
127
+ */
128
+ declare class VendorRegistry {
129
+ private vendors;
130
+ private categories;
131
+ private domainMap; // presumably the domain → Vendor index behind lookupDomain — confirm
132
+ private version;
133
+ private lastUpdated;
134
+ /**
135
+ * Load the vendor database from a JSON file.
136
+ * If no path is provided, loads the bundled vendors.json. NOTE(review): behavior on a missing or malformed file (throw vs. fallback) is not visible here — confirm.
137
+ */
138
+ static load(path?: string): VendorRegistry;
139
+ /**
140
+ * Load from an in-memory VendorDatabase object (useful for testing).
141
+ */
142
+ static fromDatabase(db: VendorDatabase): VendorRegistry;
143
+ /**
144
+ * Look up a vendor by domain.
145
+ * Tries exact match first, then walks up parent domains (so a subdomain may need several probes; the O(1) in the class comment presumably refers to each probe — confirm).
146
+ *
147
+ * @param domainOrUrl - A domain (e.g. "ssl.google-analytics.com") or full URL
148
+ * @returns The matched Vendor, or null if not found
149
+ */
150
+ lookupDomain(domainOrUrl: string): Vendor | null;
151
+ /**
152
+ * Get all vendors in a specific category.
153
+ */
154
+ getByCategory(category: VendorCategory): Vendor[];
155
+ /**
156
+ * Get all GDPR-compliant vendors (presumably those with gdpr_compliant set to true — confirm).
157
+ */
158
+ getCompliant(): Vendor[];
159
+ /**
160
+ * Get all available categories.
161
+ */
162
+ getCategories(): Category[];
163
+ /**
164
+ * Search vendors by name or company (case-insensitive substring match).
165
+ */
166
+ search(query: string): Vendor[];
167
+ /**
168
+ * Get all vendors. NOTE(review): whether this returns a copy or the internal array is not visible here — avoid mutating the result.
169
+ */
170
+ getAllVendors(): Vendor[];
171
+ /**
172
+ * Get a vendor by its ID. The return type allows null, presumably when no vendor has that ID.
173
+ */
174
+ getById(id: string): Vendor | null;
175
+ /**
176
+ * Get registry metadata: database version, last-updated stamp, and vendor/category/domain counts.
177
+ */
178
+ getMetadata(): {
179
+ version: string;
180
+ lastUpdated: string;
181
+ vendorCount: number;
182
+ categoryCount: number;
183
+ domainCount: number;
184
+ };
185
+ /**
186
+ * Default path to the bundled vendors.json file.
187
+ */
188
+ private static defaultPath;
189
+ }
190
+
191
+ /**
192
+ * Domain utility functions for URL parsing and domain matching.
193
+ */
194
+ /**
195
+ * Extract the hostname (domain) from a URL string.
196
+ * Returns null for invalid URLs, data URIs, etc. NOTE(review): whether a bare hostname without a scheme counts as valid is not visible here — confirm (see normalizeUrl).
197
+ */
198
+ declare function extractDomain(url: string): string | null;
199
+ /**
200
+ * Get all parent domains of a given hostname.
201
+ * e.g., "ssl.google-analytics.com" → ["google-analytics.com"]
202
+ *
203
+ * Stops at TLD level (single-label domains are excluded from results). NOTE(review): example adjusted to follow this rule (it used to include the bare TLD "com") — confirm against the implementation.
204
+ */
205
+ declare function getParentDomains(domain: string): string[];
206
+ /**
207
+ * Check if a request domain is first-party relative to the scanned site.
208
+ * First-party means requestDomain equals siteDomain or is a subdomain of it.
209
+ */
210
+ declare function isFirstParty(requestDomain: string, siteDomain: string): boolean;
211
+ /**
212
+ * Normalize a URL for consistent processing.
213
+ * Adds https:// if no protocol is specified. NOTE(review): any further normalization (case, trailing slash, …) is not visible in this declaration — confirm.
214
+ */
215
+ declare function normalizeUrl(url: string): string;
216
+
217
+ type FindingSeverity = 'critical' | 'high' | 'medium' | 'low' | 'info'; // Severity of an audit finding, listed from most to least severe; also the threshold type of shouldBlock.
218
+ type FindingCategory = 'code' | 'schema' | 'config'; // Type of AuditFinding.category; presumably mirrors the scanCode / scanSchemas / scanConfigs scanners — confirm.
219
+ interface AuditFinding { // One audit finding; element type of AuditReport.findings and of the arrays returned by the scan* functions.
220
+ id: string;
221
+ category: FindingCategory;
222
+ severity: FindingSeverity;
223
+ title: string;
224
+ message: string;
225
+ file: string; // NOTE(review): absolute vs. relative to the scan's baseDir is not declared — confirm
226
+ line?: number; // NOTE(review): 0- vs. 1-based is not declared — confirm
227
+ column?: number;
228
+ vendorId?: string;
229
+ rule: string;
230
+ fix?: string;
231
+ blame?: BlameInfo; // presumably filled in by enrichFindings — confirm
232
+ gdprArticles?: GdprReference$1[]; // presumably filled in by enrichWithGdpr — confirm
233
+ }
234
+ interface BlameInfo { // Git blame details; the non-null return type of getBlameForLine and the type of AuditFinding.blame.
235
+ author: string;
236
+ email: string;
237
+ date: string; // NOTE(review): date format is not declared — confirm
238
+ commit: string; // presumably the commit hash — confirm
239
+ commitMessage: string;
240
+ }
241
+ interface DiffResult { // Return type of diffReports and first argument of shouldBlock.
242
+ added: AuditFinding[]; // presumably findings present in `current` but not in `baseline` — confirm
243
+ removed: AuditFinding[]; // presumably findings present in `baseline` but not in `current` — confirm
244
+ unchanged: AuditFinding[];
245
+ }
246
+ interface GdprReference$1 { // Exported from this module under the public name GdprReference; structurally identical to the separate, unexported GdprReference interface declared later in this file.
247
+ article: string;
248
+ title: string;
249
+ url: string;
250
+ }
251
+ type ComplianceGrade = 'A' | 'B' | 'C' | 'D' | 'F'; // Letter grade; the score range for each letter is listed in the calculateScore doc comment.
252
+ interface ComplianceScore { // Return type of calculateScore; type of the optional AuditReport.score.
253
+ score: number; // 0–100 per the calculateScore doc comment
254
+ grade: ComplianceGrade;
255
+ breakdown: { // NOTE(review): per-severity numbers; whether these are finding counts or point deductions is not declared — confirm
256
+ critical: number;
257
+ high: number;
258
+ medium: number;
259
+ low: number;
260
+ info: number;
261
+ };
262
+ }
263
+ interface AuditReport { // Result of an audit; resolved value of auditProject and input to calculateScore, diffReports and formatAuditSarif.
264
+ meta: {
265
+ etalonVersion: string;
266
+ auditDate: string; // NOTE(review): date format is not declared — confirm
267
+ auditDurationMs: number;
268
+ directory: string;
269
+ stack: StackInfo;
270
+ };
271
+ summary: AuditSummary;
272
+ score?: ComplianceScore; // optional; presumably absent until calculateScore has been applied — confirm
273
+ findings: AuditFinding[];
274
+ recommendations: string[]; // plain strings here, whereas ScanReport.recommendations uses the structured Recommendation type
275
+ }
276
+ interface AuditSummary { // Aggregate counters for an audit; type of AuditReport.summary.
277
+ totalFindings: number;
278
+ critical: number; // critical/high/medium/low/info mirror the FindingSeverity values — presumably per-severity finding counts
279
+ high: number;
280
+ medium: number;
281
+ low: number;
282
+ info: number;
283
+ trackerSdksFound: number;
284
+ piiColumnsFound: number;
285
+ configIssues: number;
286
+ }
287
+ type Language = 'javascript' | 'typescript' | 'python' | 'rust' | 'unknown'; // Element type of StackInfo.languages; also tags TrackerPatternDatabase.importPatterns entries.
288
+ type Framework = 'nextjs' | 'express' | 'fastify' | 'nuxt' | 'svelte' | 'django' | 'flask' | 'fastapi' | 'actix' | 'axum' | 'rocket' | 'none'; // Type of StackInfo.framework; note the fallback member is 'none' here, while Language uses 'unknown'.
289
+ type ORM = 'prisma' | 'typeorm' | 'drizzle' | 'sequelize' | 'django-orm' | 'sqlalchemy' | 'diesel' | 'sea-orm' | 'raw-sql' | 'none'; // Type of StackInfo.orm; 'none' is the fallback member.
290
+ interface StackInfo { // Detected project stack; return type of detectStack, argument of the scan* functions, and type of AuditReport.meta.stack.
291
+ languages: Language[];
292
+ framework: Framework; // single value — the type cannot represent more than one framework
293
+ orm: ORM;
294
+ packageManager: 'npm' | 'yarn' | 'pnpm' | 'pip' | 'poetry' | 'cargo' | 'unknown';
295
+ detectedFiles: string[]; // presumably the files that led to the detection — confirm
296
+ }
297
+ interface TrackerPattern { // Value type of the npm/pypi/cargo/envVars records in TrackerPatternDatabase.
298
+ vendorId: string; // presumably a Vendor.id — confirm
299
+ severity: FindingSeverity;
300
+ }
301
+ interface TrackerPatternDatabase { // Pattern tables passed to scanCode as its `patterns` argument.
302
+ npm: Record<string, TrackerPattern>; // presumably keyed by package name — confirm
303
+ pypi: Record<string, TrackerPattern>;
304
+ cargo: Record<string, TrackerPattern>;
305
+ envVars: Record<string, TrackerPattern>; // presumably keyed by environment-variable name — confirm
306
+ htmlPatterns: Array<{
307
+ pattern: string; // NOTE(review): literal substring or regex source? not declared — confirm
308
+ vendorId: string;
309
+ severity: FindingSeverity;
310
+ }>;
311
+ importPatterns: Array<{
312
+ pattern: string; // NOTE(review): literal substring or regex source? not declared — confirm
313
+ language: Language;
314
+ vendorId: string;
315
+ severity: FindingSeverity;
316
+ }>;
317
+ }
318
+
319
+ /**
320
+ * Auto-detect the project stack (languages, framework, ORM, package manager) from filesystem presence under `directory`.
321
+ */
322
+ declare function detectStack(directory: string): StackInfo;
323
+
324
+ /**
325
+ * Scan source files for tracker SDK usage, hardcoded tracking pixels,
326
+ * tracker-related env vars, and raw cookie writes. `patterns` supplies the tracker pattern tables; results are returned as AuditFinding[].
327
+ */
328
+ declare function scanCode(files: string[], baseDir: string, stack: StackInfo, patterns: TrackerPatternDatabase): AuditFinding[];
329
+
330
+ /**
331
+ * Scan database schema files for PII storage patterns. NOTE(review): how `stack` (e.g. its orm field) selects the schema formats that are parsed is not visible here — confirm.
332
+ */
333
+ declare function scanSchemas(files: string[], baseDir: string, stack: StackInfo): AuditFinding[];
334
+
335
+ /**
336
+ * Scan configuration files for privacy/security issues. NOTE(review): which configuration formats are recognised is not visible in this declaration — confirm.
337
+ */
338
+ declare function scanConfigs(files: string[], baseDir: string, stack: StackInfo): AuditFinding[];
339
+
340
+ /**
341
+ * Scan source files for server-side API calls to tracking service endpoints.
342
+ * These requests bypass client-side privacy controls (ad blockers, consent banners). NOTE(review): the underscore in `_stack` suggests the parameter is currently unused — confirm.
343
+ */
344
+ declare function scanServerTracking(files: string[], baseDir: string, _stack: StackInfo): AuditFinding[];
345
+
346
+ /**
347
+ * Scan for CNAME cloaking patterns across DNS configs, IaC, proxy configs,
348
+ * and framework-level rewrites/proxies. Takes the same (files, baseDir, stack) arguments as scanSchemas and scanConfigs.
349
+ */
350
+ declare function scanCnameCloaking(files: string[], baseDir: string, stack: StackInfo): AuditFinding[];
351
+
352
+ /**
353
+ * Format an AuditReport as SARIF 2.1.0. Returns a string (presumably the serialized SARIF JSON document — confirm).
354
+ * Compatible with GitHub Code Scanning and other SARIF consumers.
355
+ * Spec: https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html (overview: https://sarifweb.azurewebsites.net/)
356
+ */
357
+ declare function formatAuditSarif(report: AuditReport): string;
358
+
359
+ /**
360
+ * Compare two audit reports to identify added, removed, and unchanged findings.
361
+ * Used by the GitHub Action to surface only new issues introduced in a PR. NOTE(review): how findings are matched between the two reports (by id, by rule and file, …) is not visible here — confirm.
362
+ */
363
+ declare function diffReports(current: AuditReport, baseline: AuditReport): DiffResult;
364
+ /**
365
+ * Check whether diff results should block a PR based on minimum severity threshold. NOTE(review): presumably only diff.added is considered and `threshold` is the lowest severity that blocks — confirm.
366
+ */
367
+ declare function shouldBlock(diff: DiffResult, threshold: FindingSeverity): boolean;
368
+
369
+ /**
370
+ * Check if we're inside a git repository. NOTE(review): when `cwd` is omitted the check presumably uses the process working directory — confirm.
371
+ */
372
+ declare function isGitRepo(cwd?: string): boolean;
373
+ /**
374
+ * Get blame info for a specific line in a file. The return type allows null, presumably when blame is unavailable; NOTE(review): whether lineNumber is 0- or 1-based is not declared — confirm.
375
+ */
376
+ declare function getBlameForLine(filePath: string, lineNumber: number, cwd?: string): BlameInfo | null;
377
+ /**
378
+ * Enrich audit findings with git blame information.
379
+ * Only enriches findings that have a file and line number. NOTE(review): whether the input array/objects are mutated or copied is not visible here — confirm.
380
+ */
381
+ declare function enrichFindings(findings: AuditFinding[], cwd?: string): AuditFinding[];
382
+ /**
383
+ * Group findings by author for reporting. Map keys are presumably BlameInfo.author values; how findings without blame are keyed is not visible here — confirm.
384
+ */
385
+ declare function groupByAuthor(findings: AuditFinding[]): Map<string, AuditFinding[]>;
386
+
387
+ interface GdprReference { // Value element type of GDPR_RULE_MAP. Not in the export list itself: the exported name GdprReference refers to the structurally identical GdprReference$1.
388
+ article: string;
389
+ title: string;
390
+ url: string;
391
+ }
392
+ declare const GDPR_RULE_MAP: Record<string, GdprReference[]>; // Keys are presumably AuditFinding.rule identifiers mapped to their GDPR references — confirm.
393
+ /**
394
+ * Enrich findings with GDPR article references (presumably by looking up each finding's rule in GDPR_RULE_MAP and filling gdprArticles — confirm).
395
+ */
396
+ declare function enrichWithGdpr(findings: AuditFinding[]): AuditFinding[];
397
+
398
+ /**
399
+ * Calculate a compliance score (0–100) from an audit report.
400
+ *
401
+ * Formula: score = max(0, 100 - Σ(severity_weight × count)); the per-severity weights are not visible in this declaration.
402
+ *
403
+ * Grade scale:
404
+ * A = 90–100 (excellent)
405
+ * B = 75–89 (good, minor issues)
406
+ * C = 60–74 (fair, needs attention)
407
+ * D = 40–59 (poor, significant issues)
408
+ * F = 0–39 (failing, critical issues)
409
+ */
410
+ declare function calculateScore(report: AuditReport): ComplianceScore;
411
+ /**
412
+ * Get styling hints for a grade (for badges, HTML reports, etc.). Returns a string — presumably a color value; its format (hex, CSS name, …) is not declared — confirm.
413
+ */
414
+ declare function gradeColor(grade: ComplianceGrade): string;
415
+
416
+ /**
417
+ * Generate a shields.io-style SVG badge for the compliance grade. Returns the badge as a string.
418
+ */
419
+ declare function generateBadgeSvg(score: ComplianceScore): string;
420
+ /**
421
+ * Generate a shields.io badge URL for an ETALON score.
422
+ * Uses the shields.io endpoint badge format with the ETALON API. NOTE(review): `grade` is typed as plain string rather than ComplianceGrade, so arbitrary text is accepted — confirm whether it is validated or escaped.
423
+ */
424
+ declare function badgeUrl(grade: string, score: number): string;
425
+ /**
426
+ * Generate markdown for embedding a badge. Takes the same (grade, score) arguments as badgeUrl; `grade` is likewise a plain string rather than ComplianceGrade.
427
+ */
428
+ declare function badgeMarkdown(grade: string, score: number): string;
429
+
430
+ interface FilePatch { // One proposed edit; element type returned by generatePatches and consumed by applyPatches.
431
+ file: string; // NOTE(review): absolute vs. relative to baseDir is not declared — confirm
432
+ line: number; // NOTE(review): 0- vs. 1-based is not declared — confirm
433
+ rule: string;
434
+ oldContent: string;
435
+ newContent: string;
436
+ description: string;
437
+ }
438
+ /**
439
+ * Generate patches for fixable findings (presumably those whose rule is listed by fixableRules() — confirm).
440
+ */
441
+ declare function generatePatches(findings: AuditFinding[], baseDir: string): FilePatch[];
442
+ /**
443
+ * Apply patches to files. Returns a number — presumably the count of patches applied; confirm. NOTE(review): this writes to disk, and behavior when oldContent no longer matches is not visible here.
444
+ */
445
+ declare function applyPatches(patches: FilePatch[], baseDir: string): number;
446
+ /**
447
+ * List of rules that have auto-fixers. Returned as strings, presumably matching AuditFinding.rule / FilePatch.rule — confirm.
448
+ */
449
+ declare function fixableRules(): string[];
450
+
451
+ interface CustomRule { // One user-defined rule; element type returned by loadCustomRules and consumed by scanWithCustomRules.
452
+ name: string;
453
+ description: string;
454
+ severity: FindingSeverity;
455
+ patterns: CustomPattern[];
456
+ gdpr_articles?: string[]; // plain strings here, unlike the structured GdprReference objects on AuditFinding.gdprArticles
457
+ fix?: string;
458
+ }
459
+ interface CustomPattern { // One match pattern inside a CustomRule (element type of CustomRule.patterns).
460
+ regex: string; // regex source as a string; NOTE(review): flags and dialect are not declared — confirm
461
+ languages?: string[]; // typed as string[] rather than Language[], so values are not checked against the Language union
462
+ message: string;
463
+ }
464
+ /**
465
+ * Load custom rules from a directory of YAML files.
466
+ * Expected location: .etalon/rules/*.yaml. NOTE(review): whether `directory` is the project root or the rules folder itself is not stated — confirm.
467
+ */
468
+ declare function loadCustomRules(directory: string): CustomRule[];
469
+ /**
470
+ * Scan files using custom rules and return findings. NOTE(review): how CustomPattern.languages restricts which files a pattern is applied to is not visible here — confirm.
471
+ */
472
+ declare function scanWithCustomRules(files: string[], directory: string, rules: CustomRule[]): AuditFinding[];
473
+
474
+ type FlowNodeType = 'source' | 'storage' | 'sink'; // Type of FlowNode.type; matches the "sources, storage, and sinks of PII" wording on analyzeDataFlow.
475
+ interface FlowNode { // One node of the data-flow graph; element type of DataFlowMap.nodes.
476
+ type: FlowNodeType;
477
+ label: string;
478
+ file: string;
479
+ line: number; // NOTE(review): 0- vs. 1-based is not declared — confirm
480
+ piiType: string; // NOTE(review): free-form string; the set of PII type names is not declared here
481
+ detail: string;
482
+ }
483
+ interface FlowEdge { // One directed edge of the data-flow graph; element type of DataFlowMap.edges.
484
+ from: number; // presumably an index into DataFlowMap.nodes — confirm
485
+ to: number; // presumably an index into DataFlowMap.nodes — confirm
486
+ label?: string;
487
+ }
488
+ interface DataFlowMap { // Graph returned by analyzeDataFlow; input to toMermaid and toTextSummary, and optional PolicyGeneratorInput.dataFlow.
489
+ nodes: FlowNode[];
490
+ edges: FlowEdge[];
491
+ }
492
+ /**
493
+ * Analyze data flow across files — find sources, storage, and sinks of PII. Returns them as the nodes and edges of a DataFlowMap.
494
+ */
495
+ declare function analyzeDataFlow(files: string[], directory: string): DataFlowMap;
496
+ /**
497
+ * Generate a Mermaid diagram from the data flow map. Returns a string (presumably the Mermaid diagram source — confirm).
498
+ */
499
+ declare function toMermaid(flow: DataFlowMap): string;
500
+ /**
501
+ * Generate a text summary of the data flow. Returns the summary as a string.
502
+ */
503
+ declare function toTextSummary(flow: DataFlowMap): string;
504
+
505
+ interface PolicyInput { // Caller-supplied details for policy generation; type of PolicyGeneratorInput.input. companyName and companyEmail are required.
506
+ siteUrl?: string;
507
+ projectDir?: string;
508
+ companyName: string;
509
+ companyEmail: string;
510
+ companyCountry?: string; // NOTE(review): country name or ISO code? not declared — confirm
511
+ }
512
+ interface PolicySection { // One section of a generated policy; element type of GeneratedPolicy.sections.
513
+ id: string;
514
+ title: string;
515
+ content: string; // NOTE(review): markup format (plain text, markdown, …) is not declared — confirm
516
+ }
517
+ interface PolicyVendorEntry { // Vendor details as listed in a generated policy; element type of GeneratedPolicy.vendors. Field names are camelCase counterparts of several Vendor fields.
518
+ vendorId: string;
519
+ vendorName: string;
520
+ company: string;
521
+ category: string; // typed as plain string here, not VendorCategory
522
+ purpose: string;
523
+ dataCollected: string[];
524
+ privacyPolicyUrl?: string;
525
+ dpaUrl?: string;
526
+ retentionPeriod?: string;
527
+ gdprCompliant: boolean;
528
+ source: 'code' | 'network' | 'both'; // presumably where the vendor was detected (code audit vs. network scan) — confirm
529
+ }
530
+ interface GeneratedPolicy { // Return type of generatePolicy.
531
+ sections: PolicySection[];
532
+ vendors: PolicyVendorEntry[];
533
+ piiTypes: string[];
534
+ fullText: string; // presumably the sections rendered into one document — confirm
535
+ meta: {
536
+ generatedAt: string; // NOTE(review): date format is not declared — confirm
537
+ etalonVersion: string;
538
+ sources: string[];
539
+ };
540
+ }
541
+ interface PolicyGeneratorInput { // Argument of generatePolicy; only input is required.
542
+ input: PolicyInput;
543
+ audit?: AuditReport;
544
+ networkVendorIds?: Set<string>; // presumably Vendor.id values seen in a network scan — confirm
545
+ dataFlow?: DataFlowMap;
546
+ }
547
+ declare function generatePolicy(opts: PolicyGeneratorInput): GeneratedPolicy; // Builds a GeneratedPolicy from opts; only opts.input is required (audit, networkVendorIds and dataFlow are optional). Synchronous, unlike auditProject.
548
+
549
+ /**
550
+ * Run a full audit on a project directory. Asynchronous: resolves to an AuditReport.
551
+ */
552
+ declare function auditProject(directory: string, options?: {
553
+ severity?: string; // NOTE(review): typed as plain string rather than FindingSeverity; accepted values and effect (presumably a minimum-severity filter) are not visible — confirm
554
+ includeBlame?: boolean; // presumably attaches BlameInfo to findings (AuditFinding.blame) — confirm
555
+ }): Promise<AuditReport>;
556
+
557
+ export { type AllowlistEntry, type AuditFinding, type AuditReport, type AuditSummary, type BlameInfo, type Category, type ComplianceGrade, type ComplianceScore, type CustomPattern, type CustomRule, type DataFlowMap, type DetectedVendor, type DiffResult, type FilePatch, type FindingCategory, type FindingSeverity, type FlowEdge, type FlowNode, type FlowNodeType, GDPR_RULE_MAP, type GdprReference$1 as GdprReference, type GeneratedPolicy, type NetworkRequest, type NotificationConfig, type OpticConfig, type PolicyGeneratorInput, type PolicyInput, type PolicySection, type PolicyVendorEntry, type Recommendation, type RiskLevel, type ScanConfig, type ScanReport, type ScanRequest, type ScanSummary, type SiteConfig, type StackInfo, type UnknownDomain, type Vendor, type VendorCategory, type VendorDatabase, VendorRegistry, analyzeDataFlow, applyPatches, auditProject, badgeMarkdown, badgeUrl, calculateScore, detectStack, diffReports, enrichFindings, enrichWithGdpr, extractDomain, fixableRules, formatAuditSarif, generateBadgeSvg, generatePatches, generatePolicy, getBlameForLine, getParentDomains, gradeColor, groupByAuthor, isFirstParty, isGitRepo, loadCustomRules, normalizeUrl, scanCnameCloaking, scanCode, scanConfigs, scanSchemas, scanServerTracking, scanWithCustomRules, shouldBlock, toMermaid, toTextSummary };