ship-safe 5.0.0 → 5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,7 +18,7 @@
18
18
  import fs from 'fs';
19
19
  import path from 'path';
20
20
  import fg from 'fast-glob';
21
- import { SKIP_DIRS, SKIP_EXTENSIONS, MAX_FILE_SIZE, loadGitignorePatterns } from '../utils/patterns.js';
21
+ import { SKIP_DIRS, SKIP_EXTENSIONS, SKIP_FILENAMES, MAX_FILE_SIZE, loadGitignorePatterns } from '../utils/patterns.js';
22
22
 
23
23
  // =============================================================================
24
24
  // FINDING FACTORY
@@ -131,6 +131,7 @@ export class BaseAgent {
131
131
  const ext = path.extname(file).toLowerCase();
132
132
  if (SKIP_EXTENSIONS.has(ext)) return false;
133
133
  const basename = path.basename(file);
134
+ if (SKIP_FILENAMES.has(basename)) return false;
134
135
  if (basename.endsWith('.min.js') || basename.endsWith('.min.css')) return false;
135
136
  try {
136
137
  const stats = fs.statSync(file);
@@ -1,301 +1,301 @@
1
- /**
2
- * PII Compliance Agent
3
- * ======================
4
- *
5
- * Detects privacy violations and PII (Personally Identifiable
6
- * Information) exposure in source code.
7
- *
8
- * Checks:
9
- * - PII logged to console/files/external services
10
- * - PII in URLs and query parameters
11
- * - PII in error responses sent to clients
12
- * - PII sent to third-party analytics/tracking
13
- * - Hardcoded PII patterns (SSN, credit cards) in source
14
- * - Unencrypted PII storage patterns
15
- * - Missing data deletion endpoints (GDPR right to erasure)
16
- *
17
- * Maps to: GDPR Articles 5, 25, 32; CCPA; OWASP A01
18
- */
19
-
20
- import path from 'path';
21
- import { BaseAgent, createFinding } from './base-agent.js';
22
-
23
- // =============================================================================
24
- // PII COMPLIANCE PATTERNS
25
- // =============================================================================
26
-
27
- const PATTERNS = [
28
- // ── PII in Logging ───────────────────────────────────────────────────────
29
- {
30
- rule: 'PII_IN_CONSOLE_LOG',
31
- title: 'Privacy: PII Logged to Console',
32
- regex: /console\.(?:log|info|warn|error|debug)\s*\([\s\S]{0,100}(?:email|password|ssn|social.?security|credit.?card|phone.?number|date.?of.?birth|dob|passport|national.?id|driver.?license)/gi,
33
- severity: 'high',
34
- cwe: 'CWE-532',
35
- owasp: 'A09:2021',
36
- description: 'PII fields logged to console. Log output may be stored in log aggregation services, exposing sensitive user data.',
37
- fix: 'Remove PII from log statements. Use structured logging with PII redaction: logger.info({ userId: user.id }) instead of logging full user objects.',
38
- },
39
- {
40
- rule: 'PII_IN_LOGGER',
41
- title: 'Privacy: PII in Structured Logger',
42
- regex: /(?:logger|log|winston|pino|bunyan|morgan)\.(?:info|warn|error|debug|log)\s*\([\s\S]{0,200}(?:email|password|ssn|creditCard|credit_card|phoneNumber|phone_number|dateOfBirth|date_of_birth)/g,
43
- severity: 'high',
44
- cwe: 'CWE-532',
45
- owasp: 'A09:2021',
46
- description: 'PII fields passed to structured logger. These values persist in log storage and may violate data retention policies.',
47
- fix: 'Mask or redact PII before logging: email → "u***@example.com", phone → "***-***-1234".',
48
- },
49
- {
50
- rule: 'PII_FULL_OBJECT_LOG',
51
- title: 'Privacy: Full User Object Logged',
52
- regex: /console\.(?:log|info|warn|error)\s*\(\s*(?:user|customer|patient|member|account|profile|person)\s*\)/gi,
53
- severity: 'medium',
54
- cwe: 'CWE-532',
55
- owasp: 'A09:2021',
56
- confidence: 'medium',
57
- description: 'Full user/customer object passed to console.log. Likely contains PII fields (email, name, phone, address).',
58
- fix: 'Log only necessary identifiers: console.log({ userId: user.id, action: "login" }).',
59
- },
60
-
61
- // ── PII in Error Responses ───────────────────────────────────────────────
62
- {
63
- rule: 'PII_IN_ERROR_RESPONSE',
64
- title: 'Privacy: PII in Error Response to Client',
65
- regex: /(?:res\.(?:json|send|status)|response\.(?:json|send)|jsonify)\s*\(\s*(?:\{[\s\S]{0,200}(?:email|password|ssn|creditCard|phone|user|customer|patient)[\s\S]{0,100}\}|err|error)/g,
66
- severity: 'high',
67
- cwe: 'CWE-209',
68
- owasp: 'A01:2021',
69
- confidence: 'medium',
70
- description: 'PII or user data included in error responses sent to clients. Exposes sensitive information to end users or attackers.',
71
- fix: 'Return generic error messages to clients. Log detailed errors server-side only.',
72
- },
73
- {
74
- rule: 'PII_STACK_TRACE_RESPONSE',
75
- title: 'Privacy: Stack Trace With PII in Response',
76
- regex: /(?:res\.(?:json|send)|response\.(?:json|send))\s*\(\s*\{[\s\S]{0,100}(?:stack|stackTrace|stack_trace)/g,
77
- severity: 'high',
78
- cwe: 'CWE-209',
79
- owasp: 'A01:2021',
80
- description: 'Stack traces sent in API responses may contain PII from variable values in the call chain.',
81
- fix: 'Never send stack traces to clients in production. Use error IDs for correlation.',
82
- },
83
-
84
- // ── PII in URLs ──────────────────────────────────────────────────────────
85
- {
86
- rule: 'PII_IN_URL_PARAMS',
87
- title: 'Privacy: PII in URL Query Parameters',
88
- regex: /(?:url|href|link|redirect|location)\s*[:=][\s\S]{0,100}(?:\?|&)(?:email|phone|ssn|name|address|dob|password)=/gi,
89
- severity: 'high',
90
- cwe: 'CWE-598',
91
- owasp: 'A01:2021',
92
- description: 'PII passed in URL query parameters. URLs are logged in server access logs, browser history, and CDN logs.',
93
- fix: 'Send PII in request body (POST) instead of URL parameters (GET). Use encrypted tokens for cross-page references.',
94
- },
95
- {
96
- rule: 'PII_IN_GET_REQUEST',
97
- title: 'Privacy: PII Sent via GET Request',
98
- regex: /(?:fetch|axios\.get|http\.get|requests\.get|got\.get)\s*\(\s*(?:`[^`]*\$\{[^}]*(?:email|phone|ssn|password|name|address)[^}]*\}`|.*\+\s*(?:email|phone|ssn|password))/g,
99
- severity: 'high',
100
- cwe: 'CWE-598',
101
- owasp: 'A01:2021',
102
- description: 'PII interpolated into GET request URLs. GET parameters are visible in logs, referrer headers, and browser history.',
103
- fix: 'Use POST requests for sending PII. Never include sensitive data in URL parameters.',
104
- },
105
-
106
- // ── PII to Third Parties ─────────────────────────────────────────────────
107
- {
108
- rule: 'PII_TO_ANALYTICS',
109
- title: 'Privacy: PII Sent to Analytics Service',
110
- regex: /(?:analytics|segment|mixpanel|amplitude|posthog|gtag|dataLayer|fbq|intercom|hotjar)[\s\S]{0,50}(?:\.track|\.identify|\.page|\.push|\.event)\s*\([\s\S]{0,200}(?:email|phone|name|address|ssn|dob|userId|user_id)/gi,
111
- severity: 'high',
112
- cwe: 'CWE-359',
113
- owasp: 'A01:2021',
114
- description: 'PII sent to third-party analytics service without visible consent check. May violate GDPR Article 6 (lawful basis) and CCPA.',
115
- fix: 'Hash or anonymize PII before sending to analytics. Implement consent management (check opt-in before tracking).',
116
- },
117
- {
118
- rule: 'PII_TO_ERROR_TRACKING',
119
- title: 'Privacy: PII Sent to Error Tracking Service',
120
- regex: /(?:Sentry|Bugsnag|Rollbar|TrackJS|LogRocket|DataDog|NewRelic)[\s\S]{0,100}(?:setUser|setContext|setExtra|captureException|notify|addBreadcrumb)[\s\S]{0,200}(?:email|phone|name|address|ssn|password)/gi,
121
- severity: 'high',
122
- cwe: 'CWE-359',
123
- owasp: 'A01:2021',
124
- description: 'PII attached to error tracking events. Error tracking services store data externally, potentially in different jurisdictions.',
125
- fix: 'Configure PII scrubbing in your error tracking service. Only send user IDs, not PII fields.',
126
- },
127
-
128
- // ── Hardcoded PII Patterns ───────────────────────────────────────────────
129
- {
130
- rule: 'PII_SSN_IN_CODE',
131
- title: 'Privacy: Social Security Number in Source Code',
132
- regex: /\b\d{3}-\d{2}-\d{4}\b/g,
133
- severity: 'critical',
134
- cwe: 'CWE-312',
135
- owasp: 'A02:2021',
136
- confidence: 'medium',
137
- description: 'Pattern matching a US Social Security Number found in source code. May be test data or a real SSN.',
138
- fix: 'Remove SSNs from source code. Use environment variables or a secrets manager for test data.',
139
- },
140
- {
141
- rule: 'PII_CREDIT_CARD_IN_CODE',
142
- title: 'Privacy: Credit Card Number in Source Code',
143
- regex: /\b(?:4\d{3}|5[1-5]\d{2}|3[47]\d{2}|6(?:011|5\d{2}))[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b/g,
144
- severity: 'critical',
145
- cwe: 'CWE-312',
146
- owasp: 'A02:2021',
147
- confidence: 'medium',
148
- description: 'Pattern matching a credit card number found in source code. Violates PCI DSS requirements.',
149
- fix: 'Remove credit card numbers from source code immediately. Never store raw card numbers — use tokenization.',
150
- },
151
- {
152
- rule: 'PII_EMAIL_HARDCODED',
153
- title: 'Privacy: Real Email Address Hardcoded',
154
- regex: /['"][a-zA-Z0-9._%+-]+@(?!example\.com|test\.com|placeholder|fake|dummy)[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}['"]/g,
155
- severity: 'medium',
156
- cwe: 'CWE-312',
157
- owasp: 'A02:2021',
158
- confidence: 'low',
159
- description: 'Real email address hardcoded in source code (not @example.com). May be PII or a credential.',
160
- fix: 'Use @example.com for test emails. Store real emails in environment variables or config.',
161
- },
162
-
163
- // ── Unencrypted PII Storage ──────────────────────────────────────────────
164
- {
165
- rule: 'PII_PLAINTEXT_PASSWORD_STORE',
166
- title: 'Privacy: Password Stored in Plaintext',
167
- regex: /(?:password|passwd|pwd)\s*[:=]\s*(?:req\.|request\.|body\.|input\.|data\.)(?:password|passwd)/gi,
168
- severity: 'critical',
169
- cwe: 'CWE-256',
170
- owasp: 'A02:2021',
171
- confidence: 'medium',
172
- description: 'Password appears to be stored directly from user input without hashing. Passwords must be hashed before storage.',
173
- fix: 'Hash passwords with bcrypt, scrypt, or argon2 before storing: await bcrypt.hash(password, 12)',
174
- },
175
- {
176
- rule: 'PII_NO_ENCRYPTION_AT_REST',
177
- title: 'Privacy: PII Column Without Encryption',
178
- regex: /(?:CREATE\s+TABLE|addColumn|column|field|attribute)[\s\S]{0,200}(?:ssn|social_security|credit_card|card_number|bank_account|passport|national_id|driver_license)[\s\S]{0,100}(?:VARCHAR|TEXT|STRING|varchar|text|string)(?![\s\S]{0,100}encrypt)/gi,
179
- severity: 'high',
180
- cwe: 'CWE-311',
181
- owasp: 'A02:2021',
182
- confidence: 'medium',
183
- description: 'Database column storing sensitive PII (SSN, credit card, passport) without encryption-at-rest annotation.',
184
- fix: 'Encrypt sensitive PII columns at the application level or use database-level encryption.',
185
- },
186
-
187
- // ── IP Address & Geolocation Logging ─────────────────────────────────────
188
- {
189
- rule: 'PII_IP_LOGGING',
190
- title: 'Privacy: IP Address Logged Without Anonymization',
191
- regex: /(?:console\.log|logger\.\w+|log\.\w+)\s*\([\s\S]{0,100}(?:ip|ipAddress|ip_address|remoteAddress|x-forwarded-for|client.?ip)/gi,
192
- severity: 'medium',
193
- cwe: 'CWE-532',
194
- owasp: 'A09:2021',
195
- confidence: 'medium',
196
- description: 'IP addresses logged without anonymization. Under GDPR, IP addresses are personal data.',
197
- fix: 'Anonymize IP addresses in logs: mask the last octet (192.168.1.xxx) or hash before logging.',
198
- },
199
- {
200
- rule: 'PII_GEOLOCATION_STORAGE',
201
- title: 'Privacy: Precise Geolocation Stored',
202
- regex: /(?:latitude|longitude|lat|lng|geolocation|geoip|geo_location)[\s\S]{0,100}(?:save|store|insert|create|update|write|database|db|mongo|prisma|sequelize)/gi,
203
- severity: 'medium',
204
- cwe: 'CWE-359',
205
- owasp: 'A01:2021',
206
- confidence: 'low',
207
- description: 'Precise geolocation data stored in database. May require explicit consent under GDPR/CCPA.',
208
- fix: 'Reduce precision of stored geolocation (city-level, not street-level). Require explicit consent for precise location.',
209
- },
210
-
211
- // ── Cookie & Tracking Without Consent ────────────────────────────────────
212
- {
213
- rule: 'PII_TRACKING_NO_CONSENT',
214
- title: 'Privacy: Tracking Script Without Consent Check',
215
- regex: /(?:gtag|GoogleAnalytics|ga\s*\(|fbq\s*\(|_paq\.push|hotjar|hj\s*\(|intercom|drift|crisp)[\s\S]{0,100}(?:init|config|identify|track|page)(?![\s\S]{0,300}(?:consent|gdpr|cookie.?banner|opt.?in|permission|accept))/g,
216
- severity: 'medium',
217
- cwe: 'CWE-359',
218
- owasp: 'A01:2021',
219
- confidence: 'low',
220
- description: 'Analytics or tracking script initialized without visible consent check. May violate GDPR ePrivacy Directive.',
221
- fix: 'Load tracking scripts only after user consents. Use a consent management platform (CMP).',
222
- },
223
- ];
224
-
225
- // =============================================================================
226
- // PII COMPLIANCE AGENT
227
- // =============================================================================
228
-
229
- export class PIIComplianceAgent extends BaseAgent {
230
- constructor() {
231
- super(
232
- 'PIIComplianceAgent',
233
- 'Detect PII exposure, privacy violations, and GDPR/CCPA compliance gaps',
234
- 'config'
235
- );
236
- }
237
-
238
- async analyze(context) {
239
- const { files } = context;
240
-
241
- const codeFiles = files.filter(f => {
242
- const ext = path.extname(f).toLowerCase();
243
- return ['.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs', '.py', '.rb', '.php', '.go', '.java', '.sql'].includes(ext);
244
- });
245
-
246
- let findings = [];
247
-
248
- // ── 1. Scan code files with PII patterns ─────────────────────────────
249
- for (const file of codeFiles) {
250
- findings = findings.concat(this.scanFileWithPatterns(file, PATTERNS));
251
- }
252
-
253
- // ── 2. Check for missing data deletion endpoint (GDPR right to erasure)
254
- findings = findings.concat(this._checkDataDeletion(context));
255
-
256
- return findings;
257
- }
258
-
259
- /**
260
- * Check if the project has user data deletion capability (GDPR Article 17).
261
- */
262
- _checkDataDeletion(context) {
263
- const { files } = context;
264
- const findings = [];
265
-
266
- // Check if there are user models/routes
267
- const hasUserModel = files.some(f =>
268
- /(?:user|account|customer|member|profile)(?:\.model|\.schema|\.entity|Model|Schema)/i.test(f)
269
- );
270
-
271
- if (!hasUserModel) return findings;
272
-
273
- // Check if there's a delete user endpoint or function
274
- const hasDeleteEndpoint = files.some(f => {
275
- const content = this.readFile(f);
276
- if (!content) return false;
277
- return /(?:delete.*user|remove.*account|erase.*data|destroy.*profile|gdpr.*delete|data.*deletion|right.*erasure|forget.*me)/i.test(content);
278
- });
279
-
280
- if (!hasDeleteEndpoint) {
281
- findings.push(createFinding({
282
- file: 'project',
283
- line: 0,
284
- severity: 'medium',
285
- category: this.category,
286
- rule: 'PII_NO_DATA_DELETION',
287
- title: 'Privacy: No User Data Deletion Capability',
288
- description: 'Project has user models but no visible data deletion endpoint or function. GDPR Article 17 requires the ability to erase personal data on request.',
289
- matched: 'No delete/erase/destroy user endpoint found',
290
- confidence: 'low',
291
- cwe: 'CWE-359',
292
- owasp: 'A01:2021',
293
- fix: 'Implement a user data deletion endpoint (DELETE /api/users/:id or similar) that removes all PII from your systems.',
294
- }));
295
- }
296
-
297
- return findings;
298
- }
299
- }
300
-
301
- export default PIIComplianceAgent;
1
+ /**
2
+ * PII Compliance Agent
3
+ * ======================
4
+ *
5
+ * Detects privacy violations and PII (Personally Identifiable
6
+ * Information) exposure in source code.
7
+ *
8
+ * Checks:
9
+ * - PII logged to console/files/external services
10
+ * - PII in URLs and query parameters
11
+ * - PII in error responses sent to clients
12
+ * - PII sent to third-party analytics/tracking
13
+ * - Hardcoded PII patterns (SSN, credit cards) in source
14
+ * - Unencrypted PII storage patterns
15
+ * - Missing data deletion endpoints (GDPR right to erasure)
16
+ *
17
+ * Maps to: GDPR Articles 5, 25, 32; CCPA; OWASP A01
18
+ */
19
+
20
+ import path from 'path';
21
+ import { BaseAgent, createFinding } from './base-agent.js';
22
+
23
+ // =============================================================================
24
+ // PII COMPLIANCE PATTERNS
25
+ // =============================================================================
26
+
27
+ const PATTERNS = [
28
+ // ── PII in Logging ───────────────────────────────────────────────────────
29
+ {
30
+ rule: 'PII_IN_CONSOLE_LOG',
31
+ title: 'Privacy: PII Logged to Console',
32
+ regex: /console\.(?:log|info|warn|error|debug)\s*\([\s\S]{0,100}(?:email|password|ssn|social.?security|credit.?card|phone.?number|date.?of.?birth|dob|passport|national.?id|driver.?license)/gi,
33
+ severity: 'high',
34
+ cwe: 'CWE-532',
35
+ owasp: 'A09:2021',
36
+ description: 'PII fields logged to console. Log output may be stored in log aggregation services, exposing sensitive user data.',
37
+ fix: 'Remove PII from log statements. Use structured logging with PII redaction: logger.info({ userId: user.id }) instead of logging full user objects.',
38
+ },
39
+ {
40
+ rule: 'PII_IN_LOGGER',
41
+ title: 'Privacy: PII in Structured Logger',
42
+ regex: /(?:logger|log|winston|pino|bunyan|morgan)\.(?:info|warn|error|debug|log)\s*\([\s\S]{0,200}(?:email|password|ssn|creditCard|credit_card|phoneNumber|phone_number|dateOfBirth|date_of_birth)/g,
43
+ severity: 'high',
44
+ cwe: 'CWE-532',
45
+ owasp: 'A09:2021',
46
+ description: 'PII fields passed to structured logger. These values persist in log storage and may violate data retention policies.',
47
+ fix: 'Mask or redact PII before logging: email → "u***@example.com", phone → "***-***-1234".',
48
+ },
49
+ {
50
+ rule: 'PII_FULL_OBJECT_LOG',
51
+ title: 'Privacy: Full User Object Logged',
52
+ regex: /console\.(?:log|info|warn|error)\s*\(\s*(?:user|customer|patient|member|account|profile|person)\s*\)/gi,
53
+ severity: 'medium',
54
+ cwe: 'CWE-532',
55
+ owasp: 'A09:2021',
56
+ confidence: 'medium',
57
+ description: 'Full user/customer object passed to console.log. Likely contains PII fields (email, name, phone, address).',
58
+ fix: 'Log only necessary identifiers: console.log({ userId: user.id, action: "login" }).',
59
+ },
60
+
61
+ // ── PII in Error Responses ───────────────────────────────────────────────
62
+ {
63
+ rule: 'PII_IN_ERROR_RESPONSE',
64
+ title: 'Privacy: PII in Error Response to Client',
65
+ regex: /(?:res\.(?:json|send|status)|response\.(?:json|send)|jsonify)\s*\(\s*(?:\{[\s\S]{0,200}(?:email|password|ssn|creditCard|phone|user|customer|patient)[\s\S]{0,100}\}|err|error)/g,
66
+ severity: 'high',
67
+ cwe: 'CWE-209',
68
+ owasp: 'A01:2021',
69
+ confidence: 'medium',
70
+ description: 'PII or user data included in error responses sent to clients. Exposes sensitive information to end users or attackers.',
71
+ fix: 'Return generic error messages to clients. Log detailed errors server-side only.',
72
+ },
73
+ {
74
+ rule: 'PII_STACK_TRACE_RESPONSE',
75
+ title: 'Privacy: Stack Trace With PII in Response',
76
+ regex: /(?:res\.(?:json|send)|response\.(?:json|send))\s*\(\s*\{[\s\S]{0,100}(?:stack|stackTrace|stack_trace)/g,
77
+ severity: 'high',
78
+ cwe: 'CWE-209',
79
+ owasp: 'A01:2021',
80
+ description: 'Stack traces sent in API responses may contain PII from variable values in the call chain.',
81
+ fix: 'Never send stack traces to clients in production. Use error IDs for correlation.',
82
+ },
83
+
84
+ // ── PII in URLs ──────────────────────────────────────────────────────────
85
+ {
86
+ rule: 'PII_IN_URL_PARAMS',
87
+ title: 'Privacy: PII in URL Query Parameters',
88
+ regex: /(?:url|href|link|redirect|location)\s*[:=][\s\S]{0,100}(?:\?|&)(?:email|phone|ssn|name|address|dob|password)=/gi,
89
+ severity: 'high',
90
+ cwe: 'CWE-598',
91
+ owasp: 'A01:2021',
92
+ description: 'PII passed in URL query parameters. URLs are logged in server access logs, browser history, and CDN logs.',
93
+ fix: 'Send PII in request body (POST) instead of URL parameters (GET). Use encrypted tokens for cross-page references.',
94
+ },
95
+ {
96
+ rule: 'PII_IN_GET_REQUEST',
97
+ title: 'Privacy: PII Sent via GET Request',
98
+ regex: /(?:fetch|axios\.get|http\.get|requests\.get|got\.get)\s*\(\s*(?:`[^`]*\$\{[^}]*(?:email|phone|ssn|password|name|address)[^}]*\}`|.*\+\s*(?:email|phone|ssn|password))/g,
99
+ severity: 'high',
100
+ cwe: 'CWE-598',
101
+ owasp: 'A01:2021',
102
+ description: 'PII interpolated into GET request URLs. GET parameters are visible in logs, referrer headers, and browser history.',
103
+ fix: 'Use POST requests for sending PII. Never include sensitive data in URL parameters.',
104
+ },
105
+
106
+ // ── PII to Third Parties ─────────────────────────────────────────────────
107
+ {
108
+ rule: 'PII_TO_ANALYTICS',
109
+ title: 'Privacy: PII Sent to Analytics Service',
110
+ regex: /(?:analytics|segment|mixpanel|amplitude|posthog|gtag|dataLayer|fbq|intercom|hotjar)[\s\S]{0,50}(?:\.track|\.identify|\.page|\.push|\.event)\s*\([\s\S]{0,200}(?:email|phone|name|address|ssn|dob|userId|user_id)/gi,
111
+ severity: 'high',
112
+ cwe: 'CWE-359',
113
+ owasp: 'A01:2021',
114
+ description: 'PII sent to third-party analytics service without visible consent check. May violate GDPR Article 6 (lawful basis) and CCPA.',
115
+ fix: 'Hash or anonymize PII before sending to analytics. Implement consent management (check opt-in before tracking).',
116
+ },
117
+ {
118
+ rule: 'PII_TO_ERROR_TRACKING',
119
+ title: 'Privacy: PII Sent to Error Tracking Service',
120
+ regex: /(?:Sentry|Bugsnag|Rollbar|TrackJS|LogRocket|DataDog|NewRelic)[\s\S]{0,100}(?:setUser|setContext|setExtra|captureException|notify|addBreadcrumb)[\s\S]{0,200}(?:email|phone|name|address|ssn|password)/gi,
121
+ severity: 'high',
122
+ cwe: 'CWE-359',
123
+ owasp: 'A01:2021',
124
+ description: 'PII attached to error tracking events. Error tracking services store data externally, potentially in different jurisdictions.',
125
+ fix: 'Configure PII scrubbing in your error tracking service. Only send user IDs, not PII fields.',
126
+ },
127
+
128
+ // ── Hardcoded PII Patterns ───────────────────────────────────────────────
129
+ {
130
+ rule: 'PII_SSN_IN_CODE',
131
+ title: 'Privacy: Social Security Number in Source Code',
132
+ regex: /\b\d{3}-\d{2}-\d{4}\b/g,
133
+ severity: 'critical',
134
+ cwe: 'CWE-312',
135
+ owasp: 'A02:2021',
136
+ confidence: 'medium',
137
+ description: 'Pattern matching a US Social Security Number found in source code. May be test data or a real SSN.',
138
+ fix: 'Remove SSNs from source code. Use environment variables or a secrets manager for test data.',
139
+ },
140
+ {
141
+ rule: 'PII_CREDIT_CARD_IN_CODE',
142
+ title: 'Privacy: Credit Card Number in Source Code',
143
+ regex: /\b(?:4\d{3}|5[1-5]\d{2}|3[47]\d{2}|6(?:011|5\d{2}))[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b/g,
144
+ severity: 'critical',
145
+ cwe: 'CWE-312',
146
+ owasp: 'A02:2021',
147
+ confidence: 'medium',
148
+ description: 'Pattern matching a credit card number found in source code. Violates PCI DSS requirements.',
149
+ fix: 'Remove credit card numbers from source code immediately. Never store raw card numbers — use tokenization.',
150
+ },
151
+ {
152
+ rule: 'PII_EMAIL_HARDCODED',
153
+ title: 'Privacy: Real Email Address Hardcoded',
154
+ regex: /['"][a-zA-Z0-9._%+-]+@(?!example\.com|test\.com|placeholder|fake|dummy)[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}['"]/g,
155
+ severity: 'medium',
156
+ cwe: 'CWE-312',
157
+ owasp: 'A02:2021',
158
+ confidence: 'low',
159
+ description: 'Real email address hardcoded in source code (not @example.com). May be PII or a credential.',
160
+ fix: 'Use @example.com for test emails. Store real emails in environment variables or config.',
161
+ },
162
+
163
+ // ── Unencrypted PII Storage ──────────────────────────────────────────────
164
+ {
165
+ rule: 'PII_PLAINTEXT_PASSWORD_STORE',
166
+ title: 'Privacy: Password Stored in Plaintext',
167
+ regex: /(?:password|passwd|pwd)\s*[:=]\s*(?:req\.|request\.|body\.|input\.|data\.)(?:password|passwd)/gi,
168
+ severity: 'critical',
169
+ cwe: 'CWE-256',
170
+ owasp: 'A02:2021',
171
+ confidence: 'medium',
172
+ description: 'Password appears to be stored directly from user input without hashing. Passwords must be hashed before storage.',
173
+ fix: 'Hash passwords with bcrypt, scrypt, or argon2 before storing: await bcrypt.hash(password, 12)',
174
+ },
175
+ {
176
+ rule: 'PII_NO_ENCRYPTION_AT_REST',
177
+ title: 'Privacy: PII Column Without Encryption',
178
+ regex: /(?:CREATE\s+TABLE|addColumn|column|field|attribute)[\s\S]{0,200}(?:ssn|social_security|credit_card|card_number|bank_account|passport|national_id|driver_license)[\s\S]{0,100}(?:VARCHAR|TEXT|STRING|varchar|text|string)(?![\s\S]{0,100}encrypt)/gi,
179
+ severity: 'high',
180
+ cwe: 'CWE-311',
181
+ owasp: 'A02:2021',
182
+ confidence: 'medium',
183
+ description: 'Database column storing sensitive PII (SSN, credit card, passport) without encryption-at-rest annotation.',
184
+ fix: 'Encrypt sensitive PII columns at the application level or use database-level encryption.',
185
+ },
186
+
187
+ // ── IP Address & Geolocation Logging ─────────────────────────────────────
188
+ {
189
+ rule: 'PII_IP_LOGGING',
190
+ title: 'Privacy: IP Address Logged Without Anonymization',
191
+ regex: /(?:console\.log|logger\.\w+|log\.\w+)\s*\([\s\S]{0,100}(?:(?<![a-z])ip(?![a-z])|ipAddress|ip_address|remoteAddress|x-forwarded-for|client.?ip)/gi,
192
+ severity: 'medium',
193
+ cwe: 'CWE-532',
194
+ owasp: 'A09:2021',
195
+ confidence: 'medium',
196
+ description: 'IP addresses logged without anonymization. Under GDPR, IP addresses are personal data.',
197
+ fix: 'Anonymize IP addresses in logs: mask the last octet (192.168.1.xxx) or hash before logging.',
198
+ },
199
+ {
200
+ rule: 'PII_GEOLOCATION_STORAGE',
201
+ title: 'Privacy: Precise Geolocation Stored',
202
+ regex: /(?:latitude|longitude|lat|lng|geolocation|geoip|geo_location)[\s\S]{0,100}(?:save|store|insert|create|update|write|database|db|mongo|prisma|sequelize)/gi,
203
+ severity: 'medium',
204
+ cwe: 'CWE-359',
205
+ owasp: 'A01:2021',
206
+ confidence: 'low',
207
+ description: 'Precise geolocation data stored in database. May require explicit consent under GDPR/CCPA.',
208
+ fix: 'Reduce precision of stored geolocation (city-level, not street-level). Require explicit consent for precise location.',
209
+ },
210
+
211
+ // ── Cookie & Tracking Without Consent ────────────────────────────────────
212
+ {
213
+ rule: 'PII_TRACKING_NO_CONSENT',
214
+ title: 'Privacy: Tracking Script Without Consent Check',
215
+ regex: /(?:gtag|GoogleAnalytics|ga\s*\(|fbq\s*\(|_paq\.push|hotjar|hj\s*\(|intercom|drift|crisp)[\s\S]{0,100}(?:init|config|identify|track|page)(?![\s\S]{0,300}(?:consent|gdpr|cookie.?banner|opt.?in|permission|accept))/g,
216
+ severity: 'medium',
217
+ cwe: 'CWE-359',
218
+ owasp: 'A01:2021',
219
+ confidence: 'low',
220
+ description: 'Analytics or tracking script initialized without visible consent check. May violate GDPR ePrivacy Directive.',
221
+ fix: 'Load tracking scripts only after user consents. Use a consent management platform (CMP).',
222
+ },
223
+ ];
224
+
225
+ // =============================================================================
226
+ // PII COMPLIANCE AGENT
227
+ // =============================================================================
228
+
229
+ export class PIIComplianceAgent extends BaseAgent {
230
+ constructor() {
231
+ super(
232
+ 'PIIComplianceAgent',
233
+ 'Detect PII exposure, privacy violations, and GDPR/CCPA compliance gaps',
234
+ 'config'
235
+ );
236
+ }
237
+
238
+ async analyze(context) {
239
+ const { files } = context;
240
+
241
+ const codeFiles = files.filter(f => {
242
+ const ext = path.extname(f).toLowerCase();
243
+ return ['.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs', '.py', '.rb', '.php', '.go', '.java', '.sql'].includes(ext);
244
+ });
245
+
246
+ let findings = [];
247
+
248
+ // ── 1. Scan code files with PII patterns ─────────────────────────────
249
+ for (const file of codeFiles) {
250
+ findings = findings.concat(this.scanFileWithPatterns(file, PATTERNS));
251
+ }
252
+
253
+ // ── 2. Check for missing data deletion endpoint (GDPR right to erasure)
254
+ findings = findings.concat(this._checkDataDeletion(context));
255
+
256
+ return findings;
257
+ }
258
+
259
+ /**
260
+ * Check if the project has user data deletion capability (GDPR Article 17).
261
+ */
262
+ _checkDataDeletion(context) {
263
+ const { files } = context;
264
+ const findings = [];
265
+
266
+ // Check if there are user models/routes
267
+ const hasUserModel = files.some(f =>
268
+ /(?:user|account|customer|member|profile)(?:\.model|\.schema|\.entity|Model|Schema)/i.test(f)
269
+ );
270
+
271
+ if (!hasUserModel) return findings;
272
+
273
+ // Check if there's a delete user endpoint or function
274
+ const hasDeleteEndpoint = files.some(f => {
275
+ const content = this.readFile(f);
276
+ if (!content) return false;
277
+ return /(?:delete.*user|remove.*account|erase.*data|destroy.*profile|gdpr.*delete|data.*deletion|right.*erasure|forget.*me)/i.test(content);
278
+ });
279
+
280
+ if (!hasDeleteEndpoint) {
281
+ findings.push(createFinding({
282
+ file: 'project',
283
+ line: 0,
284
+ severity: 'medium',
285
+ category: this.category,
286
+ rule: 'PII_NO_DATA_DELETION',
287
+ title: 'Privacy: No User Data Deletion Capability',
288
+ description: 'Project has user models but no visible data deletion endpoint or function. GDPR Article 17 requires the ability to erase personal data on request.',
289
+ matched: 'No delete/erase/destroy user endpoint found',
290
+ confidence: 'low',
291
+ cwe: 'CWE-359',
292
+ owasp: 'A01:2021',
293
+ fix: 'Implement a user data deletion endpoint (DELETE /api/users/:id or similar) that removes all PII from your systems.',
294
+ }));
295
+ }
296
+
297
+ return findings;
298
+ }
299
+ }
300
+
301
+ export default PIIComplianceAgent;
@@ -33,6 +33,7 @@ import {
33
33
  SECURITY_PATTERNS,
34
34
  SKIP_DIRS,
35
35
  SKIP_EXTENSIONS,
36
+ SKIP_FILENAMES,
36
37
  TEST_FILE_PATTERNS,
37
38
  MAX_FILE_SIZE
38
39
  } from '../utils/patterns.js';
@@ -270,6 +271,7 @@ async function scanProject(rootPath) {
270
271
  const files = allFiles.filter(file => {
271
272
  const ext = path.extname(file).toLowerCase();
272
273
  if (SKIP_EXTENSIONS.has(ext)) return false;
274
+ if (SKIP_FILENAMES.has(path.basename(file))) return false;
273
275
  const basename = path.basename(file);
274
276
  if (basename.endsWith('.min.js') || basename.endsWith('.min.css')) return false;
275
277
  if (TEST_FILE_PATTERNS.some(p => p.test(file))) return false;