ship-safe 4.3.0 → 5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -23
- package/cli/__tests__/agents.test.js +579 -0
- package/cli/agents/agentic-security-agent.js +261 -0
- package/cli/agents/base-agent.js +11 -1
- package/cli/agents/deep-analyzer.js +333 -0
- package/cli/agents/index.js +16 -1
- package/cli/agents/injection-tester.js +45 -0
- package/cli/agents/mcp-security-agent.js +358 -0
- package/cli/agents/mobile-scanner.js +6 -0
- package/cli/agents/orchestrator.js +67 -8
- package/cli/agents/pii-compliance-agent.js +301 -0
- package/cli/agents/rag-security-agent.js +204 -0
- package/cli/agents/sbom-generator.js +100 -11
- package/cli/agents/scoring-engine.js +4 -0
- package/cli/agents/supabase-rls-agent.js +6 -0
- package/cli/agents/supply-chain-agent.js +152 -1
- package/cli/agents/verifier-agent.js +292 -0
- package/cli/bin/ship-safe.js +32 -6
- package/cli/commands/agent.js +2 -0
- package/cli/commands/audit.js +103 -7
- package/cli/commands/baseline.js +2 -1
- package/cli/commands/ci.js +262 -0
- package/cli/commands/fix.js +218 -216
- package/cli/commands/mcp.js +304 -303
- package/cli/commands/red-team.js +8 -2
- package/cli/commands/remediate.js +2 -0
- package/cli/commands/scan.js +567 -565
- package/cli/commands/score.js +2 -0
- package/cli/commands/watch.js +161 -160
- package/cli/index.js +1 -1
- package/cli/utils/patterns.js +1118 -1104
- package/cli/utils/secrets-verifier.js +247 -0
- package/package.json +2 -2
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PII Compliance Agent
|
|
3
|
+
* ======================
|
|
4
|
+
*
|
|
5
|
+
* Detects privacy violations and PII (Personally Identifiable
|
|
6
|
+
* Information) exposure in source code.
|
|
7
|
+
*
|
|
8
|
+
* Checks:
|
|
9
|
+
* - PII logged to console/files/external services
|
|
10
|
+
* - PII in URLs and query parameters
|
|
11
|
+
* - PII in error responses sent to clients
|
|
12
|
+
* - PII sent to third-party analytics/tracking
|
|
13
|
+
* - Hardcoded PII patterns (SSN, credit cards) in source
|
|
14
|
+
* - Unencrypted PII storage patterns
|
|
15
|
+
* - Missing data deletion endpoints (GDPR right to erasure)
|
|
16
|
+
*
|
|
17
|
+
* Maps to: GDPR Articles 5, 25, 32; CCPA; OWASP A01
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import path from 'path';
|
|
21
|
+
import { BaseAgent, createFinding } from './base-agent.js';
|
|
22
|
+
|
|
23
|
+
// =============================================================================
|
|
24
|
+
// PII COMPLIANCE PATTERNS
|
|
25
|
+
// =============================================================================
|
|
26
|
+
|
|
27
|
+
/**
 * Regex rules used by the PII compliance scan.
 *
 * Every entry carries: `rule` (identifier), `title`, `regex`, `severity`,
 * `cwe`, `owasp`, `description` and `fix`; some entries also set
 * `confidence`. The table is handed as-is to `scanFileWithPatterns`.
 *
 * All regexes carry the `g` flag, so they keep `lastIndex` state between
 * `.test()` / `.exec()` calls — reset it (or use `String.prototype.match`)
 * when reusing an entry directly.
 */
const PATTERNS = [
  // ── PII in Logging ───────────────────────────────────────────────────────
  {
    rule: 'PII_IN_CONSOLE_LOG',
    title: 'Privacy: PII Logged to Console',
    regex: /console\.(?:log|info|warn|error|debug)\s*\([\s\S]{0,100}(?:email|password|ssn|social.?security|credit.?card|phone.?number|date.?of.?birth|dob|passport|national.?id|driver.?license)/gi,
    severity: 'high',
    cwe: 'CWE-532',
    owasp: 'A09:2021',
    description: 'PII fields logged to console. Log output may be stored in log aggregation services, exposing sensitive user data.',
    fix: 'Remove PII from log statements. Use structured logging with PII redaction: logger.info({ userId: user.id }) instead of logging full user objects.',
  },
  {
    rule: 'PII_IN_LOGGER',
    title: 'Privacy: PII in Structured Logger',
    // Case-insensitive (like PII_IN_CONSOLE_LOG) so camelCase compounds such
    // as `userEmail` or `newPassword` are caught, not only lowercase names.
    regex: /(?:logger|log|winston|pino|bunyan|morgan)\.(?:info|warn|error|debug|log)\s*\([\s\S]{0,200}(?:email|password|ssn|creditCard|credit_card|phoneNumber|phone_number|dateOfBirth|date_of_birth)/gi,
    severity: 'high',
    cwe: 'CWE-532',
    owasp: 'A09:2021',
    description: 'PII fields passed to structured logger. These values persist in log storage and may violate data retention policies.',
    fix: 'Mask or redact PII before logging: email → "u***@example.com", phone → "***-***-1234".',
  },
  {
    rule: 'PII_FULL_OBJECT_LOG',
    title: 'Privacy: Full User Object Logged',
    regex: /console\.(?:log|info|warn|error)\s*\(\s*(?:user|customer|patient|member|account|profile|person)\s*\)/gi,
    severity: 'medium',
    cwe: 'CWE-532',
    owasp: 'A09:2021',
    confidence: 'medium',
    description: 'Full user/customer object passed to console.log. Likely contains PII fields (email, name, phone, address).',
    fix: 'Log only necessary identifiers: console.log({ userId: user.id, action: "login" }).',
  },

  // ── PII in Error Responses ───────────────────────────────────────────────
  {
    rule: 'PII_IN_ERROR_RESPONSE',
    title: 'Privacy: PII in Error Response to Client',
    regex: /(?:res\.(?:json|send|status)|response\.(?:json|send)|jsonify)\s*\(\s*(?:\{[\s\S]{0,200}(?:email|password|ssn|creditCard|phone|user|customer|patient)[\s\S]{0,100}\}|err|error)/g,
    severity: 'high',
    cwe: 'CWE-209',
    owasp: 'A01:2021',
    confidence: 'medium',
    description: 'PII or user data included in error responses sent to clients. Exposes sensitive information to end users or attackers.',
    fix: 'Return generic error messages to clients. Log detailed errors server-side only.',
  },
  {
    rule: 'PII_STACK_TRACE_RESPONSE',
    title: 'Privacy: Stack Trace With PII in Response',
    regex: /(?:res\.(?:json|send)|response\.(?:json|send))\s*\(\s*\{[\s\S]{0,100}(?:stack|stackTrace|stack_trace)/g,
    severity: 'high',
    cwe: 'CWE-209',
    owasp: 'A01:2021',
    description: 'Stack traces sent in API responses may contain PII from variable values in the call chain.',
    fix: 'Never send stack traces to clients in production. Use error IDs for correlation.',
  },

  // ── PII in URLs ──────────────────────────────────────────────────────────
  {
    rule: 'PII_IN_URL_PARAMS',
    title: 'Privacy: PII in URL Query Parameters',
    regex: /(?:url|href|link|redirect|location)\s*[:=][\s\S]{0,100}(?:\?|&)(?:email|phone|ssn|name|address|dob|password)=/gi,
    severity: 'high',
    cwe: 'CWE-598',
    owasp: 'A01:2021',
    description: 'PII passed in URL query parameters. URLs are logged in server access logs, browser history, and CDN logs.',
    fix: 'Send PII in request body (POST) instead of URL parameters (GET). Use encrypted tokens for cross-page references.',
  },
  {
    rule: 'PII_IN_GET_REQUEST',
    title: 'Privacy: PII Sent via GET Request',
    regex: /(?:fetch|axios\.get|http\.get|requests\.get|got\.get)\s*\(\s*(?:`[^`]*\$\{[^}]*(?:email|phone|ssn|password|name|address)[^}]*\}`|.*\+\s*(?:email|phone|ssn|password))/g,
    severity: 'high',
    cwe: 'CWE-598',
    owasp: 'A01:2021',
    description: 'PII interpolated into GET request URLs. GET parameters are visible in logs, referrer headers, and browser history.',
    fix: 'Use POST requests for sending PII. Never include sensitive data in URL parameters.',
  },

  // ── PII to Third Parties ─────────────────────────────────────────────────
  {
    rule: 'PII_TO_ANALYTICS',
    title: 'Privacy: PII Sent to Analytics Service',
    regex: /(?:analytics|segment|mixpanel|amplitude|posthog|gtag|dataLayer|fbq|intercom|hotjar)[\s\S]{0,50}(?:\.track|\.identify|\.page|\.push|\.event)\s*\([\s\S]{0,200}(?:email|phone|name|address|ssn|dob|userId|user_id)/gi,
    severity: 'high',
    cwe: 'CWE-359',
    owasp: 'A01:2021',
    description: 'PII sent to third-party analytics service without visible consent check. May violate GDPR Article 6 (lawful basis) and CCPA.',
    fix: 'Hash or anonymize PII before sending to analytics. Implement consent management (check opt-in before tracking).',
  },
  {
    rule: 'PII_TO_ERROR_TRACKING',
    title: 'Privacy: PII Sent to Error Tracking Service',
    regex: /(?:Sentry|Bugsnag|Rollbar|TrackJS|LogRocket|DataDog|NewRelic)[\s\S]{0,100}(?:setUser|setContext|setExtra|captureException|notify|addBreadcrumb)[\s\S]{0,200}(?:email|phone|name|address|ssn|password)/gi,
    severity: 'high',
    cwe: 'CWE-359',
    owasp: 'A01:2021',
    description: 'PII attached to error tracking events. Error tracking services store data externally, potentially in different jurisdictions.',
    fix: 'Configure PII scrubbing in your error tracking service. Only send user IDs, not PII fields.',
  },

  // ── Hardcoded PII Patterns ───────────────────────────────────────────────
  {
    rule: 'PII_SSN_IN_CODE',
    title: 'Privacy: Social Security Number in Source Code',
    regex: /\b\d{3}-\d{2}-\d{4}\b/g,
    severity: 'critical',
    cwe: 'CWE-312',
    owasp: 'A02:2021',
    confidence: 'medium',
    description: 'Pattern matching a US Social Security Number found in source code. May be test data or a real SSN.',
    fix: 'Remove SSNs from source code. Use environment variables or a secrets manager for test data.',
  },
  {
    rule: 'PII_CREDIT_CARD_IN_CODE',
    title: 'Privacy: Credit Card Number in Source Code',
    // First alternative: 16-digit numbers grouped 4-4-4-4 (unchanged).
    // Second alternative: American Express numbers, which are 15 digits
    // grouped 4-6-5 and therefore never matched the 16-digit form.
    regex: /\b(?:(?:4\d{3}|5[1-5]\d{2}|3[47]\d{2}|6(?:011|5\d{2}))[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}|3[47]\d{2}[- ]?\d{6}[- ]?\d{5})\b/g,
    severity: 'critical',
    cwe: 'CWE-312',
    owasp: 'A02:2021',
    confidence: 'medium',
    description: 'Pattern matching a credit card number found in source code. Violates PCI DSS requirements.',
    fix: 'Remove credit card numbers from source code immediately. Never store raw card numbers — use tokenization.',
  },
  {
    rule: 'PII_EMAIL_HARDCODED',
    title: 'Privacy: Real Email Address Hardcoded',
    regex: /['"][a-zA-Z0-9._%+-]+@(?!example\.com|test\.com|placeholder|fake|dummy)[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}['"]/g,
    severity: 'medium',
    cwe: 'CWE-312',
    owasp: 'A02:2021',
    confidence: 'low',
    description: 'Real email address hardcoded in source code (not @example.com). May be PII or a credential.',
    fix: 'Use @example.com for test emails. Store real emails in environment variables or config.',
  },

  // ── Unencrypted PII Storage ──────────────────────────────────────────────
  {
    rule: 'PII_PLAINTEXT_PASSWORD_STORE',
    title: 'Privacy: Password Stored in Plaintext',
    regex: /(?:password|passwd|pwd)\s*[:=]\s*(?:req\.|request\.|body\.|input\.|data\.)(?:password|passwd)/gi,
    severity: 'critical',
    cwe: 'CWE-256',
    owasp: 'A02:2021',
    confidence: 'medium',
    description: 'Password appears to be stored directly from user input without hashing. Passwords must be hashed before storage.',
    fix: 'Hash passwords with bcrypt, scrypt, or argon2 before storing: await bcrypt.hash(password, 12)',
  },
  {
    rule: 'PII_NO_ENCRYPTION_AT_REST',
    title: 'Privacy: PII Column Without Encryption',
    regex: /(?:CREATE\s+TABLE|addColumn|column|field|attribute)[\s\S]{0,200}(?:ssn|social_security|credit_card|card_number|bank_account|passport|national_id|driver_license)[\s\S]{0,100}(?:VARCHAR|TEXT|STRING|varchar|text|string)(?![\s\S]{0,100}encrypt)/gi,
    severity: 'high',
    cwe: 'CWE-311',
    owasp: 'A02:2021',
    confidence: 'medium',
    description: 'Database column storing sensitive PII (SSN, credit card, passport) without encryption-at-rest annotation.',
    fix: 'Encrypt sensitive PII columns at the application level or use database-level encryption.',
  },

  // ── IP Address & Geolocation Logging ─────────────────────────────────────
  {
    rule: 'PII_IP_LOGGING',
    title: 'Privacy: IP Address Logged Without Anonymization',
    regex: /(?:console\.log|logger\.\w+|log\.\w+)\s*\([\s\S]{0,100}(?:(?<![a-z])ip(?![a-z])|ipAddress|ip_address|remoteAddress|x-forwarded-for|client.?ip)/gi,
    severity: 'medium',
    cwe: 'CWE-532',
    owasp: 'A09:2021',
    confidence: 'medium',
    description: 'IP addresses logged without anonymization. Under GDPR, IP addresses are personal data.',
    fix: 'Anonymize IP addresses in logs: mask the last octet (192.168.1.xxx) or hash before logging.',
  },
  {
    rule: 'PII_GEOLOCATION_STORAGE',
    title: 'Privacy: Precise Geolocation Stored',
    regex: /(?:latitude|longitude|lat|lng|geolocation|geoip|geo_location)[\s\S]{0,100}(?:save|store|insert|create|update|write|database|db|mongo|prisma|sequelize)/gi,
    severity: 'medium',
    cwe: 'CWE-359',
    owasp: 'A01:2021',
    confidence: 'low',
    description: 'Precise geolocation data stored in database. May require explicit consent under GDPR/CCPA.',
    fix: 'Reduce precision of stored geolocation (city-level, not street-level). Require explicit consent for precise location.',
  },

  // ── Cookie & Tracking Without Consent ────────────────────────────────────
  {
    rule: 'PII_TRACKING_NO_CONSENT',
    title: 'Privacy: Tracking Script Without Consent Check',
    regex: /(?:gtag|GoogleAnalytics|ga\s*\(|fbq\s*\(|_paq\.push|hotjar|hj\s*\(|intercom|drift|crisp)[\s\S]{0,100}(?:init|config|identify|track|page)(?![\s\S]{0,300}(?:consent|gdpr|cookie.?banner|opt.?in|permission|accept))/g,
    severity: 'medium',
    cwe: 'CWE-359',
    owasp: 'A01:2021',
    confidence: 'low',
    description: 'Analytics or tracking script initialized without visible consent check. May violate GDPR ePrivacy Directive.',
    fix: 'Load tracking scripts only after user consents. Use a consent management platform (CMP).',
  },
];
|
|
224
|
+
|
|
225
|
+
// =============================================================================
|
|
226
|
+
// PII COMPLIANCE AGENT
|
|
227
|
+
// =============================================================================
|
|
228
|
+
|
|
229
|
+
/**
 * Agent that looks for PII exposure in project files.
 *
 * Two checks are run by `analyze`:
 *   1. every file with a recognised source extension is passed, together
 *      with the PATTERNS table, to `this.scanFileWithPatterns`
 *      (not defined in this class — presumably inherited from BaseAgent);
 *   2. a project-level heuristic for a missing user-data deletion path.
 */
export class PIIComplianceAgent extends BaseAgent {
  constructor() {
    const agentName = 'PIIComplianceAgent';
    const summary = 'Detect PII exposure, privacy violations, and GDPR/CCPA compliance gaps';
    // NOTE(review): third BaseAgent argument looks like the finding category — confirm in base-agent.js.
    super(agentName, summary, 'config');
  }

  /**
   * Run the pattern scan plus the data-deletion check.
   *
   * @param {object} context - Scan context; only `context.files` (array of file paths) is read here.
   * @returns {Promise<Array>} Pattern findings followed by any data-deletion finding.
   */
  async analyze(context) {
    const scannableExtensions = new Set([
      '.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs', '.py', '.rb', '.php', '.go', '.java', '.sql',
    ]);

    // ── 1. Scan code files with PII patterns ─────────────────────────────
    const sourceFiles = context.files.filter((filePath) =>
      scannableExtensions.has(path.extname(filePath).toLowerCase())
    );
    const patternFindings = sourceFiles.flatMap((filePath) =>
      this.scanFileWithPatterns(filePath, PATTERNS)
    );

    // ── 2. Check for missing data deletion endpoint (GDPR right to erasure)
    return patternFindings.concat(this._checkDataDeletion(context));
  }

  /**
   * Check if the project has user data deletion capability (GDPR Article 17).
   *
   * Heuristic: when some file path looks like a user model/schema/entity and
   * no readable file mentions a deletion-style phrase, a single low-confidence
   * project-level finding is produced.
   *
   * @param {object} context - Scan context; only `context.files` is read.
   * @returns {Array} Empty array, or an array holding the one finding.
   */
  _checkDataDeletion(context) {
    const userModelPath = /(?:user|account|customer|member|profile)(?:\.model|\.schema|\.entity|Model|Schema)/i;
    const deletionPhrase = /(?:delete.*user|remove.*account|erase.*data|destroy.*profile|gdpr.*delete|data.*deletion|right.*erasure|forget.*me)/i;
    const projectFiles = context.files;

    // Without anything that looks like a user model the check does not apply.
    const looksLikeUserModel = (filePath) => userModelPath.test(filePath);
    if (!projectFiles.some(looksLikeUserModel)) {
      return [];
    }

    // Any file whose content mentions deletion counts as evidence.
    const mentionsDeletion = (filePath) => {
      const text = this.readFile(filePath);
      return text ? deletionPhrase.test(text) : false;
    };
    if (projectFiles.some(mentionsDeletion)) {
      return [];
    }

    return [
      createFinding({
        file: 'project',
        line: 0,
        severity: 'medium',
        category: this.category,
        rule: 'PII_NO_DATA_DELETION',
        title: 'Privacy: No User Data Deletion Capability',
        description: 'Project has user models but no visible data deletion endpoint or function. GDPR Article 17 requires the ability to erase personal data on request.',
        matched: 'No delete/erase/destroy user endpoint found',
        confidence: 'low',
        cwe: 'CWE-359',
        owasp: 'A01:2021',
        fix: 'Implement a user data deletion endpoint (DELETE /api/users/:id or similar) that removes all PII from your systems.',
      }),
    ];
  }
}
|
|
300
|
+
|
|
301
|
+
export default PIIComplianceAgent;
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RAG Security Agent
|
|
3
|
+
* ====================
|
|
4
|
+
*
|
|
5
|
+
* Detects security vulnerabilities in Retrieval-Augmented Generation
|
|
6
|
+
* (RAG) implementations. RAG poisoning attacks are a proven threat
|
|
7
|
+
* vector — attackers corrupt knowledge bases to manipulate LLM outputs.
|
|
8
|
+
*
|
|
9
|
+
* Checks: unvalidated document ingestion, missing chunk isolation,
|
|
10
|
+
* embedding exposure, metadata leakage, no retrieval filtering,
|
|
11
|
+
* excessive context stuffing.
|
|
12
|
+
*
|
|
13
|
+
* Maps to: OWASP LLM08 (Vector & Embedding Weaknesses),
|
|
14
|
+
* OWASP Agentic AI ASI04 (Memory Poisoning)
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import path from 'path';
|
|
18
|
+
import { BaseAgent } from './base-agent.js';
|
|
19
|
+
|
|
20
|
+
// =============================================================================
|
|
21
|
+
// RAG SECURITY PATTERNS
|
|
22
|
+
// =============================================================================
|
|
23
|
+
|
|
24
|
+
/**
 * Regex rules used by the RAG security scan.
 *
 * Every entry carries: `rule` (identifier), `title`, `regex`, `severity`,
 * `cwe`, `owasp`, `description` and `fix`; some entries also set
 * `confidence`. The table is handed as-is to `scanFileWithPatterns`.
 *
 * All regexes carry the `g` flag, so they keep `lastIndex` state between
 * `.test()` / `.exec()` calls — reset it (or use `String.prototype.match`)
 * when reusing an entry directly.
 */
const PATTERNS = [
  // ── Document Ingestion ───────────────────────────────────────────────────
  {
    rule: 'RAG_UNSANITIZED_INGESTION',
    title: 'RAG: Documents Ingested Without Sanitization',
    regex: /(?:addDocuments|add_documents|upsert|insert|index\.add|from_documents)\s*\(\s*(?:documents|docs|chunks|texts|pages|uploads|files|userDocs|userFiles)/g,
    severity: 'high',
    cwe: 'CWE-20',
    owasp: 'A03:2021',
    description: 'Documents added to vector store without sanitization. Attackers can embed malicious instructions in documents that get retrieved and injected into LLM context.',
    fix: 'Sanitize document content before ingestion: strip HTML/script tags, detect prompt injection patterns, validate content type.',
  },
  {
    rule: 'RAG_USER_UPLOAD_TO_VECTORDB',
    title: 'RAG: User Upload Directly to Vector Store',
    regex: /(?:upload|multer|formidable|busboy|req\.file|req\.files)[\s\S]{0,500}(?:addDocuments|add_documents|upsert|vectorStore|index\.add|embed|from_documents)/g,
    severity: 'critical',
    cwe: 'CWE-434',
    owasp: 'A03:2021',
    description: 'User file uploads are fed directly into a vector database without review. This enables RAG poisoning — attackers upload documents containing hidden instructions.',
    fix: 'Add a review/approval pipeline between user uploads and vector store ingestion. Scan uploads for prompt injection patterns.',
  },
  {
    rule: 'RAG_NO_CONTENT_FILTER',
    title: 'RAG: No Content Filtering on Ingestion',
    regex: /(?:TextLoader|PDFLoader|CSVLoader|DirectoryLoader|WebBaseLoader|UnstructuredLoader|load_and_split|loadDocuments)\s*\((?![\s\S]{0,300}(?:filter|sanitize|clean|validate|strip|remove|moderate))/g,
    severity: 'medium',
    cwe: 'CWE-20',
    owasp: 'A03:2021',
    confidence: 'medium',
    description: 'Document loaders used without content filtering. Loaded content goes directly to embedding/vector store.',
    fix: 'Add content filtering after loading documents: remove scripts, detect injection patterns, validate format.',
  },

  // ── Chunk Isolation ──────────────────────────────────────────────────────
  {
    rule: 'RAG_NO_TENANT_ISOLATION',
    title: 'RAG: Vector Store Without Tenant Isolation',
    regex: /(?:vectorStore|pinecone|chroma|weaviate|qdrant|milvus|pgvector)[\s\S]{0,300}(?:add|upsert|insert|index)(?![\s\S]{0,200}(?:namespace|tenant|partition|collection|filter|user_id|org_id|metadata.*(?:user|tenant|org)))/g,
    severity: 'high',
    cwe: 'CWE-284',
    owasp: 'A01:2021',
    confidence: 'medium',
    description: 'Documents stored in vector database without tenant/user isolation. Users can retrieve other users\' private documents.',
    fix: 'Use namespaces, collections, or metadata filters to isolate documents by tenant/user.',
  },
  {
    rule: 'RAG_SYSTEM_DOCS_WITH_USER_DOCS',
    title: 'RAG: System Documents Mixed With User Documents',
    regex: /(?:addDocuments|add_documents|upsert)[\s\S]{0,200}(?:system|internal|private|admin)[\s\S]{0,200}(?:user|public|external|upload)/g,
    severity: 'medium',
    cwe: 'CWE-668',
    owasp: 'A01:2021',
    confidence: 'low',
    description: 'System/internal documents mixed with user documents in the same collection. User queries could retrieve sensitive internal docs.',
    fix: 'Separate system documents from user documents in different collections or namespaces.',
  },

  // ── Retrieval & Query Safety ─────────────────────────────────────────────
  {
    rule: 'RAG_NO_RETRIEVAL_FILTER',
    title: 'RAG: Retrieved Chunks Used Without Filtering',
    regex: /(?:similarity_search|similaritySearch|query|retrieve|get_relevant|asRetriever)[\s\S]{0,300}(?:page_content|pageContent|text|content|document)[\s\S]{0,200}(?:prompt|messages|content|system|user)/g,
    severity: 'high',
    cwe: 'CWE-74',
    owasp: 'A03:2021',
    confidence: 'medium',
    description: 'Retrieved document chunks injected into LLM prompt without filtering. Poisoned documents can override system instructions.',
    fix: 'Filter retrieved chunks: remove instruction-like content, apply relevance score thresholds, limit number of chunks.',
  },
  {
    rule: 'RAG_NO_RELEVANCE_THRESHOLD',
    title: 'RAG: No Relevance Score Threshold on Retrieval',
    regex: /(?:similarity_search|similaritySearch|asRetriever)\s*\(\s*(?:query|question|input)(?![\s\S]{0,200}(?:score_threshold|scoreThreshold|threshold|minScore|min_score|filter))/g,
    severity: 'medium',
    cwe: 'CWE-20',
    owasp: 'A03:2021',
    confidence: 'low',
    description: 'Vector similarity search without relevance threshold. Low-relevance chunks may contain poisoned content designed to be retrieved for many queries.',
    fix: 'Set a minimum relevance score threshold to filter out low-quality matches.',
  },
  {
    rule: 'RAG_EXCESSIVE_CONTEXT',
    title: 'RAG: Excessive Retrieved Context',
    // The bare `k` must not be the tail of a longer identifier: without the
    // lookbehind, assignments such as `chunk = 512` or `mask = 255` were
    // reported. `_` is deliberately still allowed before `k`, so `top_k`,
    // `fetch_k` and similar parameter names keep matching.
    regex: /(?:(?<![A-Za-z0-9])k\s*[:=]\s*(?:[5-9]\d|\d{3,})|top_k\s*[:=]\s*(?:[5-9]\d|\d{3,})|search_kwargs.*k.*(?:[5-9]\d|\d{3,}))/g,
    severity: 'medium',
    cwe: 'CWE-400',
    owasp: 'A04:2021',
    confidence: 'low',
    description: 'Retrieving a very large number of chunks (50+). Increases the attack surface for prompt injection via poisoned documents.',
    fix: 'Limit retrieved chunks to the minimum needed (typically 3-10). More chunks = more injection surface.',
  },

  // ── Embedding & Data Exposure ────────────────────────────────────────────
  {
    rule: 'RAG_EMBEDDING_API_EXPOSED',
    title: 'RAG: Embedding Endpoint Exposed Without Auth',
    regex: /(?:app\.|router\.|api\.)(?:get|post)\s*\(\s*['"].*(?:embed|embedding|vectorize|encode)['"](?![\s\S]{0,300}(?:auth|middleware|protect|guard|jwt|bearer|session))/g,
    severity: 'high',
    cwe: 'CWE-306',
    owasp: 'A07:2021',
    confidence: 'medium',
    description: 'Embedding API endpoint exposed without authentication. Attackers can enumerate embeddings or generate vectors for injection attacks.',
    fix: 'Add authentication middleware to embedding endpoints.',
  },
  {
    rule: 'RAG_METADATA_IN_RESPONSE',
    title: 'RAG: Internal Metadata Exposed in Response',
    regex: /(?:metadata|source|file_path|filePath|url|author|timestamp)[\s\S]{0,100}(?:res\.json|res\.send|response\.json|return\s*\{|jsonify)/g,
    severity: 'medium',
    cwe: 'CWE-200',
    owasp: 'A01:2021',
    confidence: 'low',
    description: 'Internal document metadata (file paths, sources, authors) returned in API responses. Leaks information about the knowledge base structure.',
    fix: 'Strip internal metadata before returning responses. Only expose necessary fields to the client.',
  },
  {
    rule: 'RAG_EMBEDDING_IN_RESPONSE',
    title: 'RAG: Raw Embeddings Returned to Client',
    regex: /(?:embedding|vector|dense_vector)[\s\S]{0,100}(?:res\.json|res\.send|response|return|output)/g,
    severity: 'medium',
    cwe: 'CWE-200',
    owasp: 'A01:2021',
    confidence: 'low',
    description: 'Raw embedding vectors returned in API responses. Research shows embeddings can be inverted to recover original text, leaking private data.',
    fix: 'Never return raw embedding vectors in API responses. Embeddings should remain server-side only.',
  },

  // ── Model & Pipeline Safety ──────────────────────────────────────────────
  {
    rule: 'RAG_PICKLE_EMBEDDING_MODEL',
    title: 'RAG: Embedding Model Loaded via Pickle',
    regex: /(?:torch\.load|pickle\.load|joblib\.load)\s*\(\s*(?!.*safetensors)(?!.*weights_only\s*=\s*True)/g,
    severity: 'critical',
    cwe: 'CWE-502',
    owasp: 'A08:2021',
    description: 'ML model loaded from pickle format. Pickle files can contain arbitrary code that executes on load.',
    fix: 'Use safetensors format for model loading. If using torch.load(), set weights_only=True.',
  },
  {
    rule: 'RAG_TRUST_REMOTE_CODE',
    title: 'RAG: Model Loaded With trust_remote_code=True',
    regex: /trust_remote_code\s*=\s*True/g,
    severity: 'high',
    cwe: 'CWE-94',
    owasp: 'A08:2021',
    description: 'Embedding or LLM model loaded with trust_remote_code=True. Allows execution of arbitrary code from the model repository.',
    fix: 'Set trust_remote_code=False. Audit model code before enabling remote code execution.',
  },
];
|
|
174
|
+
|
|
175
|
+
// =============================================================================
|
|
176
|
+
// RAG SECURITY AGENT
|
|
177
|
+
// =============================================================================
|
|
178
|
+
|
|
179
|
+
/**
 * Agent that scans source files for RAG-related security problems.
 *
 * `analyze` passes every file with a recognised source extension, together
 * with the PATTERNS table, to `this.scanFileWithPatterns` (not defined in
 * this class — presumably inherited from BaseAgent).
 */
export class RAGSecurityAgent extends BaseAgent {
  constructor() {
    const agentName = 'RAGSecurityAgent';
    const summary = 'Detect RAG security vulnerabilities — poisoning, embedding exposure, tenant isolation gaps';
    // NOTE(review): third BaseAgent argument looks like the finding category — confirm in base-agent.js.
    super(agentName, summary, 'llm');
  }

  /**
   * Run the RAG pattern scan over the project's source files.
   *
   * @param {object} context - Scan context; only `context.files` (array of file paths) is read.
   * @returns {Promise<Array>} Findings from all scanned files, in file order.
   */
  async analyze(context) {
    const scannableExtensions = new Set([
      '.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs', '.py', '.rb', '.go', '.java',
    ]);
    const isSourceFile = (filePath) =>
      scannableExtensions.has(path.extname(filePath).toLowerCase());

    return context.files
      .filter(isSourceFile)
      .flatMap((filePath) => this.scanFileWithPatterns(filePath, PATTERNS));
  }
}
|
|
203
|
+
|
|
204
|
+
export default RAGSecurityAgent;
|
|
@@ -108,7 +108,11 @@ export class SBOMGenerator {
|
|
|
108
108
|
} catch { /* skip */ }
|
|
109
109
|
}
|
|
110
110
|
|
|
111
|
-
// ──
|
|
111
|
+
// ── Detect licenses from installed packages (node_modules) ──────────────
|
|
112
|
+
const licenses = this._detectLicenses(rootPath);
|
|
113
|
+
|
|
114
|
+
// ── Build CycloneDX BOM (CRA-enhanced) ──────────────────────────────────
|
|
115
|
+
const projectMeta = this.getProjectMetadata(rootPath);
|
|
112
116
|
const bom = {
|
|
113
117
|
bomFormat: 'CycloneDX',
|
|
114
118
|
specVersion: '1.5',
|
|
@@ -119,23 +123,57 @@ export class SBOMGenerator {
|
|
|
119
123
|
tools: [{
|
|
120
124
|
vendor: 'ship-safe',
|
|
121
125
|
name: 'ship-safe',
|
|
122
|
-
version: '
|
|
126
|
+
version: '5.0.0',
|
|
123
127
|
}],
|
|
124
|
-
component:
|
|
128
|
+
component: projectMeta,
|
|
129
|
+
// EU CRA: supplier identification
|
|
130
|
+
supplier: this._getSupplier(rootPath),
|
|
131
|
+
// EU CRA: lifecycle phase
|
|
132
|
+
lifecycles: [{ phase: 'build' }],
|
|
125
133
|
},
|
|
126
|
-
components: components.map((c, i) =>
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
+
components: components.map((c, i) => {
|
|
135
|
+
const comp = {
|
|
136
|
+
'bom-ref': `component-${i}`,
|
|
137
|
+
type: c.type,
|
|
138
|
+
name: c.name,
|
|
139
|
+
version: c.version,
|
|
140
|
+
purl: c.purl,
|
|
141
|
+
scope: c.scope,
|
|
142
|
+
};
|
|
143
|
+
// EU CRA: attach license if known
|
|
144
|
+
const lic = licenses[c.name];
|
|
145
|
+
if (lic) {
|
|
146
|
+
comp.licenses = [{ license: { id: lic } }];
|
|
147
|
+
}
|
|
148
|
+
return comp;
|
|
149
|
+
}),
|
|
150
|
+
// EU CRA: vulnerability disclosure info
|
|
151
|
+
vulnerabilities: [],
|
|
134
152
|
};
|
|
135
153
|
|
|
136
154
|
return bom;
|
|
137
155
|
}
|
|
138
156
|
|
|
157
|
+
/**
|
|
158
|
+
* Attach known vulnerabilities to the SBOM (CRA requirement).
|
|
159
|
+
*/
|
|
160
|
+
attachVulnerabilities(bom, depVulns = []) {
|
|
161
|
+
bom.vulnerabilities = depVulns.map((v, i) => ({
|
|
162
|
+
'bom-ref': `vuln-${i}`,
|
|
163
|
+
id: v.id || v.package || `VULN-${i}`,
|
|
164
|
+
source: { name: 'ship-safe' },
|
|
165
|
+
ratings: [{
|
|
166
|
+
severity: v.severity || 'unknown',
|
|
167
|
+
method: 'other',
|
|
168
|
+
}],
|
|
169
|
+
description: v.description || '',
|
|
170
|
+
affects: [{
|
|
171
|
+
ref: v.package || 'unknown',
|
|
172
|
+
}],
|
|
173
|
+
}));
|
|
174
|
+
return bom;
|
|
175
|
+
}
|
|
176
|
+
|
|
139
177
|
/**
|
|
140
178
|
* Generate SBOM and write to file.
|
|
141
179
|
*/
|
|
@@ -165,6 +203,57 @@ export class SBOMGenerator {
|
|
|
165
203
|
};
|
|
166
204
|
}
|
|
167
205
|
|
|
206
|
+
/**
|
|
207
|
+
* EU CRA: Extract supplier info from package.json.
|
|
208
|
+
*/
|
|
209
|
+
_getSupplier(rootPath) {
|
|
210
|
+
const pkgPath = path.join(rootPath, 'package.json');
|
|
211
|
+
try {
|
|
212
|
+
if (fs.existsSync(pkgPath)) {
|
|
213
|
+
const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf-8'));
|
|
214
|
+
const author = typeof pkg.author === 'string' ? pkg.author
|
|
215
|
+
: pkg.author?.name || pkg.author?.email || null;
|
|
216
|
+
if (author) {
|
|
217
|
+
return { name: author, url: [pkg.homepage || pkg.repository?.url || ''].filter(Boolean) };
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
} catch { /* skip */ }
|
|
221
|
+
return { name: 'Unknown' };
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
/**
|
|
225
|
+
* Detect licenses from node_modules (best-effort).
|
|
226
|
+
* Returns { packageName: 'MIT' | 'ISC' | ... }
|
|
227
|
+
*/
|
|
228
|
+
_detectLicenses(rootPath) {
|
|
229
|
+
const licenses = {};
|
|
230
|
+
const nodeModules = path.join(rootPath, 'node_modules');
|
|
231
|
+
const pkgPath = path.join(rootPath, 'package.json');
|
|
232
|
+
|
|
233
|
+
if (!fs.existsSync(pkgPath)) return licenses;
|
|
234
|
+
|
|
235
|
+
try {
|
|
236
|
+
const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf-8'));
|
|
237
|
+
const allDeps = { ...pkg.dependencies, ...pkg.devDependencies };
|
|
238
|
+
|
|
239
|
+
for (const name of Object.keys(allDeps)) {
|
|
240
|
+
const depPkgPath = path.join(nodeModules, name, 'package.json');
|
|
241
|
+
try {
|
|
242
|
+
if (fs.existsSync(depPkgPath)) {
|
|
243
|
+
const depPkg = JSON.parse(fs.readFileSync(depPkgPath, 'utf-8'));
|
|
244
|
+
if (depPkg.license) {
|
|
245
|
+
licenses[name] = typeof depPkg.license === 'string'
|
|
246
|
+
? depPkg.license
|
|
247
|
+
: depPkg.license.type || 'UNKNOWN';
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
} catch { /* skip */ }
|
|
251
|
+
}
|
|
252
|
+
} catch { /* skip */ }
|
|
253
|
+
|
|
254
|
+
return licenses;
|
|
255
|
+
}
|
|
256
|
+
|
|
168
257
|
uuid() {
|
|
169
258
|
return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, c => {
|
|
170
259
|
const r = Math.random() * 16 | 0;
|
|
@@ -66,6 +66,12 @@ export class SupabaseRLSAgent extends BaseAgent {
|
|
|
66
66
|
super('SupabaseRLSAgent', 'Supabase Row Level Security audit', 'auth');
|
|
67
67
|
}
|
|
68
68
|
|
|
69
|
+
shouldRun(recon) {
|
|
70
|
+
return recon?.databases?.includes('supabase') ||
|
|
71
|
+
recon?.authPatterns?.includes('supabase-auth') ||
|
|
72
|
+
false;
|
|
73
|
+
}
|
|
74
|
+
|
|
69
75
|
async analyze(context) {
|
|
70
76
|
const { rootPath, files } = context;
|
|
71
77
|
let findings = [];
|