@rigour-labs/core 4.1.1 → 4.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/gates/hallucinated-imports-lang.d.ts +11 -0
- package/dist/gates/hallucinated-imports-lang.js +65 -12
- package/dist/gates/hallucinated-imports.d.ts +10 -0
- package/dist/gates/hallucinated-imports.js +203 -18
- package/dist/gates/phantom-apis.d.ts +18 -1
- package/dist/gates/phantom-apis.js +68 -8
- package/dist/gates/promise-safety.js +61 -1
- package/dist/gates/security-patterns.d.ts +5 -0
- package/dist/gates/security-patterns.js +51 -1
- package/dist/gates/test-quality.js +20 -0
- package/dist/hooks/checker.js +89 -0
- package/dist/hooks/dlp-templates.d.ts +26 -0
- package/dist/hooks/dlp-templates.js +281 -0
- package/dist/hooks/index.d.ts +5 -0
- package/dist/hooks/index.js +4 -0
- package/dist/hooks/input-validator.d.ts +70 -0
- package/dist/hooks/input-validator.js +461 -0
- package/dist/hooks/input-validator.test.d.ts +1 -0
- package/dist/hooks/input-validator.test.js +272 -0
- package/dist/hooks/standalone-dlp-checker.d.ts +18 -0
- package/dist/hooks/standalone-dlp-checker.js +91 -0
- package/dist/inference/model-manager.js +10 -1
- package/dist/templates/universal-config.js +33 -0
- package/dist/types/index.d.ts +230 -12
- package/dist/types/index.js +59 -0
- package/package.json +6 -6
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Input Validation Gate — AI Agent DLP (Data Loss Prevention)
|
|
3
|
+
*
|
|
4
|
+
* Scans user input text for credentials, secrets, and sensitive data
|
|
5
|
+
* BEFORE it reaches any AI agent. This is the pre-input counterpart
|
|
6
|
+
* to the existing post-output security patterns in checker.ts.
|
|
7
|
+
*
|
|
8
|
+
* Supported credential types:
|
|
9
|
+
* - AWS Access Keys (AKIA...)
|
|
10
|
+
* - OpenAI / Anthropic / GitHub API keys (sk-*, ghp_*, etc.)
|
|
11
|
+
* - Private keys (PEM format)
|
|
12
|
+
* - Database connection strings (postgresql://, mongodb://, mysql://, redis://)
|
|
13
|
+
* - Bearer tokens and JWTs
|
|
14
|
+
* - Generic password/secret assignments
|
|
15
|
+
* - .env format strings (KEY=value)
|
|
16
|
+
* - Azure / GCP / Stripe / Twilio keys
|
|
17
|
+
*
|
|
18
|
+
* @since v4.2.0 — AI Agent DLP layer
|
|
19
|
+
*/
|
|
20
|
+
/**
 * One credential or secret found while scanning input text.
 */
export interface CredentialDetection {
    /** Identifier of the pattern that fired, e.g. `aws_access_key` or `custom_pattern`. */
    type: string;
    /** Seriousness of the finding; results are ordered `critical` → `high` → `medium`. */
    severity: 'critical' | 'high' | 'medium';
    /** The matched text, truncated by the scanner to at most 80 characters. */
    match: string;
    /** Masked form of the matched value that is safe to show or log. */
    redacted: string;
    /** Human-readable summary of what was detected. */
    description: string;
    /** Suggested safer alternative to pasting the secret. */
    recommendation: string;
    /** Compliance control identifiers associated with the finding (e.g. `SOC2-CC6.1`). */
    compliance: Array<string>;
    /** Offsets of the match within the scanned text: `start` inclusive, `end` exclusive. */
    position?: { start: number; end: number };
}
|
|
33
|
+
/**
 * Outcome of a single input scan.
 */
export interface InputValidationResult {
    /**
     * `clean` when nothing was detected; otherwise `blocked` or `warning`
     * depending on the `block_on_detection` setting.
     */
    status: 'clean' | 'blocked' | 'warning';
    /** Findings that survived de-duplication, most severe first. */
    detections: Array<CredentialDetection>;
    /** Wall-clock time spent scanning, in milliseconds. */
    duration_ms: number;
    /** Number of characters that were scanned. */
    scanned_length: number;
}
|
|
39
|
+
/**
 * Options accepted by the input scanner. Every field is optional.
 */
export interface InputValidationConfig {
    /** Set to `false` to skip scanning altogether; when omitted the scan runs. */
    enabled?: boolean;
    /** When detections exist, report `blocked` (default) rather than `warning`. */
    block_on_detection?: boolean;
    /** Minimum length for generic secret values to avoid false positives (default 8). */
    min_secret_length?: number;
    /** Extra patterns to detect, given as regex source strings; invalid expressions are skipped. */
    custom_patterns?: Array<string>;
    /** Whitelist: a built-in detection whose matched text matches one of these regex strings is dropped. */
    ignore_patterns?: Array<string>;
    /**
     * Log blocked inputs to the audit trail.
     * NOTE(review): the scanner functions in this module do not read this flag —
     * presumably the calling hook consumes it; confirm.
     */
    audit_log?: boolean;
}
|
|
51
|
+
/**
 * Scan input text for credential patterns and report every detection
 * together with its type, severity and recommendation.
 *
 * Intended to finish in under 50ms so it can run as a real-time pre-input hook.
 *
 * @param input - Text about to be handed to an AI agent.
 * @param config - Optional scanner settings; defaults apply when omitted.
 * @returns The scan status, the detections found, and timing/size metadata.
 */
export declare function scanInputForCredentials(
    input: string,
    config?: InputValidationConfig,
): InputValidationResult;
|
|
58
|
+
/**
 * Render a scan result as human-readable text for an IDE or terminal.
 * Used by hooks and the CLI.
 *
 * @param result - Result returned by the input scanner.
 * @returns A multi-line message describing the status and each detection.
 */
export declare function formatDLPAlert(
    result: InputValidationResult,
): string;
|
|
63
|
+
/**
 * Build a structured audit-log record for a DLP event.
 *
 * @param result - Result returned by the input scanner.
 * @param metadata - Context for the event: the agent name (required), plus an
 *   optional timestamp (the current time is used when omitted) and user id.
 * @returns A plain object suitable for serialisation into an audit trail.
 */
export declare function createDLPAuditEntry(
    result: InputValidationResult,
    metadata: {
        agent: string;
        timestamp?: string;
        userId?: string;
    },
): Record<string, unknown>;
|
|
@@ -0,0 +1,461 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Input Validation Gate — AI Agent DLP (Data Loss Prevention)
|
|
3
|
+
*
|
|
4
|
+
* Scans user input text for credentials, secrets, and sensitive data
|
|
5
|
+
* BEFORE it reaches any AI agent. This is the pre-input counterpart
|
|
6
|
+
* to the existing post-output security patterns in checker.ts.
|
|
7
|
+
*
|
|
8
|
+
* Supported credential types:
|
|
9
|
+
* - AWS Access Keys (AKIA...)
|
|
10
|
+
* - OpenAI / Anthropic / GitHub API keys (sk-*, ghp_*, etc.)
|
|
11
|
+
* - Private keys (PEM format)
|
|
12
|
+
* - Database connection strings (postgresql://, mongodb://, mysql://, redis://)
|
|
13
|
+
* - Bearer tokens and JWTs
|
|
14
|
+
* - Generic password/secret assignments
|
|
15
|
+
* - .env format strings (KEY=value)
|
|
16
|
+
* - Azure / GCP / Stripe / Twilio keys
|
|
17
|
+
*
|
|
18
|
+
* @since v4.2.0 — AI Agent DLP layer
|
|
19
|
+
*/
|
|
20
|
+
const CREDENTIAL_PATTERNS = [
|
|
21
|
+
// ── Cloud Provider Keys ───────────────────────────────────
|
|
22
|
+
{
|
|
23
|
+
type: 'aws_access_key',
|
|
24
|
+
regex: /\b(AKIA[0-9A-Z]{16})\b/g,
|
|
25
|
+
severity: 'critical',
|
|
26
|
+
description: 'AWS Access Key ID detected',
|
|
27
|
+
recommendation: 'Use process.env.AWS_ACCESS_KEY_ID or AWS credentials file instead',
|
|
28
|
+
compliance: ['SOC2-CC6.1', 'HIPAA-164.312', 'PCI-DSS-3.4'],
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
type: 'aws_secret_key',
|
|
32
|
+
regex: /(?:aws_secret_access_key|aws_secret_key|AWS_SECRET)\s*[:=]\s*['"]?([A-Za-z0-9/+=]{40})['"]?/gi,
|
|
33
|
+
severity: 'critical',
|
|
34
|
+
description: 'AWS Secret Access Key detected',
|
|
35
|
+
recommendation: 'Use process.env.AWS_SECRET_ACCESS_KEY instead',
|
|
36
|
+
compliance: ['SOC2-CC6.1', 'HIPAA-164.312', 'PCI-DSS-3.4'],
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
type: 'gcp_service_account',
|
|
40
|
+
regex: /("type"\s*:\s*"service_account"[\s\S]{0,200}"private_key"\s*:\s*"-----BEGIN)/g,
|
|
41
|
+
severity: 'critical',
|
|
42
|
+
description: 'GCP Service Account JSON key detected',
|
|
43
|
+
recommendation: 'Use GOOGLE_APPLICATION_CREDENTIALS env var pointing to key file',
|
|
44
|
+
compliance: ['SOC2-CC6.1', 'CIS-GCP-1.4'],
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
type: 'azure_key',
|
|
48
|
+
regex: /(?:AccountKey|SharedAccessKey|azure[_-]?(?:storage|cosmos|sb)[_-]?key)\s*[:=]\s*['"]?([A-Za-z0-9+/=]{44,88})['"]?/gi,
|
|
49
|
+
severity: 'critical',
|
|
50
|
+
description: 'Azure access key detected',
|
|
51
|
+
recommendation: 'Use Azure Managed Identity or environment variables',
|
|
52
|
+
compliance: ['SOC2-CC6.1', 'CIS-Azure-5.1'],
|
|
53
|
+
},
|
|
54
|
+
// ── API Keys (Provider-Specific Prefixes) ─────────────────
|
|
55
|
+
{
|
|
56
|
+
type: 'openai_key',
|
|
57
|
+
regex: /\b(sk-(?:proj-)?[A-Za-z0-9]{20,})\b/g,
|
|
58
|
+
severity: 'critical',
|
|
59
|
+
description: 'OpenAI API key detected',
|
|
60
|
+
recommendation: 'Use process.env.OPENAI_API_KEY instead',
|
|
61
|
+
compliance: ['SOC2-CC6.1'],
|
|
62
|
+
},
|
|
63
|
+
{
|
|
64
|
+
type: 'anthropic_key',
|
|
65
|
+
regex: /\b(sk-ant-[A-Za-z0-9-]{20,})\b/g,
|
|
66
|
+
severity: 'critical',
|
|
67
|
+
description: 'Anthropic API key detected',
|
|
68
|
+
recommendation: 'Use process.env.ANTHROPIC_API_KEY instead',
|
|
69
|
+
compliance: ['SOC2-CC6.1'],
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
type: 'github_token',
|
|
73
|
+
regex: /\b(ghp_[A-Za-z0-9]{36}|gho_[A-Za-z0-9]{36}|ghu_[A-Za-z0-9]{36}|ghs_[A-Za-z0-9]{36}|ghr_[A-Za-z0-9]{36}|github_pat_[A-Za-z0-9_]{22,})\b/g,
|
|
74
|
+
severity: 'critical',
|
|
75
|
+
description: 'GitHub token detected',
|
|
76
|
+
recommendation: 'Use process.env.GITHUB_TOKEN or gh auth login instead',
|
|
77
|
+
compliance: ['SOC2-CC6.1', 'CIS-SCM-1.2'],
|
|
78
|
+
},
|
|
79
|
+
{
|
|
80
|
+
type: 'stripe_key',
|
|
81
|
+
regex: /\b(sk_(?:live|test)_[A-Za-z0-9]{24,}|pk_(?:live|test)_[A-Za-z0-9]{24,}|rk_(?:live|test)_[A-Za-z0-9]{24,})\b/g,
|
|
82
|
+
severity: 'critical',
|
|
83
|
+
description: 'Stripe API key detected',
|
|
84
|
+
recommendation: 'Use process.env.STRIPE_SECRET_KEY instead',
|
|
85
|
+
compliance: ['SOC2-CC6.1', 'PCI-DSS-3.4'],
|
|
86
|
+
},
|
|
87
|
+
{
|
|
88
|
+
type: 'twilio_key',
|
|
89
|
+
regex: /\b(SK[a-f0-9]{32})\b/g,
|
|
90
|
+
severity: 'high',
|
|
91
|
+
description: 'Twilio API key detected',
|
|
92
|
+
recommendation: 'Use process.env.TWILIO_API_KEY instead',
|
|
93
|
+
compliance: ['SOC2-CC6.1'],
|
|
94
|
+
},
|
|
95
|
+
{
|
|
96
|
+
type: 'slack_token',
|
|
97
|
+
regex: /\b(xoxb-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24}|xoxp-[0-9]{10,13}-[0-9]{10,13}-[0-9]{10,13}-[a-f0-9]{32}|xapp-[0-9]-[A-Z0-9]{10,13}-[0-9]{13}-[a-zA-Z0-9]{64})\b/g,
|
|
98
|
+
severity: 'critical',
|
|
99
|
+
description: 'Slack token detected',
|
|
100
|
+
recommendation: 'Use process.env.SLACK_TOKEN instead',
|
|
101
|
+
compliance: ['SOC2-CC6.1'],
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
type: 'sendgrid_key',
|
|
105
|
+
regex: /\b(SG\.[A-Za-z0-9_-]{22}\.[A-Za-z0-9_-]{43})\b/g,
|
|
106
|
+
severity: 'critical',
|
|
107
|
+
description: 'SendGrid API key detected',
|
|
108
|
+
recommendation: 'Use process.env.SENDGRID_API_KEY instead',
|
|
109
|
+
compliance: ['SOC2-CC6.1'],
|
|
110
|
+
},
|
|
111
|
+
// ── Private Keys ──────────────────────────────────────────
|
|
112
|
+
{
|
|
113
|
+
type: 'private_key',
|
|
114
|
+
regex: /-----BEGIN (?:RSA |EC |DSA |OPENSSH |ED25519 )?PRIVATE KEY-----/g,
|
|
115
|
+
severity: 'critical',
|
|
116
|
+
description: 'Private key detected in input',
|
|
117
|
+
recommendation: 'Never paste private keys into AI agents. Use key file references instead',
|
|
118
|
+
compliance: ['SOC2-CC6.1', 'HIPAA-164.312', 'PCI-DSS-3.5'],
|
|
119
|
+
},
|
|
120
|
+
// ── Database Connection Strings ───────────────────────────
|
|
121
|
+
{
|
|
122
|
+
type: 'database_url',
|
|
123
|
+
regex: /\b((?:postgres(?:ql)?|mysql|mariadb|mssql|mongodb(?:\+srv)?|redis|rediss|amqp|amqps):\/\/[^\s'"`,;}{)]+)/gi,
|
|
124
|
+
severity: 'critical',
|
|
125
|
+
description: 'Database connection string with credentials detected',
|
|
126
|
+
recommendation: 'Use process.env.DATABASE_URL instead',
|
|
127
|
+
compliance: ['SOC2-CC6.1', 'HIPAA-164.312', 'PCI-DSS-6.5'],
|
|
128
|
+
},
|
|
129
|
+
// ── Bearer Tokens & JWTs ──────────────────────────────────
|
|
130
|
+
{
|
|
131
|
+
type: 'bearer_token',
|
|
132
|
+
regex: /\b(Bearer\s+[A-Za-z0-9_-]{20,}\.[A-Za-z0-9_-]{20,}\.[A-Za-z0-9_-]{20,})\b/g,
|
|
133
|
+
severity: 'critical',
|
|
134
|
+
description: 'Bearer token (JWT) detected',
|
|
135
|
+
recommendation: 'Use process.env.AUTH_TOKEN or a secure token store',
|
|
136
|
+
compliance: ['SOC2-CC6.1', 'OWASP-A2'],
|
|
137
|
+
},
|
|
138
|
+
{
|
|
139
|
+
type: 'jwt_token',
|
|
140
|
+
regex: /\b(eyJ[A-Za-z0-9_-]{10,}\.eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,})\b/g,
|
|
141
|
+
severity: 'high',
|
|
142
|
+
description: 'JWT token detected in input',
|
|
143
|
+
recommendation: 'Avoid sharing tokens with AI agents. Use environment variables',
|
|
144
|
+
compliance: ['SOC2-CC6.1', 'OWASP-A2'],
|
|
145
|
+
},
|
|
146
|
+
// ── Generic Password/Secret Assignments ───────────────────
|
|
147
|
+
{
|
|
148
|
+
type: 'password_assignment',
|
|
149
|
+
regex: /(?:password|passwd|pwd|secret|api[_-]?key|apikey|auth[_-]?token|access[_-]?token|client[_-]?secret|signing[_-]?key|encryption[_-]?key)\s*[:=]\s*['"]([^'"]{8,})['"]/gi,
|
|
150
|
+
severity: 'high',
|
|
151
|
+
description: 'Password or secret value detected',
|
|
152
|
+
recommendation: 'Use environment variable: process.env.<KEY_NAME>',
|
|
153
|
+
compliance: ['SOC2-CC6.1', 'CWE-798'],
|
|
154
|
+
},
|
|
155
|
+
// ── .env Format Strings ───────────────────────────────────
|
|
156
|
+
{
|
|
157
|
+
type: 'env_variable',
|
|
158
|
+
regex: /^[A-Z][A-Z0-9_]{2,}=(?![\s]*$)['"]?[^\s'"]{8,}['"]?\s*$/gm,
|
|
159
|
+
severity: 'high',
|
|
160
|
+
description: '.env format secret detected (KEY=value)',
|
|
161
|
+
recommendation: 'Reference the .env file path instead of pasting contents',
|
|
162
|
+
compliance: ['SOC2-CC6.1', 'CWE-798'],
|
|
163
|
+
},
|
|
164
|
+
// ── SSH Connection Strings ────────────────────────────────
|
|
165
|
+
{
|
|
166
|
+
type: 'ssh_credentials',
|
|
167
|
+
regex: /\bssh\s+(?:-i\s+\S+\s+)?[a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\s*(?:-p\s+\d+)?/g,
|
|
168
|
+
severity: 'medium',
|
|
169
|
+
description: 'SSH connection command detected',
|
|
170
|
+
recommendation: 'Use SSH config aliases instead of full connection strings',
|
|
171
|
+
compliance: ['SOC2-CC6.1'],
|
|
172
|
+
},
|
|
173
|
+
// ── IP Addresses with Credentials ─────────────────────────
|
|
174
|
+
{
|
|
175
|
+
type: 'credentials_in_url',
|
|
176
|
+
regex: /\b(https?:\/\/[^:]+:[^@]+@[^\s'"`,;}{)]+)\b/gi,
|
|
177
|
+
severity: 'critical',
|
|
178
|
+
description: 'URL with embedded credentials detected',
|
|
179
|
+
recommendation: 'Remove credentials from URLs. Use environment variables for auth',
|
|
180
|
+
compliance: ['SOC2-CC6.1', 'CWE-798', 'OWASP-A2'],
|
|
181
|
+
},
|
|
182
|
+
// ── Base64-Encoded Secrets (v4.2+) ──────────────────────────
|
|
183
|
+
{
|
|
184
|
+
type: 'base64_secret',
|
|
185
|
+
regex: /(?:api[_-]?key|password|secret|token|credential|auth)\s*[:=]\s*['"]?([A-Za-z0-9+/]{32,})={0,2}['"]?/gi,
|
|
186
|
+
severity: 'high',
|
|
187
|
+
description: 'Possible base64-encoded secret detected',
|
|
188
|
+
recommendation: 'Use environment variables instead of encoded inline secrets',
|
|
189
|
+
compliance: ['SOC2-CC6.1', 'CWE-798'],
|
|
190
|
+
},
|
|
191
|
+
// ── Hex-Encoded Secrets (v4.2+) ──────────────────────────────
|
|
192
|
+
{
|
|
193
|
+
type: 'hex_secret',
|
|
194
|
+
regex: /(?:api[_-]?key|password|secret|token|credential|auth)\s*[:=]\s*['"]?([0-9a-f]{32,})['"]?/gi,
|
|
195
|
+
severity: 'high',
|
|
196
|
+
description: 'Possible hex-encoded secret detected',
|
|
197
|
+
recommendation: 'Use environment variables instead of encoded inline secrets',
|
|
198
|
+
compliance: ['SOC2-CC6.1', 'CWE-798'],
|
|
199
|
+
},
|
|
200
|
+
// ── Multiline Private Keys (v4.2+) ───────────────────────────
|
|
201
|
+
{
|
|
202
|
+
type: 'private_key_full',
|
|
203
|
+
regex: /-----BEGIN (?:RSA |EC |DSA |OPENSSH |ED25519 )?PRIVATE KEY-----[\s\S]*?-----END (?:RSA |EC |DSA |OPENSSH |ED25519 )?PRIVATE KEY-----/g,
|
|
204
|
+
severity: 'critical',
|
|
205
|
+
description: 'Full private key block detected in input',
|
|
206
|
+
recommendation: 'Never paste private keys into AI agents. Use key file references instead',
|
|
207
|
+
compliance: ['SOC2-CC6.1', 'HIPAA-164.312', 'PCI-DSS-3.5'],
|
|
208
|
+
},
|
|
209
|
+
// ── Package.json / CI Secrets (v4.2+) ────────────────────────
|
|
210
|
+
{
|
|
211
|
+
type: 'ci_secret',
|
|
212
|
+
regex: /(?:DOCKER_PASSWORD|NPM_TOKEN|PYPI_TOKEN|NUGET_API_KEY|SONAR_TOKEN|CODECOV_TOKEN|SNYK_TOKEN|SENTRY_DSN|DATADOG_API_KEY)\s*[:=]\s*['"]?([^\s'"`,;}{)]{8,})['"]?/gi,
|
|
213
|
+
severity: 'critical',
|
|
214
|
+
description: 'CI/CD or registry secret detected',
|
|
215
|
+
recommendation: 'Use CI secret management (GitHub Secrets, GitLab CI Variables)',
|
|
216
|
+
compliance: ['SOC2-CC6.1', 'CIS-SCM-2.3'],
|
|
217
|
+
},
|
|
218
|
+
];
|
|
219
|
+
// ── Normalization & Entropy ───────────────────────────────────────
|
|
220
|
+
/**
 * Normalize input to defeat obfuscation tricks before pattern matching:
 * - Remove invisible format characters that can be inserted inside a secret
 *   without changing how it looks: soft hyphen (U+00AD), zero-width
 *   space/non-joiner/joiner (U+200B–U+200D), word joiner (U+2060) and
 *   BOM / zero-width no-break space (U+FEFF)
 * - Remove bidirectional controls: Arabic letter mark (U+061C), LRM/RLM
 *   (U+200E/U+200F), embeddings and overrides (U+202A–U+202E) and isolates
 *   (U+2066–U+2069)
 * - Apply NFC unicode normalization
 *
 * @param input - Raw text to be scanned.
 * @returns The text with those characters removed, in NFC form. Offsets in
 *   the returned string can differ from offsets in the original.
 */
function normalizeInput(input) {
    return input
        .replace(/[\u00AD\u200B-\u200D\u2060\uFEFF]/g, '') // invisible / zero-width
        .replace(/[\u061C\u200E\u200F\u202A-\u202E\u2066-\u2069]/g, '') // bidi control
        .normalize('NFC');
}
|
|
232
|
+
/**
 * Shannon entropy of a string, in bits per symbol.
 *
 * The scanner treats a high value (> 4.5) on a long quoted value in a
 * secret-looking assignment as a sign of an encoded or encrypted secret that
 * the regex patterns did not recognise.
 *
 * Symbols are Unicode code points, and the probabilities are computed over
 * the number of code points (not UTF-16 units), so they always sum to 1.
 *
 * @param str - Text to measure.
 * @returns Entropy in bits per symbol; 0 for an empty string or a string made
 *   of one repeated symbol.
 */
function shannonEntropy(str) {
    const counts = new Map();
    let total = 0;
    for (const symbol of str) {
        counts.set(symbol, (counts.get(symbol) ?? 0) + 1);
        total += 1;
    }
    let entropy = 0;
    for (const count of counts.values()) {
        const p = count / total;
        entropy -= p * Math.log2(p);
    }
    return entropy;
}
|
|
247
|
+
// ── Core Scanner ──────────────────────────────────────────────────
|
|
248
|
+
/**
 * Mask a matched credential so it can be displayed or logged safely.
 *
 * Values longer than 10 characters keep their first 4 and last 2 characters
 * around a fixed `****`; shorter values keep only their first 3 characters
 * followed by `***`.
 *
 * @param match - The matched secret text.
 * @returns The masked representation.
 */
function redactMatch(match) {
    const isShort = match.length <= 10;
    return isShort
        ? `${match.slice(0, 3)}***`
        : `${match.slice(0, 4)}****${match.slice(-2)}`;
}
|
|
258
|
+
/**
|
|
259
|
+
* Scan input text for credential patterns.
|
|
260
|
+
* Returns all detections with type, severity, and recommendations.
|
|
261
|
+
*
|
|
262
|
+
* Designed to complete in <50ms for real-time pre-input hooks.
|
|
263
|
+
*/
|
|
264
|
+
export function scanInputForCredentials(input, config = {}) {
|
|
265
|
+
const start = Date.now();
|
|
266
|
+
const detections = [];
|
|
267
|
+
if (!config.enabled && config.enabled !== undefined) {
|
|
268
|
+
return {
|
|
269
|
+
status: 'clean',
|
|
270
|
+
detections: [],
|
|
271
|
+
duration_ms: Date.now() - start,
|
|
272
|
+
scanned_length: input.length,
|
|
273
|
+
};
|
|
274
|
+
}
|
|
275
|
+
// ── Normalize to defeat obfuscation (zero-width chars, bidi, unicode) ──
|
|
276
|
+
input = normalizeInput(input);
|
|
277
|
+
const minSecretLength = config.min_secret_length ?? 8;
|
|
278
|
+
const ignoreRegexes = (config.ignore_patterns ?? []).map(p => new RegExp(p, 'gi'));
|
|
279
|
+
for (const pattern of CREDENTIAL_PATTERNS) {
|
|
280
|
+
// Reset regex state for global patterns
|
|
281
|
+
pattern.regex.lastIndex = 0;
|
|
282
|
+
let match;
|
|
283
|
+
while ((match = pattern.regex.exec(input)) !== null) {
|
|
284
|
+
const fullMatch = match[0];
|
|
285
|
+
const capturedValue = match[1] || fullMatch;
|
|
286
|
+
// Skip short matches to avoid false positives
|
|
287
|
+
if (capturedValue.length < minSecretLength && pattern.type !== 'private_key') {
|
|
288
|
+
continue;
|
|
289
|
+
}
|
|
290
|
+
// Check ignore patterns
|
|
291
|
+
const isIgnored = ignoreRegexes.some(ignore => {
|
|
292
|
+
ignore.lastIndex = 0;
|
|
293
|
+
return ignore.test(fullMatch);
|
|
294
|
+
});
|
|
295
|
+
if (isIgnored) {
|
|
296
|
+
continue;
|
|
297
|
+
}
|
|
298
|
+
detections.push({
|
|
299
|
+
type: pattern.type,
|
|
300
|
+
severity: pattern.severity,
|
|
301
|
+
match: fullMatch.slice(0, 80),
|
|
302
|
+
redacted: redactMatch(capturedValue),
|
|
303
|
+
description: pattern.description,
|
|
304
|
+
recommendation: pattern.recommendation,
|
|
305
|
+
compliance: pattern.compliance,
|
|
306
|
+
position: {
|
|
307
|
+
start: match.index,
|
|
308
|
+
end: match.index + fullMatch.length,
|
|
309
|
+
},
|
|
310
|
+
});
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
// Add custom patterns if configured
|
|
314
|
+
if (config.custom_patterns) {
|
|
315
|
+
for (const customRegex of config.custom_patterns) {
|
|
316
|
+
try {
|
|
317
|
+
const regex = new RegExp(customRegex, 'gi');
|
|
318
|
+
let match;
|
|
319
|
+
while ((match = regex.exec(input)) !== null) {
|
|
320
|
+
detections.push({
|
|
321
|
+
type: 'custom_pattern',
|
|
322
|
+
severity: 'high',
|
|
323
|
+
match: match[0].slice(0, 80),
|
|
324
|
+
redacted: redactMatch(match[0]),
|
|
325
|
+
description: `Custom pattern match: ${customRegex.slice(0, 40)}`,
|
|
326
|
+
recommendation: 'Use environment variables instead of inline secrets',
|
|
327
|
+
compliance: ['SOC2-CC6.1'],
|
|
328
|
+
position: {
|
|
329
|
+
start: match.index,
|
|
330
|
+
end: match.index + match[0].length,
|
|
331
|
+
},
|
|
332
|
+
});
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
catch {
|
|
336
|
+
// Invalid regex, skip
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
// ── Entropy-based detection: catch encoded/obfuscated secrets ──
|
|
341
|
+
// Scan for high-entropy strings assigned to suspicious variable names
|
|
342
|
+
const entropyRegex = /(?:key|secret|token|password|credential|auth|api_key)\s*[:=]\s*['"]([A-Za-z0-9+/=_-]{24,})['"]?/gi;
|
|
343
|
+
entropyRegex.lastIndex = 0;
|
|
344
|
+
let entropyMatch;
|
|
345
|
+
while ((entropyMatch = entropyRegex.exec(input)) !== null) {
|
|
346
|
+
const value = entropyMatch[1];
|
|
347
|
+
const entropy = shannonEntropy(value);
|
|
348
|
+
if (entropy > 4.5 && value.length >= 24) {
|
|
349
|
+
// Only add if not already caught by a more specific pattern
|
|
350
|
+
const alreadyCaught = detections.some(d => d.position && entropyMatch.index >= d.position.start && entropyMatch.index < d.position.end);
|
|
351
|
+
if (!alreadyCaught) {
|
|
352
|
+
detections.push({
|
|
353
|
+
type: 'high_entropy_secret',
|
|
354
|
+
severity: 'high',
|
|
355
|
+
match: entropyMatch[0].slice(0, 80),
|
|
356
|
+
redacted: redactMatch(value),
|
|
357
|
+
description: `High-entropy value (${entropy.toFixed(1)} bits) in secret assignment — likely encoded credential`,
|
|
358
|
+
recommendation: 'Use environment variables instead of inline secrets',
|
|
359
|
+
compliance: ['SOC2-CC6.1', 'CWE-798'],
|
|
360
|
+
position: { start: entropyMatch.index, end: entropyMatch.index + entropyMatch[0].length },
|
|
361
|
+
});
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
}
|
|
365
|
+
// Deduplicate overlapping detections (keep highest severity)
|
|
366
|
+
const deduped = deduplicateDetections(detections);
|
|
367
|
+
// Sort by severity (critical first)
|
|
368
|
+
const severityOrder = { critical: 0, high: 1, medium: 2 };
|
|
369
|
+
deduped.sort((a, b) => severityOrder[a.severity] - severityOrder[b.severity]);
|
|
370
|
+
const blockOnDetection = config.block_on_detection ?? true;
|
|
371
|
+
const status = deduped.length === 0
|
|
372
|
+
? 'clean'
|
|
373
|
+
: blockOnDetection
|
|
374
|
+
? 'blocked'
|
|
375
|
+
: 'warning';
|
|
376
|
+
return {
|
|
377
|
+
status,
|
|
378
|
+
detections: deduped,
|
|
379
|
+
duration_ms: Date.now() - start,
|
|
380
|
+
scanned_length: input.length,
|
|
381
|
+
};
|
|
382
|
+
}
|
|
383
|
+
/**
|
|
384
|
+
* Deduplicate overlapping detections — keep the higher severity one.
|
|
385
|
+
*/
|
|
386
|
+
function deduplicateDetections(detections) {
|
|
387
|
+
if (detections.length <= 1)
|
|
388
|
+
return detections;
|
|
389
|
+
const sorted = [...detections].sort((a, b) => {
|
|
390
|
+
const posA = a.position?.start ?? 0;
|
|
391
|
+
const posB = b.position?.start ?? 0;
|
|
392
|
+
return posA - posB;
|
|
393
|
+
});
|
|
394
|
+
const result = [];
|
|
395
|
+
const severityOrder = { critical: 0, high: 1, medium: 2 };
|
|
396
|
+
for (const detection of sorted) {
|
|
397
|
+
const overlapping = result.find(existing => {
|
|
398
|
+
if (!existing.position || !detection.position)
|
|
399
|
+
return false;
|
|
400
|
+
return detection.position.start < existing.position.end
|
|
401
|
+
&& detection.position.end > existing.position.start;
|
|
402
|
+
});
|
|
403
|
+
if (overlapping) {
|
|
404
|
+
// Keep the higher severity detection
|
|
405
|
+
if (severityOrder[detection.severity] < severityOrder[overlapping.severity]) {
|
|
406
|
+
const idx = result.indexOf(overlapping);
|
|
407
|
+
result[idx] = detection;
|
|
408
|
+
}
|
|
409
|
+
}
|
|
410
|
+
else {
|
|
411
|
+
result.push(detection);
|
|
412
|
+
}
|
|
413
|
+
}
|
|
414
|
+
return result;
|
|
415
|
+
}
|
|
416
|
+
/**
|
|
417
|
+
* Format a blocked input result for display in IDE/terminal.
|
|
418
|
+
* Used by hooks and CLI for human-readable output.
|
|
419
|
+
*/
|
|
420
|
+
export function formatDLPAlert(result) {
|
|
421
|
+
if (result.status === 'clean') {
|
|
422
|
+
return '✓ Input clean — no credentials detected.';
|
|
423
|
+
}
|
|
424
|
+
const icon = result.status === 'blocked' ? '🛑' : '⚠️';
|
|
425
|
+
const header = result.status === 'blocked'
|
|
426
|
+
? `${icon} BLOCKED — ${result.detections.length} credential(s) detected in agent input`
|
|
427
|
+
: `${icon} WARNING — ${result.detections.length} credential(s) detected in agent input`;
|
|
428
|
+
const details = result.detections.map(d => {
|
|
429
|
+
const complianceStr = d.compliance.length > 0
|
|
430
|
+
? `\n Compliance: ${d.compliance.join(', ')}`
|
|
431
|
+
: '';
|
|
432
|
+
return [
|
|
433
|
+
` [${d.severity.toUpperCase()}] ${d.description}`,
|
|
434
|
+
` Detected: ${d.redacted}`,
|
|
435
|
+
` → ${d.recommendation}`,
|
|
436
|
+
complianceStr,
|
|
437
|
+
].filter(Boolean).join('\n');
|
|
438
|
+
}).join('\n\n');
|
|
439
|
+
return `${header}\n\n${details}\n\nDuration: ${result.duration_ms}ms | Scanned: ${result.scanned_length} chars`;
|
|
440
|
+
}
|
|
441
|
+
/**
|
|
442
|
+
* Generate a structured audit log entry for a DLP event.
|
|
443
|
+
*/
|
|
444
|
+
export function createDLPAuditEntry(result, metadata) {
|
|
445
|
+
return {
|
|
446
|
+
type: 'dlp_event',
|
|
447
|
+
timestamp: metadata.timestamp ?? new Date().toISOString(),
|
|
448
|
+
agent: metadata.agent,
|
|
449
|
+
userId: metadata.userId,
|
|
450
|
+
status: result.status,
|
|
451
|
+
scanned_length: result.scanned_length,
|
|
452
|
+
duration_ms: result.duration_ms,
|
|
453
|
+
detections: result.detections.map(d => ({
|
|
454
|
+
type: d.type,
|
|
455
|
+
severity: d.severity,
|
|
456
|
+
redacted: d.redacted,
|
|
457
|
+
description: d.description,
|
|
458
|
+
compliance: d.compliance,
|
|
459
|
+
})),
|
|
460
|
+
};
|
|
461
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|