@masyv/secretscan 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +122 -0
- package/core/Cargo.toml +51 -0
- package/core/src/hook/mod.rs +100 -0
- package/core/src/lib.rs +93 -0
- package/core/src/main.rs +481 -0
- package/core/src/patterns/builtin.rs +366 -0
- package/core/src/patterns/entropy.rs +129 -0
- package/core/src/patterns/mod.rs +83 -0
- package/core/src/redact/mod.rs +69 -0
- package/core/src/store/mod.rs +241 -0
- package/hooks/scan-output.sh +37 -0
- package/package.json +37 -0
- package/plugin/tool.json +50 -0
- package/scripts/build.sh +9 -0
- package/scripts/install.sh +24 -0
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
//! Dozens of built-in secret patterns covering major providers and common credential formats.
|
|
2
|
+
|
|
3
|
+
use super::Pattern;
|
|
4
|
+
use crate::Severity;
|
|
5
|
+
use regex::Regex;
|
|
6
|
+
|
|
7
|
+
/// Builds a `Pattern` from an id, display name, severity and regex source.
///
/// Accepted call shapes:
/// - `pat!(id, name, severity, regex)` sets `min_entropy` to `None`.
/// - `pat!(id, name, severity, regex, entropy: min)` sets `min_entropy` to `Some(min)`.
///
/// Both shapes leave `context_keywords` empty. The regex is compiled on the
/// spot; an invalid regex panics with a message that names the pattern id.
macro_rules! pat {
    // Internal rule: the single place where the `Pattern` literal is spelled out.
    // It is listed first so the public rules never try to parse `@build` as an expression.
    (@build $id:expr, $name:expr, $sev:expr, $re:expr, $min_entropy:expr) => {
        Pattern {
            id: $id,
            name: $name,
            severity: $sev,
            regex: Regex::new($re).expect(concat!("invalid regex for ", $id)),
            min_entropy: $min_entropy,
            context_keywords: &[],
        }
    };
    // Pattern without an entropy gate.
    ($id:expr, $name:expr, $sev:expr, $re:expr) => {
        pat!(@build $id, $name, $sev, $re, None)
    };
    // Pattern whose matches must also reach a minimum entropy.
    ($id:expr, $name:expr, $sev:expr, $re:expr, entropy: $e:expr) => {
        pat!(@build $id, $name, $sev, $re, Some($e))
    };
}
|
|
29
|
+
|
|
30
|
+
pub fn all_patterns() -> Vec<Pattern> {
|
|
31
|
+
vec![
|
|
32
|
+
// ── Anthropic ────────────────────────────────────────────────────────
|
|
33
|
+
pat!(
|
|
34
|
+
"anthropic_api_key",
|
|
35
|
+
"Anthropic API Key",
|
|
36
|
+
Severity::Critical,
|
|
37
|
+
r"sk-ant-[a-zA-Z0-9\-_]{80,}"
|
|
38
|
+
),
|
|
39
|
+
|
|
40
|
+
// ── OpenAI ───────────────────────────────────────────────────────────
|
|
41
|
+
pat!(
|
|
42
|
+
"openai_api_key",
|
|
43
|
+
"OpenAI API Key",
|
|
44
|
+
Severity::Critical,
|
|
45
|
+
r"sk-[a-zA-Z0-9]{48}"
|
|
46
|
+
),
|
|
47
|
+
pat!(
|
|
48
|
+
"openai_org",
|
|
49
|
+
"OpenAI Organization ID",
|
|
50
|
+
Severity::Medium,
|
|
51
|
+
r"org-[a-zA-Z0-9]{24}"
|
|
52
|
+
),
|
|
53
|
+
|
|
54
|
+
// ── AWS ───────────────────────���────────────────────────────���─────────
|
|
55
|
+
pat!(
|
|
56
|
+
"aws_access_key",
|
|
57
|
+
"AWS Access Key ID",
|
|
58
|
+
Severity::Critical,
|
|
59
|
+
r"(?:A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}"
|
|
60
|
+
),
|
|
61
|
+
pat!(
|
|
62
|
+
"aws_secret_key",
|
|
63
|
+
"AWS Secret Access Key",
|
|
64
|
+
Severity::Critical,
|
|
65
|
+
r#"(?i)aws[_\-\s]?secret[_\-\s]?(?:access[_\-\s]?)?key["':\s=]+([A-Za-z0-9/+=]{40})"#,
|
|
66
|
+
entropy: 4.0
|
|
67
|
+
),
|
|
68
|
+
pat!(
|
|
69
|
+
"aws_session_token",
|
|
70
|
+
"AWS Session Token",
|
|
71
|
+
Severity::High,
|
|
72
|
+
r#"(?i)aws[_\-\s]?session[_\-\s]?token["':\s=]+([A-Za-z0-9/+=]{100,})"#
|
|
73
|
+
),
|
|
74
|
+
|
|
75
|
+
// ── GitHub ───────────────────────────────────────────────────────────
|
|
76
|
+
pat!(
|
|
77
|
+
"github_pat",
|
|
78
|
+
"GitHub Personal Access Token",
|
|
79
|
+
Severity::Critical,
|
|
80
|
+
r"gh[pousr]_[A-Za-z0-9_]{36,255}"
|
|
81
|
+
),
|
|
82
|
+
pat!(
|
|
83
|
+
"github_oauth",
|
|
84
|
+
"GitHub OAuth Token",
|
|
85
|
+
Severity::High,
|
|
86
|
+
r"gho_[A-Za-z0-9_]{36}"
|
|
87
|
+
),
|
|
88
|
+
pat!(
|
|
89
|
+
"github_app_token",
|
|
90
|
+
"GitHub App Token",
|
|
91
|
+
Severity::High,
|
|
92
|
+
r"(?:ghu|ghs)_[A-Za-z0-9_]{36}"
|
|
93
|
+
),
|
|
94
|
+
|
|
95
|
+
// ── GitLab ───────────────────────────────────────────���───────────────
|
|
96
|
+
pat!(
|
|
97
|
+
"gitlab_pat",
|
|
98
|
+
"GitLab Personal Access Token",
|
|
99
|
+
Severity::High,
|
|
100
|
+
r"glpat-[a-zA-Z0-9\-_]{20}"
|
|
101
|
+
),
|
|
102
|
+
pat!(
|
|
103
|
+
"gitlab_pipeline",
|
|
104
|
+
"GitLab Pipeline Trigger Token",
|
|
105
|
+
Severity::Medium,
|
|
106
|
+
r"glptt-[a-f0-9]{40}"
|
|
107
|
+
),
|
|
108
|
+
|
|
109
|
+
// ── Stripe ────────────────────────��─────────────────────────────��────
|
|
110
|
+
pat!(
|
|
111
|
+
"stripe_secret",
|
|
112
|
+
"Stripe Secret Key",
|
|
113
|
+
Severity::Critical,
|
|
114
|
+
r"sk_live_[a-zA-Z0-9]{24,}"
|
|
115
|
+
),
|
|
116
|
+
pat!(
|
|
117
|
+
"stripe_restricted",
|
|
118
|
+
"Stripe Restricted Key",
|
|
119
|
+
Severity::High,
|
|
120
|
+
r"rk_live_[a-zA-Z0-9]{24,}"
|
|
121
|
+
),
|
|
122
|
+
pat!(
|
|
123
|
+
"stripe_test",
|
|
124
|
+
"Stripe Test Key",
|
|
125
|
+
Severity::Low,
|
|
126
|
+
r"sk_test_[a-zA-Z0-9]{24,}"
|
|
127
|
+
),
|
|
128
|
+
|
|
129
|
+
// ── Google ─────────────────────────���──────────────────────────���──────
|
|
130
|
+
pat!(
|
|
131
|
+
"google_api_key",
|
|
132
|
+
"Google API Key",
|
|
133
|
+
Severity::High,
|
|
134
|
+
r"AIza[0-9A-Za-z\-_]{35}"
|
|
135
|
+
),
|
|
136
|
+
pat!(
|
|
137
|
+
"google_oauth",
|
|
138
|
+
"Google OAuth Client Secret",
|
|
139
|
+
Severity::High,
|
|
140
|
+
r"GOCSPX-[a-zA-Z0-9\-_]{28}"
|
|
141
|
+
),
|
|
142
|
+
pat!(
|
|
143
|
+
"google_service_account",
|
|
144
|
+
"Google Service Account Key",
|
|
145
|
+
Severity::Critical,
|
|
146
|
+
r#""type"\s*:\s*"service_account""#
|
|
147
|
+
),
|
|
148
|
+
|
|
149
|
+
// ── Slack ────────────────────────────────────────────────────────���───
|
|
150
|
+
pat!(
|
|
151
|
+
"slack_bot_token",
|
|
152
|
+
"Slack Bot Token",
|
|
153
|
+
Severity::High,
|
|
154
|
+
r"xoxb-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24}"
|
|
155
|
+
),
|
|
156
|
+
pat!(
|
|
157
|
+
"slack_user_token",
|
|
158
|
+
"Slack User Token",
|
|
159
|
+
Severity::High,
|
|
160
|
+
r"xoxp-[0-9]{10,13}-[0-9]{10,13}-[0-9]{10,13}-[a-f0-9]{32}"
|
|
161
|
+
),
|
|
162
|
+
pat!(
|
|
163
|
+
"slack_webhook",
|
|
164
|
+
"Slack Incoming Webhook",
|
|
165
|
+
Severity::Medium,
|
|
166
|
+
r"https://hooks\.slack\.com/services/T[A-Z0-9]+/B[A-Z0-9]+/[a-zA-Z0-9]+"
|
|
167
|
+
),
|
|
168
|
+
pat!(
|
|
169
|
+
"slack_app_token",
|
|
170
|
+
"Slack App Token",
|
|
171
|
+
Severity::High,
|
|
172
|
+
r"xapp-\d-[A-Z0-9]+-\d+-[a-f0-9]+"
|
|
173
|
+
),
|
|
174
|
+
|
|
175
|
+
// ── npm ────────────────────────────���────────────────────────────��────
|
|
176
|
+
pat!(
|
|
177
|
+
"npm_token",
|
|
178
|
+
"npm Access Token",
|
|
179
|
+
Severity::High,
|
|
180
|
+
r"npm_[a-zA-Z0-9]{36}"
|
|
181
|
+
),
|
|
182
|
+
|
|
183
|
+
// ── SendGrid ────────────────────────────��────────────────────────────
|
|
184
|
+
pat!(
|
|
185
|
+
"sendgrid_api_key",
|
|
186
|
+
"SendGrid API Key",
|
|
187
|
+
Severity::High,
|
|
188
|
+
r"SG\.[a-zA-Z0-9\-_]{22}\.[a-zA-Z0-9\-_]{43}"
|
|
189
|
+
),
|
|
190
|
+
|
|
191
|
+
// ── Twilio ───────────────────────────────────────────────────────────
|
|
192
|
+
pat!(
|
|
193
|
+
"twilio_account_sid",
|
|
194
|
+
"Twilio Account SID",
|
|
195
|
+
Severity::Medium,
|
|
196
|
+
r"AC[a-f0-9]{32}"
|
|
197
|
+
),
|
|
198
|
+
pat!(
|
|
199
|
+
"twilio_auth_token",
|
|
200
|
+
"Twilio Auth Token",
|
|
201
|
+
Severity::High,
|
|
202
|
+
r"((?i)twilio[_\-\s]?auth[_\-\s]?token[':\s=]+([a-f0-9]{32}))"
|
|
203
|
+
),
|
|
204
|
+
|
|
205
|
+
// ── Cloudflare ───────────────────────────────────────────────────────
|
|
206
|
+
pat!(
|
|
207
|
+
"cloudflare_api_key",
|
|
208
|
+
"Cloudflare API Key",
|
|
209
|
+
Severity::High,
|
|
210
|
+
r"((?i)cloudflare[_\-\s]?(?:api[_\-\s]?)?(?:key|token)[':\s=]+([a-zA-Z0-9_\-]{37,40}))"
|
|
211
|
+
),
|
|
212
|
+
pat!(
|
|
213
|
+
"cloudflare_global_key",
|
|
214
|
+
"Cloudflare Global API Key",
|
|
215
|
+
Severity::Critical,
|
|
216
|
+
r"[0-9a-f]{37}"
|
|
217
|
+
),
|
|
218
|
+
|
|
219
|
+
// ── Azure ────────────────────────────────────────────────────────────
|
|
220
|
+
pat!(
|
|
221
|
+
"azure_connection_string",
|
|
222
|
+
"Azure Storage Connection String",
|
|
223
|
+
Severity::Critical,
|
|
224
|
+
r"DefaultEndpointsProtocol=https;AccountName=[^;]+;AccountKey=[^;]+"
|
|
225
|
+
),
|
|
226
|
+
pat!(
|
|
227
|
+
"azure_sas_token",
|
|
228
|
+
"Azure SAS Token",
|
|
229
|
+
Severity::High,
|
|
230
|
+
r"sv=\d{4}-\d{2}-\d{2}&s[a-z]=.*?&sig=[A-Za-z0-9%+/=]+"
|
|
231
|
+
),
|
|
232
|
+
|
|
233
|
+
// ── Database URLs ─────────────────────────────────────────────────────
|
|
234
|
+
pat!(
|
|
235
|
+
"postgres_url",
|
|
236
|
+
"PostgreSQL Connection URL with credentials",
|
|
237
|
+
Severity::Critical,
|
|
238
|
+
r"postgres(?:ql)?://[^:]+:[^@\s]{3,}@[^\s]+"
|
|
239
|
+
),
|
|
240
|
+
pat!(
|
|
241
|
+
"mysql_url",
|
|
242
|
+
"MySQL Connection URL with credentials",
|
|
243
|
+
Severity::Critical,
|
|
244
|
+
r"mysql://[^:]+:[^@\s]{3,}@[^\s]+"
|
|
245
|
+
),
|
|
246
|
+
pat!(
|
|
247
|
+
"mongodb_url",
|
|
248
|
+
"MongoDB Connection URL with credentials",
|
|
249
|
+
Severity::Critical,
|
|
250
|
+
r"mongodb(?:\+srv)?://[^:]+:[^@\s]{3,}@[^\s]+"
|
|
251
|
+
),
|
|
252
|
+
pat!(
|
|
253
|
+
"redis_url",
|
|
254
|
+
"Redis URL with password",
|
|
255
|
+
Severity::High,
|
|
256
|
+
r"redis://[^:]*:[^@\s]{3,}@[^\s]+"
|
|
257
|
+
),
|
|
258
|
+
|
|
259
|
+
// ── Private Keys ──────────────────────────────────────────────────────
|
|
260
|
+
pat!(
|
|
261
|
+
"private_key_pem",
|
|
262
|
+
"PEM Private Key",
|
|
263
|
+
Severity::Critical,
|
|
264
|
+
r"-----BEGIN (?:RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY(?:\s+BLOCK)?-----"
|
|
265
|
+
),
|
|
266
|
+
pat!(
|
|
267
|
+
"certificate",
|
|
268
|
+
"PEM Certificate",
|
|
269
|
+
Severity::Low,
|
|
270
|
+
r"-----BEGIN CERTIFICATE-----"
|
|
271
|
+
),
|
|
272
|
+
|
|
273
|
+
// ── JWT ─────────────────────────���─────────────────────────────��───────
|
|
274
|
+
pat!(
|
|
275
|
+
"jwt_token",
|
|
276
|
+
"JSON Web Token",
|
|
277
|
+
Severity::Medium,
|
|
278
|
+
r"ey[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+"
|
|
279
|
+
),
|
|
280
|
+
|
|
281
|
+
// ── Heroku ───────────────────────────────────────────────────────────��
|
|
282
|
+
pat!(
|
|
283
|
+
"heroku_api_key",
|
|
284
|
+
"Heroku API Key",
|
|
285
|
+
Severity::High,
|
|
286
|
+
r"((?i)heroku[_\-\s]?(?:api[_\-\s]?)?key[':\s=]+([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}))"
|
|
287
|
+
),
|
|
288
|
+
|
|
289
|
+
// ── Vercel ──────────────────────────────────���────────────────────���────
|
|
290
|
+
pat!(
|
|
291
|
+
"vercel_token",
|
|
292
|
+
"Vercel Access Token",
|
|
293
|
+
Severity::High,
|
|
294
|
+
r"((?i)vercel[_\-\s]?token[':\s=]+([a-zA-Z0-9]{24}))"
|
|
295
|
+
),
|
|
296
|
+
|
|
297
|
+
// ── Datadog ────────────────────────────────���────────────────────────��─
|
|
298
|
+
pat!(
|
|
299
|
+
"datadog_api_key",
|
|
300
|
+
"Datadog API Key",
|
|
301
|
+
Severity::High,
|
|
302
|
+
r"((?i)(?:datadog|dd)[_\-\s]?(?:api[_\-\s]?)?key[':\s=]+([a-f0-9]{32}))"
|
|
303
|
+
),
|
|
304
|
+
|
|
305
|
+
// ── Mailgun ───────────────────────────────────────────────────────────
|
|
306
|
+
pat!(
|
|
307
|
+
"mailgun_api_key",
|
|
308
|
+
"Mailgun API Key",
|
|
309
|
+
Severity::High,
|
|
310
|
+
r"key-[a-f0-9]{32}"
|
|
311
|
+
),
|
|
312
|
+
|
|
313
|
+
// ── Shopify ─────────────────────��─────────────────────────���───────────
|
|
314
|
+
pat!(
|
|
315
|
+
"shopify_token",
|
|
316
|
+
"Shopify Access Token",
|
|
317
|
+
Severity::High,
|
|
318
|
+
r"shpat_[a-fA-F0-9]{32}"
|
|
319
|
+
),
|
|
320
|
+
pat!(
|
|
321
|
+
"shopify_shared_secret",
|
|
322
|
+
"Shopify Shared Secret",
|
|
323
|
+
Severity::High,
|
|
324
|
+
r"shpss_[a-fA-F0-9]{32}"
|
|
325
|
+
),
|
|
326
|
+
|
|
327
|
+
// ── Discord ───────────────────────────────────────────────��───────────
|
|
328
|
+
pat!(
|
|
329
|
+
"discord_bot_token",
|
|
330
|
+
"Discord Bot Token",
|
|
331
|
+
Severity::High,
|
|
332
|
+
r"[MN][a-zA-Z0-9\-_]{23,25}\.[a-zA-Z0-9\-_]{6}\.[a-zA-Z0-9\-_]{27,38}"
|
|
333
|
+
),
|
|
334
|
+
pat!(
|
|
335
|
+
"discord_webhook",
|
|
336
|
+
"Discord Webhook URL",
|
|
337
|
+
Severity::Medium,
|
|
338
|
+
r"https://discord(?:app)?\.com/api/webhooks/[0-9]+/[a-zA-Z0-9_\-]+"
|
|
339
|
+
),
|
|
340
|
+
|
|
341
|
+
// ── HuggingFace ───────────────────────────────────────���───────────────
|
|
342
|
+
pat!(
|
|
343
|
+
"huggingface_token",
|
|
344
|
+
"HuggingFace Access Token",
|
|
345
|
+
Severity::High,
|
|
346
|
+
r"hf_[a-zA-Z0-9]{34,}"
|
|
347
|
+
),
|
|
348
|
+
|
|
349
|
+
// ── Env-file patterns ─────────────────────────────────────────────────
|
|
350
|
+
pat!(
|
|
351
|
+
"env_secret",
|
|
352
|
+
"Environment Variable with Secret",
|
|
353
|
+
Severity::Medium,
|
|
354
|
+
r#"(?i)(?:SECRET|PASSWORD|PASSWD|PWD|API_KEY|AUTH_TOKEN|PRIVATE_KEY|ACCESS_TOKEN)\s*=\s*["']?([A-Za-z0-9+/=_\-!@#$%^&*]{8,})["']?"#,
|
|
355
|
+
entropy: 3.5
|
|
356
|
+
),
|
|
357
|
+
|
|
358
|
+
// ── Generic credential patterns ───────────────────────────��───────────
|
|
359
|
+
pat!(
|
|
360
|
+
"basic_auth",
|
|
361
|
+
"HTTP Basic Auth Credentials",
|
|
362
|
+
Severity::High,
|
|
363
|
+
r"https?://[A-Za-z0-9_\-\.]+:[A-Za-z0-9_\-\.!@#$%^&*]{4,}@"
|
|
364
|
+
),
|
|
365
|
+
]
|
|
366
|
+
}
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
//! Shannon entropy analysis for detecting high-entropy strings that look like secrets.
|
|
2
|
+
|
|
3
|
+
use crate::{Finding, Severity};
|
|
4
|
+
use regex::Regex;
|
|
5
|
+
use once_cell::sync::Lazy;
|
|
6
|
+
|
|
7
|
+
/// Entropy floor, in bits per byte, below which a candidate is not reported.
const HIGH_ENTROPY_THRESHOLD: f64 = 4.5;
/// Shortest candidate (in bytes) that the entropy scan will consider.
const MIN_SECRET_LENGTH: usize = 20;
/// Longest candidate (in bytes) that the entropy scan will consider; meant to
/// keep long base64 payloads such as JWT bodies from being reported.
const MAX_SECRET_LENGTH: usize = 200;

/// Returns the Shannon entropy of `s`, measured in bits per byte.
///
/// The string is treated as a sequence of bytes (not Unicode scalar values):
/// each distinct byte value contributes `-p * log2(p)` where `p` is its
/// relative frequency. An empty string yields `0.0`.
pub fn shannon(s: &str) -> f64 {
    let byte_len = s.len();
    if byte_len == 0 {
        return 0.0;
    }

    // Occurrence count for each of the 256 possible byte values.
    let histogram = s.bytes().fold([0u32; 256], |mut counts, byte| {
        counts[usize::from(byte)] += 1;
        counts
    });

    let total = byte_len as f64;
    histogram
        .iter()
        .copied()
        .filter(|&count| count > 0)
        .map(|count| {
            let probability = f64::from(count) / total;
            -probability * probability.log2()
        })
        .sum()
}
|
|
32
|
+
|
|
33
|
+
/// Regex for candidate high-entropy tokens: alphanumeric + common secret chars.
|
|
34
|
+
static CANDIDATE_RE: Lazy<Regex> = Lazy::new(|| {
|
|
35
|
+
Regex::new(r"[A-Za-z0-9+/=_\-]{20,200}").unwrap()
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
/// Context keywords that, if nearby, raise the flag from INFO to MEDIUM.
|
|
39
|
+
static CONTEXT_KW_RE: Lazy<Regex> = Lazy::new(|| {
|
|
40
|
+
Regex::new(r"(?i)(secret|password|passwd|pwd|token|key|api[_\-]?key|credential|auth|private|bearer|access[_\-]?key)")
|
|
41
|
+
.unwrap()
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
/// Scan for generic high-entropy strings.
|
|
45
|
+
pub fn scan_high_entropy(text: &str) -> Vec<Finding> {
|
|
46
|
+
let mut findings = Vec::new();
|
|
47
|
+
|
|
48
|
+
for m in CANDIDATE_RE.find_iter(text) {
|
|
49
|
+
let s = m.as_str();
|
|
50
|
+
if s.len() < MIN_SECRET_LENGTH || s.len() > MAX_SECRET_LENGTH {
|
|
51
|
+
continue;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
let e = shannon(s);
|
|
55
|
+
if e < HIGH_ENTROPY_THRESHOLD {
|
|
56
|
+
continue;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
// Skip if it looks like a base64-encoded UUID or hash (common false positive)
|
|
60
|
+
if looks_like_hash(s) {
|
|
61
|
+
continue;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
// Determine severity by context
|
|
65
|
+
let context_start = m.start().saturating_sub(80);
|
|
66
|
+
let context_end = (m.end() + 80).min(text.len());
|
|
67
|
+
let context = &text[context_start..context_end];
|
|
68
|
+
let severity = if CONTEXT_KW_RE.is_match(context) {
|
|
69
|
+
Severity::Medium
|
|
70
|
+
} else {
|
|
71
|
+
Severity::Low
|
|
72
|
+
};
|
|
73
|
+
|
|
74
|
+
let hash = blake3::hash(s.as_bytes());
|
|
75
|
+
let fingerprint = format!("{:.8}", hash.to_hex());
|
|
76
|
+
|
|
77
|
+
findings.push(Finding {
|
|
78
|
+
pattern_id: "high_entropy",
|
|
79
|
+
pattern_name: "High-entropy string",
|
|
80
|
+
severity,
|
|
81
|
+
matched: s.to_string(),
|
|
82
|
+
redacted: format!("[REDACTED:high_entropy:{}]", fingerprint),
|
|
83
|
+
fingerprint,
|
|
84
|
+
offset: m.start(),
|
|
85
|
+
length: s.len(),
|
|
86
|
+
});
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
findings
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
/// Heuristic for "this is probably a hex digest rather than a secret".
///
/// Returns `true` only when `s` is exactly 32, 40, 56 or 64 bytes long and
/// more than 90% of those bytes are ASCII hex digits.
fn looks_like_hash(s: &str) -> bool {
    let byte_len = s.len();

    // Only the common digest widths are considered at all.
    if ![32, 40, 56, 64].contains(&byte_len) {
        return false;
    }

    let hex_count = s.chars().filter(char::is_ascii_hexdigit).count();
    (hex_count as f64) / (byte_len as f64) > 0.90
}
|
|
99
|
+
|
|
100
|
+
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn entropy_of_repeated_char() {
        // A string made of one repeated symbol carries (almost) no information.
        let bits = shannon("aaaaaaaaaa");
        assert!(bits < 1.0);
    }

    #[test]
    fn entropy_of_random_string() {
        // A real API key-like string should have high entropy.
        let key_like = "xK9mP2nQ8rL5vT1wJ4hB7cF0dA3sE6uI";
        let bits = shannon(key_like);
        assert!(bits > 4.0);
    }

    #[test]
    fn hash_not_flagged() {
        // A SHA256-sized hex string is recognised by the hash heuristic.
        let digest = "a3f5c2d1e4b6a7f8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0a1b2";
        assert!(looks_like_hash(digest));
    }

    #[test]
    fn high_entropy_detected() {
        // A key-like value assigned to `api_key` yields at least one finding.
        let findings = scan_high_entropy("api_key = xK9mP2nQ8rL5vT1wJ4hB7cF0dA3sE6uI");
        assert!(!findings.is_empty());
    }
}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
pub mod builtin;
|
|
2
|
+
pub mod entropy;
|
|
3
|
+
|
|
4
|
+
use crate::{Finding, Severity};
|
|
5
|
+
use once_cell::sync::Lazy;
|
|
6
|
+
use regex::Regex;
|
|
7
|
+
|
|
8
|
+
/// A compiled secret pattern.
|
|
9
|
+
pub struct Pattern {
|
|
10
|
+
pub id: &'static str,
|
|
11
|
+
pub name: &'static str,
|
|
12
|
+
pub severity: Severity,
|
|
13
|
+
pub regex: Regex,
|
|
14
|
+
/// Minimum entropy threshold (bits/char). None = skip entropy check.
|
|
15
|
+
pub min_entropy: Option<f64>,
|
|
16
|
+
/// Context keywords that raise confidence (e.g. "password", "secret")
|
|
17
|
+
pub context_keywords: &'static [&'static str],
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
impl Pattern {
|
|
21
|
+
pub fn scan(&self, text: &str) -> Vec<Finding> {
|
|
22
|
+
let mut findings = Vec::new();
|
|
23
|
+
for m in self.regex.find_iter(text) {
|
|
24
|
+
let matched = m.as_str();
|
|
25
|
+
|
|
26
|
+
// Entropy gate
|
|
27
|
+
if let Some(min_e) = self.min_entropy {
|
|
28
|
+
if entropy::shannon(matched) < min_e {
|
|
29
|
+
continue;
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
// Build short fingerprint (last 8 hex chars of blake3)
|
|
34
|
+
let hash = blake3::hash(matched.as_bytes());
|
|
35
|
+
let fingerprint = format!("{:.8}", hash.to_hex());
|
|
36
|
+
|
|
37
|
+
findings.push(Finding {
|
|
38
|
+
pattern_id: self.id,
|
|
39
|
+
pattern_name: self.name,
|
|
40
|
+
severity: self.severity,
|
|
41
|
+
matched: matched.to_string(),
|
|
42
|
+
redacted: format!("[REDACTED:{}:{}]", self.id, fingerprint),
|
|
43
|
+
fingerprint,
|
|
44
|
+
offset: m.start(),
|
|
45
|
+
length: matched.len(),
|
|
46
|
+
});
|
|
47
|
+
}
|
|
48
|
+
findings
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
/// All compiled built-in patterns, initialised once.
|
|
53
|
+
pub static PATTERNS: Lazy<Vec<Pattern>> = Lazy::new(builtin::all_patterns);
|
|
54
|
+
|
|
55
|
+
/// Scan text against all registered patterns.
|
|
56
|
+
pub fn scan_all(text: &str) -> Vec<Finding> {
|
|
57
|
+
let mut findings: Vec<Finding> = PATTERNS.iter().flat_map(|p| p.scan(text)).collect();
|
|
58
|
+
|
|
59
|
+
// Add entropy-based generic detection
|
|
60
|
+
findings.extend(entropy::scan_high_entropy(text));
|
|
61
|
+
|
|
62
|
+
// Sort by offset, deduplicate overlapping findings (keep highest severity)
|
|
63
|
+
findings.sort_by_key(|f| f.offset);
|
|
64
|
+
dedup_overlapping(findings)
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
/// Collapses overlapping findings, expecting `findings` ordered by ascending offset.
///
/// Each finding is compared with the most recently kept one. If it starts
/// before that finding ends, the two overlap and only one survives: the new
/// finding replaces the kept one when its severity is strictly higher,
/// otherwise the new finding is dropped. Non-overlapping findings are appended.
fn dedup_overlapping(findings: Vec<Finding>) -> Vec<Finding> {
    let mut kept: Vec<Finding> = Vec::new();

    for candidate in findings {
        let overlaps_previous = kept
            .last()
            .map_or(false, |prev| candidate.offset < prev.offset + prev.length);

        if !overlaps_previous {
            kept.push(candidate);
            continue;
        }

        // Overlap: the strictly more severe finding wins; ties keep the earlier one.
        if let Some(prev) = kept.last_mut() {
            if candidate.severity > prev.severity {
                *prev = candidate;
            }
        }
    }

    kept
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
//! Redaction engine — replaces detected secrets in text with safe placeholders.
|
|
2
|
+
|
|
3
|
+
use crate::{Finding, ScanResult};
|
|
4
|
+
|
|
5
|
+
/// Apply all findings to the text, replacing each matched secret with its
|
|
6
|
+
/// redacted placeholder. Edits are applied in reverse order so byte offsets
|
|
7
|
+
/// stay valid.
|
|
8
|
+
pub fn apply_redactions(text: &str, findings: &[Finding]) -> String {
|
|
9
|
+
let mut result = text.to_string();
|
|
10
|
+
// Sort descending by offset so we don't invalidate later offsets
|
|
11
|
+
let mut sorted = findings.to_vec();
|
|
12
|
+
sorted.sort_by(|a, b| b.offset.cmp(&a.offset));
|
|
13
|
+
|
|
14
|
+
for f in &sorted {
|
|
15
|
+
let end = f.offset + f.length;
|
|
16
|
+
if end <= result.len() {
|
|
17
|
+
result.replace_range(f.offset..end, &f.redacted);
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
result
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
/// Full scan + redact pipeline. Returns the redacted text and scan metadata.
|
|
24
|
+
pub fn scan_and_redact(
|
|
25
|
+
text: &str,
|
|
26
|
+
tool_name: Option<&str>,
|
|
27
|
+
db_path: &std::path::Path,
|
|
28
|
+
session_id: &str,
|
|
29
|
+
) -> ScanResult {
|
|
30
|
+
use crate::patterns;
|
|
31
|
+
use crate::store::Store;
|
|
32
|
+
|
|
33
|
+
let findings = patterns::scan_all(text);
|
|
34
|
+
|
|
35
|
+
// Filter out allowlisted findings
|
|
36
|
+
let store = Store::open(db_path).ok();
|
|
37
|
+
let active_findings: Vec<Finding> = if let Some(ref s) = store {
|
|
38
|
+
findings
|
|
39
|
+
.into_iter()
|
|
40
|
+
.filter(|f| !s.is_allowed(&f.fingerprint).unwrap_or(false))
|
|
41
|
+
.collect()
|
|
42
|
+
} else {
|
|
43
|
+
findings
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
// Record findings to DB
|
|
47
|
+
if let Some(ref s) = store {
|
|
48
|
+
for f in &active_findings {
|
|
49
|
+
let _ = s.record_finding(f, tool_name, session_id);
|
|
50
|
+
}
|
|
51
|
+
let _ = s.record_scan(
|
|
52
|
+
session_id,
|
|
53
|
+
tool_name,
|
|
54
|
+
text.len(),
|
|
55
|
+
active_findings.len(),
|
|
56
|
+
active_findings.len(),
|
|
57
|
+
);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
let redacted_text = apply_redactions(text, &active_findings);
|
|
61
|
+
let clean = active_findings.is_empty();
|
|
62
|
+
|
|
63
|
+
ScanResult {
|
|
64
|
+
original_len: text.len(),
|
|
65
|
+
redacted_text,
|
|
66
|
+
findings: active_findings,
|
|
67
|
+
clean,
|
|
68
|
+
}
|
|
69
|
+
}
|