@rigour-labs/core 5.2.7 → 5.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -243,6 +243,80 @@ function shannonEntropy(str) {
243
243
  return sum - p * Math.log2(p);
244
244
  }, 0);
245
245
  }
/**
 * Smart context filter — decides if a detection is a real credential or a false positive.
 *
 * The challenge: build logs can contain BOTH false positives (variable names mentioned
 * in npm warnings) AND real leaks (a password accidentally echoed in CI output).
 *
 * Strategy:
 *  1. If the match has an actual SECRET VALUE (high entropy, long random string) → KEEP IT
 *  2. If the match is just a variable NAME reference with no real value → SKIP IT
 *  3. If the match is in a documentation/comment context → SKIP IT
 *
 * @param {{ match: string, type: string, position?: { start?: number, end?: number } }} detection
 *   A single scanner hit. `position.start` / `position.end` are used as character
 *   offsets into `input`; when either is missing it is derived from `match`.
 * @param {string} input - The full text that was scanned.
 * @returns {boolean} `true` when the detection should be dropped as a false positive,
 *   `false` when it should be kept.
 */
function isLikelyFalsePositive(detection, input) {
    const { match, type, position } = detection;
    // Provider-specific prefixed keys are ALWAYS real (AKIA*, sk-*, ghp_*, etc.)
    const prefixedTypes = [
        'aws_access_key', 'openai_key', 'anthropic_key', 'github_token',
        'stripe_key', 'slack_token', 'sendgrid_key', 'private_key',
        'private_key_full', 'jwt_token', 'bearer_token', 'database_url',
        'credentials_in_url', 'gcp_service_account',
    ];
    if (prefixedTypes.includes(type))
        return false;
    // Locate the match inside the input. When the scanner did not supply offsets,
    // search for the match text instead of assuming it sits at offset 0, and derive
    // the end offset from the start so both always describe the same span.
    const matchStart = position?.start ?? Math.max(input.indexOf(match), 0);
    const matchEnd = position?.end ?? matchStart + match.length;
    // Get the line containing the match
    const lineStart = input.lastIndexOf('\n', matchStart) + 1;
    const lineEnd = input.indexOf('\n', matchEnd);
    const line = input.slice(lineStart, lineEnd === -1 ? undefined : lineEnd).trim();
    // npm notice/warn lines mentioning variable names (not values)
    // NOTE(review): this drops the detection even when the match carries a value —
    // confirm that is intended given strategy item 1 above.
    if (/^npm\s+(?:notice|warn|WARN|ERR!)/i.test(line))
        return true;
    // Docker build step output referencing env var names
    if (/^#\d+\s+[\d.]+\s/.test(line)) {
        // But if there's an actual assignment with a value, keep it
        if (/[:=]\s*['"]?[A-Za-z0-9+/=_-]{20,}/.test(match))
            return false;
        return true;
    }
    // "digest:", "hash:", "checksum:" followed by hex — not credentials
    if (/(?:digest|hash|checksum|sha256|sha1|md5)\s*[:=]/i.test(line))
        return true;
    // Error messages referencing env var names without values
    if (/(?:missing|undefined|not set|not found|required)\s+.*(?:NPM_TOKEN|DOCKER_PASSWORD|PYPI_TOKEN)/i.test(line))
        return true;
    // For generic patterns (password_assignment, env_variable, ci_secret, base64/hex_secret):
    // Check if the captured VALUE has enough entropy to be a real secret
    const genericTypes = ['password_assignment', 'env_variable', 'ci_secret', 'base64_secret', 'hex_secret', 'high_entropy_secret'];
    if (genericTypes.includes(type)) {
        // Extract the value portion (after = or : ), minus optional surrounding quotes
        const valueMatch = match.match(/[:=]\s*['"]?(.+?)['"]?\s*$/);
        if (valueMatch) {
            const value = valueMatch[1];
            const entropy = shannonEntropyForFilter(value);
            // Low entropy + short = likely a placeholder, example, or variable name
            if (entropy < 3.0 && value.length < 16)
                return true;
            // Pure numeric strings in log context (build numbers, timestamps)
            if (/^\d+$/.test(value))
                return true;
        }
    }
    return false;
}
/**
 * Shannon entropy helper for the context filter.
 *
 * Computes the entropy, in bits per symbol, of the character distribution of `str`.
 * Symbols are Unicode code points, and the total is counted in the same iteration
 * so that the probabilities always sum to 1 (using `str.length` would count UTF-16
 * code units and skew the result for characters outside the BMP).
 *
 * @param {string} str - The string to measure.
 * @returns {number} Entropy in bits per symbol; 0 for an empty string.
 */
function shannonEntropyForFilter(str) {
    const counts = new Map();
    let total = 0;
    for (const ch of str) {
        counts.set(ch, (counts.get(ch) ?? 0) + 1);
        total += 1;
    }
    if (total === 0)
        return 0;
    let entropy = 0;
    for (const count of counts.values()) {
        const p = count / total;
        entropy -= p * Math.log2(p);
    }
    return entropy;
}
246
320
  // ── Core Scanner ──────────────────────────────────────────────────
247
321
  /**
248
322
  * Redact a matched credential for safe display.
@@ -361,8 +435,10 @@ export function scanInputForCredentials(input, config = {}) {
361
435
  }
362
436
  }
363
437
  }
438
+ // ── Context filter: remove false positives from log/doc/CI output ──
439
+ const filtered = detections.filter(d => !isLikelyFalsePositive(d, input));
364
440
  // Deduplicate overlapping detections (keep highest severity)
365
- const deduped = deduplicateDetections(detections);
441
+ const deduped = deduplicateDetections(filtered);
366
442
  // Sort by severity (critical first)
367
443
  const severityOrder = { critical: 0, high: 1, medium: 2 };
368
444
  deduped.sort((a, b) => severityOrder[a.severity] - severityOrder[b.severity]);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rigour-labs/core",
3
- "version": "5.2.7",
3
+ "version": "5.2.9",
4
4
  "description": "AI-native quality gate engine with local Bayesian learning. AST analysis, drift detection, Fix Packet generation, and agent self-healing across TypeScript, JavaScript, Python, Go, Ruby, and C#.",
5
5
  "license": "MIT",
6
6
  "homepage": "https://rigour.run",
@@ -66,11 +66,11 @@
66
66
  "@xenova/transformers": "^2.17.2",
67
67
  "sqlite3": "^5.1.7",
68
68
  "openai": "^4.104.0",
69
- "@rigour-labs/brain-darwin-arm64": "5.2.7",
70
- "@rigour-labs/brain-linux-arm64": "5.2.7",
71
- "@rigour-labs/brain-linux-x64": "5.2.7",
72
- "@rigour-labs/brain-win-x64": "5.2.7",
73
- "@rigour-labs/brain-darwin-x64": "5.2.7"
69
+ "@rigour-labs/brain-darwin-arm64": "5.2.9",
70
+ "@rigour-labs/brain-linux-arm64": "5.2.9",
71
+ "@rigour-labs/brain-linux-x64": "5.2.9",
72
+ "@rigour-labs/brain-darwin-x64": "5.2.9",
73
+ "@rigour-labs/brain-win-x64": "5.2.9"
74
74
  },
75
75
  "devDependencies": {
76
76
  "@types/fs-extra": "^11.0.4",