devguard 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. devguard/INTEGRATION_SUMMARY.md +121 -0
  2. devguard/__init__.py +3 -0
  3. devguard/__main__.py +6 -0
  4. devguard/checkers/__init__.py +41 -0
  5. devguard/checkers/api_usage.py +523 -0
  6. devguard/checkers/aws_cost.py +331 -0
  7. devguard/checkers/aws_iam.py +284 -0
  8. devguard/checkers/base.py +25 -0
  9. devguard/checkers/container.py +137 -0
  10. devguard/checkers/domain.py +189 -0
  11. devguard/checkers/firecrawl.py +117 -0
  12. devguard/checkers/fly.py +225 -0
  13. devguard/checkers/github.py +210 -0
  14. devguard/checkers/npm.py +327 -0
  15. devguard/checkers/npm_security.py +244 -0
  16. devguard/checkers/redteam.py +290 -0
  17. devguard/checkers/secret.py +279 -0
  18. devguard/checkers/swarm.py +376 -0
  19. devguard/checkers/tailscale.py +143 -0
  20. devguard/checkers/tailsnitch.py +303 -0
  21. devguard/checkers/tavily.py +179 -0
  22. devguard/checkers/vercel.py +192 -0
  23. devguard/cli.py +1510 -0
  24. devguard/cli_helpers.py +189 -0
  25. devguard/config.py +249 -0
  26. devguard/core.py +293 -0
  27. devguard/dashboard.py +715 -0
  28. devguard/discovery.py +363 -0
  29. devguard/http_client.py +142 -0
  30. devguard/llm_service.py +481 -0
  31. devguard/mcp_server.py +259 -0
  32. devguard/metrics.py +144 -0
  33. devguard/models.py +208 -0
  34. devguard/reporting.py +1571 -0
  35. devguard/sarif.py +295 -0
  36. devguard/scripts/ANALYSIS_SUMMARY.md +141 -0
  37. devguard/scripts/README.md +221 -0
  38. devguard/scripts/auto_fix_recommendations.py +145 -0
  39. devguard/scripts/generate_npmignore.py +175 -0
  40. devguard/scripts/generate_security_report.py +324 -0
  41. devguard/scripts/prepublish_check.sh +29 -0
  42. devguard/scripts/redteam_npm_packages.py +1262 -0
  43. devguard/scripts/review_all_repos.py +300 -0
  44. devguard/spec.py +617 -0
  45. devguard/sweeps/__init__.py +23 -0
  46. devguard/sweeps/ai_editor_config_audit.py +697 -0
  47. devguard/sweeps/cargo_publish_audit.py +655 -0
  48. devguard/sweeps/dependency_audit.py +419 -0
  49. devguard/sweeps/gitignore_audit.py +336 -0
  50. devguard/sweeps/local_dev.py +260 -0
  51. devguard/sweeps/local_dirty_worktree_secrets.py +521 -0
  52. devguard/sweeps/project_flaudit.py +636 -0
  53. devguard/sweeps/public_github_secrets.py +680 -0
  54. devguard/sweeps/publish_audit.py +478 -0
  55. devguard/sweeps/ssh_key_audit.py +327 -0
  56. devguard/utils.py +174 -0
  57. devguard-0.2.0.dist-info/METADATA +225 -0
  58. devguard-0.2.0.dist-info/RECORD +60 -0
  59. devguard-0.2.0.dist-info/WHEEL +4 -0
  60. devguard-0.2.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,1262 @@
+ #!/usr/bin/env python3
+ """Red team analysis of npm packages for secrets and sensitive data."""
+
+ import asyncio
+ import base64
+ import binascii
+ import json
+ import logging
+ import re
+ import sys
+ import tarfile
+ import tempfile
+ from pathlib import Path
+ from typing import Any
+ from urllib.parse import quote
+
+ import httpx
+
+ # Make the repository root importable so we can reuse devguard's retry logic.
+ devguard_path = Path(__file__).parent.parent.parent
+ sys.path.insert(0, str(devguard_path))
+ from devguard.http_client import retry_with_backoff
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ # Patterns for detecting secrets
+ SECRET_PATTERNS = [
+     # API keys
+     (r'["\']?api[_-]?key["\']?\s*[:=]\s*["\']([a-zA-Z0-9_\-]{20,})["\']', "API Key"),
+     (r'["\']?api[_-]?token["\']?\s*[:=]\s*["\']([a-zA-Z0-9_\-]{20,})["\']', "API Token"),
+     # AWS
+     (r"AKIA[0-9A-Z]{16}", "AWS Access Key ID"),
+     (
+         r'aws[_-]?secret[_-]?access[_-]?key["\']?\s*[:=]\s*["\']([a-zA-Z0-9+/=]{40})["\']',
+         "AWS Secret Key",
+     ),
+     # GitHub tokens
+     (r"ghp_[a-zA-Z0-9]{36}", "GitHub Personal Access Token"),
+     (r'github[_-]?token["\']?\s*[:=]\s*["\']([a-zA-Z0-9_\-]{36,})["\']', "GitHub Token"),
+     # Generic tokens
+     (r'["\']?token["\']?\s*[:=]\s*["\']([a-zA-Z0-9_\-]{32,})["\']', "Token"),
+     (r'["\']?secret["\']?\s*[:=]\s*["\']([a-zA-Z0-9_\-+/=]{20,})["\']', "Secret"),
+     (r'["\']?password["\']?\s*[:=]\s*["\']([^\'"\s]{8,})["\']', "Password"),
+     # Private keys
+     (r"-----BEGIN\s+(RSA\s+)?PRIVATE\s+KEY-----", "Private Key"),
+     (r"-----BEGIN\s+EC\s+PRIVATE\s+KEY-----", "EC Private Key"),
+     # Database URLs
+     (r'["\']?database[_-]?url["\']?\s*[:=]\s*["\']([^\'"]+)["\']', "Database URL"),
+     (r'postgresql://[^\'"\s]+', "PostgreSQL URL"),
+     (r'mongodb://[^\'"\s]+', "MongoDB URL"),
+     (r'mysql://[^\'"\s]+', "MySQL URL"),
+     # OAuth
+     (
+         r'["\']?client[_-]?secret["\']?\s*[:=]\s*["\']([a-zA-Z0-9_\-]{20,})["\']',
+         "OAuth Client Secret",
+     ),
+     # JWT secrets
+     (r'["\']?jwt[_-]?secret["\']?\s*[:=]\s*["\']([a-zA-Z0-9_\-+/=]{20,})["\']', "JWT Secret"),
+     # Slack tokens
+     (r"xox[baprs]-[0-9a-zA-Z\-]{10,}", "Slack Token"),
+     # Stripe keys
+     (r"sk_live_[0-9a-zA-Z]{24,}", "Stripe Live Secret Key"),
+     (r"sk_test_[0-9a-zA-Z]{24,}", "Stripe Test Secret Key"),
+     (r"pk_live_[0-9a-zA-Z]{24,}", "Stripe Live Publishable Key"),
+     # Email credentials
+     (r'smtp[_-]?password["\']?\s*[:=]\s*["\']([^\'"]+)["\']', "SMTP Password"),
+     # OpenAI/Anthropic keys
+     (r"sk-[a-zA-Z0-9]{32,}", "OpenAI API Key"),
+     (r"sk-ant-[a-zA-Z0-9\-_]{95,}", "Anthropic API Key"),
+     # Placeholder values that weren't replaced (must have at least 2 words/parts)
+     (r'["\']?(YOUR_|PLACEHOLDER_|REPLACE_|CHANGE_)[A-Z_]{5,}["\']?', "Unreplaced Placeholder"),
+     (r'["\']?(TODO_|FIXME_)[A-Z_]{8,}["\']?', "Unreplaced Placeholder"),  # Longer for TODO/FIXME
+ ]
+
+ # Sensitive file patterns
+ SENSITIVE_FILES = [
+     r"\.env",
+     r"\.env\.local",
+     r"\.env\.production",
+     r"\.env\.development",
+     r"\.secrets",
+     r"secrets\.json",
+     r"config\.json",
+     r"credentials\.json",
+     r"id_rsa",
+     r"id_ed25519",
+     r"\.pem",
+     r"\.key",
+     r"\.p12",
+     r"\.pfx",
+     r"\.jks",
+     r"\.keystore",
+ ]
+
+
+ async def fetch_package_info(client: httpx.AsyncClient, package: str) -> dict[str, Any]:
+     """Fetch package metadata from npm registry."""
+     encoded_package = quote(package, safe="")
+     url = f"https://registry.npmjs.org/{encoded_package}"
+
+     response = await client.get(url, timeout=30.0)
+     response.raise_for_status()
+     return response.json()
+
+
+ async def download_package_tarball(client: httpx.AsyncClient, package: str, version: str) -> bytes:
+     """Download package tarball from npm registry."""
+     encoded_package = quote(package, safe="")
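+     # Tarball filenames use the unscoped package name (the part after any "@scope/").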
+     url = f"https://registry.npmjs.org/{encoded_package}/-/{package.split('/')[-1]}-{version}.tgz"
+
+     response = await client.get(url, timeout=60.0, follow_redirects=True)
+     response.raise_for_status()
+     return response.content
+
+
+ def extract_tarball(tarball_data: bytes, extract_to: Path) -> None:
+     """Extract tarball to directory."""
+     with tempfile.NamedTemporaryFile(suffix=".tgz", delete=False) as tmp:
+         tmp.write(tarball_data)
+         tmp_path = Path(tmp.name)
+
+     try:
+         with tarfile.open(tmp_path, "r:gz") as tar:
+             # Use filter='data' to avoid security warnings while allowing extraction.
+             # This is safe for npm packages from the official registry.
+             tar.extractall(extract_to, filter="data")
+     finally:
+         tmp_path.unlink()
+
+
+ def is_base64_encoded(s: str) -> bool:
+     """Check if string is valid base64."""
+     try:
+         if len(s) < 20:  # Too short to be meaningful
+             return False
+         decoded = base64.b64decode(s, validate=True)
+         # Check if decoded data looks like text (not binary)
+         return all(32 <= b <= 126 or b in (9, 10, 13) for b in decoded[:100])
+     except Exception:
+         return False
+
+
+ def is_hex_encoded(s: str) -> bool:
+     """Check if string is valid hex."""
+     try:
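+         # Hex must be even-length; shorter strings are too noisy to flag.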
+         if len(s) < 20 or len(s) % 2 != 0:
+             return False
+         decoded = binascii.unhexlify(s)
+         # Check if decoded data looks like text
+         return all(32 <= b <= 126 or b in (9, 10, 13) for b in decoded[:100])
+     except Exception:
+         return False
+
+
+ def scan_for_obfuscated_code(content: str) -> list[dict[str, Any]]:
+     """Scan for obfuscated code patterns with context-aware detection."""
+     findings = []
+
+     # Patterns indicating obfuscation
+     obfuscation_patterns = [
+         (r"eval\s*\(", "eval() usage - potential code obfuscation"),
+         (r"Function\s*\(", "Function() constructor - potential code obfuscation"),
+         (r"atob\s*\(", "atob() - base64 decoding at runtime"),
+         (r"btoa\s*\(", "btoa() - base64 encoding at runtime"),
+         (r"String\.fromCharCode\s*\(", "String.fromCharCode - potential string obfuscation"),
+         (r"unescape\s*\(", "unescape() - deprecated, potential obfuscation"),
+         (r"decodeURIComponent\s*\(", "decodeURIComponent in suspicious context"),
+         (r'\[["\']\w+["\']\]\s*\(', "Bracket notation function calls - potential obfuscation"),
+     ]
+
+     # Legitimate use patterns (reduce false positives); the alternations are grouped
+     # so that bare "spec"/"example" anywhere in the context does not match.
+     legitimate_patterns = [
+         r"//.*(test|spec|example)",  # In test/example context
+         r"/\*.*(test|spec|example).*\*/",  # In comments
+         r"console\.(log|debug)",  # Near console.log (likely debugging)
+         r"JSON\.parse",  # JSON parsing is legitimate
+         r"Buffer\.from",  # Buffer operations are legitimate
+     ]
+
+     for pattern, description in obfuscation_patterns:
+         matches = list(re.finditer(pattern, content, re.IGNORECASE))
+         if matches:
+             for match in matches:
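+                 # Examine ±200 characters of context around each hit.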
+                 context_start = max(0, match.start() - 200)
+                 context_end = min(len(content), match.end() + 200)
+                 context = content[context_start:context_end]
+
+                 # Skip if in legitimate context
+                 is_legitimate = any(
+                     re.search(legit_pattern, context, re.IGNORECASE)
+                     for legit_pattern in legitimate_patterns
+                 )
+                 if is_legitimate:
+                     continue
+
+                 # Check for base64-like strings nearby
+                 has_base64_nearby = bool(re.search(r"[A-Za-z0-9+/]{30,}={0,2}", context))
+
+                 # Check for suspicious variable names (often used in obfuscation)
+                 has_suspicious_vars = bool(re.search(r"\b[_$][a-z]{1,3}\b", context))
+
+                 # Check for long hex strings (another obfuscation technique)
+                 has_hex_strings = bool(
+                     re.search(r'["\'][0-9a-f]{40,}["\']', context, re.IGNORECASE)
+                 )
+
+                 # Determine severity based on multiple factors
+                 suspicious_factors = sum(
+                     [
+                         has_base64_nearby,
+                         has_suspicious_vars,
+                         has_hex_strings,
+                     ]
+                 )
+
+                 severity = (
+                     "HIGH"
+                     if suspicious_factors >= 2
+                     else ("MEDIUM" if suspicious_factors >= 1 else "LOW")
+                 )
+
+                 findings.append(
+                     {
+                         "type": "Obfuscated Code",
+                         "line": content[: match.start()].count("\n") + 1,
+                         "match": match.group(0),
+                         "description": description,
+                         "has_base64_nearby": has_base64_nearby,
+                         "has_suspicious_vars": has_suspicious_vars,
+                         "has_hex_strings": has_hex_strings,
+                         "suspicious_factors": suspicious_factors,
+                         "severity": severity,
+                     }
+                 )
+
+     return findings
+
+
+ def scan_for_encoded_secrets(content: str) -> list[dict[str, Any]]:
+     """Scan for base64/hex encoded secrets."""
+     findings = []
+
+     # Find potential base64 strings (20+ chars, base64 charset)
+     base64_pattern = r'["\']([A-Za-z0-9+/]{20,}={0,2})["\']'
+     for match in re.finditer(base64_pattern, content):
+         candidate = match.group(1)
+         if is_base64_encoded(candidate):
+             try:
+                 decoded = base64.b64decode(candidate).decode("utf-8", errors="ignore")
+                 # Check if decoded content looks like a secret
+                 if any(
+                     keyword in decoded.lower()
+                     for keyword in ["password", "secret", "key", "token", "api"]
+                 ):
+                     findings.append(
+                         {
+                             "type": "Base64 Encoded Secret",
+                             "line": content[: match.start()].count("\n") + 1,
+                             "match": candidate[:50] + "..." if len(candidate) > 50 else candidate,
+                             "decoded_preview": decoded[:50] + "..."
+                             if len(decoded) > 50
+                             else decoded,
+                             "severity": "HIGH",
+                         }
+                     )
+             except Exception:
+                 pass
+
+     # Find potential hex strings
+     hex_pattern = r'["\']([0-9a-fA-F]{40,})["\']'
+     for match in re.finditer(hex_pattern, content):
+         candidate = match.group(1)
+         if is_hex_encoded(candidate):
+             try:
+                 decoded = binascii.unhexlify(candidate).decode("utf-8", errors="ignore")
+                 if any(
+                     keyword in decoded.lower() for keyword in ["password", "secret", "key", "token"]
+                 ):
+                     findings.append(
+                         {
+                             "type": "Hex Encoded Secret",
+                             "line": content[: match.start()].count("\n") + 1,
+                             "match": candidate[:50] + "..." if len(candidate) > 50 else candidate,
+                             "decoded_preview": decoded[:50] + "..."
+                             if len(decoded) > 50
+                             else decoded,
+                             "severity": "HIGH",
+                         }
+                     )
+             except Exception:
+                 pass
+
+     return findings
+
+
+ def scan_file_for_secrets(file_path: Path) -> list[dict[str, Any]]:
+     """Scan a file for secret patterns."""
+     findings = []
+
+     try:
+         content = file_path.read_text(encoding="utf-8", errors="ignore")
+     except Exception as e:
+         logger.debug(f"Could not read {file_path}: {e}")
+         return findings
+
+     # Standard pattern matching
+     for pattern, secret_type in SECRET_PATTERNS:
+         matches = re.finditer(pattern, content, re.IGNORECASE | re.MULTILINE)
+         for match in matches:
+             # Extract the secret value (group 1 if available, else full match)
+             secret_value = match.group(1) if match.groups() else match.group(0)
+
+             # Skip obvious test/example values
+             test_indicators = [
+                 "test",
+                 "example",
+                 "sample",
+                 "demo",
+                 "placeholder",
+                 "123456",
+                 "abcdef",
+                 "xxxxx",
+                 "yyyyy",
+                 "dummy",
+                 "your_",
+                 "replace_",
+                 "change_",
+                 "todo_",
+             ]
+             secret_lower = secret_value.lower()
+             if any(indicator in secret_lower for indicator in test_indicators):
+                 # Check if it's in a test file or documentation
+                 file_lower = str(file_path).lower()
+                 if any(
+                     doc_indicator in file_lower
+                     for doc_indicator in ["test", "spec", "example", "doc", "readme", ".md"]
+                 ):
+                     continue  # Skip test/example values in test/docs
+
+             # Truncate for display
+             display_value = secret_value[:50] + "..." if len(secret_value) > 50 else secret_value
+
+             findings.append(
+                 {
+                     "type": secret_type,
+                     "file": str(file_path),
+                     "line": content[: match.start()].count("\n") + 1,
+                     "match": display_value,
+                     "severity": "HIGH",
+                 }
+             )
+
+     # Check for encoded secrets
+     encoded_secrets = scan_for_encoded_secrets(content)
+     for finding in encoded_secrets:
+         finding["file"] = str(file_path)
+         findings.append(finding)
+
+     return findings
+
+
+ def scan_for_sensitive_files(root: Path) -> list[dict[str, Any]]:
+     """Scan for sensitive file names."""
+     findings = []
+
+     for file_path in root.rglob("*"):
+         if not file_path.is_file():
+             continue
+
+         file_name = file_path.name
+         file_path_str = str(file_path.relative_to(root))
+
+         # Skip example files (they're usually safe)
+         if ".example" in file_name.lower() or file_name.endswith(".example"):
+             continue
+
+         for pattern in SENSITIVE_FILES:
+             if re.search(pattern, file_name, re.IGNORECASE):
+                 # Check if file contains actual secrets
+                 try:
+                     content = file_path.read_text(encoding="utf-8", errors="ignore")
+                     # Look for actual secret patterns in the file
+                     has_secrets = False
+                     for secret_pattern, _ in SECRET_PATTERNS[:10]:  # Check first 10 patterns
+                         if re.search(secret_pattern, content, re.IGNORECASE):
+                             has_secrets = True
+                             break
+
+                     if has_secrets:
+                         findings.append(
+                             {
+                                 "type": "Sensitive File with Secrets",
+                                 "file": file_path_str,
+                                 "description": f"File matches sensitive pattern AND contains secrets: {pattern}",
+                                 "severity": "CRITICAL",
+                             }
+                         )
+                     else:
+                         findings.append(
+                             {
+                                 "type": "Sensitive File",
+                                 "file": file_path_str,
+                                 "description": f"File matches sensitive pattern: {pattern}",
+                                 "severity": "MEDIUM",
+                             }
+                         )
+                 except Exception:
+                     # If we can't read it, still flag it
+                     findings.append(
+                         {
+                             "type": "Sensitive File",
+                             "file": file_path_str,
+                             "description": f"File matches sensitive pattern: {pattern}",
+                             "severity": "MEDIUM",
+                         }
+                     )
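+                 # One finding per file is enough; stop at the first matching pattern.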
+                 break
+
+     return findings
+
+
+ def analyze_package_json(pkg_json: dict[str, Any]) -> dict[str, Any]:
+     """Deep analysis of package.json."""
+     issues = {
+         "suspicious_scripts": [],
+         "missing_fields": [],
+         "exposed_repos": [],
+         "placeholder_values": [],
+         "files_field_issues": [],
+         "recommendations": [],
+     }
+
+     # Check if files field is set (more secure than .npmignore)
+     if "files" not in pkg_json:
+         issues["recommendations"].append(
+             "Consider using 'files' field in package.json instead of .npmignore for an explicit allowlist"
+         )
+     else:
+         files_list = pkg_json.get("files", [])
+         # Check for common missing entries
+         if "package.json" not in files_list:
+             issues["files_field_issues"].append(
+                 "package.json should be in files list (though npm includes it anyway)"
+             )
+
+         # Warn if files list is very large (might be too permissive)
+         if len(files_list) > 50:
+             issues["files_field_issues"].append(
+                 f"Files list is very large ({len(files_list)} entries) - consider being more selective"
+             )
+
+     # Check scripts for suspicious patterns
+     scripts = pkg_json.get("scripts", {})
+     suspicious_patterns = [
+         (r"eval\s*\(", "eval in script"),
+         (r"curl.*http", "HTTP request in script"),
+         (r"wget.*http", "HTTP request in script"),
+         (r"\$.*API.*KEY", "API key reference"),
+         (r"\$.*SECRET", "Secret reference"),
+         (r"base64.*-d", "base64 decode"),
+         (r"\.env", "Environment file access"),
+     ]
+
+     # Check for postinstall/preinstall scripts (common attack vector)
+     install_scripts = ["postinstall", "preinstall", "install"]
+     for script_name in install_scripts:
+         if script_name in scripts:
+             script_content = scripts[script_name]
+             # Check for network requests in install scripts
+             if re.search(r"(curl|wget|fetch|axios|http)", script_content, re.IGNORECASE):
+                 issues["suspicious_scripts"].append(
+                     {
+                         "script": script_name,
+                         "issue": f"{script_name} script makes network requests - potential supply chain risk",
+                         "content": script_content[:100],
+                     }
+                 )
+
+     for script_name, script_content in scripts.items():
+         for pattern, description in suspicious_patterns:
+             if re.search(pattern, script_content, re.IGNORECASE):
+                 issues["suspicious_scripts"].append(
+                     {
+                         "script": script_name,
+                         "issue": description,
+                         "content": script_content[:100],
+                     }
+                 )
+
+     # Check for placeholder values
+     pkg_str = json.dumps(pkg_json, indent=2)
+     placeholder_patterns = [
+         r"YOUR_[A-Z_]+",
+         r"PLACEHOLDER_[A-Z_]+",
+         r"REPLACE_[A-Z_]+",
+         r"CHANGE_[A-Z_]+",
+         r"<YOUR_[^>]+>",
+         r"\[TODO:[^\]]+\]",
+     ]
+     for pattern in placeholder_patterns:
+         matches = re.finditer(pattern, pkg_str, re.IGNORECASE)
+         for match in matches:
+             issues["placeholder_values"].append(
+                 {
+                     "match": match.group(0),
+                     "description": "Unreplaced placeholder in package.json",
+                 }
+             )
+
+     # Check dependencies for known risky packages
+     dependencies = pkg_json.get("dependencies", {})
+     dev_dependencies = pkg_json.get("devDependencies", {})
+     all_deps = {**dependencies, **dev_dependencies}
+
+     # Known risky patterns in package names (informational; not yet applied below)
+     risky_patterns = [
+         r"^eslint-config-",  # Often used for supply chain attacks
+         r"^@types/",  # TypeScript types - generally safe but check
+     ]
+
+     # Check for typosquatting patterns (very basic)
+     for dep_name in all_deps.keys():
+         # Check for suspicious characters or patterns
+         if any(char in dep_name for char in ["__", "--", ".."]):
+             issues["recommendations"].append(
+                 f"Review dependency '{dep_name}' - suspicious characters in name"
+             )
+
+     # Check repository URLs for private repos
+     repo = pkg_json.get("repository")
+     if repo:
+         if isinstance(repo, dict):
+             repo_url = repo.get("url", "")
+         else:
+             repo_url = str(repo)
+
+         # Check for private repo indicators
+         if any(
+             domain in repo_url.lower() for domain in ["github.com", "gitlab.com", "bitbucket.org"]
+         ):
+             # Check if it's a private repo format (could expose internal structure)
+             if "/private/" in repo_url or "/internal/" in repo_url:
+                 issues["exposed_repos"].append(
+                     {
+                         "url": repo_url,
+                         "issue": "Potentially private repository URL exposed",
+                     }
+                 )
+
+     return issues
+
+
+ def analyze_package_contents(package_dir: Path) -> dict[str, Any]:
+     """Analyze package contents for secrets and sensitive data."""
+     findings = {
+         "secrets": [],
+         "sensitive_files": [],
+         "package_json": None,
+         "package_json_issues": None,
+         "large_files": [],
+         "source_maps": [],
+         "test_files_with_secrets": [],
+         "obfuscated_code": [],
+         "git_history": False,
+         "lock_files": [],
+         "ci_configs": [],
+         "npmignore_missing": False,
+         "comments_with_secrets": [],
+         "postinstall_scripts": [],
+         "dependency_risks": [],
+         "file_permissions": [],
+         "suspicious_package_names": [],
+     }
+
+     # Check for package.json
+     package_json_path = package_dir / "package" / "package.json"
+     if not package_json_path.exists():
+         # Try root level
+         package_json_path = package_dir / "package.json"
+
+     if package_json_path.exists():
+         try:
+             with open(package_json_path) as f:
+                 findings["package_json"] = json.load(f)
+             findings["package_json_issues"] = analyze_package_json(findings["package_json"])
+         except Exception as e:
+             logger.warning(f"Could not parse package.json: {e}")
+
+     # Check for .npmignore
+     npmignore_path = package_dir / "package" / ".npmignore"
+     if not npmignore_path.exists():
+         npmignore_path = package_dir / ".npmignore"
+     findings["npmignore_missing"] = not npmignore_path.exists()
+
+     # Check for .git directory (shouldn't be published)
+     git_dir = package_dir / "package" / ".git"
+     if not git_dir.exists():
+         git_dir = package_dir / ".git"
+     findings["git_history"] = git_dir.exists() and git_dir.is_dir()
+
+     # Check package name for suspicious patterns (typosquatting indicators)
+     if findings["package_json"]:
+         pkg_name = findings["package_json"].get("name", "")
+         suspicious_patterns = [
+             (r"__", "Double underscore - potential typosquatting"),
+             (r"--", "Double dash - potential typosquatting"),
+             (r"\.\.", "Double dot - potential path traversal"),
+             (r"[A-Z]{3,}", "All caps - unusual naming"),
+         ]
+         for pattern, reason in suspicious_patterns:
+             if re.search(pattern, pkg_name):
+                 findings["suspicious_package_names"].append(
+                     {
+                         "pattern": pattern,
+                         "reason": reason,
+                         "severity": "MEDIUM",
+                     }
+                 )
+
+     # Scan all files for secrets
+     for file_path in package_dir.rglob("*"):
+         if not file_path.is_file():
+             continue
+
+         # Skip node_modules if present
+         if "node_modules" in file_path.parts:
+             continue
+
+         # Check file size (flag files > 1MB)
+         try:
+             file_size = file_path.stat().st_size
+             if file_size > 1_000_000:  # 1MB
+                 findings["large_files"].append(
+                     {
+                         "file": str(file_path.relative_to(package_dir)),
+                         "size_mb": round(file_size / 1_000_000, 2),
+                         "severity": "LOW",
+                     }
+                 )
+
+             # Check file permissions (executable files in packages can be suspicious)
+             file_stat = file_path.stat()
+             is_executable = bool(file_stat.st_mode & 0o111)  # Check execute bit
+             if is_executable and file_path.suffix not in [
+                 ".sh",
+                 ".bash",
+                 ".zsh",
+                 ".mjs",
+                 ".js",
+                 ".ts",
+             ]:
+                 findings["file_permissions"].append(
+                     {
+                         "file": str(file_path.relative_to(package_dir)),
+                         "issue": "Executable file with unusual extension",
+                         "severity": "LOW",
+                     }
+                 )
+         except Exception:
+             pass
+
+         # Check for source maps
+         if file_path.suffix == ".map":
+             findings["source_maps"].append(
+                 {
+                     "file": str(file_path.relative_to(package_dir)),
+                     "description": "Source map file - may expose source code structure",
+                     "severity": "LOW",
+                 }
+             )
+
+         # Check for lock files (shouldn't be published)
+         if file_path.name in ["package-lock.json", "yarn.lock", "pnpm-lock.yaml"]:
+             findings["lock_files"].append(
+                 {
+                     "file": str(file_path.relative_to(package_dir)),
+                     "description": "Lock file should not be published",
+                     "severity": "LOW",
+                 }
+             )
+
+         # Check for CI/CD configs (may contain secrets)
+         ci_patterns = [
+             r"\.github/workflows/",
+             r"\.gitlab-ci\.yml",
+             r"\.circleci/",
+             r"\.travis\.yml",
+             r"\.drone\.yml",
+             r"azure-pipelines\.yml",
+             r"Jenkinsfile",
+         ]
+         file_path_str = str(file_path.relative_to(package_dir))
+         for pattern in ci_patterns:
+             if re.search(pattern, file_path_str, re.IGNORECASE):
+                 findings["ci_configs"].append(
+                     {
+                         "file": file_path_str,
+                         "description": "CI/CD configuration file - may contain secrets",
+                         "severity": "MEDIUM",
+                     }
+                 )
+                 break
+
+         # Skip binary files (rough heuristic)
+         try:
+             content = file_path.read_text(encoding="utf-8", errors="strict")
+         except (UnicodeDecodeError, IsADirectoryError):
+             continue
+
+         # Check if it's a test file
+         is_test_file = (
+             "test" in file_path.name.lower()
+             or "spec" in file_path.name.lower()
+             or "test" in str(file_path.parent).lower()
+         )
+
+         secrets = scan_file_for_secrets(file_path)
+         findings["secrets"].extend(secrets)
+
+         # If secrets found in test files, flag separately
+         if secrets and is_test_file:
+             findings["test_files_with_secrets"].extend(secrets)
+
+         # Check for obfuscated code (skip documentation files)
+         file_lower = str(file_path).lower()
+         is_doc_file = any(
+             ext in file_lower
+             for ext in [".md", ".txt", ".rst", ".adoc", "readme", "changelog", "license"]
+         )
+         if not is_doc_file:
+             obfuscated = scan_for_obfuscated_code(content)
+             for obf in obfuscated:
+                 obf["file"] = str(file_path.relative_to(package_dir))
+                 findings["obfuscated_code"].append(obf)
+
+         # Check comments for secrets (especially TODO/FIXME with sensitive info).
+         # Only flag what looks like an actual secret, not a mere mention of the word.
+         comment_patterns = [
+             # More specific patterns to reduce false positives
+             (
+                 r'//.*(?:password|secret|key|token|api[_-]?key)\s*[:=]\s*["\']([^\'"]{15,})["\']',
+                 "Secret in comment",
+             ),
+             (
+                 r'#.*(?:password|secret|key|token|api[_-]?key)\s*[:=]\s*["\']([^\'"]{15,})["\']',
+                 "Secret in comment",
+             ),
+             (
+                 r'/\*.*(?:password|secret|key|token|api[_-]?key)\s*[:=]\s*["\']([^\'"]{15,})["\'].*\*/',
+                 "Secret in comment",
+             ),
+             # TODO/FIXME with sensitive keywords (but not documentation about them)
+             (
+                 r"(?:TODO|FIXME|XXX|HACK)[^:]*[:][^X]*(?:password|secret|key|token|api[_-]?key|credential)",
+                 "TODO with sensitive keyword",
+             ),
+         ]
+         for pattern, desc in comment_patterns:
+             matches = re.finditer(pattern, content, re.IGNORECASE | re.MULTILINE | re.DOTALL)
+             for match in matches:
+                 # Skip if it's clearly a URL or example. Do not add a bare "//" here:
+                 # every // comment contains it, so it would suppress all findings.
+                 match_text = match.group(0).lower()
+                 if any(
+                     skip in match_text
+                     for skip in ["http://", "https://", "github.com", "example.com", "www."]
+                 ):
+                     continue
+                 findings["comments_with_secrets"].append(
+                     {
+                         "file": str(file_path.relative_to(package_dir)),
+                         "line": content[: match.start()].count("\n") + 1,
+                         "match": match.group(0)[:100],
+                         "description": desc,
+                         "severity": "MEDIUM",
+                     }
+                 )
+
+     # Scan for sensitive file names
+     findings["sensitive_files"] = scan_for_sensitive_files(package_dir)
+
+     # Check for postinstall scripts in package.json
+     if findings["package_json"]:
+         scripts = findings["package_json"].get("scripts", {})
+         install_scripts = ["postinstall", "preinstall", "install"]
+         for script_name in install_scripts:
+             if script_name in scripts:
+                 script_content = scripts[script_name]
+                 findings["postinstall_scripts"].append(
+                     {
+                         "script": script_name,
+                         "content": script_content,
+                         "severity": "MEDIUM",
+                         "note": "Install scripts can be security risks - review carefully",
+                     }
+                 )
+
+     return findings
+
+
+ async def check_dependency_vulnerabilities(
+     client: httpx.AsyncClient, package: str, version: str
+ ) -> list[dict[str, Any]]:
+     """Check for known vulnerabilities in package dependencies."""
+     vulnerabilities = []
+
+     try:
+         # Use npm audit API
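+         # The payload mimics a minimal install tree so the endpoint can resolve advisories.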
+         audit_payload = {
+             "name": f"devguard-check-{package}",
+             "version": "1.0.0",
+             "requires": {package: version},
+             "dependencies": {
+                 package: {
+                     "version": version,
+                 }
+             },
+         }
+
+         async def fetch_audit():
+             response = await client.post(
+                 "https://registry.npmjs.org/-/npm/v1/security/audits",
+                 json=audit_payload,
+                 timeout=30.0,
+             )
+             response.raise_for_status()
+             return response
+
+         try:
+             audit_response = await retry_with_backoff(fetch_audit, max_retries=3)
+             audit_data = audit_response.json()
+
+             severity_map = {
+                 "low": "LOW",
+                 "moderate": "MEDIUM",
+                 "high": "HIGH",
+                 "critical": "CRITICAL",
+             }
+             advisories = audit_data.get("advisories", {})
+             for advisory_id, advisory_data in advisories.items():
+                 severity = severity_map.get(
+                     advisory_data.get("severity", "moderate").lower(), "MEDIUM"
+                 )
+
+                 cves = advisory_data.get("cves", [])
+                 cve_id = cves[0] if cves else None
+
+                 vulnerabilities.append(
+                     {
+                         "advisory_id": advisory_id,
+                         "severity": severity,
+                         "title": advisory_data.get("title"),
+                         "description": advisory_data.get("overview", "")[:200],
+                         "cve": cve_id,
+                         "vulnerable_versions": advisory_data.get("vulnerable_versions", []),
+                     }
+                 )
+         except httpx.HTTPStatusError as e:
+             if e.response.status_code != 404:
+                 logger.debug(f"Could not check vulnerabilities for {package}: {e}")
+         except Exception as e:
+             logger.debug(f"Error checking vulnerabilities: {e}")
+     except Exception as e:
+         logger.debug(f"Error in vulnerability check: {e}")
+
+     return vulnerabilities
+
+
+ async def analyze_package(package: str, version: str | None = None) -> dict[str, Any]:
+     """Analyze a single npm package."""
+     logger.info(f"Analyzing {package}@{version or 'latest'}")
+
+     async with httpx.AsyncClient() as client:
+         # Get package info
+         package_info = await fetch_package_info(client, package)
+
+         # Determine version
+         if not version:
+             dist_tags = package_info.get("dist-tags", {})
+             version = dist_tags.get("latest")
+             if not version:
+                 versions = package_info.get("versions", {})
+                 if versions:
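+                     # Lexicographic max is only a rough fallback, not true semver ordering.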
+                     version = max(versions.keys())
+
+         if not version:
+             return {
+                 "package": package,
+                 "error": "Could not determine version",
+             }
+
+         logger.info(f"Downloading {package}@{version}")
+
+         # Download tarball
+         tarball_data = await download_package_tarball(client, package, version)
+
+         # Check for dependency vulnerabilities
+         dep_vulnerabilities = await check_dependency_vulnerabilities(client, package, version)
+
+         # Extract and analyze
+         with tempfile.TemporaryDirectory() as tmpdir:
+             extract_dir = Path(tmpdir)
+             extract_tarball(tarball_data, extract_dir)
+
+             # Find package directory (usually "package" subdirectory)
+             package_dir = extract_dir / "package"
+             if not package_dir.exists():
+                 package_dir = extract_dir
+
+             findings = analyze_package_contents(package_dir)
+             findings["dependency_vulnerabilities"] = dep_vulnerabilities
+
+             return {
+                 "package": package,
+                 "version": version,
+                 "findings": findings,
+             }
+
+
+ async def main():
+     """Main entry point."""
+     # Replace with your own packages to audit
+     packages = [
+         "example-package",
+     ]
+
+     versions = {
+         "example-package": "1.0.0",
+     }
+
+     results = []
+
+     for package in packages:
+         try:
+             version = versions.get(package)
+             result = await analyze_package(package, version)
+             results.append(result)
+         except Exception as e:
+             logger.error(f"Error analyzing {package}: {e}")
+             results.append(
+                 {
+                     "package": package,
+                     "error": str(e),
+                 }
+             )
+
+     # Print results
+     print("\n" + "=" * 80)
+     print("NPM PACKAGE SECURITY ANALYSIS")
+     print("=" * 80 + "\n")
+
+     for result in results:
+         if "error" in result:
+             print(f"❌ {result['package']}: {result['error']}\n")
+             continue
+
+         package = result["package"]
+         version = result["version"]
+         findings = result["findings"]
+
+         print(f"📦 {package}@{version}")
+         print("-" * 80)
+
+         # Secrets found
+         secrets = findings["secrets"]
+         if secrets:
+             print(f"\n🔴 SECRETS FOUND: {len(secrets)}")
+             for secret in secrets[:10]:  # Limit to first 10
+                 print(f"  • {secret['type']} in {secret['file']}:{secret['line']}")
+                 print(f"    Match: {secret['match']}")
+             if len(secrets) > 10:
+                 print(f"  ... and {len(secrets) - 10} more")
+         else:
+             print("\n✅ No secrets detected in code")
+
+         # Sensitive files
+         sensitive_files = findings["sensitive_files"]
+         if sensitive_files:
+             print(f"\n🔴 SENSITIVE FILES: {len(sensitive_files)}")
+             for file_info in sensitive_files:
+                 severity_icon = "🔴" if file_info["severity"] == "CRITICAL" else "🟡"
+                 print(f"  {severity_icon} {file_info['file']} ({file_info['severity']})")
+                 print(f"    {file_info['description']}")
+         else:
+             print("\n✅ No sensitive file names detected")
+
+         # Large files
+         large_files = findings.get("large_files", [])
+         if large_files:
+             print(f"\n🟡 LARGE FILES: {len(large_files)}")
+             for file_info in large_files:
+                 print(f"  • {file_info['file']} ({file_info['size_mb']} MB)")
+         else:
+             print("\n✅ No unusually large files detected")
+
+         # Source maps
+         source_maps = findings.get("source_maps", [])
+         if source_maps:
+             print(f"\n🟡 SOURCE MAPS: {len(source_maps)}")
+             for file_info in source_maps[:5]:  # Limit display
+                 print(f"  • {file_info['file']}")
+             if len(source_maps) > 5:
+                 print(f"  ... and {len(source_maps) - 5} more")
+
+         # Test files with secrets
+         test_secrets = findings.get("test_files_with_secrets", [])
+         if test_secrets:
+             print(f"\n🔴 SECRETS IN TEST FILES: {len(test_secrets)}")
+             print("  ⚠️  Test files should not contain real secrets!")
+             for secret in test_secrets[:5]:
+                 print(f"  • {secret['type']} in {secret['file']}:{secret['line']}")
+
+         # Obfuscated code
+         obfuscated = findings.get("obfuscated_code", [])
+         if obfuscated:
+             high_severity = [o for o in obfuscated if o.get("severity") in ["HIGH", "MEDIUM"]]
+             if high_severity:
+                 severity_icon = (
+                     "🔴" if any(o.get("severity") == "HIGH" for o in high_severity) else "🟡"
+                 )
+                 print(
+                     f"\n{severity_icon} OBFUSCATED CODE: {len(obfuscated)} ({len(high_severity)} high/medium severity)"
+                 )
+                 print("  ⚠️  Potential code obfuscation detected!")
+                 for obf in sorted(
+                     high_severity,
+                     key=lambda x: {"HIGH": 0, "MEDIUM": 1, "LOW": 2}.get(x.get("severity"), 3),
+                 )[:5]:
+                     factors = []
+                     if obf.get("has_base64_nearby"):
+                         factors.append("base64")
+                     if obf.get("has_suspicious_vars"):
+                         factors.append("suspicious vars")
+                     if obf.get("has_hex_strings"):
+                         factors.append("hex strings")
+                     factors_str = f" ({', '.join(factors)})" if factors else ""
+                     print(
+                         f"  • [{obf.get('severity', 'UNKNOWN')}] {obf['description']} in {obf['file']}:{obf['line']}{factors_str}"
+                     )
+             else:
+                 print(f"\n🟡 OBFUSCATED CODE: {len(obfuscated)} (low severity - likely legitimate)")
+
+         # Git history
+         if findings.get("git_history"):
+             print("\n🔴 GIT HISTORY DETECTED")
+             print("  ⚠️  .git directory found in package - should not be published!")
+
+         # Lock files
+         lock_files = findings.get("lock_files", [])
+         if lock_files:
+             print(f"\n🟡 LOCK FILES: {len(lock_files)}")
+             for lock in lock_files:
+                 print(f"  • {lock['file']}")
+
+         # CI/CD configs
+         ci_configs = findings.get("ci_configs", [])
+         if ci_configs:
+             print(f"\n🟡 CI/CD CONFIGS: {len(ci_configs)}")
+             print("  ⚠️  CI/CD configs may contain secrets - review carefully!")
+             for ci in ci_configs[:5]:
+                 print(f"  • {ci['file']}")
+
+         # Missing .npmignore
+         if findings.get("npmignore_missing"):
+             print("\n🟡 MISSING .npmignore")
+             print("  ⚠️  No .npmignore found - ensure sensitive files are excluded")
+             print("  💡 Recommendation: Add .npmignore or use 'files' field in package.json")
+             print("  💡 Generate one with: uv run python devguard/scripts/generate_npmignore.py")
+
+         # Comments with secrets
+         comment_secrets = findings.get("comments_with_secrets", [])
+         if comment_secrets:
+             print(f"\n🟡 SECRETS IN COMMENTS: {len(comment_secrets)}")
+             for comment in comment_secrets[:5]:
+                 print(f"  • {comment['file']}:{comment['line']}")
+                 print(f"    {comment['match'][:80]}...")
+
+         # Postinstall scripts
+         postinstall_scripts = findings.get("postinstall_scripts", [])
+         if postinstall_scripts:
+             print(f"\n🟡 INSTALL SCRIPTS: {len(postinstall_scripts)}")
+             print("  ⚠️  Install scripts can be security risks - review carefully!")
+             for script_info in postinstall_scripts:
+                 print(f"  • {script_info['script']}: {script_info['content'][:80]}...")
+
+         # Suspicious package names
+         suspicious_names = findings.get("suspicious_package_names", [])
+         if suspicious_names:
+             print(f"\n🟡 SUSPICIOUS PACKAGE NAME PATTERNS: {len(suspicious_names)}")
+             for name_issue in suspicious_names:
+                 print(f"  • {name_issue['reason']}")
+
+         # File permissions
+         file_perms = findings.get("file_permissions", [])
+         if file_perms:
+             print(f"\n🟡 UNUSUAL FILE PERMISSIONS: {len(file_perms)}")
+             for perm_issue in file_perms[:5]:
+                 print(f"  • {perm_issue['file']}: {perm_issue['issue']}")
+
+         # Dependency vulnerabilities
+         dep_vulns = findings.get("dependency_vulnerabilities", [])
+         if dep_vulns:
+             print(f"\n🔴 DEPENDENCY VULNERABILITIES: {len(dep_vulns)}")
+             critical_vulns = [v for v in dep_vulns if v.get("severity") in ["CRITICAL", "HIGH"]]
+             if critical_vulns:
+                 print("  ⚠️  Critical/High severity vulnerabilities found!")
+                 for vuln in critical_vulns[:5]:
+                     print(f"  • [{vuln['severity']}] {vuln.get('title', 'Unknown')}")
+                     if vuln.get("cve"):
+                         print(f"    CVE: {vuln['cve']}")
+             else:
+                 print("  ⚠️  Medium/Low severity vulnerabilities found")
+                 for vuln in dep_vulns[:5]:
+                     print(f"  • [{vuln['severity']}] {vuln.get('title', 'Unknown')[:60]}")
+
+         # Package.json issues
+         pkg_issues = findings.get("package_json_issues")
+         if pkg_issues:
+             if pkg_issues.get("suspicious_scripts"):
+                 print(f"\n🟡 SUSPICIOUS SCRIPTS: {len(pkg_issues['suspicious_scripts'])}")
+                 for script in pkg_issues["suspicious_scripts"]:
+                     print(f"  • {script['script']}: {script['issue']}")
+
+             if pkg_issues.get("placeholder_values"):
+                 print(f"\n🟡 PLACEHOLDER VALUES: {len(pkg_issues['placeholder_values'])}")
+                 for placeholder in pkg_issues["placeholder_values"][:5]:
+                     print(f"  • {placeholder['match']}")
+
+             if pkg_issues.get("exposed_repos"):
+                 print(f"\n🟡 EXPOSED REPOSITORIES: {len(pkg_issues['exposed_repos'])}")
+                 for repo in pkg_issues["exposed_repos"]:
+                     print(f"  • {repo['url']}")
+
+             if pkg_issues.get("files_field_issues"):
+                 print(f"\n🟡 FILES FIELD ISSUES: {len(pkg_issues['files_field_issues'])}")
+                 for issue in pkg_issues["files_field_issues"]:
+                     print(f"  • {issue}")
+
+             if pkg_issues.get("recommendations"):
+                 print("\n💡 RECOMMENDATIONS:")
+                 for rec in pkg_issues["recommendations"]:
+                     print(f"  • {rec}")
+
+         # Package.json info
+         pkg_json = findings.get("package_json")
+         if pkg_json:
+             print("\n📄 Package Info:")
+             print(f"  Name: {pkg_json.get('name', 'N/A')}")
+             print(f"  Version: {pkg_json.get('version', 'N/A')}")
+             # Guard against a null description field in package.json
+             print(f"  Description: {(pkg_json.get('description') or 'N/A')[:100]}")
+
+             # Check for scripts that might expose secrets
+             scripts = pkg_json.get("scripts", {})
+             if scripts:
+                 print(f"  Scripts: {', '.join(scripts.keys())}")
+
+         print("\n")
+
+     # Summary
+     print("=" * 80)
+     print("SUMMARY")
+     print("=" * 80)
+
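+     # Aggregate counts across all analyzed packages.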
+     total_secrets = sum(
+         len(r.get("findings", {}).get("secrets", [])) for r in results if "findings" in r
+     )
+     total_sensitive_files = sum(
+         len(r.get("findings", {}).get("sensitive_files", [])) for r in results if "findings" in r
+     )
+     critical_files = sum(
+         1
+         for r in results
+         if "findings" in r
+         for f in r["findings"].get("sensitive_files", [])
+         if f.get("severity") == "CRITICAL"
+     )
+     test_secrets = sum(
+         len(r.get("findings", {}).get("test_files_with_secrets", []))
+         for r in results
+         if "findings" in r
+     )
+     obfuscated_count = sum(
+         len(r.get("findings", {}).get("obfuscated_code", [])) for r in results if "findings" in r
+     )
+     git_history_count = sum(1 for r in results if r.get("findings", {}).get("git_history"))
+     lock_files_count = sum(
+         len(r.get("findings", {}).get("lock_files", [])) for r in results if "findings" in r
+     )
+     ci_configs_count = sum(
+         len(r.get("findings", {}).get("ci_configs", [])) for r in results if "findings" in r
+     )
+     comment_secrets_count = sum(
+         len(r.get("findings", {}).get("comments_with_secrets", []))
+         for r in results
+         if "findings" in r
+     )
+     dep_vuln_count = sum(
+         len(r.get("findings", {}).get("dependency_vulnerabilities", []))
+         for r in results
+         if "findings" in r
+     )
+     critical_dep_vulns = sum(
+         1
+         for r in results
+         if "findings" in r
+         for v in r["findings"].get("dependency_vulnerabilities", [])
+         if v.get("severity") in ["CRITICAL", "HIGH"]
+     )
+
+     print(f"Total packages analyzed: {len(packages)}")
+     print(f"Total secrets found: {total_secrets}")
+     print(f"Total sensitive files: {total_sensitive_files}")
+     print(f"Critical files (with secrets): {critical_files}")
+     print(f"Secrets in test files: {test_secrets}")
+     print(f"Obfuscated code patterns: {obfuscated_count}")
+     print(f"Packages with git history: {git_history_count}")
+     print(f"Lock files published: {lock_files_count}")
+     print(f"CI/CD configs: {ci_configs_count}")
+     print(f"Secrets in comments: {comment_secrets_count}")
+     print(f"Dependency vulnerabilities: {dep_vuln_count}")
+     print(f"Critical/High dep vulnerabilities: {critical_dep_vulns}")
+
+     if total_secrets > 0 or critical_files > 0 or test_secrets > 0 or critical_dep_vulns > 0:
+         print(
+             "\n🔴 ACTION REQUIRED: Review findings above and remove any exposed secrets or fix vulnerabilities!"
+         )
+     elif total_sensitive_files > 0:
+         print(
+             "\n🟡 REVIEW RECOMMENDED: Some sensitive file names detected (may be false positives)"
+         )
+     else:
+         print("\n✅ No obvious security issues detected")
+
+     # Additional recommendations
+     missing_npmignore = sum(1 for r in results if r.get("findings", {}).get("npmignore_missing"))
+     if missing_npmignore > 0:
+         print(f"\n💡 RECOMMENDATION: {missing_npmignore} package(s) missing .npmignore")
+         print("   Consider generating .npmignore files for better security")
+         print("   Run: uv run python devguard/scripts/generate_npmignore.py")
+
+     if obfuscated_count > 0:
+         print(f"\n💡 RECOMMENDATION: Review {obfuscated_count} obfuscated code patterns")
+         print("   Ensure they are legitimate uses (e.g., base64 encoding for data, not secrets)")
+
+     if dep_vuln_count > 0:
+         print(f"\n💡 RECOMMENDATION: {dep_vuln_count} dependency vulnerabilities found")
+         print("   Run 'npm audit' or update vulnerable dependencies")
+         if critical_dep_vulns > 0:
+             print(f"   ⚠️  {critical_dep_vulns} CRITICAL/HIGH severity - prioritize fixing!")
+
+     print(
+         "\n💡 Generate detailed JSON report: uv run python devguard/scripts/generate_security_report.py"
+     )
+     print(
+         "💡 Get automated fix recommendations: uv run python devguard/scripts/auto_fix_recommendations.py"
+     )
+
+
+ if __name__ == "__main__":
+     asyncio.run(main())