@oculum/scanner 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281) hide show
  1. package/dist/formatters/cli-terminal.d.ts +27 -0
  2. package/dist/formatters/cli-terminal.d.ts.map +1 -0
  3. package/dist/formatters/cli-terminal.js +412 -0
  4. package/dist/formatters/cli-terminal.js.map +1 -0
  5. package/dist/formatters/github-comment.d.ts +41 -0
  6. package/dist/formatters/github-comment.d.ts.map +1 -0
  7. package/dist/formatters/github-comment.js +306 -0
  8. package/dist/formatters/github-comment.js.map +1 -0
  9. package/dist/formatters/grouping.d.ts +52 -0
  10. package/dist/formatters/grouping.d.ts.map +1 -0
  11. package/dist/formatters/grouping.js +152 -0
  12. package/dist/formatters/grouping.js.map +1 -0
  13. package/dist/formatters/index.d.ts +9 -0
  14. package/dist/formatters/index.d.ts.map +1 -0
  15. package/dist/formatters/index.js +35 -0
  16. package/dist/formatters/index.js.map +1 -0
  17. package/dist/formatters/vscode-diagnostic.d.ts +103 -0
  18. package/dist/formatters/vscode-diagnostic.d.ts.map +1 -0
  19. package/dist/formatters/vscode-diagnostic.js +151 -0
  20. package/dist/formatters/vscode-diagnostic.js.map +1 -0
  21. package/dist/index.d.ts +52 -0
  22. package/dist/index.d.ts.map +1 -0
  23. package/dist/index.js +648 -0
  24. package/dist/index.js.map +1 -0
  25. package/dist/layer1/comments.d.ts +8 -0
  26. package/dist/layer1/comments.d.ts.map +1 -0
  27. package/dist/layer1/comments.js +203 -0
  28. package/dist/layer1/comments.js.map +1 -0
  29. package/dist/layer1/config-audit.d.ts +8 -0
  30. package/dist/layer1/config-audit.d.ts.map +1 -0
  31. package/dist/layer1/config-audit.js +252 -0
  32. package/dist/layer1/config-audit.js.map +1 -0
  33. package/dist/layer1/entropy.d.ts +8 -0
  34. package/dist/layer1/entropy.d.ts.map +1 -0
  35. package/dist/layer1/entropy.js +500 -0
  36. package/dist/layer1/entropy.js.map +1 -0
  37. package/dist/layer1/file-flags.d.ts +7 -0
  38. package/dist/layer1/file-flags.d.ts.map +1 -0
  39. package/dist/layer1/file-flags.js +112 -0
  40. package/dist/layer1/file-flags.js.map +1 -0
  41. package/dist/layer1/index.d.ts +36 -0
  42. package/dist/layer1/index.d.ts.map +1 -0
  43. package/dist/layer1/index.js +132 -0
  44. package/dist/layer1/index.js.map +1 -0
  45. package/dist/layer1/patterns.d.ts +8 -0
  46. package/dist/layer1/patterns.d.ts.map +1 -0
  47. package/dist/layer1/patterns.js +482 -0
  48. package/dist/layer1/patterns.js.map +1 -0
  49. package/dist/layer1/urls.d.ts +8 -0
  50. package/dist/layer1/urls.d.ts.map +1 -0
  51. package/dist/layer1/urls.js +296 -0
  52. package/dist/layer1/urls.js.map +1 -0
  53. package/dist/layer1/weak-crypto.d.ts +7 -0
  54. package/dist/layer1/weak-crypto.d.ts.map +1 -0
  55. package/dist/layer1/weak-crypto.js +291 -0
  56. package/dist/layer1/weak-crypto.js.map +1 -0
  57. package/dist/layer2/ai-agent-tools.d.ts +19 -0
  58. package/dist/layer2/ai-agent-tools.d.ts.map +1 -0
  59. package/dist/layer2/ai-agent-tools.js +528 -0
  60. package/dist/layer2/ai-agent-tools.js.map +1 -0
  61. package/dist/layer2/ai-endpoint-protection.d.ts +36 -0
  62. package/dist/layer2/ai-endpoint-protection.d.ts.map +1 -0
  63. package/dist/layer2/ai-endpoint-protection.js +332 -0
  64. package/dist/layer2/ai-endpoint-protection.js.map +1 -0
  65. package/dist/layer2/ai-execution-sinks.d.ts +18 -0
  66. package/dist/layer2/ai-execution-sinks.d.ts.map +1 -0
  67. package/dist/layer2/ai-execution-sinks.js +496 -0
  68. package/dist/layer2/ai-execution-sinks.js.map +1 -0
  69. package/dist/layer2/ai-fingerprinting.d.ts +7 -0
  70. package/dist/layer2/ai-fingerprinting.d.ts.map +1 -0
  71. package/dist/layer2/ai-fingerprinting.js +654 -0
  72. package/dist/layer2/ai-fingerprinting.js.map +1 -0
  73. package/dist/layer2/ai-prompt-hygiene.d.ts +19 -0
  74. package/dist/layer2/ai-prompt-hygiene.d.ts.map +1 -0
  75. package/dist/layer2/ai-prompt-hygiene.js +356 -0
  76. package/dist/layer2/ai-prompt-hygiene.js.map +1 -0
  77. package/dist/layer2/ai-rag-safety.d.ts +21 -0
  78. package/dist/layer2/ai-rag-safety.d.ts.map +1 -0
  79. package/dist/layer2/ai-rag-safety.js +459 -0
  80. package/dist/layer2/ai-rag-safety.js.map +1 -0
  81. package/dist/layer2/ai-schema-validation.d.ts +25 -0
  82. package/dist/layer2/ai-schema-validation.d.ts.map +1 -0
  83. package/dist/layer2/ai-schema-validation.js +375 -0
  84. package/dist/layer2/ai-schema-validation.js.map +1 -0
  85. package/dist/layer2/auth-antipatterns.d.ts +20 -0
  86. package/dist/layer2/auth-antipatterns.d.ts.map +1 -0
  87. package/dist/layer2/auth-antipatterns.js +333 -0
  88. package/dist/layer2/auth-antipatterns.js.map +1 -0
  89. package/dist/layer2/byok-patterns.d.ts +12 -0
  90. package/dist/layer2/byok-patterns.d.ts.map +1 -0
  91. package/dist/layer2/byok-patterns.js +299 -0
  92. package/dist/layer2/byok-patterns.js.map +1 -0
  93. package/dist/layer2/dangerous-functions.d.ts +7 -0
  94. package/dist/layer2/dangerous-functions.d.ts.map +1 -0
  95. package/dist/layer2/dangerous-functions.js +1375 -0
  96. package/dist/layer2/dangerous-functions.js.map +1 -0
  97. package/dist/layer2/data-exposure.d.ts +16 -0
  98. package/dist/layer2/data-exposure.d.ts.map +1 -0
  99. package/dist/layer2/data-exposure.js +279 -0
  100. package/dist/layer2/data-exposure.js.map +1 -0
  101. package/dist/layer2/framework-checks.d.ts +7 -0
  102. package/dist/layer2/framework-checks.d.ts.map +1 -0
  103. package/dist/layer2/framework-checks.js +388 -0
  104. package/dist/layer2/framework-checks.js.map +1 -0
  105. package/dist/layer2/index.d.ts +58 -0
  106. package/dist/layer2/index.d.ts.map +1 -0
  107. package/dist/layer2/index.js +380 -0
  108. package/dist/layer2/index.js.map +1 -0
  109. package/dist/layer2/logic-gates.d.ts +7 -0
  110. package/dist/layer2/logic-gates.d.ts.map +1 -0
  111. package/dist/layer2/logic-gates.js +182 -0
  112. package/dist/layer2/logic-gates.js.map +1 -0
  113. package/dist/layer2/risky-imports.d.ts +7 -0
  114. package/dist/layer2/risky-imports.d.ts.map +1 -0
  115. package/dist/layer2/risky-imports.js +161 -0
  116. package/dist/layer2/risky-imports.js.map +1 -0
  117. package/dist/layer2/variables.d.ts +8 -0
  118. package/dist/layer2/variables.d.ts.map +1 -0
  119. package/dist/layer2/variables.js +152 -0
  120. package/dist/layer2/variables.js.map +1 -0
  121. package/dist/layer3/anthropic.d.ts +83 -0
  122. package/dist/layer3/anthropic.d.ts.map +1 -0
  123. package/dist/layer3/anthropic.js +1745 -0
  124. package/dist/layer3/anthropic.js.map +1 -0
  125. package/dist/layer3/index.d.ts +24 -0
  126. package/dist/layer3/index.d.ts.map +1 -0
  127. package/dist/layer3/index.js +119 -0
  128. package/dist/layer3/index.js.map +1 -0
  129. package/dist/layer3/openai.d.ts +25 -0
  130. package/dist/layer3/openai.d.ts.map +1 -0
  131. package/dist/layer3/openai.js +238 -0
  132. package/dist/layer3/openai.js.map +1 -0
  133. package/dist/layer3/package-check.d.ts +63 -0
  134. package/dist/layer3/package-check.d.ts.map +1 -0
  135. package/dist/layer3/package-check.js +508 -0
  136. package/dist/layer3/package-check.js.map +1 -0
  137. package/dist/modes/incremental.d.ts +66 -0
  138. package/dist/modes/incremental.d.ts.map +1 -0
  139. package/dist/modes/incremental.js +200 -0
  140. package/dist/modes/incremental.js.map +1 -0
  141. package/dist/tiers.d.ts +125 -0
  142. package/dist/tiers.d.ts.map +1 -0
  143. package/dist/tiers.js +234 -0
  144. package/dist/tiers.js.map +1 -0
  145. package/dist/types.d.ts +175 -0
  146. package/dist/types.d.ts.map +1 -0
  147. package/dist/types.js +50 -0
  148. package/dist/types.js.map +1 -0
  149. package/dist/utils/auth-helper-detector.d.ts +56 -0
  150. package/dist/utils/auth-helper-detector.d.ts.map +1 -0
  151. package/dist/utils/auth-helper-detector.js +360 -0
  152. package/dist/utils/auth-helper-detector.js.map +1 -0
  153. package/dist/utils/context-helpers.d.ts +96 -0
  154. package/dist/utils/context-helpers.d.ts.map +1 -0
  155. package/dist/utils/context-helpers.js +493 -0
  156. package/dist/utils/context-helpers.js.map +1 -0
  157. package/dist/utils/diff-detector.d.ts +53 -0
  158. package/dist/utils/diff-detector.d.ts.map +1 -0
  159. package/dist/utils/diff-detector.js +104 -0
  160. package/dist/utils/diff-detector.js.map +1 -0
  161. package/dist/utils/diff-parser.d.ts +80 -0
  162. package/dist/utils/diff-parser.d.ts.map +1 -0
  163. package/dist/utils/diff-parser.js +202 -0
  164. package/dist/utils/diff-parser.js.map +1 -0
  165. package/dist/utils/imported-auth-detector.d.ts +37 -0
  166. package/dist/utils/imported-auth-detector.d.ts.map +1 -0
  167. package/dist/utils/imported-auth-detector.js +251 -0
  168. package/dist/utils/imported-auth-detector.js.map +1 -0
  169. package/dist/utils/middleware-detector.d.ts +55 -0
  170. package/dist/utils/middleware-detector.d.ts.map +1 -0
  171. package/dist/utils/middleware-detector.js +260 -0
  172. package/dist/utils/middleware-detector.js.map +1 -0
  173. package/dist/utils/oauth-flow-detector.d.ts +41 -0
  174. package/dist/utils/oauth-flow-detector.d.ts.map +1 -0
  175. package/dist/utils/oauth-flow-detector.js +202 -0
  176. package/dist/utils/oauth-flow-detector.js.map +1 -0
  177. package/dist/utils/path-exclusions.d.ts +55 -0
  178. package/dist/utils/path-exclusions.d.ts.map +1 -0
  179. package/dist/utils/path-exclusions.js +222 -0
  180. package/dist/utils/path-exclusions.js.map +1 -0
  181. package/dist/utils/project-context-builder.d.ts +119 -0
  182. package/dist/utils/project-context-builder.d.ts.map +1 -0
  183. package/dist/utils/project-context-builder.js +534 -0
  184. package/dist/utils/project-context-builder.js.map +1 -0
  185. package/dist/utils/registry-clients.d.ts +93 -0
  186. package/dist/utils/registry-clients.d.ts.map +1 -0
  187. package/dist/utils/registry-clients.js +273 -0
  188. package/dist/utils/registry-clients.js.map +1 -0
  189. package/dist/utils/trpc-analyzer.d.ts +78 -0
  190. package/dist/utils/trpc-analyzer.d.ts.map +1 -0
  191. package/dist/utils/trpc-analyzer.js +297 -0
  192. package/dist/utils/trpc-analyzer.js.map +1 -0
  193. package/package.json +45 -0
  194. package/src/__tests__/benchmark/fixtures/false-positives.ts +227 -0
  195. package/src/__tests__/benchmark/fixtures/index.ts +68 -0
  196. package/src/__tests__/benchmark/fixtures/layer1/config-audit.ts +364 -0
  197. package/src/__tests__/benchmark/fixtures/layer1/hardcoded-secrets.ts +173 -0
  198. package/src/__tests__/benchmark/fixtures/layer1/high-entropy.ts +234 -0
  199. package/src/__tests__/benchmark/fixtures/layer1/index.ts +31 -0
  200. package/src/__tests__/benchmark/fixtures/layer1/sensitive-urls.ts +90 -0
  201. package/src/__tests__/benchmark/fixtures/layer1/weak-crypto.ts +197 -0
  202. package/src/__tests__/benchmark/fixtures/layer2/ai-agent-tools.ts +170 -0
  203. package/src/__tests__/benchmark/fixtures/layer2/ai-endpoint-protection.ts +418 -0
  204. package/src/__tests__/benchmark/fixtures/layer2/ai-execution-sinks.ts +189 -0
  205. package/src/__tests__/benchmark/fixtures/layer2/ai-fingerprinting.ts +316 -0
  206. package/src/__tests__/benchmark/fixtures/layer2/ai-prompt-hygiene.ts +178 -0
  207. package/src/__tests__/benchmark/fixtures/layer2/ai-rag-safety.ts +184 -0
  208. package/src/__tests__/benchmark/fixtures/layer2/ai-schema-validation.ts +434 -0
  209. package/src/__tests__/benchmark/fixtures/layer2/auth-antipatterns.ts +159 -0
  210. package/src/__tests__/benchmark/fixtures/layer2/byok-patterns.ts +112 -0
  211. package/src/__tests__/benchmark/fixtures/layer2/dangerous-functions.ts +246 -0
  212. package/src/__tests__/benchmark/fixtures/layer2/data-exposure.ts +168 -0
  213. package/src/__tests__/benchmark/fixtures/layer2/framework-checks.ts +346 -0
  214. package/src/__tests__/benchmark/fixtures/layer2/index.ts +67 -0
  215. package/src/__tests__/benchmark/fixtures/layer2/injection-vulnerabilities.ts +239 -0
  216. package/src/__tests__/benchmark/fixtures/layer2/logic-gates.ts +246 -0
  217. package/src/__tests__/benchmark/fixtures/layer2/risky-imports.ts +231 -0
  218. package/src/__tests__/benchmark/fixtures/layer2/variables.ts +167 -0
  219. package/src/__tests__/benchmark/index.ts +29 -0
  220. package/src/__tests__/benchmark/run-benchmark.ts +144 -0
  221. package/src/__tests__/benchmark/run-depth-validation.ts +206 -0
  222. package/src/__tests__/benchmark/run-real-world-test.ts +243 -0
  223. package/src/__tests__/benchmark/security-benchmark-script.ts +1737 -0
  224. package/src/__tests__/benchmark/tier-integration-script.ts +177 -0
  225. package/src/__tests__/benchmark/types.ts +144 -0
  226. package/src/__tests__/benchmark/utils/test-runner.ts +475 -0
  227. package/src/__tests__/regression/known-false-positives.test.ts +467 -0
  228. package/src/__tests__/snapshots/__snapshots__/scan-depth.test.ts.snap +178 -0
  229. package/src/__tests__/snapshots/scan-depth.test.ts +258 -0
  230. package/src/__tests__/validation/analyze-results.ts +542 -0
  231. package/src/__tests__/validation/extract-for-triage.ts +146 -0
  232. package/src/__tests__/validation/fp-deep-analysis.ts +327 -0
  233. package/src/__tests__/validation/run-validation.ts +364 -0
  234. package/src/__tests__/validation/triage-template.md +132 -0
  235. package/src/formatters/cli-terminal.ts +446 -0
  236. package/src/formatters/github-comment.ts +382 -0
  237. package/src/formatters/grouping.ts +190 -0
  238. package/src/formatters/index.ts +47 -0
  239. package/src/formatters/vscode-diagnostic.ts +243 -0
  240. package/src/index.ts +823 -0
  241. package/src/layer1/comments.ts +218 -0
  242. package/src/layer1/config-audit.ts +289 -0
  243. package/src/layer1/entropy.ts +583 -0
  244. package/src/layer1/file-flags.ts +127 -0
  245. package/src/layer1/index.ts +181 -0
  246. package/src/layer1/patterns.ts +516 -0
  247. package/src/layer1/urls.ts +334 -0
  248. package/src/layer1/weak-crypto.ts +328 -0
  249. package/src/layer2/ai-agent-tools.ts +601 -0
  250. package/src/layer2/ai-endpoint-protection.ts +387 -0
  251. package/src/layer2/ai-execution-sinks.ts +580 -0
  252. package/src/layer2/ai-fingerprinting.ts +758 -0
  253. package/src/layer2/ai-prompt-hygiene.ts +411 -0
  254. package/src/layer2/ai-rag-safety.ts +511 -0
  255. package/src/layer2/ai-schema-validation.ts +421 -0
  256. package/src/layer2/auth-antipatterns.ts +394 -0
  257. package/src/layer2/byok-patterns.ts +336 -0
  258. package/src/layer2/dangerous-functions.ts +1563 -0
  259. package/src/layer2/data-exposure.ts +315 -0
  260. package/src/layer2/framework-checks.ts +433 -0
  261. package/src/layer2/index.ts +473 -0
  262. package/src/layer2/logic-gates.ts +206 -0
  263. package/src/layer2/risky-imports.ts +186 -0
  264. package/src/layer2/variables.ts +166 -0
  265. package/src/layer3/anthropic.ts +2030 -0
  266. package/src/layer3/index.ts +130 -0
  267. package/src/layer3/package-check.ts +604 -0
  268. package/src/modes/incremental.ts +293 -0
  269. package/src/tiers.ts +318 -0
  270. package/src/types.ts +284 -0
  271. package/src/utils/auth-helper-detector.ts +443 -0
  272. package/src/utils/context-helpers.ts +535 -0
  273. package/src/utils/diff-detector.ts +135 -0
  274. package/src/utils/diff-parser.ts +272 -0
  275. package/src/utils/imported-auth-detector.ts +320 -0
  276. package/src/utils/middleware-detector.ts +333 -0
  277. package/src/utils/oauth-flow-detector.ts +246 -0
  278. package/src/utils/path-exclusions.ts +266 -0
  279. package/src/utils/project-context-builder.ts +707 -0
  280. package/src/utils/registry-clients.ts +351 -0
  281. package/src/utils/trpc-analyzer.ts +382 -0
@@ -0,0 +1,583 @@
1
+ /**
2
+ * Layer 1: High-Entropy String Detection
3
+ * Uses Shannon entropy to detect potential secrets that don't match known patterns
4
+ */
5
+
6
+ import type { Vulnerability } from '../types'
7
+ import {
8
+ isTestOrMockFile,
9
+ isComment,
10
+ isScannerOrFixtureFile,
11
+ isExampleFile,
12
+ isFixtureFile,
13
+ isExampleDirectory,
14
+ } from '../utils/context-helpers'
15
+
16
/**
 * Computes the Shannon entropy of a string, in bits per character.
 *
 * Characters are counted as Unicode code points (the unit produced by
 * iterating a string with `for...of`). The total used for the probabilities
 * is the number of code points as well, so the probabilities always sum to 1,
 * including for characters outside the Basic Multilingual Plane, which occupy
 * two UTF-16 units in `str.length`.
 *
 * @param str - Text to measure.
 * @returns Entropy in bits per character; 0 for an empty string or a string
 *   made of a single repeated character.
 */
export function calculateEntropy(str: string): number {
  if (str.length === 0) return 0

  const freq: Record<string, number> = {}
  let total = 0
  for (const char of str) {
    freq[char] = (freq[char] || 0) + 1
    // Count what we actually iterate over; str.length would count surrogate
    // halves and skew every probability for astral characters.
    total += 1
  }

  let entropy = 0
  for (const char in freq) {
    const p = freq[char] / total
    entropy -= p * Math.log2(p)
  }

  return entropy
}
34
+
35
/**
 * Collects quoted string literals from source text, one line at a time.
 *
 * A literal is reported when it is delimited by double quotes, single quotes
 * or backticks and has at least 20 characters between the delimiters (a
 * backslash plus the character after it counts as one). Literals spanning
 * several lines are not detected because each line is scanned on its own.
 *
 * @param content - Full text of the file being scanned.
 * @returns One entry per literal: the text between the quotes, the 1-based
 *   line number, and the trimmed line it was found on. Entries are ordered by
 *   line, then by quote style (double, single, backtick), then by position.
 */
function extractStringLiterals(content: string): Array<{ value: string; line: number; lineContent: string }> {
  const literalMatchers = [
    /"([^"\\]|\\.){20,}"/g, // double-quoted, 20+ chars
    /'([^'\\]|\\.){20,}'/g, // single-quoted, 20+ chars
    /`([^`\\]|\\.){20,}`/g, // backtick-quoted, 20+ chars
  ]

  const found: Array<{ value: string; line: number; lineContent: string }> = []
  const sourceLines = content.split('\n')

  for (let i = 0; i < sourceLines.length; i++) {
    const text = sourceLines[i]
    const trimmed = text.trim()

    for (const matcher of literalMatchers) {
      // A fresh regex per line keeps `lastIndex` from leaking between lines.
      const scanner = new RegExp(matcher.source, matcher.flags)
      let hit = scanner.exec(text)
      while (hit !== null) {
        found.push({
          value: hit[0].slice(1, -1), // strip the surrounding quotes
          line: i + 1,
          lineContent: trimmed,
        })
        hit = scanner.exec(text)
      }
    }
  }

  return found
}
65
+
66
/**
 * Tells whether a string has the shape of a well-known non-secret value:
 * URL, file path, date, hex digest, data URL, email, blank string, plain
 * words, or a regex/route-matcher expression.
 *
 * NOTE(review): the last pattern accepts any string built only from letters,
 * digits, `_`, `-`, `/`, `.` and regex punctuation, so plain alphanumeric
 * tokens (including key-shaped ones) are reported as safe too. Confirm that
 * this breadth is intended.
 *
 * @param str - Candidate string literal value.
 * @returns true when at least one safe shape matches.
 */
function isSafePattern(str: string): boolean {
  const safeShapes = [
    /^https?:\/\//i, // URLs
    /^\/[a-z0-9_/-]+$/i, // File paths
    /^\d{4}-\d{2}-\d{2}/, // Dates
    /^[a-f0-9]{32}$/i, // MD5 hashes (often used as IDs)
    /^[a-f0-9]{40}$/i, // SHA1 hashes
    /^[a-f0-9]{64}$/i, // SHA256 hashes
    /^data:[a-z]+\/[a-z]+;base64,/i, // Data URLs
    /^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}$/i, // Emails
    /^\s*$/, // Whitespace only
    /^[a-z\s]+$/i, // Only letters and spaces (likely text)
    /^\/?[\(\)\[\]\{\}\|\?\*\+\.\^\$\\:!_a-z0-9/-]+$/i, // Regex patterns (route matchers, etc.)
  ]

  for (const shape of safeShapes) {
    if (shape.test(str)) return true
  }
  return false
}
84
+
85
/**
 * Recognizes PEM armor lines such as `-----BEGIN RSA PRIVATE KEY-----`.
 * These delimiters are not secret material themselves.
 *
 * @param str - Candidate string literal value.
 * @returns true when the string starts with a BEGIN/END armor line for a
 *   known label, or contains a `-----BEGIN <word> KEY-----` /
 *   `-----END <word> KEY-----` marker anywhere.
 */
function isPEMHeader(str: string): boolean {
  const startsWithBegin = /^-{3,}BEGIN\s+(PRIVATE|PUBLIC|RSA|DSA|EC|ENCRYPTED|CERTIFICATE)/i
  const startsWithEnd = /^-{3,}END\s+(PRIVATE|PUBLIC|RSA|DSA|EC|ENCRYPTED|CERTIFICATE)/i
  const containsBeginKey = /-----BEGIN\s+\w+\s+KEY-----/i
  const containsEndKey = /-----END\s+\w+\s+KEY-----/i

  return (
    startsWithBegin.test(str) ||
    startsWithEnd.test(str) ||
    containsBeginKey.test(str) ||
    containsEndKey.test(str)
  )
}
95
+
96
/**
 * Tells whether a literal looks like an encrypted or encoded payload rather
 * than the key that protects it.
 *
 * @param str - Candidate string literal value.
 * @param lineContent - The source line the literal was found on.
 * @returns true when the value names an encrypted field, is a base64 run of
 *   100+ characters, or the line carries an `encrypted_content` /
 *   `ciphertext` property key or the `gAAAA` marker.
 */
function isEncryptedContent(str: string, lineContent: string): boolean {
  // Signals read from the value itself.
  const valueSignals = [
    /encrypted_content/i,
    /ciphertext/i,
    /encrypted_data/i,
    /encrypted_value/i,
    /^[A-Za-z0-9+/]{100,}={0,2}$/, // Long base64 strings are often encrypted payloads
  ]
  for (const signal of valueSignals) {
    if (signal.test(str)) return true
  }

  // Signals read from the surrounding line.
  const lineSignals = [
    /["']encrypted_content["']\s*:/i,
    /["']ciphertext["']\s*:/i,
    /gAAAA/, // Fernet encryption prefix
  ]
  for (const signal of lineSignals) {
    if (signal.test(lineContent)) return true
  }

  return false
}
120
+
121
+
122
/**
 * Recognizes a single JWT segment or a complete three-part JWT.
 *
 * A segment is a base64url string starting with `eyJ` (the base64 encoding
 * of `{"`). A complete token is `header.payload.signature`, where header and
 * payload both start with `eyJ`.
 *
 * @param str - Candidate string literal value.
 * @returns true for either form.
 */
function isJWTSegment(str: string): boolean {
  const isSingleSegment = str.startsWith('eyJ') && /^[A-Za-z0-9_-]+$/.test(str)
  const isFullToken = /^eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+$/.test(str)
  return isSingleSegment || isFullToken
}
135
+
136
/**
 * Heuristic: does the string read like a regular expression or route matcher?
 *
 * Every listed indicator found in the string scores one point; two points or
 * more classify the string as a regex.
 *
 * NOTE(review): `'\\.'` appears twice and several indicators contain others
 * (`'(?'` inside `'(?:'`, `'^'` inside `'[^'`, `'$'` inside `'$)'`), so a
 * single construct can score two points on its own. The list is kept as is
 * because changing it would alter classification.
 *
 * @param str - Candidate string literal value.
 * @returns true when at least two indicators are present.
 */
function isRegexPattern(str: string): boolean {
  const regexIndicators = ['(?', '(?!', '(?:', '(?=', '\\.', '\\.', '.*', '.+', '[^', '|', '$)', '^', '$']

  let score = 0
  for (const indicator of regexIndicators) {
    if (str.includes(indicator)) score += 1
  }

  return score >= 2
}
145
+
146
/**
 * Tells whether the source line holds a template literal whose `${...}`
 * expressions contain code (method calls, optional chaining, ternaries, ...).
 * Only the line is inspected; `str` is accepted for signature parity with the
 * other checks and is not read.
 *
 * @param str - Candidate string literal value (unused).
 * @param lineContent - The source line the literal was found on.
 * @returns true when the line has template syntax and matches a code pattern.
 */
function isTemplateWithCode(str: string, lineContent: string): boolean {
  const hasTemplateSyntax = lineContent.includes('`') || lineContent.includes('${')
  if (!hasTemplateSyntax) return false

  // Expression shapes that inflate entropy without being secrets.
  const embeddedCode = [
    /\$\{[^}]*\.(toString|padStart|padEnd|toFixed|toLocaleString)\s*\(/i, // Method calls
    /\$\{[^}]*\?\.[^}]*\}/, // Optional chaining
    /\$\{[^}]*\s*\?\s*[^:]+\s*:\s*[^}]+\}/, // Ternary operators
    /var\s*\(\s*\$\{/, // CSS var() with template
    /\$\{[^}]*\.find\s*\(/i, // Array methods
    /\$\{[^}]*\.map\s*\(/i,
    /\$\{[^}]*\.filter\s*\(/i,
    /\$\{new\s+Date\(\)/i, // Date formatting
  ]

  for (const shape of embeddedCode) {
    if (shape.test(lineContent)) return true
  }
  return false
}
167
+
168
/**
 * Heuristic for prose or markdown content.
 *
 * Strings shorter than 30 characters are never treated as prose. Longer ones
 * qualify when they contain a markdown marker, or when more than half of
 * their whitespace-separated tokens look like words (a letter, then letters,
 * digits, `'` or `-`, with optional trailing punctuation).
 *
 * @param str - Candidate string literal value.
 * @returns true when the string reads like text rather than a token.
 */
function isHumanReadableContent(str: string): boolean {
  if (str.length < 30) return false

  const markdownIndicators = ['## ', '# ', '**', '- [ ]', '- ', '\n\n', '\\n']
  for (const marker of markdownIndicators) {
    if (str.includes(marker)) return true
  }

  const tokens = str.split(/\s+/).filter(token => token.length > 0)
  if (tokens.length === 0) return false

  const wordShape = /^[a-zA-Z][a-zA-Z0-9'-]*[:.!?,]?$/
  let wordLike = 0
  for (const token of tokens) {
    if (wordShape.test(token)) wordLike += 1
  }

  // More than half word-shaped tokens means it is probably text.
  return wordLike / tokens.length > 0.5
}
186
+
187
/**
 * Tells whether the source line looks like it defines display text: model
 * names, descriptions, labels, tooltips and similar. Only the line is
 * inspected; `str` is not read.
 *
 * @param str - Candidate string literal value (unused).
 * @param lineContent - The source line the literal was found on.
 * @returns true when the line matches any display-text pattern.
 */
function isUIString(str: string, lineContent: string): boolean {
  // Quoted text mentioning model names or descriptive phrasing.
  const quotedDisplayText = [
    /['"`].*Claude.*['"`]/i,
    /['"`].*GPT.*['"`]/i,
    /['"`].*Sonnet.*['"`]/i,
    /['"`].*for\s+(chat|embeddings|completion).*['"`]/i,
    /['"`]Uses\s+/i,
    /['"`]Note:\s*/i,
  ]

  // Property or prop names that conventionally hold display text.
  const displayFieldNames = [
    /placeholder['"`:]/i,
    /description['"`:]/i,
    /label['"`:]/i,
    /title['"`:]/i,
    /message['"`:]/i,
    /tooltip['"`:]/i,
  ]

  for (const matcher of quotedDisplayText) {
    if (matcher.test(lineContent)) return true
  }
  for (const matcher of displayFieldNames) {
    if (matcher.test(lineContent)) return true
  }
  return false
}
207
+
208
/**
 * Tells whether the source line sits in a React/JSX display context:
 * UI-oriented props, quoted JSX text children, common UI component tags, or
 * display formatting calls.
 *
 * @param lineContent - The source line the literal was found on.
 * @returns true when any of the JSX/display patterns matches the line.
 */
function isJSXUIContext(lineContent: string): boolean {
  const displayContexts = [
    // Assignment of a quoted value to a common UI prop name
    /\b(placeholder|title|label|message|description|tooltip|alt|aria-label|name|id|className|testId|data-testid)\s*=\s*['"`]/i,
    // Quoted text between a closing `>` and the next `<`
    />\s*['"`][^<]*['"`]\s*</,
    // Opening tag of a common UI component
    /<(Button|Text|Label|Title|Heading|Paragraph|Span|Input|Tooltip|Badge|Alert|Toast)/i,
    // `return` of a template literal with an interpolation
    /return\s+`[^`]*\$\{/,
    // Status / relative-time display text
    /['"`]Synced\s+/i,
    /['"`]\d+\s*(h|hr|hour|m|min|minute|s|sec|second)s?\s+ago['"`]/i,
    // Number/string formatting used for display
    /\.toLocaleString\s*\(|\.toFixed\s*\(|\.padStart\s*\(/,
  ]

  for (const context of displayContexts) {
    if (context.test(lineContent)) return true
  }
  return false
}
229
+
230
/**
 * Heuristic for natural-language text based on common English words.
 *
 * Strings under 25 characters, or with fewer than three tokens of two or
 * more characters, are never classified as natural language. Otherwise each
 * lower-cased token is stripped of non-letters and looked up in a fixed word
 * list; the string qualifies when more than 30% of the tokens are found.
 *
 * @param str - Candidate string literal value.
 * @returns true when the string is likely a sentence or phrase.
 */
function isNaturalLanguage(str: string): boolean {
  if (str.length < 25) return false

  const commonWords = new Set([
    'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
    'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
    'should', 'may', 'might', 'must', 'shall', 'can', 'need', 'to', 'of',
    'in', 'for', 'on', 'with', 'at', 'by', 'from', 'up', 'about', 'into',
    'through', 'during', 'before', 'after', 'above', 'below', 'between',
    'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when',
    'where', 'why', 'how', 'all', 'each', 'few', 'more', 'most', 'other',
    'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than',
    'too', 'very', 'just', 'also', 'now', 'and', 'but', 'or', 'if', 'as',
    'your', 'you', 'this', 'that', 'it', 'they', 'we', 'he', 'she', 'my',
    'their', 'our', 'his', 'her', 'its', 'ago', 'synced', 'updated', 'created',
  ])

  // Single-character tokens are dropped before counting.
  const tokens = str.toLowerCase().split(/\s+/).filter(token => token.length > 1)
  if (tokens.length < 3) return false

  let commonHits = 0
  for (const token of tokens) {
    const lettersOnly = token.replace(/[^a-z]/g, '')
    if (commonWords.has(lettersOnly)) commonHits += 1
  }

  return commonHits / tokens.length > 0.3
}
260
+
261
/**
 * Heuristic for Tailwind/CSS class lists.
 *
 * The string is lower-cased and split on whitespace; a token counts as a CSS
 * class when it contains any of the known utility fragments. The string is
 * classified as CSS when more than 30% of its tokens qualify.
 *
 * Empty tokens produced by leading or trailing whitespace are discarded before
 * the ratio is computed, so padding around a class list (common in multi-line
 * template literals) does not dilute the ratio.
 *
 * @param str - Candidate string literal value.
 * @returns true when the string is probably a list of CSS classes; false for
 *   empty or whitespace-only input.
 */
function isCSSClasses(str: string): boolean {
  // Tailwind/CSS class fragments
  const cssIndicators = [
    'flex', 'grid', 'block', 'inline', 'hidden',
    'items-', 'justify-', 'gap-', 'space-',
    'text-', 'font-', 'bg-', 'border-', 'rounded',
    'px-', 'py-', 'pt-', 'pb-', 'pl-', 'pr-', 'p-',
    'mx-', 'my-', 'mt-', 'mb-', 'ml-', 'mr-', 'm-',
    'w-', 'h-', 'min-', 'max-',
    'hover:', 'focus:', 'active:', 'disabled:',
    'sm:', 'md:', 'lg:', 'xl:', '2xl:',
    'dark:', 'light:',
    'transition', 'duration-', 'ease-',
    'absolute', 'relative', 'fixed', 'sticky',
    'top-', 'bottom-', 'left-', 'right-',
    'z-', 'overflow-', 'opacity-',
    'ring-', 'shadow-', 'outline-',
  ]

  // Drop empty tokens, matching how the other tokenizing checks in this file
  // treat whitespace.
  const tokens = str.toLowerCase().split(/\s+/).filter(token => token.length > 0)
  if (tokens.length === 0) return false

  const cssTokenCount = tokens.filter(token =>
    cssIndicators.some(indicator => token.includes(indicator))
  ).length

  // If more than 30% of tokens look like CSS classes, it's probably CSS
  return cssTokenCount > 0 && (cssTokenCount / tokens.length) > 0.3
}
290
+
291
/**
 * Tells whether the source line uses CSS-in-JS constructs
 * (styled-components, emotion and similar).
 *
 * @param lineContent - The source line the literal was found on.
 * @returns true when the line matches any CSS-in-JS marker.
 */
function isCSSInJS(lineContent: string): boolean {
  const markers = [
    /styled\./, // styled.div, styled.button
    /styled\(/, // styled(Component)
    /css`/, // css`` template literal
    /keyframes`/, // keyframes`` template literal
    /@emotion/, // @emotion imports
    /createGlobalStyle/, // styled-components global
    /\$\{\s*props\s*=>/, // ${props => ...} in styled
    /\$\{\s*\(\s*\{/, // ${({ theme }) => ...}
  ]

  for (const marker of markers) {
    if (marker.test(lineContent)) return true
  }
  return false
}
305
+
306
/**
 * Tells whether a path points at documentation rather than code.
 *
 * A file is documentation when:
 *  - its basename is a well-known project document (README, CHANGELOG,
 *    CONTRIBUTING, LICENSE, CODE_OF_CONDUCT, SECURITY, AUTHORS, HISTORY),
 *    optionally followed by a `-`/`_` suffix such as `LICENSE-MIT`, with no
 *    file extension;
 *  - it has a documentation extension (.md, .mdx, .rst, .adoc, .txt); or
 *  - it lives under a docs/documentation/wiki/guide(s)/tutorial(s)/example(s)
 *    directory.
 *
 * The well-known names are matched against the basename only. Matching them
 * anywhere in the path classified source files such as
 * `src/security/session.ts` or `src/history.ts` as documentation, which
 * silently excluded them from scanning.
 *
 * @param filePath - Path of the file being scanned.
 * @returns true when the file should be treated as documentation.
 */
function isDocumentationFile(filePath: string): boolean {
  // Last path component, accepting both `/` and `\` separators.
  const baseName = filePath.split(/[\\/]/).pop() || ''

  // Extension-less well-known documents: README, LICENSE, LICENSE-MIT, ...
  // Names carrying a doc extension (README.md, LICENSE.txt) are covered by the
  // extension patterns below.
  const wellKnownDocName =
    /^(README|CHANGELOG|CONTRIBUTING|LICENSE|CODE_OF_CONDUCT|SECURITY|AUTHORS|HISTORY)([-_][^.]*)?$/i
  if (wellKnownDocName.test(baseName)) return true

  const docPatterns = [
    /\.md$/i,
    /\.mdx$/i,
    /\.rst$/i, // reStructuredText
    /\.adoc$/i, // AsciiDoc
    /\.txt$/i, // Plain text docs
    /\/docs\//i,
    /\/documentation\//i,
    /\/wiki\//i,
    /\/guides?\//i,
    /\/tutorials?\//i,
    /\/examples?\//i, // Example directories often have sample configs
  ]
  return docPatterns.some(p => p.test(filePath))
}
331
+
332
/**
 * Tells whether the source line is a logging statement or carries a
 * bracketed debug/log tag.
 *
 * @param lineContent - The source line the literal was found on.
 * @returns true for `console.*(` / `logger.*(` calls (log, debug, info, warn,
 *   error) and for lines with a `[...]` group containing "debug" or "log"
 *   in any letter case.
 */
function isDebugLogContent(lineContent: string): boolean {
  const consoleCall = /console\.(log|debug|info|warn|error)\s*\(/i
  const loggerCall = /logger\.(log|debug|info|warn|error)\s*\(/i
  const debugTag = /\[.*Debug.*\]/i
  const logTag = /\[.*Log.*\]/i

  return (
    consoleCall.test(lineContent) ||
    loggerCall.test(lineContent) ||
    debugTag.test(lineContent) ||
    logTag.test(lineContent)
  )
}
342
+
343
+
344
/**
 * Tells whether the source line contains inline styling, in either JSX or
 * HTML form.
 *
 * @param lineContent - The source line the literal was found on.
 * @returns true when any JSX or HTML style pattern matches the line.
 */
function isInlineStyle(lineContent: string): boolean {
  const stylePatterns = [
    // JSX
    /style\s*=\s*\{\{/, // style={{...}}
    /style\s*=\s*\{[^}]*:/, // style={{ color: ... }}
    /className\s*=\s*["`'][^"`']*gradient/i, // gradient classes
    /className\s*=\s*["`'][^"`']*bg-/i, // bg- classes
    // HTML
    /style\s*=\s*["'][^"']*:/, // style="color: ..."
    /<style[^>]*>/i, // <style> tags
    /background:\s*linear-gradient/i, // CSS gradients
    /background:\s*radial-gradient/i, // Radial gradients
  ]

  for (const pattern of stylePatterns) {
    if (pattern.test(lineContent)) return true
  }
  return false
}
364
+
365
/**
 * Tells whether a string contains CSS tokens (units, colors, functions,
 * gradients, custom properties, shadows).
 *
 * One strong indicator (a standalone 6- or 8-digit hex color, a gradient
 * function call, or an rgb()/hsl() call with numbers) is enough. Otherwise at
 * least two of the weaker token patterns must match.
 *
 * The percentage-unit pattern used to be `\d+%\b`. Because `%` is not a word
 * character, that boundary only holds when a word character follows, so
 * `100%` followed by a space, `;` or `)` never matched. It now matches any
 * percentage except one directly followed by a comma or the end of the
 * string; those cases are counted by the color-stop pattern, so a single
 * percentage is still counted once.
 *
 * @param str - Candidate string literal value.
 * @returns true when the string is confidently CSS.
 */
function hasCSSTokens(str: string): boolean {
  const cssTokens = [
    // CSS units
    /\d+px\b/, /\d+%(?!\s*(,|$))/, /\d+em\b/, /\d+rem\b/, /\d+deg\b/, /\d+vh\b/, /\d+vw\b/,

    // Hex colors (standalone or in context)
    /#[0-9a-f]{3,8}\b/i,

    // CSS color functions
    /rgb\s*\(/, /rgba\s*\(/, /hsl\s*\(/, /hsla\s*\(/,
    /oklab\s*\(/, /oklch\s*\(/, /lab\s*\(/, /lch\s*\(/, // Modern color functions

    // CSS gradients (all types)
    /linear-gradient/, /radial-gradient/, /conic-gradient/,
    /repeating-linear-gradient/, /repeating-radial-gradient/,

    // Gradient direction keywords (Tailwind-style)
    /\bfrom-/, /\bto-/, /\bvia-/,

    // CSS custom properties
    /var\s*\(--/,

    // Common CSS properties
    /\bopacity\s*:\s*[\d.]+/,
    /\btransform\s*:/,
    /\btransition\s*:/,
    /\banimation\s*:/,

    // Box shadow patterns
    /\bshadow-/, /box-shadow/,
    /\d+px\s+\d+px\s+\d+px/, // Shadow offset pattern

    // Color stops in gradients
    /\b\d+%\s*(,|$)/, // Percentage color stops
  ]

  // Single strong indicators (only need 1 match)
  const strongIndicators = [
    /^#[0-9a-f]{6}$/i, // Standalone 6-digit hex color
    /^#[0-9a-f]{8}$/i, // Standalone 8-digit hex color with alpha
    /linear-gradient\s*\(/, // Gradient function
    /radial-gradient\s*\(/,
    /conic-gradient\s*\(/,
    /rgba?\s*\(\s*\d/, // rgb/rgba with numbers
    /hsla?\s*\(\s*\d/, // hsl/hsla with numbers
  ]

  // If any strong indicator matches, it's definitely CSS
  if (strongIndicators.some(pattern => pattern.test(str))) {
    return true
  }

  // Must match at least 2 CSS indicators to be confident it's CSS
  const tokenCount = cssTokens.filter(pattern => pattern.test(str)).length
  return tokenCount >= 2
}
422
+
423
/**
 * Tells whether a literal or its line refers to environment-variable
 * placeholders instead of real secret values.
 *
 * @param lineContent - The source line the literal was found on.
 * @param value - Candidate string literal value.
 * @returns true when the line uses shell-style variable syntax, or when the
 *   value or the line contains a common placeholder name.
 */
function isEnvVarPlaceholder(lineContent: string, value: string): boolean {
  // Shell-style variable references, checked against the line only.
  const shellReferences = [
    /\$[A-Z_][A-Z0-9_]*/, // $VAR_NAME
    /\$\{[A-Z_][A-Z0-9_]*\}/, // ${VAR_NAME}
    /\bexport\s+[A-Z_][A-Z0-9_]*=["']?\$/, // export VAR=$OTHER
    /:\s*\$\{[A-Z_][A-Z0-9_]*:-/, // ${VAR:-default}
  ]
  for (const reference of shellReferences) {
    if (reference.test(lineContent)) return true
  }

  // Placeholder names, checked against the value first and then the line.
  const placeholderNames = [
    /FREE_KEY|PRO_KEY|ULTRA_KEY|TEST_KEY/i,
    /BASE_URL|API_URL|ENDPOINT_URL/i,
    /YOUR_[A-Z_]*KEY|REPLACE_[A-Z_]*KEY/i,
    /\$\{?\w+\}?_KEY|\$\{?\w+\}?_TOKEN/i, // $SOME_KEY, ${SOME_TOKEN}
  ]
  const mentionsPlaceholder = (text: string): boolean =>
    placeholderNames.some(name => name.test(text))

  return mentionsPlaceholder(value) || mentionsPlaceholder(lineContent)
}
447
+
448
/**
 * Scan a file's string literals and report the ones whose entropy suggests a
 * hardcoded secret.
 *
 * Files recognised as scanner/fixture, example, or documentation files are not
 * scanned at all. Within a scanned file, each literal is passed through a
 * fixed sequence of "known non-secret" filters before its entropy is measured.
 * Findings in test/mock files are reported with `low` severity and confidence.
 *
 * @param content - Full text of the file being scanned.
 * @param filePath - Path of the file; used for the skip rules, the test-file
 *   downgrade, and the finding id.
 * @returns One `Vulnerability` per flagged literal (empty when nothing is flagged).
 */
export function detectHighEntropyStrings(
  content: string,
  filePath: string
): Vulnerability[] {
  // Whole-file skips, evaluated in order and short-circuiting on the first hit.
  if (
    isScannerOrFixtureFile(filePath) || // scanner/fixture files (avoid self-detection)
    isFixtureFile(filePath) || // __fixtures__, .fixture., mock-data, etc.
    isExampleFile(filePath) || // example files
    isExampleDirectory(filePath) || // /examples/, /demos/, /tutorials/, etc.
    isDocumentationFile(filePath) // documentation/README files
  ) {
    return []
  }

  const vulnerabilities: Vulnerability[] = []

  // Depends only on the path, so it is resolved once instead of once per string.
  const inTestFile = isTestOrMockFile(filePath)

  // Shortest length accepted by either entropy threshold below.
  const minCandidateLength = 16

  for (const { value, line, lineContent } of extractStringLiterals(content)) {
    // Anything shorter can never satisfy a threshold, so skip it before
    // running the filter chain and the entropy calculation.
    if (value.length < minCandidateLength) continue

    // Known non-secret shapes. Order is preserved from the original checks
    // and evaluation stops at the first match.
    if (
      isComment(lineContent) || // comments
      isPEMHeader(value) || // PEM headers/footers
      isEncryptedContent(value, lineContent) || // encrypted payload blocks
      isJWTSegment(value) || // JWT segments (handled by patterns.ts)
      isInlineStyle(lineContent) || // style={{...}} or style="..."
      hasCSSTokens(value) || // colors, gradients, units
      isEnvVarPlaceholder(lineContent, value) || // env var placeholders
      isSafePattern(value) || // safe patterns
      isCSSClasses(value) || // CSS/Tailwind class strings
      isCSSInJS(lineContent) || // styled-components, emotion
      isDebugLogContent(lineContent) || // debug log statements
      isRegexPattern(value) || // regex/route matcher patterns
      isTemplateWithCode(value, lineContent) || // template literals with code expressions
      isHumanReadableContent(value) || // human-readable text/markdown
      isUIString(value, lineContent) || // UI strings (model names, descriptions, etc.)
      isJSXUIContext(lineContent) || // JSX props/text
      isNaturalLanguage(value) // natural language
    ) {
      continue
    }

    const entropy = calculateEntropy(value)

    // Two thresholds:
    // - entropy > 4.5 for strings longer than 20 chars
    // - entropy > 4.2 for strings of 16-20 chars (a lower bar for short strings)
    // NOTE(review): if calculateEntropy returns per-character Shannon entropy
    // in bits, an n-character string cannot exceed log2(n), so lengths 16-18
    // could never pass the 4.2 bar — confirm against calculateEntropy.
    const meetsThreshold =
      (entropy > 4.5 && value.length > 20) ||
      (entropy > 4.2 && value.length >= 16 && value.length <= 20)
    if (!meetsThreshold) continue

    // Require at least 2 of: lowercase, uppercase, digit, other character.
    const charTypes = [/[a-z]/, /[A-Z]/, /[0-9]/, /[^a-zA-Z0-9]/].filter(
      pattern => pattern.test(value)
    ).length
    if (charTypes < 2) continue

    // Final check: skip CSS-like strings that passed the earlier filters.
    // NOTE(review): this also matches a bare "rgb"/"hsl" substring (any case)
    // anywhere in the value, so a real secret containing those letters is
    // skipped here.
    if (/gradient|rgba?|hsla?|#[0-9a-f]{3,8}/i.test(value)) continue

    // Severity and confidence always share the same level: 'low' in test
    // files, otherwise 'high' above 5.0 entropy and 'medium' below.
    const level = inTestFile ? 'low' : entropy > 5.0 ? 'high' : 'medium'

    vulnerabilities.push({
      // NOTE(review): the id uses only path and line, so two findings on the
      // same line share an id.
      id: `entropy-${filePath}-${line}`,
      filePath,
      lineNumber: line,
      lineContent,
      severity: level,
      category: 'high_entropy_string',
      title: 'Potential hardcoded secret detected',
      description: `High-entropy string found (entropy: ${entropy.toFixed(2)}). This may be a hardcoded secret, API key, or password.${inTestFile ? ' (in test file)' : ''}`,
      suggestedFix: 'Move this value to an environment variable and access it via process.env',
      confidence: level,
      layer: 1,
      requiresAIValidation: true, // Entropy findings must be validated by AI
    })
  }

  return vulnerabilities
}