credsweeper 1.12.2__py3-none-any.whl → 1.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic. Click here for more details.

Files changed (46)
  1. credsweeper/__init__.py +1 -1
  2. credsweeper/__main__.py +15 -8
  3. credsweeper/app.py +7 -2
  4. credsweeper/common/keyword_pattern.py +6 -3
  5. credsweeper/common/morpheme_checklist.txt +11 -1
  6. credsweeper/config/config.py +1 -0
  7. credsweeper/credentials/line_data.py +16 -0
  8. credsweeper/deep_scanner/deep_scanner.py +12 -6
  9. credsweeper/deep_scanner/rtf_scanner.py +41 -0
  10. credsweeper/deep_scanner/strings_scanner.py +52 -0
  11. credsweeper/file_handler/byte_content_provider.py +10 -1
  12. credsweeper/file_handler/file_path_extractor.py +2 -0
  13. credsweeper/file_handler/text_content_provider.py +7 -1
  14. credsweeper/filters/__init__.py +1 -1
  15. credsweeper/filters/group/token_pattern.py +2 -2
  16. credsweeper/filters/group/weird_base36_token.py +2 -2
  17. credsweeper/filters/group/weird_base64_token.py +2 -2
  18. credsweeper/filters/value_file_path_check.py +5 -3
  19. credsweeper/filters/value_github_check.py +3 -2
  20. credsweeper/filters/value_morphemes_check.py +43 -0
  21. credsweeper/filters/value_string_type_check.py +1 -0
  22. credsweeper/ml_model/features/feature.py +1 -18
  23. credsweeper/ml_model/features/file_extension.py +1 -1
  24. credsweeper/ml_model/features/has_html_tag.py +10 -8
  25. credsweeper/ml_model/features/is_secret_numeric.py +4 -3
  26. credsweeper/ml_model/features/rule_name.py +1 -1
  27. credsweeper/ml_model/features/word_in.py +9 -32
  28. credsweeper/ml_model/features/word_in_path.py +2 -3
  29. credsweeper/ml_model/features/word_in_postamble.py +1 -4
  30. credsweeper/ml_model/features/word_in_preamble.py +1 -4
  31. credsweeper/ml_model/features/word_in_transition.py +1 -4
  32. credsweeper/ml_model/features/word_in_value.py +2 -3
  33. credsweeper/ml_model/features/word_in_variable.py +2 -3
  34. credsweeper/ml_model/ml_config.json +15 -8
  35. credsweeper/ml_model/ml_model.onnx +0 -0
  36. credsweeper/ml_model/ml_validator.py +1 -1
  37. credsweeper/rules/config.yaml +129 -128
  38. credsweeper/scanner/scanner.py +12 -7
  39. credsweeper/secret/config.json +18 -5
  40. credsweeper/utils/util.py +19 -16
  41. {credsweeper-1.12.2.dist-info → credsweeper-1.13.0.dist-info}/METADATA +7 -7
  42. {credsweeper-1.12.2.dist-info → credsweeper-1.13.0.dist-info}/RECORD +45 -43
  43. credsweeper/filters/value_couple_keyword_check.py +0 -28
  44. {credsweeper-1.12.2.dist-info → credsweeper-1.13.0.dist-info}/WHEEL +0 -0
  45. {credsweeper-1.12.2.dist-info → credsweeper-1.13.0.dist-info}/entry_points.txt +0 -0
  46. {credsweeper-1.12.2.dist-info → credsweeper-1.13.0.dist-info}/licenses/LICENSE +0 -0
@@ -3,7 +3,7 @@
3
3
  confidence: weak
4
4
  type: pattern
5
5
  values:
6
- - (?P<variable>(\w*(?i:비밀번호|비번|패스워드|키|암호화?|토큰|(?<!by)pass(?!ed|ing|ion|es|age)|\bpwd?\b|token|secret|key|cred)\w*)\s*(설정은|[=:!]{1,3}))?\s*([._0-9A-Za-z\[\]]*get(env)?\s*\(\s*(?(variable)[^,]+|[\"'\\]*(\\*([\"']|&(quot|apos|#3[49]);)){0,4}(\w*(?i:(?<!by)pass(?!ed|ing|ion|es|age|\s+[a-z]{3,80})|\bpwd?\b|token|secret|key|cred)\w*))(\\*([\"']|&(quot|apos|#3[49]);)){0,4})\s*,\s*(default\s*=\s*)?([brufl@]{1,2}(?=\\*[\"'&]))?(?P<lq>(\\*([\"']|&(quot|apos|#3[49]);)){1,4})(?P<value>(.(?!(?P=lq))){4,80}.?)
6
+ - (?P<variable>(\w*(?i:비밀번호|비번|패스워드|키|암호화?|토큰|(?<!by)pass(?!e[dns]|ing|ion|age)|\bpwd?\b|token|secret|key|cred)\w*)\s*(설정은|[=:!]{1,3}))?\s*([._0-9A-Za-z\[\]]*get(env)?\s*\(\s*(?(variable)[^,]+|[\"'\\]*(\\*([\"']|&(quot|apos|#3[49]);)){0,4}(\w*(?i:(?<!by)pass(?!e[dns]|ing|ion|age|\s+[a-z]{3,80})|\bpwd?\b|token|secret|key|cred)\w*))(\\*([\"']|&(quot|apos|#3[49]);)){0,4})\s*,\s*(default\s*=\s*)?([brufl@]{1,2}(?=\\*[\"'&]))?(?P<lq>(\\*([\"']|&(quot|apos|#3[49]);)){1,4})(?P<value>(.(?!(?P=lq))){4,80}.?)
7
7
  filter_type:
8
8
  - ValueAllowlistCheck
9
9
  - LineGitBinaryCheck
@@ -34,7 +34,7 @@
34
34
  confidence: weak
35
35
  type: pattern
36
36
  values:
37
- - (?P<wrap>[\"'`(])?\s*(?P<variable>(\w*(?i:(?<!by)passw?o?r?d?s?(?!ed|ing|ion|es|age)|pwd?\b|\bp/w\b|token|secret|key|credential)\w*|비밀번호|비번|패스워드|키|암호화?|토큰))[\"'`]*(\s+(?i:is|are|was|were)(\s*[:-])?\s+|\s*(설정은|[=:!]{1,3})\s*)(?P<quote>[\"'`]{1,6})?(?P<value>(?(quote)(?(wrap)[^\"'`)]{4,80}|[^\"'`]{4,80})|(?(wrap)[^\"'`)]{4,80}|\S{4,80})))
37
+ - (?P<wrap>[\"'`(])?\s*(?P<variable>(\w*(?i:(?<!by)passw?o?r?d?s?(?!e[dns]|ing|ion|age)|pwd?\b|\bp/w\b|token|secret|key|credential)\w*|비밀번호|비번|패스워드|키|암호화?|토큰))[\"'`]*(\s+(?i:is|are|was|were)(\s*[:-])?\s+|\s*(설정은|[=:!]{1,3})\s*)(?P<quote>[\"'`]{1,6})?(?P<value>(?(quote)(?(wrap)[^\"'`)]{4,80}|[^\"'`]{4,80})|(?(wrap)[^\"'`)]{4,80}|\S{4,80})))
38
38
  filter_type:
39
39
  - ValueAllowlistCheck
40
40
  - LineGitBinaryCheck
@@ -73,7 +73,7 @@
73
73
  - ValueAllowlistCheck
74
74
  - ValuePatternCheck(4)
75
75
  - ValueEntropyBase64Check
76
- - ValueCoupleKeywordCheck
76
+ - ValueMorphemesCheck
77
77
  min_line_len: 16
78
78
  required_substrings:
79
79
  - token
@@ -90,7 +90,7 @@
90
90
  confidence: moderate
91
91
  type: pattern
92
92
  values:
93
- - (?P<variable>[\"'`]?(?i:(?<!id[ :/])pa[as]swo?r?ds?|pwd?|p/w|비밀번호|비번|패스워드|암호)[\"'`]?)((\s)*[=:](\s)*)(?P<quote>[\"'`(])?(?P<value>(?-i:(?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9/_+=~!@#$%^&*;:?-])){8,31}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x))(?(quote)[)\"'`])
93
+ - (?P<variable>[\"'`]?(?i:(?<!id[ :/])pa[as]swo?r?ds?|pwd?|p/w|비밀번호|비번|패스워드|암호)[\"'`]?)((\s)*[=:](\s)*)(?P<quote>[\"'`(])?(?P<value>(?-i:(?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9/_+=~!@#$%^&*;:?-])){8,64}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x))(?(quote)[)\"'`])
94
94
  filter_type:
95
95
  - ValueAllowlistCheck
96
96
  - ValuePatternCheck(4)
@@ -118,7 +118,7 @@
118
118
  confidence: moderate
119
119
  type: pattern
120
120
  values:
121
- - (^|\s|(?P<variable>(?i:\bip[\s/]{1,80}id[\s/]{1,80}pw[\s/:]{0,80}))|(?P<url>://))(?P<ip>(?<![0-9.])[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}(?![0-9.]))((\s*[(])?|(?(variable)[\s,/]{1,80}|(?(url)[,]|[,/])))\s*\w[\w.-]{3,80}[\s,/]{1,80}(?P<value>(?(url)(?-i:(?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9_+=~!@#$%^&*;?-])){7,31}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x)|(?-i:(?P<e>[A-Z])|(?P<f>[a-z])|(?P<g>[0-9/_+=~!@#$%^&*;?-])){7,31}(?(e)(?(f)(?(g)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x)))(?:\s|[^/]|$)
121
+ - (^|\s|(?P<variable>(?i:\bip[\s/]{1,80}id[\s/]{1,80}pw[\s/:]{0,80}))|(?P<url>://))(?P<ip>(?<![0-9.])[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}(?![0-9.]))((\s*[(])?|(?(variable)[\s,/]{1,80}|(?(url)[,]|[,/])))\s*\w[\w.-]{3,80}[\s,/]{1,80}(?P<value>(?(url)(?-i:(?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9_+=~!@#$%^&*;?-])){7,64}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x)|(?-i:(?P<e>[A-Z])|(?P<f>[a-z])|(?P<g>[0-9/_+=~!@#$%^&*;?-])){7,64}(?(e)(?(f)(?(g)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x)))(?:\s|[^/]|$)
122
122
  filter_type:
123
123
  - ValueAllowlistCheck
124
124
  - ValuePatternCheck(4)
@@ -134,7 +134,7 @@
134
134
  confidence: moderate
135
135
  type: pattern
136
136
  values:
137
- - (?P<ddash>--)?(?P<variable>\w*(?i:pa[as]swords?|passwd?|pwd|\bp/w|\bpw|비밀번호|비번|패스워드|암호))\s*?(?(ddash)[ =]|[:=/>-]{1,2})\s*(?P<quote>[\"'`]{1,8})?(?P<value>(?-i:(?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9/_+=~!@#$%^&*;:?-])){4,31}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x))(?(quote)(?P=quote)|(\s|$))
137
+ - (?P<ddash>--)?(?P<variable>\w*(?i:pa[as]swords?|passwd?|pwd|\bp/w|\bpw|비밀번호|비번|패스워드|암호))\s*?(?(ddash)[ =]|[:=/>-]{1,2})\s*(?P<quote>[\"'`]{1,8})?(?P<value>(?-i:(?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9/_+=~!@#$%^&*;:?-])){4,64}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x))(?(quote)(?P=quote)|(\s|$))
138
138
  - (?P<ddash>--)?(?P<variable>(?i:user\s*)?(?i:id|login|account|root|admin|user|name|wifi|role|host|default|계정|아이디))\s*?(?(ddash)[ =]|[ :=])\s*?(?P<value>\S+)
139
139
  filter_type:
140
140
  - ValueAllowlistCheck
@@ -157,7 +157,7 @@
157
157
  confidence: moderate
158
158
  type: pattern
159
159
  values:
160
- - (?P<variable>[\w.-]{0,80}(?i:(?P<id>\bid\b)|id\b|user|name|계정|아이디)[\w.-]{0,80}(?(id)[ :(/]{1,80}|[:(/]{1,80})(?i:pa[as]swo?r?ds?|pwd?|비밀번호|비번|패스워드|암호))\)?(\s*->\s*|[ =:)(/]{1,80}|\s+is\s+|\s+are\s+|\s*는\s*|\s*은\s*|\s*설정은\s*)\(?(?P<id_value>[\w.-]{2,31})[ :\(/\"',]{1,80}(?P<value>(?-i:(?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9/_+=~!@#$%^&*;:?-])){4,31}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x))
160
+ - (?P<variable>[\w.-]{0,80}(?i:(?P<id>\bid\b)|id\b|user|name|계정|아이디)[\w.-]{0,80}(?(id)[ :(/]{1,80}|[:(/]{1,80})(?i:pa[as]swo?r?ds?|pwd?|비밀번호|비번|패스워드|암호))\)?(\s*->\s*|[ =:)(/]{1,80}|\s+is\s+|\s+are\s+|\s*는\s*|\s*은\s*|\s*설정은\s*)\(?(?P<id_value>[\w.-]{2,64})[ :\(/\"',]{1,80}(?P<value>(?-i:(?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9/_+=~!@#$%^&*;:?-])){4,64}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x))
161
161
  filter_type:
162
162
  - ValueAllowlistCheck
163
163
  - ValuePatternCheck(4)
@@ -174,24 +174,6 @@
174
174
  target:
175
175
  - doc
176
176
 
177
- - name: SQL Password
178
- severity: medium
179
- confidence: weak
180
- type: pattern
181
- values:
182
- - (\\[nrt]|\b)(?i:(?P<variable>(CREATE|ALTER|SET\s{1,8}PASSWORD|INSERT(\s{1,8}IGNORE)?|UPDATE\s{1,8}[^\s;]{1,80})\s{1,8}(LOGIN|USER|ROLE|FOR|INTO|SET)\s{1,8}([^\s;]{1,80}\s{1,8}|VALUES\s*\(){1,8}(IDENTIFIED((\s{1,8}WITH\s{1,8}\S{1,80})?\s{1,8}(BY|AS))|(=|WITH)?\s*PASSWORD\b(\s*=)?)))\s*(?P<wrap>[(]\s*)?(?P<value_leftquote>((?P<esq>\\{1,8})?([\"'`]|&(quot|apos|#3[49]);)){1,4})?(?P<value>(?(value_leftquote)((?!(?P=value_leftquote))(?(esq)((?!(?P=esq)([\"'`]|&(quot|apos|#3[49]);)).)|((?!(?P=value_leftquote)).)))|(?!&(quot|apos|#3[49]);)(\\+([ tnr]|[^\s\"'`])|[^\s\"'`,;\\])){3,80})(?(value_leftquote)(?P<value_rightquote>(?<!\\)(?P=value_leftquote))|(?(wrap)[)]|[\s\"'`,;]))
183
- filter_type:
184
- - ValueAllowlistCheck
185
- - ValuePatternCheck
186
- min_line_len: 8
187
- required_substrings:
188
- - password
189
- - identified
190
- target:
191
- - doc
192
- - code
193
- use_ml: true
194
-
195
177
  - name: UUID
196
178
  severity: info
197
179
  confidence: strong
@@ -249,7 +231,7 @@
249
231
  - LineSpecificKeyCheck
250
232
  - ValuePatternCheck
251
233
  - ValueBase64PartCheck
252
- - ValueCoupleKeywordCheck(3)
234
+ - ValueMorphemesCheck
253
235
  required_substrings:
254
236
  - A
255
237
  min_line_len: 20
@@ -317,21 +299,6 @@
317
299
  - code
318
300
  - doc
319
301
 
320
- - name: Github Old Token
321
- severity: high
322
- confidence: moderate
323
- type: pattern
324
- values:
325
- - (?i)((git)[0-9A-Za-z_-]{0,80}(token|key|api)[0-9A-Za-z_-]{0,80}(\s)*(=|:|:=)(\s)*(["']?)(?P<value>[0-9a-z]{40})(["']?))
326
- filter_type: TokenPattern
327
- use_ml: true
328
- required_substrings:
329
- - git
330
- min_line_len: 47
331
- target:
332
- - code
333
- - doc
334
-
335
302
  - name: Google API Key
336
303
  severity: high
337
304
  confidence: moderate
@@ -480,7 +447,7 @@
480
447
  - (?P<variable>\b[dk])[^0-9A-Za-z_-]{1,8}(?P<value>[0-9A-Za-z_-]{22,8000})(?![=0-9A-Za-z_-])
481
448
  filter_type:
482
449
  - ValuePatternCheck
483
- - ValueCoupleKeywordCheck(3)
450
+ - ValueMorphemesCheck
484
451
  required_substrings:
485
452
  - kty
486
453
  min_line_len: 8
@@ -507,10 +474,9 @@
507
474
  confidence: moderate
508
475
  type: pattern
509
476
  values:
510
- - (?:^|[^0-9A-Za-z_+-]|\\[0abfnrtv]|(?:%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu][0-9A-Fa-f]{4}|\x1B\[[0-9;]{0,80}m)(?P<value>key-[0-9A-Za-z_-]{32})(?![0-9A-Za-z_-])
511
- filter_type: GeneralPattern
512
- required_substrings:
513
- - key-
477
+ - (?:^|[^0-9A-Za-z_+-]|\\[0abfnrtv]|(?:%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu][0-9A-Fa-f]{4}|\x1B\[[0-9;]{0,80}m)(?P<value>key-[0-9a-f]{32}|[0-9a-f]{32}-[0-9a-f]{8}-[0-9a-f]{8})(?![0-9A-Za-z_-])
478
+ filter_type: TokenPattern
479
+ required_regex: "[0-9A-Za-z_/+-]{15}"
514
480
  min_line_len: 36
515
481
  target:
516
482
  - code
@@ -728,76 +694,6 @@
728
694
  - code
729
695
  - doc
730
696
 
731
- - name: CMD ConvertTo-SecureString
732
- severity: high
733
- confidence: moderate
734
- type: pattern
735
- values:
736
- - (?P<variable>ConvertTo-SecureString(\s\s*-(String|AsPlainText|Force))*)\s\s*(?P<value_leftquote>(\\?[\"']){1,3})?(?P<value>(?(value_leftquote)[^\"'\\]|[^\s\"'\\]){4,800})(?(value_leftquote)(?P<value_rightquote>(\\?[\"']){1,3}))
737
- filter_type: GeneralKeyword
738
- use_ml: true
739
- required_substrings:
740
- - convertto-securestring
741
- min_line_len: 27
742
- target:
743
- - code
744
-
745
- - name: CMD Password
746
- severity: high
747
- confidence: moderate
748
- type: pattern
749
- values:
750
- - (^|\W|\\[0abfnrtv]|(?:%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu][0-9A-Fa-f]{4}|\x1B\[[0-9;]{0,80}m)(?P<variable>-[A-Za-z_-]*(?i:pass(in|out|word|phrase)))(\s|\\?[\"'],)\s*(?!-)(?P<value_leftquote>(\\?[\"']){1,3})?(pass:)?(?!file:|env:|fd:)(?P<value>(?(value_leftquote)[^\"'\\]|[^\s\"'\\]){4,80})(?(value_leftquote)(?P<value_rightquote>(\\?[\"']){1,3}))
751
- filter_type: GeneralKeyword
752
- use_ml: true
753
- required_substrings:
754
- - pass
755
- min_line_len: 12
756
- target:
757
- - code
758
-
759
- - name: CMD Token
760
- severity: high
761
- confidence: moderate
762
- type: pattern
763
- values:
764
- - (^|\W|\\[0abfnrtv]|(?:%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu][0-9A-Fa-f]{4}|\x1B\[[0-9;]{0,80}m)(?P<variable>-[A-Za-z_-]*(?i:token))(\s|\\?[\"'],)\s*(?!-)(?P<value_leftquote>(\\?[\"']){1,3})?(?P<value>(?(value_leftquote)[^\"'\\]|[^\s\"'\\]){4,4000})(?(value_leftquote)(?P<value_rightquote>(\\?[\"']){1,3}))
765
- filter_type: GeneralKeyword
766
- use_ml: true
767
- required_substrings:
768
- - token
769
- min_line_len: 12
770
- target:
771
- - code
772
-
773
- - name: CMD Secret
774
- severity: high
775
- confidence: moderate
776
- type: pattern
777
- values:
778
- - (^|\W|\\[0abfnrtv]|(?:%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu][0-9A-Fa-f]{4}|\x1B\[[0-9;]{0,80}m)(?P<variable>-[A-Za-z_-]*(?i:secret)[A-Za-z_-]*)(\s|\\?[\"'],)\s*(?!-)(?P<value_leftquote>(\\?[\"']){1,3})?(pass:)?(?!file:|env:|fd:)(?P<value>(?(value_leftquote)[^\"'\\]|[^\s\"'\\]){4,4000})(?(value_leftquote)(?P<value_rightquote>(\\?[\"']){1,3}))
779
- filter_type: GeneralKeyword
780
- use_ml: true
781
- required_substrings:
782
- - secret
783
- min_line_len: 12
784
- target:
785
- - code
786
-
787
- - name: URL Credentials
788
- severity: high
789
- confidence: moderate
790
- type: pattern
791
- values:
792
- - (?P<value_leftquote>[\"'])?(?P<variable>[+0-9A-Za-z-]{2,80}://)([^\s\'"<>\[\]^~`{|}:/]{0,80}:){1,3}(?P<value>[^\s\'"<>\[\]^~`{|}@:/]{3,80})@[^\s\'"<>\[\]^~`{|}@:/]{1,800}\\{0,8}(?P<value_rightquote>[\"'])?
793
- filter_type: UrlCredentialsGroup
794
- use_ml: true
795
- required_substrings:
796
- - ://
797
- min_line_len: 10
798
- target:
799
- - code
800
-
801
697
  - name: Telegram Bot API Token
802
698
  severity: high
803
699
  confidence: moderate
@@ -832,7 +728,8 @@
832
728
  type: pattern
833
729
  values:
834
730
  - (?:^|[^0-9A-Za-z_+-]|\\[0abfnrtv]|(?:%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu][0-9A-Fa-f]{4}|\x1B\[[0-9;]{0,80}m)(?P<value>npm_[0-9A-Za-z_-]{36,255})
835
- filter_type: TokenPattern
731
+ filter_type:
732
+ - ValueGitHubCheck
836
733
  required_substrings:
837
734
  - npm_
838
735
  min_line_len: 40
@@ -1086,12 +983,13 @@
1086
983
  confidence: strong
1087
984
  type: pattern
1088
985
  values:
1089
- - (?P<value>do[op]_v1_[a-f0-9]{64})(?![0-9A-Za-z_-])
986
+ - (?P<value>do[opr]_v1_[a-f0-9]{64})(?![0-9A-Za-z_-])
1090
987
  filter_type: TokenPattern
1091
988
  min_line_len: 71
1092
989
  required_substrings:
1093
990
  - doo_v1_
1094
991
  - dop_v1_
992
+ - dor_v1_
1095
993
  target:
1096
994
  - code
1097
995
  - doc
@@ -1275,7 +1173,7 @@
1275
1173
  - (?P<value>[0-9A-Za-z_-]{14}\.atlasv1\.[0-9A-Za-z_-]{67})(?![0-9A-Za-z_-])
1276
1174
  filter_type:
1277
1175
  - ValuePatternCheck
1278
- - ValueCoupleKeywordCheck(3)
1176
+ - ValueMorphemesCheck
1279
1177
  min_line_len: 90
1280
1178
  required_substrings:
1281
1179
  - .atlasv1.
@@ -1291,7 +1189,7 @@
1291
1189
  - (?:^|[^0-9A-Za-z_+-]|\\[0abfnrtv]|(?:%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu][0-9A-Fa-f]{4}|\x1B\[[0-9;]{0,80}m)(?P<value>S[ACNOPUX][A-Z2-7]{40,200})(?![=0-9A-Za-z_+-])
1292
1190
  min_line_len: 42
1293
1191
  filter_type:
1294
- - ValueCoupleKeywordCheck
1192
+ - ValueMorphemesCheck
1295
1193
  - ValuePatternCheck
1296
1194
  - ValueEntropyBase32Check
1297
1195
  - ValueBase32DataCheck
@@ -1316,7 +1214,7 @@
1316
1214
  values:
1317
1215
  - (?:^|[^0-9A-Za-z_+-]|\\[0abfnrtv]|(?:%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu][0-9A-Fa-f]{4}|\x1B\[[0-9;]{0,80}m)(?P<value>([A-Z2-7]{16}){1,2})(?![=0-9A-Za-z_+-])
1318
1216
  filter_type:
1319
- - ValueCoupleKeywordCheck
1217
+ - ValueMorphemesCheck
1320
1218
  - ValuePatternCheck
1321
1219
  - ValueEntropyBase32Check
1322
1220
  - ValueBase32DataCheck
@@ -1337,7 +1235,7 @@
1337
1235
  min_line_len: 51
1338
1236
  filter_type:
1339
1237
  - ValuePatternCheck
1340
- - ValueCoupleKeywordCheck
1238
+ - ValueMorphemesCheck
1341
1239
  required_substrings:
1342
1240
  - T3BlbkFJ
1343
1241
  - 9wZW5BS
@@ -1355,7 +1253,7 @@
1355
1253
  min_line_len: 36
1356
1254
  filter_type:
1357
1255
  - ValuePatternCheck
1358
- - ValueCoupleKeywordCheck
1256
+ - ValueMorphemesCheck
1359
1257
  required_substrings:
1360
1258
  - dckr_pat_
1361
1259
  - dckr_oat_
@@ -1372,7 +1270,7 @@
1372
1270
  min_line_len: 85
1373
1271
  filter_type:
1374
1272
  - ValuePatternCheck
1375
- - ValueCoupleKeywordCheck
1273
+ - ValueMorphemesCheck
1376
1274
  required_substrings:
1377
1275
  - SWMTKN-1-
1378
1276
  target:
@@ -1388,7 +1286,7 @@
1388
1286
  min_line_len: 52
1389
1287
  filter_type:
1390
1288
  - ValuePatternCheck
1391
- - ValueCoupleKeywordCheck(3)
1289
+ - ValueMorphemesCheck
1392
1290
  required_substrings:
1393
1291
  - SWMKEY-1-
1394
1292
  target:
@@ -1404,7 +1302,7 @@
1404
1302
  min_line_len: 56
1405
1303
  filter_type:
1406
1304
  - ValuePatternCheck
1407
- - ValueCoupleKeywordCheck
1305
+ - ValueMorphemesCheck
1408
1306
  required_substrings:
1409
1307
  - WGdyb3FY
1410
1308
  - hncm9xW
@@ -1515,7 +1413,7 @@
1515
1413
  values:
1516
1414
  - (?P<variable>discord(?:app)?\.com/api/webhooks)(?P<value>/[0-9]{16,22}/[0-9A-Za-z_-]{40,100})
1517
1415
  filter_type:
1518
- - ValueCoupleKeywordCheck(3)
1416
+ - ValueMorphemesCheck
1519
1417
  required_substrings:
1520
1418
  - discordapp.com/api/webhooks
1521
1419
  - discord.com/api/webhooks
@@ -1602,6 +1500,109 @@
1602
1500
  - code
1603
1501
  - doc
1604
1502
 
1503
+ - name: SQL Password
1504
+ severity: medium
1505
+ confidence: weak
1506
+ type: pattern
1507
+ values:
1508
+ - (\\[nrt]|\b)(?i:(?P<variable>(CREATE|ALTER|SET\s{1,8}PASSWORD|INSERT(\s{1,8}IGNORE)?|UPDATE\s{1,8}[^\s;]{1,80})\s{1,8}(LOGIN|USER|ROLE|FOR|INTO|SET)\s{1,8}([^\s;]{1,80}\s{1,8}|VALUES\s*\(){1,8}(IDENTIFIED((\s{1,8}WITH\s{1,8}\S{1,80})?\s{1,8}(BY|AS))|(=|WITH)?\s*PASSWORD\b(\s*=)?)))\s*(?P<wrap>[(]\s*)?(?P<value_leftquote>((?P<esq>\\{1,8})?([\"'`]|&(quot|apos|#3[49]);)){1,4})?(?P<value>(?(value_leftquote)((?!(?P=value_leftquote))(?(esq)((?!(?P=esq)([\"'`]|&(quot|apos|#3[49]);)).)|((?!(?P=value_leftquote)).)))|(?!&(quot|apos|#3[49]);)(\\+([ tnr]|[^\s\"'`])|[^\s\"'`,;\\])){3,80})(?(value_leftquote)(?P<value_rightquote>(?<!\\)(?P=value_leftquote))|(?(wrap)[)]|[\s\"'`,;]))
1509
+ filter_type:
1510
+ - ValueAllowlistCheck
1511
+ - ValuePatternCheck
1512
+ use_ml: true
1513
+ min_line_len: 8
1514
+ required_substrings:
1515
+ - password
1516
+ - identified
1517
+ target:
1518
+ - doc
1519
+ - code
1520
+
1521
+ - name: CURL User Password
1522
+ severity: high
1523
+ confidence: moderate
1524
+ type: pattern
1525
+ values:
1526
+ - (?P<variable>curl)\s.*(-[uU]|--(proxy-)?user)\s\s*(?P<value_leftquote>(\\*[\"']){1,3})?(?(value_leftquote)[^\"'\\:]|[^\s\"'\\:]){0,64}:(?P<value>(?(value_leftquote)[^\"'\\]|[^\s\"'\\]){4,64})(?(value_leftquote)(?P<value_rightquote>(\\?[\"']){1,3}))
1527
+ filter_type: GeneralKeyword
1528
+ use_ml: true
1529
+ required_substrings:
1530
+ - curl
1531
+ min_line_len: 16
1532
+ target:
1533
+ - code
1534
+
1535
+ - name: CMD ConvertTo-SecureString
1536
+ severity: high
1537
+ confidence: moderate
1538
+ type: pattern
1539
+ values:
1540
+ - (?P<variable>ConvertTo-SecureString(\s\s*-(String|AsPlainText|Force))*)\s\s*(?P<value_leftquote>(\\?[\"']){1,3})?(?P<value>(?(value_leftquote)[^\"'\\]|[^\s\"'\\]){4,800})(?(value_leftquote)(?P<value_rightquote>(\\?[\"']){1,3}))
1541
+ filter_type: GeneralKeyword
1542
+ use_ml: true
1543
+ required_substrings:
1544
+ - convertto-securestring
1545
+ min_line_len: 27
1546
+ target:
1547
+ - code
1548
+
1549
+ - name: CMD Password
1550
+ severity: high
1551
+ confidence: moderate
1552
+ type: pattern
1553
+ values:
1554
+ - (^|\W|\\[0abfnrtv]|(?:%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu][0-9A-Fa-f]{4}|\x1B\[[0-9;]{0,80}m)(?P<variable>-[A-Za-z_-]*(?i:pass(in|out|word|phrase)))(\s|\\?[\"'],)\s*(?!-)(?P<value_leftquote>(\\?[\"']){1,3})?(pass:)?(?!file:|env:|fd:)(?P<value>(?(value_leftquote)[^\"'\\]|[^\s\"'\\]){4,80})(?(value_leftquote)(?P<value_rightquote>(\\?[\"']){1,3}))
1555
+ filter_type: GeneralKeyword
1556
+ use_ml: true
1557
+ required_substrings:
1558
+ - pass
1559
+ min_line_len: 12
1560
+ target:
1561
+ - code
1562
+
1563
+ - name: CMD Token
1564
+ severity: high
1565
+ confidence: moderate
1566
+ type: pattern
1567
+ values:
1568
+ - (^|\W|\\[0abfnrtv]|(?:%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu][0-9A-Fa-f]{4}|\x1B\[[0-9;]{0,80}m)(?P<variable>-[A-Za-z_-]*(?i:token|oauth2-bearer))(\s|\\?[\"'],)\s*(?!-)(?P<value_leftquote>(\\?[\"']){1,3})?(?P<value>(?(value_leftquote)[^\"'\\]|[^\s\"'\\]){4,4000})(?(value_leftquote)(?P<value_rightquote>(\\?[\"']){1,3}))
1569
+ filter_type: GeneralKeyword
1570
+ use_ml: true
1571
+ required_substrings:
1572
+ - token
1573
+ - oauth2-bearer
1574
+ min_line_len: 12
1575
+ target:
1576
+ - code
1577
+
1578
+ - name: CMD Secret
1579
+ severity: high
1580
+ confidence: moderate
1581
+ type: pattern
1582
+ values:
1583
+ - (^|\W|\\[0abfnrtv]|(?:%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu][0-9A-Fa-f]{4}|\x1B\[[0-9;]{0,80}m)(?P<variable>-[A-Za-z_-]*(?i:secret)[A-Za-z_-]*)(\s|\\?[\"'],)\s*(?!-)(?P<value_leftquote>(\\?[\"']){1,3})?(pass:)?(?!file:|env:|fd:)(?P<value>(?(value_leftquote)[^\"'\\]|[^\s\"'\\]){4,4000})(?(value_leftquote)(?P<value_rightquote>(\\?[\"']){1,3}))
1584
+ filter_type: GeneralKeyword
1585
+ use_ml: true
1586
+ required_substrings:
1587
+ - secret
1588
+ min_line_len: 12
1589
+ target:
1590
+ - code
1591
+
1592
+ - name: URL Credentials
1593
+ severity: high
1594
+ confidence: moderate
1595
+ type: pattern
1596
+ values:
1597
+ - (?P<value_leftquote>[\"'])?(?P<variable>[+0-9A-Za-z-]{2,80}://)([^\s\'"<>\[\]^~`{|}:/]{0,80}:){1,3}(?P<value>[^\s\'"<>\[\]^~`{|}@:/]{3,80})@[^\s\'"<>\[\]^~`{|}@:/]{1,800}\\{0,8}(?P<value_rightquote>[\"'])?
1598
+ filter_type: UrlCredentialsGroup
1599
+ use_ml: true
1600
+ required_substrings:
1601
+ - ://
1602
+ min_line_len: 10
1603
+ target:
1604
+ - code
1605
+
1605
1606
  - name: API
1606
1607
  severity: low
1607
1608
  confidence: moderate
@@ -1677,7 +1678,7 @@
1677
1678
  confidence: moderate
1678
1679
  type: keyword
1679
1680
  values:
1680
- - (?<!by)pass(?!ed|ing|ion|es|age|\s+[a-z]{3,80})|pw(d|\b)
1681
+ - (?<!by)pass(?!e[dns]|ing|ion|age|\s+[a-z]{3,80})|pw(d|\b)
1681
1682
  filter_type: PasswordKeyword
1682
1683
  use_ml: true
1683
1684
  min_line_len: 10
@@ -19,6 +19,8 @@ from credsweeper.utils.util import Util
19
19
 
20
20
  logger = logging.getLogger(__name__)
21
21
 
22
+ RULES_PATH = APP_PATH / "rules" / "config.yaml"
23
+
22
24
 
23
25
  class Scanner:
24
26
  """Advanced Credential Scanner base class.
@@ -66,11 +68,11 @@ class Scanner:
66
68
  return True
67
69
  return False
68
70
 
69
- def _set_rules_scanners(self, rule_path: Union[None, str, Path]) -> None:
71
+ def _set_rules_scanners(self, rules_path: Union[None, str, Path]) -> None:
70
72
  """Auxiliary method to fill rules, determine min_pattern_len and set scanners"""
71
- if rule_path is None:
72
- rule_path = APP_PATH / "rules" / "config.yaml"
73
- rule_templates = Util.yaml_load(rule_path)
73
+ if rules_path is None:
74
+ rules_path = RULES_PATH
75
+ rule_templates = Util.yaml_load(rules_path)
74
76
  if rule_templates and isinstance(rule_templates, list):
75
77
  rule_names = set()
76
78
  for rule_template in rule_templates:
@@ -98,7 +100,7 @@ class Scanner:
98
100
  logger.warning(f"Unknown rule type:{rule.rule_type}")
99
101
  self.rules_scanners.append((rule, self.get_scanner(rule)))
100
102
  else:
101
- raise RuntimeError(f"Wrong rules '{rule_templates}' were read from '{rule_path}'")
103
+ raise RuntimeError(f"Wrong rules '{rule_templates}' were read from '{rules_path}'")
102
104
 
103
105
  def _is_available(self, rule: Rule) -> bool:
104
106
  """separate the method to reduce complexity"""
@@ -153,8 +155,11 @@ class Scanner:
153
155
  target_line_stripped_len >= self.min_keyword_len and ( #
154
156
  '=' in target_line_stripped
155
157
  or ':' in target_line_stripped
156
- or "#define" in target_line_stripped
157
- or "%define" in target_line_stripped
158
+ or ("define" in target_line_stripped
159
+ and ('(' in target_line_stripped and ',' in target_line_stripped
160
+ or "#define" in target_line_stripped
161
+ or "%define" in target_line_stripped)
162
+ )
158
163
  or "%global" in target_line_stripped
159
164
  or "set" in target_line_stripped_lower
160
165
  or "%3d" in target_line_stripped_lower
@@ -12,18 +12,21 @@
12
12
  ".rpm",
13
13
  ".tar",
14
14
  ".war",
15
+ ".whl",
15
16
  ".xz",
16
17
  ".zip"
17
18
  ],
18
19
  "documents": [
19
- ".xlsx",
20
+ ".doc",
20
21
  ".docx",
21
- ".pptx",
22
- ".xls",
23
22
  ".odp",
24
23
  ".ods",
25
24
  ".odt",
26
- ".pdf"
25
+ ".pdf",
26
+ ".ppt",
27
+ ".pptx",
28
+ ".xls",
29
+ ".xlsx"
27
30
  ],
28
31
  "extension": [
29
32
  ".7z",
@@ -45,16 +48,23 @@
45
48
  ".info",
46
49
  ".jpeg",
47
50
  ".jpg",
51
+ ".lib",
48
52
  ".map",
49
53
  ".m4a",
50
54
  ".mat",
51
55
  ".mo",
56
+ ".mov",
52
57
  ".mp3",
53
58
  ".mp4",
59
+ ".mpg",
60
+ ".mkv",
54
61
  ".npy",
55
62
  ".npz",
56
63
  ".obj",
64
+ ".oga",
57
65
  ".ogg",
66
+ ".ogv",
67
+ ".ops",
58
68
  ".pak",
59
69
  ".png",
60
70
  ".psd",
@@ -71,8 +81,10 @@
71
81
  ".so",
72
82
  ".sum",
73
83
  ".svg",
84
+ ".swf",
74
85
  ".tif",
75
86
  ".tiff",
87
+ ".tlb",
76
88
  ".ttf",
77
89
  ".vcxproj",
78
90
  ".vdproj",
@@ -81,6 +93,7 @@
81
93
  ".webp",
82
94
  ".wma",
83
95
  ".woff",
96
+ ".woff2",
84
97
  ".yuv"
85
98
  ],
86
99
  "path": [
@@ -164,7 +177,7 @@
164
177
  "tizen"
165
178
  ],
166
179
  "check_for_literals": true,
167
- "max_password_value_length": 31,
180
+ "max_password_value_length": 64,
168
181
  "max_url_cred_value_length": 80,
169
182
  "line_data_output": [
170
183
  "line",
credsweeper/utils/util.py CHANGED
@@ -141,15 +141,6 @@ class Util:
141
141
  min_entropy = Util.get_min_data_entropy(data_len)
142
142
  return entropy < min_entropy
143
143
 
144
- @staticmethod
145
- def is_known(data: Union[bytes, bytearray]) -> bool:
146
- """Returns True if any known binary format is found to prevent extra scan a file without an extension."""
147
- if isinstance(data, (bytes, bytearray)) and data.startswith(b"\x7f\x45\x4c\x46") and 127 <= len(data):
148
- # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
149
- # minimal ELF is 127 bytes https://github.com/tchajed/minimal-elf
150
- return True
151
- return False
152
-
153
144
  @staticmethod
154
145
  def is_binary(data: Union[bytes, bytearray]) -> bool:
155
146
  """
@@ -218,13 +209,12 @@ class Util:
218
209
  try:
219
210
  if binary_suggest and LATIN_1 == encoding and (Util.is_binary(content) or not Util.is_latin1(content)):
220
211
  # LATIN_1 may convert data (bytes in range 0x80:0xFF are transformed)
221
- # so skip this encoding when checking binaries
222
- logger.warning("Binary file detected %s", repr(content[:8]))
223
212
  break
224
- text = content.decode(encoding, errors="strict")
225
- if content != text.encode(encoding, errors="strict"):
213
+ _text = content.decode(encoding=encoding, errors="strict")
214
+ if content != _text.encode(encoding=encoding, errors="strict"):
226
215
  # the check helps to detect a real encoding
227
216
  raise UnicodeError
217
+ text = _text
228
218
  break
229
219
  except UnicodeError:
230
220
  binary_suggest = True
@@ -233,6 +223,11 @@ class Util:
233
223
  logger.error(f"Unexpected Error: Can't read content as {encoding}. Error message: {exc}")
234
224
  return text
235
225
 
226
+ @staticmethod
227
+ def split_text(text: str) -> List[str]:
228
+ """Splits a text into lines, handling all common line endings (e.g., LF, CRLF, CR)."""
229
+ return text.replace("\r\n", '\n').replace('\r', '\n').split('\n')
230
+
236
231
  @staticmethod
237
232
  def decode_bytes(content: bytes, encodings: Optional[List[str]] = None) -> List[str]:
238
233
  """Decode content using different encodings.
@@ -251,7 +246,7 @@ class Util:
251
246
 
252
247
  """
253
248
  if text := Util.decode_text(content, encodings):
254
- lines = text.replace('\r\n', '\n').replace('\r', '\n').split('\n')
249
+ lines = Util.split_text(text)
255
250
  else:
256
251
  lines = []
257
252
  return lines
@@ -355,13 +350,20 @@ class Util:
355
350
  return True
356
351
  return False
357
352
 
358
- @classmethod
359
- def is_sqlite3(cls, data):
353
+ @staticmethod
354
+ def is_sqlite3(data: Union[bytes, bytearray]):
360
355
  """According https://en.wikipedia.org/wiki/List_of_file_signatures - SQLite Database"""
361
356
  if isinstance(data, (bytes, bytearray)) and data.startswith(b"SQLite format 3\0"):
362
357
  return True
363
358
  return False
364
359
 
360
+ @staticmethod
361
+ def is_rtf(data: Union[bytes, bytearray]):
362
+ """According https://en.wikipedia.org/wiki/List_of_file_signatures - Rich Text Format"""
363
+ if isinstance(data, (bytes, bytearray)) and data.startswith(b"{\\rtf1") and data.endswith(b"}"):
364
+ return True
365
+ return False
366
+
365
367
  @staticmethod
366
368
  def is_asn1(data: Union[bytes, bytearray]) -> int:
367
369
  """Only sequence type 0x30 and size correctness are checked
@@ -575,6 +577,7 @@ class Util:
575
577
  """decode text to bytes with / without padding detect and urlsafe symbols"""
576
578
  value = text.translate(Util.WHITESPACE_TRANS_TABLE)
577
579
  if padding_safe:
580
+ value = value.rstrip('=') # python 3.10 workaround
578
581
  pad_num = 0x3 & len(value)
579
582
  if pad_num:
580
583
  value += '=' * (4 - pad_num)