credsweeper 1.12.2__py3-none-any.whl → 1.13.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic. Click here for more details.

Files changed (48)
  1. credsweeper/__init__.py +1 -1
  2. credsweeper/__main__.py +15 -8
  3. credsweeper/app.py +7 -2
  4. credsweeper/common/keyword_pattern.py +6 -3
  5. credsweeper/common/morpheme_checklist.txt +24 -6
  6. credsweeper/config/config.py +1 -0
  7. credsweeper/credentials/line_data.py +21 -6
  8. credsweeper/deep_scanner/deep_scanner.py +12 -6
  9. credsweeper/deep_scanner/jks_scanner.py +11 -2
  10. credsweeper/deep_scanner/pkcs_scanner.py +4 -0
  11. credsweeper/deep_scanner/rtf_scanner.py +41 -0
  12. credsweeper/deep_scanner/strings_scanner.py +52 -0
  13. credsweeper/file_handler/byte_content_provider.py +10 -1
  14. credsweeper/file_handler/file_path_extractor.py +2 -0
  15. credsweeper/file_handler/text_content_provider.py +7 -1
  16. credsweeper/filters/__init__.py +1 -1
  17. credsweeper/filters/group/token_pattern.py +2 -2
  18. credsweeper/filters/group/weird_base36_token.py +2 -2
  19. credsweeper/filters/group/weird_base64_token.py +2 -2
  20. credsweeper/filters/value_file_path_check.py +5 -3
  21. credsweeper/filters/value_github_check.py +3 -2
  22. credsweeper/filters/value_morphemes_check.py +43 -0
  23. credsweeper/filters/value_string_type_check.py +1 -0
  24. credsweeper/ml_model/features/feature.py +1 -18
  25. credsweeper/ml_model/features/file_extension.py +1 -1
  26. credsweeper/ml_model/features/has_html_tag.py +10 -8
  27. credsweeper/ml_model/features/is_secret_numeric.py +4 -3
  28. credsweeper/ml_model/features/rule_name.py +1 -1
  29. credsweeper/ml_model/features/word_in.py +9 -32
  30. credsweeper/ml_model/features/word_in_path.py +2 -3
  31. credsweeper/ml_model/features/word_in_postamble.py +1 -4
  32. credsweeper/ml_model/features/word_in_preamble.py +1 -4
  33. credsweeper/ml_model/features/word_in_transition.py +1 -4
  34. credsweeper/ml_model/features/word_in_value.py +2 -3
  35. credsweeper/ml_model/features/word_in_variable.py +2 -3
  36. credsweeper/ml_model/ml_config.json +15 -8
  37. credsweeper/ml_model/ml_model.onnx +0 -0
  38. credsweeper/ml_model/ml_validator.py +1 -1
  39. credsweeper/rules/config.yaml +129 -128
  40. credsweeper/scanner/scanner.py +12 -7
  41. credsweeper/secret/config.json +18 -5
  42. credsweeper/utils/util.py +19 -16
  43. {credsweeper-1.12.2.dist-info → credsweeper-1.13.1.dist-info}/METADATA +7 -7
  44. {credsweeper-1.12.2.dist-info → credsweeper-1.13.1.dist-info}/RECORD +47 -45
  45. credsweeper/filters/value_couple_keyword_check.py +0 -28
  46. {credsweeper-1.12.2.dist-info → credsweeper-1.13.1.dist-info}/WHEEL +0 -0
  47. {credsweeper-1.12.2.dist-info → credsweeper-1.13.1.dist-info}/entry_points.txt +0 -0
  48. {credsweeper-1.12.2.dist-info → credsweeper-1.13.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,6 +1,6 @@
1
1
  from credsweeper.common.constants import GroupType
2
2
  from credsweeper.config.config import Config
3
- from credsweeper.filters import ValueCoupleKeywordCheck, ValueCamelCaseCheck, ValueNumberCheck, ValuePatternCheck
3
+ from credsweeper.filters import ValueMorphemesCheck, ValueCamelCaseCheck, ValueNumberCheck, ValuePatternCheck
4
4
  from credsweeper.filters.group.group import Group
5
5
 
6
6
 
@@ -10,7 +10,7 @@ class TokenPattern(Group):
10
10
  def __init__(self, config: Config) -> None:
11
11
  super().__init__(config, GroupType.DEFAULT)
12
12
  self.filters = [
13
- ValueCoupleKeywordCheck(),
13
+ ValueMorphemesCheck(),
14
14
  ValueNumberCheck(),
15
15
  ValueCamelCaseCheck(),
16
16
  ValuePatternCheck(),
@@ -1,6 +1,6 @@
1
1
  from credsweeper.common.constants import GroupType
2
2
  from credsweeper.config.config import Config
3
- from credsweeper.filters import ValueCoupleKeywordCheck, ValuePatternCheck, ValueNumberCheck, ValueEntropyBase36Check, \
3
+ from credsweeper.filters import ValueMorphemesCheck, ValuePatternCheck, ValueNumberCheck, ValueEntropyBase36Check, \
4
4
  ValueTokenBase36Check
5
5
  from credsweeper.filters.group.group import Group
6
6
 
@@ -11,7 +11,7 @@ class WeirdBase36Token(Group):
11
11
  def __init__(self, config: Config) -> None:
12
12
  super().__init__(config, GroupType.DEFAULT)
13
13
  self.filters = [
14
- ValueCoupleKeywordCheck(),
14
+ ValueMorphemesCheck(threshold=1),
15
15
  ValuePatternCheck(),
16
16
  ValueNumberCheck(),
17
17
  ValueTokenBase36Check(),
@@ -1,6 +1,6 @@
1
1
  from credsweeper.common.constants import GroupType
2
2
  from credsweeper.config.config import Config
3
- from credsweeper.filters import ValueCoupleKeywordCheck, ValueNotPartEncodedCheck, \
3
+ from credsweeper.filters import ValueMorphemesCheck, ValueNotPartEncodedCheck, \
4
4
  ValueBase64DataCheck, ValueEntropyBase64Check, ValuePatternCheck, ValueNumberCheck, ValueTokenBase64Check, \
5
5
  ValueBase64PartCheck
6
6
  from credsweeper.filters.group.group import Group
@@ -12,7 +12,7 @@ class WeirdBase64Token(Group):
12
12
  def __init__(self, config: Config) -> None:
13
13
  super().__init__(config, GroupType.DEFAULT)
14
14
  self.filters = [
15
- ValueCoupleKeywordCheck(),
15
+ ValueMorphemesCheck(threshold=1),
16
16
  ValueNumberCheck(),
17
17
  ValueBase64DataCheck(),
18
18
  ValueTokenBase64Check(),
@@ -35,6 +35,8 @@ class ValueFilePathCheck(Filter):
35
35
 
36
36
  """
37
37
  value = line_data.value
38
+ bit_length = len(value).bit_length()
39
+ morpheme_threshold = 1 if 6 > bit_length else bit_length - 4
38
40
  contains_unix_separator = '/' in value
39
41
  if contains_unix_separator:
40
42
  if ("://" in value #
@@ -45,14 +47,14 @@ class ValueFilePathCheck(Filter):
45
47
  or value.startswith("//") and ':' == line_data.separator):
46
48
  # common case for url definition or aliases
47
49
  # or _keyword_://example.com where : is the separator
48
- return static_keyword_checklist.check_morphemes(value.lower(), 1)
50
+ return static_keyword_checklist.check_morphemes(value.lower(), morpheme_threshold)
49
51
  # base64 encoded data might look like linux path
50
52
  min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(value))
51
53
  # get minimal entropy to compare with shannon entropy of found value
52
54
  # min_entropy == 0 means that the value cannot be checked with the entropy due high variance
53
55
  for i in value:
54
56
  if i not in self.base64stdpad_possible_set:
55
- # value contains wrong BASE64STDPAD_CHARS symbols like -_
57
+ # value contains wrong BASE64STDPAD_CHARS symbols like -_.
56
58
  break
57
59
  else:
58
60
  # all symbols are from base64 alphabet
@@ -74,5 +76,5 @@ class ValueFilePathCheck(Filter):
74
76
  break
75
77
  else:
76
78
  if contains_unix_separator ^ contains_windows_separator:
77
- return static_keyword_checklist.check_morphemes(value.lower(), 1)
79
+ return static_keyword_checklist.check_morphemes(value.lower(), morpheme_threshold)
78
80
  return False
@@ -12,7 +12,7 @@ from credsweeper.filters.filter import Filter
12
12
 
13
13
 
14
14
  class ValueGitHubCheck(Filter):
15
- """GitHub Classic Token validation"""
15
+ """NPM or GitHub Classic Token validation"""
16
16
 
17
17
  def __init__(self, config: Optional[Config] = None) -> None:
18
18
  pass
@@ -29,8 +29,9 @@ class ValueGitHubCheck(Filter):
29
29
 
30
30
  """
31
31
  # https://github.blog/2021-04-05-behind-githubs-new-authentication-token-formats/
32
+ # https://github.blog/security/announcing-npms-new-access-token-format/
32
33
  with contextlib.suppress(Exception):
33
- if line_data.value.startswith("gh") and '_' == line_data.value[3]:
34
+ if (line_data.value.startswith("gh") and '_' == line_data.value[3]) or line_data.value.startswith("npm_"):
34
35
  token = line_data.value[4:-6]
35
36
  data = token.encode(ASCII, errors="strict")
36
37
  crc32sum = binascii.crc32(data)
@@ -0,0 +1,43 @@
1
+ from typing import Optional
2
+
3
+ from credsweeper.common import static_keyword_checklist
4
+ from credsweeper.common.constants import MAX_LINE_LENGTH
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.credentials.line_data import LineData
7
+ from credsweeper.file_handler.analysis_target import AnalysisTarget
8
+ from credsweeper.filters.filter import Filter
9
+
10
+
11
+ class ValueMorphemesCheck(Filter):
12
+ """Check value for a threshold of morphemes count"""
13
+
14
+ THRESHOLDS_X3 = int(MAX_LINE_LENGTH).bit_length()
15
+ # one morpheme is very likely to be random generated even for 3 symbols
16
+ MAX_MORPHEMES_LIMIT = max(1, THRESHOLDS_X3 - 4)
17
+
18
+ def __init__(self, config: Optional[Config] = None, threshold: Optional[int] = None) -> None:
19
+ # threshold - minimum morphemes number in a value
20
+ if threshold is None:
21
+ # use dynamic thresholds
22
+ self.thresholds = [max(1, x - 4) for x in range(ValueMorphemesCheck.THRESHOLDS_X3)]
23
+ elif isinstance(threshold, int) and 0 <= threshold:
24
+ # constant thresholds for any pattern
25
+ self.thresholds = [threshold] * ValueMorphemesCheck.THRESHOLDS_X3
26
+ else:
27
+ raise ValueError(f"Wrong type of pattern length {type(threshold)} = {repr(threshold)}")
28
+
29
+ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
30
+ """Run filter checks on received credential candidate data 'line_data'.
31
+
32
+ Args:
33
+ line_data: credential candidate data
34
+ target: multiline target from which line data was obtained
35
+
36
+ Return:
37
+ True, if need to filter candidate and False if left
38
+
39
+ """
40
+ threshold_id = len(line_data.value).bit_length()
41
+ # use the last (max) threshold in very huge value
42
+ threshold = self.thresholds[threshold_id] if len(self.thresholds) > threshold_id else self.thresholds[-1]
43
+ return static_keyword_checklist.check_morphemes(line_data.value.lower(), threshold)
@@ -51,6 +51,7 @@ class ValueStringTypeCheck(Filter):
51
51
  and not line_data.is_comment() \
52
52
  and not line_data.is_well_quoted_value \
53
53
  and not line_data.is_quoted \
54
+ and not '0' <= line_data.value[0] <= '9' \
54
55
  and line_data.separator and '=' in line_data.separator:
55
56
  # heterogeneous code e.g. YAML in Python uses colon sign instead equals
56
57
  return True
@@ -10,7 +10,7 @@ class Feature(ABC):
10
10
  """Base class for features."""
11
11
 
12
12
  def __init__(self):
13
- self.words = []
13
+ pass
14
14
 
15
15
  def __call__(self, candidates: List[Candidate]) -> np.ndarray:
16
16
  """Call base class for features.
@@ -25,20 +25,3 @@ class Feature(ABC):
25
25
  def extract(self, candidate: Candidate) -> Any:
26
26
  """Abstract method of base class"""
27
27
  raise NotImplementedError
28
-
29
- @property
30
- def words(self) -> List[str]:
31
- """getter"""
32
- return self.__words
33
-
34
- @words.setter
35
- def words(self, words: List[str]) -> None:
36
- """setter"""
37
- self.__words = words
38
-
39
- def any_word_in_(self, a_string: str) -> bool:
40
- """Returns true if any words in a string"""
41
- for i in self.words:
42
- if i in a_string:
43
- return True
44
- return False
@@ -19,7 +19,7 @@ class FileExtension(WordIn):
19
19
 
20
20
  def __call__(self, candidates: List[Candidate]) -> np.ndarray:
21
21
  extension_set = set(candidate.line_data_list[0].file_type.lower() for candidate in candidates)
22
- return self.word_in_set(extension_set)
22
+ return self.word_in_(extension_set)
23
23
 
24
24
  def extract(self, candidate: Candidate) -> Any:
25
25
  raise NotImplementedError
@@ -1,17 +1,18 @@
1
1
  from credsweeper.common.constants import CHUNK_SIZE
2
2
  from credsweeper.credentials.candidate import Candidate
3
- from credsweeper.ml_model.features.feature import Feature
3
+ from credsweeper.ml_model.features.word_in import WordIn
4
4
  from credsweeper.utils.util import Util
5
5
 
6
6
 
7
- class HasHtmlTag(Feature):
7
+ class HasHtmlTag(WordIn):
8
8
  """Feature is true if line has HTML tags (HTML file)."""
9
9
 
10
+ HTML_WORDS = [
11
+ '< img', '<img', '< script', '<script', '< p', '<p', '< link', '<link', '< meta', '<meta', '< a', '<a'
12
+ ]
13
+
10
14
  def __init__(self) -> None:
11
- super().__init__()
12
- self.words = [
13
- '< img', '<img', '< script', '<script', '< p', '<p', '< link', '<link', '< meta', '<meta', '< a', '<a'
14
- ]
15
+ super().__init__(HasHtmlTag.HTML_WORDS)
15
16
 
16
17
  def extract(self, candidate: Candidate) -> bool:
17
18
  subtext = Util.subtext(candidate.line_data_list[0].line, candidate.line_data_list[0].value_start, CHUNK_SIZE)
@@ -19,8 +20,9 @@ class HasHtmlTag(Feature):
19
20
  if '<' not in candidate_line_data_list_0_line_lower:
20
21
  # early check
21
22
  return False
22
- if self.any_word_in_(candidate_line_data_list_0_line_lower):
23
- return True
23
+ for i in self.words:
24
+ if i in candidate_line_data_list_0_line_lower:
25
+ return True
24
26
  if "/>" in candidate_line_data_list_0_line_lower or "</" in candidate_line_data_list_0_line_lower:
25
27
  # possible closed tag
26
28
  return True
@@ -1,3 +1,5 @@
1
+ import contextlib
2
+
1
3
  from credsweeper.credentials.candidate import Candidate
2
4
  from credsweeper.ml_model.features.feature import Feature
3
5
 
@@ -6,8 +8,7 @@ class IsSecretNumeric(Feature):
6
8
  """Feature is true if candidate value is a numerical value."""
7
9
 
8
10
  def extract(self, candidate: Candidate) -> bool:
9
- try:
11
+ with contextlib.suppress(ValueError):
10
12
  float(candidate.line_data_list[0].value)
11
13
  return True
12
- except ValueError:
13
- return False
14
+ return False
@@ -19,7 +19,7 @@ class RuleName(WordIn):
19
19
 
20
20
  def __call__(self, candidates: List[Candidate]) -> np.ndarray:
21
21
  candidate_rule_set = set(x.rule_name for x in candidates)
22
- return self.word_in_set(candidate_rule_set)
22
+ return self.word_in_(candidate_rule_set)
23
23
 
24
24
  def extract(self, candidate: Candidate) -> Any:
25
25
  raise NotImplementedError
@@ -1,5 +1,5 @@
1
1
  from abc import abstractmethod
2
- from typing import List, Any, Tuple, Set
2
+ from typing import List, Any, Set, Union
3
3
 
4
4
  import numpy as np
5
5
 
@@ -18,42 +18,19 @@ class WordIn(Feature):
18
18
  if len(self.enumerated_words) != self.dimension:
19
19
  raise RuntimeError(f"Check duplicates:{words}")
20
20
 
21
- @property
22
- def enumerated_words(self) -> List[Tuple[int, str]]:
23
- """getter for speedup"""
24
- return self.__enumerated_words
25
-
26
- @enumerated_words.setter
27
- def enumerated_words(self, enumerated_words: List[Tuple[int, str]]) -> None:
28
- """setter for speedup"""
29
- self.__enumerated_words = enumerated_words
30
-
31
- @property
32
- def dimension(self) -> int:
33
- """getter"""
34
- return self.__dimension
35
-
36
- @dimension.setter
37
- def dimension(self, dimension: int) -> None:
38
- """setter"""
39
- self.__dimension = dimension
40
-
41
21
  @abstractmethod
42
22
  def extract(self, candidate: Candidate) -> Any:
43
23
  raise NotImplementedError
44
24
 
45
- def word_in_str(self, a_string: str) -> np.ndarray:
46
- """Returns array with words included in a string"""
47
- result: np.ndarray = np.zeros(shape=[self.dimension], dtype=np.int8)
48
- for i, word in self.enumerated_words:
49
- if word in a_string:
50
- result[i] = 1
51
- return np.array([result])
25
+ @property
26
+ def zero(self) -> np.ndarray:
27
+ """Returns zero filled array for case of empty input"""
28
+ return np.zeros(shape=[self.dimension], dtype=np.int8)
52
29
 
53
- def word_in_set(self, a_strings_set: Set[str]) -> np.ndarray:
54
- """Returns array with words matches in a_strings_set"""
55
- result: np.ndarray = np.zeros(shape=[self.dimension], dtype=np.int8)
30
+ def word_in_(self, iterable_data: Union[str, List[str], Set[str]]) -> np.ndarray:
31
+ """Returns array with words included in a string"""
32
+ result: np.ndarray = self.zero
56
33
  for i, word in self.enumerated_words:
57
- if word in a_strings_set:
34
+ if word in iterable_data:
58
35
  result[i] = 1
59
36
  return np.array([result])
@@ -19,9 +19,8 @@ class WordInPath(WordIn):
19
19
  posix_lower_path = path.as_posix().lower() if path.is_absolute() else f"./{path.as_posix().lower()}"
20
20
  # prevent extra confusion from the same word in extension
21
21
  path_without_extension, _ = os.path.splitext(posix_lower_path)
22
- return self.word_in_str(path_without_extension)
23
- else:
24
- return np.array([np.zeros(shape=[self.dimension], dtype=np.int8)])
22
+ return self.word_in_(path_without_extension)
23
+ return np.array([self.zero])
25
24
 
26
25
  def extract(self, candidate: Candidate) -> Any:
27
26
  raise NotImplementedError
@@ -15,7 +15,4 @@ class WordInPostamble(WordIn):
15
15
  else candidate.line_data_list[0].value_end + ML_HUNK
16
16
  postamble = candidate.line_data_list[0].line[candidate.line_data_list[0].value_end:postamble_end].strip()
17
17
 
18
- if postamble:
19
- return self.word_in_str(postamble.lower())
20
- else:
21
- return np.array([np.zeros(shape=[self.dimension], dtype=np.int8)])
18
+ return self.word_in_(postamble.lower()) if postamble else np.array([self.zero])
@@ -20,7 +20,4 @@ class WordInPreamble(WordIn):
20
20
  else candidate.line_data_list[0].value_start - ML_HUNK
21
21
  preamble = candidate.line_data_list[0].line[preamble_start:candidate.line_data_list[0].value_start].strip()
22
22
 
23
- if preamble:
24
- return self.word_in_str(preamble.lower())
25
- else:
26
- return np.array([np.zeros(shape=[self.dimension], dtype=np.int8)])
23
+ return self.word_in_(preamble.lower()) if preamble else np.array([self.zero])
@@ -15,7 +15,4 @@ class WordInTransition(WordIn):
15
15
  else:
16
16
  transition = ''
17
17
 
18
- if transition:
19
- return self.word_in_str(transition.lower())
20
- else:
21
- return np.array([np.zeros(shape=[self.dimension], dtype=np.int8)])
18
+ return self.word_in_(transition.lower()) if transition else np.array([self.zero])
@@ -10,6 +10,5 @@ class WordInValue(WordIn):
10
10
  def extract(self, candidate: Candidate) -> np.ndarray:
11
11
  """Returns array of matching words for first line"""
12
12
  if value := candidate.line_data_list[0].value:
13
- return self.word_in_str(value.lower())
14
- else:
15
- return np.array([np.zeros(shape=[self.dimension], dtype=np.int8)])
13
+ return self.word_in_(value.lower())
14
+ return np.array([self.zero])
@@ -10,6 +10,5 @@ class WordInVariable(WordIn):
10
10
  def extract(self, candidate: Candidate) -> np.ndarray:
11
11
  """Returns array of matching words for first line"""
12
12
  if variable := candidate.line_data_list[0].variable:
13
- return self.word_in_str(variable.lower())
14
- else:
15
- return np.zeros(shape=[self.dimension], dtype=np.int8)
13
+ return self.word_in_(variable.lower())
14
+ return np.array([self.zero])
@@ -10,6 +10,7 @@
10
10
  "features": [
11
11
  {
12
12
  "type": "RuleSeverity",
13
+ "comment": "INFO=0.0, LOW=0.25, MEDIUM=0.5, HIGH=0.75, CRITICAL=1.0",
13
14
  "kwargs": {}
14
15
  },
15
16
  {
@@ -62,7 +63,7 @@
62
63
  "type": "SearchInAttribute",
63
64
  "comment": "Repeated symbol",
64
65
  "kwargs": {
65
- "pattern": ".*(?:(\\S)(\\S))((\\1.)|(.\\2)){7,}",
66
+ "pattern": "(?:(\\S)(\\S))((\\1.)|(.\\2)){7,}",
66
67
  "attribute": "value"
67
68
  }
68
69
  },
@@ -70,7 +71,7 @@
70
71
  "type": "SearchInAttribute",
71
72
  "comment": "SHA marker",
72
73
  "kwargs": {
73
- "pattern": ".*(?i:sha)[_-]?(224|256|384|512)",
74
+ "pattern": "(?i:sha)[_-]?(224|256|384|512)",
74
75
  "attribute": "value"
75
76
  }
76
77
  },
@@ -126,7 +127,7 @@
126
127
  "type": "SearchInAttribute",
127
128
  "comment": "VariableNotAllowedNameCheck",
128
129
  "kwargs": {
129
- "pattern": "(?i:(filters?|pub(lic)?)_?key)",
130
+ "pattern": "(?i:(sha[_-]?(224|256|384|512)|projects?|filters?|pub(lic)?)_?key)",
130
131
  "attribute": "variable"
131
132
  }
132
133
  },
@@ -134,7 +135,7 @@
134
135
  "type": "SearchInAttribute",
135
136
  "comment": "VariableNotAllowedNameCheck",
136
137
  "kwargs": {
137
- "pattern": "(?i:(id|size|name|type|manager|algorithm|view|error)$)",
138
+ "pattern": "(?i:(id|sum|size|name|type|manager|algorithm|pattern|view|error|date(time)?|time(stamp)?|tag|version|hash|rate)$)",
138
139
  "attribute": "variable"
139
140
  }
140
141
  },
@@ -245,8 +246,10 @@
245
246
  "crypt",
246
247
  "crypted",
247
248
  "decrypt",
249
+ "edited",
248
250
  "encrypt",
249
251
  "example",
252
+ "expire",
250
253
  "fake",
251
254
  "file",
252
255
  "foo",
@@ -260,7 +263,8 @@
260
263
  "pass",
261
264
  "public",
262
265
  "pwd",
263
- "rsa-",
266
+ "redacted",
267
+ "rsa",
264
268
  "salt",
265
269
  "secret",
266
270
  "sha",
@@ -339,6 +343,7 @@
339
343
  "get",
340
344
  "e.g.",
341
345
  "equal",
346
+ "env",
342
347
  "example",
343
348
  "expect",
344
349
  "line",
@@ -484,6 +489,7 @@
484
489
  ".bat",
485
490
  ".bats",
486
491
  ".bazel",
492
+ ".bin",
487
493
  ".build",
488
494
  ".bundle",
489
495
  ".bzl",
@@ -504,7 +510,6 @@
504
510
  ".csp",
505
511
  ".csv",
506
512
  ".dist",
507
- ".doc",
508
513
  ".dockerfile",
509
514
  ".edited",
510
515
  ".eex",
@@ -527,6 +532,8 @@
527
532
  ".gtpl",
528
533
  ".h",
529
534
  ".haml",
535
+ ".har",
536
+ ".hpp",
530
537
  ".hs",
531
538
  ".html",
532
539
  ".idl",
@@ -657,8 +664,8 @@
657
664
  "CMD Password",
658
665
  "CMD Secret",
659
666
  "CMD Token",
667
+ "CURL User Password",
660
668
  "Credential",
661
- "Github Old Token",
662
669
  "Key",
663
670
  "Nonce",
664
671
  "Password",
@@ -671,4 +678,4 @@
671
678
  }
672
679
  }
673
680
  ]
674
- }
681
+ }
Binary file: credsweeper/ml_model/ml_model.onnx (no textual diff)
@@ -272,7 +272,7 @@ class MlValidator:
272
272
  if head != tail:
273
273
  probability[head:tail] = self._batch_call_model(line_input_list, variable_input_list, value_input_list,
274
274
  features_list)
275
- is_cred = probability > self.threshold
275
+ is_cred = self.threshold <= probability
276
276
  if logger.isEnabledFor(logging.DEBUG):
277
277
  for i, decision in enumerate(is_cred):
278
278
  logger.debug("ML decision: %s with prediction: %s for value: %s", decision, probability[i],