guarddog 2.6.0__py3-none-any.whl → 2.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. guarddog/analyzer/analyzer.py +51 -23
  2. guarddog/analyzer/metadata/__init__.py +2 -0
  3. guarddog/analyzer/metadata/bundled_binary.py +6 -6
  4. guarddog/analyzer/metadata/deceptive_author.py +3 -1
  5. guarddog/analyzer/metadata/detector.py +7 -2
  6. guarddog/analyzer/metadata/empty_information.py +8 -3
  7. guarddog/analyzer/metadata/go/typosquatting.py +17 -9
  8. guarddog/analyzer/metadata/npm/bundled_binary.py +7 -2
  9. guarddog/analyzer/metadata/npm/deceptive_author.py +1 -1
  10. guarddog/analyzer/metadata/npm/direct_url_dependency.py +2 -1
  11. guarddog/analyzer/metadata/npm/empty_information.py +10 -7
  12. guarddog/analyzer/metadata/npm/potentially_compromised_email_domain.py +4 -3
  13. guarddog/analyzer/metadata/npm/release_zero.py +13 -5
  14. guarddog/analyzer/metadata/npm/typosquatting.py +44 -13
  15. guarddog/analyzer/metadata/npm/unclaimed_maintainer_email_domain.py +3 -2
  16. guarddog/analyzer/metadata/npm/utils.py +4 -5
  17. guarddog/analyzer/metadata/potentially_compromised_email_domain.py +8 -4
  18. guarddog/analyzer/metadata/pypi/__init__.py +12 -6
  19. guarddog/analyzer/metadata/pypi/bundled_binary.py +7 -2
  20. guarddog/analyzer/metadata/pypi/deceptive_author.py +1 -1
  21. guarddog/analyzer/metadata/pypi/empty_information.py +16 -5
  22. guarddog/analyzer/metadata/pypi/potentially_compromised_email_domain.py +4 -3
  23. guarddog/analyzer/metadata/pypi/release_zero.py +16 -6
  24. guarddog/analyzer/metadata/pypi/repository_integrity_mismatch.py +53 -27
  25. guarddog/analyzer/metadata/pypi/single_python_file.py +9 -4
  26. guarddog/analyzer/metadata/pypi/typosquatting.py +73 -26
  27. guarddog/analyzer/metadata/pypi/unclaimed_maintainer_email_domain.py +6 -2
  28. guarddog/analyzer/metadata/pypi/utils.py +1 -4
  29. guarddog/analyzer/metadata/release_zero.py +1 -1
  30. guarddog/analyzer/metadata/repository_integrity_mismatch.py +10 -3
  31. guarddog/analyzer/metadata/resources/top_pypi_packages.json +29998 -29986
  32. guarddog/analyzer/metadata/typosquatting.py +12 -8
  33. guarddog/analyzer/metadata/unclaimed_maintainer_email_domain.py +7 -2
  34. guarddog/analyzer/metadata/utils.py +1 -1
  35. guarddog/analyzer/sourcecode/__init__.py +34 -7
  36. guarddog/analyzer/sourcecode/api-obfuscation.yml +42 -0
  37. guarddog/analyzer/sourcecode/code-execution.yml +1 -0
  38. guarddog/analyzer/sourcecode/dll-hijacking.yml +5 -0
  39. guarddog/analyzer/sourcecode/shady-links.yml +1 -1
  40. guarddog/analyzer/sourcecode/suspicious_passwd_access_linux.yar +12 -0
  41. guarddog/analyzer/sourcecode/unicode.yml +75 -0
  42. guarddog/ecosystems.py +3 -0
  43. guarddog/scanners/__init__.py +5 -0
  44. guarddog/scanners/extension_scanner.py +152 -0
  45. guarddog/scanners/github_action_scanner.py +6 -2
  46. guarddog/scanners/go_project_scanner.py +1 -1
  47. guarddog/scanners/npm_package_scanner.py +12 -4
  48. guarddog/scanners/pypi_package_scanner.py +9 -3
  49. guarddog/scanners/pypi_project_scanner.py +9 -12
  50. guarddog/scanners/scanner.py +1 -0
  51. guarddog/utils/archives.py +134 -9
  52. guarddog/utils/config.py +24 -2
  53. guarddog/utils/package_info.py +3 -1
  54. {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info}/METADATA +10 -9
  55. guarddog-2.7.1.dist-info/RECORD +100 -0
  56. {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info}/WHEEL +1 -1
  57. guarddog-2.6.0.dist-info/RECORD +0 -96
  58. {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info}/entry_points.txt +0 -0
  59. {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info/licenses}/LICENSE +0 -0
  60. {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info/licenses}/LICENSE-3rdparty.csv +0 -0
  61. {guarddog-2.6.0.dist-info → guarddog-2.7.1.dist-info/licenses}/NOTICE +0 -0
guarddog/analyzer/metadata/typosquatting.py CHANGED
@@ -5,14 +5,16 @@ from guarddog.analyzer.metadata.detector import Detector
 
 
 class TyposquatDetector(Detector):
-    MESSAGE_TEMPLATE = "This package closely resembles the following package names, and might be a typosquatting " \
-                       "attempt: %s"
+    MESSAGE_TEMPLATE = (
+        "This package closely resembles the following package names, and might be a typosquatting "
+        "attempt: %s"
+    )
 
     def __init__(self) -> None:
         self.popular_packages = self._get_top_packages()  # Find top PyPI packages
         super().__init__(
             name="typosquatting",
-            description="Identify packages that are named closely to an highly popular package"
+            description="Identify packages that are named closely to an highly popular package",
         )
 
     @abc.abstractmethod
@@ -37,19 +39,19 @@ class TyposquatDetector(Detector):
         # Addition to name2
         if len(name1) > len(name2):
             for i in range(len(name1)):
-                if name1[:i] + name1[i + 1:] == name2:
+                if name1[:i] + name1[i + 1 :] == name2:
                     return True
 
         # Addition to name1
         elif len(name2) > len(name1):
             for i in range(len(name2)):
-                if name2[:i] + name2[i + 1:] == name1:
+                if name2[:i] + name2[i + 1 :] == name1:
                     return True
 
         # Edit character
         else:
             for i in range(len(name1)):
-                if name1[:i] + name1[i + 1:] == name2[:i] + name2[i + 1:]:
+                if name1[:i] + name1[i + 1 :] == name2[:i] + name2[i + 1 :]:
                     return True
 
         return False
@@ -68,7 +70,7 @@ class TyposquatDetector(Detector):
 
         if len(name1) == len(name2):
             for i in range(len(name1) - 1):
-                swapped_name1 = name1[:i] + name1[i + 1] + name1[i] + name1[i + 2:]
+                swapped_name1 = name1[:i] + name1[i + 1] + name1[i] + name1[i + 2 :]
                 if swapped_name1 == name2:
                     return True
 
@@ -106,7 +108,9 @@ class TyposquatDetector(Detector):
            bool: True
        """
 
-        return self._is_distance_one_Levenshtein(package1, package2) or self._is_swapped_typo(package1, package2)
+        return self._is_distance_one_Levenshtein(
+            package1, package2
+        ) or self._is_swapped_typo(package1, package2)
 
     @abc.abstractmethod
     def _get_confused_forms(self, package_name) -> list:
guarddog/analyzer/metadata/unclaimed_maintainer_email_domain.py CHANGED
@@ -20,8 +20,13 @@ class UnclaimedMaintainerEmailDomainDetector(Detector):
         )
         self.ecosystem = ecosystem
 
-    def detect(self, package_info, path: Optional[str] = None, name: Optional[str] = None,
-               version: Optional[str] = None) -> tuple[bool, str]:
+    def detect(
+        self,
+        package_info,
+        path: Optional[str] = None,
+        name: Optional[str] = None,
+        version: Optional[str] = None,
+    ) -> tuple[bool, str]:
         """
         Uses a package's information to determine
         if the maintainer's email domain is unclaimed and thus exposed to hijacking
guarddog/analyzer/metadata/utils.py CHANGED
@@ -26,7 +26,7 @@ def get_domain_creation_date(domain) -> tuple[Optional[datetime], bool]:
 
     try:
         domain_information = whois.whois(domain)
-    except whois.parser.PywhoisError as e:
+    except whois.exceptions.PywhoisError as e:
         # The domain doesn't exist at all, if that's the case we consider it vulnerable
         # since someone could register it
         return None, (not str(e).lower().startswith("no match for"))
guarddog/analyzer/sourcecode/__init__.py CHANGED
@@ -11,17 +11,23 @@ from guarddog.ecosystems import ECOSYSTEM
 
 current_dir = pathlib.Path(__file__).parent.resolve()
 
+EXTENSION_YARA_PREFIX = "extension_"
 
 # These data class aim to reduce the spreading of the logic
-# Instead of using the a dict as a structure and parse it difffently depending on the type
+# Instead of using the a dict as a structure and parse it difffently
+# depending on the type
+
+
 @dataclass
 class SourceCodeRule:
     """
     Base class for source code rules
     """
+
     id: str
     file: str
     description: str
+    ecosystem: Optional[ECOSYSTEM]  # None means "any ecosystem"
 
 
 @dataclass
@@ -29,6 +35,7 @@ class YaraRule(SourceCodeRule):
     """
     Yara rule just reimplements base
     """
+
     pass
 
 
@@ -38,7 +45,7 @@ class SempgrepRule(SourceCodeRule):
     Semgrep rule are language specific
     Content of rule in yaml format is accessible through rule_content
     """
-    ecosystem: ECOSYSTEM
+
     rule_content: dict
 
 
@@ -54,7 +61,8 @@ def get_sourcecode_rules(
     for rule in SOURCECODE_RULES:
         if kind and not isinstance(rule, kind):
             continue
-        if not (getattr(rule, "ecosystem", ecosystem) == ecosystem):
+        # Include rules that match the specific ecosystem OR rules that apply to any ecosystem (None)
+        if rule.ecosystem is not None and rule.ecosystem != ecosystem:
             continue
         yield rule
 
@@ -78,13 +86,15 @@ for file_name in semgrep_rule_file_names:
                 case "javascript" | "typescript" | "json":
                     ecosystems.add(ECOSYSTEM.NPM)
                     ecosystems.add(ECOSYSTEM.GITHUB_ACTION)
+                    ecosystems.add(ECOSYSTEM.EXTENSION)
                 case "go":
                     ecosystems.add(ECOSYSTEM.GO)
                 case _:
                     continue
 
         for ecosystem in ecosystems:
-            # avoids duplicates when multiple languages are supported by a rule
+            # avoids duplicates when multiple languages are supported
+            # by a rule
            if not next(
                filter(
                    lambda r: r.id == rule["id"],
@@ -96,7 +106,9 @@ for file_name in semgrep_rule_file_names:
                     SempgrepRule(
                         id=rule["id"],
                         ecosystem=ecosystem,
-                        description=rule.get("metadata", {}).get("description", ""),
+                        description=rule.get("metadata", {}).get(
+                            "description", ""
+                        ),
                         file=file_name,
                         rule_content=rule,
                     )
@@ -109,11 +121,26 @@ yara_rule_file_names = list(
 # refer to README.md for more information
 for file_name in yara_rule_file_names:
     rule_id = pathlib.Path(file_name).stem
-    description_regex = fr'\s*rule\s+{rule_id}[^}}]+meta:[^}}]+description\s*=\s*\"(.+?)\"'
+    description_regex = (
+        rf"\s*rule\s+{rule_id}[^}}]+meta:[^}}]+description\s*=\s*\"(.+?)\""
+    )
+
+    # Determine ecosystem based on filename prefix
+    rule_ecosystem: Optional[ECOSYSTEM] = (
+        ECOSYSTEM.EXTENSION if file_name.startswith(EXTENSION_YARA_PREFIX) else None
+    )
 
     with open(os.path.join(current_dir, file_name), "r") as fd:
         match = re.search(description_regex, fd.read())
         rule_description = ""
         if match:
             rule_description = match.group(1)
-    SOURCECODE_RULES.append(YaraRule(id=rule_id, file=file_name, description=rule_description))
+
+    SOURCECODE_RULES.append(
+        YaraRule(
+            id=rule_id,
+            file=file_name,
+            description=rule_description,
+            ecosystem=rule_ecosystem,
+        )
+    )
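
A quick illustration of the new rule-selection behavior (a sketch based on the hunks above, assuming the get_sourcecode_rules(ecosystem, kind) signature shown in the diff, not code from the package): YARA rules without an extension_ prefix keep a None ecosystem and are returned for every ecosystem, while extension_-prefixed YARA rules are only returned for ECOSYSTEM.EXTENSION.

    # Sketch only: exercises the filtering added above; names come from the diff.
    from guarddog.analyzer.sourcecode import YaraRule, get_sourcecode_rules
    from guarddog.ecosystems import ECOSYSTEM

    # YARA rules with ecosystem=None are yielded for any ecosystem...
    npm_yara = [r.id for r in get_sourcecode_rules(ECOSYSTEM.NPM, YaraRule)]
    # ...while extension_-prefixed rules only show up for EXTENSION.
    ext_yara = [r.id for r in get_sourcecode_rules(ECOSYSTEM.EXTENSION, YaraRule)]
    print(sorted(set(ext_yara) - set(npm_yara)))  # the extension_-specific rule ids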
guarddog/analyzer/sourcecode/api-obfuscation.yml ADDED
@@ -0,0 +1,42 @@
+rules:
+  - id: api-obfuscation
+    languages:
+      - python
+    message: This package uses obfuscated API calls that may evade static analysis detection
+    metadata:
+      description: Identify obfuscated API calls using alternative Python syntax patterns
+    severity: WARNING
+    patterns:
+      - pattern-either:
+          # Covered cases:
+          # 1) __dict__ access patterns: $MODULE.__dict__[$METHOD](...) / .__call__(...)
+          # 2) __getattribute__ patterns: $MODULE.__getattribute__($METHOD)(...) / .__call__(...)
+          # 3) getattr patterns: getattr($MODULE, $METHOD)(...) / .__call__(...)
+          # It also covers the case where $MODULE is imported as __import__('mod')
+          - patterns:
+              - pattern-either:
+                  - pattern: $MODULE.__dict__[$METHOD]($...ARGS)
+                  - pattern: $MODULE.__dict__[$METHOD].__call__($...ARGS)
+                  - pattern: $MODULE.__getattribute__($METHOD)($...ARGS)
+                  - pattern: $MODULE.__getattribute__($METHOD).__call__($...ARGS)
+                  - pattern: getattr($MODULE, $METHOD)($...ARGS)
+                  - pattern: getattr($MODULE, $METHOD).__call__($...ARGS)
+              - metavariable-regex:
+                  metavariable: $MODULE
+                  regex: "^[A-Za-z_][A-Za-z0-9_\\.]*$|^__import__\\([\"'][A-Za-z_][A-Za-z0-9_]*[\"']\\)$"
+              - metavariable-regex:
+                  metavariable: $METHOD
+                  regex: "^[\"'][A-Za-z_][A-Za-z0-9_]*[\"']$"
+
+          # --- Additional Cases: __import__('mod').method(...) / .__call__(...)
+          - patterns:
+              - pattern-either:
+                  - pattern: __import__($MODULE).$METHOD($...ARGS)
+                  - pattern: __import__($MODULE).$METHOD.__call__($...ARGS)
+              - metavariable-regex:
+                  metavariable: $MODULE
+                  regex: "^[\"'][A-Za-z_][A-Za-z0-9_]*[\"']$"
+              - metavariable-regex:
+                  metavariable: $METHOD
+                  # avoid matching __getattribute__
+                  regex: "[^(__getattribute__)][A-Za-z_][A-Za-z0-9_]*"
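
For reference, a hypothetical Python snippet (not taken from any package; os and system are arbitrary stand-ins) showing the indirect call styles the patterns above are written to flag:

    import os

    getattr(os, "system")("echo obfuscated")           # getattr($MODULE, $METHOD)($...ARGS)
    os.__dict__["system"]("echo obfuscated")           # $MODULE.__dict__[$METHOD]($...ARGS)
    os.__getattribute__("system").__call__("echo hi")  # __getattribute__ + __call__
    __import__("os").system("echo obfuscated")         # __import__('mod').method(...)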
guarddog/analyzer/sourcecode/code-execution.yml CHANGED
@@ -123,6 +123,7 @@ rules:
       include:
         - "*/setup.py"
         - "*/code-execution.py"
+        - "*/__init__.py"
     severity: WARNING
 
 
guarddog/analyzer/sourcecode/dll-hijacking.yml CHANGED
@@ -55,8 +55,13 @@ rules:
       # dll injection
       - pattern-either:
          - pattern: ....WriteProcessMemory
+         - pattern: getattr(..., "WriteProcessMemory")
          - pattern: ....CreateRemoteThread
+         - pattern: getattr(..., "CreateRemoteThread")
          - pattern: ....LoadLibraryA
+         - pattern: getattr(..., "LoadLibraryA")
+         - pattern: ....CDLL
+         - pattern: getattr(..., "CDLL")
 
       # phantom dll
       - patterns:
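
Illustrative only: the added getattr(...) patterns target indirect lookups of the same API names, as in this hypothetical Windows-oriented snippet (guarded so it stays runnable elsewhere):

    import ctypes

    if hasattr(ctypes, "windll"):  # Windows only
        kernel32 = getattr(ctypes, "CDLL")("kernel32")       # getattr(..., "CDLL")
        write_mem = getattr(kernel32, "WriteProcessMemory")  # getattr(..., "WriteProcessMemory")
        load_lib = getattr(kernel32, "LoadLibraryA")         # getattr(..., "LoadLibraryA")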
guarddog/analyzer/sourcecode/shady-links.yml CHANGED
@@ -43,7 +43,7 @@ rules:
       - pattern-regex: ((?:https?:\/\/)?[^\n\[\/\?#"']*?(files\.catbox\.moe)\b)
 
       # top-level domains
-      - pattern-regex: (https?:\/\/[^\n\[\/\?#"']*?\.(link|xyz|tk|ml|ga|cf|gq|pw|top|club|mw|bd|ke|am|sbs|date|quest|cd|bid|cd|ws|icu|cam|uno|email|stream|zip)\/)
+      - pattern-regex: (https?:\/\/[^\n\[\/\?#"']*?\.(link|xyz|tk|ml|ga|cf|gq|pw|top|club|mw|bd|ke|am|sbs|date|quest|cd|bid|cd|ws|icu|cam|uno|email|stream|zip)\b)
       # IPv4
       - pattern-regex: (https?:\/\/[^\n\[\/\?#"']*?(?:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}))
       # IPv6
guarddog/analyzer/sourcecode/suspicious_passwd_access_linux.yar ADDED
@@ -0,0 +1,12 @@
+rule suspicious_passwd_access_linux
+{
+    meta:
+        author = "T HAMDOUNI, Datadog"
+        description = "Detects suspicious read access to /etc/passwd file, which is often targeted by malware for credential harvesting"
+
+    strings:
+        $cli = /(cat|less|more|head|tail)\s+.{0,100}\/etc\/passwd/ nocase
+        $read = /(readFile|readFileSync)\(\s*['"]\/etc\/passwd/ nocase
+    condition:
+        $cli or $read
+}
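
For context, a hypothetical file whose contents would trigger both strings of this YARA rule (YARA matches raw bytes, so plain string literals are enough):

    cmd = "cat /etc/passwd"                                    # matches $cli
    js = "require('fs').readFileSync('/etc/passwd', 'utf8')"   # matches $read
    print(cmd, js)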
guarddog/analyzer/sourcecode/unicode.yml ADDED
@@ -0,0 +1,75 @@
+# Ignores string contents to reduce false positives!
+
+rules:
+  - id: unicode
+    message:
+      This package uses uncommon unicode characters in its code, it may try to
+      avoid detection.
+    metadata:
+      description: Identify suspicious unicode characters
+    languages:
+      - python
+    severity: WARNING
+    patterns:
+      # ignore comments
+      - pattern-not-regex: \#(.*)$
+
+      # ignore strings
+      - pattern-not-regex: (["'].*?["'])
+      - pattern-not-regex: ("""(.|\n)*?""")
+      - pattern-not-regex: ('''(.|\n)*?''')
+
+      - pattern-either:
+          - pattern-regex: ([ªᵃₐⓐa𝐚𝑎𝒂𝒶𝓪𝔞𝕒𝖆𝖺𝗮𝘢𝙖𝚊])
+          - pattern-regex: ([ᵇⓑb𝐛𝑏𝒃𝒷𝓫𝔟𝕓𝖇𝖻𝗯𝘣𝙗𝚋])
+          - pattern-regex: ([ᶜⅽⓒc𝐜𝑐𝒄𝒸𝓬𝔠𝕔𝖈𝖼𝗰𝘤𝙘𝚌])
+          - pattern-regex: ([ᵈⅆⅾⓓd𝐝𝑑𝒅𝒹𝓭𝔡𝕕𝖉𝖽𝗱𝘥𝙙𝚍])
+          - pattern-regex: ([ᵉₑℯⅇⓔe𝐞𝑒𝒆𝓮𝔢𝕖𝖊𝖾𝗲𝘦𝙚𝚎])
+          - pattern-regex: ([ᶠⓕf𝐟𝑓𝒇𝒻𝓯𝔣𝕗𝖋𝖿𝗳𝘧𝙛𝚏])
+          - pattern-regex: ([ᵍℊⓖg𝐠𝑔𝒈𝓰𝔤𝕘𝖌𝗀𝗴𝘨𝙜𝚐])
+          - pattern-regex: ([ʰₕℎⓗh𝐡𝒉𝒽𝓱𝔥𝕙𝖍𝗁𝗵𝘩𝙝𝚑])
+          - pattern-regex: ([ᵢⁱℹⅈⅰⓘi𝐢𝑖𝒊𝒾𝓲𝔦𝕚𝖎𝗂𝗶𝘪𝙞𝚒])
+          - pattern-regex: ([ʲⅉⓙⱼj𝐣𝑗𝒋𝒿𝓳𝔧𝕛𝖏𝗃𝗷𝘫𝙟𝚓])
+          - pattern-regex: ([ᵏₖⓚk𝐤𝑘𝒌𝓀𝓴𝔨𝕜𝖐𝗄𝗸𝘬𝙠𝚔])
+          - pattern-regex: ([ˡₗℓⅼⓛl𝐥𝑙𝒍𝓁𝓵𝔩𝕝𝖑𝗅𝗹𝘭𝙡𝚕])
+          - pattern-regex: ([ᵐₘⅿⓜm𝐦𝑚𝒎𝓂𝓶𝔪𝕞𝖒𝗆𝗺𝘮𝙢𝚖])
+          - pattern-regex: ([ⁿₙⓝn𝐧𝑛𝒏𝓃𝓷𝔫𝕟𝖓𝗇𝗻𝘯𝙣𝚗])
+          - pattern-regex: ([ºᵒₒℴⓞo𝐨𝑜𝒐𝓸𝔬𝕠𝖔𝗈𝗼𝘰𝙤𝚘])
+          - pattern-regex: ([ᵖₚⓟp𝐩𝑝𝒑𝓅𝓹𝔭𝕡𝖕𝗉𝗽𝘱𝙥𝚙])
+          - pattern-regex: ([ⓠq𐞥𝐪𝑞𝒒𝓆𝓺𝔮𝕢𝖖𝗊𝗾𝘲𝙦𝚚])
+          - pattern-regex: ([ʳᵣⓡr𝐫𝑟𝒓𝓇𝓻𝔯𝕣𝖗𝗋𝗿𝘳𝙧𝚛])
+          - pattern-regex: ([ſˢₛⓢs𝐬𝑠𝒔𝓈𝓼𝔰𝕤𝖘𝗌𝘀𝘴𝙨𝚜])
+          - pattern-regex: ([ᵗₜⓣt𝐭𝑡𝒕𝓉𝓽𝔱𝕥𝖙𝗍𝘁𝘵𝙩𝚝])
+          - pattern-regex: ([ᵘᵤⓤu𝐮𝑢𝒖𝓊𝓾𝔲𝕦𝖚𝗎𝘂𝘶𝙪𝚞])
+          - pattern-regex: ([ᵛᵥⅴⓥv𝐯𝑣𝒗𝓋𝓿𝔳𝕧𝖛𝗏𝘃𝘷𝙫𝚟])
+          - pattern-regex: ([ʷⓦw𝐰𝑤𝒘𝓌𝔀𝔴𝕨𝖜𝗐𝘄𝘸𝙬𝚠])
+          - pattern-regex: ([ˣₓⅹⓧx𝐱𝑥𝒙𝓍𝔁𝔵𝕩𝖝𝗑𝘅𝘹𝙭𝚡])
+          - pattern-regex: ([ʸⓨy𝐲𝑦𝒚𝓎𝔂𝔶𝕪𝖞𝗒𝘆𝘺𝙮𝚢])
+          - pattern-regex: ([ᶻⓩz𝐳𝑧𝒛𝓏𝔃𝔷𝕫𝖟𝗓𝘇𝘻𝙯𝚣])
+
+          - pattern-regex: ([ᴬⒶA𝐀𝐴𝑨𝒜𝓐𝔄𝔸𝕬𝖠𝗔𝘈𝘼𝙰🄰])
+          - pattern-regex: ([ᴮℬⒷB𝐁𝐵𝑩𝓑𝔅𝔹𝕭𝖡𝗕𝘉𝘽𝙱🄱])
+          - pattern-regex: ([ℂℭⅭⒸꟲC𝐂𝐶𝑪𝒞𝓒𝕮𝖢𝗖𝘊𝘾𝙲🄫🄲])
+          - pattern-regex: ([ᴰⅅⅮⒹD𝐃𝐷𝑫𝒟𝓓𝔇𝔻𝕯𝖣𝗗𝘋𝘿𝙳🄳])
+          - pattern-regex: ([ᴱℰⒺE𝐄𝐸𝑬𝓔𝔈𝔼𝕰𝖤𝗘𝘌𝙀𝙴🄴])
+          - pattern-regex: ([ℱⒻꟳF𝐅𝐹𝑭𝓕𝔉𝔽𝕱𝖥𝗙𝘍𝙁𝙵🄵])
+          - pattern-regex: ([ᴳⒼG𝐆𝐺𝑮𝒢𝓖𝔊𝔾𝕲𝖦𝗚𝘎𝙂𝙶🄶])
+          - pattern-regex: ([ᴴℋℌℍⒽH𝐇𝐻𝑯𝓗𝕳𝖧𝗛𝘏𝙃𝙷🄷])
+          - pattern-regex: ([ᴵℐℑⅠⒾI𝐈𝐼𝑰𝓘𝕀𝕴𝖨𝗜𝘐𝙄𝙸🄸])
+          - pattern-regex: ([ᴶⒿJ𝐉𝐽𝑱𝒥𝓙𝔍𝕁𝕵𝖩𝗝𝘑𝙅𝙹🄹])
+          - pattern-regex: ([ᴷKⓀK𝐊𝐾𝑲𝒦𝓚𝔎𝕂𝕶𝖪𝗞𝘒𝙆𝙺🄺])
+          - pattern-regex: ([ᴸℒⅬⓁL𝐋𝐿𝑳𝓛𝔏𝕃𝕷𝖫𝗟𝘓𝙇𝙻🄻])
+          - pattern-regex: ([ᴹℳⅯⓂM𝐌𝑀𝑴𝓜𝔐𝕄𝕸𝖬𝗠𝘔𝙈𝙼🄼])
+          - pattern-regex: ([ᴺℕⓃN𝐍𝑁𝑵𝒩𝓝𝔑𝕹𝖭𝗡𝘕𝙉𝙽🄽])
+          - pattern-regex: ([ᴼⓄO𝐎𝑂𝑶𝒪𝓞𝔒𝕆𝕺𝖮𝗢𝘖𝙊𝙾🄾])
+          - pattern-regex: ([ᴾℙⓅP𝐏𝑃𝑷𝒫𝓟𝔓𝕻𝖯𝗣𝘗𝙋𝙿🄿])
+          - pattern-regex: ([ℚⓆꟴQ𝐐𝑄𝑸𝒬𝓠𝔔𝕼𝖰𝗤𝘘𝙌𝚀🅀])
+          - pattern-regex: ([ᴿℛℜℝⓇR𝐑𝑅𝑹𝓡𝕽𝖱𝗥𝘙𝙍𝚁🄬🅁])
+          - pattern-regex: ([ⓈS𝐒𝑆𝑺𝒮𝓢𝔖𝕊𝕾𝖲𝗦𝘚𝙎𝚂🅂])
+          - pattern-regex: ([ᵀⓉT𝐓𝑇𝑻𝒯𝓣𝔗𝕋𝕿𝖳𝗧𝘛𝙏𝚃🅃])
+          - pattern-regex: ([ᵁⓊU𝐔𝑈𝑼𝒰𝓤𝔘𝕌𝖀𝖴𝗨𝘜𝙐𝚄🅄])
+          - pattern-regex: ([ⅤⓋⱽV𝐕𝑉𝑽𝒱𝓥𝔙𝕍𝖁𝖵𝗩𝘝𝙑𝚅🅅])
+          - pattern-regex: ([ᵂⓌW𝐖𝑊𝑾𝒲𝓦𝔚𝕎𝖂𝖶𝗪𝘞𝙒𝚆🅆])
+          - pattern-regex: ([ⅩⓍX𝐗𝑋𝑿𝒳𝓧𝔛𝕏𝖃𝖷𝗫𝘟𝙓𝚇🅇])
+          - pattern-regex: ([ⓎY𝐘𝑌𝒀𝒴𝓨𝔜𝕐𝖄𝖸𝗬𝘠𝙔𝚈🅈])
+          - pattern-regex: ([ℤℨⓏZ𝐙𝑍𝒁𝒵𝓩𝖅𝖹𝗭𝘡𝙕𝚉🅉])
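
A hypothetical example of the kind of code this rule flags: identifiers written with Unicode "mathematical" letters that render like ASCII but are distinct code points (Python NFKC-normalizes identifiers, so the snippet still runs):

    import os as _os

    𝚘𝚜 = _os            # looks like "os" but uses U+1D698 / U+1D69C
    print(𝚘𝚜.getcwd())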
guarddog/ecosystems.py CHANGED
@@ -6,6 +6,7 @@ class ECOSYSTEM(Enum):
     NPM = "npm"
     GO = "go"
     GITHUB_ACTION = "github-action"
+    EXTENSION = "extension"
 
 
 def get_friendly_name(ecosystem: ECOSYSTEM) -> str:
@@ -18,5 +19,7 @@ def get_friendly_name(ecosystem: ECOSYSTEM) -> str:
             return "go"
         case ECOSYSTEM.GITHUB_ACTION:
             return "GitHub Action"
+        case ECOSYSTEM.EXTENSION:
+            return "Extension"
         case _:
             return ecosystem.value
guarddog/scanners/__init__.py CHANGED
@@ -8,6 +8,7 @@ from .pypi_project_scanner import PypiRequirementsScanner
 from .go_package_scanner import GoModuleScanner
 from .go_project_scanner import GoDependenciesScanner
 from .github_action_scanner import GithubActionScanner
+from .extension_scanner import ExtensionScanner
 from .scanner import PackageScanner, ProjectScanner
 from ..ecosystems import ECOSYSTEM
 
@@ -33,6 +34,8 @@ def get_package_scanner(ecosystem: ECOSYSTEM) -> Optional[PackageScanner]:
             return GoModuleScanner()
         case ECOSYSTEM.GITHUB_ACTION:
             return GithubActionScanner()
+        case ECOSYSTEM.EXTENSION:
+            return ExtensionScanner()
     return None
 
 
@@ -57,4 +60,6 @@ def get_project_scanner(ecosystem: ECOSYSTEM) -> Optional[ProjectScanner]:
             return GoDependenciesScanner()
         case ECOSYSTEM.GITHUB_ACTION:
             return GitHubActionDependencyScanner()
+        case ECOSYSTEM.EXTENSION:
+            return None  # we're not including dependency scanning for this PR
     return None
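
A minimal sketch of how the new ecosystem resolves to a scanner through the factory added above (driver code is hypothetical; the names come from the diff):

    from guarddog.ecosystems import ECOSYSTEM
    from guarddog.scanners import get_package_scanner

    scanner = get_package_scanner(ECOSYSTEM.EXTENSION)  # -> ExtensionScanner
    print(type(scanner).__name__)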
guarddog/scanners/extension_scanner.py ADDED
@@ -0,0 +1,152 @@
+import logging
+import os
+import typing
+
+import requests
+
+from guarddog.analyzer.analyzer import Analyzer
+from guarddog.ecosystems import ECOSYSTEM
+from guarddog.scanners.scanner import PackageScanner, noop
+
+log = logging.getLogger("guarddog")
+
+MARKETPLACE_URL = (
+    "https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery"
+)
+MARKETPLACE_HEADERS = {
+    "Content-Type": "application/json",
+    "Accept": "application/json;api-version=3.0-preview.1",
+}
+MARKETPLACE_DOWNLOAD_LINK_ASSET_TYPE = "Microsoft.VisualStudio.Services.VSIXPackage"
+VSIX_FILE_EXTENSION = ".vsix"
+
+# VSCode Marketplace API filter types
+# FilterType 7 = publisherName.extensionName (search by exact extension identifier)
+MARKETPLACE_FILTER_TYPE_EXTENSION_NAME = 7
+
+# VSCode Marketplace API flags (bitwise combination)
+# 446 = IncludeVersions | IncludeFiles | IncludeMetadata
+MARKETPLACE_FLAGS_FULL_METADATA = 446
+
+
+class ExtensionScanner(PackageScanner):
+    def __init__(self) -> None:
+        super().__init__(Analyzer(ECOSYSTEM.EXTENSION))
+
+    def download_and_get_package_info(
+        self, directory: str, package_name: str, version=None
+    ) -> typing.Tuple[dict, str]:
+        """
+        Downloads a VSCode extension from the marketplace and extracts it
+
+        Args:
+            directory: Directory to download to
+            package_name: Extension identifier (publisher.extension format)
+            version: Specific version or default to latest
+
+        Returns:
+            Tuple of (marketplace API response, extracted_path)
+        """
+        marketplace_data, vsix_url = self._get_marketplace_info_and_url(
+            package_name, version
+        )
+
+        vsix_path = os.path.join(
+            directory, package_name.replace("/", "-") + VSIX_FILE_EXTENSION
+        )
+        extracted_path = vsix_path.removesuffix(VSIX_FILE_EXTENSION)
+
+        log.debug(f"Downloading VSCode extension from {vsix_url}")
+
+        self.download_compressed(vsix_url, vsix_path, extracted_path)
+
+        return marketplace_data, extracted_path
+
+    def _get_marketplace_info_and_url(
+        self, package_name: str, version: typing.Optional[str] = None
+    ) -> typing.Tuple[dict, str]:
+        """Get marketplace metadata and VSIX download URL"""
+        payload = {
+            "filters": [
+                {
+                    "criteria": [
+                        {
+                            "filterType": MARKETPLACE_FILTER_TYPE_EXTENSION_NAME,
+                            "value": package_name,
+                        }
+                    ]
+                }
+            ],
+            "flags": MARKETPLACE_FLAGS_FULL_METADATA,
+        }
+
+        response = requests.post(
+            MARKETPLACE_URL, headers=MARKETPLACE_HEADERS, json=payload
+        )
+
+        response.raise_for_status()
+
+        data = response.json()
+
+        if not data.get("results") or not data["results"][0].get("extensions"):
+            raise ValueError(f"Extension {package_name} not found in marketplace")
+
+        extension_info = data["results"][0]["extensions"][0]
+        versions = extension_info.get("versions", [])
+
+        if not versions:
+            raise ValueError(
+                f"No versions available for this extension: {package_name}"
+            )
+
+        target_version = None
+        if version is None:
+            # if not version is provided, default to latest
+            target_version = versions[0]
+        else:
+            for v in versions:
+                if v.get("version") == version:
+                    target_version = v
+                    break
+            if target_version is None:
+                raise ValueError(
+                    f"Version {version} not found for extension: {package_name}"
+                )
+
+        # Extract download URL
+        files = target_version.get("files", [])
+        vsix_url = None
+        for file_info in files:
+            if file_info.get("assetType") == MARKETPLACE_DOWNLOAD_LINK_ASSET_TYPE:
+                vsix_url = file_info.get("source")
+                break
+
+        if not vsix_url:
+            raise ValueError(
+                f"No VSIX download link available for this extension: {package_name}"
+            )
+
+        return data, vsix_url
+
+    def scan_local(
+        self, path: str, rules=None, callback: typing.Callable[[dict], None] = noop
+    ) -> dict:
+        """
+        Scan a local VSCode extension directory
+
+        Args:
+            path: Path to extension directory containing package.json
+            rules: Set of rules to use
+            callback: Callback to apply to analyzer output
+
+        Returns:
+            Scan results
+        """
+        if rules is not None:
+            rules = set(rules)
+
+        # Use only sourcecode analysis for local scans, consistent with other ecosystems
+        results = self.analyzer.analyze_sourcecode(path, rules=rules)
+        callback(results)
+
+        return results
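
A rough usage sketch of the new scanner, assuming only the methods defined above (normally invoked through guarddog's CLI; the extension id is just an example and the download needs network access):

    import tempfile
    from guarddog.scanners.extension_scanner import ExtensionScanner

    scanner = ExtensionScanner()
    with tempfile.TemporaryDirectory() as tmp:
        # fetches the .vsix from the marketplace and extracts it
        info, path = scanner.download_and_get_package_info(tmp, "ms-python.python")
        # runs the EXTENSION source-code rules on the extracted files
        print(scanner.scan_local(path))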
guarddog/scanners/github_action_scanner.py CHANGED
@@ -15,7 +15,9 @@ class GithubActionScanner(PackageScanner):
     def __init__(self) -> None:
         super().__init__(Analyzer(ECOSYSTEM.GITHUB_ACTION))
 
-    def download_and_get_package_info(self, directory: str, package_name: str, version=None) -> typing.Tuple[dict, str]:
+    def download_and_get_package_info(
+        self, directory: str, package_name: str, version=None
+    ) -> typing.Tuple[dict, str]:
         repo = self._get_repo(package_name)
         tarball_url = self._get_git_tarball_url(repo, version)
 
@@ -25,7 +27,9 @@ class GithubActionScanner(PackageScanner):
         if file_extension == "":
             file_extension = ".zip"
 
-        zippath = os.path.join(directory, package_name.replace("/", "-") + file_extension)
+        zippath = os.path.join(
+            directory, package_name.replace("/", "-") + file_extension
+        )
         unzippedpath = zippath.removesuffix(file_extension)
         self.download_compressed(tarball_url, zippath, unzippedpath)
 
guarddog/scanners/go_project_scanner.py CHANGED
@@ -54,7 +54,7 @@ class GoDependenciesScanner(ProjectScanner):
                     lambda d: d.name == name,
                     dependencies,
                 ),
-                None
+                None,
             )
             if not dep:
                 dep = Dependency(name=name, versions=set())
guarddog/scanners/npm_package_scanner.py CHANGED
@@ -17,9 +17,13 @@ class NPMPackageScanner(PackageScanner):
     def __init__(self) -> None:
         super().__init__(Analyzer(ECOSYSTEM.NPM))
 
-    def download_and_get_package_info(self, directory: str, package_name: str, version=None) -> typing.Tuple[dict, str]:
+    def download_and_get_package_info(
+        self, directory: str, package_name: str, version=None
+    ) -> typing.Tuple[dict, str]:
         git_target = None
-        if urlparse(package_name).hostname is not None and package_name.endswith('.git'):
+        if urlparse(package_name).hostname is not None and package_name.endswith(
+            ".git"
+        ):
             git_target = package_name
 
         if not package_name.startswith("@") and package_name.count("/") == 1:
@@ -33,7 +37,9 @@ class NPMPackageScanner(PackageScanner):
         response = requests.get(url)
 
         if response.status_code != 200:
-            raise Exception("Received status code: " + str(response.status_code) + " from npm")
+            raise Exception(
+                "Received status code: " + str(response.status_code) + " from npm"
+            )
         data = response.json()
         if "name" not in data:
             raise Exception(f"Error retrieving package: {package_name}")
@@ -45,7 +51,9 @@ class NPMPackageScanner(PackageScanner):
 
         tarball_url = details["dist"]["tarball"]
         file_extension = pathlib.Path(tarball_url).suffix
-        zippath = os.path.join(directory, package_name.replace("/", "-") + file_extension)
+        zippath = os.path.join(
+            directory, package_name.replace("/", "-") + file_extension
+        )
         unzippedpath = zippath.removesuffix(file_extension)
         self.download_compressed(tarball_url, zippath, unzippedpath)
 
guarddog/scanners/pypi_package_scanner.py CHANGED
@@ -12,7 +12,9 @@ class PypiPackageScanner(PackageScanner):
     def __init__(self) -> None:
         super().__init__(Analyzer(ECOSYSTEM.PYPI))
 
-    def download_and_get_package_info(self, directory: str, package_name: str, version=None) -> typing.Tuple[dict, str]:
+    def download_and_get_package_info(
+        self, directory: str, package_name: str, version=None
+    ) -> typing.Tuple[dict, str]:
         extract_dir = self.download_package(package_name, directory, version)
         return get_package_info(package_name), extract_dir
 
@@ -40,7 +42,9 @@ class PypiPackageScanner(PackageScanner):
             version = data["info"]["version"]
 
         if version not in releases:
-            raise Exception(f"Version {version} for package {package_name} doesn't exist.")
+            raise Exception(
+                f"Version {version} for package {package_name} doesn't exist."
+            )
 
         files = releases[version]
         url, file_extension = None, None
@@ -52,7 +56,9 @@ class PypiPackageScanner(PackageScanner):
                 break
 
         if not (url and file_extension):
-            raise Exception(f"Compressed file for {package_name} does not exist on PyPI.")
+            raise Exception(
+                f"Compressed file for {package_name} does not exist on PyPI."
+            )
 
         # Path to compressed package
         zippath = os.path.join(directory, package_name + file_extension)