guarddog 2.7.0__py3-none-any.whl → 2.8.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guarddog/analyzer/metadata/__init__.py +3 -0
- guarddog/analyzer/metadata/go/typosquatting.py +13 -6
- guarddog/analyzer/metadata/npm/typosquatting.py +43 -12
- guarddog/analyzer/metadata/pypi/repository_integrity_mismatch.py +53 -164
- guarddog/analyzer/metadata/pypi/typosquatting.py +51 -17
- guarddog/analyzer/metadata/repository_integrity_mismatch.py +202 -2
- guarddog/analyzer/metadata/resources/top_pypi_packages.json +29998 -29986
- guarddog/analyzer/metadata/resources/top_rubygems_packages.json +976 -0
- guarddog/analyzer/metadata/rubygems/__init__.py +26 -0
- guarddog/analyzer/metadata/rubygems/bundled_binary.py +13 -0
- guarddog/analyzer/metadata/rubygems/empty_information.py +24 -0
- guarddog/analyzer/metadata/rubygems/release_zero.py +22 -0
- guarddog/analyzer/metadata/rubygems/repository_integrity_mismatch.py +49 -0
- guarddog/analyzer/metadata/rubygems/typosquatting.py +140 -0
- guarddog/analyzer/metadata/utils.py +24 -1
- guarddog/analyzer/sourcecode/__init__.py +2 -0
- guarddog/analyzer/sourcecode/api-obfuscation.yml +35 -40
- guarddog/analyzer/sourcecode/code-execution.yml +20 -0
- guarddog/analyzer/sourcecode/exec-base64.yml +19 -0
- guarddog/analyzer/sourcecode/exfiltrate-sensitive-data.yml +31 -5
- guarddog/analyzer/sourcecode/npm-api-obfuscation.yml +51 -0
- guarddog/analyzer/sourcecode/rubygems-code-execution.yml +67 -0
- guarddog/analyzer/sourcecode/rubygems-exec-base64.yml +26 -0
- guarddog/analyzer/sourcecode/rubygems-exfiltrate-sensitive-data.yml +70 -0
- guarddog/analyzer/sourcecode/rubygems-install-hook.yml +45 -0
- guarddog/analyzer/sourcecode/rubygems-network-on-require.yml +78 -0
- guarddog/analyzer/sourcecode/rubygems-serialize-environment.yml +38 -0
- guarddog/analyzer/sourcecode/shady-links.yml +1 -1
- guarddog/ecosystems.py +3 -0
- guarddog/scanners/__init__.py +6 -0
- guarddog/scanners/rubygems_package_scanner.py +112 -0
- guarddog/scanners/rubygems_project_scanner.py +75 -0
- guarddog/scanners/scanner.py +34 -8
- guarddog/utils/archives.py +133 -9
- guarddog/utils/config.py +24 -2
- guarddog-2.8.4.dist-info/METADATA +471 -0
- {guarddog-2.7.0.dist-info → guarddog-2.8.4.dist-info}/RECORD +42 -26
- {guarddog-2.7.0.dist-info → guarddog-2.8.4.dist-info}/WHEEL +1 -1
- guarddog-2.7.0.dist-info/METADATA +0 -40
- {guarddog-2.7.0.dist-info → guarddog-2.8.4.dist-info}/entry_points.txt +0 -0
- {guarddog-2.7.0.dist-info → guarddog-2.8.4.dist-info}/licenses/LICENSE +0 -0
- {guarddog-2.7.0.dist-info → guarddog-2.8.4.dist-info}/licenses/LICENSE-3rdparty.csv +0 -0
- {guarddog-2.7.0.dist-info → guarddog-2.8.4.dist-info}/licenses/NOTICE +0 -0
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# Semgrep taint rule: reports Ruby code in which a value read from the
# environment, host identity APIs or credential files reaches a call that
# sends data over the network.
rules:
  - id: rubygems-exfiltrate-sensitive-data
    languages:
      - ruby
    mode: taint
    message: |
      This package reads sensitive data and sends it to a remote server.
      This could indicate credential theft or data exfiltration.
    metadata:
      description: Identify when a package reads and exfiltrates sensitive data from the local system
    pattern-sources:
      - pattern-either:
          # Environment variables
          - pattern: ENV
          - pattern: ENV[...]
          - pattern: ENV.fetch(...)
          - pattern: ENV.to_h
          - pattern: ENV.to_hash

          # Specific sensitive env vars
          - pattern: ENV['HOME']
          - pattern: ENV['USER']
          - pattern: ENV['USERNAME']
          - pattern: ENV['AWS_ACCESS_KEY_ID']
          - pattern: ENV['AWS_SECRET_ACCESS_KEY']
          - pattern: ENV['AWS_SESSION_TOKEN']
          - pattern: ENV['GITHUB_TOKEN']
          - pattern: ENV['GH_TOKEN']

          # System info
          - pattern: Socket.gethostname
          - pattern: Etc.getlogin
          - pattern: Etc.getpwuid(...)

          # Reading sensitive files
          # A string ellipsis only acts as a wildcard when the whole literal is
          # "...", so "~/.ssh/..." would match that exact text and nothing else.
          # The path argument is therefore bound to a metavariable and filtered
          # by regex; the regex is left-anchored, hence the leading ".*".
          - patterns:
              - pattern: File.read($PATH, ...)
              - metavariable-regex:
                  metavariable: $PATH
                  regex: '.*(\.ssh/|\.aws/|\.netrc|\.git-credentials).*'

          # Dir patterns for sensitive locations
          - pattern: Dir.home
          - patterns:
              - pattern: Dir.glob($PATH, ...)
              - metavariable-regex:
                  metavariable: $PATH
                  regex: '.*(\.ssh/|\.aws/).*'
    pattern-sinks:
      - pattern-either:
          # Net::HTTP
          - pattern: Net::HTTP.post(...)
          - pattern: Net::HTTP.post_form(...)
          - pattern: $HTTP.request(...)
          - pattern: $HTTP.post(...)
          - pattern: $HTTP.put(...)

          # open-uri
          - pattern: URI.open(...)
          - pattern: OpenURI.open_uri(...)

          # HTTParty, Faraday, RestClient
          - pattern: HTTParty.post(...)
          - pattern: HTTParty.put(...)
          - pattern: Faraday.post(...)
          - pattern: Faraday.put(...)
          - pattern: RestClient.post(...)
          - pattern: RestClient.put(...)

          # Socket
          - pattern: $SOCKET.write(...)
          - pattern: $SOCKET.send(...)
          - pattern: TCPSocket.new(...)
    severity: WARNING
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Semgrep rule: reports Ruby code that registers a RubyGems install or
# uninstall hook, or a gemspec block that assigns `extensions`.
rules:
  - id: rubygems-install-hook
    languages:
      - ruby
    message: |
      This package uses Gem::Installer hooks which execute code during gem installation.
      This is a common technique for malicious gems to run code when installed.
    metadata:
      description: Identify when a gem registers installation hooks
    patterns:
      - pattern-either:
          # Each hook is listed in call, brace-block and do/end-block form.
          # Post-install hooks
          - pattern: Gem.post_install(...)
          - pattern: Gem.post_install { ... }
          - pattern: Gem.post_install do ... end
          - pattern: Gem::Installer.post_install(...)
          - pattern: Gem::Installer.post_install { ... }
          - pattern: Gem::Installer.post_install do ... end

          # Pre-install hooks
          - pattern: Gem.pre_install(...)
          - pattern: Gem.pre_install { ... }
          - pattern: Gem.pre_install do ... end
          - pattern: Gem::Installer.pre_install(...)
          - pattern: Gem::Installer.pre_install { ... }
          - pattern: Gem::Installer.pre_install do ... end

          # Post-uninstall hooks
          - pattern: Gem.post_uninstall(...)
          - pattern: Gem.post_uninstall { ... }
          - pattern: Gem.post_uninstall do ... end

          # Pre-uninstall hooks
          - pattern: Gem.pre_uninstall(...)
          - pattern: Gem.pre_uninstall { ... }
          - pattern: Gem.pre_uninstall do ... end

          # Extension building (can run arbitrary code)
          # $S binds the block parameter so the assignment is tied to the spec
          # object being configured.
          - pattern: |
              Gem::Specification.new do |$S|
                ...
                $S.extensions = ...
                ...
              end
    severity: WARNING
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Semgrep rule: reports a `require` of a networking library that is followed,
# in the same statement sequence, by a call into that library.
# Every alternative is listed twice, once per quote style of the require.
# NOTE(review): nothing in the patterns themselves anchors the match to the
# file's top level - confirm the scoping against the rule's test cases.
rules:
  - id: rubygems-network-on-require
    languages:
      - ruby
    message: |
      This package makes network requests at the top level, which means it runs
      when the gem is required. Malicious gems often use this to phone home or
      download additional payloads.
    metadata:
      description: Identify when a gem makes network requests when required
    patterns:
      - pattern-either:
          # Net::HTTP at top level
          - pattern: |
              require 'net/http'
              ...
              Net::HTTP.$METHOD(...)
          - pattern: |
              require "net/http"
              ...
              Net::HTTP.$METHOD(...)

          # open-uri at top level
          - pattern: |
              require 'open-uri'
              ...
              URI.open(...)
          - pattern: |
              require "open-uri"
              ...
              URI.open(...)
          # Bare `open(...)` after requiring open-uri
          - pattern: |
              require 'open-uri'
              ...
              open(...)
          - pattern: |
              require "open-uri"
              ...
              open(...)

          # HTTParty at top level
          - pattern: |
              require 'httparty'
              ...
              HTTParty.$METHOD(...)
          - pattern: |
              require "httparty"
              ...
              HTTParty.$METHOD(...)

          # Faraday at top level
          - pattern: |
              require 'faraday'
              ...
              Faraday.$METHOD(...)
          - pattern: |
              require "faraday"
              ...
              Faraday.$METHOD(...)

          # Socket connections at top level
          - pattern: |
              require 'socket'
              ...
              TCPSocket.new(...)
          - pattern: |
              require "socket"
              ...
              TCPSocket.new(...)
          - pattern: |
              require 'socket'
              ...
              TCPSocket.open(...)
          - pattern: |
              require "socket"
              ...
              TCPSocket.open(...)
    severity: WARNING
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Semgrep rule: reports Ruby expressions that turn the whole ENV object (or
# its hash form) into JSON, YAML, Marshal data or an inspect string.
rules:
  - id: rubygems-serialize-environment
    languages:
      - ruby
    message: |
      This package serializes the entire ENV hash, which may indicate
      an attempt to steal environment variables including secrets and credentials.
    metadata:
      description: Identify when a package serializes ENV to exfiltrate environment variables
    patterns:
      - pattern-either:
          # Each serializer is listed for ENV, ENV.to_h and ENV.to_hash where
          # the corresponding call form exists.
          # JSON serialization
          - pattern: ENV.to_h.to_json
          - pattern: ENV.to_hash.to_json
          - pattern: JSON.dump(ENV)
          - pattern: JSON.dump(ENV.to_h)
          - pattern: JSON.dump(ENV.to_hash)
          - pattern: JSON.generate(ENV)
          - pattern: JSON.generate(ENV.to_h)
          - pattern: JSON.generate(ENV.to_hash)

          # YAML serialization
          - pattern: ENV.to_h.to_yaml
          - pattern: ENV.to_hash.to_yaml
          - pattern: YAML.dump(ENV)
          - pattern: YAML.dump(ENV.to_h)
          - pattern: YAML.dump(ENV.to_hash)

          # Marshal serialization
          - pattern: Marshal.dump(ENV)
          - pattern: Marshal.dump(ENV.to_h)
          - pattern: Marshal.dump(ENV.to_hash)

          # Converting to string for sending
          - pattern: ENV.to_h.inspect
          - pattern: ENV.to_hash.inspect
          - pattern: ENV.inspect
    severity: WARNING
|
|
@@ -43,7 +43,7 @@ rules:
|
|
|
43
43
|
- pattern-regex: ((?:https?:\/\/)?[^\n\[\/\?#"']*?(files\.catbox\.moe)\b)
|
|
44
44
|
|
|
45
45
|
# top-level domains
|
|
46
|
-
- pattern-regex: (https?:\/\/[^\n\[\/\?#"']*?\.(link|xyz|tk|ml|ga|cf|gq|pw|top|club|mw|bd|ke|am|sbs|date|quest|cd|bid|cd|ws|icu|cam|uno|email|stream|zip)
|
|
46
|
+
- pattern-regex: (https?:\/\/[^\n\[\/\?#"']*?\.(link|xyz|tk|ml|ga|cf|gq|pw|top|club|mw|bd|ke|am|sbs|date|quest|cd|bid|cd|ws|icu|cam|uno|email|stream|zip)\b)
|
|
47
47
|
# IPv4
|
|
48
48
|
- pattern-regex: (https?:\/\/[^\n\[\/\?#"']*?(?:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}))
|
|
49
49
|
# IPv6
|
guarddog/ecosystems.py
CHANGED
|
@@ -7,6 +7,7 @@ class ECOSYSTEM(Enum):
|
|
|
7
7
|
GO = "go"
|
|
8
8
|
GITHUB_ACTION = "github-action"
|
|
9
9
|
EXTENSION = "extension"
|
|
10
|
+
RUBYGEMS = "rubygems"
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
def get_friendly_name(ecosystem: ECOSYSTEM) -> str:
|
|
@@ -21,5 +22,7 @@ def get_friendly_name(ecosystem: ECOSYSTEM) -> str:
|
|
|
21
22
|
return "GitHub Action"
|
|
22
23
|
case ECOSYSTEM.EXTENSION:
|
|
23
24
|
return "Extension"
|
|
25
|
+
case ECOSYSTEM.RUBYGEMS:
|
|
26
|
+
return "RubyGems"
|
|
24
27
|
case _:
|
|
25
28
|
return ecosystem.value
|
guarddog/scanners/__init__.py
CHANGED
|
@@ -9,6 +9,8 @@ from .go_package_scanner import GoModuleScanner
|
|
|
9
9
|
from .go_project_scanner import GoDependenciesScanner
|
|
10
10
|
from .github_action_scanner import GithubActionScanner
|
|
11
11
|
from .extension_scanner import ExtensionScanner
|
|
12
|
+
from .rubygems_package_scanner import RubyGemsPackageScanner
|
|
13
|
+
from .rubygems_project_scanner import RubyGemsRequirementsScanner
|
|
12
14
|
from .scanner import PackageScanner, ProjectScanner
|
|
13
15
|
from ..ecosystems import ECOSYSTEM
|
|
14
16
|
|
|
@@ -36,6 +38,8 @@ def get_package_scanner(ecosystem: ECOSYSTEM) -> Optional[PackageScanner]:
|
|
|
36
38
|
return GithubActionScanner()
|
|
37
39
|
case ECOSYSTEM.EXTENSION:
|
|
38
40
|
return ExtensionScanner()
|
|
41
|
+
case ECOSYSTEM.RUBYGEMS:
|
|
42
|
+
return RubyGemsPackageScanner()
|
|
39
43
|
return None
|
|
40
44
|
|
|
41
45
|
|
|
@@ -62,4 +66,6 @@ def get_project_scanner(ecosystem: ECOSYSTEM) -> Optional[ProjectScanner]:
|
|
|
62
66
|
return GitHubActionDependencyScanner()
|
|
63
67
|
case ECOSYSTEM.EXTENSION:
|
|
64
68
|
return None # we're not including dependency scanning for this PR
|
|
69
|
+
case ECOSYSTEM.RUBYGEMS:
|
|
70
|
+
return RubyGemsRequirementsScanner()
|
|
65
71
|
return None
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from typing import Tuple
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
|
|
7
|
+
from guarddog.analyzer.analyzer import Analyzer
|
|
8
|
+
from guarddog.ecosystems import ECOSYSTEM
|
|
9
|
+
from guarddog.scanners.scanner import PackageScanner
|
|
10
|
+
from guarddog.utils.archives import safe_extract
|
|
11
|
+
|
|
12
|
+
log = logging.getLogger("guarddog")
|
|
13
|
+
|
|
14
|
+
RUBYGEMS_API_URL = "https://rubygems.org/api/v1"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class RubyGemsPackageScanner(PackageScanner):
    """Package scanner for gems hosted on rubygems.org.

    Gem metadata is read from the RubyGems JSON API; the ``.gem`` archive is
    downloaded through the parent class and unpacked by the overridden
    ``_extract_archive`` so that the gem's source files end up in the scan
    directory.
    """

    # ``requests`` waits indefinitely when no timeout is supplied, so every
    # API call made by this scanner is bounded.
    REQUEST_TIMEOUT_SECONDS = 30

    # Characters accepted in a gem name or version before the value is
    # interpolated into a URL or joined onto a local path.
    _SAFE_COMPONENT_CHARS = frozenset(
        "abcdefghijklmnopqrstuvwxyz"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "0123456789._-"
    )

    def __init__(self) -> None:
        super().__init__(Analyzer(ECOSYSTEM.RUBYGEMS))

    @classmethod
    def _validate_component(cls, kind: str, value: str) -> None:
        """
        Reject a gem name or version that is unsafe to embed in a URL or path.

        Names and versions may come from a lockfile of the project being
        scanned, so separators, ``..`` and URL metacharacters must not reach
        ``os.path.join`` or the request URL.

        Args:
            kind (str): what is being checked ("name" or "version"), used in
                the error message
            value (str): the value to check

        Raises:
            ValueError: if the value is empty, consists only of dots, or
                contains a character outside letters, digits, ``.``, ``_``, ``-``
        """
        is_safe = (
            isinstance(value, str)
            # "", "." and ".." would resolve to the directory itself or its parent
            and value.strip(".") != ""
            and cls._SAFE_COMPONENT_CHARS.issuperset(value)
        )
        if not is_safe:
            raise ValueError(f"Invalid gem {kind}: {value!r}")

    def _extract_archive(self, archive_path: str, target_path: str) -> None:
        """
        Override to handle .gem files which are nested tar archives.
        The outer tar contains data.tar.gz which has the actual source code.

        Args:
            archive_path (str): path to the .gem file
            target_path (str): directory to extract the source code into

        Raises:
            Exception: if the gem contains neither data.tar.gz nor data.tar
        """
        if not archive_path.endswith(".gem"):
            # Fall back to default behavior for non-gem archives
            super()._extract_archive(archive_path, target_path)
            return

        os.makedirs(target_path, exist_ok=True)

        # Extract outer .gem archive to a staging directory
        # NOTE(review): this directory (including the inner data archive) is
        # left inside target_path after extraction - confirm whether anything
        # downstream relies on it before cleaning it up.
        outer_extract = os.path.join(target_path, "_gem_contents")
        os.makedirs(outer_extract, exist_ok=True)

        log.debug(f"Extracting outer gem archive {archive_path}")
        safe_extract(archive_path, outer_extract)

        # Find the inner data archive (data.tar.gz or data.tar)
        data_tar_path = os.path.join(outer_extract, "data.tar.gz")
        if not os.path.exists(data_tar_path):
            data_tar_path = os.path.join(outer_extract, "data.tar")

        if not os.path.exists(data_tar_path):
            raise Exception(f"data.tar.gz not found in gem {archive_path}")

        # Extract the inner data archive to the final target
        log.debug(f"Extracting inner data archive {data_tar_path}")
        safe_extract(data_tar_path, target_path)

        log.debug(f"Successfully extracted gem files to {target_path}")

    def download_and_get_package_info(
        self, directory: str, package_name: str, version=None
    ) -> Tuple[dict, str]:
        """
        Fetches the gem's metadata, then downloads and extracts the gem.

        Args:
            directory (str): directory to download and extract to
            package_name (str): name of the gem
            version: version to download; when None the "version" field of
                the fetched metadata is used

        Returns:
            Tuple[dict, str]: the metadata returned by the gem info endpoint
                and the path to the extracted gem contents
        """
        # NOTE(review): the metadata is fetched by name only, so it is not
        # tied to an explicitly requested version - confirm this is intended.
        gem_info = self._get_gem_info(package_name)

        if version is None:
            version = gem_info["version"]

        extract_dir = self._download_gem(package_name, version, directory)
        return gem_info, extract_dir

    def _get_gem_info(self, package_name: str) -> dict:
        """
        Fetches the JSON document describing a gem.

        Args:
            package_name (str): name of the gem

        Returns:
            dict: decoded JSON response

        Raises:
            ValueError: if the gem name is not safe to embed in a URL
            requests.HTTPError: if the API answers with an error status
        """
        self._validate_component("name", package_name)
        url = f"{RUBYGEMS_API_URL}/gems/{package_name}.json"
        log.debug(f"Fetching gem info from {url}")
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        return response.json()

    def _get_gem_version_info(self, package_name: str, version: str) -> dict:
        """
        Fetches the entry for one specific version of a gem.

        Args:
            package_name (str): name of the gem
            version (str): version number to look up

        Returns:
            dict: the entry of the versions listing whose "number" equals version

        Raises:
            ValueError: if the gem name is not safe to embed in a URL
            requests.HTTPError: if the API answers with an error status
            Exception: if the version is not present in the listing
        """
        self._validate_component("name", package_name)
        url = f"{RUBYGEMS_API_URL}/versions/{package_name}.json"
        log.debug(f"Fetching version info from {url}")
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()

        versions = response.json()
        for v in versions:
            if v["number"] == version:
                return v

        raise Exception(f"Version {version} for gem {package_name} not found")

    def _download_gem(self, package_name: str, version: str, directory: str) -> str:
        """
        Downloads and extracts a RubyGem package.

        Uses the parent class's download_compressed method which will call our
        overridden _extract_archive method to handle the nested .gem format.

        Args:
            package_name (str): name of the gem
            version (str): version of the gem
            directory (str): directory to download and extract to

        Returns:
            str: path to the extracted gem contents

        Raises:
            ValueError: if the name or version could escape ``directory`` or
                alter the download URL
        """
        # Both values become part of a URL and of two local paths below.
        self._validate_component("name", package_name)
        self._validate_component("version", version)

        gem_url = f"https://rubygems.org/gems/{package_name}-{version}.gem"
        gem_path = os.path.join(directory, f"{package_name}-{version}.gem")
        extract_dir = os.path.join(directory, package_name)

        # Use parent class method which handles download and extraction
        # The extraction will use our overridden _extract_archive method
        self.download_compressed(gem_url, gem_path, extract_dir)

        return extract_dir
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
from typing import List
|
|
5
|
+
|
|
6
|
+
from guarddog.scanners.rubygems_package_scanner import RubyGemsPackageScanner
|
|
7
|
+
from guarddog.scanners.scanner import ProjectScanner, Dependency, DependencyVersion
|
|
8
|
+
|
|
9
|
+
log = logging.getLogger("guarddog")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class RubyGemsRequirementsScanner(ProjectScanner):
    """
    Scans all gems in the Gemfile.lock file of a project
    """

    def __init__(self) -> None:
        super().__init__(RubyGemsPackageScanner())

    def parse_requirements(self, raw_requirements: str) -> List[Dependency]:
        """
        Parses Gemfile.lock and extracts gem names and versions.

        Only entries listed under the ``specs:`` key of a ``GEM`` section are
        returned. Other column-0 sections (for example ``GIT`` or ``PATH``)
        also carry a ``specs:`` key, but their gems are not registry packages
        and are skipped.

        Gemfile.lock format:
            GEM
              remote: https://rubygems.org/
              specs:
                actioncable (7.0.4)
                  actionpack (= 7.0.4)
                rails (7.0.4)
                ...

        Args:
            raw_requirements (str): contents of a Gemfile.lock file

        Returns:
            List[Dependency]: one entry per gem name, in order of first
                appearance; each version records its 1-based line number
        """
        # Resolved gems are indented four spaces; their dependency constraints
        # sit six spaces deep and must not match.
        gem_pattern = re.compile(r"^ {4}(\S+) \(([^)]+)\)$")

        dependencies: List[Dependency] = []
        # name -> Dependency, so repeated names are merged without rescanning
        # the result list for every gem.
        seen = {}

        section = None
        in_gem_specs = False

        for idx, line in enumerate(raw_requirements.splitlines()):
            if line and not line.startswith(" "):
                # A column-0 line opens a new section and closes any spec list.
                section = line.strip()
                in_gem_specs = False
                continue

            if line.strip() == "specs:":
                # A spec list seen before any header is still accepted, so
                # header-less input keeps working.
                in_gem_specs = section in (None, "GEM")
                continue

            if not in_gem_specs:
                continue

            match = gem_pattern.match(line)
            if not match:
                continue

            name, version = match.group(1), match.group(2)

            dep = seen.get(name)
            if dep is None:
                dep = Dependency(name=name, versions=set())
                seen[name] = dep
                dependencies.append(dep)

            dep.versions.add(DependencyVersion(version=version, location=idx + 1))

        return dependencies

    def find_requirements(self, directory: str) -> list[str]:
        """
        Collects every Gemfile.lock found under a directory tree.

        Args:
            directory (str): root directory to walk

        Returns:
            list[str]: paths to the Gemfile.lock files, in walk order
        """
        return [
            os.path.join(root, name)
            for root, _dirs, files in os.walk(directory)
            for name in files
            if name == "Gemfile.lock"
        ]
|
guarddog/scanners/scanner.py
CHANGED
|
@@ -187,24 +187,50 @@ class PackageScanner:
|
|
|
187
187
|
name, tmpdirname, version, rules, write_package_info
|
|
188
188
|
)
|
|
189
189
|
|
|
190
|
-
def
|
|
191
|
-
"""Downloads
|
|
190
|
+
def _fetch_archive(self, url: str, archive_path: str) -> None:
    """Downloads an archive file from a URL.

    This method can be overridden by subclasses if custom download logic is needed.

    Args:
        url (str): download link
        archive_path (str): path to save the downloaded file

    Raises:
        requests.HTTPError: if the server answers with an error status; in
            that case nothing is written to archive_path
    """
    import shutil

    log.debug(f"Downloading package archive from {url}")

    # The timeout bounds connection setup and each wait for data, so a stalled
    # server cannot hang the scan; the context manager releases the connection.
    with requests.get(url, stream=True, timeout=60) as response:
        # Fail here instead of saving an error page as if it were the archive.
        response.raise_for_status()

        with open(archive_path, "wb") as f:
            # Copy the raw body in chunks rather than buffering the whole
            # archive in memory; the bytes written are the same as before.
            shutil.copyfileobj(response.raw, f)
|
|
204
204
|
|
|
205
|
+
def _extract_archive(self, archive_path: str, target_path: str) -> None:
    """Unpacks an archive into a directory.

    Subclasses may override this hook when an archive needs special
    treatment (e.g., nested archives like .gem files).

    Args:
        archive_path (str): path to the archive file
        target_path (str): directory to extract files into
    """
    destination = target_path
    safe_extract(archive_path, destination)

    # Only reached when extraction did not raise.
    success_message = f"Successfully extracted files to {destination}"
    log.debug(success_message)
|
|
217
|
+
|
|
218
|
+
def download_compressed(self, url, archive_path, target_path):
|
|
219
|
+
"""Downloads a compressed file and extracts it.
|
|
220
|
+
|
|
221
|
+
This is a template method that orchestrates the download, extraction, and cleanup
|
|
222
|
+
process. Subclasses can override individual steps (_fetch_archive, _extract_archive)
|
|
223
|
+
to customize behavior.
|
|
224
|
+
|
|
225
|
+
Args:
|
|
226
|
+
url (str): download link
|
|
227
|
+
archive_path (str): path to download compressed file
|
|
228
|
+
target_path (str): path to unzip compressed file
|
|
229
|
+
"""
|
|
230
|
+
self._fetch_archive(url, archive_path)
|
|
231
|
+
|
|
205
232
|
try:
|
|
206
|
-
|
|
207
|
-
log.debug(f"Successfully extracted files to {target_path}")
|
|
233
|
+
self._extract_archive(archive_path, target_path)
|
|
208
234
|
finally:
|
|
209
235
|
log.debug(f"Removing temporary archive file {archive_path}")
|
|
210
236
|
os.remove(archive_path)
|