gem-guardian 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/{main.yml → ci.yml} +3 -21
  3. data/.rubocop.yml +12 -0
  4. data/CHANGELOG.md +25 -1
  5. data/CODE_OF_CONDUCT.md +1 -1
  6. data/Gemfile +0 -1
  7. data/README.md +397 -49
  8. data/Rakefile +27 -27
  9. data/bin/console +2 -2
  10. data/gem-guardian.gemspec +11 -9
  11. data/lib/gem/guardian/artifact_store.rb +13 -2
  12. data/lib/gem/guardian/checksum_provider.rb +181 -0
  13. data/lib/gem/guardian/cli.rb +99 -7
  14. data/lib/gem/guardian/configuration.rb +88 -0
  15. data/lib/gem/guardian/dependency.rb +5 -1
  16. data/lib/gem/guardian/github_release_verifier.rb +2 -2
  17. data/lib/gem/guardian/lockfile_parser.rb +32 -6
  18. data/lib/gem/guardian/progress.rb +66 -0
  19. data/lib/gem/guardian/provenance_verifier.rb +1 -3
  20. data/lib/gem/guardian/registry.rb +83 -0
  21. data/lib/gem/guardian/registry_audit.rb +81 -0
  22. data/lib/gem/guardian/report_builder.rb +3 -4
  23. data/lib/gem/guardian/result_printer.rb +35 -5
  24. data/lib/gem/guardian/rubygems_client.rb +366 -21
  25. data/lib/gem/guardian/verifier.rb +119 -12
  26. data/lib/gem/guardian/version.rb +1 -1
  27. data/lib/gem/guardian.rb +4 -0
  28. data/script/registry_provenance_audit.rb +41 -0
  29. metadata +16 -19
  30. data/sig/gem/guardian/artifact_store.rbs +0 -22
  31. data/sig/gem/guardian/checksum.rbs +0 -14
  32. data/sig/gem/guardian/cli.rbs +0 -60
  33. data/sig/gem/guardian/dependency.rbs +0 -18
  34. data/sig/gem/guardian/error.rbs +0 -26
  35. data/sig/gem/guardian/lockfile_parser.rbs +0 -55
  36. data/sig/gem/guardian/rubygems_client.rbs +0 -46
  37. data/sig/gem/guardian/verifier.rbs +0 -40
  38. data/sig/gem/guardian/version.rbs +0 -10
  39. data/sig/gem/guardian.rbs +0 -4
@@ -6,6 +6,8 @@ module Gem
6
6
  class LockfileParser
7
7
  # Matches dependency lines in the specs section.
8
8
  GEM_LINE = /^ {4}([A-Za-z0-9_.-]+) \(([^)]+)\)/
9
+ # Matches Bundler remote lines inside GEM sections.
10
+ REMOTE_LINE = /^ remote: (.+)$/
9
11
  # Matches checksum lines in the CHECKSUMS section.
10
12
  CHECKSUM_LINE = /^ {2}([A-Za-z0-9_.-]+) \(([^)]+)\) (.+)$/
11
13
  # Parsed lockfile data for the verify command.
@@ -45,11 +47,13 @@ module Gem
45
47
  dependencies = []
46
48
  checksums = {}
47
49
  section = nil
50
+ source = nil
48
51
 
49
52
  File.readlines(@path, chomp: true).each do |line|
50
53
  section = section_for(line, section)
51
- parse_specs_line(line, dependencies) if section == :specs
52
- parse_checksums_line(line, checksums) if section == :checksums
54
+ source = source_for(line, section, source)
55
+ parse_specs_line(line, dependencies, source) if section == :specs
56
+ parse_checksums_line(line, checksums, dependencies) if section == :checksums
53
57
  end
54
58
 
55
59
  LockfileData.new(dependencies, checksums, checksums.any?)
@@ -69,6 +73,8 @@ module Gem
69
73
 
70
74
  def section_for(line, current_section)
71
75
  case line
76
+ when "GEM"
77
+ :gem
72
78
  when " specs:"
73
79
  :specs
74
80
  when "CHECKSUMS"
@@ -80,17 +86,31 @@ module Gem
80
86
  end
81
87
  end
82
88
 
83
- def parse_specs_line(line, dependencies)
89
# Determines which remote the current lockfile line belongs to.
#
# Outside the GEM header and its specs list there is no source. Inside them,
# a remote line selects a (normalized) source; any other line keeps the
# current source while in the specs list and clears it while still in the
# GEM header.
def source_for(line, section, current_source)
  case section
  when :gem, :specs
    remote = REMOTE_LINE.match(line)
    if remote
      normalize_source(remote[1])
    elsif section == :specs
      current_source
    end
  end
end
98
+
99
# Returns nil when the source, ignoring one trailing slash, equals
# RubygemsClient::DEFAULT_HOST; otherwise returns the source unchanged.
def normalize_source(source)
  without_trailing_slash = source.to_s.delete_suffix("/")
  return nil if without_trailing_slash == RubygemsClient::DEFAULT_HOST

  source
end
102
+
103
+ def parse_specs_line(line, dependencies, source)
84
104
  match = GEM_LINE.match(line)
85
105
  return unless match
86
106
 
87
107
  name = match[1]
88
108
  version_and_platform = match[2]
89
109
  version, platform = split_version_and_platform(version_and_platform)
90
- dependencies << Dependency.new(name:, version:, platform:)
110
+ dependencies << Dependency.new(name:, version:, platform:, source:)
91
111
  end
92
112
 
93
- def parse_checksums_line(line, checksums)
113
+ def parse_checksums_line(line, checksums, dependencies)
94
114
  match = CHECKSUM_LINE.match(line)
95
115
  return unless match
96
116
 
@@ -98,11 +118,17 @@ module Gem
98
118
  version_and_platform = match[2]
99
119
  checksum_blob = match[3]
100
120
  version, platform = split_version_and_platform(version_and_platform)
101
- dependency = Dependency.new(name:, version:, platform:)
121
+ dependency = dependency_for_checksum(dependencies, name, version, platform)
102
122
  checksums[dependency] ||= {}
103
123
  register_checksum_pairs(checksums[dependency], checksum_blob)
104
124
  end
105
125
 
126
# Finds the already-parsed dependency that a checksum line refers to.
#
# Reusing the parsed object keeps whatever extra attributes it carries; when
# no parsed dependency matches, a fresh Dependency is built from the
# name/version/platform triple.
def dependency_for_checksum(dependencies, name, version, platform)
  existing = dependencies.find do |candidate|
    next false unless candidate.name == name
    next false unless candidate.version == version

    candidate.platform == platform
  end

  existing || Dependency.new(name:, version:, platform:)
end
131
+
106
132
  def register_checksum_pairs(checksum_store, checksum_blob)
107
133
  checksum_blob.split(",").each do |pair|
108
134
  algorithm, digest = pair.split("=", 2).map(&:strip)
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
module Gem
  module Guardian
    # Minimal single-line terminal progress helper.
    #
    # Progress output is intentionally disabled for non-TTY streams so JSON output,
    # CI logs, and tests do not receive carriage-return noise. Interactive terminals
    # get in-place updates via `\r`; callers should use +finish+ when a logical step
    # is complete and a newline should be emitted.
    module Progress
      module_function

      # Width of the most recently rendered message, used to blank out leftovers.
      @last_width = 0

      # Writes or refreshes an in-place progress message.
      #
      # The message is rendered with a carriage return so repeated calls update
      # the same terminal line. If a later message is shorter than the previous
      # one, trailing characters are cleared with spaces. Output is skipped for
      # non-TTY streams unless +force+ is true, which keeps JSON output and CI logs
      # clean.
      #
      # @param message [#to_s] progress text to render
      # @param io [IO] stream that receives the progress message
      # @param force [Boolean] when true, writes even if +io+ is not a TTY
      # @return [void] returns no value; writes progress as a side effect
      def update(message, io: $stdout, force: false)
        return unless enabled?(io, force:)

        text = message.to_s
        # `module_function` also leaves private instance copies of these methods;
        # on an object that mixes the module in, @last_width starts out nil, so
        # coerce it instead of raising on `nil - Integer`.
        leftover = @last_width.to_i - text.length
        padding = " " * [leftover, 0].max
        io.print "\r#{text}#{padding}"
        io.flush
        @last_width = text.length
      end

      # Completes the current progress line and emits a newline.
      #
      # Call this after a logical step finishes so the next human-readable result
      # starts on a clean line. When +message+ is provided, the line is refreshed
      # one final time before the newline is written.
      #
      # @param message [#to_s, nil] optional final message for the progress line
      # @param io [IO] stream that receives the progress message
      # @param force [Boolean] when true, writes even if +io+ is not a TTY
      # @return [void] returns no value; writes a final progress line as a side effect
      def finish(message = nil, io: $stdout, force: false)
        return unless enabled?(io, force:)

        update(message, io:, force:) if message
        io.puts
        # The next update starts on a fresh line, so nothing needs clearing.
        @last_width = 0
      end

      # Returns whether progress output should be written to the provided stream.
      #
      # @param io [IO] candidate progress stream
      # @param force [Boolean] bypasses TTY detection when true
      # @return [Boolean] +true+ when progress output should be emitted
      def enabled?(io = $stdout, force: false)
        force || (io.respond_to?(:tty?) && io.tty?)
      end
    end
  end
end
@@ -37,7 +37,6 @@ module Gem
37
37
 
38
38
  private
39
39
 
40
- # rubocop:disable Metrics/MethodLength
41
40
  def build_result(dependency, provenance, artifact_sha256)
42
41
  github_release = github_release_result(provenance)
43
42
  status = combine_status(provenance_status(provenance, artifact_sha256), github_release&.status)
@@ -70,9 +69,8 @@ module Gem
70
69
  github_release:
71
70
  }
72
71
  end
73
- # rubocop:enable Metrics/ParameterLists
74
- # rubocop:enable Metrics/MethodLength
75
72
 
73
+ # rubocop:enable Metrics/ParameterLists
76
74
  def provenance_status(provenance, artifact_sha256)
77
75
  return :unsupported unless provenance.trusted_publishing
78
76
  return :verified unless provenance.sha256 && artifact_sha256
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rubygems/spec_fetcher"
4
+ require "uri"
5
+
6
module Gem
  module Guardian
    # Enumerates gems visible through RubyGems-compatible registry sources.
    #
    # This is intentionally a small library API rather than a supported CLI command.
    # It is useful for research scripts that want to inspect the latest gem entries
    # visible from the current `Gem.sources` configuration, including private
    # RubyGems-compatible registries such as GitHub Packages, Gemfury, CodeArtifact,
    # or self-hosted gem servers.
    class Registry
      # One latest gem entry discovered from a registry index.
      Entry = Data.define(:name, :version, :platform, :source) do
        # Builds the gem-guardian dependency equivalent of this entry.
        def dependency
          Dependency.new(name:, version:, platform:, source:)
        end
      end

      # @param sources [Gem::SourceList, Array<String, Gem::Source>] registry sources to inspect
      # @param spec_fetcher [Gem::SpecFetcher] RubyGems spec fetcher; built from
      #   +sources+ when omitted
      def initialize(sources: Gem.sources, spec_fetcher: nil)
        @sources = normalize_sources(sources)
        @spec_fetcher = spec_fetcher || Gem::SpecFetcher.new(@sources)
      end

      # Yields latest gem entries visible from the configured sources.
      #
      # Without a block an Enumerator is returned. When +limit+ is given,
      # iteration stops once that many entries have been yielded.
      def each_latest_spec(limit: nil)
        return enum_for(:each_latest_spec, limit:) unless block_given?

        latest_spec_tuples.each_with_index do |(spec, source), yielded|
          break if limit && yielded >= limit

          yield build_entry(spec, source)
        end
      end

      # Returns latest gem entries visible from the configured sources as an Array.
      def latest_specs(limit: nil)
        collected = []
        each_latest_spec(limit:) { |entry| collected << entry }
        collected
      end

      private

      # Asks the spec fetcher for every latest spec; the block accepts all of them.
      def latest_spec_tuples
        @spec_fetcher.detect(:latest) { true }
      end

      # Converts one (spec, source) pair into an Entry made of plain strings.
      def build_entry(spec, source)
        Entry.new(
          name: spec.name,
          version: spec.version.to_s,
          platform: platform_string(spec.platform),
          source: sanitized_source_uri(source)
        )
      end

      # Renders a platform as text, using "ruby" when it renders as empty.
      def platform_string(platform)
        text = platform.to_s
        return "ruby" if text.empty?

        text
      end

      # Accepts anything that already exposes +each_source+; otherwise wraps
      # the given value(s) in a Gem::SourceList.
      def normalize_sources(sources)
        sources.respond_to?(:each_source) ? sources : Gem::SourceList.from(Array(sources))
      end

      # Returns the source location with user and password removed and one
      # trailing slash dropped.
      def sanitized_source_uri(source)
        location = source.respond_to?(:uri) ? source.uri.to_s : source.to_s
        parsed = URI.parse(location)
        parsed.user = nil
        parsed.password = nil
        parsed.to_s.delete_suffix("/")
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
module Gem
  module Guardian
    # Audits provenance support across gems visible from configured registry sources.
    #
    # The audit intentionally verifies provenance metadata only. It does not download
    # every artifact by default because a full checksum audit of a registry can be
    # expensive and unfriendly to remote services. Project-level checksum verification
    # remains the responsibility of `gem-guardian verify` and Bundler lockfiles.
    class RegistryAudit
      # One audited registry entry paired with its provenance outcome.
      EntryResult = Data.define(:entry, :provenance) do
        # Returns the dependency represented by this entry.
        def dependency = entry.dependency
      end

      # Summary of a registry provenance audit.
      Result = Data.define(:results) do
        # Number of entries per provenance status; unseen statuses read as 0.
        def counts
          tallies = Hash.new(0)
          results.each { |item| tallies[item.provenance.status] += 1 }
          tallies
        end

        # Entries with verified provenance.
        def verified = by_status(:verified)

        # Entries without Trusted Publishing provenance support.
        def unsupported = by_status(:unsupported)

        # Entries that errored while checking provenance.
        def errors = by_status(:error)

        # Entries whose provenance checksum mismatched the artifact checksum.
        def mismatches = by_status(:mismatch)

        # Total audited entries.
        def total = results.size

        private

        # Selects the entries whose provenance status equals +status+.
        def by_status(status)
          results.select { |item| item.provenance.status == status }
        end
      end

      # @param registry [Registry] registry enumerator
      # @param provenance_verifier [ProvenanceVerifier] provenance checker
      def initialize(registry: Registry.new, provenance_verifier: ProvenanceVerifier.new)
        @registry = registry
        @provenance_verifier = provenance_verifier
      end

      # Runs the audit.
      #
      # @param limit [Integer, nil] maximum number of latest entries to inspect
      # @return [Result] aggregate audit result containing per-gem provenance outcomes
      def run(limit: nil)
        audited = @registry.each_latest_spec(limit:).map do |entry|
          provenance = @provenance_verifier.verify(entry.dependency)
          EntryResult.new(entry:, provenance:)
        end

        Result.new(audited)
      end
    end
  end
end
@@ -61,7 +61,6 @@ module Gem
61
61
  end
62
62
 
63
63
  # Returns the non-error provenance fields.
64
- # rubocop:disable Metrics/MethodLength
65
64
  def provenance_fields(result)
66
65
  {
67
66
  status: result.status,
@@ -76,10 +75,8 @@ module Gem
76
75
  attestation_url: result.attestation_url
77
76
  }
78
77
  end
79
- # rubocop:enable Metrics/MethodLength
80
78
 
81
79
  # Returns the GitHub release details for a provenance result.
82
- # rubocop:disable Metrics/MethodLength
83
80
  def github_release_hash(result)
84
81
  return nil unless result
85
82
 
@@ -96,13 +93,15 @@ module Gem
96
93
  error: error_hash(result.error)
97
94
  }
98
95
  end
99
- # rubocop:enable Metrics/MethodLength
100
96
 
101
97
  # Returns the checksum payload for a verification result.
102
98
  def checksum_hash(result)
103
99
  {
104
100
  status: result.status,
105
101
  expected_sha256: result.expected_sha256,
102
+ registry_sha256: result.respond_to?(:registry_sha256) ? result.registry_sha256 : nil,
103
+ registry_checksum_provider: result.respond_to?(:registry_checksum_provider) ? result.registry_checksum_provider : nil,
104
+ registry_checksum_uri: result.respond_to?(:registry_checksum_uri) ? result.registry_checksum_uri : nil,
106
105
  actual_sha256: result.actual_sha256,
107
106
  artifact_path: result.artifact_path,
108
107
  checksum_source: result.checksum_source,
@@ -29,16 +29,19 @@ module Gem
29
29
 
30
30
  # Prints a successful verification result.
31
31
  def print_ok_result(result, label, lockfile_mode)
32
- prefix = lockfile_mode && result.checksum_source == :rubygems ? "FALLBACK" : "PASS"
32
+ prefix = ok_result_prefix(result, lockfile_mode)
33
33
  @stdout.puts "#{prefix} #{label}"
34
34
  @stdout.puts " sha256 #{result.actual_sha256}"
35
- @stdout.puts " source #{result.checksum_source}" if lockfile_mode && result.checksum_source
35
+ @stdout.puts " source #{result.checksum_source}" if show_checksum_source?(result, lockfile_mode)
36
+ print_registry_cross_check(result)
37
+ print_registry_provider(result)
36
38
  end
37
39
 
38
40
  # Prints a checksum mismatch.
39
41
  def print_mismatch_result(result, label)
40
42
  @stdout.puts "FAIL #{label}"
41
43
  @stdout.puts " expected #{result.expected_sha256}"
44
+ @stdout.puts " registry #{result.registry_sha256}" if result.respond_to?(:registry_sha256) && result.registry_sha256
42
45
  @stdout.puts " actual #{result.actual_sha256}"
43
46
  end
44
47
 
@@ -110,12 +113,14 @@ module Gem
110
113
  Usage:
111
114
  gem-guardian verify [--lockfile Gemfile.lock] [--json] [--provenance]
112
115
  gem-guardian verify GEM:VERSION[:PLATFORM] [GEM:VERSION[:PLATFORM] ...]
116
+ gem-guardian verify --lockfile Gemfile.lock [--provenance] GEM:VERSION[:PLATFORM] [...]
113
117
  gem-guardian version
114
118
  gem-guardian help
115
119
 
116
120
  Examples:
117
121
  gem-guardian verify
118
- gem-guardian verify sidekiq:8.1.6
122
+ gem-guardian verify rails:8.1.3
123
+ gem-guardian verify --lockfile Gemfile.lock --provenance mammoth:0.1.1
119
124
  gem-guardian verify cdc-sidekiq:0.1.1
120
125
  gem-guardian verify nokogiri:1.18.9:x86_64-linux
121
126
  gem-guardian verify --json --provenance ratomic:0.4.1
@@ -123,6 +128,33 @@ module Gem
123
128
 
124
129
  private
125
130
 
131
# Picks the status word printed in front of a successful result.
#
# An artifact-sourced checksum is reported as "RECORDED"; a registry-sourced
# checksum in lockfile mode is reported as "FALLBACK"; everything else is "PASS".
def ok_result_prefix(result, lockfile_mode)
  case result.checksum_source
  when :artifact then "RECORDED"
  when :registry then lockfile_mode ? "FALLBACK" : "PASS"
  else "PASS"
  end
end
137
+
138
# Truthy when the result names a checksum source; the lockfile-mode
# argument is accepted but not consulted.
def show_checksum_source?(result, _lockfile_mode) = result.checksum_source
141
+
142
# Prints the registry digest as an extra line for results that expose one.
#
# Nothing is printed when the result has no registry digest, or when the
# registry itself was the checksum source.
def print_registry_cross_check(result)
  registry_digest = result.respond_to?(:registry_sha256) && result.registry_sha256
  return unless registry_digest
  return if result.checksum_source == :registry

  @stdout.puts " registry #{registry_digest}"
end
149
+
150
# Prints the registry checksum provider and, when the result has one, the
# URI it reports for that checksum. Results without a provider print nothing.
def print_registry_provider(result)
  provider = result.respond_to?(:registry_checksum_provider) && result.registry_checksum_provider
  return unless provider

  @stdout.puts " provider #{provider}"
  checksum_uri = result.registry_checksum_uri
  @stdout.puts " verify #{checksum_uri}" if checksum_uri
end
157
+
126
158
  def result_label(result)
127
159
  dependency = result.dependency
128
160
  "#{dependency.name} #{dependency.version} #{dependency.platform}"
@@ -142,7 +174,6 @@ module Gem
142
174
  end
143
175
 
144
176
  # Returns the GitHub release fields to render for a provenance result.
145
- # rubocop:disable Metrics/MethodLength
146
177
  def github_release_fields(result)
147
178
  [
148
179
  ["github release", result.status],
@@ -156,7 +187,6 @@ module Gem
156
187
  ["release url", result.release_url]
157
188
  ]
158
189
  end
159
- # rubocop:enable Metrics/MethodLength
160
190
 
161
191
  # Prints a GitHub release provenance result.
162
192
  def print_github_release_result(result)