gitlab-secret_detection 0.19.0 → 0.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 24a3afdfa8519bd53576f9fe18ffffc12d9ed32d80e27610d03300666423e8a7
4
- data.tar.gz: f7b000df1c5c6e712e528f388a44506f3cfce79bd6044de44ad106087f59d52f
3
+ metadata.gz: 92079cc4159944de4812acba9f6c0f7ec7f36a0a1c4f0770849fc2e35740d7da
4
+ data.tar.gz: 5cda8188ccc3d46d47b074a3126543db9215fd5d99def8082bae2f8f720d5454
5
5
  SHA512:
6
- metadata.gz: 4ca94bc1d02d099c7f7404b321abbc1bf7be811112e564dc44c1256b42e5aeea6d4ab207280d0ccb88ef2c62f7f17eb3fbb47f6eadc717ef5cd33f4e66cf5e26
7
- data.tar.gz: f8815666aa5ff2f129c40eb42814449d9201755a91ee2b4f3bb33b48bbe3364a9e3d78b36ec8adf2fdd94e51a03d448d9d36fbf12326f4312a14cb31843bdb96
6
+ metadata.gz: d0a74f47adcfbf6e8ec0ab72366d5151b2860788e4adba51944e7ddc03b73eef119b9b8f10890454273845ca2c94c8d74be8a5519adc6ff97a3ca485e47464b7
7
+ data.tar.gz: 399a729fa667c6174e185c10eef898e51f6f1827c0584242560033d0233dc19ce307d9a75c97eb18acf18e19f647d20d31efe1e11fb984ace4640d6dd7ddd85b
data/README.md CHANGED
@@ -62,20 +62,21 @@ the approach:
62
62
 
63
63
  Usage `make <command>`
64
64
 
65
- | Command | Description |
66
- |---------------------|---------------------------------------------------------------------------------------------------------------------------------|
67
- | `install_secret_detection_rules` | Downloads secret-detection-rules based on package version defined in RULES_VERSION |
68
- | `install` | Installs ruby gems in the project using Ruby bundler |
69
- | `lint_fix` | Fixes all the fixable Rubocop lint offenses |
70
- | `gem_clean` | Cleans existing gem file(if any) generated through gem build process |
71
- | `gem_build` | Builds Ruby gem file wrapping secret detection logic (lib directory) |
72
- | `generate_proto` | Generates ruby(.rb) files for the Protobuf Service Definition files(.proto) |
73
- | `grpc_docker_build` | Builds a docker container image for gRPC server |
74
- | `grpc_docker_serve` | Runs gRPC server via docker container listening on port 8080. Run `grpc_docker_build` make command before running this command. |
75
- | `grpc_serve` | Runs gRPC server on the CLI listening on port 50001. Run `install` make command before running this command. |
76
- | `run_core_tests` | Runs RSpec tests for Secret Detection core logic |
77
- | `run_grpc_tests` | Runs RSpec tests for Secret Detection gRPC endpoints |
78
- | `run_all_tests` | Runs all the RSpec tests in the project |
65
+ | Command | Description |
66
+ |----------------------------------|---------------------------------------------------------------------------------------------------------------------------------|
67
+ | `install_secret_detection_rules` | Downloads secret-detection-rules based on package version defined in RULES_VERSION |
68
+ | `install` | Installs ruby gems in the project using Ruby bundler |
69
+ | `lint_fix` | Fixes all the fixable Rubocop lint offenses |
70
+ | `gem_clean` | Cleans existing gem file(if any) generated through gem build process |
71
+ | `gem_build` | Builds Ruby gem file wrapping secret detection logic (lib directory) |
72
+ | `generate_proto` | Generates ruby(.rb) files for the Protobuf Service Definition files(.proto) |
73
+ | `grpc_docker_build` | Builds a docker container image for gRPC server |
74
+ | `grpc_docker_serve` | Runs gRPC server via docker container listening on port 8080. Run `grpc_docker_build` make command before running this command. |
75
+ | `grpc_serve` | Runs gRPC server on the CLI listening on port 50001. Run `install` make command before running this command. |
76
+ | `run_core_tests` | Runs RSpec tests for Secret Detection core logic |
77
+ | `run_grpc_tests` | Runs RSpec tests for Secret Detection gRPC endpoints |
78
+ | `run_utils_tests` | Runs RSpec tests for Secret Detection utilities |
79
+ | `run_all_tests` | Runs all the RSpec tests in the project |
79
80
 
80
81
 
81
82
  ## Secret Detection Rules
@@ -27,13 +27,32 @@ module Gitlab
27
27
 
28
28
  # parses given ruleset file and returns the parsed rules
29
29
  def parse_ruleset
30
- # rule_file_content = File.read(path)
30
+ logger.info(
31
+ message: "Parsing local ruleset file",
32
+ ruleset_path: RULESET_FILE_PATH
33
+ )
31
34
  rules_data = TomlRB.load_file(path, symbolize_keys: true).freeze
35
+ ruleset_version = extract_ruleset_version
36
+
37
+ logger.info(
38
+ message: "Ruleset details fetched for running Secret Detection scan",
39
+ total_rules: rules_data[:rules]&.length,
40
+ ruleset_version:
41
+ )
32
42
  rules_data[:rules].freeze
33
43
  rescue StandardError => e
34
- logger.error "Failed to parse secret detection ruleset from '#{path}' path: #{e}"
44
+ logger.error(message: "Failed to parse local secret detection ruleset: #{e.message}")
35
45
  raise Core::Scanner::RulesetParseError, e
36
46
  end
47
+
48
+ def extract_ruleset_version
49
+ @ruleset_version ||= if File.readable?(RULESET_FILE_PATH)
50
+ first_line = File.open(RULESET_FILE_PATH, &:gets)
51
+ first_line&.split(":")&.[](1)&.strip
52
+ end
53
+ rescue StandardError => e
54
+ logger.error(message: "Failed to extract Secret Detection Ruleset version from ruleset file: #{e.message}")
55
+ end
37
56
  end
38
57
  end
39
58
  end
@@ -111,12 +111,29 @@ module Gitlab
111
111
  payload_timeout:,
112
112
  pattern_matcher: build_pattern_matcher(tags:),
113
113
  exclusions:
114
- }
114
+ }.freeze
115
+
116
+ logger.info(
117
+ message: "Scan input parameters for running Secret Detection scan",
118
+ timeout:,
119
+ payload_timeout:,
120
+ given_total_payloads: payloads.length,
121
+ scannable_payloads_post_keyword_filter: matched_payloads.length,
122
+ tags:,
123
+ run_in_subprocess: subprocess,
124
+ given_exclusions: format_exclusions_hash(exclusions)
125
+ )
115
126
 
116
127
  secrets, applied_exclusions = subprocess ? run_scan_within_subprocess(**scan_args) : run_scan(**scan_args)
117
128
 
118
129
  scan_status = overall_scan_status(secrets)
119
130
 
131
+ logger.info(
132
+ message: "Secret Detection scan completed with #{secrets.length} secrets detected in the given payloads",
133
+ detected_secrets_metadata: format_detected_secrets_metadata(secrets),
134
+ applied_exclusions: format_exclusions_arr(applied_exclusions)
135
+ )
136
+
120
137
  Core::Response.new(status: scan_status, results: secrets, applied_exclusions:)
121
138
  end
122
139
  rescue Timeout::Error => e
@@ -135,7 +152,18 @@ module Gitlab
135
152
  # are same as +DEFAULT_PATTERN_MATCHER_TAGS+ then returns the eagerly loaded default
136
153
  # pattern matcher created during initialization.
137
154
  def build_pattern_matcher(tags:, include_missing_tags: false)
138
- return default_pattern_matcher if tags.eql?(DEFAULT_PATTERN_MATCHER_TAGS) && !default_pattern_matcher.nil?
155
+ if tags.eql?(DEFAULT_PATTERN_MATCHER_TAGS) && !default_pattern_matcher.nil?
156
+ logger.info(
157
+ message: "Given tags input matches default matcher tags, using pre-defined RE2 Pattern Matcher"
158
+ )
159
+ return default_pattern_matcher
160
+ end
161
+
162
+ logger.info(
163
+ message: "Creating a new RE2 Pattern Matcher with given tags",
164
+ tags:,
165
+ include_missing_tags:
166
+ )
139
167
 
140
168
  matcher = RE2::Set.new
141
169
 
@@ -154,7 +182,7 @@ module Gitlab
154
182
  end
155
183
 
156
184
  unless matcher.compile
157
- logger.error "Failed to compile secret detection rulesets in RE::Set"
185
+ logger.error "Failed to compile secret detection ruleset in RE::Set"
158
186
 
159
187
  raise RulesetCompilationError
160
188
  end
@@ -174,7 +202,18 @@ module Gitlab
174
202
  end
175
203
 
176
204
  def build_keyword_matcher(tags:, include_missing_tags: false)
177
- return default_keyword_matcher if tags.eql?(DEFAULT_PATTERN_MATCHER_TAGS) && !default_keyword_matcher.nil?
205
+ if tags.eql?(DEFAULT_PATTERN_MATCHER_TAGS) && !default_keyword_matcher.nil?
206
+ logger.info(
207
+ message: "Given tags input matches default tags, using pre-defined RE2 Keyword Matcher"
208
+ )
209
+ return default_keyword_matcher
210
+ end
211
+
212
+ logger.info(
213
+ message: "Creating a new RE2 Keyword Matcher..",
214
+ tags:,
215
+ include_missing_tags:
216
+ )
178
217
 
179
218
  include_keywords = Set.new
180
219
 
@@ -187,15 +226,28 @@ module Gitlab
187
226
  include_keywords.merge(rule[:keywords]) unless rule[:keywords].nil?
188
227
  end
189
228
 
190
- return nil if include_keywords.empty?
229
+ if include_keywords.empty?
230
+ logger.error(
231
+ message: "No rule keywords found a match with given rule tags, returning empty RE2 Keyword Matcher"
232
+ )
233
+ return nil
234
+ end
191
235
 
192
236
  keywords_regex = include_keywords.join('|')
193
237
 
238
+ logger.debug(
239
+ message: "Creating RE2 Keyword Matcher with set of rule keywords",
240
+ keywords: include_keywords.to_a
241
+ )
242
+
194
243
  RE2("\\b(#{keywords_regex})")
195
244
  end
196
245
 
197
246
  def filter_by_keywords(keyword_matcher, payloads)
198
- return [] if keyword_matcher.nil?
247
+ if keyword_matcher.nil?
248
+ logger.warn "No RE2 Keyword Matcher instance available, skipping payload filter by rule keywords step.."
249
+ return payloads
250
+ end
199
251
 
200
252
  matched_payloads = []
201
253
  payloads.each do |payload|
@@ -204,6 +256,20 @@ module Gitlab
204
256
  matched_payloads << payload
205
257
  end
206
258
 
259
+ total_payloads_retained = matched_payloads.length == payloads.length ? 'all' : matched_payloads.length
260
+ log_message = if matched_payloads.empty?
261
+ "No payloads available to scan further after keyword-matching, exiting Secret Detection scan"
262
+ else
263
+ "Retained #{total_payloads_retained} payloads to scan further after keyword-matching step"
264
+ end
265
+
266
+ logger.info(
267
+ message: log_message,
268
+ given_total_payloads: payloads.length,
269
+ matched_payloads: matched_payloads.length,
270
+ payloads_to_scan_further: matched_payloads.map(&:id)
271
+ )
272
+
207
273
  matched_payloads
208
274
  end
209
275
 
@@ -218,6 +284,11 @@ module Gitlab
218
284
  )
219
285
  all_applied_exclusions = Set.new
220
286
 
287
+ logger.info(
288
+ message: "Running Secret Detection scan sequentially",
289
+ payload_timeout:
290
+ )
291
+
221
292
  all_findings = payloads.flat_map do |payload|
222
293
  Timeout.timeout(payload_timeout) do
223
294
  findings, applied_exclusions = find_secrets_in_payload(
@@ -249,6 +320,12 @@ module Gitlab
249
320
 
250
321
  grouped_payloads = grouped_payload_indices.map { |idx_arr| idx_arr.map { |i| payloads[i] } }
251
322
 
323
+ logger.info(
324
+ message: "Running Secret Detection scan within a subprocess",
325
+ grouped_payloads: grouped_payloads.length,
326
+ payload_timeout:
327
+ )
328
+
252
329
  found_secrets = Parallel.flat_map(
253
330
  grouped_payloads,
254
331
  in_processes: MAX_PROCS_PER_REQUEST,
@@ -291,8 +368,10 @@ module Gitlab
291
368
  .each_with_index do |line, index|
292
369
  unless raw_value_exclusions.empty?
293
370
  raw_value_exclusions.each do |exclusion|
294
- line.gsub!(exclusion.value, '') # replace input that doesn't contain allowed value in it
295
- applied_exclusions << exclusion
371
+ # replace input that doesn't contain allowed value in it
372
+ # replace exclusion value, `.gsub!` returns 'self' if replaced otherwise 'nil'
373
+ excl_replaced = !!line.gsub!(exclusion.value, '')
374
+ applied_exclusions << exclusion if excl_replaced
296
375
  end
297
376
  end
298
377
 
@@ -323,6 +402,13 @@ module Gitlab
323
402
  end
324
403
  end
325
404
 
405
+ logger.info(
406
+ message: "Secret Detection scan found #{findings.length} secret leaks in the payload(id:#{payload.id})",
407
+ payload_id: payload.id,
408
+ detected_rules: findings.map { |f| "#{f.type}:#{f.line_number}" },
409
+ applied_exclusions: format_exclusions_arr(applied_exclusions)
410
+ )
411
+
326
412
  [findings, applied_exclusions]
327
413
  rescue StandardError => e
328
414
  logger.error "Secret Detection scan failed on the payload(id:#{payload.id}): #{e}"
@@ -338,10 +424,20 @@ module Gitlab
338
424
  # Validates the given payloads by verifying the type and
339
425
  # presence of `id` and `data` fields necessary for the scan
340
426
  def validate_scan_input(payloads)
341
- return false if payloads.nil? || !payloads.instance_of?(Array)
427
+ if payloads.nil? || !payloads.instance_of?(Array)
428
+ logger.debug(message: "Scan input validation error: payloads arg is empty or not instance of array")
429
+ return false
430
+ end
342
431
 
343
432
  payloads.all? do |payload|
344
- payload.respond_to?(:id) && payload.respond_to?(:data)
433
+ has_valid_fields = payload.respond_to?(:id) && payload.respond_to?(:data)
434
+ unless has_valid_fields
435
+ logger.debug(
436
+ message: "Scan input validation error: one of the payloads does not respond to `id` or `data`"
437
+ )
438
+ end
439
+
440
+ has_valid_fields
345
441
  end
346
442
  end
347
443
 
@@ -390,6 +486,75 @@ module Gitlab
390
486
 
391
487
  chunk_indexes
392
488
  end
489
+
490
+ # Returns array of strings with each representing a masked exclusion
491
+ #
492
+ # Example: For given arg exclusions = {
493
+ # rule: ["gitlab_personal_access_token", "aws_key"],
494
+ # path: ["test.py"],
495
+ # raw_value: ["ABC123XYZ"]
496
+ # }
497
+ #
498
+ # The output will look like the following:
499
+ # [
500
+ # "rules=gitlab_personal_access_token, aws_key",
501
+ # "raw_values=ABC*****Z",
502
+ # "paths=test.py"
503
+ # ]
504
+ def format_exclusions_hash(exclusions = {})
505
+ masked_raw_values = exclusions.fetch(:raw_value, []).map do |exclusion|
506
+ Gitlab::SecretDetection::Utils::Masker.mask_secret(exclusion.value)
507
+ end.join(", ")
508
+ paths = exclusions.fetch(:path, []).map(&:value).join(", ")
509
+ rules = exclusions.fetch(:rule, []).map(&:value).join(", ")
510
+
511
+ out = []
512
+
513
+ out << "rules=#{rules}" unless rules.empty?
514
+ out << "raw_values=#{masked_raw_values}" unless masked_raw_values.empty?
515
+ out << "paths=#{paths}" unless paths.empty?
516
+
517
+ out
518
+ end
519
+
520
+ def format_exclusions_arr(exclusions = [])
521
+ return [] if exclusions.empty?
522
+
523
+ masked_raw_values = Set.new
524
+ paths = Set.new
525
+ rules = Set.new
526
+
527
+ exclusions.each do |exclusion|
528
+ case exclusion.exclusion_type
529
+ when :EXCLUSION_TYPE_RAW_VALUE
530
+ masked_raw_values << Gitlab::SecretDetection::Utils::Masker.mask_secret(exclusion.value)
531
+ when :EXCLUSION_TYPE_RULE
532
+ rules << exclusion.value
533
+ when :EXCLUSION_TYPE_PATH
534
+ paths << exclusion.value
535
+ else
536
+ logger.warn("Unknown exclusion type #{exclusion.exclusion_type}")
537
+ end
538
+ end
539
+
540
+ out = []
541
+
542
+ out << "rules=#{rules.join(',')}" unless rules.empty?
543
+ out << "raw_values=#{masked_raw_values.join(',')}" unless masked_raw_values.empty?
544
+ out << "paths=#{paths.join(',')}" unless paths.empty?
545
+
546
+ out
547
+ end
548
+
549
+ def format_detected_secrets_metadata(findings = [])
550
+ return [] if findings.empty?
551
+
552
+ found_secrets = findings.filter do |f|
553
+ f.status == Core::Status::FOUND
554
+ end
555
+
556
+ found_secrets.map { |f| "#{f.payload_id}=>#{f.type}:#{f.line_number}" }
557
+ end
393
558
  end
394
559
  end
395
560
  end
@@ -45,19 +45,34 @@ module Gitlab
45
45
  }.freeze
46
46
 
47
47
  # Implementation for /Scan RPC method
48
- def scan(request, _call)
49
- scan_request_action(request)
48
+ def scan(request, call)
49
+ scan_request_action(request, call)
50
50
  end
51
51
 
52
52
  # Implementation for /ScanStream RPC method
53
- def scan_stream(requests, _call)
54
- request_action = ->(r) { scan_request_action(r) }
53
+ def scan_stream(requests, call)
54
+ request_action = ->(r) { scan_request_action(r, call) }
55
55
  StreamEnumerator.new(requests, request_action).each_item
56
56
  end
57
57
 
58
58
  private
59
59
 
60
- def scan_request_action(request)
60
+ def scan_request_action(request, call)
61
+ if request.nil?
62
+ logger.error(
63
+ message: "FATAL: Secret Detection gRPC scan request is `nil`",
64
+ deadline: call.deadline,
65
+ cancelled: call.cancelled?
66
+ )
67
+ return Gitlab::SecretDetection::GRPC::ScanResponse.new(
68
+ results: [],
69
+ status: Gitlab::SecretDetection::GRPC::ScanResponse::Status::STATUS_INPUT_ERROR,
70
+ applied_exclusions: []
71
+ )
72
+ end
73
+
74
+ logger.info(message: "Secret Detection gRPC scan request received")
75
+
61
76
  validate_request(request)
62
77
 
63
78
  payloads = request.payloads.to_a
@@ -66,7 +81,7 @@ module Gitlab
66
81
  request.exclusions.each do |exclusion|
67
82
  case exclusion.exclusion_type
68
83
  when :EXCLUSION_TYPE_RAW_VALUE
69
- exclusions[:raw] << exclusion
84
+ exclusions[:raw_value] << exclusion
70
85
  when :EXCLUSION_TYPE_RULE
71
86
  exclusions[:rule] << exclusion
72
87
  when :EXCLUSION_TYPE_PATH
@@ -85,7 +100,8 @@ module Gitlab
85
100
  payload_timeout: request.payload_timeout_secs
86
101
  )
87
102
  rescue StandardError => e
88
- logger.error("Failed to run the scan: #{e}")
103
+ logger.error(message: "Failed to run the secret detection scan", exception: e)
104
+ logger.error(e.backtrace&.join("\n"))
89
105
  raise ::GRPC::Unknown, e.message
90
106
  end
91
107
 
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Gitlab
4
+ module SecretDetection
5
+ module Utils
6
+ class Masker
7
+ DEFAULT_VISIBLE_CHAR_COUNT = 3
8
+ DEFAULT_MASK_CHAR_COUNT = 5
9
+ DEFAULT_MASK_CHAR = '*'
10
+
11
+ class << self
12
+ def mask_secret(
13
+ raw_secret_value,
14
+ mask_char: DEFAULT_MASK_CHAR,
15
+ visible_chars_count: DEFAULT_VISIBLE_CHAR_COUNT,
16
+ mask_chars_count: DEFAULT_MASK_CHAR_COUNT
17
+ )
18
+ return '' if raw_secret_value.nil? || raw_secret_value.empty?
19
+ return raw_secret_value if raw_secret_value.length <= visible_chars_count # Too short to mask
20
+
21
+ chars = raw_secret_value.chars
22
+ position = 0
23
+
24
+ while position < chars.length
25
+ # Show 'visible_chars_count' characters
26
+ position += visible_chars_count
27
+
28
+ # Mask next 'mask_chars_count' characters if available
29
+ mask_chars_count.times do
30
+ break if position >= chars.length
31
+
32
+ chars[position] = mask_char
33
+ position += 1
34
+ end
35
+ end
36
+
37
+ chars.join
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -2,6 +2,7 @@
2
2
 
3
3
  require_relative 'utils/certificate'
4
4
  require_relative 'utils/memoize'
5
+ require_relative 'utils/masker'
5
6
 
6
7
  module Gitlab
7
8
  module SecretDetection
@@ -8,7 +8,7 @@ module Gitlab
8
8
  # https://gitlab.com/gitlab-org/gitlab/-/issues/514015
9
9
  #
10
10
  # Ensure to maintain the same version in CHANGELOG file.
11
- VERSION = "0.19.0"
11
+ VERSION = "0.19.1"
12
12
 
13
13
  # SD_ENV env var is used to determine which environment the
14
14
  # server is running. This var is defined in `.runway/env-<env>.yml` files.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gitlab-secret_detection
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.19.0
4
+ version: 0.19.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - group::secret detection
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2025-02-13 00:00:00.000000000 Z
13
+ date: 2025-02-19 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: grpc
@@ -127,6 +127,7 @@ files:
127
127
  - lib/gitlab/secret_detection/grpc/scanner_service.rb
128
128
  - lib/gitlab/secret_detection/utils.rb
129
129
  - lib/gitlab/secret_detection/utils/certificate.rb
130
+ - lib/gitlab/secret_detection/utils/masker.rb
130
131
  - lib/gitlab/secret_detection/utils/memoize.rb
131
132
  - lib/gitlab/secret_detection/version.rb
132
133
  - proto/secret_detection.proto