gitlab-secret_detection 0.19.0 → 0.19.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 24a3afdfa8519bd53576f9fe18ffffc12d9ed32d80e27610d03300666423e8a7
4
- data.tar.gz: f7b000df1c5c6e712e528f388a44506f3cfce79bd6044de44ad106087f59d52f
3
+ metadata.gz: 92079cc4159944de4812acba9f6c0f7ec7f36a0a1c4f0770849fc2e35740d7da
4
+ data.tar.gz: 5cda8188ccc3d46d47b074a3126543db9215fd5d99def8082bae2f8f720d5454
5
5
  SHA512:
6
- metadata.gz: 4ca94bc1d02d099c7f7404b321abbc1bf7be811112e564dc44c1256b42e5aeea6d4ab207280d0ccb88ef2c62f7f17eb3fbb47f6eadc717ef5cd33f4e66cf5e26
7
- data.tar.gz: f8815666aa5ff2f129c40eb42814449d9201755a91ee2b4f3bb33b48bbe3364a9e3d78b36ec8adf2fdd94e51a03d448d9d36fbf12326f4312a14cb31843bdb96
6
+ metadata.gz: d0a74f47adcfbf6e8ec0ab72366d5151b2860788e4adba51944e7ddc03b73eef119b9b8f10890454273845ca2c94c8d74be8a5519adc6ff97a3ca485e47464b7
7
+ data.tar.gz: 399a729fa667c6174e185c10eef898e51f6f1827c0584242560033d0233dc19ce307d9a75c97eb18acf18e19f647d20d31efe1e11fb984ace4640d6dd7ddd85b
data/README.md CHANGED
@@ -62,20 +62,21 @@ the approach:
62
62
 
63
63
  Usage `make <command>`
64
64
 
65
- | Command | Description |
66
- |---------------------|---------------------------------------------------------------------------------------------------------------------------------|
67
- | `install_secret_detection_rules` | Downloads secret-detection-rules based on package version defined in RULES_VERSION |
68
- | `install` | Installs ruby gems in the project using Ruby bundler |
69
- | `lint_fix` | Fixes all the fixable Rubocop lint offenses |
70
- | `gem_clean` | Cleans existing gem file(if any) generated through gem build process |
71
- | `gem_build` | Builds Ruby gem file wrapping secret detection logic (lib directory) |
72
- | `generate_proto` | Generates ruby(.rb) files for the Protobud Service Definition files(.proto) |
73
- | `grpc_docker_build` | Builds a docker container image for gRPC server |
74
- | `grpc_docker_serve` | Runs gRPC server via docker container listening on port 8080. Run `grpc_docker_build` make command before running this command. |
75
- | `grpc_serve` | Runs gRPC server on the CLI listening on port 50001. Run `install` make command before running this command. |
76
- | `run_core_tests` | Runs RSpec tests for Secret Detection core logic |
77
- | `run_grpc_tests` | Runs RSpec tests for Secret Detection gRPC endpoints |
78
- | `run_all_tests` | Runs all the RSpec tests in the project |
65
+ | Command | Description |
66
+ |----------------------------------|---------------------------------------------------------------------------------------------------------------------------------|
67
+ | `install_secret_detection_rules` | Downloads secret-detection-rules based on package version defined in RULES_VERSION |
68
+ | `install` | Installs ruby gems in the project using Ruby bundler |
69
+ | `lint_fix` | Fixes all the fixable Rubocop lint offenses |
70
+ | `gem_clean` | Cleans existing gem file(if any) generated through gem build process |
71
+ | `gem_build` | Builds Ruby gem file wrapping secret detection logic (lib directory) |
72
+ | `generate_proto` | Generates ruby(.rb) files for the Protobuf Service Definition files(.proto) |
73
+ | `grpc_docker_build` | Builds a docker container image for gRPC server |
74
+ | `grpc_docker_serve` | Runs gRPC server via docker container listening on port 8080. Run `grpc_docker_build` make command before running this command. |
75
+ | `grpc_serve` | Runs gRPC server on the CLI listening on port 50001. Run `install` make command before running this command. |
76
+ | `run_core_tests` | Runs RSpec tests for Secret Detection core logic |
77
+ | `run_grpc_tests` | Runs RSpec tests for Secret Detection gRPC endpoints |
78
+ | `run_utils_tests` | Runs RSpec tests for Secret Detection utilities |
79
+ | `run_all_tests` | Runs all the RSpec tests in the project |
79
80
 
80
81
 
81
82
  ## Secret Detection Rules
@@ -27,13 +27,32 @@ module Gitlab
27
27
 
28
28
  # parses given ruleset file and returns the parsed rules
29
29
  def parse_ruleset
30
- # rule_file_content = File.read(path)
30
+ logger.info(
31
+ message: "Parsing local ruleset file",
32
+ ruleset_path: RULESET_FILE_PATH
33
+ )
31
34
  rules_data = TomlRB.load_file(path, symbolize_keys: true).freeze
35
+ ruleset_version = extract_ruleset_version
36
+
37
+ logger.info(
38
+ message: "Ruleset details fetched for running Secret Detection scan",
39
+ total_rules: rules_data[:rules]&.length,
40
+ ruleset_version:
41
+ )
32
42
  rules_data[:rules].freeze
33
43
  rescue StandardError => e
34
- logger.error "Failed to parse secret detection ruleset from '#{path}' path: #{e}"
44
+ logger.error(message: "Failed to parse local secret detection ruleset: #{e.message}")
35
45
  raise Core::Scanner::RulesetParseError, e
36
46
  end
47
+
48
+ def extract_ruleset_version
49
+ @ruleset_version ||= if File.readable?(RULESET_FILE_PATH)
50
+ first_line = File.open(RULESET_FILE_PATH, &:gets)
51
+ first_line&.split(":")&.[](1)&.strip
52
+ end
53
+ rescue StandardError => e
54
+ logger.error(message: "Failed to extract Secret Detection Ruleset version from ruleset file: #{e.message}")
55
+ end
37
56
  end
38
57
  end
39
58
  end
@@ -111,12 +111,29 @@ module Gitlab
111
111
  payload_timeout:,
112
112
  pattern_matcher: build_pattern_matcher(tags:),
113
113
  exclusions:
114
- }
114
+ }.freeze
115
+
116
+ logger.info(
117
+ message: "Scan input parameters for running Secret Detection scan",
118
+ timeout:,
119
+ payload_timeout:,
120
+ given_total_payloads: payloads.length,
121
+ scannable_payloads_post_keyword_filter: matched_payloads.length,
122
+ tags:,
123
+ run_in_subprocess: subprocess,
124
+ given_exclusions: format_exclusions_hash(exclusions)
125
+ )
115
126
 
116
127
  secrets, applied_exclusions = subprocess ? run_scan_within_subprocess(**scan_args) : run_scan(**scan_args)
117
128
 
118
129
  scan_status = overall_scan_status(secrets)
119
130
 
131
+ logger.info(
132
+ message: "Secret Detection scan completed with #{secrets.length} secrets detected in the given payloads",
133
+ detected_secrets_metadata: format_detected_secrets_metadata(secrets),
134
+ applied_exclusions: format_exclusions_arr(applied_exclusions)
135
+ )
136
+
120
137
  Core::Response.new(status: scan_status, results: secrets, applied_exclusions:)
121
138
  end
122
139
  rescue Timeout::Error => e
@@ -135,7 +152,18 @@ module Gitlab
135
152
  # are same as +DEFAULT_PATTERN_MATCHER_TAGS+ then returns the eagerly loaded default
136
153
  # pattern matcher created during initialization.
137
154
  def build_pattern_matcher(tags:, include_missing_tags: false)
138
- return default_pattern_matcher if tags.eql?(DEFAULT_PATTERN_MATCHER_TAGS) && !default_pattern_matcher.nil?
155
+ if tags.eql?(DEFAULT_PATTERN_MATCHER_TAGS) && !default_pattern_matcher.nil?
156
+ logger.info(
157
+ message: "Given tags input matches default matcher tags, using pre-defined RE2 Pattern Matcher"
158
+ )
159
+ return default_pattern_matcher
160
+ end
161
+
162
+ logger.info(
163
+ message: "Creating a new RE2 Pattern Matcher with given tags",
164
+ tags:,
165
+ include_missing_tags:
166
+ )
139
167
 
140
168
  matcher = RE2::Set.new
141
169
 
@@ -154,7 +182,7 @@ module Gitlab
154
182
  end
155
183
 
156
184
  unless matcher.compile
157
- logger.error "Failed to compile secret detection rulesets in RE::Set"
185
+ logger.error "Failed to compile secret detection ruleset in RE::Set"
158
186
 
159
187
  raise RulesetCompilationError
160
188
  end
@@ -174,7 +202,18 @@ module Gitlab
174
202
  end
175
203
 
176
204
  def build_keyword_matcher(tags:, include_missing_tags: false)
177
- return default_keyword_matcher if tags.eql?(DEFAULT_PATTERN_MATCHER_TAGS) && !default_keyword_matcher.nil?
205
+ if tags.eql?(DEFAULT_PATTERN_MATCHER_TAGS) && !default_keyword_matcher.nil?
206
+ logger.info(
207
+ message: "Given tags input matches default tags, using pre-defined RE2 Keyword Matcher"
208
+ )
209
+ return default_keyword_matcher
210
+ end
211
+
212
+ logger.info(
213
+ message: "Creating a new RE2 Keyword Matcher..",
214
+ tags:,
215
+ include_missing_tags:
216
+ )
178
217
 
179
218
  include_keywords = Set.new
180
219
 
@@ -187,15 +226,28 @@ module Gitlab
187
226
  include_keywords.merge(rule[:keywords]) unless rule[:keywords].nil?
188
227
  end
189
228
 
190
- return nil if include_keywords.empty?
229
+ if include_keywords.empty?
230
+ logger.error(
231
+ message: "No rule keywords found a match with given rule tags, returning empty RE2 Keyword Matcher"
232
+ )
233
+ return nil
234
+ end
191
235
 
192
236
  keywords_regex = include_keywords.join('|')
193
237
 
238
+ logger.debug(
239
+ message: "Creating RE2 Keyword Matcher with set of rule keywords",
240
+ keywords: include_keywords.to_a
241
+ )
242
+
194
243
  RE2("\\b(#{keywords_regex})")
195
244
  end
196
245
 
197
246
  def filter_by_keywords(keyword_matcher, payloads)
198
- return [] if keyword_matcher.nil?
247
+ if keyword_matcher.nil?
248
+ logger.warn "No RE2 Keyword Matcher instance available, skipping payload filter by rule keywords step.."
249
+ return payloads
250
+ end
199
251
 
200
252
  matched_payloads = []
201
253
  payloads.each do |payload|
@@ -204,6 +256,20 @@ module Gitlab
204
256
  matched_payloads << payload
205
257
  end
206
258
 
259
+ total_payloads_retained = matched_payloads.length == payloads.length ? 'all' : matched_payloads.length
260
+ log_message = if matched_payloads.empty?
261
+ "No payloads available to scan further after keyword-matching, exiting Secret Detection scan"
262
+ else
263
+ "Retained #{total_payloads_retained} payloads to scan further after keyword-matching step"
264
+ end
265
+
266
+ logger.info(
267
+ message: log_message,
268
+ given_total_payloads: payloads.length,
269
+ matched_payloads: matched_payloads.length,
270
+ payloads_to_scan_further: matched_payloads.map(&:id)
271
+ )
272
+
207
273
  matched_payloads
208
274
  end
209
275
 
@@ -218,6 +284,11 @@ module Gitlab
218
284
  )
219
285
  all_applied_exclusions = Set.new
220
286
 
287
+ logger.info(
288
+ message: "Running Secret Detection scan sequentially",
289
+ payload_timeout:
290
+ )
291
+
221
292
  all_findings = payloads.flat_map do |payload|
222
293
  Timeout.timeout(payload_timeout) do
223
294
  findings, applied_exclusions = find_secrets_in_payload(
@@ -249,6 +320,12 @@ module Gitlab
249
320
 
250
321
  grouped_payloads = grouped_payload_indices.map { |idx_arr| idx_arr.map { |i| payloads[i] } }
251
322
 
323
+ logger.info(
324
+ message: "Running Secret Detection scan within a subprocess",
325
+ grouped_payloads: grouped_payloads.length,
326
+ payload_timeout:
327
+ )
328
+
252
329
  found_secrets = Parallel.flat_map(
253
330
  grouped_payloads,
254
331
  in_processes: MAX_PROCS_PER_REQUEST,
@@ -291,8 +368,10 @@ module Gitlab
291
368
  .each_with_index do |line, index|
292
369
  unless raw_value_exclusions.empty?
293
370
  raw_value_exclusions.each do |exclusion|
294
- line.gsub!(exclusion.value, '') # replace input that doesn't contain allowed value in it
295
- applied_exclusions << exclusion
371
+ # remove every occurrence of the excluded raw value from the line
372
+ # `.gsub!` returns the string itself if a substitution was made, otherwise `nil`
373
+ excl_replaced = !!line.gsub!(exclusion.value, '')
374
+ applied_exclusions << exclusion if excl_replaced
296
375
  end
297
376
  end
298
377
 
@@ -323,6 +402,13 @@ module Gitlab
323
402
  end
324
403
  end
325
404
 
405
+ logger.info(
406
+ message: "Secret Detection scan found #{findings.length} secret leaks in the payload(id:#{payload.id})",
407
+ payload_id: payload.id,
408
+ detected_rules: findings.map { |f| "#{f.type}:#{f.line_number}" },
409
+ applied_exclusions: format_exclusions_arr(applied_exclusions)
410
+ )
411
+
326
412
  [findings, applied_exclusions]
327
413
  rescue StandardError => e
328
414
  logger.error "Secret Detection scan failed on the payload(id:#{payload.id}): #{e}"
@@ -338,10 +424,20 @@ module Gitlab
338
424
  # Validates the given payloads by verifying the type and
339
425
  # presence of `id` and `data` fields necessary for the scan
340
426
  def validate_scan_input(payloads)
341
- return false if payloads.nil? || !payloads.instance_of?(Array)
427
+ if payloads.nil? || !payloads.instance_of?(Array)
428
+ logger.debug(message: "Scan input validation error: payloads arg is empty or not instance of array")
429
+ return false
430
+ end
342
431
 
343
432
  payloads.all? do |payload|
344
- payload.respond_to?(:id) && payload.respond_to?(:data)
433
+ has_valid_fields = payload.respond_to?(:id) && payload.respond_to?(:data)
434
+ unless has_valid_fields
435
+ logger.debug(
436
+ message: "Scan input validation error: one of the payloads does not respond to `id` or `data`"
437
+ )
438
+ end
439
+
440
+ has_valid_fields
345
441
  end
346
442
  end
347
443
 
@@ -390,6 +486,75 @@ module Gitlab
390
486
 
391
487
  chunk_indexes
392
488
  end
489
+
490
+ # Returns array of strings with each representing a masked exclusion
491
+ #
492
+ # Example: For given arg exclusions = {
493
+ # rule: ["gitlab_personal_access_token", "aws_key"],
494
+ # path: ["test.py"],
495
+ # raw_value: ["ABC123XYZ"]
496
+ # }
497
+ #
498
+ # The output will look like the following:
499
+ # [
500
+ # "rules=gitlab_personal_access_token, aws_key",
501
+ # "raw_values=ABC*****Z",
502
+ # "paths=test.py"
503
+ # ]
504
+ def format_exclusions_hash(exclusions = {})
505
+ masked_raw_values = exclusions.fetch(:raw_value, []).map do |exclusion|
506
+ Gitlab::SecretDetection::Utils::Masker.mask_secret(exclusion.value)
507
+ end.join(", ")
508
+ paths = exclusions.fetch(:path, []).map(&:value).join(", ")
509
+ rules = exclusions.fetch(:rule, []).map(&:value).join(", ")
510
+
511
+ out = []
512
+
513
+ out << "rules=#{rules}" unless rules.empty?
514
+ out << "raw_values=#{masked_raw_values}" unless masked_raw_values.empty?
515
+ out << "paths=#{paths}" unless paths.empty?
516
+
517
+ out
518
+ end
519
+
520
+ def format_exclusions_arr(exclusions = [])
521
+ return [] if exclusions.empty?
522
+
523
+ masked_raw_values = Set.new
524
+ paths = Set.new
525
+ rules = Set.new
526
+
527
+ exclusions.each do |exclusion|
528
+ case exclusion.exclusion_type
529
+ when :EXCLUSION_TYPE_RAW_VALUE
530
+ masked_raw_values << Gitlab::SecretDetection::Utils::Masker.mask_secret(exclusion.value)
531
+ when :EXCLUSION_TYPE_RULE
532
+ rules << exclusion.value
533
+ when :EXCLUSION_TYPE_PATH
534
+ paths << exclusion.value
535
+ else
536
+ logger.warn("Unknown exclusion type #{exclusion.exclusion_type}")
537
+ end
538
+ end
539
+
540
+ out = []
541
+
542
+ out << "rules=#{rules.join(',')}" unless rules.empty?
543
+ out << "raw_values=#{masked_raw_values.join(',')}" unless masked_raw_values.empty?
544
+ out << "paths=#{paths.join(',')}" unless paths.empty?
545
+
546
+ out
547
+ end
548
+
549
+ def format_detected_secrets_metadata(findings = [])
550
+ return [] if findings.empty?
551
+
552
+ found_secrets = findings.filter do |f|
553
+ f.status == Core::Status::FOUND
554
+ end
555
+
556
+ found_secrets.map { |f| "#{f.payload_id}=>#{f.type}:#{f.line_number}" }
557
+ end
393
558
  end
394
559
  end
395
560
  end
@@ -45,19 +45,34 @@ module Gitlab
45
45
  }.freeze
46
46
 
47
47
  # Implementation for /Scan RPC method
48
- def scan(request, _call)
49
- scan_request_action(request)
48
+ def scan(request, call)
49
+ scan_request_action(request, call)
50
50
  end
51
51
 
52
52
  # Implementation for /ScanStream RPC method
53
- def scan_stream(requests, _call)
54
- request_action = ->(r) { scan_request_action(r) }
53
+ def scan_stream(requests, call)
54
+ request_action = ->(r) { scan_request_action(r, call) }
55
55
  StreamEnumerator.new(requests, request_action).each_item
56
56
  end
57
57
 
58
58
  private
59
59
 
60
- def scan_request_action(request)
60
+ def scan_request_action(request, call)
61
+ if request.nil?
62
+ logger.error(
63
+ message: "FATAL: Secret Detection gRPC scan request is `nil`",
64
+ deadline: call.deadline,
65
+ cancelled: call.cancelled?
66
+ )
67
+ return Gitlab::SecretDetection::GRPC::ScanResponse.new(
68
+ results: [],
69
+ status: Gitlab::SecretDetection::GRPC::ScanResponse::Status::STATUS_INPUT_ERROR,
70
+ applied_exclusions: []
71
+ )
72
+ end
73
+
74
+ logger.info(message: "Secret Detection gRPC scan request received")
75
+
61
76
  validate_request(request)
62
77
 
63
78
  payloads = request.payloads.to_a
@@ -66,7 +81,7 @@ module Gitlab
66
81
  request.exclusions.each do |exclusion|
67
82
  case exclusion.exclusion_type
68
83
  when :EXCLUSION_TYPE_RAW_VALUE
69
- exclusions[:raw] << exclusion
84
+ exclusions[:raw_value] << exclusion
70
85
  when :EXCLUSION_TYPE_RULE
71
86
  exclusions[:rule] << exclusion
72
87
  when :EXCLUSION_TYPE_PATH
@@ -85,7 +100,8 @@ module Gitlab
85
100
  payload_timeout: request.payload_timeout_secs
86
101
  )
87
102
  rescue StandardError => e
88
- logger.error("Failed to run the scan: #{e}")
103
+ logger.error(message: "Failed to run the secret detection scan", exception: e)
104
+ logger.error(e.backtrace&.join("\n"))
89
105
  raise ::GRPC::Unknown, e.message
90
106
  end
91
107
 
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Gitlab
4
+ module SecretDetection
5
+ module Utils
6
+ class Masker
7
+ DEFAULT_VISIBLE_CHAR_COUNT = 3
8
+ DEFAULT_MASK_CHAR_COUNT = 5
9
+ DEFAULT_MASK_CHAR = '*'
10
+
11
+ class << self
12
+ def mask_secret(
13
+ raw_secret_value,
14
+ mask_char: DEFAULT_MASK_CHAR,
15
+ visible_chars_count: DEFAULT_VISIBLE_CHAR_COUNT,
16
+ mask_chars_count: DEFAULT_MASK_CHAR_COUNT
17
+ )
18
+ return '' if raw_secret_value.nil? || raw_secret_value.empty?
19
+ return raw_secret_value if raw_secret_value.length <= visible_chars_count # Too short to mask
20
+
21
+ chars = raw_secret_value.chars
22
+ position = 0
23
+
24
+ while position < chars.length
25
+ # Show 'visible_chars_count' characters
26
+ position += visible_chars_count
27
+
28
+ # Mask next 'mask_chars_count' characters if available
29
+ mask_chars_count.times do
30
+ break if position >= chars.length
31
+
32
+ chars[position] = mask_char
33
+ position += 1
34
+ end
35
+ end
36
+
37
+ chars.join
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -2,6 +2,7 @@
2
2
 
3
3
  require_relative 'utils/certificate'
4
4
  require_relative 'utils/memoize'
5
+ require_relative 'utils/masker'
5
6
 
6
7
  module Gitlab
7
8
  module SecretDetection
@@ -8,7 +8,7 @@ module Gitlab
8
8
  # https://gitlab.com/gitlab-org/gitlab/-/issues/514015
9
9
  #
10
10
  # Ensure to maintain the same version in CHANGELOG file.
11
- VERSION = "0.19.0"
11
+ VERSION = "0.19.1"
12
12
 
13
13
  # SD_ENV env var is used to determine which environment the
14
14
  # server is running. This var is defined in `.runway/env-<env>.yml` files.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gitlab-secret_detection
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.19.0
4
+ version: 0.19.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - group::secret detection
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2025-02-13 00:00:00.000000000 Z
13
+ date: 2025-02-19 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: grpc
@@ -127,6 +127,7 @@ files:
127
127
  - lib/gitlab/secret_detection/grpc/scanner_service.rb
128
128
  - lib/gitlab/secret_detection/utils.rb
129
129
  - lib/gitlab/secret_detection/utils/certificate.rb
130
+ - lib/gitlab/secret_detection/utils/masker.rb
130
131
  - lib/gitlab/secret_detection/utils/memoize.rb
131
132
  - lib/gitlab/secret_detection/version.rb
132
133
  - proto/secret_detection.proto