gitlab-secret_detection 0.11.1 → 0.39.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +34 -26
- data/lib/gitlab/secret_detection/core/response.rb +16 -6
- data/lib/gitlab/secret_detection/core/ruleset.rb +30 -3
- data/lib/gitlab/secret_detection/core/scanner.rb +308 -77
- data/lib/gitlab/secret_detection/core/secret_push_protection_rules.toml +1072 -0
- data/lib/gitlab/secret_detection/core/status.rb +34 -0
- data/lib/gitlab/secret_detection/grpc/client/grpc_client.rb +50 -19
- data/lib/gitlab/secret_detection/grpc/generated/secret_detection_pb.rb +1 -1
- data/lib/gitlab/secret_detection/grpc/integrated_error_tracking.rb +64 -0
- data/lib/gitlab/secret_detection/grpc/scanner_service.rb +35 -16
- data/lib/gitlab/secret_detection/grpc.rb +1 -0
- data/lib/gitlab/secret_detection/utils/masker.rb +43 -0
- data/lib/gitlab/secret_detection/utils.rb +1 -0
- data/lib/gitlab/secret_detection/version.rb +3 -17
- data/proto/secret_detection.proto +3 -0
- metadata +209 -19
- data/lib/gitlab.rb +0 -6
|
@@ -11,14 +11,6 @@ module Gitlab
|
|
|
11
11
|
module Core
|
|
12
12
|
# Scanner is responsible for running the Secret Detection scan operation
|
|
13
13
|
class Scanner
|
|
14
|
-
# RulesetParseError is thrown when the code fails to parse the
|
|
15
|
-
# ruleset file from the given path
|
|
16
|
-
RulesetParseError = Class.new(StandardError)
|
|
17
|
-
|
|
18
|
-
# RulesetCompilationError is thrown when the code fails to compile
|
|
19
|
-
# the predefined rulesets
|
|
20
|
-
RulesetCompilationError = Class.new(StandardError)
|
|
21
|
-
|
|
22
14
|
# default time limit(in seconds) for running the scan operation per invocation
|
|
23
15
|
DEFAULT_SCAN_TIMEOUT_SECS = 180 # 3 minutes
|
|
24
16
|
# default time limit(in seconds) for running the scan operation on a single payload
|
|
@@ -32,7 +24,9 @@ module Gitlab
|
|
|
32
24
|
# run the scan within a new subprocess.
|
|
33
25
|
MIN_CHUNK_SIZE_PER_PROC_BYTES = 2_097_152 # 2MiB
|
|
34
26
|
# Whether to run scan in subprocesses or not. Default is false.
|
|
35
|
-
RUN_IN_SUBPROCESS = false
|
|
27
|
+
RUN_IN_SUBPROCESS = ENV.fetch('GITLAB_SD_RUN_IN_SUBPROCESS', false)
|
|
28
|
+
# Default limit for max findings to be returned in the scan
|
|
29
|
+
DEFAULT_MAX_FINDINGS_LIMIT = 999
|
|
36
30
|
|
|
37
31
|
# Initializes the instance with logger along with following operations:
|
|
38
32
|
# 1. Extract keywords from the parsed ruleset to use it for matching keywords before regex operation.
|
|
@@ -46,7 +40,7 @@ module Gitlab
|
|
|
46
40
|
tags: DEFAULT_PATTERN_MATCHER_TAGS,
|
|
47
41
|
include_missing_tags: false
|
|
48
42
|
)
|
|
49
|
-
@default_pattern_matcher = build_pattern_matcher(
|
|
43
|
+
@default_pattern_matcher, @default_rules = build_pattern_matcher(
|
|
50
44
|
tags: DEFAULT_PATTERN_MATCHER_TAGS,
|
|
51
45
|
include_missing_tags: false
|
|
52
46
|
) # includes only gitlab_blocking rules
|
|
@@ -59,13 +53,18 @@ module Gitlab
|
|
|
59
53
|
# +timeout+:: No of seconds(accepts floating point for smaller time values) to limit the total scan duration
|
|
60
54
|
# +payload_timeout+:: No of seconds(accepts floating point for smaller time values) to limit
|
|
61
55
|
# the scan duration on each payload
|
|
62
|
-
# +
|
|
63
|
-
#
|
|
64
|
-
#
|
|
65
|
-
#
|
|
56
|
+
# +exclusions+:: Hash with keys: :raw_value, :rule and values of arrays of either
|
|
57
|
+
# GRPC::Exclusion objects (when used as a standalone service)
|
|
58
|
+
# or Security::ProjectSecurityExclusion objects (when used as gem).
|
|
59
|
+
# :raw_value - Exclusions in the :raw_value array are the raw values to ignore.
|
|
60
|
+
# :rule - Exclusions in the :rule array are the rules to exclude from the ruleset used for the scan.
|
|
61
|
+
# Each rule is represented by its ID. For example: `gitlab_personal_access_token`
|
|
62
|
+
# for representing Gitlab Personal Access Token. By default, no rule is excluded from the ruleset.
|
|
66
63
|
# +tags+:: Array of tag values to filter from the default ruleset when determining the rules used for the scan.
|
|
67
64
|
# For example: Add `gitlab_blocking` to include only rules for Push Protection. Defaults to
|
|
68
65
|
# [`gitlab_blocking`] (+DEFAULT_PATTERN_MATCHER_TAGS+).
|
|
66
|
+
# +max_findings_limit+:: Integer to limit the number of findings to be returned in the scan. Defaults
|
|
67
|
+
# to 999 (+DEFAULT_MAX_FINDINGS_LIMIT+).
|
|
69
68
|
#
|
|
70
69
|
# NOTE:
|
|
71
70
|
# Running the scan in fork mode primarily focuses on reducing the memory consumption of the scan by
|
|
@@ -84,13 +83,12 @@ module Gitlab
|
|
|
84
83
|
payloads,
|
|
85
84
|
timeout: DEFAULT_SCAN_TIMEOUT_SECS,
|
|
86
85
|
payload_timeout: DEFAULT_PAYLOAD_TIMEOUT_SECS,
|
|
87
|
-
|
|
88
|
-
rule_exclusions: [],
|
|
86
|
+
exclusions: {},
|
|
89
87
|
tags: DEFAULT_PATTERN_MATCHER_TAGS,
|
|
90
|
-
subprocess: RUN_IN_SUBPROCESS
|
|
88
|
+
subprocess: RUN_IN_SUBPROCESS,
|
|
89
|
+
max_findings_limit: DEFAULT_MAX_FINDINGS_LIMIT
|
|
91
90
|
)
|
|
92
|
-
|
|
93
|
-
return Core::Response.new(Core::Status::INPUT_ERROR) unless validate_scan_input(payloads)
|
|
91
|
+
return Core::Response.new(status: Core::Status::INPUT_ERROR) unless validate_scan_input(payloads)
|
|
94
92
|
|
|
95
93
|
# assign defaults since grpc passing zero timeout value to `Timeout.timeout(..)` makes it effectively useless.
|
|
96
94
|
timeout = DEFAULT_SCAN_TIMEOUT_SECS unless timeout.positive?
|
|
@@ -102,29 +100,53 @@ module Gitlab
|
|
|
102
100
|
|
|
103
101
|
matched_payloads = filter_by_keywords(keyword_matcher, payloads)
|
|
104
102
|
|
|
105
|
-
next Core::Response.new(Core::Status::NOT_FOUND) if matched_payloads.empty?
|
|
103
|
+
next Core::Response.new(status: Core::Status::NOT_FOUND) if matched_payloads.empty?
|
|
106
104
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
pattern_matcher: build_pattern_matcher(tags:),
|
|
110
|
-
raw_value_exclusions:, rule_exclusions:
|
|
111
|
-
}
|
|
105
|
+
# the pattern matcher will filter rules by tags so we use the filtered rule list
|
|
106
|
+
pattern_matcher, active_rules = build_pattern_matcher(tags:)
|
|
112
107
|
|
|
113
|
-
|
|
108
|
+
scan_args = {
|
|
109
|
+
payloads: matched_payloads,
|
|
110
|
+
payload_timeout:,
|
|
111
|
+
pattern_matcher:,
|
|
112
|
+
exclusions:,
|
|
113
|
+
rules: active_rules,
|
|
114
|
+
max_findings_limit:
|
|
115
|
+
}.freeze
|
|
116
|
+
|
|
117
|
+
logger.info(
|
|
118
|
+
message: "Scan input parameters for running Secret Detection scan",
|
|
119
|
+
timeout:,
|
|
120
|
+
payload_timeout:,
|
|
121
|
+
given_total_payloads: payloads.length,
|
|
122
|
+
scannable_payloads_post_keyword_filter: matched_payloads.length,
|
|
123
|
+
tags:,
|
|
124
|
+
run_in_subprocess: subprocess,
|
|
125
|
+
max_findings_limit:,
|
|
126
|
+
given_exclusions: format_exclusions_hash(exclusions)
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
secrets, applied_exclusions = subprocess ? run_scan_within_subprocess(**scan_args) : run_scan(**scan_args)
|
|
114
130
|
|
|
115
131
|
scan_status = overall_scan_status(secrets)
|
|
116
132
|
|
|
117
|
-
|
|
133
|
+
logger.info(
|
|
134
|
+
message: "Secret Detection scan completed with #{secrets.length} secrets detected in the given payloads",
|
|
135
|
+
detected_secrets_metadata: format_detected_secrets_metadata(secrets),
|
|
136
|
+
applied_exclusions: format_exclusions_arr(applied_exclusions)
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
Core::Response.new(status: scan_status, results: secrets, applied_exclusions:)
|
|
118
140
|
end
|
|
119
141
|
rescue Timeout::Error => e
|
|
120
142
|
logger.error "Secret detection operation timed out: #{e}"
|
|
121
143
|
|
|
122
|
-
Core::Response.new(Core::Status::SCAN_TIMEOUT)
|
|
144
|
+
Core::Response.new(status: Core::Status::SCAN_TIMEOUT)
|
|
123
145
|
end
|
|
124
146
|
|
|
125
147
|
private
|
|
126
148
|
|
|
127
|
-
attr_reader :logger, :rules, :keywords, :default_pattern_matcher, :default_keyword_matcher
|
|
149
|
+
attr_reader :logger, :rules, :keywords, :default_pattern_matcher, :default_keyword_matcher, :default_rules
|
|
128
150
|
|
|
129
151
|
# Builds RE2::Set pattern matcher for the given combination of rules
|
|
130
152
|
# and tags. It also allows a choice(via `include_missing_tags`) to consider rules
|
|
@@ -132,31 +154,49 @@ module Gitlab
|
|
|
132
154
|
# are same as +DEFAULT_PATTERN_MATCHER_TAGS+ then returns the eagerly loaded default
|
|
133
155
|
# pattern matcher created during initialization.
|
|
134
156
|
def build_pattern_matcher(tags:, include_missing_tags: false)
|
|
135
|
-
|
|
157
|
+
if tags.eql?(DEFAULT_PATTERN_MATCHER_TAGS) && !default_pattern_matcher.nil?
|
|
158
|
+
logger.info(
|
|
159
|
+
message: "Given tags input matches default matcher tags, using pre-defined RE2 Pattern Matcher"
|
|
160
|
+
)
|
|
161
|
+
return [default_pattern_matcher, default_rules]
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
logger.info(
|
|
165
|
+
message: "Creating a new RE2 Pattern Matcher with given tags",
|
|
166
|
+
tags:,
|
|
167
|
+
include_missing_tags:
|
|
168
|
+
)
|
|
169
|
+
active_rules = []
|
|
136
170
|
|
|
137
171
|
matcher = RE2::Set.new
|
|
138
172
|
|
|
139
|
-
|
|
140
|
-
|
|
173
|
+
begin
|
|
174
|
+
rules.each do |rule|
|
|
175
|
+
rule_tags = rule[:tags]
|
|
141
176
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
177
|
+
include_rule = if tags.empty?
|
|
178
|
+
true
|
|
179
|
+
elsif rule_tags
|
|
180
|
+
tags.intersect?(rule_tags)
|
|
181
|
+
else
|
|
182
|
+
include_missing_tags
|
|
183
|
+
end
|
|
149
184
|
|
|
150
|
-
|
|
185
|
+
active_rules << rule if include_rule
|
|
186
|
+
matcher.add(rule[:regex]) if include_rule
|
|
187
|
+
end
|
|
188
|
+
rescue StandardError => e
|
|
189
|
+
logger.error "Failed to add regex secret detection ruleset in RE::Set: #{e.message}"
|
|
190
|
+
raise Core::Ruleset::RulesetCompilationError, cause: e
|
|
151
191
|
end
|
|
152
192
|
|
|
153
193
|
unless matcher.compile
|
|
154
|
-
logger.error "Failed to compile secret detection
|
|
194
|
+
logger.error "Failed to compile secret detection ruleset in RE::Set"
|
|
155
195
|
|
|
156
|
-
raise RulesetCompilationError
|
|
196
|
+
raise Core::Ruleset::RulesetCompilationError
|
|
157
197
|
end
|
|
158
198
|
|
|
159
|
-
matcher
|
|
199
|
+
[matcher, active_rules]
|
|
160
200
|
end
|
|
161
201
|
|
|
162
202
|
# Creates and returns the unique set of rule matching keywords
|
|
@@ -171,7 +211,18 @@ module Gitlab
|
|
|
171
211
|
end
|
|
172
212
|
|
|
173
213
|
def build_keyword_matcher(tags:, include_missing_tags: false)
|
|
174
|
-
|
|
214
|
+
if tags.eql?(DEFAULT_PATTERN_MATCHER_TAGS) && !default_keyword_matcher.nil?
|
|
215
|
+
logger.info(
|
|
216
|
+
message: "Given tags input matches default tags, using pre-defined RE2 Keyword Matcher"
|
|
217
|
+
)
|
|
218
|
+
return default_keyword_matcher
|
|
219
|
+
end
|
|
220
|
+
|
|
221
|
+
logger.info(
|
|
222
|
+
message: "Creating a new RE2 Keyword Matcher..",
|
|
223
|
+
tags:,
|
|
224
|
+
include_missing_tags:
|
|
225
|
+
)
|
|
175
226
|
|
|
176
227
|
include_keywords = Set.new
|
|
177
228
|
|
|
@@ -184,15 +235,28 @@ module Gitlab
|
|
|
184
235
|
include_keywords.merge(rule[:keywords]) unless rule[:keywords].nil?
|
|
185
236
|
end
|
|
186
237
|
|
|
187
|
-
|
|
238
|
+
if include_keywords.empty?
|
|
239
|
+
logger.error(
|
|
240
|
+
message: "No rule keywords found a match with given rule tags, returning empty RE2 Keyword Matcher"
|
|
241
|
+
)
|
|
242
|
+
return nil
|
|
243
|
+
end
|
|
188
244
|
|
|
189
|
-
keywords_regex = include_keywords.join('|')
|
|
245
|
+
keywords_regex = include_keywords.map { |keyword| RE2::Regexp.quote(keyword) }.join('|')
|
|
190
246
|
|
|
191
|
-
|
|
247
|
+
logger.debug(
|
|
248
|
+
message: "Creating RE2 Keyword Matcher with set of rule keywords",
|
|
249
|
+
keywords: include_keywords.to_a
|
|
250
|
+
)
|
|
251
|
+
|
|
252
|
+
RE2("(#{keywords_regex})")
|
|
192
253
|
end
|
|
193
254
|
|
|
194
255
|
def filter_by_keywords(keyword_matcher, payloads)
|
|
195
|
-
|
|
256
|
+
if keyword_matcher.nil?
|
|
257
|
+
logger.warn "No RE2 Keyword Matcher instance available, skipping payload filter by rule keywords step.."
|
|
258
|
+
return payloads
|
|
259
|
+
end
|
|
196
260
|
|
|
197
261
|
matched_payloads = []
|
|
198
262
|
payloads.each do |payload|
|
|
@@ -201,73 +265,142 @@ module Gitlab
|
|
|
201
265
|
matched_payloads << payload
|
|
202
266
|
end
|
|
203
267
|
|
|
204
|
-
matched_payloads.
|
|
268
|
+
total_payloads_retained = matched_payloads.length == payloads.length ? 'all' : matched_payloads.length
|
|
269
|
+
log_message = if matched_payloads.empty?
|
|
270
|
+
"No payloads available to scan further after keyword-matching, exiting Secret Detection scan"
|
|
271
|
+
else
|
|
272
|
+
"Retained #{total_payloads_retained} payloads to scan further after keyword-matching step"
|
|
273
|
+
end
|
|
274
|
+
|
|
275
|
+
logger.info(
|
|
276
|
+
message: log_message,
|
|
277
|
+
given_total_payloads: payloads.length,
|
|
278
|
+
matched_payloads: matched_payloads.length,
|
|
279
|
+
payloads_to_scan_further: matched_payloads.map(&:id)
|
|
280
|
+
)
|
|
281
|
+
|
|
282
|
+
matched_payloads
|
|
205
283
|
end
|
|
206
284
|
|
|
207
285
|
# Runs the secret detection scan on the given list of payloads. It accepts
|
|
208
286
|
# literal values to exclude from the input before the scan, also SD rules to exclude during
|
|
209
287
|
# the scan when performed on the payloads.
|
|
210
288
|
def run_scan(
|
|
211
|
-
payloads:,
|
|
212
|
-
|
|
289
|
+
payloads:,
|
|
290
|
+
payload_timeout:,
|
|
291
|
+
pattern_matcher:,
|
|
292
|
+
max_findings_limit:,
|
|
293
|
+
exclusions: {},
|
|
294
|
+
rules: [])
|
|
295
|
+
all_applied_exclusions = Set.new
|
|
296
|
+
|
|
297
|
+
logger.info(
|
|
298
|
+
message: "Running Secret Detection scan sequentially",
|
|
299
|
+
payload_timeout:
|
|
300
|
+
)
|
|
301
|
+
|
|
302
|
+
capped_findings = payloads.lazy.flat_map do |payload|
|
|
213
303
|
Timeout.timeout(payload_timeout) do
|
|
214
|
-
find_secrets_in_payload(
|
|
304
|
+
findings, applied_exclusions = find_secrets_in_payload(
|
|
215
305
|
payload:,
|
|
216
306
|
pattern_matcher:,
|
|
217
|
-
|
|
307
|
+
exclusions:,
|
|
308
|
+
rules:
|
|
218
309
|
)
|
|
310
|
+
all_applied_exclusions.merge(applied_exclusions)
|
|
311
|
+
findings
|
|
219
312
|
end
|
|
220
313
|
rescue Timeout::Error => e
|
|
221
|
-
logger.
|
|
314
|
+
logger.warn "Secret Detection scan timed out on the payload(id:#{payload.id}): #{e}"
|
|
315
|
+
|
|
222
316
|
Core::Finding.new(payload.id,
|
|
223
317
|
Core::Status::PAYLOAD_TIMEOUT)
|
|
224
|
-
end
|
|
318
|
+
end.take(max_findings_limit).to_a
|
|
319
|
+
|
|
320
|
+
[capped_findings, all_applied_exclusions.to_a]
|
|
225
321
|
end
|
|
226
322
|
|
|
227
323
|
def run_scan_within_subprocess(
|
|
228
|
-
payloads:,
|
|
229
|
-
|
|
324
|
+
payloads:,
|
|
325
|
+
payload_timeout:,
|
|
326
|
+
pattern_matcher:,
|
|
327
|
+
max_findings_limit:,
|
|
328
|
+
exclusions: {},
|
|
329
|
+
rules: []
|
|
330
|
+
)
|
|
331
|
+
all_applied_exclusions = Set.new
|
|
332
|
+
|
|
230
333
|
payload_sizes = payloads.map(&:size)
|
|
231
334
|
grouped_payload_indices = group_by_chunk_size(payload_sizes)
|
|
232
335
|
|
|
233
336
|
grouped_payloads = grouped_payload_indices.map { |idx_arr| idx_arr.map { |i| payloads[i] } }
|
|
234
337
|
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
)
|
|
240
|
-
|
|
338
|
+
logger.info(
|
|
339
|
+
message: "Running Secret Detection scan within a subprocess",
|
|
340
|
+
grouped_payloads: grouped_payloads.length,
|
|
341
|
+
payload_timeout:
|
|
342
|
+
)
|
|
343
|
+
|
|
344
|
+
found_secrets = []
|
|
345
|
+
|
|
346
|
+
grouped_payloads.each do |grouped_payload|
|
|
347
|
+
break if found_secrets.length >= max_findings_limit
|
|
348
|
+
|
|
349
|
+
batch_results = Parallel.map(
|
|
350
|
+
grouped_payload,
|
|
351
|
+
in_processes: MAX_PROCS_PER_REQUEST,
|
|
352
|
+
isolation: true # do not reuse sub-processes
|
|
353
|
+
) do |payload|
|
|
241
354
|
Timeout.timeout(payload_timeout) do
|
|
242
|
-
find_secrets_in_payload(
|
|
355
|
+
findings, applied_exclusions = find_secrets_in_payload(
|
|
243
356
|
payload:,
|
|
244
357
|
pattern_matcher:,
|
|
245
|
-
|
|
358
|
+
exclusions:,
|
|
359
|
+
rules:
|
|
246
360
|
)
|
|
361
|
+
[findings, applied_exclusions]
|
|
247
362
|
end
|
|
248
363
|
rescue Timeout::Error => e
|
|
249
|
-
logger.
|
|
364
|
+
logger.warn "Secret Detection scan timed out on the payload(id:#{payload.id}): #{e}"
|
|
365
|
+
|
|
250
366
|
Core::Finding.new(payload.id, Core::Status::PAYLOAD_TIMEOUT)
|
|
251
367
|
end
|
|
368
|
+
|
|
369
|
+
# Process results and collect exclusions
|
|
370
|
+
batch_results.each do |findings, applied_exclusions|
|
|
371
|
+
all_applied_exclusions.merge(applied_exclusions)
|
|
372
|
+
|
|
373
|
+
remaining_slots = max_findings_limit - found_secrets.length
|
|
374
|
+
found_secrets.concat(findings.take(remaining_slots))
|
|
375
|
+
|
|
376
|
+
break if found_secrets.length >= max_findings_limit
|
|
377
|
+
end
|
|
252
378
|
end
|
|
253
379
|
|
|
254
|
-
found_secrets.
|
|
380
|
+
[found_secrets, all_applied_exclusions.to_a]
|
|
255
381
|
end
|
|
256
382
|
|
|
257
383
|
# Finds secrets in the given payload guarded with a timeout as a circuit breaker. It accepts
|
|
258
384
|
# literal values to exclude from the input before the scan, also SD rules to exclude during
|
|
259
385
|
# the scan.
|
|
260
|
-
def find_secrets_in_payload(payload:, pattern_matcher:,
|
|
386
|
+
def find_secrets_in_payload(payload:, pattern_matcher:, exclusions: {}, rules: @default_rules)
|
|
261
387
|
findings = []
|
|
388
|
+
applied_exclusions = Set.new
|
|
262
389
|
|
|
263
390
|
payload_offset = payload.respond_to?(:offset) ? payload.offset : 0
|
|
264
391
|
|
|
392
|
+
raw_value_exclusions = exclusions.fetch(:raw_value, [])
|
|
393
|
+
rule_exclusions = exclusions.fetch(:rule, [])
|
|
394
|
+
|
|
265
395
|
payload.data
|
|
266
396
|
.each_line($INPUT_RECORD_SEPARATOR, chomp: true)
|
|
267
397
|
.each_with_index do |line, index|
|
|
268
398
|
unless raw_value_exclusions.empty?
|
|
269
|
-
raw_value_exclusions.each do |
|
|
270
|
-
|
|
399
|
+
raw_value_exclusions.each do |exclusion|
|
|
400
|
+
# strip the excluded (allowed) raw value out of the line so it is not scanned
|
|
401
|
+
# replace exclusion value, `.gsub!` returns 'self' if replaced otherwise 'nil'
|
|
402
|
+
excl_replaced = !!line.gsub!(exclusion.value, '')
|
|
403
|
+
applied_exclusions << exclusion if excl_replaced
|
|
271
404
|
end
|
|
272
405
|
end
|
|
273
406
|
|
|
@@ -284,27 +417,56 @@ module Gitlab
|
|
|
284
417
|
matches.each do |match_idx|
|
|
285
418
|
rule = rules[match_idx]
|
|
286
419
|
|
|
287
|
-
next if
|
|
420
|
+
next if applied_rule_exclusion?(rule[:id], rule_exclusions, applied_exclusions)
|
|
421
|
+
|
|
422
|
+
title = rule[:title].nil? ? rule[:description] : rule[:title]
|
|
288
423
|
|
|
289
|
-
findings << Core::Finding.new(
|
|
290
|
-
|
|
424
|
+
findings << Core::Finding.new(
|
|
425
|
+
payload.id,
|
|
426
|
+
Core::Status::FOUND,
|
|
427
|
+
line_no,
|
|
428
|
+
rule[:id],
|
|
429
|
+
title
|
|
430
|
+
)
|
|
291
431
|
end
|
|
292
432
|
end
|
|
293
433
|
|
|
294
|
-
|
|
434
|
+
logger.info(
|
|
435
|
+
message: "Secret Detection scan found #{findings.length} secret leaks in the payload(id:#{payload.id})",
|
|
436
|
+
payload_id: payload.id,
|
|
437
|
+
detected_rules: findings.map { |f| "#{f.type}:#{f.line_number}" },
|
|
438
|
+
applied_exclusions: format_exclusions_arr(applied_exclusions)
|
|
439
|
+
)
|
|
440
|
+
|
|
441
|
+
[findings, applied_exclusions]
|
|
295
442
|
rescue StandardError => e
|
|
296
443
|
logger.error "Secret Detection scan failed on the payload(id:#{payload.id}): #{e}"
|
|
297
444
|
|
|
298
|
-
Core::Finding.new(payload.id, Core::Status::SCAN_ERROR)
|
|
445
|
+
[[Core::Finding.new(payload.id, Core::Status::SCAN_ERROR)], []]
|
|
446
|
+
end
|
|
447
|
+
|
|
448
|
+
def applied_rule_exclusion?(type, rule_exclusions, applied_exclusions)
|
|
449
|
+
applied_exclusion = rule_exclusions&.find { |rule_exclusion| rule_exclusion.value == type }
|
|
450
|
+
applied_exclusion && (applied_exclusions << applied_exclusion)
|
|
299
451
|
end
|
|
300
452
|
|
|
301
453
|
# Validates the given payloads by verifying the type and
|
|
302
454
|
# presence of `id` and `data` fields necessary for the scan
|
|
303
455
|
def validate_scan_input(payloads)
|
|
304
|
-
|
|
456
|
+
if payloads.nil? || !payloads.instance_of?(Array)
|
|
457
|
+
logger.debug(message: "Scan input validation error: payloads arg is empty or not instance of array")
|
|
458
|
+
return false
|
|
459
|
+
end
|
|
305
460
|
|
|
306
461
|
payloads.all? do |payload|
|
|
307
|
-
payload.respond_to?(:id) && payload.respond_to?(:data)
|
|
462
|
+
has_valid_fields = payload.respond_to?(:id) && payload.respond_to?(:data) && payload.data.is_a?(String)
|
|
463
|
+
unless has_valid_fields
|
|
464
|
+
logger.debug(
|
|
465
|
+
message: "Scan input validation error: one of the payloads does not respond to `id` or `data`"
|
|
466
|
+
)
|
|
467
|
+
end
|
|
468
|
+
|
|
469
|
+
has_valid_fields
|
|
308
470
|
end
|
|
309
471
|
end
|
|
310
472
|
|
|
@@ -353,6 +515,75 @@ module Gitlab
|
|
|
353
515
|
|
|
354
516
|
chunk_indexes
|
|
355
517
|
end
|
|
518
|
+
|
|
519
|
+
# Returns array of strings with each representing a masked exclusion
|
|
520
|
+
#
|
|
521
|
+
# Example: For given arg exclusions = {
|
|
522
|
+
# rule: ["gitlab_personal_access_token", "aws_key"],
|
|
523
|
+
# path: ["test.py"],
|
|
524
|
+
# raw_value: ["ABC123XYZ"]
|
|
525
|
+
# }
|
|
526
|
+
#
|
|
527
|
+
# The output will look like the following:
|
|
528
|
+
# [
|
|
529
|
+
# "rules=gitlab_personal_access_token, aws_key",
|
|
530
|
+
# "raw_values=AB*****YZ",
|
|
531
|
+
# "paths=test.py"
|
|
532
|
+
# ]
|
|
533
|
+
def format_exclusions_hash(exclusions = {})
|
|
534
|
+
masked_raw_values = exclusions.fetch(:raw_value, []).map do |exclusion|
|
|
535
|
+
Gitlab::SecretDetection::Utils::Masker.mask_secret(exclusion.value)
|
|
536
|
+
end.join(", ")
|
|
537
|
+
paths = exclusions.fetch(:path, []).map(&:value).join(", ")
|
|
538
|
+
rules = exclusions.fetch(:rule, []).map(&:value).join(", ")
|
|
539
|
+
|
|
540
|
+
out = []
|
|
541
|
+
|
|
542
|
+
out << "rules=#{rules}" unless rules.empty?
|
|
543
|
+
out << "raw_values=#{masked_raw_values}" unless masked_raw_values.empty?
|
|
544
|
+
out << "paths=#{paths}" unless paths.empty?
|
|
545
|
+
|
|
546
|
+
out
|
|
547
|
+
end
|
|
548
|
+
|
|
549
|
+
def format_exclusions_arr(exclusions = [])
|
|
550
|
+
return [] if exclusions.empty?
|
|
551
|
+
|
|
552
|
+
masked_raw_values = Set.new
|
|
553
|
+
paths = Set.new
|
|
554
|
+
rules = Set.new
|
|
555
|
+
|
|
556
|
+
exclusions.each do |exclusion|
|
|
557
|
+
case exclusion.exclusion_type
|
|
558
|
+
when :EXCLUSION_TYPE_RAW_VALUE
|
|
559
|
+
masked_raw_values << Gitlab::SecretDetection::Utils::Masker.mask_secret(exclusion.value)
|
|
560
|
+
when :EXCLUSION_TYPE_RULE
|
|
561
|
+
rules << exclusion.value
|
|
562
|
+
when :EXCLUSION_TYPE_PATH
|
|
563
|
+
paths << exclusion.value
|
|
564
|
+
else
|
|
565
|
+
logger.warn("Unknown exclusion type #{exclusion.exclusion_type}")
|
|
566
|
+
end
|
|
567
|
+
end
|
|
568
|
+
|
|
569
|
+
out = []
|
|
570
|
+
|
|
571
|
+
out << "rules=#{rules.join(',')}" unless rules.empty?
|
|
572
|
+
out << "raw_values=#{masked_raw_values.join(',')}" unless masked_raw_values.empty?
|
|
573
|
+
out << "paths=#{paths.join(',')}" unless paths.empty?
|
|
574
|
+
|
|
575
|
+
out
|
|
576
|
+
end
|
|
577
|
+
|
|
578
|
+
def format_detected_secrets_metadata(findings = [])
|
|
579
|
+
return [] if findings.empty?
|
|
580
|
+
|
|
581
|
+
found_secrets = findings.filter do |f|
|
|
582
|
+
f.status == Core::Status::FOUND
|
|
583
|
+
end
|
|
584
|
+
|
|
585
|
+
found_secrets.map { |f| "#{f.payload_id}=>#{f.type}:#{f.line_number}" }
|
|
586
|
+
end
|
|
356
587
|
end
|
|
357
588
|
end
|
|
358
589
|
end
|