gitlab-secret_detection 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/gitlab/secret_detection/core/scanner.rb +81 -3
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 648b6d5277ac8e7948762533af39dc2f4ff0ae4c62fbdcc6d5d32615a44ab815
|
4
|
+
data.tar.gz: afd2a580cb0a73bb84a401616ec622e1e14d2b75021036a959f933ed75864cf6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7c49b71891f6e13d8dc252936f18df838d8e9d42000cbc1f4f3d7fe56b258a7e2c17bce7a7f0e771bc16991fa46bacf641a636a1d1efd3ef23de6b0a31a011b8
|
7
|
+
data.tar.gz: af93c82a0025be12bc82fe6ff62b290fe193a2338ae2f98d83bc2729d1c7cc35a223330a3a9907a9f715c009127e93ba2d13fc33982d2f13a179d8548f5cc36e
|
@@ -4,6 +4,7 @@ require 're2'
|
|
4
4
|
require 'logger'
|
5
5
|
require 'timeout'
|
6
6
|
require 'English'
|
7
|
+
require 'parallel'
|
7
8
|
|
8
9
|
module GitLab
|
9
10
|
module SecretDetection
|
@@ -24,6 +25,14 @@ module GitLab
|
|
24
25
|
DEFAULT_PAYLOAD_TIMEOUT_SECS = 30 # 30 seconds
|
25
26
|
# Tags used for creating default pattern matcher
|
26
27
|
DEFAULT_PATTERN_MATCHER_TAGS = ['gitlab_blocking'].freeze
|
28
|
+
# Maximum number of child processes to spawn per request
|
29
|
+
# ref: https://gitlab.com/gitlab-org/gitlab/-/issues/430160
|
30
|
+
MAX_PROCS_PER_REQUEST = 5
|
31
|
+
# Minimum cumulative size of the payloads required to spawn and
|
32
|
+
# run the scan within a new subprocess.
|
33
|
+
MIN_CHUNK_SIZE_PER_PROC_BYTES = 2_097_152 # 2MiB
|
34
|
+
# Whether to run scan in subprocesses or not. Default is false.
|
35
|
+
RUN_IN_SUBPROCESS = false
|
27
36
|
|
28
37
|
# Initializes the instance with logger along with following operations:
|
29
38
|
# 1. Extract keywords from the parsed ruleset to use it for matching keywords before regex operation.
|
@@ -58,6 +67,13 @@ module GitLab
|
|
58
67
|
# For example: Add `gitlab_blocking` to include only rules for Push Protection. Defaults to
|
59
68
|
# [`gitlab_blocking`] (+DEFAULT_PATTERN_MATCHER_TAGS+).
|
60
69
|
#
|
70
|
+
# NOTE:
|
71
|
+
# Running the scan in fork mode primarily focuses on reducing the memory consumption of the scan by
|
72
|
+
# offloading regex operations on large payloads to sub-processes. However, it does not guarantee an improvement
|
73
|
+
# in the overall latency of the scan, specifically in the case of smaller payloads, where the overhead of
|
74
|
+
# forking a new process adds to the overall latency of the scan instead. More reference on Subprocess-based
|
75
|
+
# execution can be found here: https://gitlab.com/gitlab-org/gitlab/-/issues/430160.
|
76
|
+
#
|
61
77
|
# Returns an instance of GitLab::SecretDetection::Core::Response by following below structure:
|
62
78
|
# {
|
63
79
|
# status: One of the Core::Status values
|
@@ -70,7 +86,8 @@ module GitLab
|
|
70
86
|
payload_timeout: DEFAULT_PAYLOAD_TIMEOUT_SECS,
|
71
87
|
raw_value_exclusions: [],
|
72
88
|
rule_exclusions: [],
|
73
|
-
tags: DEFAULT_PATTERN_MATCHER_TAGS
|
89
|
+
tags: DEFAULT_PATTERN_MATCHER_TAGS,
|
90
|
+
subprocess: RUN_IN_SUBPROCESS
|
74
91
|
)
|
75
92
|
|
76
93
|
return Core::Response.new(Core::Status::INPUT_ERROR) unless validate_scan_input(payloads)
|
@@ -87,11 +104,13 @@ module GitLab
|
|
87
104
|
|
88
105
|
next Core::Response.new(Core::Status::NOT_FOUND) if matched_payloads.empty?
|
89
106
|
|
90
|
-
|
107
|
+
scan_args = {
|
91
108
|
payloads: matched_payloads, payload_timeout:,
|
92
109
|
pattern_matcher: build_pattern_matcher(tags:),
|
93
110
|
raw_value_exclusions:, rule_exclusions:
|
94
|
-
|
111
|
+
}
|
112
|
+
|
113
|
+
secrets = subprocess ? run_scan_within_subprocess(**scan_args) : run_scan(**scan_args)
|
95
114
|
|
96
115
|
scan_status = overall_scan_status(secrets)
|
97
116
|
|
@@ -205,6 +224,36 @@ module GitLab
|
|
205
224
|
end
|
206
225
|
end
|
207
226
|
|
227
|
+
# Scans the given payloads for secrets inside forked sub-processes.
#
# Payloads are first partitioned into batches by +group_by_chunk_size+, then each
# batch is handed to +Parallel.flat_map+, which runs at most
# +MAX_PROCS_PER_REQUEST+ processes at a time and never reuses a process
# (+isolation: true+). Inside a batch, every payload is scanned by
# +find_secrets_in_payload+ under its own +payload_timeout+ guard.
#
# payloads::             payloads to scan; each must respond to +size+ and +id+
# payload_timeout::      per-payload timeout passed to +Timeout.timeout+
# pattern_matcher::      matcher forwarded untouched to +find_secrets_in_payload+
# raw_value_exclusions:: forwarded untouched to +find_secrets_in_payload+
# rule_exclusions::      forwarded untouched to +find_secrets_in_payload+
#
# Returns a frozen, flattened array holding whatever +find_secrets_in_payload+
# produced for each payload, plus one +PAYLOAD_TIMEOUT+ finding for every payload
# whose scan exceeded the timeout.
def run_scan_within_subprocess(
  payloads:, payload_timeout:, pattern_matcher:, raw_value_exclusions: [],
  rule_exclusions: [])
  # NOTE(review): +size+ is invoked on the payload object itself, not on its data;
  # confirm it reports the payload's byte size as +group_by_chunk_size+ expects.
  index_groups = group_by_chunk_size(payloads.map(&:size))

  # Translate each group of indices back into the payload objects they refer to.
  payload_batches = index_groups.map do |indices|
    indices.map { |position| payloads[position] }
  end

  findings = Parallel.flat_map(
    payload_batches,
    in_processes: MAX_PROCS_PER_REQUEST,
    isolation: true # do not reuse sub-processes
  ) do |batch|
    batch.flat_map do |payload|
      Timeout.timeout(payload_timeout) do
        find_secrets_in_payload(
          payload:,
          pattern_matcher:,
          raw_value_exclusions:, rule_exclusions:
        )
      end
    rescue Timeout::Error => e
      # A timed-out payload is reported as a finding rather than aborting the batch.
      logger.error "Secret Detection scan timed out on the payload(id:#{payload.id}): #{e}"
      Core::Finding.new(payload.id, Core::Status::PAYLOAD_TIMEOUT)
    end
  end

  findings.freeze
end
|
256
|
+
|
208
257
|
# Finds secrets in the given payload guarded with a timeout as a circuit breaker. It accepts
|
209
258
|
# literal values to exclude from the input before the scan, also SD rules to exclude during
|
210
259
|
# the scan.
|
@@ -268,6 +317,35 @@ module GitLab
|
|
268
317
|
Core::Status::FOUND_WITH_ERRORS
|
269
318
|
end
|
270
319
|
end
|
320
|
+
|
321
|
+
# Groups payload indices into consecutive chunks by cumulative payload size.
#
# Accepts an array of payload sizes (in bytes) and returns an array of arrays,
# where each inner array lists consecutive indices of the input. A chunk is
# closed as soon as the running total of its sizes reaches
# +MIN_CHUNK_SIZE_PER_PROC_BYTES+. Any leftover payloads that never reach the
# threshold form one final, smaller chunk, provided their sizes sum to more
# than zero; trailing entries whose sizes sum to zero are not placed in any chunk.
#
# payload_size_arr:: Array of Integer payload sizes
#
# Returns an Array of Arrays of Integer indices (empty when nothing is grouped).
def group_by_chunk_size(payload_size_arr)
  cumulative_size = 0
  chunk_indexes = []
  chunk_idx_start = 0

  payload_size_arr.each_with_index do |size, index|
    cumulative_size += size
    next unless cumulative_size >= MIN_CHUNK_SIZE_PER_PROC_BYTES

    # Threshold reached: close the current chunk and start a new one after it.
    chunk_indexes << (chunk_idx_start..index).to_a
    chunk_idx_start = index + 1
    cumulative_size = 0
  end

  # A positive running total means at least one index past +chunk_idx_start+
  # was visited, so the exclusive range below is never empty and covers the
  # single-element case without a special branch.
  chunk_indexes << (chunk_idx_start...payload_size_arr.length).to_a if cumulative_size.positive?

  chunk_indexes
end
|
271
349
|
end
|
272
350
|
end
|
273
351
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gitlab-secret_detection
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- group::secret detection
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2024-10-
|
13
|
+
date: 2024-10-07 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: grpc
|
@@ -40,6 +40,20 @@ dependencies:
|
|
40
40
|
- - '='
|
41
41
|
- !ruby/object:Gem::Version
|
42
42
|
version: 1.63.0
|
43
|
+
- !ruby/object:Gem::Dependency
|
44
|
+
name: parallel
|
45
|
+
requirement: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '1.19'
|
50
|
+
type: :runtime
|
51
|
+
prerelease: false
|
52
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - "~>"
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '1.19'
|
43
57
|
- !ruby/object:Gem::Dependency
|
44
58
|
name: re2
|
45
59
|
requirement: !ruby/object:Gem::Requirement
|