gitlab-secret_detection 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/gitlab/secret_detection/core/scanner.rb +81 -3
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 648b6d5277ac8e7948762533af39dc2f4ff0ae4c62fbdcc6d5d32615a44ab815
|
4
|
+
data.tar.gz: afd2a580cb0a73bb84a401616ec622e1e14d2b75021036a959f933ed75864cf6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7c49b71891f6e13d8dc252936f18df838d8e9d42000cbc1f4f3d7fe56b258a7e2c17bce7a7f0e771bc16991fa46bacf641a636a1d1efd3ef23de6b0a31a011b8
|
7
|
+
data.tar.gz: af93c82a0025be12bc82fe6ff62b290fe193a2338ae2f98d83bc2729d1c7cc35a223330a3a9907a9f715c009127e93ba2d13fc33982d2f13a179d8548f5cc36e
|
@@ -4,6 +4,7 @@ require 're2'
|
|
4
4
|
require 'logger'
|
5
5
|
require 'timeout'
|
6
6
|
require 'English'
|
7
|
+
require 'parallel'
|
7
8
|
|
8
9
|
module GitLab
|
9
10
|
module SecretDetection
|
@@ -24,6 +25,14 @@ module GitLab
|
|
24
25
|
DEFAULT_PAYLOAD_TIMEOUT_SECS = 30 # 30 seconds
|
25
26
|
# Tags used for creating default pattern matcher
|
26
27
|
DEFAULT_PATTERN_MATCHER_TAGS = ['gitlab_blocking'].freeze
|
28
|
+
# Maximum number of child processes to spawn per request
|
29
|
+
# ref: https://gitlab.com/gitlab-org/gitlab/-/issues/430160
|
30
|
+
MAX_PROCS_PER_REQUEST = 5
|
31
|
+
# Minimum cumulative size of the payloads required to spawn and
|
32
|
+
# run the scan within a new subprocess.
|
33
|
+
MIN_CHUNK_SIZE_PER_PROC_BYTES = 2_097_152 # 2MiB
|
34
|
+
# Whether to run scan in subprocesses or not. Default is false.
|
35
|
+
RUN_IN_SUBPROCESS = false
|
27
36
|
|
28
37
|
# Initializes the instance with logger along with following operations:
|
29
38
|
# 1. Extract keywords from the parsed ruleset to use it for matching keywords before regex operation.
|
@@ -58,6 +67,13 @@ module GitLab
|
|
58
67
|
# For example: Add `gitlab_blocking` to include only rules for Push Protection. Defaults to
|
59
68
|
# [`gitlab_blocking`] (+DEFAULT_PATTERN_MATCHER_TAGS+).
|
60
69
|
#
|
70
|
+
# NOTE:
|
71
|
+
# Running the scan in fork mode primarily focuses on reducing the memory consumption of the scan by
|
72
|
+
# offloading regex operations on large payloads to sub-processes. However, it does not guarantee an improvement
|
73
|
+
# in the overall latency of the scan, specifically in the case of smaller payloads, where the overhead of
|
74
|
+
# forking a new process adds to the overall latency of the scan instead. More information on subprocess-based
|
75
|
+
# execution is found here: https://gitlab.com/gitlab-org/gitlab/-/issues/430160.
|
76
|
+
#
|
61
77
|
# Returns an instance of GitLab::SecretDetection::Core::Response by following below structure:
|
62
78
|
# {
|
63
79
|
# status: One of the Core::Status values
|
@@ -70,7 +86,8 @@ module GitLab
|
|
70
86
|
payload_timeout: DEFAULT_PAYLOAD_TIMEOUT_SECS,
|
71
87
|
raw_value_exclusions: [],
|
72
88
|
rule_exclusions: [],
|
73
|
-
tags: DEFAULT_PATTERN_MATCHER_TAGS
|
89
|
+
tags: DEFAULT_PATTERN_MATCHER_TAGS,
|
90
|
+
subprocess: RUN_IN_SUBPROCESS
|
74
91
|
)
|
75
92
|
|
76
93
|
return Core::Response.new(Core::Status::INPUT_ERROR) unless validate_scan_input(payloads)
|
@@ -87,11 +104,13 @@ module GitLab
|
|
87
104
|
|
88
105
|
next Core::Response.new(Core::Status::NOT_FOUND) if matched_payloads.empty?
|
89
106
|
|
90
|
-
|
107
|
+
scan_args = {
|
91
108
|
payloads: matched_payloads, payload_timeout:,
|
92
109
|
pattern_matcher: build_pattern_matcher(tags:),
|
93
110
|
raw_value_exclusions:, rule_exclusions:
|
94
|
-
|
111
|
+
}
|
112
|
+
|
113
|
+
secrets = subprocess ? run_scan_within_subprocess(**scan_args) : run_scan(**scan_args)
|
95
114
|
|
96
115
|
scan_status = overall_scan_status(secrets)
|
97
116
|
|
@@ -205,6 +224,36 @@ module GitLab
|
|
205
224
|
end
|
206
225
|
end
|
207
226
|
|
227
|
+
# Scans the given payloads for secrets by handing batches of payloads to
# sub-processes through +Parallel.flat_map+.
#
# Payloads are batched by +group_by_chunk_size+ using the value each payload
# returns from +#size+. Every payload is scanned under its own
# +payload_timeout+; when that timeout fires, the error is logged and a
# +Core::Finding+ with +Core::Status::PAYLOAD_TIMEOUT+ is emitted for that
# payload in place of scan results.
#
# NOTE(review): batching relies on +payload.size+ being the payload's size in
# bytes (which is what +group_by_chunk_size+ expects) - confirm against the
# payload type, since e.g. +Struct#size+ returns the member count instead.
#
# payloads:: payloads to scan; each must respond to +#size+ and +#id+
# payload_timeout:: timeout in seconds applied to each payload scan
# pattern_matcher:: matcher forwarded to +find_secrets_in_payload+
# raw_value_exclusions:: forwarded to +find_secrets_in_payload+
# rule_exclusions:: forwarded to +find_secrets_in_payload+
#
# Returns the frozen, flattened collection produced by +Parallel.flat_map+.
def run_scan_within_subprocess(
  payloads:, payload_timeout:, pattern_matcher:, raw_value_exclusions: [],
  rule_exclusions: [])
  # Resolve each group of payload positions back into the payloads themselves.
  index_groups = group_by_chunk_size(payloads.map(&:size))
  payload_batches = index_groups.map do |positions|
    positions.map { |position| payloads[position] }
  end

  findings = Parallel.flat_map(
    payload_batches,
    in_processes: MAX_PROCS_PER_REQUEST,
    isolation: true # do not reuse sub-processes
  ) do |batch|
    batch.flat_map do |payload|
      begin
        Timeout.timeout(payload_timeout) do
          find_secrets_in_payload(payload:, pattern_matcher:, raw_value_exclusions:, rule_exclusions:)
        end
      rescue Timeout::Error => e
        # A timed-out payload is reported as a finding rather than aborting the batch.
        logger.error "Secret Detection scan timed out on the payload(id:#{payload.id}): #{e}"
        Core::Finding.new(payload.id, Core::Status::PAYLOAD_TIMEOUT)
      end
    end
  end

  findings.freeze
end
|
256
|
+
|
208
257
|
# Finds secrets in the given payload guarded with a timeout as a circuit breaker. It accepts
|
209
258
|
# literal values to exclude from the input before the scan, also SD rules to exclude during
|
210
259
|
# the scan.
|
@@ -268,6 +317,35 @@ module GitLab
|
|
268
317
|
Core::Status::FOUND_WITH_ERRORS
|
269
318
|
end
|
270
319
|
end
|
320
|
+
|
321
|
+
# Groups payloads, identified by their positions in +payload_size_arr+, into
# runs of consecutive indices. A group is closed as soon as the cumulative
# size of its payloads reaches +MIN_CHUNK_SIZE_PER_PROC_BYTES+; any indices
# left over after the last closed group form a final, smaller group.
#
# Every index of the input appears in exactly one group, including trailing
# zero-size payloads, so no payload is silently left out of the result.
#
# payload_size_arr:: Array of payload sizes (in bytes)
#
# Returns an Array of Arrays of Integer indices into +payload_size_arr+;
# an empty input yields an empty Array.
def group_by_chunk_size(payload_size_arr)
  chunk_indexes = []
  chunk_idx_start = 0
  cumulative_size = 0

  payload_size_arr.each_with_index do |size, index|
    cumulative_size += size
    next if cumulative_size < MIN_CHUNK_SIZE_PER_PROC_BYTES

    # Threshold reached: close the current group and start a new one.
    chunk_indexes << (chunk_idx_start..index).to_a
    chunk_idx_start = index + 1
    cumulative_size = 0
  end

  # Remaining indices never reached the threshold. They are grouped whatever
  # their cumulative size (even zero) so that no index is dropped.
  remaining = (chunk_idx_start...payload_size_arr.length).to_a
  chunk_indexes << remaining unless remaining.empty?

  chunk_indexes
end
|
271
349
|
end
|
272
350
|
end
|
273
351
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gitlab-secret_detection
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- group::secret detection
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2024-10-
|
13
|
+
date: 2024-10-07 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: grpc
|
@@ -40,6 +40,20 @@ dependencies:
|
|
40
40
|
- - '='
|
41
41
|
- !ruby/object:Gem::Version
|
42
42
|
version: 1.63.0
|
43
|
+
- !ruby/object:Gem::Dependency
|
44
|
+
name: parallel
|
45
|
+
requirement: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '1.19'
|
50
|
+
type: :runtime
|
51
|
+
prerelease: false
|
52
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - "~>"
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '1.19'
|
43
57
|
- !ruby/object:Gem::Dependency
|
44
58
|
name: re2
|
45
59
|
requirement: !ruby/object:Gem::Requirement
|