gitlab-secret_detection 0.1.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +19 -0
- data/README.md +337 -0
- data/config/log.rb +23 -0
- data/lib/gitlab/secret_detection/core/finding.rb +40 -0
- data/lib/gitlab/secret_detection/core/gitleaks.toml +1084 -0
- data/lib/gitlab/secret_detection/core/response.rb +41 -0
- data/lib/gitlab/secret_detection/core/ruleset.rb +39 -0
- data/lib/gitlab/secret_detection/core/scanner.rb +352 -0
- data/lib/gitlab/secret_detection/core/status.rb +19 -0
- data/lib/gitlab/secret_detection/core.rb +14 -0
- data/lib/gitlab/secret_detection/grpc/client/grpc_client.rb +145 -0
- data/lib/gitlab/secret_detection/grpc/client/stream_request_enumerator.rb +24 -0
- data/lib/gitlab/secret_detection/grpc/generated/.gitkeep +0 -0
- data/lib/gitlab/secret_detection/grpc/generated/secret_detection_pb.rb +25 -0
- data/lib/gitlab/secret_detection/grpc/generated/secret_detection_services_pb.rb +30 -0
- data/lib/gitlab/secret_detection/grpc/scanner_service.rb +154 -0
- data/lib/gitlab/secret_detection/grpc.rb +12 -0
- data/lib/gitlab/secret_detection/utils/certificate.rb +108 -0
- data/lib/gitlab/secret_detection/utils/memoize.rb +151 -0
- data/lib/gitlab/secret_detection/utils.rb +11 -0
- data/lib/gitlab/secret_detection/version.rb +25 -1
- data/lib/gitlab/secret_detection.rb +4 -3
- data/lib/gitlab.rb +6 -0
- data/proto/secret_detection.proto +75 -0
- metadata +84 -30
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Gitlab
|
|
4
|
+
module SecretDetection
|
|
5
|
+
module Core
|
|
6
|
+
# Response is the value object handed back by a scan operation.
#
# +status+:: One of the Gitlab::SecretDetection::Core::Status values describing how the scan ended.
# +results+:: Array of Gitlab::SecretDetection::Core::Finding values. Defaults to an empty array.
# +metadata+:: Hash carrying additional information about the response. Defaults to an empty hash;
#              it is currently used to embed more information on errors.
class Response
  attr_reader :status, :results, :metadata

  def initialize(status, results = [], metadata = {})
    @status, @results, @metadata = status, results, metadata
  end

  # Two responses are equal when they are of the same class and carry the same
  # status, metadata and results.
  def ==(other)
    return false unless self.class == other.class

    other.state == state
  end

  # Hash representation of the response; every result is converted through its own +to_h+.
  # +results+ stays nil when no results array was provided.
  def to_h
    serialized_results = results&.map { |finding| finding.to_h }

    { status: status, metadata: metadata, results: serialized_results }
  end

  protected

  # Values taking part in the equality check, exposed to other instances only.
  def state
    [status, metadata, results]
  end
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'toml-rb'
|
|
4
|
+
require 'logger'
|
|
5
|
+
|
|
6
|
+
module Gitlab
|
|
7
|
+
module SecretDetection
|
|
8
|
+
module Core
|
|
9
|
+
# Ruleset loads the secret detection rules from a TOML ruleset file and caches the parsed result.
class Ruleset
  # file path where the secrets ruleset file is located
  RULESET_FILE_PATH = File.expand_path('gitleaks.toml', __dir__)

  # +path+:: Location of the TOML ruleset file. Defaults to the bundled +RULESET_FILE_PATH+.
  # +logger+:: Logger used to report parse failures. Defaults to a logger writing to stdout.
  def initialize(path: RULESET_FILE_PATH, logger: Logger.new($stdout))
    @path = path
    @logger = logger
  end

  # Returns the parsed rules, parsing the ruleset file on first use.
  #
  # +force_fetch+:: When true, the file is parsed again even if a cached copy exists
  #                 and the cache is replaced with the fresh result.
  #
  # Raises +Core::Scanner::RulesetParseError+ when the file cannot be read or parsed.
  def rules(force_fetch: false)
    @rule_data = parse_ruleset if force_fetch || @rule_data.nil?

    @rule_data
  end

  private

  attr_reader :path, :logger

  # parses given ruleset file and returns the (frozen) value stored under its +rules+ key
  def parse_ruleset
    rules_data = TomlRB.load_file(path, symbolize_keys: true).freeze
    rules_data[:rules].freeze
  rescue StandardError => e
    logger.error "Failed to parse secret detection ruleset from '#{path}' path: #{e}"
    raise Core::Scanner::RulesetParseError
  end
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 're2'
|
|
4
|
+
require 'logger'
|
|
5
|
+
require 'timeout'
|
|
6
|
+
require 'English'
|
|
7
|
+
require 'parallel'
|
|
8
|
+
|
|
9
|
+
module Gitlab
|
|
10
|
+
module SecretDetection
|
|
11
|
+
module Core
|
|
12
|
+
# Scanner is responsible for running the Secret Detection scan operation. Payloads are first
# narrowed down with a keyword pre-filter; the remaining ones are matched line by line against
# an RE2::Set compiled from the rules selected by the requested tags.
class Scanner
  # RulesetParseError is raised when the code fails to parse the
  # ruleset file from the given path
  RulesetParseError = Class.new(StandardError)

  # RulesetCompilationError is raised when the code fails to compile
  # the predefined rulesets
  RulesetCompilationError = Class.new(StandardError)

  # default time limit(in seconds) for running the scan operation per invocation
  DEFAULT_SCAN_TIMEOUT_SECS = 180 # 3 minutes
  # default time limit(in seconds) for running the scan operation on a single payload
  DEFAULT_PAYLOAD_TIMEOUT_SECS = 30 # 30 seconds
  # Tags used for creating default pattern matcher
  DEFAULT_PATTERN_MATCHER_TAGS = ['gitlab_blocking'].freeze
  # Max no of child processes to spawn per request
  # ref: https://gitlab.com/gitlab-org/gitlab/-/issues/430160
  MAX_PROCS_PER_REQUEST = 5
  # Minimum cumulative size of the payloads required to spawn and
  # run the scan within a new subprocess.
  MIN_CHUNK_SIZE_PER_PROC_BYTES = 2_097_152 # 2MiB
  # Whether to run scan in subprocesses or not. Default is false.
  RUN_IN_SUBPROCESS = false

  # Pairs a compiled RE2::Set with the rules that were added to it, in insertion order.
  # RE2::Set#match reports matches as indices into the set, so this rule list (and not the
  # complete ruleset) has to be used to resolve a match back to its rule.
  PatternMatcher = Struct.new(:matcher, :rules)
  private_constant :PatternMatcher

  # Initializes the instance with logger along with following operations:
  # 1. Extract keywords from the parsed ruleset to use it for matching keywords before regex operation.
  # 2. Build and Compile rule regex patterns obtained from the ruleset with +DEFAULT_PATTERN_MATCHER_TAGS+
  # tags. Raises +RulesetCompilationError+ in case the regex pattern compilation fails.
  def initialize(rules:, logger: Logger.new($stdout))
    @logger = logger
    @rules = rules
    @keywords = create_keywords(rules)
    @default_keyword_matcher = build_keyword_matcher(
      tags: DEFAULT_PATTERN_MATCHER_TAGS,
      include_missing_tags: false
    )
    @default_pattern_matcher = build_pattern_matcher(
      tags: DEFAULT_PATTERN_MATCHER_TAGS,
      include_missing_tags: false
    ) # includes only gitlab_blocking rules
  end

  # Runs Secret Detection scan on the list of given payloads. Both the total scan duration and
  # the duration for each payload is time bound via +timeout+ and +payload_timeout+ respectively.
  #
  # +payloads+:: Array of payloads where each payload should have `id` and `data` properties.
  # +timeout+:: No of seconds(accepts floating point for smaller time values) to limit the total scan
  #             duration. nil, zero or negative values fall back to +DEFAULT_SCAN_TIMEOUT_SECS+.
  # +payload_timeout+:: No of seconds(accepts floating point for smaller time values) to limit
  #                     the scan duration on each payload. nil, zero or negative values fall back to
  #                     +DEFAULT_PAYLOAD_TIMEOUT_SECS+.
  # +raw_value_exclusions+:: Array of raw values to exclude from the scan.
  # +rule_exclusions+:: Array of rules to exclude from the ruleset used for the scan. Each rule is represented
  #                     by its ID. For example: `gitlab_personal_access_token` for representing Gitlab Personal
  #                     Access Token. By default, no rule is excluded from the ruleset.
  # +tags+:: Array of tag values to filter from the default ruleset when determining the rules used for the scan.
  #          For example: Add `gitlab_blocking` to include only rules for Push Protection. Defaults to
  #          [`gitlab_blocking`] (+DEFAULT_PATTERN_MATCHER_TAGS+), also when nil or empty.
  # +subprocess+:: Whether to run the regex matching in forked sub-processes. Defaults to +RUN_IN_SUBPROCESS+.
  #
  # NOTE:
  # Running the scan in fork mode primarily focuses on reducing the memory consumption of the scan by
  # offloading regex operations on large payloads to sub-processes. However, it does not assure the improvement
  # in the overall latency of the scan, specifically in the case of smaller payloads, where the overhead of
  # forking a new process adds to the overall latency of the scan instead. More reference on Subprocess-based
  # execution is found here: https://gitlab.com/gitlab-org/gitlab/-/issues/430160.
  #
  # Returns an instance of Gitlab::SecretDetection::Core::Response by following below structure:
  # {
  #     status: One of the Core::Status values
  #     results: [SecretDetection::Finding]
  # }
  #
  def secrets_scan(
    payloads,
    timeout: DEFAULT_SCAN_TIMEOUT_SECS,
    payload_timeout: DEFAULT_PAYLOAD_TIMEOUT_SECS,
    raw_value_exclusions: [],
    rule_exclusions: [],
    tags: DEFAULT_PATTERN_MATCHER_TAGS,
    subprocess: RUN_IN_SUBPROCESS
  )
    return Core::Response.new(Core::Status::INPUT_ERROR) unless validate_scan_input(payloads)

    # assign defaults since grpc passing zero timeout value to `Timeout.timeout(..)` makes it effectively useless.
    timeout = DEFAULT_SCAN_TIMEOUT_SECS unless timeout&.positive?
    payload_timeout = DEFAULT_PAYLOAD_TIMEOUT_SECS unless payload_timeout&.positive?
    tags = Array(tags) # tolerate nil and array-like inputs
    tags = DEFAULT_PATTERN_MATCHER_TAGS if tags.empty?

    Timeout.timeout(timeout) do
      keyword_matcher = build_keyword_matcher(tags:)

      matched_payloads = filter_by_keywords(keyword_matcher, payloads)

      next Core::Response.new(Core::Status::NOT_FOUND) if matched_payloads.empty?

      scan_args = {
        payloads: matched_payloads, payload_timeout:,
        pattern_matcher: build_pattern_matcher(tags:),
        raw_value_exclusions:, rule_exclusions:
      }

      secrets = subprocess ? run_scan_within_subprocess(**scan_args) : run_scan(**scan_args)

      Core::Response.new(overall_scan_status(secrets), secrets)
    end
  rescue Timeout::Error => e
    logger.error "Secret detection operation timed out: #{e}"

    Core::Response.new(Core::Status::SCAN_TIMEOUT)
  end

  private

  attr_reader :logger, :rules, :keywords, :default_pattern_matcher, :default_keyword_matcher

  # Selects the rules taking part in a scan for the given tags. This is the single source of
  # truth for both the keyword matcher and the pattern matcher so that they always agree.
  #
  # - with no tags given, every rule is selected
  # - a rule with tags is selected when it shares at least one tag with +tags+
  # - a rule without tags is selected only when +include_missing_tags+ is true
  def select_rules(tags:, include_missing_tags:)
    rules.select do |rule|
      rule_tags = rule[:tags]

      if tags.empty?
        true
      elsif rule_tags.nil? || rule_tags.empty?
        include_missing_tags
      else
        tags.intersect?(rule_tags)
      end
    end
  end

  # Builds a PatternMatcher (compiled RE2::Set plus the rules added to it) for the given
  # combination of rules and tags. It also allows a choice(via `include_missing_tags`) to consider
  # rules for pattern matching that do not have `tags` property defined. If the given tags
  # are same as +DEFAULT_PATTERN_MATCHER_TAGS+ (and missing tags are not requested) then returns the
  # eagerly loaded default pattern matcher created during initialization.
  #
  # Raises +RulesetCompilationError+ when the set fails to compile.
  def build_pattern_matcher(tags:, include_missing_tags: false)
    if !include_missing_tags && tags.eql?(DEFAULT_PATTERN_MATCHER_TAGS) && !default_pattern_matcher.nil?
      return default_pattern_matcher
    end

    active_rules = select_rules(tags:, include_missing_tags:)
    matcher = RE2::Set.new

    # the position of a rule in `active_rules` equals the index of its pattern in the set
    active_rules.each { |rule| matcher.add(rule[:regex]) }

    unless matcher.compile
      logger.error "Failed to compile secret detection rulesets in RE2::Set"

      raise RulesetCompilationError
    end

    PatternMatcher.new(matcher, active_rules)
  end

  # Creates and returns the unique set of rule matching keywords
  def create_keywords(rules)
    secrets_keywords = Set.new

    rules.each do |rule|
      secrets_keywords.merge rule[:keywords] unless rule[:keywords].nil?
    end

    secrets_keywords.freeze
  end

  # Builds the keyword pre-filter: a single RE2 pattern matching any keyword of the rules selected
  # for the given tags. Returns nil when the selected rules define no keyword at all. The eagerly
  # built default matcher is reused for +DEFAULT_PATTERN_MATCHER_TAGS+ when missing tags are not requested.
  def build_keyword_matcher(tags:, include_missing_tags: false)
    if !include_missing_tags && tags.eql?(DEFAULT_PATTERN_MATCHER_TAGS) && !default_keyword_matcher.nil?
      return default_keyword_matcher
    end

    include_keywords = Set.new

    select_rules(tags:, include_missing_tags:).each do |rule|
      include_keywords.merge(rule[:keywords]) unless rule[:keywords].nil?
    end

    return nil if include_keywords.empty?

    keywords_regex = include_keywords.join('|')

    RE2("\\b(#{keywords_regex})")
  end

  # Returns the (frozen) list of payloads whose data contains at least one keyword.
  # Without a keyword matcher nothing can match, hence an empty list is returned.
  def filter_by_keywords(keyword_matcher, payloads)
    return [] if keyword_matcher.nil?

    payloads.select { |payload| keyword_matcher.partial_match?(payload.data) }.freeze
  end

  # Runs the secret detection scan on the given list of payloads. It accepts
  # literal values to exclude from the input before the scan, also SD rules to exclude during
  # the scan when performed on the payloads.
  def run_scan(
    payloads:, payload_timeout:, pattern_matcher:, raw_value_exclusions: [], rule_exclusions: [])
    payloads.flat_map do |payload|
      scan_payload(payload:, payload_timeout:, pattern_matcher:, raw_value_exclusions:, rule_exclusions:)
    end
  end

  # Same as +run_scan+ but the payloads are grouped into chunks of at least
  # +MIN_CHUNK_SIZE_PER_PROC_BYTES+ and every chunk is scanned in its own sub-process,
  # with at most +MAX_PROCS_PER_REQUEST+ sub-processes running at a time.
  def run_scan_within_subprocess(
    payloads:, payload_timeout:, pattern_matcher:, raw_value_exclusions: [],
    rule_exclusions: [])
    # payloads are only guaranteed to expose `id` and `data`, so the size is taken from the data
    payload_sizes = payloads.map { |payload| payload.data.bytesize }
    grouped_payload_indices = group_by_chunk_size(payload_sizes)

    grouped_payloads = grouped_payload_indices.map { |idx_arr| idx_arr.map { |i| payloads[i] } }

    found_secrets = Parallel.flat_map(
      grouped_payloads,
      in_processes: MAX_PROCS_PER_REQUEST,
      isolation: true # do not reuse sub-processes
    ) do |grouped_payload|
      grouped_payload.flat_map do |payload|
        scan_payload(payload:, payload_timeout:, pattern_matcher:, raw_value_exclusions:, rule_exclusions:)
      end
    end

    found_secrets.freeze
  end

  # Scans a single payload guarded with +payload_timeout+ as a circuit breaker. Returns the
  # findings of the payload, or a single +PAYLOAD_TIMEOUT+ finding when the time limit is hit.
  def scan_payload(payload:, payload_timeout:, pattern_matcher:, raw_value_exclusions:, rule_exclusions:)
    Timeout.timeout(payload_timeout) do
      find_secrets_in_payload(
        payload:,
        pattern_matcher:,
        raw_value_exclusions:, rule_exclusions:
      )
    end
  rescue Timeout::Error => e
    logger.error "Secret Detection scan timed out on the payload(id:#{payload.id}): #{e}"

    [Core::Finding.new(payload.id, Core::Status::PAYLOAD_TIMEOUT)]
  end

  # Finds secrets in the given payload, line by line. It accepts literal values to exclude
  # from the input before the scan, also SD rules to exclude during the scan.
  #
  # Returns a frozen array of findings. Any error raised while scanning is logged and
  # reported as a single +SCAN_ERROR+ finding for the payload.
  def find_secrets_in_payload(payload:, pattern_matcher:, raw_value_exclusions: [], rule_exclusions: [])
    findings = []

    payload.data
           .each_line($INPUT_RECORD_SEPARATOR, chomp: true)
           .each_with_index do |line, index|
      # strip the excluded raw values so that they can never produce a finding
      raw_value_exclusions.each { |value| line = line.gsub(value, '') }

      next if line.empty?

      line_no = index + 1

      # returns indices of matched patterns within the compiled set
      matches = pattern_matcher.matcher.match(line, exception: false)

      matches.each do |match_idx|
        # resolve against the rules that were added to the set, not the complete ruleset
        rule = pattern_matcher.rules[match_idx]

        next if rule_exclusions.include?(rule[:id])

        findings << Core::Finding.new(payload.id, Core::Status::FOUND, line_no, rule[:id], rule[:description])
      end
    end

    findings.freeze
  rescue StandardError => e
    logger.error "Secret Detection scan failed on the payload(id:#{payload.id}): #{e}"

    [Core::Finding.new(payload.id, Core::Status::SCAN_ERROR)].freeze
  end

  # Validates the given payloads by verifying the type and
  # presence of `id` and `data` fields necessary for the scan
  def validate_scan_input(payloads)
    return false unless payloads.is_a?(Array)

    payloads.all? do |payload|
      payload.respond_to?(:id) && payload.respond_to?(:data)
    end
  end

  # Returns the status of the overall scan request
  # based on the detected secret findings found in the input payloads:
  # +NOT_FOUND+ without findings, +SCAN_TIMEOUT+ when every finding is a payload timeout,
  # +FOUND+ when none is, and +FOUND_WITH_ERRORS+ otherwise.
  def overall_scan_status(found_secrets)
    return Core::Status::NOT_FOUND if found_secrets.empty?

    timed_out_payloads = found_secrets.count { |el| el.status == Core::Status::PAYLOAD_TIMEOUT }

    case timed_out_payloads
    when 0
      Core::Status::FOUND
    when found_secrets.length
      Core::Status::SCAN_TIMEOUT
    else
      Core::Status::FOUND_WITH_ERRORS
    end
  end

  # This method accepts an array of payload sizes(in bytes) and groups them into an array
  # of arrays structure where each element is the group of indices of the input
  # array whose cumulative payload sizes has at least +MIN_CHUNK_SIZE_PER_PROC_BYTES+.
  # The trailing indices that do not reach the minimum size form the last group, so that
  # every index of the input ends up in exactly one group.
  def group_by_chunk_size(payload_size_arr)
    cumulative_size = 0
    chunk_indexes = []
    chunk_idx_start = 0

    payload_size_arr.each_with_index do |size, index|
      cumulative_size += size
      next unless cumulative_size >= MIN_CHUNK_SIZE_PER_PROC_BYTES

      chunk_indexes << (chunk_idx_start..index).to_a

      chunk_idx_start = index + 1
      cumulative_size = 0
    end

    # leftover payloads (including zero-sized ones) that did not fill up a whole chunk
    if chunk_idx_start < payload_size_arr.length
      chunk_indexes << (chunk_idx_start...payload_size_arr.length).to_a
    end

    chunk_indexes
  end
end
|
|
350
|
+
end
|
|
351
|
+
end
|
|
352
|
+
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Gitlab
|
|
4
|
+
module SecretDetection
|
|
5
|
+
module Core
|
|
6
|
+
# Status enumerates every status value the scan operation can emit.
class Status
  # Scan operation completed with one or more findings
  FOUND = 1

  # Scan operation completed with one or more findings along with some errors
  FOUND_WITH_ERRORS = 2

  # Scan operation ran beyond the given time out
  SCAN_TIMEOUT = 3

  # Scan operation on a payload ran beyond the given time out
  PAYLOAD_TIMEOUT = 4

  # Scan operation failed due to a regex error
  SCAN_ERROR = 5

  # Scan operation failed due to invalid input
  INPUT_ERROR = 6

  # Scan operation completed with zero findings
  NOT_FOUND = 7

  # Authentication failed
  AUTH_ERROR = 8
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'core/finding'
|
|
4
|
+
require_relative 'core/response'
|
|
5
|
+
require_relative 'core/status'
|
|
6
|
+
require_relative 'core/scanner'
|
|
7
|
+
require_relative 'core/ruleset'
|
|
8
|
+
|
|
9
|
+
# Declares the Gitlab::SecretDetection::Core namespace. The module body is intentionally
# empty here; this file only pulls in the core/* files through the require_relative calls above.
module Gitlab
  module SecretDetection
    module Core
    end
  end
end
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'grpc'
|
|
4
|
+
require_relative '../../grpc/scanner_service'
|
|
5
|
+
require_relative '../../core/response'
|
|
6
|
+
require_relative '../../core/status'
|
|
7
|
+
require_relative '../../utils'
|
|
8
|
+
require_relative './stream_request_enumerator'
|
|
9
|
+
|
|
10
|
+
module Gitlab
|
|
11
|
+
module SecretDetection
|
|
12
|
+
module GRPC
|
|
13
|
+
# Client talks to the Secret Detection service over gRPC and exposes the outcome of every
# call as a +Gitlab::SecretDetection::Core::Response+.
class Client
  include SecretDetection::Utils::StrongMemoize
  include SDLogger

  # Time to wait for the response from the service
  REQUEST_TIMEOUT_SECONDS = 10 # 10 seconds

  # +host+:: Address of the Secret Detection service.
  # +secure+:: When true, the channel is created with the CA certificates bundle; otherwise
  #            the channel is insecure.
  # +compression+:: When true, gzip compression options are added to the channel arguments.
  def initialize(host, secure: false, compression: true)
    @host, @secure, @compression = host, secure, compression
  end

  # Calls the service's `/Scan` gRPC endpoint. To stay consistent with the SDS gem interface,
  # the gRPC response is transformed to +Gitlab::SecretDetection::Core::Response+.
  # Errors raised by the service are translated to a +Gitlab::SecretDetection::Core::Response+
  # as well, by assigning an appropriate +status+ value to it.
  def run_scan(request:, auth_token:, extra_headers: {})
    with_rescued_errors do
      scanner = stub
      reply = scanner.scan(
        request,
        metadata: build_metadata(auth_token, extra_headers),
        deadline: request_deadline
      )

      convert_to_core_response(reply)
    end
  end

  # Calls the service's `/ScanStream` gRPC endpoint.
  #
  # To stay consistent with the SDS gem interface, every gRPC response is transformed to
  # +Gitlab::SecretDetection::Core::Response+. With a block given each response is yielded to it,
  # otherwise the responses are collected and returned as an array. Errors raised by the service
  # are translated to a +Gitlab::SecretDetection::Core::Response+ by assigning an appropriate
  # +status+ value to it.
  #
  # Note: If one of the stream requests result in an error, the stream will end immediately without
  # processing the remaining requests.
  def run_scan_stream(requests:, auth_token:, extra_headers: {})
    enumerator = Gitlab::SecretDetection::GRPC::StreamRequestEnumerator.new(requests)
    collected = []

    with_rescued_errors do
      scanner = stub
      replies = scanner.scan_stream(
        enumerator.each_item,
        metadata: build_metadata(auth_token, extra_headers),
        deadline: request_deadline
      )

      replies.each do |reply|
        core_response = convert_to_core_response(reply)
        block_given? ? yield(core_response) : collected << core_response
      end

      collected
    end
  end

  private

  attr_reader :secure, :host, :compression

  # gRPC stub of the scanner service for the configured host (memoized below).
  def stub
    Gitlab::SecretDetection::GRPC::Scanner::Stub.new(host, channel_credentials, channel_args: channel_args)
  end

  strong_memoize_attr :stub

  # Channel arguments: keepalive settings, plus gzip compression settings when enabled.
  def channel_args
    options = {
      'grpc.keepalive_permit_without_calls' => 1,
      'grpc.keepalive_time_ms' => 30000, # 30 seconds
      'grpc.keepalive_timeout_ms' => 10000 # 10 seconds timeout for keepalive response
    }

    gzip_options = ::GRPC::Core::CompressionOptions.new(default_algorithm: :gzip).to_channel_arg_hash
    options.update(gzip_options) if compression

    options.freeze
  end

  # Credentials used for the channel: the CA certificates bundle for secure channels,
  # the insecure marker otherwise.
  def channel_credentials
    if secure
      ca_bundle = Gitlab::SecretDetection::Utils::X509::Certificate.ca_certs_bundle

      ::GRPC::Core::ChannelCredentials.new(ca_bundle)
    else
      :this_channel_is_insecure
    end
  end

  # Request metadata: the auth token header merged with the extra headers.
  def build_metadata(token, extra_headers = {})
    headers = { 'x-sd-auth' => token }
    headers.update(extra_headers)
    headers.freeze
  end

  # Point in time after which a call started now is given up.
  def request_deadline
    Time.now + REQUEST_TIMEOUT_SECONDS
  end

  # Runs the given block and translates the gRPC errors it raises into a core response:
  # unauthenticated -> AUTH_ERROR, invalid argument -> INPUT_ERROR, anything else -> SCAN_ERROR.
  def with_rescued_errors
    yield
  rescue ::GRPC::Unauthenticated
    SecretDetection::Core::Response.new(SecretDetection::Core::Status::AUTH_ERROR)
  rescue ::GRPC::InvalidArgument => e
    error_info = { message: e.details, **e.metadata }

    SecretDetection::Core::Response.new(SecretDetection::Core::Status::INPUT_ERROR, nil, error_info)
  rescue ::GRPC::Unknown, ::GRPC::BadStatus => e
    error_info = { message: e.details }

    SecretDetection::Core::Response.new(SecretDetection::Core::Status::SCAN_ERROR, nil, error_info)
  end

  # Builds a core response out of the status, results and metadata of the gRPC response.
  # A failed conversion is logged and reported as a SCAN_ERROR response.
  def convert_to_core_response(grpc_response)
    fields = grpc_response.to_h

    SecretDetection::Core::Response.new(fields[:status], fields[:results], fields[:metadata])
  rescue StandardError => e
    logger.error("Failed to convert to core response: #{e}")
    SecretDetection::Core::Response.new(SecretDetection::Core::Status::SCAN_ERROR)
  end
end
|
|
143
|
+
end
|
|
144
|
+
end
|
|
145
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Gitlab
|
|
4
|
+
module SecretDetection
|
|
5
|
+
module GRPC
|
|
6
|
+
# StreamRequestEnumerator wraps a list of requests so that they can be handed over
# as an Enumerator.
class StreamRequestEnumerator
  def initialize(requests = [])
    @requests = requests
  end

  # Yields every request, in order, to the given block.
  #
  # @return an Enumerator over the requests when no block is given
  def each_item(&block)
    return to_enum(:each_item) if block.nil?

    @requests.each(&block)
  end
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
File without changes
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
|
3
|
+
# source: secret_detection.proto
|
|
4
|
+
|
|
5
|
+
require 'google/protobuf'
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
descriptor_data = "\n\x16secret_detection.proto\x12\x17gitlab.secret_detection\"\xfc\x03\n\x0bScanRequest\x12>\n\x08payloads\x18\x01 \x03(\x0b\x32,.gitlab.secret_detection.ScanRequest.Payload\x12\x19\n\x0ctimeout_secs\x18\x02 \x01(\x02H\x00\x88\x01\x01\x12!\n\x14payload_timeout_secs\x18\x03 \x01(\x02H\x01\x88\x01\x01\x12\x42\n\nexclusions\x18\x04 \x03(\x0b\x32..gitlab.secret_detection.ScanRequest.Exclusion\x12\x0c\n\x04tags\x18\x05 \x03(\t\x1a#\n\x07Payload\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\t\x1a\x66\n\tExclusion\x12J\n\x0e\x65xclusion_type\x18\x01 \x01(\x0e\x32\x32.gitlab.secret_detection.ScanRequest.ExclusionType\x12\r\n\x05value\x18\x02 \x01(\t\"f\n\rExclusionType\x12\x1e\n\x1a\x45XCLUSION_TYPE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XCLUSION_TYPE_RULE\x10\x01\x12\x1c\n\x18\x45XCLUSION_TYPE_RAW_VALUE\x10\x02\x42\x0f\n\r_timeout_secsB\x17\n\x15_payload_timeout_secs\"\xe2\x03\n\x0cScanResponse\x12>\n\x07results\x18\x01 \x03(\x0b\x32-.gitlab.secret_detection.ScanResponse.Finding\x12\x0e\n\x06status\x18\x02 \x01(\x05\x1a\x9d\x01\n\x07\x46inding\x12\x12\n\npayload_id\x18\x01 \x01(\t\x12\x0e\n\x06status\x18\x02 \x01(\x05\x12\x11\n\x04type\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x18\n\x0b\x64\x65scription\x18\x04 \x01(\tH\x01\x88\x01\x01\x12\x18\n\x0bline_number\x18\x05 
\x01(\x05H\x02\x88\x01\x01\x42\x07\n\x05_typeB\x0e\n\x0c_descriptionB\x0e\n\x0c_line_number\"\xe1\x01\n\x06Status\x12\x16\n\x12STATUS_UNSPECIFIED\x10\x00\x12\x10\n\x0cSTATUS_FOUND\x10\x01\x12\x1c\n\x18STATUS_FOUND_WITH_ERRORS\x10\x02\x12\x17\n\x13STATUS_SCAN_TIMEOUT\x10\x03\x12\x1a\n\x16STATUS_PAYLOAD_TIMEOUT\x10\x04\x12\x15\n\x11STATUS_SCAN_ERROR\x10\x05\x12\x16\n\x12STATUS_INPUT_ERROR\x10\x06\x12\x14\n\x10STATUS_NOT_FOUND\x10\x07\x12\x15\n\x11STATUS_AUTH_ERROR\x10\x08\x32\xc1\x01\n\x07Scanner\x12U\n\x04Scan\x12$.gitlab.secret_detection.ScanRequest\x1a%.gitlab.secret_detection.ScanResponse\"\x00\x12_\n\nScanStream\x12$.gitlab.secret_detection.ScanRequest\x1a%.gitlab.secret_detection.ScanResponse\"\x00(\x01\x30\x01\x42 \xea\x02\x1dGitlab::SecretDetection::GRPCb\x06proto3"
|
|
9
|
+
|
|
10
|
+
pool = Google::Protobuf::DescriptorPool.generated_pool
|
|
11
|
+
pool.add_serialized_file(descriptor_data)
|
|
12
|
+
|
|
13
|
+
# Message classes and enum modules of the `gitlab.secret_detection` proto package, looked up
# by their fully qualified names in the generated descriptor pool.
# NOTE(review): this file is marked as generated ("DO NOT EDIT"), so these comments will
# presumably be lost when the file is regenerated.
module Gitlab
  module SecretDetection
    module GRPC
      # Request message with its nested Payload and Exclusion messages and ExclusionType enum
      ScanRequest = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("gitlab.secret_detection.ScanRequest").msgclass
      ScanRequest::Payload = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("gitlab.secret_detection.ScanRequest.Payload").msgclass
      ScanRequest::Exclusion = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("gitlab.secret_detection.ScanRequest.Exclusion").msgclass
      ScanRequest::ExclusionType = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("gitlab.secret_detection.ScanRequest.ExclusionType").enummodule
      # Response message with its nested Finding message and Status enum
      ScanResponse = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("gitlab.secret_detection.ScanResponse").msgclass
      ScanResponse::Finding = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("gitlab.secret_detection.ScanResponse.Finding").msgclass
      ScanResponse::Status = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("gitlab.secret_detection.ScanResponse.Status").enummodule
    end
  end
end
|