gitlab-secret_detection 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +19 -0
- data/README.md +334 -0
- data/config/log.rb +23 -0
- data/lib/gitlab/secret_detection/core/finding.rb +40 -0
- data/lib/gitlab/secret_detection/core/gitleaks.toml +1084 -0
- data/lib/gitlab/secret_detection/core/response.rb +37 -0
- data/lib/gitlab/secret_detection/core/ruleset.rb +39 -0
- data/lib/gitlab/secret_detection/core/scanner.rb +274 -0
- data/lib/gitlab/secret_detection/core/status.rb +18 -0
- data/lib/gitlab/secret_detection/core.rb +14 -0
- data/lib/gitlab/secret_detection/grpc/client/grpc_client.rb +21 -0
- data/lib/gitlab/secret_detection/grpc/generated/.gitkeep +0 -0
- data/lib/gitlab/secret_detection/grpc/generated/secret_detection_pb.rb +25 -0
- data/lib/gitlab/secret_detection/grpc/generated/secret_detection_services_pb.rb +30 -0
- data/lib/gitlab/secret_detection/grpc/scanner_service.rb +148 -0
- data/lib/gitlab/secret_detection/grpc.rb +11 -0
- data/lib/gitlab/secret_detection/version.rb +26 -2
- data/lib/gitlab/secret_detection.rb +4 -4
- data/lib/gitlab.rb +6 -0
- data/proto/secret_detection.proto +76 -0
- metadata +70 -28
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module GitLab
|
4
|
+
module SecretDetection
|
5
|
+
module Core
|
6
|
+
# Response is the data object returned by the scan operation with the following structure
#
# +status+:: One of values from GitLab::SecretDetection::Core::Status indicating the scan operation's status
# +results+:: Array of GitLab::SecretDetection::Core::Finding values. Defaults to an empty array.
class Response
  attr_reader :status, :results

  # +status+:: status value describing the outcome of the scan
  # +results+:: findings produced by the scan; a fresh empty array when omitted
  def initialize(status, results = [])
    @status = status
    @results = results
  end

  # Two responses are equal when they are instances of the same class
  # and carry equal +status+ and +results+.
  def ==(other)
    self.class == other.class && other.state == state
  end

  # Strict counterpart of #== used for Hash/Set membership. It is defined together
  # with #hash so that responses behave consistently as hash keys and in Array#uniq.
  def eql?(other)
    self.class == other.class && other.state.eql?(state)
  end

  # Hash code derived from the same state that #eql? compares.
  def hash
    [self.class, state].hash
  end

  # Returns the response as a Hash. Every result is converted through its own
  # +to_h+; a nil +results+ is passed through as nil.
  def to_h
    {
      status:,
      results: results&.map(&:to_h)
    }
  end

  protected

  # Values that define the identity of a response; protected so that
  # another Response can read it during comparison.
  def state
    [status, results]
  end
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'toml-rb'
|
4
|
+
require 'logger'
|
5
|
+
|
6
|
+
module GitLab
|
7
|
+
module SecretDetection
|
8
|
+
module Core
|
9
|
+
# Ruleset loads the secret detection rules from a TOML ruleset file and
# caches the parsed rules for subsequent calls.
class Ruleset
  # file path where the secrets ruleset file is located
  RULESET_FILE_PATH = File.expand_path('gitleaks.toml', __dir__)

  # +path+:: location of the TOML ruleset file. Defaults to +RULESET_FILE_PATH+.
  # +logger+:: logger used to report parse failures. Defaults to a logger writing to stdout.
  def initialize(path: RULESET_FILE_PATH, logger: Logger.new($stdout))
    @path = path
    @logger = logger
  end

  # Returns the frozen value stored under the +:rules+ key of the ruleset file.
  #
  # +force_fetch+:: when true, the file is parsed again even if rules are already cached.
  #
  # Raises Core::Scanner::RulesetParseError when the file cannot be loaded or parsed.
  def rules(force_fetch: false)
    return @rule_data unless @rule_data.nil? || force_fetch

    # plain assignment (not ||=) so that a forced fetch really replaces the cached rules
    @rule_data = parse_ruleset
  end

  private

  attr_reader :path, :logger

  # parses given ruleset file and returns the parsed rules
  def parse_ruleset
    rules_data = TomlRB.load_file(path, symbolize_keys: true).freeze
    rules_data[:rules].freeze
  rescue StandardError => e
    logger.error "Failed to parse secret detection ruleset from '#{path}' path: #{e}"
    raise Core::Scanner::RulesetParseError
  end
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,274 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 're2'
|
4
|
+
require 'logger'
|
5
|
+
require 'timeout'
|
6
|
+
require 'English'
|
7
|
+
|
8
|
+
module GitLab
|
9
|
+
module SecretDetection
|
10
|
+
module Core
|
11
|
+
# Scanner is responsible for running the Secret Detection scan operation
class Scanner
  # RulesetParseError is thrown when the code fails to parse the
  # ruleset file from the given path
  RulesetParseError = Class.new(StandardError)

  # RulesetCompilationError is thrown when the code fails to compile
  # the predefined rulesets
  RulesetCompilationError = Class.new(StandardError)

  # default time limit(in seconds) for running the scan operation per invocation
  DEFAULT_SCAN_TIMEOUT_SECS = 180 # 3 minutes
  # default time limit(in seconds) for running the scan operation on a single payload
  DEFAULT_PAYLOAD_TIMEOUT_SECS = 30 # 30 seconds
  # Tags used for creating default pattern matcher
  DEFAULT_PATTERN_MATCHER_TAGS = ['gitlab_blocking'].freeze

  # Initializes the instance with logger along with following operations:
  # 1. Extract keywords from the parsed ruleset to use it for matching keywords before regex operation.
  # 2. Build and Compile rule regex patterns obtained from the ruleset with +DEFAULT_PATTERN_MATCHER_TAGS+
  # tags. Raises +RulesetCompilationError+ in case the regex pattern compilation fails.
  #
  # +rules+:: Array of rule hashes (symbol keys such as +:id+, +:regex+, +:keywords+, +:tags+, +:description+)
  # +logger+:: logger used for reporting scan errors. Defaults to a logger writing to stdout.
  def initialize(rules:, logger: Logger.new($stdout))
    @logger = logger
    @rules = rules
    @keywords = create_keywords(rules)
    @default_keyword_matcher = build_keyword_matcher(
      tags: DEFAULT_PATTERN_MATCHER_TAGS,
      include_missing_tags: false
    )
    # includes only gitlab_blocking rules; the rules are kept next to the matcher
    # because matcher indices refer to positions in this filtered list
    @default_pattern_matcher, @default_active_rules = build_pattern_matcher(
      tags: DEFAULT_PATTERN_MATCHER_TAGS,
      include_missing_tags: false
    )
  end

  # Runs Secret Detection scan on the list of given payloads. Both the total scan duration and
  # the duration for each payload is time bound via +timeout+ and +payload_timeout+ respectively.
  #
  # +payloads+:: Array of payloads where each payload should have `id` and `data` properties.
  # +timeout+:: No of seconds(accepts floating point for smaller time values) to limit the total scan duration.
  #             nil or non-positive values fall back to +DEFAULT_SCAN_TIMEOUT_SECS+.
  # +payload_timeout+:: No of seconds(accepts floating point for smaller time values) to limit
  #                     the scan duration on each payload. nil or non-positive values fall back
  #                     to +DEFAULT_PAYLOAD_TIMEOUT_SECS+.
  # +rule_exclusions+:: Array of rules to exclude from the ruleset used for the scan. Each rule is represented
  #                     by its ID. For example: `gitlab_personal_access_token` for representing GitLab Personal Access
  #                     Token. By default, no rule is excluded from the ruleset.
  # +allow_values+:: Array of raw values to exclude from the scan.
  # +tags+:: Array of tag values to filter from the default ruleset when determining the rules used for the scan.
  #          For example: Add `gitlab_blocking` to include only rules for Push Protection. Defaults to
  #          [`gitlab_blocking`] (+DEFAULT_PATTERN_MATCHER_TAGS+); nil or empty also selects the default.
  #
  # Returns an instance of GitLab::SecretDetection::Core::Response by following below structure:
  # {
  #     status: One of the Core::Status values
  #     results: [SecretDetection::Finding]
  # }
  #
  def secrets_scan(
    payloads,
    timeout: DEFAULT_SCAN_TIMEOUT_SECS,
    payload_timeout: DEFAULT_PAYLOAD_TIMEOUT_SECS,
    rule_exclusions: [],
    allow_values: [],
    tags: DEFAULT_PATTERN_MATCHER_TAGS
  )
    return Core::Response.new(Core::Status::INPUT_ERROR) unless validate_scan_input(payloads)

    # assign defaults since grpc passing zero timeout value to `Timeout.timeout(..)` makes it effectively useless.
    timeout = DEFAULT_SCAN_TIMEOUT_SECS unless timeout&.positive?
    payload_timeout = DEFAULT_PAYLOAD_TIMEOUT_SECS unless payload_timeout&.positive?
    tags = DEFAULT_PATTERN_MATCHER_TAGS if tags.nil? || tags.empty?

    Timeout.timeout(timeout) do
      keyword_matcher = build_keyword_matcher(tags:)

      matched_payloads = filter_by_keywords(keyword_matcher, payloads)

      next Core::Response.new(Core::Status::NOT_FOUND) if matched_payloads.empty?

      pattern_matcher, active_rules = build_pattern_matcher(tags:)

      secrets = run_scan(
        payloads: matched_payloads, payload_timeout:,
        pattern_matcher:, active_rules:,
        allow_values:, rule_exclusions:
      )

      Core::Response.new(overall_scan_status(secrets), secrets)
    end
  rescue Timeout::Error => e
    logger.error "Secret detection operation timed out: #{e}"

    Core::Response.new(Core::Status::SCAN_TIMEOUT)
  end

  private

  attr_reader :logger, :rules, :keywords, :default_pattern_matcher, :default_keyword_matcher,
    :default_active_rules

  # Decides whether a rule takes part in a scan restricted to +tags+. This single
  # predicate is shared by the keyword matcher and the pattern matcher so that both
  # always operate on the same set of rules:
  # * no tags requested     -> every rule is selected
  # * rule has no tags      -> selected only when +include_missing_tags+ is true
  # * otherwise             -> selected when it shares at least one tag with +tags+
  def rule_selected?(rule, tags:, include_missing_tags:)
    return true if tags.empty?

    rule_tags = rule[:tags]
    return include_missing_tags if rule_tags.nil? || rule_tags.empty?

    tags.intersect?(rule_tags)
  end

  # Builds RE2::Set pattern matcher for the given combination of rules
  # and tags. It also allows a choice(via `include_missing_tags`) to consider rules
  # for pattern matching that do not have `tags` property defined. If the given tags
  # are same as +DEFAULT_PATTERN_MATCHER_TAGS+ then returns the eagerly loaded default
  # pattern matcher created during initialization.
  #
  # Returns a two-element array +[matcher, active_rules]+. +active_rules+ lists the rules
  # in the exact order their patterns were added to the set, so an index reported by
  # the matcher can be translated back to its rule.
  #
  # Raises +RulesetCompilationError+ when the set fails to compile.
  def build_pattern_matcher(tags:, include_missing_tags: false)
    if tags.eql?(DEFAULT_PATTERN_MATCHER_TAGS) && !default_pattern_matcher.nil?
      return [default_pattern_matcher, default_active_rules]
    end

    active_rules = rules.select { |rule| rule_selected?(rule, tags:, include_missing_tags:) }

    matcher = RE2::Set.new
    active_rules.each { |rule| matcher.add(rule[:regex]) }

    unless matcher.compile
      logger.error "Failed to compile secret detection rulesets in RE2::Set"

      raise RulesetCompilationError
    end

    [matcher, active_rules.freeze]
  end

  # Creates and returns the unique set of rule matching keywords
  def create_keywords(rules)
    secrets_keywords = rules.each_with_object(Set.new) do |rule, collected|
      collected.merge(rule[:keywords]) unless rule[:keywords].nil?
    end

    secrets_keywords.freeze
  end

  # Builds a single RE2 regex out of the keywords of all rules selected by +tags+
  # (see #rule_selected?). Returns the eagerly built default matcher when the tags equal
  # +DEFAULT_PATTERN_MATCHER_TAGS+, and nil when the selected rules define no keywords.
  def build_keyword_matcher(tags:, include_missing_tags: false)
    return default_keyword_matcher if tags.eql?(DEFAULT_PATTERN_MATCHER_TAGS) && !default_keyword_matcher.nil?

    include_keywords = Set.new

    rules.each do |rule|
      next unless rule_selected?(rule, tags:, include_missing_tags:)

      include_keywords.merge(rule[:keywords]) unless rule[:keywords].nil?
    end

    return nil if include_keywords.empty?

    # NOTE(review): keywords are interpolated into the regex as-is (not escaped) — confirm
    # that the ruleset never carries regex metacharacters in its keywords.
    RE2("\\b(#{include_keywords.join('|')})")
  end

  # Keeps only the payloads whose data contains at least one keyword. Returns an
  # empty array when there is no keyword matcher to apply.
  def filter_by_keywords(keyword_matcher, payloads)
    return [] if keyword_matcher.nil?

    payloads.select { |payload| keyword_matcher.partial_match?(payload.data) }.freeze
  end

  # Runs the secret detection scan on the given list of payloads. It accepts
  # literal values to exclude from the input before the scan, also SD rules to exclude during
  # the scan when performed on the payloads. A payload exceeding +payload_timeout+ contributes
  # a single PAYLOAD_TIMEOUT finding.
  #
  # +active_rules+:: rules in the order their patterns were added to +pattern_matcher+
  def run_scan(
    payloads:, payload_timeout:, pattern_matcher:, allow_values: [], rule_exclusions: [], active_rules: rules
  )
    payloads.flat_map do |payload|
      Timeout.timeout(payload_timeout) do
        find_secrets_in_payload(
          payload:,
          pattern_matcher:,
          active_rules:,
          allow_values:, rule_exclusions:
        )
      end
    rescue Timeout::Error => e
      logger.error "Secret Detection scan timed out on the payload(id:#{payload.id}): #{e}"
      Core::Finding.new(payload.id,
        Core::Status::PAYLOAD_TIMEOUT)
    end
  end

  # Finds secrets in the given payload. It accepts literal values to exclude from the
  # input before the scan, also SD rules to exclude during the scan. Returns a frozen array
  # of findings, or a single SCAN_ERROR finding when scanning the payload raises.
  #
  # +active_rules+:: rules in the order their patterns were added to +pattern_matcher+
  def find_secrets_in_payload(payload:, pattern_matcher:, allow_values: [], rule_exclusions: [], active_rules: rules)
    findings = []

    payload.data
           .each_line($INPUT_RECORD_SEPARATOR, chomp: true)
           .each_with_index do |line, index|
      # strip every allowed raw value from the line so that it can no longer match a rule
      allow_values.each { |value| line.gsub!(value, '') }

      next if line.empty?

      line_no = index + 1

      matches = pattern_matcher.match(line, exception: false) # returns indices of matched patterns

      matches.each do |match_idx|
        # the index is a position in the list of patterns added to the matcher,
        # which is `active_rules` — not the complete, unfiltered ruleset
        rule = active_rules[match_idx]

        next if rule_exclusions.include?(rule[:id])

        findings << Core::Finding.new(payload.id, Core::Status::FOUND, line_no, rule[:id], rule[:description])
      end
    end

    findings.freeze
  rescue StandardError => e
    logger.error "Secret Detection scan failed on the payload(id:#{payload.id}): #{e}"

    Core::Finding.new(payload.id, Core::Status::SCAN_ERROR)
  end

  # Validates the given payloads by verifying the type and
  # presence of `id` and `data` fields necessary for the scan
  def validate_scan_input(payloads)
    return false unless payloads.is_a?(Array)

    payloads.all? { |payload| payload.respond_to?(:id) && payload.respond_to?(:data) }
  end

  # Returns the status of the overall scan request
  # based on the detected secret findings found in the input payloads
  #
  # NOTE(review): only PAYLOAD_TIMEOUT findings are treated as errors here; SCAN_ERROR
  # findings still lead to an overall FOUND status — confirm that this is intended.
  def overall_scan_status(found_secrets)
    return Core::Status::NOT_FOUND if found_secrets.empty?

    timed_out_payloads = found_secrets.count { |el| el.status == Core::Status::PAYLOAD_TIMEOUT }

    case timed_out_payloads
    when 0
      Core::Status::FOUND
    when found_secrets.length
      Core::Status::SCAN_TIMEOUT
    else
      Core::Status::FOUND_WITH_ERRORS
    end
  end
end
|
272
|
+
end
|
273
|
+
end
|
274
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module GitLab
|
4
|
+
module SecretDetection
|
5
|
+
module Core
|
6
|
+
# Enumerates every status value the scan operation can emit.
class Status
  # Scan operation completed with one or more findings
  FOUND = 1

  # Scan operation completed with one or more findings along with some errors
  FOUND_WITH_ERRORS = 2

  # Scan operation ran beyond the given time out
  SCAN_TIMEOUT = 3

  # Scan operation on a payload ran beyond the given time out
  PAYLOAD_TIMEOUT = 4

  # Scan operation failed due to a regex error
  SCAN_ERROR = 5

  # Scan operation failed due to invalid input
  INPUT_ERROR = 6

  # Scan operation completed with zero findings
  NOT_FOUND = 7
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'core/finding'
|
4
|
+
require_relative 'core/response'
|
5
|
+
require_relative 'core/status'
|
6
|
+
require_relative 'core/scanner'
|
7
|
+
require_relative 'core/ruleset'
|
8
|
+
|
9
|
+
# Declares the GitLab::SecretDetection::Core namespace. The module body is
# intentionally empty: its members come from the files required above.
module GitLab
  module SecretDetection
    module Core
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../generated/secret_detection_pb'
|
4
|
+
require_relative '../generated/secret_detection_services_pb'
|
5
|
+
|
6
|
+
module GitLab
|
7
|
+
module SecretDetection
|
8
|
+
module GRPC
|
9
|
+
# Placeholder for the gRPC client; neither call is implemented yet.
class Client
  # TODO: add implementation
  def scan = raise(NotImplementedError)

  # TODO: add implementation
  def scan_stream = raise(NotImplementedError)
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
File without changes
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
3
|
+
# source: secret_detection.proto
|
4
|
+
|
5
|
+
require 'google/protobuf'
|
6
|
+
|
7
|
+
|
8
|
+
descriptor_data = "\n\x16secret_detection.proto\x12\x17gitlab.secret_detection\"\xdb\x03\n\x0bScanRequest\x12>\n\x08payloads\x18\x01 \x03(\x0b\x32,.gitlab.secret_detection.ScanRequest.Payload\x12\x19\n\x0ctimeout_secs\x18\x02 \x01(\x02H\x00\x88\x01\x01\x12!\n\x14payload_timeout_secs\x18\x03 \x01(\x02H\x01\x88\x01\x01\x12\x42\n\tallowlist\x18\x04 \x03(\x0b\x32/.gitlab.secret_detection.ScanRequest.AllowEntry\x12\x0c\n\x04tags\x18\x05 \x03(\t\x1a#\n\x07Payload\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\t\x1a_\n\nAllowEntry\x12\x42\n\nallow_type\x18\x01 \x01(\x0e\x32..gitlab.secret_detection.ScanRequest.AllowType\x12\r\n\x05value\x18\x02 \x01(\t\"L\n\tAllowType\x12\x15\n\x11\x41LLOW_UNSPECIFIED\x10\x00\x12\x13\n\x0f\x41LLOW_RULE_TYPE\x10\x01\x12\x13\n\x0f\x41LLOW_RAW_VALUE\x10\x02\x42\x0f\n\r_timeout_secsB\x17\n\x15_payload_timeout_secs\"\xe3\x04\n\x0cScanResponse\x12\x12\n\x05\x65rror\x18\x01 \x01(\tH\x00\x88\x01\x01\x12>\n\x07results\x18\x02 \x03(\x0b\x32-.gitlab.secret_detection.ScanResponse.Finding\x12<\n\x06status\x18\x03 \x01(\x0e\x32,.gitlab.secret_detection.ScanResponse.Status\x1a\xe9\x01\n\x07\x46inding\x12\x12\n\npayload_id\x18\x01 \x01(\t\x12<\n\x06status\x18\x02 \x01(\x0e\x32,.gitlab.secret_detection.ScanResponse.Status\x12\x11\n\x04type\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x18\n\x0b\x64\x65scription\x18\x04 \x01(\tH\x01\x88\x01\x01\x12\x18\n\x0bline_number\x18\x05 \x01(\x05H\x02\x88\x01\x01\x12\x12\n\x05\x65rror\x18\x06 
\x01(\tH\x03\x88\x01\x01\x42\x07\n\x05_typeB\x0e\n\x0c_descriptionB\x0e\n\x0c_line_numberB\x08\n\x06_error\"\xca\x01\n\x06Status\x12\x16\n\x12STATUS_UNSPECIFIED\x10\x00\x12\x10\n\x0cSTATUS_FOUND\x10\x01\x12\x1c\n\x18STATUS_FOUND_WITH_ERRORS\x10\x02\x12\x17\n\x13STATUS_SCAN_TIMEOUT\x10\x03\x12\x1a\n\x16STATUS_PAYLOAD_TIMEOUT\x10\x04\x12\x15\n\x11STATUS_SCAN_ERROR\x10\x05\x12\x16\n\x12STATUS_INPUT_ERROR\x10\x06\x12\x14\n\x10STATUS_NOT_FOUND\x10\x07\x42\x08\n\x06_error2\xc1\x01\n\x07Scanner\x12U\n\x04Scan\x12$.gitlab.secret_detection.ScanRequest\x1a%.gitlab.secret_detection.ScanResponse\"\x00\x12_\n\nScanStream\x12$.gitlab.secret_detection.ScanRequest\x1a%.gitlab.secret_detection.ScanResponse\"\x00(\x01\x30\x01\x42 \xea\x02\x1dGitLab::SecretDetection::GRPCb\x06proto3"
|
9
|
+
|
10
|
+
pool = Google::Protobuf::DescriptorPool.generated_pool
|
11
|
+
pool.add_serialized_file(descriptor_data)
|
12
|
+
|
13
|
+
# Ruby constants for the message classes and enum modules of secret_detection.proto.
# Each one is looked up by its fully-qualified proto name in the generated descriptor pool.
# Generated code (see the file header): regenerate from the .proto instead of editing by hand.
module GitLab
|
14
|
+
module SecretDetection
|
15
|
+
module GRPC
|
16
|
+
ScanRequest = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("gitlab.secret_detection.ScanRequest").msgclass
|
17
|
+
ScanRequest::Payload = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("gitlab.secret_detection.ScanRequest.Payload").msgclass
|
18
|
+
ScanRequest::AllowEntry = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("gitlab.secret_detection.ScanRequest.AllowEntry").msgclass
|
19
|
+
ScanRequest::AllowType = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("gitlab.secret_detection.ScanRequest.AllowType").enummodule
|
20
|
+
ScanResponse = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("gitlab.secret_detection.ScanResponse").msgclass
|
21
|
+
ScanResponse::Finding = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("gitlab.secret_detection.ScanResponse.Finding").msgclass
|
22
|
+
ScanResponse::Status = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("gitlab.secret_detection.ScanResponse.Status").enummodule
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
2
|
+
# Source: secret_detection.proto for package 'GitLab.SecretDetection.GRPC'
|
3
|
+
|
4
|
+
require 'grpc'
|
5
|
+
require 'secret_detection_pb'
|
6
|
+
|
7
|
+
module GitLab
|
8
|
+
module SecretDetection
|
9
|
+
module GRPC
|
10
|
+
# Generated gRPC definitions for the 'gitlab.secret_detection.Scanner' service.
# Generated code (see the file header): regenerate from the .proto instead of editing by hand.
module Scanner
|
11
|
+
# Scanner service that scans given payloads and returns findings
|
12
|
+
class Service
|
13
|
+
|
14
|
+
include ::GRPC::GenericService
|
15
|
+
|
16
|
+
self.marshal_class_method = :encode
|
17
|
+
self.unmarshal_class_method = :decode
|
18
|
+
self.service_name = 'gitlab.secret_detection.Scanner'
|
19
|
+
|
20
|
+
# Runs secret detection scan for the given request
|
21
|
+
rpc :Scan, ::GitLab::SecretDetection::GRPC::ScanRequest, ::GitLab::SecretDetection::GRPC::ScanResponse
|
22
|
+
# Runs bi-directional streaming of scans for the given stream of requests with a stream of responses
|
23
|
+
rpc :ScanStream, stream(::GitLab::SecretDetection::GRPC::ScanRequest), stream(::GitLab::SecretDetection::GRPC::ScanResponse)
|
24
|
+
end
|
25
|
+
|
26
|
+
# Class returned by Service.rpc_stub_class; presumably the client-side stub for the
|
27
|
# service (grpc-ruby convention) — TODO confirm.
Stub = Service.rpc_stub_class
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,148 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
$LOAD_PATH.unshift(File.expand_path('generated', __dir__))
|
4
|
+
|
5
|
+
require 'grpc'
|
6
|
+
|
7
|
+
require_relative 'generated/secret_detection_pb'
|
8
|
+
require_relative 'generated/secret_detection_services_pb'
|
9
|
+
|
10
|
+
require_relative '../core'
|
11
|
+
require_relative '../../../../config/log'
|
12
|
+
|
13
|
+
# StreamEnumerator backs bi-directional streaming: it walks a stream of
# requests and yields the response produced for each of them.
class StreamEnumerator
  # +requests+:: enumerable of incoming requests
  # +action+:: callable invoked with each request; its return value is the response
  def initialize(requests, action)
    @requests = requests
    @request_action = action
  end

  # Yields the response for every request, in order. Returns an Enumerator
  # over the responses when called without a block.
  def each_item
    return to_enum(__method__) unless block_given?

    @requests.each { |request| yield @request_action.call(request) }
  end
end
|
29
|
+
|
30
|
+
module GitLab
|
31
|
+
module SecretDetection
|
32
|
+
module GRPC
|
33
|
+
# gRPC implementation of the Scanner service: validates incoming scan requests,
# runs them through GitLab::SecretDetection::Core::Scanner and converts the
# outcome into ScanResponse messages.
class ScannerService < Scanner::Service
  include SDLogger

  # Maximum timeout value that can be given as the input. This guards
  # against the misuse of timeouts.
  MAX_ALLOWED_TIMEOUT_SECONDS = 600

  # Allowlist entry types the service knows how to apply
  VALID_ALLOW_TYPES = %i[ALLOW_RULE_TYPE ALLOW_RAW_VALUE].freeze

  ERROR_MESSAGES = {
    invalid_payload_fields: "Payload should not contain empty `id` and `data` fields",
    allowlist_empty_value: "Allowlist entry value cannot be empty",
    allowlist_invalid_type: "Invalid Allowlist entry type",
    invalid_timeout_range: "Timeout value should be > 0 and <= #{MAX_ALLOWED_TIMEOUT_SECONDS} seconds"
  }.freeze

  # Implementation for /Scan RPC method
  def scan(request, _call)
    scan_request_action(request)
  end

  # Implementation for /ScanStream RPC method. Returns an Enumerator that
  # produces one response per incoming request.
  def scan_stream(requests, _call)
    request_action = ->(r) { scan_request_action(r) }
    StreamEnumerator.new(requests, request_action).each_item
  end

  private

  # Validates the request, runs the scan and builds the ScanResponse.
  # Raises ::GRPC::InvalidArgument when the request fails validation.
  def scan_request_action(request)
    validate_request(request)

    payloads = request.payloads.to_a

    # split the allowlist into rule IDs to skip and raw values to ignore
    rule_exclusions = []
    allow_values = []
    request.allowlist&.each do |entry|
      case entry.allow_type
      when :ALLOW_RULE_TYPE
        rule_exclusions << entry.value
      when :ALLOW_RAW_VALUE
        allow_values << entry.value
      end
    end

    result = scanner.secrets_scan(
      payloads,
      rule_exclusions:,
      allow_values:,
      tags: request.tags.to_a,
      timeout: request.timeout_secs,
      payload_timeout: request.payload_timeout_secs
    )

    findings = result.results&.map do |finding|
      GitLab::SecretDetection::GRPC::ScanResponse::Finding.new(**finding.to_h)
    end

    GitLab::SecretDetection::GRPC::ScanResponse.new(
      results: findings,
      status: result.status
    )
  end

  # Core scanner, built once per service instance and reused across requests
  def scanner
    @scanner ||= GitLab::SecretDetection::Core::Scanner.new(rules:, logger:)
  end

  # Rules loaded from the default ruleset file
  def rules
    GitLab::SecretDetection::Core::Ruleset.new.rules
  end

  # validates grpc request body
  def validate_request(request)
    # check for non-blank values and allowed types
    request.allowlist&.each do |entry|
      invalid_argument!(:allowlist_empty_value, "allowlist.value") if entry.value.empty?

      # an unspecified or unknown type would otherwise be dropped silently by the scan
      invalid_argument!(:allowlist_invalid_type, "allowlist.allow_type") unless VALID_ALLOW_TYPES.include?(entry.allow_type)
    end

    invalid_argument!(:invalid_timeout_range, "timeout_secs") unless valid_timeout_range?(request.timeout_secs)

    unless valid_timeout_range?(request.payload_timeout_secs)
      invalid_argument!(:invalid_timeout_range, "payload_timeout_secs")
    end

    # check for required payload fields
    request.payloads.to_a.each_with_index do |payload, index|
      invalid_argument!(:invalid_payload_fields, "payloads[#{index}].id") if !payload.respond_to?(:id) || payload.id.empty?

      next if payload.respond_to?(:data)

      invalid_argument!(:invalid_payload_fields, "payloads[#{index}].data")
    end
  end

  # Raises ::GRPC::InvalidArgument carrying the message registered under
  # +message_key+ and the name of the offending field as metadata.
  def invalid_argument!(message_key, field)
    raise ::GRPC::InvalidArgument.new(ERROR_MESSAGES[message_key], { field: })
  end

  # checks if the given timeout value is within range. Zero is accepted on purpose:
  # the core scanner substitutes its default for non-positive timeouts.
  def valid_timeout_range?(timeout_value)
    timeout_value >= 0 && timeout_value <= MAX_ALLOWED_TIMEOUT_SECONDS
  end
end
|
146
|
+
end
|
147
|
+
end
|
148
|
+
end
|
@@ -1,7 +1,31 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
module
|
3
|
+
module GitLab
  module SecretDetection
    # Resolves the version string of the gem from the environment.
    #
    # NOTE: inside GitLab::SecretDetection the bare constant +Gem+ refers to this
    # class; use +::Gem+ to reach RubyGems.
    class Gem
      # Base version used whenever the environment does not supply a semantic version
      DEFAULT_VERSION = "0.0.1"

      # MAJOR.MINOR.PATCH with optional pre-release and build metadata. Anchored with
      # \A and \z so that the whole value must match; ^ and $ only anchor single
      # lines and would accept a multi-line value.
      SEMVER_REGEX = /\A\d+\.\d+\.\d+(?:-[a-zA-Z0-9\-\.]+)?(?:\+[a-zA-Z0-9\-\.]+)?\z/

      # Returns the release version derived from the SD_GEM_RELEASE_VERSION env var:
      # * unset or empty   -> "<DEFAULT_VERSION>-debug" (local environment only)
      # * semantic version -> the value itself
      # * anything else    -> "<DEFAULT_VERSION>-<value>"
      #
      # Raises LoadError when the variable is missing outside the local environment.
      def self.get_release_version
        release_version = ENV.fetch("SD_GEM_RELEASE_VERSION", "")

        if release_version.empty?
          raise LoadError, "Missing SD_GEM_RELEASE_VERSION environment variable." unless local_env?

          "#{DEFAULT_VERSION}-debug"
        elsif release_version.match?(SEMVER_REGEX)
          release_version
        else
          "#{DEFAULT_VERSION}-#{release_version}"
        end
      end

      # SD_ENV env var is used to determine which environment the
      # server is running. This var is defined in `.runway/env-<env>.yml` files.
      # An unset SD_ENV counts as the local environment.
      def self.local_env?
        ENV.fetch('SD_ENV', 'localhost') == 'localhost'
      end
    end
  end
end
|
@@ -1,10 +1,10 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_relative
|
3
|
+
require_relative 'secret_detection/core'
|
4
|
+
require_relative 'secret_detection/grpc'
|
5
|
+
require_relative 'secret_detection/version'
|
4
6
|
|
5
|
-
module
|
7
|
+
# Declares the GitLab::SecretDetection namespace. The module body is
# intentionally empty: its members come from the files required above.
module GitLab
  module SecretDetection
  end
end
|