gitlab-secret_detection 0.4.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -1
- data/lib/gitlab/secret_detection/core/response.rb +7 -3
- data/lib/gitlab/secret_detection/core/scanner.rb +81 -3
- data/lib/gitlab/secret_detection/core/status.rb +1 -0
- data/lib/gitlab/secret_detection/grpc/client/grpc_client.rb +131 -7
- data/lib/gitlab/secret_detection/grpc/client/stream_request_enumerator.rb +24 -0
- data/lib/gitlab/secret_detection/grpc/generated/secret_detection_pb.rb +1 -1
- data/lib/gitlab/secret_detection/grpc/scanner_service.rb +13 -8
- data/lib/gitlab/secret_detection/grpc.rb +1 -0
- data/lib/gitlab/secret_detection/utils/certificate.rb +108 -0
- data/lib/gitlab/secret_detection/utils/memoize.rb +151 -0
- data/lib/gitlab/secret_detection/utils.rb +11 -0
- data/lib/gitlab/secret_detection.rb +1 -0
- data/proto/secret_detection.proto +7 -8
- metadata +22 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 648b6d5277ac8e7948762533af39dc2f4ff0ae4c62fbdcc6d5d32615a44ab815
|
4
|
+
data.tar.gz: afd2a580cb0a73bb84a401616ec622e1e14d2b75021036a959f933ed75864cf6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7c49b71891f6e13d8dc252936f18df838d8e9d42000cbc1f4f3d7fe56b258a7e2c17bce7a7f0e771bc16991fa46bacf641a636a1d1efd3ef23de6b0a31a011b8
|
7
|
+
data.tar.gz: af93c82a0025be12bc82fe6ff62b290fe193a2338ae2f98d83bc2729d1c7cc35a223330a3a9907a9f715c009127e93ba2d13fc33982d2f13a179d8548f5cc36e
|
data/README.md
CHANGED
@@ -42,6 +42,7 @@ the approach:
|
|
42
42
|
│ ├── core/.. # Secret detection logic (most of it pulled from existing gem)
|
43
43
|
│ └── grpc
|
44
44
|
│ ├── generated/.. # gRPC generated files and secret detection gRPC service
|
45
|
+
│ ├── client/.. # gRPC client to invoke secret detection service's RPC endpoints
|
45
46
|
│ └── scanner_service.rb # Secret detection gRPC service implementation
|
46
47
|
├── examples
|
47
48
|
│ └── sample-client/.. # Sample Ruby RPC client that connects with gRPC server and calls RPC scan
|
@@ -320,7 +321,7 @@ Run `ruby examples/sample-client/sample_client.rb` on your terminal to run the s
|
|
320
321
|
|
321
322
|
## Benchmark
|
322
323
|
|
323
|
-
RPC service is benchmarked using [`ghz`](https://ghz.sh), a powerful CLI-based tool for load testing and benchmarking gRPC services. More details added [here](
|
324
|
+
RPC service is benchmarked using [`ghz`](https://ghz.sh), a powerful CLI-based tool for load testing and benchmarking gRPC services. More details added [here](https://gitlab.com/gitlab-org/gitlab/-/work_items/468107).
|
324
325
|
|
325
326
|
## Project Status
|
326
327
|
|
@@ -7,12 +7,15 @@ module GitLab
|
|
7
7
|
#
|
8
8
|
# +status+:: One of values from GitLab::SecretDetection::Core::Status indicating the scan operation's status
|
9
9
|
# +results+:: Array of GitLab::SecretDetection::Core::Finding values. Default value is an empty array.
|
10
|
+
# +metadata+:: Hash object containing additional meta information about the response. It is currently used
|
11
|
+
# to embed more information on error.
|
10
12
|
class Response
|
11
|
-
attr_reader :status, :results
|
13
|
+
attr_reader :status, :results, :metadata
|
12
14
|
|
13
|
-
def initialize(status, results = [])
|
15
|
+
def initialize(status, results = [], metadata = {})
|
14
16
|
@status = status
|
15
17
|
@results = results
|
18
|
+
@metadata = metadata
|
16
19
|
end
|
17
20
|
|
18
21
|
def ==(other)
|
@@ -22,6 +25,7 @@ module GitLab
|
|
22
25
|
def to_h
|
23
26
|
{
|
24
27
|
status:,
|
28
|
+
metadata:,
|
25
29
|
results: results&.map(&:to_h)
|
26
30
|
}
|
27
31
|
end
|
@@ -29,7 +33,7 @@ module GitLab
|
|
29
33
|
protected
|
30
34
|
|
31
35
|
def state
|
32
|
-
[status, results]
|
36
|
+
[status, metadata, results]
|
33
37
|
end
|
34
38
|
end
|
35
39
|
end
|
@@ -4,6 +4,7 @@ require 're2'
|
|
4
4
|
require 'logger'
|
5
5
|
require 'timeout'
|
6
6
|
require 'English'
|
7
|
+
require 'parallel'
|
7
8
|
|
8
9
|
module GitLab
|
9
10
|
module SecretDetection
|
@@ -24,6 +25,14 @@ module GitLab
|
|
24
25
|
DEFAULT_PAYLOAD_TIMEOUT_SECS = 30 # 30 seconds
|
25
26
|
# Tags used for creating default pattern matcher
|
26
27
|
DEFAULT_PATTERN_MATCHER_TAGS = ['gitlab_blocking'].freeze
|
28
|
+
# Maximum number of child processes to spawn per request
|
29
|
+
# ref: https://gitlab.com/gitlab-org/gitlab/-/issues/430160
|
30
|
+
MAX_PROCS_PER_REQUEST = 5
|
31
|
+
# Minimum cumulative size of the payloads required to spawn and
|
32
|
+
# run the scan within a new subprocess.
|
33
|
+
MIN_CHUNK_SIZE_PER_PROC_BYTES = 2_097_152 # 2MiB
|
34
|
+
# Whether to run scan in subprocesses or not. Default is false.
|
35
|
+
RUN_IN_SUBPROCESS = false
|
27
36
|
|
28
37
|
# Initializes the instance with logger along with following operations:
|
29
38
|
# 1. Extract keywords from the parsed ruleset to use it for matching keywords before regex operation.
|
@@ -58,6 +67,13 @@ module GitLab
|
|
58
67
|
# For example: Add `gitlab_blocking` to include only rules for Push Protection. Defaults to
|
59
68
|
# [`gitlab_blocking`] (+DEFAULT_PATTERN_MATCHER_TAGS+).
|
60
69
|
#
|
70
|
+
# NOTE:
|
71
|
+
# Running the scan in fork mode primarily focuses on reducing the memory consumption of the scan by
|
72
|
+
# offloading regex operations on large payloads to sub-processes. However, it does not assure the improvement
|
73
|
+
# in the overall latency of the scan, specifically in the case of smaller payloads, where the overhead of
|
74
|
+
# forking a new process adds to the overall latency of the scan instead. More reference on Subprocess-based
|
75
|
+
# execution is found here: https://gitlab.com/gitlab-org/gitlab/-/issues/430160.
|
76
|
+
#
|
61
77
|
# Returns an instance of GitLab::SecretDetection::Core::Response by following below structure:
|
62
78
|
# {
|
63
79
|
# status: One of the Core::Status values
|
@@ -70,7 +86,8 @@ module GitLab
|
|
70
86
|
payload_timeout: DEFAULT_PAYLOAD_TIMEOUT_SECS,
|
71
87
|
raw_value_exclusions: [],
|
72
88
|
rule_exclusions: [],
|
73
|
-
tags: DEFAULT_PATTERN_MATCHER_TAGS
|
89
|
+
tags: DEFAULT_PATTERN_MATCHER_TAGS,
|
90
|
+
subprocess: RUN_IN_SUBPROCESS
|
74
91
|
)
|
75
92
|
|
76
93
|
return Core::Response.new(Core::Status::INPUT_ERROR) unless validate_scan_input(payloads)
|
@@ -87,11 +104,13 @@ module GitLab
|
|
87
104
|
|
88
105
|
next Core::Response.new(Core::Status::NOT_FOUND) if matched_payloads.empty?
|
89
106
|
|
90
|
-
|
107
|
+
scan_args = {
|
91
108
|
payloads: matched_payloads, payload_timeout:,
|
92
109
|
pattern_matcher: build_pattern_matcher(tags:),
|
93
110
|
raw_value_exclusions:, rule_exclusions:
|
94
|
-
|
111
|
+
}
|
112
|
+
|
113
|
+
secrets = subprocess ? run_scan_within_subprocess(**scan_args) : run_scan(**scan_args)
|
95
114
|
|
96
115
|
scan_status = overall_scan_status(secrets)
|
97
116
|
|
@@ -205,6 +224,36 @@ module GitLab
|
|
205
224
|
end
|
206
225
|
end
|
207
226
|
|
227
|
+
def run_scan_within_subprocess(
|
228
|
+
payloads:, payload_timeout:, pattern_matcher:, raw_value_exclusions: [],
|
229
|
+
rule_exclusions: [])
|
230
|
+
payload_sizes = payloads.map(&:size)
|
231
|
+
grouped_payload_indices = group_by_chunk_size(payload_sizes)
|
232
|
+
|
233
|
+
grouped_payloads = grouped_payload_indices.map { |idx_arr| idx_arr.map { |i| payloads[i] } }
|
234
|
+
|
235
|
+
found_secrets = Parallel.flat_map(
|
236
|
+
grouped_payloads,
|
237
|
+
in_processes: MAX_PROCS_PER_REQUEST,
|
238
|
+
isolation: true # do not reuse sub-processes
|
239
|
+
) do |grouped_payload|
|
240
|
+
grouped_payload.flat_map do |payload|
|
241
|
+
Timeout.timeout(payload_timeout) do
|
242
|
+
find_secrets_in_payload(
|
243
|
+
payload:,
|
244
|
+
pattern_matcher:,
|
245
|
+
raw_value_exclusions:, rule_exclusions:
|
246
|
+
)
|
247
|
+
end
|
248
|
+
rescue Timeout::Error => e
|
249
|
+
logger.error "Secret Detection scan timed out on the payload(id:#{payload.id}): #{e}"
|
250
|
+
Core::Finding.new(payload.id, Core::Status::PAYLOAD_TIMEOUT)
|
251
|
+
end
|
252
|
+
end
|
253
|
+
|
254
|
+
found_secrets.freeze
|
255
|
+
end
|
256
|
+
|
208
257
|
# Finds secrets in the given payload guarded with a timeout as a circuit breaker. It accepts
|
209
258
|
# literal values to exclude from the input before the scan, also SD rules to exclude during
|
210
259
|
# the scan.
|
@@ -268,6 +317,35 @@ module GitLab
|
|
268
317
|
Core::Status::FOUND_WITH_ERRORS
|
269
318
|
end
|
270
319
|
end
|
320
|
+
|
321
|
+
# This method accepts an array of payload sizes(in bytes) and groups them into an array
|
322
|
+
# of arrays structure where each element is the group of indices of the input
|
323
|
+
# array whose cumulative payload size is at least +MIN_CHUNK_SIZE_PER_PROC_BYTES+ (the final group may be smaller)
|
324
|
+
def group_by_chunk_size(payload_size_arr)
|
325
|
+
cumulative_size = 0
|
326
|
+
chunk_indexes = []
|
327
|
+
chunk_idx_start = 0
|
328
|
+
|
329
|
+
payload_size_arr.each_with_index do |size, index|
|
330
|
+
cumulative_size += size
|
331
|
+
next unless cumulative_size >= MIN_CHUNK_SIZE_PER_PROC_BYTES
|
332
|
+
|
333
|
+
chunk_indexes << (chunk_idx_start..index).to_a
|
334
|
+
|
335
|
+
chunk_idx_start = index + 1
|
336
|
+
cumulative_size = 0
|
337
|
+
end
|
338
|
+
|
339
|
+
if cumulative_size.positive? && (chunk_idx_start < payload_size_arr.length)
|
340
|
+
chunk_indexes << if chunk_idx_start == payload_size_arr.length - 1
|
341
|
+
[chunk_idx_start]
|
342
|
+
else
|
343
|
+
(chunk_idx_start..payload_size_arr.length - 1).to_a
|
344
|
+
end
|
345
|
+
end
|
346
|
+
|
347
|
+
chunk_indexes
|
348
|
+
end
|
271
349
|
end
|
272
350
|
end
|
273
351
|
end
|
@@ -12,6 +12,7 @@ module GitLab
|
|
12
12
|
SCAN_ERROR = 5 # When the scan operation fails due to regex error
|
13
13
|
INPUT_ERROR = 6 # When the scan operation fails due to invalid input
|
14
14
|
NOT_FOUND = 7 # When scan operation completes with zero findings
|
15
|
+
AUTH_ERROR = 8 # When authentication fails
|
15
16
|
end
|
16
17
|
end
|
17
18
|
end
|
@@ -1,19 +1,143 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
require_relative '
|
3
|
+
require 'grpc'
|
4
|
+
require_relative '../../grpc/scanner_service'
|
5
|
+
require_relative '../../core/response'
|
6
|
+
require_relative '../../core/status'
|
7
|
+
require_relative '../../utils'
|
8
|
+
require_relative './stream_request_enumerator'
|
5
9
|
|
6
10
|
module GitLab
|
7
11
|
module SecretDetection
|
8
12
|
module GRPC
|
9
13
|
class Client
|
10
|
-
|
11
|
-
|
12
|
-
|
14
|
+
include SecretDetection::Utils::StrongMemoize
|
15
|
+
include SDLogger
|
16
|
+
|
17
|
+
# Time to wait for the response from the service
|
18
|
+
REQUEST_TIMEOUT_SECONDS = 10 # 10 seconds
|
19
|
+
|
20
|
+
def initialize(host, secure: false, compression: true)
|
21
|
+
@host = host
|
22
|
+
@secure = secure
|
23
|
+
@compression = compression
|
24
|
+
end
|
25
|
+
|
26
|
+
# Triggers Secret Detection service's `/Scan` gRPC endpoint. To keep it consistent with SDS gem interface,
|
27
|
+
# this method transforms the gRPC response to +GitLab::SecretDetection::Core::Response+.
|
28
|
+
# Furthermore, any errors that are raised by the service will be translated to
|
29
|
+
# +GitLab::SecretDetection::Core::Response+ type by assigning an appropriate +status+ value to it.
|
30
|
+
def run_scan(request:, auth_token:, extra_headers: {})
|
31
|
+
with_rescued_errors do
|
32
|
+
grpc_response = stub.scan(
|
33
|
+
request,
|
34
|
+
metadata: build_metadata(auth_token, extra_headers),
|
35
|
+
deadline: request_deadline
|
36
|
+
)
|
37
|
+
|
38
|
+
convert_to_core_response(grpc_response)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
# Triggers Secret Detection service's `/ScanStream` gRPC endpoint.
|
43
|
+
#
|
44
|
+
# To keep it consistent with SDS gem interface, this method transforms the gRPC response to
|
45
|
+
# +GitLab::SecretDetection::Core::Response+ type. Furthermore, any errors that are raised by the service will be
|
46
|
+
# translated to +GitLab::SecretDetection::Core::Response+ type by assigning an appropriate +status+ value to it.
|
47
|
+
#
|
48
|
+
# Note: If one of the stream requests results in an error, the stream will end immediately without processing the
|
49
|
+
# remaining requests.
|
50
|
+
def run_scan_stream(requests:, auth_token:, extra_headers: {})
|
51
|
+
request_stream = GitLab::SecretDetection::GRPC::StreamRequestEnumerator.new(requests)
|
52
|
+
results = []
|
53
|
+
with_rescued_errors do
|
54
|
+
stub.scan_stream(
|
55
|
+
request_stream.each_item,
|
56
|
+
metadata: build_metadata(auth_token, extra_headers),
|
57
|
+
deadline: request_deadline
|
58
|
+
).each do |grpc_response|
|
59
|
+
response = convert_to_core_response(grpc_response)
|
60
|
+
if block_given?
|
61
|
+
yield response
|
62
|
+
else
|
63
|
+
results << response
|
64
|
+
end
|
65
|
+
end
|
66
|
+
results
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
private
|
71
|
+
|
72
|
+
attr_reader :secure, :host, :compression
|
73
|
+
|
74
|
+
def stub
|
75
|
+
GitLab::SecretDetection::GRPC::Scanner::Stub.new(
|
76
|
+
host,
|
77
|
+
channel_credentials,
|
78
|
+
channel_args:
|
79
|
+
)
|
80
|
+
end
|
81
|
+
|
82
|
+
strong_memoize_attr :stub
|
83
|
+
|
84
|
+
def channel_args
|
85
|
+
default_options = {
|
86
|
+
'grpc.keepalive_permit_without_calls' => 1,
|
87
|
+
'grpc.keepalive_time_ms' => 30000, # 30 seconds
|
88
|
+
'grpc.keepalive_timeout_ms' => 10000 # 10 seconds timeout for keepalive response
|
89
|
+
}
|
90
|
+
|
91
|
+
compression_options = ::GRPC::Core::CompressionOptions
|
92
|
+
.new(default_algorithm: :gzip)
|
93
|
+
.to_channel_arg_hash
|
94
|
+
|
95
|
+
default_options.merge!(compression_options) if compression
|
96
|
+
|
97
|
+
default_options.freeze
|
98
|
+
end
|
99
|
+
|
100
|
+
def channel_credentials
|
101
|
+
return :this_channel_is_insecure unless secure
|
102
|
+
|
103
|
+
certs = GitLab::SecretDetection::Utils::X509::Certificate.ca_certs_bundle
|
104
|
+
|
105
|
+
::GRPC::Core::ChannelCredentials.new(certs)
|
13
106
|
end
|
14
107
|
|
15
|
-
def
|
16
|
-
|
108
|
+
def build_metadata(token, extra_headers = {})
|
109
|
+
{ 'x-sd-auth' => token }.merge!(extra_headers).freeze
|
110
|
+
end
|
111
|
+
|
112
|
+
def request_deadline
|
113
|
+
Time.now + REQUEST_TIMEOUT_SECONDS
|
114
|
+
end
|
115
|
+
|
116
|
+
def with_rescued_errors
|
117
|
+
yield
|
118
|
+
rescue ::GRPC::Unauthenticated
|
119
|
+
SecretDetection::Core::Response.new(SecretDetection::Core::Status::AUTH_ERROR)
|
120
|
+
rescue ::GRPC::InvalidArgument => e
|
121
|
+
SecretDetection::Core::Response.new(
|
122
|
+
SecretDetection::Core::Status::INPUT_ERROR, nil, { message: e.details, **e.metadata }
|
123
|
+
)
|
124
|
+
rescue ::GRPC::Unknown, ::GRPC::BadStatus => e
|
125
|
+
SecretDetection::Core::Response.new(
|
126
|
+
SecretDetection::Core::Status::SCAN_ERROR, nil, { message: e.details }
|
127
|
+
)
|
128
|
+
end
|
129
|
+
|
130
|
+
def convert_to_core_response(grpc_response)
|
131
|
+
response = grpc_response.to_h
|
132
|
+
|
133
|
+
SecretDetection::Core::Response.new(
|
134
|
+
response[:status],
|
135
|
+
response[:results],
|
136
|
+
response[:metadata]
|
137
|
+
)
|
138
|
+
rescue StandardError => e
|
139
|
+
logger.error("Failed to convert to core response: #{e}")
|
140
|
+
SecretDetection::Core::Response.new(SecretDetection::Core::Status::SCAN_ERROR)
|
17
141
|
end
|
18
142
|
end
|
19
143
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module GitLab
|
4
|
+
module SecretDetection
|
5
|
+
module GRPC
|
6
|
+
class StreamRequestEnumerator
|
7
|
+
def initialize(requests = [])
|
8
|
+
@requests = requests
|
9
|
+
end
|
10
|
+
|
11
|
+
# yields each request from the given collection, in order
|
12
|
+
#
|
13
|
+
# @return an Enumerable that yields a request input
|
14
|
+
def each_item
|
15
|
+
return enum_for(:each_item) unless block_given?
|
16
|
+
|
17
|
+
@requests.each do |request|
|
18
|
+
yield request
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -5,7 +5,7 @@
|
|
5
5
|
require 'google/protobuf'
|
6
6
|
|
7
7
|
|
8
|
-
descriptor_data = "\n\x16secret_detection.proto\x12\x17gitlab.secret_detection\"\xfc\x03\n\x0bScanRequest\x12>\n\x08payloads\x18\x01 \x03(\x0b\x32,.gitlab.secret_detection.ScanRequest.Payload\x12\x19\n\x0ctimeout_secs\x18\x02 \x01(\x02H\x00\x88\x01\x01\x12!\n\x14payload_timeout_secs\x18\x03 \x01(\x02H\x01\x88\x01\x01\x12\x42\n\nexclusions\x18\x04 \x03(\x0b\x32..gitlab.secret_detection.ScanRequest.Exclusion\x12\x0c\n\x04tags\x18\x05 \x03(\t\x1a#\n\x07Payload\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\t\x1a\x66\n\tExclusion\x12J\n\x0e\x65xclusion_type\x18\x01 \x01(\x0e\x32\x32.gitlab.secret_detection.ScanRequest.ExclusionType\x12\r\n\x05value\x18\x02 \x01(\t\"f\n\rExclusionType\x12\x1e\n\x1a\x45XCLUSION_TYPE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XCLUSION_TYPE_RULE\x10\x01\x12\x1c\n\x18\x45XCLUSION_TYPE_RAW_VALUE\x10\x02\x42\x0f\n\r_timeout_secsB\x17\n\x15_payload_timeout_secs\"\
|
8
|
+
descriptor_data = "\n\x16secret_detection.proto\x12\x17gitlab.secret_detection\"\xfc\x03\n\x0bScanRequest\x12>\n\x08payloads\x18\x01 \x03(\x0b\x32,.gitlab.secret_detection.ScanRequest.Payload\x12\x19\n\x0ctimeout_secs\x18\x02 \x01(\x02H\x00\x88\x01\x01\x12!\n\x14payload_timeout_secs\x18\x03 \x01(\x02H\x01\x88\x01\x01\x12\x42\n\nexclusions\x18\x04 \x03(\x0b\x32..gitlab.secret_detection.ScanRequest.Exclusion\x12\x0c\n\x04tags\x18\x05 \x03(\t\x1a#\n\x07Payload\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\t\x1a\x66\n\tExclusion\x12J\n\x0e\x65xclusion_type\x18\x01 \x01(\x0e\x32\x32.gitlab.secret_detection.ScanRequest.ExclusionType\x12\r\n\x05value\x18\x02 \x01(\t\"f\n\rExclusionType\x12\x1e\n\x1a\x45XCLUSION_TYPE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XCLUSION_TYPE_RULE\x10\x01\x12\x1c\n\x18\x45XCLUSION_TYPE_RAW_VALUE\x10\x02\x42\x0f\n\r_timeout_secsB\x17\n\x15_payload_timeout_secs\"\xe2\x03\n\x0cScanResponse\x12>\n\x07results\x18\x01 \x03(\x0b\x32-.gitlab.secret_detection.ScanResponse.Finding\x12\x0e\n\x06status\x18\x02 \x01(\x05\x1a\x9d\x01\n\x07\x46inding\x12\x12\n\npayload_id\x18\x01 \x01(\t\x12\x0e\n\x06status\x18\x02 \x01(\x05\x12\x11\n\x04type\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x18\n\x0b\x64\x65scription\x18\x04 \x01(\tH\x01\x88\x01\x01\x12\x18\n\x0bline_number\x18\x05 
\x01(\x05H\x02\x88\x01\x01\x42\x07\n\x05_typeB\x0e\n\x0c_descriptionB\x0e\n\x0c_line_number\"\xe1\x01\n\x06Status\x12\x16\n\x12STATUS_UNSPECIFIED\x10\x00\x12\x10\n\x0cSTATUS_FOUND\x10\x01\x12\x1c\n\x18STATUS_FOUND_WITH_ERRORS\x10\x02\x12\x17\n\x13STATUS_SCAN_TIMEOUT\x10\x03\x12\x1a\n\x16STATUS_PAYLOAD_TIMEOUT\x10\x04\x12\x15\n\x11STATUS_SCAN_ERROR\x10\x05\x12\x16\n\x12STATUS_INPUT_ERROR\x10\x06\x12\x14\n\x10STATUS_NOT_FOUND\x10\x07\x12\x15\n\x11STATUS_AUTH_ERROR\x10\x08\x32\xc1\x01\n\x07Scanner\x12U\n\x04Scan\x12$.gitlab.secret_detection.ScanRequest\x1a%.gitlab.secret_detection.ScanResponse\"\x00\x12_\n\nScanStream\x12$.gitlab.secret_detection.ScanRequest\x1a%.gitlab.secret_detection.ScanResponse\"\x00(\x01\x30\x01\x42 \xea\x02\x1dGitLab::SecretDetection::GRPCb\x06proto3"
|
9
9
|
|
10
10
|
pool = Google::Protobuf::DescriptorPool.generated_pool
|
11
11
|
pool.add_serialized_file(descriptor_data)
|
@@ -74,14 +74,19 @@ module GitLab
|
|
74
74
|
end
|
75
75
|
end
|
76
76
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
77
|
+
begin
|
78
|
+
result = scanner.secrets_scan(
|
79
|
+
payloads,
|
80
|
+
raw_value_exclusions:,
|
81
|
+
rule_exclusions:,
|
82
|
+
tags: request.tags.to_a,
|
83
|
+
timeout: request.timeout_secs,
|
84
|
+
payload_timeout: request.payload_timeout_secs
|
85
|
+
)
|
86
|
+
rescue StandardError => e
|
87
|
+
logger.error("Failed to run the scan: #{e}")
|
88
|
+
raise ::GRPC::Unknown, e.message
|
89
|
+
end
|
85
90
|
|
86
91
|
findings = result.results&.map do |finding|
|
87
92
|
GitLab::SecretDetection::GRPC::ScanResponse::Finding.new(**finding.to_h)
|
@@ -0,0 +1,108 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'openssl'
|
4
|
+
require_relative 'memoize'
|
5
|
+
|
6
|
+
module GitLab
|
7
|
+
module SecretDetection
|
8
|
+
module Utils
|
9
|
+
module X509
|
10
|
+
# Pulled from GitLab.com source
|
11
|
+
# Link: https://gitlab.com/gitlab-org/gitlab/-/blob/4713a798f997389f04e442db3d1d8349a39d5d46/lib/gitlab/x509/certificate.rb
|
12
|
+
class Certificate
|
13
|
+
CERT_REGEX = /-----BEGIN CERTIFICATE-----(?:.|\n)+?-----END CERTIFICATE-----/
|
14
|
+
|
15
|
+
attr_reader :key, :cert, :ca_certs
|
16
|
+
|
17
|
+
def self.default_cert_dir
|
18
|
+
strong_memoize(:default_cert_dir) do
|
19
|
+
ENV.fetch('SSL_CERT_DIR', OpenSSL::X509::DEFAULT_CERT_DIR)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.default_cert_file
|
24
|
+
strong_memoize(:default_cert_file) do
|
25
|
+
ENV.fetch('SSL_CERT_FILE', OpenSSL::X509::DEFAULT_CERT_FILE)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.from_strings(key_string, cert_string, ca_certs_string = nil)
|
30
|
+
key = OpenSSL::PKey::RSA.new(key_string)
|
31
|
+
cert = OpenSSL::X509::Certificate.new(cert_string)
|
32
|
+
ca_certs = load_ca_certs_bundle(ca_certs_string)
|
33
|
+
|
34
|
+
new(key, cert, ca_certs)
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.from_files(key_path, cert_path, ca_certs_path = nil)
|
38
|
+
ca_certs_string = File.read(ca_certs_path) if ca_certs_path
|
39
|
+
|
40
|
+
from_strings(File.read(key_path), File.read(cert_path), ca_certs_string)
|
41
|
+
end
|
42
|
+
|
43
|
+
# Returns all top-level, readable files in the default CA cert directory
|
44
|
+
def self.ca_certs_paths
|
45
|
+
cert_paths = Dir["#{default_cert_dir}/*"].select do |path|
|
46
|
+
!File.directory?(path) && File.readable?(path)
|
47
|
+
end
|
48
|
+
cert_paths << default_cert_file if File.exist? default_cert_file
|
49
|
+
cert_paths
|
50
|
+
end
|
51
|
+
|
52
|
+
# Returns a single String containing the unique PEM-encoded CA certificates, joined by newlines.
|
53
|
+
def self.ca_certs_bundle
|
54
|
+
strong_memoize(:ca_certs_bundle) do
|
55
|
+
ca_certs_paths.flat_map do |cert_file|
|
56
|
+
load_ca_certs_bundle(File.read(cert_file))
|
57
|
+
end.uniq.join("\n")
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
def self.reset_ca_certs_bundle
|
62
|
+
clear_memoization(:ca_certs_bundle)
|
63
|
+
end
|
64
|
+
|
65
|
+
def self.reset_default_cert_paths
|
66
|
+
clear_memoization(:default_cert_dir)
|
67
|
+
clear_memoization(:default_cert_file)
|
68
|
+
end
|
69
|
+
|
70
|
+
# Returns an array of OpenSSL::X509::Certificate objects, empty array if none found
|
71
|
+
#
|
72
|
+
# Ruby OpenSSL::X509::Certificate.new will only load the first
|
73
|
+
# certificate if a bundle is presented, this allows to parse multiple certs
|
74
|
+
# in the same file
|
75
|
+
def self.load_ca_certs_bundle(ca_certs_string)
|
76
|
+
return [] unless ca_certs_string
|
77
|
+
|
78
|
+
ca_certs_string.scan(CERT_REGEX).map do |ca_cert_string|
|
79
|
+
OpenSSL::X509::Certificate.new(ca_cert_string)
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
def initialize(key, cert, ca_certs = nil)
|
84
|
+
@key = key
|
85
|
+
@cert = cert
|
86
|
+
@ca_certs = ca_certs
|
87
|
+
end
|
88
|
+
|
89
|
+
def key_string
|
90
|
+
key.to_s
|
91
|
+
end
|
92
|
+
|
93
|
+
def cert_string
|
94
|
+
cert.to_pem
|
95
|
+
end
|
96
|
+
|
97
|
+
def ca_certs_string
|
98
|
+
ca_certs&.map(&:to_pem)&.join('\n') unless ca_certs.blank?
|
99
|
+
end
|
100
|
+
|
101
|
+
class << self
|
102
|
+
include ::GitLab::SecretDetection::Utils::StrongMemoize
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
@@ -0,0 +1,151 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module GitLab
|
4
|
+
module SecretDetection
|
5
|
+
module Utils
|
6
|
+
# Pulled from GitLab.com source
|
7
|
+
# Link: https://gitlab.com/gitlab-org/gitlab/-/blob/4713a798f997389f04e442db3d1d8349a39d5d46/gems/gitlab-utils/lib/gitlab/utils/strong_memoize.rb
|
8
|
+
module StrongMemoize
|
9
|
+
# Instead of writing patterns like this:
|
10
|
+
#
|
11
|
+
# def trigger_from_token
|
12
|
+
# return @trigger if defined?(@trigger)
|
13
|
+
#
|
14
|
+
# @trigger = Ci::Trigger.find_by_token(params[:token].to_s)
|
15
|
+
# end
|
16
|
+
#
|
17
|
+
# We could write it like:
|
18
|
+
#
|
19
|
+
# include GitLab::SecretDetection::Utils::StrongMemoize
|
20
|
+
#
|
21
|
+
# def trigger_from_token
|
22
|
+
# Ci::Trigger.find_by_token(params[:token].to_s)
|
23
|
+
# end
|
24
|
+
# strong_memoize_attr :trigger_from_token
|
25
|
+
#
|
26
|
+
# def enabled?
|
27
|
+
# Feature.enabled?(:some_feature)
|
28
|
+
# end
|
29
|
+
# strong_memoize_attr :enabled?
|
30
|
+
#
|
31
|
+
def strong_memoize(name)
|
32
|
+
key = ivar(name)
|
33
|
+
|
34
|
+
if instance_variable_defined?(key)
|
35
|
+
instance_variable_get(key)
|
36
|
+
else
|
37
|
+
instance_variable_set(key, yield)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# Works the same way as "strong_memoize" but takes
|
42
|
+
# a second argument - expire_in. This allows invalidating
|
43
|
+
# the data after specified number of seconds
|
44
|
+
def strong_memoize_with_expiration(name, expire_in)
|
45
|
+
key = ivar(name)
|
46
|
+
expiration_key = "#{key}_expired_at"
|
47
|
+
|
48
|
+
if instance_variable_defined?(expiration_key)
|
49
|
+
expire_at = instance_variable_get(expiration_key)
|
50
|
+
clear_memoization(name) if expire_at.past?
|
51
|
+
end
|
52
|
+
|
53
|
+
if instance_variable_defined?(key)
|
54
|
+
instance_variable_get(key)
|
55
|
+
else
|
56
|
+
value = instance_variable_set(key, yield)
|
57
|
+
instance_variable_set(expiration_key, Time.current + expire_in)
|
58
|
+
value
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def strong_memoize_with(name, *args)
|
63
|
+
container = strong_memoize(name) { {} }
|
64
|
+
|
65
|
+
if container.key?(args)
|
66
|
+
container[args]
|
67
|
+
else
|
68
|
+
container[args] = yield
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
def strong_memoized?(name)
|
73
|
+
key = ivar(StrongMemoize.normalize_key(name))
|
74
|
+
instance_variable_defined?(key)
|
75
|
+
end
|
76
|
+
|
77
|
+
def clear_memoization(name)
|
78
|
+
key = ivar(StrongMemoize.normalize_key(name))
|
79
|
+
remove_instance_variable(key) if instance_variable_defined?(key)
|
80
|
+
end
|
81
|
+
|
82
|
+
module StrongMemoizeClassMethods
|
83
|
+
def strong_memoize_attr(method_name)
|
84
|
+
member_name = StrongMemoize.normalize_key(method_name)
|
85
|
+
|
86
|
+
StrongMemoize.send(:do_strong_memoize, self, method_name, member_name) # rubocop:disable GitlabSecurity/PublicSend
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def self.included(base)
|
91
|
+
base.singleton_class.prepend(StrongMemoizeClassMethods)
|
92
|
+
end
|
93
|
+
|
94
|
+
private
|
95
|
+
|
96
|
+
# Convert `"name"`/`:name` into `:@name`
|
97
|
+
#
|
98
|
+
# Depending on the type, ensure that there's a single memory allocation
|
99
|
+
def ivar(name)
|
100
|
+
case name
|
101
|
+
when Symbol
|
102
|
+
name.to_s.prepend("@").to_sym
|
103
|
+
when String
|
104
|
+
:"@#{name}"
|
105
|
+
else
|
106
|
+
raise ArgumentError, "Invalid type of '#{name}'"
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
class << self
|
111
|
+
def normalize_key(key)
|
112
|
+
return key unless key.end_with?('!', '?')
|
113
|
+
|
114
|
+
# Replace invalid chars like `!` and `?` with allowed Unicode code points.
|
115
|
+
key.to_s.tr('!?', "\uFF01\uFF1F")
|
116
|
+
end
|
117
|
+
|
118
|
+
private
|
119
|
+
|
120
|
+
def do_strong_memoize(klass, method_name, member_name)
|
121
|
+
method = klass.instance_method(method_name)
|
122
|
+
|
123
|
+
unless method.arity.zero?
|
124
|
+
raise <<~ERROR
|
125
|
+
Using `strong_memoize_attr` on methods with parameters is not supported.
|
126
|
+
|
127
|
+
Use `strong_memoize_with` instead.
|
128
|
+
See https://docs.gitlab.com/ee/development/utilities.html#strongmemoize
|
129
|
+
ERROR
|
130
|
+
end
|
131
|
+
|
132
|
+
# Methods defined within a class method are already public by default, so we don't need to
|
133
|
+
# explicitly make them public.
|
134
|
+
scope = %i[private protected].find do |scope|
|
135
|
+
klass.send(:"#{scope}_instance_methods") # rubocop:disable GitlabSecurity/PublicSend
|
136
|
+
.include? method_name
|
137
|
+
end
|
138
|
+
|
139
|
+
klass.define_method(method_name) do |&block|
|
140
|
+
strong_memoize(member_name) do
|
141
|
+
method.bind_call(self, &block)
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
klass.send(scope, method_name) if scope # rubocop:disable GitlabSecurity/PublicSend
|
146
|
+
end
|
147
|
+
end
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
@@ -5,7 +5,7 @@ package gitlab.secret_detection;
|
|
5
5
|
/* We keep generated files within grpc namespace i.e GitLab::SecretDetection::GRPC
|
6
6
|
* so that these files are exported too in the Ruby Gem along with Core and GRPC logic.
|
7
7
|
*/
|
8
|
-
option ruby_package="GitLab::SecretDetection::GRPC";
|
8
|
+
option ruby_package = "GitLab::SecretDetection::GRPC";
|
9
9
|
|
10
10
|
/* Request arg for triggering Scan/ScanStream method */
|
11
11
|
message ScanRequest {
|
@@ -42,11 +42,10 @@ message ScanResponse {
|
|
42
42
|
// Represents a secret finding identified within a payload
|
43
43
|
message Finding {
|
44
44
|
string payload_id = 1;
|
45
|
-
|
45
|
+
int32 status = 2;
|
46
46
|
optional string type = 3;
|
47
47
|
optional string description = 4;
|
48
48
|
optional int32 line_number = 5;
|
49
|
-
optional string error = 6;
|
50
49
|
}
|
51
50
|
|
52
51
|
// Return status code in sync with ::SecretDetection::Status
|
@@ -59,18 +58,18 @@ message ScanResponse {
|
|
59
58
|
STATUS_SCAN_ERROR = 5; // internal scan failure
|
60
59
|
STATUS_INPUT_ERROR = 6; // invalid input failure
|
61
60
|
STATUS_NOT_FOUND = 7; // zero findings
|
61
|
+
STATUS_AUTH_ERROR = 8; // authentication failure
|
62
62
|
}
|
63
63
|
|
64
|
-
|
65
|
-
|
66
|
-
Status status = 3;
|
64
|
+
repeated Finding results = 1;
|
65
|
+
int32 status = 2;
|
67
66
|
}
|
68
67
|
|
69
68
|
/* Scanner service that scans given payloads and returns findings */
|
70
69
|
service Scanner {
|
71
70
|
// Runs secret detection scan for the given request
|
72
|
-
rpc Scan(ScanRequest) returns (ScanResponse) {
|
71
|
+
rpc Scan(ScanRequest) returns (ScanResponse) {}
|
73
72
|
|
74
73
|
// Runs bi-directional streaming of scans for the given stream of requests with a stream of responses
|
75
|
-
rpc ScanStream(stream ScanRequest) returns (stream ScanResponse) {
|
74
|
+
rpc ScanStream(stream ScanRequest) returns (stream ScanResponse) {}
|
76
75
|
}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gitlab-secret_detection
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.1
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- group::secret detection
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2024-10-
|
13
|
+
date: 2024-10-07 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: grpc
|
@@ -28,18 +28,32 @@ dependencies:
|
|
28
28
|
version: 1.63.0
|
29
29
|
- !ruby/object:Gem::Dependency
|
30
30
|
name: grpc-tools
|
31
|
+
requirement: !ruby/object:Gem::Requirement
|
32
|
+
requirements:
|
33
|
+
- - '='
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: 1.63.0
|
36
|
+
type: :runtime
|
37
|
+
prerelease: false
|
38
|
+
version_requirements: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - '='
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 1.63.0
|
43
|
+
- !ruby/object:Gem::Dependency
|
44
|
+
name: parallel
|
31
45
|
requirement: !ruby/object:Gem::Requirement
|
32
46
|
requirements:
|
33
47
|
- - "~>"
|
34
48
|
- !ruby/object:Gem::Version
|
35
|
-
version: '1.
|
49
|
+
version: '1.19'
|
36
50
|
type: :runtime
|
37
51
|
prerelease: false
|
38
52
|
version_requirements: !ruby/object:Gem::Requirement
|
39
53
|
requirements:
|
40
54
|
- - "~>"
|
41
55
|
- !ruby/object:Gem::Version
|
42
|
-
version: '1.
|
56
|
+
version: '1.19'
|
43
57
|
- !ruby/object:Gem::Dependency
|
44
58
|
name: re2
|
45
59
|
requirement: !ruby/object:Gem::Requirement
|
@@ -93,10 +107,14 @@ files:
|
|
93
107
|
- lib/gitlab/secret_detection/core/status.rb
|
94
108
|
- lib/gitlab/secret_detection/grpc.rb
|
95
109
|
- lib/gitlab/secret_detection/grpc/client/grpc_client.rb
|
110
|
+
- lib/gitlab/secret_detection/grpc/client/stream_request_enumerator.rb
|
96
111
|
- lib/gitlab/secret_detection/grpc/generated/.gitkeep
|
97
112
|
- lib/gitlab/secret_detection/grpc/generated/secret_detection_pb.rb
|
98
113
|
- lib/gitlab/secret_detection/grpc/generated/secret_detection_services_pb.rb
|
99
114
|
- lib/gitlab/secret_detection/grpc/scanner_service.rb
|
115
|
+
- lib/gitlab/secret_detection/utils.rb
|
116
|
+
- lib/gitlab/secret_detection/utils/certificate.rb
|
117
|
+
- lib/gitlab/secret_detection/utils/memoize.rb
|
100
118
|
- lib/gitlab/secret_detection/version.rb
|
101
119
|
- proto/secret_detection.proto
|
102
120
|
homepage: https://gitlab.com/gitlab-org/security-products/secret-detection/secret-detection-service
|