gitlab-secret_detection 0.4.1 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c4b61c4602691e857833407badf2d2556d493679db2a5897c098eaff7aa96847
4
- data.tar.gz: f803fcbae2d2988aace3f62e141500e175a75664b3a21f28ef5ef8c2cf89fcb7
3
+ metadata.gz: 648b6d5277ac8e7948762533af39dc2f4ff0ae4c62fbdcc6d5d32615a44ab815
4
+ data.tar.gz: afd2a580cb0a73bb84a401616ec622e1e14d2b75021036a959f933ed75864cf6
5
5
  SHA512:
6
- metadata.gz: 0b7b61e27e95f816d6d8d260669eb0b54606170910ca18eb67937ca9f05b44333b7f6c8dccdf5c04aa6896d4d68a355e284745e91b309120f3a3fc789c9b05fb
7
- data.tar.gz: f91c75463925727308c61166ac89e4f48f61c2e0a7cb4d37cc4e0a411db4fcb88245cbc355ed49577d2fe22fcb5d1cb9bd08a734e3125dd4815ef86e44f7b578
6
+ metadata.gz: 7c49b71891f6e13d8dc252936f18df838d8e9d42000cbc1f4f3d7fe56b258a7e2c17bce7a7f0e771bc16991fa46bacf641a636a1d1efd3ef23de6b0a31a011b8
7
+ data.tar.gz: af93c82a0025be12bc82fe6ff62b290fe193a2338ae2f98d83bc2729d1c7cc35a223330a3a9907a9f715c009127e93ba2d13fc33982d2f13a179d8548f5cc36e
data/README.md CHANGED
@@ -42,6 +42,7 @@ the approach:
42
42
  │ ├── core/.. # Secret detection logic (most of it pulled from existing gem)
43
43
  │ └── grpc
44
44
  │ ├── generated/.. # gRPC generated files and secret detection gRPC service
45
+ │ ├── client/.. # gRPC client to invoke secret detection service's RPC endpoints
45
46
  │ └── scanner_service.rb # Secret detection gRPC service implementation
46
47
  ├── examples
47
48
  │ └── sample-client/.. # Sample Ruby RPC client that connects with gRPC server and calls RPC scan
@@ -320,7 +321,7 @@ Run `ruby examples/sample-client/sample_client.rb` on your terminal to run the s
320
321
 
321
322
  ## Benchmark
322
323
 
323
- RPC service is benchmarked using [`ghz`](https://ghz.sh), a powerful CLI-based tool for load testing and benchmarking gRPC services. More details added [here](benchmark/README.md).
324
+ The RPC service is benchmarked using [`ghz`](https://ghz.sh), a powerful CLI-based tool for load testing and benchmarking gRPC services. More details are available [here](https://gitlab.com/gitlab-org/gitlab/-/work_items/468107).
324
325
 
325
326
  ## Project Status
326
327
 
@@ -7,12 +7,15 @@ module GitLab
7
7
  #
8
8
  # +status+:: One of values from GitLab::SecretDetection::Core::Status indicating the scan operation's status
9
9
  # +results+:: Array of GitLab::SecretDetection::Core::Finding values. Default value is nil.
10
+ # +metadata+:: Hash object containing additional meta information about the response. It is currently used
11
+ # to embed more information on error.
10
12
  class Response
11
- attr_reader :status, :results
13
+ attr_reader :status, :results, :metadata
12
14
 
13
- def initialize(status, results = [])
15
+ def initialize(status, results = [], metadata = {})
14
16
  @status = status
15
17
  @results = results
18
+ @metadata = metadata
16
19
  end
17
20
 
18
21
  def ==(other)
@@ -22,6 +25,7 @@ module GitLab
22
25
  def to_h
23
26
  {
24
27
  status:,
28
+ metadata:,
25
29
  results: results&.map(&:to_h)
26
30
  }
27
31
  end
@@ -29,7 +33,7 @@ module GitLab
29
33
  protected
30
34
 
31
35
  def state
32
- [status, results]
36
+ [status, metadata, results]
33
37
  end
34
38
  end
35
39
  end
@@ -4,6 +4,7 @@ require 're2'
4
4
  require 'logger'
5
5
  require 'timeout'
6
6
  require 'English'
7
+ require 'parallel'
7
8
 
8
9
  module GitLab
9
10
  module SecretDetection
@@ -24,6 +25,14 @@ module GitLab
24
25
  DEFAULT_PAYLOAD_TIMEOUT_SECS = 30 # 30 seconds
25
26
  # Tags used for creating default pattern matcher
26
27
  DEFAULT_PATTERN_MATCHER_TAGS = ['gitlab_blocking'].freeze
28
+ # Maximum number of child processes to spawn per request
29
+ # ref: https://gitlab.com/gitlab-org/gitlab/-/issues/430160
30
+ MAX_PROCS_PER_REQUEST = 5
31
+ # Minimum cumulative size of the payloads required to spawn and
32
+ # run the scan within a new subprocess.
33
+ MIN_CHUNK_SIZE_PER_PROC_BYTES = 2_097_152 # 2MiB
34
+ # Whether to run scan in subprocesses or not. Default is false.
35
+ RUN_IN_SUBPROCESS = false
27
36
 
28
37
  # Initializes the instance with logger along with following operations:
29
38
  # 1. Extract keywords from the parsed ruleset to use it for matching keywords before regex operation.
@@ -58,6 +67,13 @@ module GitLab
58
67
  # For example: Add `gitlab_blocking` to include only rules for Push Protection. Defaults to
59
68
  # [`gitlab_blocking`] (+DEFAULT_PATTERN_MATCHER_TAGS+).
60
69
  #
70
+ # NOTE:
71
+ # Running the scan in fork mode primarily focuses on reducing the memory consumption of the scan by
72
+ # offloading regex operations on large payloads to sub-processes. However, it does not guarantee an improvement
73
+ # in the overall latency of the scan, specifically in the case of smaller payloads, where the overhead of
74
+ # forking a new process adds to the overall latency of the scan instead. More information on subprocess-based
75
+ # execution can be found here: https://gitlab.com/gitlab-org/gitlab/-/issues/430160.
76
+ #
61
77
  # Returns an instance of GitLab::SecretDetection::Core::Response by following below structure:
62
78
  # {
63
79
  # status: One of the Core::Status values
@@ -70,7 +86,8 @@ module GitLab
70
86
  payload_timeout: DEFAULT_PAYLOAD_TIMEOUT_SECS,
71
87
  raw_value_exclusions: [],
72
88
  rule_exclusions: [],
73
- tags: DEFAULT_PATTERN_MATCHER_TAGS
89
+ tags: DEFAULT_PATTERN_MATCHER_TAGS,
90
+ subprocess: RUN_IN_SUBPROCESS
74
91
  )
75
92
 
76
93
  return Core::Response.new(Core::Status::INPUT_ERROR) unless validate_scan_input(payloads)
@@ -87,11 +104,13 @@ module GitLab
87
104
 
88
105
  next Core::Response.new(Core::Status::NOT_FOUND) if matched_payloads.empty?
89
106
 
90
- secrets = run_scan(
107
+ scan_args = {
91
108
  payloads: matched_payloads, payload_timeout:,
92
109
  pattern_matcher: build_pattern_matcher(tags:),
93
110
  raw_value_exclusions:, rule_exclusions:
94
- )
111
+ }
112
+
113
+ secrets = subprocess ? run_scan_within_subprocess(**scan_args) : run_scan(**scan_args)
95
114
 
96
115
  scan_status = overall_scan_status(secrets)
97
116
 
@@ -205,6 +224,36 @@ module GitLab
205
224
  end
206
225
  end
207
226
 
227
+ def run_scan_within_subprocess(
228
+ payloads:, payload_timeout:, pattern_matcher:, raw_value_exclusions: [],
229
+ rule_exclusions: [])
230
+ payload_sizes = payloads.map(&:size)
231
+ grouped_payload_indices = group_by_chunk_size(payload_sizes)
232
+
233
+ grouped_payloads = grouped_payload_indices.map { |idx_arr| idx_arr.map { |i| payloads[i] } }
234
+
235
+ found_secrets = Parallel.flat_map(
236
+ grouped_payloads,
237
+ in_processes: MAX_PROCS_PER_REQUEST,
238
+ isolation: true # do not reuse sub-processes
239
+ ) do |grouped_payload|
240
+ grouped_payload.flat_map do |payload|
241
+ Timeout.timeout(payload_timeout) do
242
+ find_secrets_in_payload(
243
+ payload:,
244
+ pattern_matcher:,
245
+ raw_value_exclusions:, rule_exclusions:
246
+ )
247
+ end
248
+ rescue Timeout::Error => e
249
+ logger.error "Secret Detection scan timed out on the payload(id:#{payload.id}): #{e}"
250
+ Core::Finding.new(payload.id, Core::Status::PAYLOAD_TIMEOUT)
251
+ end
252
+ end
253
+
254
+ found_secrets.freeze
255
+ end
256
+
208
257
  # Finds secrets in the given payload guarded with a timeout as a circuit breaker. It accepts
209
258
  # literal values to exclude from the input before the scan, also SD rules to exclude during
210
259
  # the scan.
@@ -268,6 +317,35 @@ module GitLab
268
317
  Core::Status::FOUND_WITH_ERRORS
269
318
  end
270
319
  end
320
+
321
+ # This method accepts an array of payload sizes (in bytes) and groups them into an array
322
+ # of arrays, where each element is a group of consecutive indices of the input array whose
323
+ # cumulative payload size is at least +MIN_CHUNK_SIZE_PER_PROC_BYTES+ (the final group may be smaller).
324
+ def group_by_chunk_size(payload_size_arr)
325
+ cumulative_size = 0
326
+ chunk_indexes = []
327
+ chunk_idx_start = 0
328
+
329
+ payload_size_arr.each_with_index do |size, index|
330
+ cumulative_size += size
331
+ next unless cumulative_size >= MIN_CHUNK_SIZE_PER_PROC_BYTES
332
+
333
+ chunk_indexes << (chunk_idx_start..index).to_a
334
+
335
+ chunk_idx_start = index + 1
336
+ cumulative_size = 0
337
+ end
338
+
339
+ if cumulative_size.positive? && (chunk_idx_start < payload_size_arr.length)
340
+ chunk_indexes << if chunk_idx_start == payload_size_arr.length - 1
341
+ [chunk_idx_start]
342
+ else
343
+ (chunk_idx_start..payload_size_arr.length - 1).to_a
344
+ end
345
+ end
346
+
347
+ chunk_indexes
348
+ end
271
349
  end
272
350
  end
273
351
  end
@@ -12,6 +12,7 @@ module GitLab
12
12
  SCAN_ERROR = 5 # When the scan operation fails due to regex error
13
13
  INPUT_ERROR = 6 # When the scan operation fails due to invalid input
14
14
  NOT_FOUND = 7 # When scan operation completes with zero findings
15
+ AUTH_ERROR = 8 # When authentication fails
15
16
  end
16
17
  end
17
18
  end
@@ -1,19 +1,143 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative '../generated/secret_detection_pb'
4
- require_relative '../generated/secret_detection_services_pb'
3
+ require 'grpc'
4
+ require_relative '../../grpc/scanner_service'
5
+ require_relative '../../core/response'
6
+ require_relative '../../core/status'
7
+ require_relative '../../utils'
8
+ require_relative './stream_request_enumerator'
5
9
 
6
10
  module GitLab
7
11
  module SecretDetection
8
12
  module GRPC
9
13
  class Client
10
- # TODO add implementation
11
- def scan
12
- raise NotImplementedError
14
+ include SecretDetection::Utils::StrongMemoize
15
+ include SDLogger
16
+
17
+ # Time to wait for the response from the service
18
+ REQUEST_TIMEOUT_SECONDS = 10 # 10 seconds
19
+
20
+ def initialize(host, secure: false, compression: true)
21
+ @host = host
22
+ @secure = secure
23
+ @compression = compression
24
+ end
25
+
26
+ # Triggers Secret Detection service's `/Scan` gRPC endpoint. To keep it consistent with SDS gem interface,
27
+ # this method transforms the gRPC response to +GitLab::SecretDetection::Core::Response+.
28
+ # Furthermore, any errors that are raised by the service will be translated to
29
+ # +GitLab::SecretDetection::Core::Response+ type by assigning an appropriate +status+ value to it.
30
+ def run_scan(request:, auth_token:, extra_headers: {})
31
+ with_rescued_errors do
32
+ grpc_response = stub.scan(
33
+ request,
34
+ metadata: build_metadata(auth_token, extra_headers),
35
+ deadline: request_deadline
36
+ )
37
+
38
+ convert_to_core_response(grpc_response)
39
+ end
40
+ end
41
+
42
+ # Triggers Secret Detection service's `/ScanStream` gRPC endpoint.
43
+ #
44
+ # To keep it consistent with SDS gem interface, this method transforms the gRPC response to
45
+ # +GitLab::SecretDetection::Core::Response+ type. Furthermore, any errors that are raised by the service will be
46
+ # translated to +GitLab::SecretDetection::Core::Response+ type by assigning an appropriate +status+ value to it.
47
+ #
48
+ # Note: If one of the stream requests results in an error, the stream will end immediately without processing the
49
+ # remaining requests.
50
+ def run_scan_stream(requests:, auth_token:, extra_headers: {})
51
+ request_stream = GitLab::SecretDetection::GRPC::StreamRequestEnumerator.new(requests)
52
+ results = []
53
+ with_rescued_errors do
54
+ stub.scan_stream(
55
+ request_stream.each_item,
56
+ metadata: build_metadata(auth_token, extra_headers),
57
+ deadline: request_deadline
58
+ ).each do |grpc_response|
59
+ response = convert_to_core_response(grpc_response)
60
+ if block_given?
61
+ yield response
62
+ else
63
+ results << response
64
+ end
65
+ end
66
+ results
67
+ end
68
+ end
69
+
70
+ private
71
+
72
+ attr_reader :secure, :host, :compression
73
+
74
+ def stub
75
+ GitLab::SecretDetection::GRPC::Scanner::Stub.new(
76
+ host,
77
+ channel_credentials,
78
+ channel_args:
79
+ )
80
+ end
81
+
82
+ strong_memoize_attr :stub
83
+
84
+ def channel_args
85
+ default_options = {
86
+ 'grpc.keepalive_permit_without_calls' => 1,
87
+ 'grpc.keepalive_time_ms' => 30000, # 30 seconds
88
+ 'grpc.keepalive_timeout_ms' => 10000 # 10 seconds timeout for keepalive response
89
+ }
90
+
91
+ compression_options = ::GRPC::Core::CompressionOptions
92
+ .new(default_algorithm: :gzip)
93
+ .to_channel_arg_hash
94
+
95
+ default_options.merge!(compression_options) if compression
96
+
97
+ default_options.freeze
98
+ end
99
+
100
+ def channel_credentials
101
+ return :this_channel_is_insecure unless secure
102
+
103
+ certs = GitLab::SecretDetection::Utils::X509::Certificate.ca_certs_bundle
104
+
105
+ ::GRPC::Core::ChannelCredentials.new(certs)
13
106
  end
14
107
 
15
- def scan_stream
16
- raise NotImplementedError
108
+ def build_metadata(token, extra_headers = {})
109
+ { 'x-sd-auth' => token }.merge!(extra_headers).freeze
110
+ end
111
+
112
+ def request_deadline
113
+ Time.now + REQUEST_TIMEOUT_SECONDS
114
+ end
115
+
116
+ def with_rescued_errors
117
+ yield
118
+ rescue ::GRPC::Unauthenticated
119
+ SecretDetection::Core::Response.new(SecretDetection::Core::Status::AUTH_ERROR)
120
+ rescue ::GRPC::InvalidArgument => e
121
+ SecretDetection::Core::Response.new(
122
+ SecretDetection::Core::Status::INPUT_ERROR, nil, { message: e.details, **e.metadata }
123
+ )
124
+ rescue ::GRPC::Unknown, ::GRPC::BadStatus => e
125
+ SecretDetection::Core::Response.new(
126
+ SecretDetection::Core::Status::SCAN_ERROR, nil, { message: e.details }
127
+ )
128
+ end
129
+
130
+ def convert_to_core_response(grpc_response)
131
+ response = grpc_response.to_h
132
+
133
+ SecretDetection::Core::Response.new(
134
+ response[:status],
135
+ response[:results],
136
+ response[:metadata]
137
+ )
138
+ rescue StandardError => e
139
+ logger.error("Failed to convert to core response: #{e}")
140
+ SecretDetection::Core::Response.new(SecretDetection::Core::Status::SCAN_ERROR)
17
141
  end
18
142
  end
19
143
  end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module GitLab
4
+ module SecretDetection
5
+ module GRPC
6
+ class StreamRequestEnumerator
7
+ def initialize(requests = [])
8
+ @requests = requests
9
+ end
10
+
11
+ # Yields each request in order, with no delay between requests.
12
+ #
13
+ # @return an Enumerator over the requests when no block is given
14
+ def each_item
15
+ return enum_for(:each_item) unless block_given?
16
+
17
+ @requests.each do |request|
18
+ yield request
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -5,7 +5,7 @@
5
5
  require 'google/protobuf'
6
6
 
7
7
 
8
- descriptor_data = "\n\x16secret_detection.proto\x12\x17gitlab.secret_detection\"\xfc\x03\n\x0bScanRequest\x12>\n\x08payloads\x18\x01 \x03(\x0b\x32,.gitlab.secret_detection.ScanRequest.Payload\x12\x19\n\x0ctimeout_secs\x18\x02 \x01(\x02H\x00\x88\x01\x01\x12!\n\x14payload_timeout_secs\x18\x03 \x01(\x02H\x01\x88\x01\x01\x12\x42\n\nexclusions\x18\x04 \x03(\x0b\x32..gitlab.secret_detection.ScanRequest.Exclusion\x12\x0c\n\x04tags\x18\x05 \x03(\t\x1a#\n\x07Payload\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\t\x1a\x66\n\tExclusion\x12J\n\x0e\x65xclusion_type\x18\x01 \x01(\x0e\x32\x32.gitlab.secret_detection.ScanRequest.ExclusionType\x12\r\n\x05value\x18\x02 \x01(\t\"f\n\rExclusionType\x12\x1e\n\x1a\x45XCLUSION_TYPE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XCLUSION_TYPE_RULE\x10\x01\x12\x1c\n\x18\x45XCLUSION_TYPE_RAW_VALUE\x10\x02\x42\x0f\n\r_timeout_secsB\x17\n\x15_payload_timeout_secs\"\xe3\x04\n\x0cScanResponse\x12\x12\n\x05\x65rror\x18\x01 \x01(\tH\x00\x88\x01\x01\x12>\n\x07results\x18\x02 \x03(\x0b\x32-.gitlab.secret_detection.ScanResponse.Finding\x12<\n\x06status\x18\x03 \x01(\x0e\x32,.gitlab.secret_detection.ScanResponse.Status\x1a\xe9\x01\n\x07\x46inding\x12\x12\n\npayload_id\x18\x01 \x01(\t\x12<\n\x06status\x18\x02 \x01(\x0e\x32,.gitlab.secret_detection.ScanResponse.Status\x12\x11\n\x04type\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x18\n\x0b\x64\x65scription\x18\x04 \x01(\tH\x01\x88\x01\x01\x12\x18\n\x0bline_number\x18\x05 \x01(\x05H\x02\x88\x01\x01\x12\x12\n\x05\x65rror\x18\x06 
\x01(\tH\x03\x88\x01\x01\x42\x07\n\x05_typeB\x0e\n\x0c_descriptionB\x0e\n\x0c_line_numberB\x08\n\x06_error\"\xca\x01\n\x06Status\x12\x16\n\x12STATUS_UNSPECIFIED\x10\x00\x12\x10\n\x0cSTATUS_FOUND\x10\x01\x12\x1c\n\x18STATUS_FOUND_WITH_ERRORS\x10\x02\x12\x17\n\x13STATUS_SCAN_TIMEOUT\x10\x03\x12\x1a\n\x16STATUS_PAYLOAD_TIMEOUT\x10\x04\x12\x15\n\x11STATUS_SCAN_ERROR\x10\x05\x12\x16\n\x12STATUS_INPUT_ERROR\x10\x06\x12\x14\n\x10STATUS_NOT_FOUND\x10\x07\x42\x08\n\x06_error2\xc1\x01\n\x07Scanner\x12U\n\x04Scan\x12$.gitlab.secret_detection.ScanRequest\x1a%.gitlab.secret_detection.ScanResponse\"\x00\x12_\n\nScanStream\x12$.gitlab.secret_detection.ScanRequest\x1a%.gitlab.secret_detection.ScanResponse\"\x00(\x01\x30\x01\x42 \xea\x02\x1dGitLab::SecretDetection::GRPCb\x06proto3"
8
+ descriptor_data = "\n\x16secret_detection.proto\x12\x17gitlab.secret_detection\"\xfc\x03\n\x0bScanRequest\x12>\n\x08payloads\x18\x01 \x03(\x0b\x32,.gitlab.secret_detection.ScanRequest.Payload\x12\x19\n\x0ctimeout_secs\x18\x02 \x01(\x02H\x00\x88\x01\x01\x12!\n\x14payload_timeout_secs\x18\x03 \x01(\x02H\x01\x88\x01\x01\x12\x42\n\nexclusions\x18\x04 \x03(\x0b\x32..gitlab.secret_detection.ScanRequest.Exclusion\x12\x0c\n\x04tags\x18\x05 \x03(\t\x1a#\n\x07Payload\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\t\x1a\x66\n\tExclusion\x12J\n\x0e\x65xclusion_type\x18\x01 \x01(\x0e\x32\x32.gitlab.secret_detection.ScanRequest.ExclusionType\x12\r\n\x05value\x18\x02 \x01(\t\"f\n\rExclusionType\x12\x1e\n\x1a\x45XCLUSION_TYPE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XCLUSION_TYPE_RULE\x10\x01\x12\x1c\n\x18\x45XCLUSION_TYPE_RAW_VALUE\x10\x02\x42\x0f\n\r_timeout_secsB\x17\n\x15_payload_timeout_secs\"\xe2\x03\n\x0cScanResponse\x12>\n\x07results\x18\x01 \x03(\x0b\x32-.gitlab.secret_detection.ScanResponse.Finding\x12\x0e\n\x06status\x18\x02 \x01(\x05\x1a\x9d\x01\n\x07\x46inding\x12\x12\n\npayload_id\x18\x01 \x01(\t\x12\x0e\n\x06status\x18\x02 \x01(\x05\x12\x11\n\x04type\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x18\n\x0b\x64\x65scription\x18\x04 \x01(\tH\x01\x88\x01\x01\x12\x18\n\x0bline_number\x18\x05 
\x01(\x05H\x02\x88\x01\x01\x42\x07\n\x05_typeB\x0e\n\x0c_descriptionB\x0e\n\x0c_line_number\"\xe1\x01\n\x06Status\x12\x16\n\x12STATUS_UNSPECIFIED\x10\x00\x12\x10\n\x0cSTATUS_FOUND\x10\x01\x12\x1c\n\x18STATUS_FOUND_WITH_ERRORS\x10\x02\x12\x17\n\x13STATUS_SCAN_TIMEOUT\x10\x03\x12\x1a\n\x16STATUS_PAYLOAD_TIMEOUT\x10\x04\x12\x15\n\x11STATUS_SCAN_ERROR\x10\x05\x12\x16\n\x12STATUS_INPUT_ERROR\x10\x06\x12\x14\n\x10STATUS_NOT_FOUND\x10\x07\x12\x15\n\x11STATUS_AUTH_ERROR\x10\x08\x32\xc1\x01\n\x07Scanner\x12U\n\x04Scan\x12$.gitlab.secret_detection.ScanRequest\x1a%.gitlab.secret_detection.ScanResponse\"\x00\x12_\n\nScanStream\x12$.gitlab.secret_detection.ScanRequest\x1a%.gitlab.secret_detection.ScanResponse\"\x00(\x01\x30\x01\x42 \xea\x02\x1dGitLab::SecretDetection::GRPCb\x06proto3"
9
9
 
10
10
  pool = Google::Protobuf::DescriptorPool.generated_pool
11
11
  pool.add_serialized_file(descriptor_data)
@@ -74,14 +74,19 @@ module GitLab
74
74
  end
75
75
  end
76
76
 
77
- result = scanner.secrets_scan(
78
- payloads,
79
- raw_value_exclusions:,
80
- rule_exclusions:,
81
- tags: request.tags.to_a,
82
- timeout: request.timeout_secs,
83
- payload_timeout: request.payload_timeout_secs
84
- )
77
+ begin
78
+ result = scanner.secrets_scan(
79
+ payloads,
80
+ raw_value_exclusions:,
81
+ rule_exclusions:,
82
+ tags: request.tags.to_a,
83
+ timeout: request.timeout_secs,
84
+ payload_timeout: request.payload_timeout_secs
85
+ )
86
+ rescue StandardError => e
87
+ logger.error("Failed to run the scan: #{e}")
88
+ raise ::GRPC::Unknown, e.message
89
+ end
85
90
 
86
91
  findings = result.results&.map do |finding|
87
92
  GitLab::SecretDetection::GRPC::ScanResponse::Finding.new(**finding.to_h)
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'grpc/scanner_service'
4
+ require_relative 'grpc/client/stream_request_enumerator'
4
5
  require_relative 'grpc/client/grpc_client'
5
6
 
6
7
  module GitLab
@@ -0,0 +1,108 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'openssl'
4
+ require_relative 'memoize'
5
+
6
+ module GitLab
7
+ module SecretDetection
8
+ module Utils
9
+ module X509
10
+ # Pulled from GitLab.com source
11
+ # Link: https://gitlab.com/gitlab-org/gitlab/-/blob/4713a798f997389f04e442db3d1d8349a39d5d46/lib/gitlab/x509/certificate.rb
12
+ class Certificate
13
+ CERT_REGEX = /-----BEGIN CERTIFICATE-----(?:.|\n)+?-----END CERTIFICATE-----/
14
+
15
+ attr_reader :key, :cert, :ca_certs
16
+
17
+ def self.default_cert_dir
18
+ strong_memoize(:default_cert_dir) do
19
+ ENV.fetch('SSL_CERT_DIR', OpenSSL::X509::DEFAULT_CERT_DIR)
20
+ end
21
+ end
22
+
23
+ def self.default_cert_file
24
+ strong_memoize(:default_cert_file) do
25
+ ENV.fetch('SSL_CERT_FILE', OpenSSL::X509::DEFAULT_CERT_FILE)
26
+ end
27
+ end
28
+
29
+ def self.from_strings(key_string, cert_string, ca_certs_string = nil)
30
+ key = OpenSSL::PKey::RSA.new(key_string)
31
+ cert = OpenSSL::X509::Certificate.new(cert_string)
32
+ ca_certs = load_ca_certs_bundle(ca_certs_string)
33
+
34
+ new(key, cert, ca_certs)
35
+ end
36
+
37
+ def self.from_files(key_path, cert_path, ca_certs_path = nil)
38
+ ca_certs_string = File.read(ca_certs_path) if ca_certs_path
39
+
40
+ from_strings(File.read(key_path), File.read(cert_path), ca_certs_string)
41
+ end
42
+
43
+ # Returns all top-level, readable files in the default CA cert directory, plus the default CA cert file if it exists
44
+ def self.ca_certs_paths
45
+ cert_paths = Dir["#{default_cert_dir}/*"].select do |path|
46
+ !File.directory?(path) && File.readable?(path)
47
+ end
48
+ cert_paths << default_cert_file if File.exist? default_cert_file
49
+ cert_paths
50
+ end
51
+
52
+ # Returns a single String containing the unique PEM-coded CA certificates, joined with newlines.
53
+ def self.ca_certs_bundle
54
+ strong_memoize(:ca_certs_bundle) do
55
+ ca_certs_paths.flat_map do |cert_file|
56
+ load_ca_certs_bundle(File.read(cert_file))
57
+ end.uniq.join("\n")
58
+ end
59
+ end
60
+
61
+ def self.reset_ca_certs_bundle
62
+ clear_memoization(:ca_certs_bundle)
63
+ end
64
+
65
+ def self.reset_default_cert_paths
66
+ clear_memoization(:default_cert_dir)
67
+ clear_memoization(:default_cert_file)
68
+ end
69
+
70
+ # Returns an array of OpenSSL::X509::Certificate objects, empty array if none found
71
+ #
72
+ # Ruby OpenSSL::X509::Certificate.new will only load the first
73
+ # certificate if a bundle is presented, this allows to parse multiple certs
74
+ # in the same file
75
+ def self.load_ca_certs_bundle(ca_certs_string)
76
+ return [] unless ca_certs_string
77
+
78
+ ca_certs_string.scan(CERT_REGEX).map do |ca_cert_string|
79
+ OpenSSL::X509::Certificate.new(ca_cert_string)
80
+ end
81
+ end
82
+
83
+ def initialize(key, cert, ca_certs = nil)
84
+ @key = key
85
+ @cert = cert
86
+ @ca_certs = ca_certs
87
+ end
88
+
89
+ def key_string
90
+ key.to_s
91
+ end
92
+
93
+ def cert_string
94
+ cert.to_pem
95
+ end
96
+
97
+ def ca_certs_string
98
+ ca_certs&.map(&:to_pem)&.join('\n') unless ca_certs.blank?
99
+ end
100
+
101
+ class << self
102
+ include ::GitLab::SecretDetection::Utils::StrongMemoize
103
+ end
104
+ end
105
+ end
106
+ end
107
+ end
108
+ end
@@ -0,0 +1,151 @@
1
+ # frozen_string_literal: true
2
+
3
+ module GitLab
4
+ module SecretDetection
5
+ module Utils
6
+ # Pulled from GitLab.com source
7
+ # Link: https://gitlab.com/gitlab-org/gitlab/-/blob/4713a798f997389f04e442db3d1d8349a39d5d46/gems/gitlab-utils/lib/gitlab/utils/strong_memoize.rb
8
+ module StrongMemoize
9
+ # Instead of writing patterns like this:
10
+ #
11
+ # def trigger_from_token
12
+ # return @trigger if defined?(@trigger)
13
+ #
14
+ # @trigger = Ci::Trigger.find_by_token(params[:token].to_s)
15
+ # end
16
+ #
17
+ # We could write it like:
18
+ #
19
+ # include GitLab::SecretDetection::Utils::StrongMemoize
20
+ #
21
+ # def trigger_from_token
22
+ # Ci::Trigger.find_by_token(params[:token].to_s)
23
+ # end
24
+ # strong_memoize_attr :trigger_from_token
25
+ #
26
+ # def enabled?
27
+ # Feature.enabled?(:some_feature)
28
+ # end
29
+ # strong_memoize_attr :enabled?
30
+ #
31
+ def strong_memoize(name)
32
+ key = ivar(name)
33
+
34
+ if instance_variable_defined?(key)
35
+ instance_variable_get(key)
36
+ else
37
+ instance_variable_set(key, yield)
38
+ end
39
+ end
40
+
41
+ # Works the same way as "strong_memoize" but takes
42
+ # a second argument - expire_in. This allows invalidating
43
+ # the data after the specified number of seconds
44
+ def strong_memoize_with_expiration(name, expire_in)
45
+ key = ivar(name)
46
+ expiration_key = "#{key}_expired_at"
47
+
48
+ if instance_variable_defined?(expiration_key)
49
+ expire_at = instance_variable_get(expiration_key)
50
+ clear_memoization(name) if expire_at.past?
51
+ end
52
+
53
+ if instance_variable_defined?(key)
54
+ instance_variable_get(key)
55
+ else
56
+ value = instance_variable_set(key, yield)
57
+ instance_variable_set(expiration_key, Time.current + expire_in)
58
+ value
59
+ end
60
+ end
61
+
62
+ def strong_memoize_with(name, *args)
63
+ container = strong_memoize(name) { {} }
64
+
65
+ if container.key?(args)
66
+ container[args]
67
+ else
68
+ container[args] = yield
69
+ end
70
+ end
71
+
72
+ def strong_memoized?(name)
73
+ key = ivar(StrongMemoize.normalize_key(name))
74
+ instance_variable_defined?(key)
75
+ end
76
+
77
+ def clear_memoization(name)
78
+ key = ivar(StrongMemoize.normalize_key(name))
79
+ remove_instance_variable(key) if instance_variable_defined?(key)
80
+ end
81
+
82
+ module StrongMemoizeClassMethods
83
+ def strong_memoize_attr(method_name)
84
+ member_name = StrongMemoize.normalize_key(method_name)
85
+
86
+ StrongMemoize.send(:do_strong_memoize, self, method_name, member_name) # rubocop:disable GitlabSecurity/PublicSend
87
+ end
88
+ end
89
+
90
+ def self.included(base)
91
+ base.singleton_class.prepend(StrongMemoizeClassMethods)
92
+ end
93
+
94
+ private
95
+
96
+ # Convert `"name"`/`:name` into `:@name`
97
+ #
98
+ # Depending on a type ensure that there's a single memory allocation
99
+ def ivar(name)
100
+ case name
101
+ when Symbol
102
+ name.to_s.prepend("@").to_sym
103
+ when String
104
+ :"@#{name}"
105
+ else
106
+ raise ArgumentError, "Invalid type of '#{name}'"
107
+ end
108
+ end
109
+
110
+ class << self
111
+ def normalize_key(key)
112
+ return key unless key.end_with?('!', '?')
113
+
114
+ # Replace invalid chars like `!` and `?` with allowed Unicode code points.
115
+ key.to_s.tr('!?', "\uFF01\uFF1F")
116
+ end
117
+
118
+ private
119
+
120
+ def do_strong_memoize(klass, method_name, member_name)
121
+ method = klass.instance_method(method_name)
122
+
123
+ unless method.arity.zero?
124
+ raise <<~ERROR
125
+ Using `strong_memoize_attr` on methods with parameters is not supported.
126
+
127
+ Use `strong_memoize_with` instead.
128
+ See https://docs.gitlab.com/ee/development/utilities.html#strongmemoize
129
+ ERROR
130
+ end
131
+
132
+ # Methods defined within a class method are already public by default, so we don't need to
133
+ # explicitly make them public.
134
+ scope = %i[private protected].find do |scope|
135
+ klass.send(:"#{scope}_instance_methods") # rubocop:disable GitlabSecurity/PublicSend
136
+ .include? method_name
137
+ end
138
+
139
+ klass.define_method(method_name) do |&block|
140
+ strong_memoize(member_name) do
141
+ method.bind_call(self, &block)
142
+ end
143
+ end
144
+
145
+ klass.send(scope, method_name) if scope # rubocop:disable GitlabSecurity/PublicSend
146
+ end
147
+ end
148
+ end
149
+ end
150
+ end
151
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'utils/certificate'
4
+ require_relative 'utils/memoize'
5
+
6
+ module GitLab
7
+ module SecretDetection
8
+ module Utils
9
+ end
10
+ end
11
+ end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative 'secret_detection/utils'
3
4
  require_relative 'secret_detection/core'
4
5
  require_relative 'secret_detection/grpc'
5
6
  require_relative 'secret_detection/version'
@@ -5,7 +5,7 @@ package gitlab.secret_detection;
5
5
  /* We keep generated files within grpc namespace i.e GitLab::SecretDetection::GRPC
6
6
  * so that these files are exported too in the Ruby Gem along with Core and GRPC logic.
7
7
  */
8
- option ruby_package="GitLab::SecretDetection::GRPC";
8
+ option ruby_package = "GitLab::SecretDetection::GRPC";
9
9
 
10
10
  /* Request arg for triggering Scan/ScanStream method */
11
11
  message ScanRequest {
@@ -42,11 +42,10 @@ message ScanResponse {
42
42
  // Represents a secret finding identified within a payload
43
43
  message Finding {
44
44
  string payload_id = 1;
45
- Status status = 2;
45
+ int32 status = 2;
46
46
  optional string type = 3;
47
47
  optional string description = 4;
48
48
  optional int32 line_number = 5;
49
- optional string error = 6;
50
49
  }
51
50
 
52
51
  // Return status code in sync with ::SecretDetection::Status
@@ -59,18 +58,18 @@ message ScanResponse {
59
58
  STATUS_SCAN_ERROR = 5; // internal scan failure
60
59
  STATUS_INPUT_ERROR = 6; // invalid input failure
61
60
  STATUS_NOT_FOUND = 7; // zero findings
61
+ STATUS_AUTH_ERROR = 8; // authentication failure
62
62
  }
63
63
 
64
- optional string error = 1;
65
- repeated Finding results = 2;
66
- Status status = 3;
64
+ repeated Finding results = 1;
65
+ int32 status = 2;
67
66
  }
68
67
 
69
68
  /* Scanner service that scans given payloads and returns findings */
70
69
  service Scanner {
71
70
  // Runs secret detection scan for the given request
72
- rpc Scan(ScanRequest) returns (ScanResponse) { }
71
+ rpc Scan(ScanRequest) returns (ScanResponse) {}
73
72
 
74
73
  // Runs bi-directional streaming of scans for the given stream of requests with a stream of responses
75
- rpc ScanStream(stream ScanRequest) returns (stream ScanResponse) { }
74
+ rpc ScanStream(stream ScanRequest) returns (stream ScanResponse) {}
76
75
  }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gitlab-secret_detection
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - group::secret detection
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2024-10-01 00:00:00.000000000 Z
13
+ date: 2024-10-07 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: grpc
@@ -28,18 +28,32 @@ dependencies:
28
28
  version: 1.63.0
29
29
  - !ruby/object:Gem::Dependency
30
30
  name: grpc-tools
31
+ requirement: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - '='
34
+ - !ruby/object:Gem::Version
35
+ version: 1.63.0
36
+ type: :runtime
37
+ prerelease: false
38
+ version_requirements: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - '='
41
+ - !ruby/object:Gem::Version
42
+ version: 1.63.0
43
+ - !ruby/object:Gem::Dependency
44
+ name: parallel
31
45
  requirement: !ruby/object:Gem::Requirement
32
46
  requirements:
33
47
  - - "~>"
34
48
  - !ruby/object:Gem::Version
35
- version: '1.66'
49
+ version: '1.19'
36
50
  type: :runtime
37
51
  prerelease: false
38
52
  version_requirements: !ruby/object:Gem::Requirement
39
53
  requirements:
40
54
  - - "~>"
41
55
  - !ruby/object:Gem::Version
42
- version: '1.66'
56
+ version: '1.19'
43
57
  - !ruby/object:Gem::Dependency
44
58
  name: re2
45
59
  requirement: !ruby/object:Gem::Requirement
@@ -93,10 +107,14 @@ files:
93
107
  - lib/gitlab/secret_detection/core/status.rb
94
108
  - lib/gitlab/secret_detection/grpc.rb
95
109
  - lib/gitlab/secret_detection/grpc/client/grpc_client.rb
110
+ - lib/gitlab/secret_detection/grpc/client/stream_request_enumerator.rb
96
111
  - lib/gitlab/secret_detection/grpc/generated/.gitkeep
97
112
  - lib/gitlab/secret_detection/grpc/generated/secret_detection_pb.rb
98
113
  - lib/gitlab/secret_detection/grpc/generated/secret_detection_services_pb.rb
99
114
  - lib/gitlab/secret_detection/grpc/scanner_service.rb
115
+ - lib/gitlab/secret_detection/utils.rb
116
+ - lib/gitlab/secret_detection/utils/certificate.rb
117
+ - lib/gitlab/secret_detection/utils/memoize.rb
100
118
  - lib/gitlab/secret_detection/version.rb
101
119
  - proto/secret_detection.proto
102
120
  homepage: https://gitlab.com/gitlab-org/security-products/secret-detection/secret-detection-service