gitlab-secret_detection 0.4.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c4b61c4602691e857833407badf2d2556d493679db2a5897c098eaff7aa96847
4
- data.tar.gz: f803fcbae2d2988aace3f62e141500e175a75664b3a21f28ef5ef8c2cf89fcb7
3
+ metadata.gz: 648b6d5277ac8e7948762533af39dc2f4ff0ae4c62fbdcc6d5d32615a44ab815
4
+ data.tar.gz: afd2a580cb0a73bb84a401616ec622e1e14d2b75021036a959f933ed75864cf6
5
5
  SHA512:
6
- metadata.gz: 0b7b61e27e95f816d6d8d260669eb0b54606170910ca18eb67937ca9f05b44333b7f6c8dccdf5c04aa6896d4d68a355e284745e91b309120f3a3fc789c9b05fb
7
- data.tar.gz: f91c75463925727308c61166ac89e4f48f61c2e0a7cb4d37cc4e0a411db4fcb88245cbc355ed49577d2fe22fcb5d1cb9bd08a734e3125dd4815ef86e44f7b578
6
+ metadata.gz: 7c49b71891f6e13d8dc252936f18df838d8e9d42000cbc1f4f3d7fe56b258a7e2c17bce7a7f0e771bc16991fa46bacf641a636a1d1efd3ef23de6b0a31a011b8
7
+ data.tar.gz: af93c82a0025be12bc82fe6ff62b290fe193a2338ae2f98d83bc2729d1c7cc35a223330a3a9907a9f715c009127e93ba2d13fc33982d2f13a179d8548f5cc36e
data/README.md CHANGED
@@ -42,6 +42,7 @@ the approach:
42
42
  │ ├── core/.. # Secret detection logic (most of it pulled from existing gem)
43
43
  │ └── grpc
44
44
  │ ├── generated/.. # gRPC generated files and secret detection gRPC service
45
+ │ ├── client/.. # gRPC client to invoke secret detection service's RPC endpoints
45
46
  │ └── scanner_service.rb # Secret detection gRPC service implementation
46
47
  ├── examples
47
48
  │ └── sample-client/.. # Sample Ruby RPC client that connects with gRPC server and calls RPC scan
@@ -320,7 +321,7 @@ Run `ruby examples/sample-client/sample_client.rb` on your terminal to run the s
320
321
 
321
322
  ## Benchmark
322
323
 
323
- RPC service is benchmarked using [`ghz`](https://ghz.sh), a powerful CLI-based tool for load testing and benchmarking gRPC services. More details added [here](benchmark/README.md).
324
+ RPC service is benchmarked using [`ghz`](https://ghz.sh), a powerful CLI-based tool for load testing and benchmarking gRPC services. More details added [here](https://gitlab.com/gitlab-org/gitlab/-/work_items/468107).
324
325
 
325
326
  ## Project Status
326
327
 
@@ -7,12 +7,15 @@ module GitLab
7
7
  #
8
8
  # +status+:: One of values from GitLab::SecretDetection::Core::Status indicating the scan operation's status
9
9
  # +results+:: Array of GitLab::SecretDetection::Core::Finding values. Default value is an empty array.
10
+ # +metadata+:: Hash object containing additional meta information about the response. It is currently used
11
+ # to embed more information on error.
10
12
  class Response
11
- attr_reader :status, :results
13
+ attr_reader :status, :results, :metadata
12
14
 
13
- def initialize(status, results = [])
15
+ def initialize(status, results = [], metadata = {})
14
16
  @status = status
15
17
  @results = results
18
+ @metadata = metadata
16
19
  end
17
20
 
18
21
  def ==(other)
@@ -22,6 +25,7 @@ module GitLab
22
25
  def to_h
23
26
  {
24
27
  status:,
28
+ metadata:,
25
29
  results: results&.map(&:to_h)
26
30
  }
27
31
  end
@@ -29,7 +33,7 @@ module GitLab
29
33
  protected
30
34
 
31
35
  def state
32
- [status, results]
36
+ [status, metadata, results]
33
37
  end
34
38
  end
35
39
  end
@@ -4,6 +4,7 @@ require 're2'
4
4
  require 'logger'
5
5
  require 'timeout'
6
6
  require 'English'
7
+ require 'parallel'
7
8
 
8
9
  module GitLab
9
10
  module SecretDetection
@@ -24,6 +25,14 @@ module GitLab
24
25
  DEFAULT_PAYLOAD_TIMEOUT_SECS = 30 # 30 seconds
25
26
  # Tags used for creating default pattern matcher
26
27
  DEFAULT_PATTERN_MATCHER_TAGS = ['gitlab_blocking'].freeze
28
+ # Maximum number of child processes to spawn per request
29
+ # ref: https://gitlab.com/gitlab-org/gitlab/-/issues/430160
30
+ MAX_PROCS_PER_REQUEST = 5
31
+ # Minimum cumulative size of the payloads required to spawn and
32
+ # run the scan within a new subprocess.
33
+ MIN_CHUNK_SIZE_PER_PROC_BYTES = 2_097_152 # 2MiB
34
+ # Whether to run scan in subprocesses or not. Default is false.
35
+ RUN_IN_SUBPROCESS = false
27
36
 
28
37
  # Initializes the instance with logger along with following operations:
29
38
  # 1. Extract keywords from the parsed ruleset to use it for matching keywords before regex operation.
@@ -58,6 +67,13 @@ module GitLab
58
67
  # For example: Add `gitlab_blocking` to include only rules for Push Protection. Defaults to
59
68
  # [`gitlab_blocking`] (+DEFAULT_PATTERN_MATCHER_TAGS+).
60
69
  #
70
+ # NOTE:
71
+ # Running the scan in fork mode primarily focuses on reducing the memory consumption of the scan by
72
+ # offloading regex operations on large payloads to sub-processes. However, it does not assure the improvement
73
+ # in the overall latency of the scan, specifically in the case of smaller payloads, where the overhead of
74
+ # forking a new process adds to the overall latency of the scan instead. More reference on Subprocess-based
75
+ # execution is found here: https://gitlab.com/gitlab-org/gitlab/-/issues/430160.
76
+ #
61
77
  # Returns an instance of GitLab::SecretDetection::Core::Response by following below structure:
62
78
  # {
63
79
  # status: One of the Core::Status values
@@ -70,7 +86,8 @@ module GitLab
70
86
  payload_timeout: DEFAULT_PAYLOAD_TIMEOUT_SECS,
71
87
  raw_value_exclusions: [],
72
88
  rule_exclusions: [],
73
- tags: DEFAULT_PATTERN_MATCHER_TAGS
89
+ tags: DEFAULT_PATTERN_MATCHER_TAGS,
90
+ subprocess: RUN_IN_SUBPROCESS
74
91
  )
75
92
 
76
93
  return Core::Response.new(Core::Status::INPUT_ERROR) unless validate_scan_input(payloads)
@@ -87,11 +104,13 @@ module GitLab
87
104
 
88
105
  next Core::Response.new(Core::Status::NOT_FOUND) if matched_payloads.empty?
89
106
 
90
- secrets = run_scan(
107
+ scan_args = {
91
108
  payloads: matched_payloads, payload_timeout:,
92
109
  pattern_matcher: build_pattern_matcher(tags:),
93
110
  raw_value_exclusions:, rule_exclusions:
94
- )
111
+ }
112
+
113
+ secrets = subprocess ? run_scan_within_subprocess(**scan_args) : run_scan(**scan_args)
95
114
 
96
115
  scan_status = overall_scan_status(secrets)
97
116
 
@@ -205,6 +224,36 @@ module GitLab
205
224
  end
206
225
  end
207
226
 
227
+ def run_scan_within_subprocess(
228
+ payloads:, payload_timeout:, pattern_matcher:, raw_value_exclusions: [],
229
+ rule_exclusions: [])
230
+ payload_sizes = payloads.map(&:size)
231
+ grouped_payload_indices = group_by_chunk_size(payload_sizes)
232
+
233
+ grouped_payloads = grouped_payload_indices.map { |idx_arr| idx_arr.map { |i| payloads[i] } }
234
+
235
+ found_secrets = Parallel.flat_map(
236
+ grouped_payloads,
237
+ in_processes: MAX_PROCS_PER_REQUEST,
238
+ isolation: true # do not reuse sub-processes
239
+ ) do |grouped_payload|
240
+ grouped_payload.flat_map do |payload|
241
+ Timeout.timeout(payload_timeout) do
242
+ find_secrets_in_payload(
243
+ payload:,
244
+ pattern_matcher:,
245
+ raw_value_exclusions:, rule_exclusions:
246
+ )
247
+ end
248
+ rescue Timeout::Error => e
249
+ logger.error "Secret Detection scan timed out on the payload(id:#{payload.id}): #{e}"
250
+ Core::Finding.new(payload.id, Core::Status::PAYLOAD_TIMEOUT)
251
+ end
252
+ end
253
+
254
+ found_secrets.freeze
255
+ end
256
+
208
257
  # Finds secrets in the given payload guarded with a timeout as a circuit breaker. It accepts
209
258
  # literal values to exclude from the input before the scan, also SD rules to exclude during
210
259
  # the scan.
@@ -268,6 +317,35 @@ module GitLab
268
317
  Core::Status::FOUND_WITH_ERRORS
269
318
  end
270
319
  end
320
+
321
+ # This method accepts an array of payload sizes(in bytes) and groups them into an array
322
+ # of arrays structure where each element is the group of indices of the input
323
+ # array whose cumulative payload size is at least +MIN_CHUNK_SIZE_PER_PROC_BYTES+ (the trailing group may be smaller)
324
+ def group_by_chunk_size(payload_size_arr)
325
+ cumulative_size = 0
326
+ chunk_indexes = []
327
+ chunk_idx_start = 0
328
+
329
+ payload_size_arr.each_with_index do |size, index|
330
+ cumulative_size += size
331
+ next unless cumulative_size >= MIN_CHUNK_SIZE_PER_PROC_BYTES
332
+
333
+ chunk_indexes << (chunk_idx_start..index).to_a
334
+
335
+ chunk_idx_start = index + 1
336
+ cumulative_size = 0
337
+ end
338
+
339
+ if cumulative_size.positive? && (chunk_idx_start < payload_size_arr.length)
340
+ chunk_indexes << if chunk_idx_start == payload_size_arr.length - 1
341
+ [chunk_idx_start]
342
+ else
343
+ (chunk_idx_start..payload_size_arr.length - 1).to_a
344
+ end
345
+ end
346
+
347
+ chunk_indexes
348
+ end
271
349
  end
272
350
  end
273
351
  end
@@ -12,6 +12,7 @@ module GitLab
12
12
  SCAN_ERROR = 5 # When the scan operation fails due to regex error
13
13
  INPUT_ERROR = 6 # When the scan operation fails due to invalid input
14
14
  NOT_FOUND = 7 # When scan operation completes with zero findings
15
+ AUTH_ERROR = 8 # When authentication fails
15
16
  end
16
17
  end
17
18
  end
@@ -1,19 +1,143 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative '../generated/secret_detection_pb'
4
- require_relative '../generated/secret_detection_services_pb'
3
+ require 'grpc'
4
+ require_relative '../../grpc/scanner_service'
5
+ require_relative '../../core/response'
6
+ require_relative '../../core/status'
7
+ require_relative '../../utils'
8
+ require_relative './stream_request_enumerator'
5
9
 
6
10
  module GitLab
7
11
  module SecretDetection
8
12
  module GRPC
9
13
  class Client
10
- # TODO add implementation
11
- def scan
12
- raise NotImplementedError
14
+ include SecretDetection::Utils::StrongMemoize
15
+ include SDLogger
16
+
17
+ # Time to wait for the response from the service
18
+ REQUEST_TIMEOUT_SECONDS = 10 # 10 seconds
19
+
20
+ def initialize(host, secure: false, compression: true)
21
+ @host = host
22
+ @secure = secure
23
+ @compression = compression
24
+ end
25
+
26
+ # Triggers Secret Detection service's `/Scan` gRPC endpoint. To keep it consistent with SDS gem interface,
27
+ # this method transforms the gRPC response to +GitLab::SecretDetection::Core::Response+.
28
+ # Furthermore, any errors that are raised by the service will be translated to
29
+ # +GitLab::SecretDetection::Core::Response+ type by assigning an appropriate +status+ value to it.
30
+ def run_scan(request:, auth_token:, extra_headers: {})
31
+ with_rescued_errors do
32
+ grpc_response = stub.scan(
33
+ request,
34
+ metadata: build_metadata(auth_token, extra_headers),
35
+ deadline: request_deadline
36
+ )
37
+
38
+ convert_to_core_response(grpc_response)
39
+ end
40
+ end
41
+
42
+ # Triggers Secret Detection service's `/ScanStream` gRPC endpoint.
43
+ #
44
+ # To keep it consistent with SDS gem interface, this method transforms the gRPC response to
45
+ # +GitLab::SecretDetection::Core::Response+ type. Furthermore, any errors that are raised by the service will be
46
+ # translated to +GitLab::SecretDetection::Core::Response+ type by assigning an appropriate +status+ value to it.
47
+ #
48
+ # Note: If one of the stream requests results in an error, the stream will end immediately without processing the
49
+ # remaining requests.
50
+ def run_scan_stream(requests:, auth_token:, extra_headers: {})
51
+ request_stream = GitLab::SecretDetection::GRPC::StreamRequestEnumerator.new(requests)
52
+ results = []
53
+ with_rescued_errors do
54
+ stub.scan_stream(
55
+ request_stream.each_item,
56
+ metadata: build_metadata(auth_token, extra_headers),
57
+ deadline: request_deadline
58
+ ).each do |grpc_response|
59
+ response = convert_to_core_response(grpc_response)
60
+ if block_given?
61
+ yield response
62
+ else
63
+ results << response
64
+ end
65
+ end
66
+ results
67
+ end
68
+ end
69
+
70
+ private
71
+
72
+ attr_reader :secure, :host, :compression
73
+
74
+ def stub
75
+ GitLab::SecretDetection::GRPC::Scanner::Stub.new(
76
+ host,
77
+ channel_credentials,
78
+ channel_args:
79
+ )
80
+ end
81
+
82
+ strong_memoize_attr :stub
83
+
84
+ def channel_args
85
+ default_options = {
86
+ 'grpc.keepalive_permit_without_calls' => 1,
87
+ 'grpc.keepalive_time_ms' => 30000, # 30 seconds
88
+ 'grpc.keepalive_timeout_ms' => 10000 # 10 seconds timeout for keepalive response
89
+ }
90
+
91
+ compression_options = ::GRPC::Core::CompressionOptions
92
+ .new(default_algorithm: :gzip)
93
+ .to_channel_arg_hash
94
+
95
+ default_options.merge!(compression_options) if compression
96
+
97
+ default_options.freeze
98
+ end
99
+
100
+ def channel_credentials
101
+ return :this_channel_is_insecure unless secure
102
+
103
+ certs = GitLab::SecretDetection::Utils::X509::Certificate.ca_certs_bundle
104
+
105
+ ::GRPC::Core::ChannelCredentials.new(certs)
13
106
  end
14
107
 
15
- def scan_stream
16
- raise NotImplementedError
108
+ def build_metadata(token, extra_headers = {})
109
+ { 'x-sd-auth' => token }.merge!(extra_headers).freeze
110
+ end
111
+
112
+ def request_deadline
113
+ Time.now + REQUEST_TIMEOUT_SECONDS
114
+ end
115
+
116
+ def with_rescued_errors
117
+ yield
118
+ rescue ::GRPC::Unauthenticated
119
+ SecretDetection::Core::Response.new(SecretDetection::Core::Status::AUTH_ERROR)
120
+ rescue ::GRPC::InvalidArgument => e
121
+ SecretDetection::Core::Response.new(
122
+ SecretDetection::Core::Status::INPUT_ERROR, nil, { message: e.details, **e.metadata }
123
+ )
124
+ rescue ::GRPC::Unknown, ::GRPC::BadStatus => e
125
+ SecretDetection::Core::Response.new(
126
+ SecretDetection::Core::Status::SCAN_ERROR, nil, { message: e.details }
127
+ )
128
+ end
129
+
130
+ def convert_to_core_response(grpc_response)
131
+ response = grpc_response.to_h
132
+
133
+ SecretDetection::Core::Response.new(
134
+ response[:status],
135
+ response[:results],
136
+ response[:metadata]
137
+ )
138
+ rescue StandardError => e
139
+ logger.error("Failed to convert to core response: #{e}")
140
+ SecretDetection::Core::Response.new(SecretDetection::Core::Status::SCAN_ERROR)
17
141
  end
18
142
  end
19
143
  end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module GitLab
4
+ module SecretDetection
5
+ module GRPC
6
+ class StreamRequestEnumerator
7
+ def initialize(requests = [])
8
+ @requests = requests
9
+ end
10
+
11
+ # yields a request, waiting between 0 and 1 seconds between requests
12
+ #
13
+ # @return an Enumerable that yields a request input
14
+ def each_item
15
+ return enum_for(:each_item) unless block_given?
16
+
17
+ @requests.each do |request|
18
+ yield request
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -5,7 +5,7 @@
5
5
  require 'google/protobuf'
6
6
 
7
7
 
8
- descriptor_data = "\n\x16secret_detection.proto\x12\x17gitlab.secret_detection\"\xfc\x03\n\x0bScanRequest\x12>\n\x08payloads\x18\x01 \x03(\x0b\x32,.gitlab.secret_detection.ScanRequest.Payload\x12\x19\n\x0ctimeout_secs\x18\x02 \x01(\x02H\x00\x88\x01\x01\x12!\n\x14payload_timeout_secs\x18\x03 \x01(\x02H\x01\x88\x01\x01\x12\x42\n\nexclusions\x18\x04 \x03(\x0b\x32..gitlab.secret_detection.ScanRequest.Exclusion\x12\x0c\n\x04tags\x18\x05 \x03(\t\x1a#\n\x07Payload\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\t\x1a\x66\n\tExclusion\x12J\n\x0e\x65xclusion_type\x18\x01 \x01(\x0e\x32\x32.gitlab.secret_detection.ScanRequest.ExclusionType\x12\r\n\x05value\x18\x02 \x01(\t\"f\n\rExclusionType\x12\x1e\n\x1a\x45XCLUSION_TYPE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XCLUSION_TYPE_RULE\x10\x01\x12\x1c\n\x18\x45XCLUSION_TYPE_RAW_VALUE\x10\x02\x42\x0f\n\r_timeout_secsB\x17\n\x15_payload_timeout_secs\"\xe3\x04\n\x0cScanResponse\x12\x12\n\x05\x65rror\x18\x01 \x01(\tH\x00\x88\x01\x01\x12>\n\x07results\x18\x02 \x03(\x0b\x32-.gitlab.secret_detection.ScanResponse.Finding\x12<\n\x06status\x18\x03 \x01(\x0e\x32,.gitlab.secret_detection.ScanResponse.Status\x1a\xe9\x01\n\x07\x46inding\x12\x12\n\npayload_id\x18\x01 \x01(\t\x12<\n\x06status\x18\x02 \x01(\x0e\x32,.gitlab.secret_detection.ScanResponse.Status\x12\x11\n\x04type\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x18\n\x0b\x64\x65scription\x18\x04 \x01(\tH\x01\x88\x01\x01\x12\x18\n\x0bline_number\x18\x05 \x01(\x05H\x02\x88\x01\x01\x12\x12\n\x05\x65rror\x18\x06 
\x01(\tH\x03\x88\x01\x01\x42\x07\n\x05_typeB\x0e\n\x0c_descriptionB\x0e\n\x0c_line_numberB\x08\n\x06_error\"\xca\x01\n\x06Status\x12\x16\n\x12STATUS_UNSPECIFIED\x10\x00\x12\x10\n\x0cSTATUS_FOUND\x10\x01\x12\x1c\n\x18STATUS_FOUND_WITH_ERRORS\x10\x02\x12\x17\n\x13STATUS_SCAN_TIMEOUT\x10\x03\x12\x1a\n\x16STATUS_PAYLOAD_TIMEOUT\x10\x04\x12\x15\n\x11STATUS_SCAN_ERROR\x10\x05\x12\x16\n\x12STATUS_INPUT_ERROR\x10\x06\x12\x14\n\x10STATUS_NOT_FOUND\x10\x07\x42\x08\n\x06_error2\xc1\x01\n\x07Scanner\x12U\n\x04Scan\x12$.gitlab.secret_detection.ScanRequest\x1a%.gitlab.secret_detection.ScanResponse\"\x00\x12_\n\nScanStream\x12$.gitlab.secret_detection.ScanRequest\x1a%.gitlab.secret_detection.ScanResponse\"\x00(\x01\x30\x01\x42 \xea\x02\x1dGitLab::SecretDetection::GRPCb\x06proto3"
8
+ descriptor_data = "\n\x16secret_detection.proto\x12\x17gitlab.secret_detection\"\xfc\x03\n\x0bScanRequest\x12>\n\x08payloads\x18\x01 \x03(\x0b\x32,.gitlab.secret_detection.ScanRequest.Payload\x12\x19\n\x0ctimeout_secs\x18\x02 \x01(\x02H\x00\x88\x01\x01\x12!\n\x14payload_timeout_secs\x18\x03 \x01(\x02H\x01\x88\x01\x01\x12\x42\n\nexclusions\x18\x04 \x03(\x0b\x32..gitlab.secret_detection.ScanRequest.Exclusion\x12\x0c\n\x04tags\x18\x05 \x03(\t\x1a#\n\x07Payload\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\t\x1a\x66\n\tExclusion\x12J\n\x0e\x65xclusion_type\x18\x01 \x01(\x0e\x32\x32.gitlab.secret_detection.ScanRequest.ExclusionType\x12\r\n\x05value\x18\x02 \x01(\t\"f\n\rExclusionType\x12\x1e\n\x1a\x45XCLUSION_TYPE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XCLUSION_TYPE_RULE\x10\x01\x12\x1c\n\x18\x45XCLUSION_TYPE_RAW_VALUE\x10\x02\x42\x0f\n\r_timeout_secsB\x17\n\x15_payload_timeout_secs\"\xe2\x03\n\x0cScanResponse\x12>\n\x07results\x18\x01 \x03(\x0b\x32-.gitlab.secret_detection.ScanResponse.Finding\x12\x0e\n\x06status\x18\x02 \x01(\x05\x1a\x9d\x01\n\x07\x46inding\x12\x12\n\npayload_id\x18\x01 \x01(\t\x12\x0e\n\x06status\x18\x02 \x01(\x05\x12\x11\n\x04type\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x18\n\x0b\x64\x65scription\x18\x04 \x01(\tH\x01\x88\x01\x01\x12\x18\n\x0bline_number\x18\x05 
\x01(\x05H\x02\x88\x01\x01\x42\x07\n\x05_typeB\x0e\n\x0c_descriptionB\x0e\n\x0c_line_number\"\xe1\x01\n\x06Status\x12\x16\n\x12STATUS_UNSPECIFIED\x10\x00\x12\x10\n\x0cSTATUS_FOUND\x10\x01\x12\x1c\n\x18STATUS_FOUND_WITH_ERRORS\x10\x02\x12\x17\n\x13STATUS_SCAN_TIMEOUT\x10\x03\x12\x1a\n\x16STATUS_PAYLOAD_TIMEOUT\x10\x04\x12\x15\n\x11STATUS_SCAN_ERROR\x10\x05\x12\x16\n\x12STATUS_INPUT_ERROR\x10\x06\x12\x14\n\x10STATUS_NOT_FOUND\x10\x07\x12\x15\n\x11STATUS_AUTH_ERROR\x10\x08\x32\xc1\x01\n\x07Scanner\x12U\n\x04Scan\x12$.gitlab.secret_detection.ScanRequest\x1a%.gitlab.secret_detection.ScanResponse\"\x00\x12_\n\nScanStream\x12$.gitlab.secret_detection.ScanRequest\x1a%.gitlab.secret_detection.ScanResponse\"\x00(\x01\x30\x01\x42 \xea\x02\x1dGitLab::SecretDetection::GRPCb\x06proto3"
9
9
 
10
10
  pool = Google::Protobuf::DescriptorPool.generated_pool
11
11
  pool.add_serialized_file(descriptor_data)
@@ -74,14 +74,19 @@ module GitLab
74
74
  end
75
75
  end
76
76
 
77
- result = scanner.secrets_scan(
78
- payloads,
79
- raw_value_exclusions:,
80
- rule_exclusions:,
81
- tags: request.tags.to_a,
82
- timeout: request.timeout_secs,
83
- payload_timeout: request.payload_timeout_secs
84
- )
77
+ begin
78
+ result = scanner.secrets_scan(
79
+ payloads,
80
+ raw_value_exclusions:,
81
+ rule_exclusions:,
82
+ tags: request.tags.to_a,
83
+ timeout: request.timeout_secs,
84
+ payload_timeout: request.payload_timeout_secs
85
+ )
86
+ rescue StandardError => e
87
+ logger.error("Failed to run the scan: #{e}")
88
+ raise ::GRPC::Unknown, e.message
89
+ end
85
90
 
86
91
  findings = result.results&.map do |finding|
87
92
  GitLab::SecretDetection::GRPC::ScanResponse::Finding.new(**finding.to_h)
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'grpc/scanner_service'
4
+ require_relative 'grpc/client/stream_request_enumerator'
4
5
  require_relative 'grpc/client/grpc_client'
5
6
 
6
7
  module GitLab
@@ -0,0 +1,108 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'openssl'
4
+ require_relative 'memoize'
5
+
6
+ module GitLab
7
+ module SecretDetection
8
+ module Utils
9
+ module X509
10
+ # Pulled from GitLab.com source
11
+ # Link: https://gitlab.com/gitlab-org/gitlab/-/blob/4713a798f997389f04e442db3d1d8349a39d5d46/lib/gitlab/x509/certificate.rb
12
+ class Certificate
13
+ CERT_REGEX = /-----BEGIN CERTIFICATE-----(?:.|\n)+?-----END CERTIFICATE-----/
14
+
15
+ attr_reader :key, :cert, :ca_certs
16
+
17
+ def self.default_cert_dir
18
+ strong_memoize(:default_cert_dir) do
19
+ ENV.fetch('SSL_CERT_DIR', OpenSSL::X509::DEFAULT_CERT_DIR)
20
+ end
21
+ end
22
+
23
+ def self.default_cert_file
24
+ strong_memoize(:default_cert_file) do
25
+ ENV.fetch('SSL_CERT_FILE', OpenSSL::X509::DEFAULT_CERT_FILE)
26
+ end
27
+ end
28
+
29
+ def self.from_strings(key_string, cert_string, ca_certs_string = nil)
30
+ key = OpenSSL::PKey::RSA.new(key_string)
31
+ cert = OpenSSL::X509::Certificate.new(cert_string)
32
+ ca_certs = load_ca_certs_bundle(ca_certs_string)
33
+
34
+ new(key, cert, ca_certs)
35
+ end
36
+
37
+ def self.from_files(key_path, cert_path, ca_certs_path = nil)
38
+ ca_certs_string = File.read(ca_certs_path) if ca_certs_path
39
+
40
+ from_strings(File.read(key_path), File.read(cert_path), ca_certs_string)
41
+ end
42
+
43
+ # Returns all top-level, readable files in the default CA cert directory
44
+ def self.ca_certs_paths
45
+ cert_paths = Dir["#{default_cert_dir}/*"].select do |path|
46
+ !File.directory?(path) && File.readable?(path)
47
+ end
48
+ cert_paths << default_cert_file if File.exist? default_cert_file
49
+ cert_paths
50
+ end
51
+
52
+ # Returns a concatenated array of Strings, each being a PEM-coded CA certificate.
53
+ def self.ca_certs_bundle
54
+ strong_memoize(:ca_certs_bundle) do
55
+ ca_certs_paths.flat_map do |cert_file|
56
+ load_ca_certs_bundle(File.read(cert_file))
57
+ end.uniq.join("\n")
58
+ end
59
+ end
60
+
61
+ def self.reset_ca_certs_bundle
62
+ clear_memoization(:ca_certs_bundle)
63
+ end
64
+
65
+ def self.reset_default_cert_paths
66
+ clear_memoization(:default_cert_dir)
67
+ clear_memoization(:default_cert_file)
68
+ end
69
+
70
+ # Returns an array of OpenSSL::X509::Certificate objects, empty array if none found
71
+ #
72
+ # Ruby OpenSSL::X509::Certificate.new will only load the first
73
+ # certificate if a bundle is presented, this allows to parse multiple certs
74
+ # in the same file
75
+ def self.load_ca_certs_bundle(ca_certs_string)
76
+ return [] unless ca_certs_string
77
+
78
+ ca_certs_string.scan(CERT_REGEX).map do |ca_cert_string|
79
+ OpenSSL::X509::Certificate.new(ca_cert_string)
80
+ end
81
+ end
82
+
83
+ def initialize(key, cert, ca_certs = nil)
84
+ @key = key
85
+ @cert = cert
86
+ @ca_certs = ca_certs
87
+ end
88
+
89
+ def key_string
90
+ key.to_s
91
+ end
92
+
93
+ def cert_string
94
+ cert.to_pem
95
+ end
96
+
97
+ def ca_certs_string
98
+ ca_certs&.map(&:to_pem)&.join('\n') unless ca_certs.blank?
99
+ end
100
+
101
+ class << self
102
+ include ::GitLab::SecretDetection::Utils::StrongMemoize
103
+ end
104
+ end
105
+ end
106
+ end
107
+ end
108
+ end
@@ -0,0 +1,151 @@
1
+ # frozen_string_literal: true
2
+
3
+ module GitLab
4
+ module SecretDetection
5
+ module Utils
6
+ # Pulled from GitLab.com source
7
+ # Link: https://gitlab.com/gitlab-org/gitlab/-/blob/4713a798f997389f04e442db3d1d8349a39d5d46/gems/gitlab-utils/lib/gitlab/utils/strong_memoize.rb
8
+ module StrongMemoize
9
+ # Instead of writing patterns like this:
10
+ #
11
+ # def trigger_from_token
12
+ # return @trigger if defined?(@trigger)
13
+ #
14
+ # @trigger = Ci::Trigger.find_by_token(params[:token].to_s)
15
+ # end
16
+ #
17
+ # We could write it like:
18
+ #
19
+ # include GitLab::SecretDetection::Utils::StrongMemoize
20
+ #
21
+ # def trigger_from_token
22
+ # Ci::Trigger.find_by_token(params[:token].to_s)
23
+ # end
24
+ # strong_memoize_attr :trigger_from_token
25
+ #
26
+ # def enabled?
27
+ # Feature.enabled?(:some_feature)
28
+ # end
29
+ # strong_memoize_attr :enabled?
30
+ #
31
+ def strong_memoize(name)
32
+ key = ivar(name)
33
+
34
+ if instance_variable_defined?(key)
35
+ instance_variable_get(key)
36
+ else
37
+ instance_variable_set(key, yield)
38
+ end
39
+ end
40
+
41
+ # Works the same way as "strong_memoize" but takes
42
+ # a second argument - expire_in. This allows invalidating
43
+ # the data after specified number of seconds
44
+ def strong_memoize_with_expiration(name, expire_in)
45
+ key = ivar(name)
46
+ expiration_key = "#{key}_expired_at"
47
+
48
+ if instance_variable_defined?(expiration_key)
49
+ expire_at = instance_variable_get(expiration_key)
50
+ clear_memoization(name) if expire_at.past?
51
+ end
52
+
53
+ if instance_variable_defined?(key)
54
+ instance_variable_get(key)
55
+ else
56
+ value = instance_variable_set(key, yield)
57
+ instance_variable_set(expiration_key, Time.current + expire_in)
58
+ value
59
+ end
60
+ end
61
+
62
+ def strong_memoize_with(name, *args)
63
+ container = strong_memoize(name) { {} }
64
+
65
+ if container.key?(args)
66
+ container[args]
67
+ else
68
+ container[args] = yield
69
+ end
70
+ end
71
+
72
+ def strong_memoized?(name)
73
+ key = ivar(StrongMemoize.normalize_key(name))
74
+ instance_variable_defined?(key)
75
+ end
76
+
77
+ def clear_memoization(name)
78
+ key = ivar(StrongMemoize.normalize_key(name))
79
+ remove_instance_variable(key) if instance_variable_defined?(key)
80
+ end
81
+
82
+ module StrongMemoizeClassMethods
83
+ def strong_memoize_attr(method_name)
84
+ member_name = StrongMemoize.normalize_key(method_name)
85
+
86
+ StrongMemoize.send(:do_strong_memoize, self, method_name, member_name) # rubocop:disable GitlabSecurity/PublicSend
87
+ end
88
+ end
89
+
90
+ def self.included(base)
91
+ base.singleton_class.prepend(StrongMemoizeClassMethods)
92
+ end
93
+
94
+ private
95
+
96
+ # Convert `"name"`/`:name` into `:@name`
97
+ #
98
+ # Depending on a type ensure that there's a single memory allocation
99
+ def ivar(name)
100
+ case name
101
+ when Symbol
102
+ name.to_s.prepend("@").to_sym
103
+ when String
104
+ :"@#{name}"
105
+ else
106
+ raise ArgumentError, "Invalid type of '#{name}'"
107
+ end
108
+ end
109
+
110
+ class << self
111
+ def normalize_key(key)
112
+ return key unless key.end_with?('!', '?')
113
+
114
+ # Replace invalid chars like `!` and `?` with allowed Unicode code points.
115
+ key.to_s.tr('!?', "\uFF01\uFF1F")
116
+ end
117
+
118
+ private
119
+
120
+ def do_strong_memoize(klass, method_name, member_name)
121
+ method = klass.instance_method(method_name)
122
+
123
+ unless method.arity.zero?
124
+ raise <<~ERROR
125
+ Using `strong_memoize_attr` on methods with parameters is not supported.
126
+
127
+ Use `strong_memoize_with` instead.
128
+ See https://docs.gitlab.com/ee/development/utilities.html#strongmemoize
129
+ ERROR
130
+ end
131
+
132
+ # Methods defined within a class method are already public by default, so we don't need to
133
+ # explicitly make them public.
134
+ scope = %i[private protected].find do |scope|
135
+ klass.send(:"#{scope}_instance_methods") # rubocop:disable GitlabSecurity/PublicSend
136
+ .include? method_name
137
+ end
138
+
139
+ klass.define_method(method_name) do |&block|
140
+ strong_memoize(member_name) do
141
+ method.bind_call(self, &block)
142
+ end
143
+ end
144
+
145
+ klass.send(scope, method_name) if scope # rubocop:disable GitlabSecurity/PublicSend
146
+ end
147
+ end
148
+ end
149
+ end
150
+ end
151
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'utils/certificate'
4
+ require_relative 'utils/memoize'
5
+
6
+ module GitLab
7
+ module SecretDetection
8
+ module Utils
9
+ end
10
+ end
11
+ end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative 'secret_detection/utils'
3
4
  require_relative 'secret_detection/core'
4
5
  require_relative 'secret_detection/grpc'
5
6
  require_relative 'secret_detection/version'
@@ -5,7 +5,7 @@ package gitlab.secret_detection;
5
5
  /* We keep generated files within grpc namespace i.e GitLab::SecretDetection::GRPC
6
6
  * so that these files are exported too in the Ruby Gem along with Core and GRPC logic.
7
7
  */
8
- option ruby_package="GitLab::SecretDetection::GRPC";
8
+ option ruby_package = "GitLab::SecretDetection::GRPC";
9
9
 
10
10
  /* Request arg for triggering Scan/ScanStream method */
11
11
  message ScanRequest {
@@ -42,11 +42,10 @@ message ScanResponse {
42
42
  // Represents a secret finding identified within a payload
43
43
  message Finding {
44
44
  string payload_id = 1;
45
- Status status = 2;
45
+ int32 status = 2;
46
46
  optional string type = 3;
47
47
  optional string description = 4;
48
48
  optional int32 line_number = 5;
49
- optional string error = 6;
50
49
  }
51
50
 
52
51
  // Return status code in sync with ::SecretDetection::Status
@@ -59,18 +58,18 @@ message ScanResponse {
59
58
  STATUS_SCAN_ERROR = 5; // internal scan failure
60
59
  STATUS_INPUT_ERROR = 6; // invalid input failure
61
60
  STATUS_NOT_FOUND = 7; // zero findings
61
+ STATUS_AUTH_ERROR = 8; // authentication failure
62
62
  }
63
63
 
64
- optional string error = 1;
65
- repeated Finding results = 2;
66
- Status status = 3;
64
+ repeated Finding results = 1;
65
+ int32 status = 2;
67
66
  }
68
67
 
69
68
  /* Scanner service that scans given payloads and returns findings */
70
69
  service Scanner {
71
70
  // Runs secret detection scan for the given request
72
- rpc Scan(ScanRequest) returns (ScanResponse) { }
71
+ rpc Scan(ScanRequest) returns (ScanResponse) {}
73
72
 
74
73
  // Runs bi-directional streaming of scans for the given stream of requests with a stream of responses
75
- rpc ScanStream(stream ScanRequest) returns (stream ScanResponse) { }
74
+ rpc ScanStream(stream ScanRequest) returns (stream ScanResponse) {}
76
75
  }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gitlab-secret_detection
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - group::secret detection
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2024-10-01 00:00:00.000000000 Z
13
+ date: 2024-10-07 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: grpc
@@ -28,18 +28,32 @@ dependencies:
28
28
  version: 1.63.0
29
29
  - !ruby/object:Gem::Dependency
30
30
  name: grpc-tools
31
+ requirement: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - '='
34
+ - !ruby/object:Gem::Version
35
+ version: 1.63.0
36
+ type: :runtime
37
+ prerelease: false
38
+ version_requirements: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - '='
41
+ - !ruby/object:Gem::Version
42
+ version: 1.63.0
43
+ - !ruby/object:Gem::Dependency
44
+ name: parallel
31
45
  requirement: !ruby/object:Gem::Requirement
32
46
  requirements:
33
47
  - - "~>"
34
48
  - !ruby/object:Gem::Version
35
- version: '1.66'
49
+ version: '1.19'
36
50
  type: :runtime
37
51
  prerelease: false
38
52
  version_requirements: !ruby/object:Gem::Requirement
39
53
  requirements:
40
54
  - - "~>"
41
55
  - !ruby/object:Gem::Version
42
- version: '1.66'
56
+ version: '1.19'
43
57
  - !ruby/object:Gem::Dependency
44
58
  name: re2
45
59
  requirement: !ruby/object:Gem::Requirement
@@ -93,10 +107,14 @@ files:
93
107
  - lib/gitlab/secret_detection/core/status.rb
94
108
  - lib/gitlab/secret_detection/grpc.rb
95
109
  - lib/gitlab/secret_detection/grpc/client/grpc_client.rb
110
+ - lib/gitlab/secret_detection/grpc/client/stream_request_enumerator.rb
96
111
  - lib/gitlab/secret_detection/grpc/generated/.gitkeep
97
112
  - lib/gitlab/secret_detection/grpc/generated/secret_detection_pb.rb
98
113
  - lib/gitlab/secret_detection/grpc/generated/secret_detection_services_pb.rb
99
114
  - lib/gitlab/secret_detection/grpc/scanner_service.rb
115
+ - lib/gitlab/secret_detection/utils.rb
116
+ - lib/gitlab/secret_detection/utils/certificate.rb
117
+ - lib/gitlab/secret_detection/utils/memoize.rb
100
118
  - lib/gitlab/secret_detection/version.rb
101
119
  - proto/secret_detection.proto
102
120
  homepage: https://gitlab.com/gitlab-org/security-products/secret-detection/secret-detection-service