gitlab-secret_detection 0.1.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,30 @@
1
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
2
+ # Source: secret_detection.proto for package 'Gitlab.SecretDetection.GRPC'
3
+
4
+ require 'grpc'
5
+ require 'secret_detection_pb'
6
+
7
+ module Gitlab
8
+ module SecretDetection
9
+ module GRPC
10
+ module Scanner
11
+ # Service definition for 'gitlab.secret_detection.Scanner': declares the RPCs that scan payloads and return findings
12
+ class Service
13
+
14
+ include ::GRPC::GenericService
15
+
16
+ self.marshal_class_method = :encode # class method invoked on message classes to serialize
17
+ self.unmarshal_class_method = :decode # class method invoked on message classes to deserialize
18
+ self.service_name = 'gitlab.secret_detection.Scanner'
19
+
20
+ # Scan: takes one ScanRequest and returns one ScanResponse
21
+ rpc :Scan, ::Gitlab::SecretDetection::GRPC::ScanRequest, ::Gitlab::SecretDetection::GRPC::ScanResponse
22
+ # ScanStream: bi-directional streaming; takes a stream of ScanRequest and returns a stream of ScanResponse
23
+ rpc :ScanStream, stream(::Gitlab::SecretDetection::GRPC::ScanRequest), stream(::Gitlab::SecretDetection::GRPC::ScanResponse)
24
+ end
25
+
26
+ Stub = Service.rpc_stub_class # client stub class derived from the Service definition above
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,154 @@
1
+ # frozen_string_literal: true
2
+
3
+ $LOAD_PATH.unshift(File.expand_path('generated', __dir__))
4
+
5
+ require 'grpc'
6
+
7
+ require_relative 'generated/secret_detection_pb'
8
+ require_relative 'generated/secret_detection_services_pb'
9
+
10
+ require_relative '../core'
11
+ require_relative '../../../../config/log'
12
+
13
# StreamEnumerator backs bi-directional streaming: it walks a stream of
# requests and produces, for each one, the value returned by the given action.
class StreamEnumerator
  # requests - any object responding to `each` that yields requests
  # action   - callable invoked with each request; its result becomes the response
  def initialize(requests, action)
    @requests = requests
    @request_action = action
  end

  # Yields the action's result for every request. Without a block, returns an
  # Enumerator over the same sequence, so responses are computed on demand.
  def each_item
    return to_enum(:each_item) unless block_given?

    @requests.each { |request| yield @request_action.call(request) }
  end
end
29
+
30
module Gitlab
  module SecretDetection
    module GRPC
      # Server-side implementation of the Scanner gRPC service. Each request is
      # validated, translated into a call to the core scanner, and the scanner's
      # result is converted back into a ScanResponse message.
      class ScannerService < Scanner::Service
        include SDLogger

        # Maximum timeout value that can be given as the input. This guards
        # against the misuse of timeouts.
        MAX_ALLOWED_TIMEOUT_SECONDS = 600

        # Messages attached to the InvalidArgument errors raised by validate_request.
        # NOTE(review): :exclusion_invalid_type is defined but not raised anywhere
        # in this class.
        ERROR_MESSAGES = {
          invalid_payload_fields: "Payload should not contain empty `id` and `data` fields",
          exclusion_empty_value: "Exclusion value cannot be empty",
          exclusion_invalid_type: "Invalid exclusion type",
          invalid_timeout_range: "Timeout value should be > 0 and <= #{MAX_ALLOWED_TIMEOUT_SECONDS} seconds"
        }.freeze

        # Implementation for /Scan RPC method
        #
        # request - a ScanRequest message
        # Returns a ScanResponse message.
        def scan(request, _call)
          scan_request_action(request)
        end

        # Implementation for /ScanStream RPC method
        #
        # requests - the incoming stream of ScanRequest messages
        # Returns an Enumerator that yields one ScanResponse per request.
        def scan_stream(requests, _call)
          request_action = ->(r) { scan_request_action(r) }
          StreamEnumerator.new(requests, request_action).each_item
        end

        private

        # Validates the request, runs the core scan and builds the response.
        #
        # Raises ::GRPC::InvalidArgument when validation fails and
        # ::GRPC::Unknown when the scanner itself raises.
        def scan_request_action(request)
          validate_request(request)

          payloads = request.payloads.to_a

          raw_value_exclusions = []
          rule_exclusions = []

          request.exclusions&.each do |exclusion|
            # The proto field is declared as `exclusion_type`; exclusions whose
            # type is neither RAW_VALUE nor RULE are ignored.
            case exclusion.exclusion_type
            when :EXCLUSION_TYPE_RAW_VALUE
              raw_value_exclusions << exclusion.value
            when :EXCLUSION_TYPE_RULE
              rule_exclusions << exclusion.value
            end
          end

          begin
            result = scanner.secrets_scan(
              payloads,
              raw_value_exclusions:,
              rule_exclusions:,
              tags: request.tags.to_a,
              timeout: request.timeout_secs,
              payload_timeout: request.payload_timeout_secs
            )
          rescue StandardError => e
            # Surface scanner failures to the client as a gRPC error instead of
            # letting an arbitrary exception escape the handler.
            logger.error("Failed to run the scan: #{e}")
            raise ::GRPC::Unknown, e.message
          end

          findings = result.results&.map do |finding|
            Gitlab::SecretDetection::GRPC::ScanResponse::Finding.new(**finding.to_h)
          end

          Gitlab::SecretDetection::GRPC::ScanResponse.new(
            results: findings,
            status: result.status
          )
        end

        # Core scanner instance, built once per service object and reused.
        def scanner
          @scanner ||= Gitlab::SecretDetection::Core::Scanner.new(rules:, logger:)
        end

        # Rules handed to the scanner when it is first built.
        def rules
          Gitlab::SecretDetection::Core::Ruleset.new.rules
        end

        # validates grpc request body
        #
        # Raises ::GRPC::InvalidArgument (with a `field` metadata entry naming
        # the offending field) on the first violation found.
        def validate_request(request)
          # check for non-blank exclusion values
          request.exclusions&.each do |exclusion|
            if exclusion.value.empty?
              raise ::GRPC::InvalidArgument.new(ERROR_MESSAGES[:exclusion_empty_value],
                { field: "exclusion.value" })
            end
          end

          unless valid_timeout_range?(request.timeout_secs)
            raise ::GRPC::InvalidArgument.new(ERROR_MESSAGES[:invalid_timeout_range],
              { field: "timeout_secs" })
          end

          unless valid_timeout_range?(request.payload_timeout_secs)
            raise ::GRPC::InvalidArgument.new(ERROR_MESSAGES[:invalid_timeout_range],
              { field: "payload_timeout_secs" })
          end

          # check for required payload fields: `id` must be present and
          # non-empty, `data` only has to be present
          request.payloads.to_a.each_with_index do |payload, index|
            if !payload.respond_to?(:id) || payload.id.empty?
              raise ::GRPC::InvalidArgument.new(
                ERROR_MESSAGES[:invalid_payload_fields],
                { field: "payloads[#{index}].id" }
              )
            end

            unless payload.respond_to?(:data) # rubocop:disable Style/Next
              raise ::GRPC::InvalidArgument.new(
                ERROR_MESSAGES[:invalid_payload_fields],
                { field: "payloads[#{index}].data" }
              )
            end
          end
        end

        # checks if the given timeout value is within range
        #
        # NOTE(review): 0 is accepted although the error message says "> 0" --
        # presumably so that an unset optional timeout passes validation; confirm.
        def valid_timeout_range?(timeout_value)
          timeout_value >= 0 && timeout_value <= MAX_ALLOWED_TIMEOUT_SECONDS
        end
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'grpc/scanner_service'
4
+ require_relative 'grpc/client/stream_request_enumerator'
5
+ require_relative 'grpc/client/grpc_client'
6
+
7
+ module Gitlab
8
+ module SecretDetection
9
+ module GRPC
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,108 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'openssl'
4
+ require_relative 'memoize'
5
+
6
module Gitlab
  module SecretDetection
    module Utils
      module X509
        # Pulled from Gitlab.com source
        # Link: https://gitlab.com/gitlab-org/gitlab/-/blob/4713a798f997389f04e442db3d1d8349a39d5d46/lib/gitlab/x509/certificate.rb
        #
        # Bundles a private key, its certificate and an optional list of CA
        # certificates, with helpers to load them from strings or files.
        class Certificate
          # Matches one PEM certificate block; used to split a bundle into
          # individual certificates.
          CERT_REGEX = /-----BEGIN CERTIFICATE-----(?:.|\n)+?-----END CERTIFICATE-----/

          attr_reader :key, :cert, :ca_certs

          # Directory searched for CA certificates: SSL_CERT_DIR if set,
          # otherwise OpenSSL's default. Memoized.
          def self.default_cert_dir
            strong_memoize(:default_cert_dir) do
              ENV.fetch('SSL_CERT_DIR', OpenSSL::X509::DEFAULT_CERT_DIR)
            end
          end

          # CA bundle file: SSL_CERT_FILE if set, otherwise OpenSSL's default.
          # Memoized.
          def self.default_cert_file
            strong_memoize(:default_cert_file) do
              ENV.fetch('SSL_CERT_FILE', OpenSSL::X509::DEFAULT_CERT_FILE)
            end
          end

          # Builds a Certificate from PEM strings.
          #
          # key_string      - RSA private key
          # cert_string     - certificate
          # ca_certs_string - optional bundle of one or more CA certificates
          def self.from_strings(key_string, cert_string, ca_certs_string = nil)
            key = OpenSSL::PKey::RSA.new(key_string)
            cert = OpenSSL::X509::Certificate.new(cert_string)
            ca_certs = load_ca_certs_bundle(ca_certs_string)

            new(key, cert, ca_certs)
          end

          # Same as from_strings, reading each input from the given path.
          def self.from_files(key_path, cert_path, ca_certs_path = nil)
            ca_certs_string = File.read(ca_certs_path) if ca_certs_path

            from_strings(File.read(key_path), File.read(cert_path), ca_certs_string)
          end

          # Returns all top-level, readable files in the default CA cert directory
          # plus the default CA cert file when it exists.
          def self.ca_certs_paths
            cert_paths = Dir["#{default_cert_dir}/*"].select do |path|
              !File.directory?(path) && File.readable?(path)
            end
            cert_paths << default_cert_file if File.exist? default_cert_file
            cert_paths
          end

          # Returns a single String: every certificate found in ca_certs_paths,
          # de-duplicated with `uniq` and joined with newlines. Memoized.
          def self.ca_certs_bundle
            strong_memoize(:ca_certs_bundle) do
              ca_certs_paths.flat_map do |cert_file|
                load_ca_certs_bundle(File.read(cert_file))
              end.uniq.join("\n")
            end
          end

          # Forgets the memoized bundle so the next call re-reads the files.
          def self.reset_ca_certs_bundle
            clear_memoization(:ca_certs_bundle)
          end

          # Forgets the memoized default directory and file paths.
          def self.reset_default_cert_paths
            clear_memoization(:default_cert_dir)
            clear_memoization(:default_cert_file)
          end

          # Returns an array of OpenSSL::X509::Certificate objects, empty array if none found
          #
          # Ruby OpenSSL::X509::Certificate.new will only load the first
          # certificate if a bundle is presented, this allows to parse multiple certs
          # in the same file
          def self.load_ca_certs_bundle(ca_certs_string)
            return [] unless ca_certs_string

            ca_certs_string.scan(CERT_REGEX).map do |ca_cert_string|
              OpenSSL::X509::Certificate.new(ca_cert_string)
            end
          end

          def initialize(key, cert, ca_certs = nil)
            @key = key
            @cert = cert
            @ca_certs = ca_certs
          end

          # String form of the key (`key.to_s`).
          def key_string
            key.to_s
          end

          # PEM form of the certificate.
          def cert_string
            cert.to_pem
          end

          # PEM forms of the CA certificates joined by newlines, or nil when
          # there are none.
          def ca_certs_string
            # Plain nil/empty check: `blank?` exists only with ActiveSupport.
            return if ca_certs.nil? || ca_certs.empty?

            # Double quotes are required here: '\n' is a literal backslash + n,
            # which would glue the PEM blocks together with garbage.
            ca_certs.map(&:to_pem).join("\n")
          end

          # Makes strong_memoize / clear_memoization available to the class
          # methods above.
          class << self
            include ::Gitlab::SecretDetection::Utils::StrongMemoize
          end
        end
      end
    end
  end
end
@@ -0,0 +1,151 @@
1
+ # frozen_string_literal: true
2
+
3
module Gitlab
  module SecretDetection
    module Utils
      # Pulled from GitLab.com source
      # Link: https://gitlab.com/gitlab-org/gitlab/-/blob/4713a798f997389f04e442db3d1d8349a39d5d46/gems/gitlab-utils/lib/gitlab/utils/strong_memoize.rb
      #
      # Memoization helpers that also cache `nil` and `false` results, because
      # they test whether the instance variable is defined rather than truthy.
      module StrongMemoize
        # Instead of writing patterns like this:
        #
        #     def trigger_from_token
        #       return @trigger if defined?(@trigger)
        #
        #       @trigger = Ci::Trigger.find_by_token(params[:token].to_s)
        #     end
        #
        # We could write it like:
        #
        #     include Gitlab::SecretDetection::Utils::StrongMemoize
        #
        #     def trigger_from_token
        #       Ci::Trigger.find_by_token(params[:token].to_s)
        #     end
        #     strong_memoize_attr :trigger_from_token
        #
        #     def enabled?
        #       Feature.enabled?(:some_feature)
        #     end
        #     strong_memoize_attr :enabled?
        #
        # name - Symbol or String naming the instance variable (without "@")
        # Returns the cached value, computing it with the block on first use.
        def strong_memoize(name)
          key = ivar(name)

          if instance_variable_defined?(key)
            instance_variable_get(key)
          else
            instance_variable_set(key, yield)
          end
        end

        # Works the same way as "strong_memoize" but takes
        # a second argument - expire_in. This allows invalidate
        # the data after specified number of seconds
        #
        # Uses plain `Time.now` arithmetic so it works without ActiveSupport.
        def strong_memoize_with_expiration(name, expire_in)
          key = ivar(name)
          expiration_key = "#{key}_expired_at"

          if instance_variable_defined?(expiration_key)
            expire_at = instance_variable_get(expiration_key)
            clear_memoization(name) if expire_at < Time.now
          end

          if instance_variable_defined?(key)
            instance_variable_get(key)
          else
            value = instance_variable_set(key, yield)
            instance_variable_set(expiration_key, Time.now + expire_in)
            value
          end
        end

        # Memoizes one value per distinct argument list under a single name.
        def strong_memoize_with(name, *args)
          container = strong_memoize(name) { {} }

          if container.key?(args)
            container[args]
          else
            container[args] = yield
          end
        end

        # True when a value (even nil/false) has been memoized under name.
        def strong_memoized?(name)
          key = ivar(StrongMemoize.normalize_key(name))
          instance_variable_defined?(key)
        end

        # Drops the memoized value for name, if any.
        def clear_memoization(name)
          key = ivar(StrongMemoize.normalize_key(name))
          remove_instance_variable(key) if instance_variable_defined?(key)
        end

        # Class-level macro added to every class that includes StrongMemoize.
        module StrongMemoizeClassMethods
          # Wraps the already-defined, zero-argument method so that its result
          # is memoized; the method keeps its original visibility.
          def strong_memoize_attr(method_name)
            member_name = StrongMemoize.normalize_key(method_name)

            StrongMemoize.send(:do_strong_memoize, self, method_name, member_name) # rubocop:disable GitlabSecurity/PublicSend
          end
        end

        def self.included(base)
          base.singleton_class.prepend(StrongMemoizeClassMethods)
        end

        private

        # Convert `"name"`/`:name` into `:@name`
        #
        # Built by interpolation so the String returned by Symbol#to_s is
        # never mutated.
        def ivar(name)
          case name
          when Symbol, String
            :"@#{name}"
          else
            raise ArgumentError, "Invalid type of '#{name}'"
          end
        end

        class << self
          # Method names ending in `!` or `?` are not valid instance variable
          # names, so those characters are swapped for look-alikes.
          def normalize_key(key)
            return key unless key.end_with?('!', '?')

            # Replace invalid chars like `!` and `?` with allowed Unicode codeparts.
            key.to_s.tr('!?', "\uFF01\uFF1F")
          end

          private

          def do_strong_memoize(klass, method_name, member_name)
            method = klass.instance_method(method_name)

            unless method.arity.zero?
              raise <<~ERROR
                Using `strong_memoize_attr` on methods with parameters is not supported.

                Use `strong_memoize_with` instead.
                See https://docs.gitlab.com/ee/development/utilities.html#strongmemoize
              ERROR
            end

            # Methods defined within a class method are already public by default, so we don't need to
            # explicitly make them public.
            scope = %i[private protected].find do |visibility|
              klass.send(:"#{visibility}_instance_methods") # rubocop:disable GitlabSecurity/PublicSend
                .include? method_name
            end

            klass.define_method(method_name) do |&block|
              strong_memoize(member_name) do
                method.bind_call(self, &block)
              end
            end

            # Restore the original private/protected visibility, if it had one.
            klass.send(scope, method_name) if scope # rubocop:disable GitlabSecurity/PublicSend
          end
        end
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'utils/certificate'
4
+ require_relative 'utils/memoize'
5
+
6
+ module Gitlab
7
+ module SecretDetection
8
+ module Utils
9
+ end
10
+ end
11
+ end
@@ -2,6 +2,30 @@
2
2
 
3
3
  module Gitlab
4
4
  module SecretDetection
5
- VERSION = "0.1.0"
5
+ class Gem
6
+ DEFAULT_VERSION = "0.0.1"
7
+
8
+ SEMVER_REGEX = /^\d+\.\d+\.\d+(?:-[a-zA-Z0-9\-\.]+)?(?:\+[a-zA-Z0-9\-\.]+)?$/
9
+
10
+ def self.get_release_version
11
+ release_version = ENV.fetch("SD_GEM_RELEASE_VERSION", "")
12
+
13
+ if release_version.empty?
14
+ raise LoadError("Missing SD_GEM_RELEASE_VERSION environment variable.") unless local_env?
15
+
16
+ "#{DEFAULT_VERSION}-debug"
17
+ elsif release_version.match?(SEMVER_REGEX)
18
+ release_version
19
+ else
20
+ "#{DEFAULT_VERSION}-#{release_version}"
21
+ end
22
+ end
23
+
24
+ # SD_ENV env var is used to determine which environment the
25
+ # server is running. This var is defined in `.runway/env-<env>.yml` files.
26
+ def self.local_env?
27
+ ENV.fetch('SD_ENV', 'localhost') == 'localhost'
28
+ end
29
+ end
6
30
  end
7
31
  end
@@ -1,10 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "secret_detection/version"
3
+ require_relative 'secret_detection/utils'
4
+ require_relative 'secret_detection/core'
5
+ require_relative 'secret_detection/grpc'
6
+ require_relative 'secret_detection/version'
4
7
 
5
8
  module Gitlab
6
9
  module SecretDetection
7
- class Error < StandardError; end
8
- # Your code goes here...
9
10
  end
10
11
  end
data/lib/gitlab.rb ADDED
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'gitlab/secret_detection'
4
+
5
+ module Gitlab
6
+ end
@@ -0,0 +1,75 @@
1
+ syntax = "proto3";
2
+
3
+ package gitlab.secret_detection;
4
+
5
+ /* We keep generated files within grpc namespace i.e Gitlab::SecretDetection::GRPC
6
+ * so that these files are exported too in the Ruby Gem along with Core and GRPC logic.
7
+ */
8
+ option ruby_package = "Gitlab::SecretDetection::GRPC";
9
+
10
+ /* Request arg for triggering Scan/ScanStream method */
11
+ message ScanRequest {
12
+ message Payload {
13
+ string id = 1;
14
+ string data = 2;
15
+ }
16
+
17
+ // An exclusion names either a rule ID or a raw value to exclude from the scan (see ExclusionType)
18
+ message Exclusion {
19
+ ExclusionType exclusion_type = 1;
20
+ string value = 2;
21
+ }
22
+
23
+ enum ExclusionType {
24
+ EXCLUSION_TYPE_UNSPECIFIED = 0;
25
+ EXCLUSION_TYPE_RULE = 1; // Rule ID to exclude
26
+ EXCLUSION_TYPE_RAW_VALUE = 2; // Raw value to exclude
27
+ }
28
+
29
+ repeated Payload payloads = 1; // Array of payloads to scan
30
+ // Scan timeout for the entire request. Value is in seconds; float values are accepted to express
31
+ // fractions of a second. Default is 180 seconds.
32
+ optional float timeout_secs = 2;
33
+ // Scan timeout for each payload. Value is in seconds; float values are accepted to express
34
+ // fractions of a second. Default is 30 seconds.
35
+ optional float payload_timeout_secs = 3;
36
+ repeated Exclusion exclusions = 4; // Optional. Array of rule-types/raw-values to exclude from being considered during scan.
37
+ repeated string tags = 5; // Optional. Array of rule tags to consider for scan. Ex: ["gitlab_blocking"]
38
+ }
39
+
40
+ /* Response from Scan/ScanStream method */
41
+ message ScanResponse {
42
+ // Represents a secret finding identified within a payload
43
+ message Finding {
44
+ string payload_id = 1;
45
+ int32 status = 2;
46
+ optional string type = 3;
47
+ optional string description = 4;
48
+ optional int32 line_number = 5;
49
+ }
50
+
51
+ // Status codes, kept in sync with ::SecretDetection::Status.
52
+ // NOTE(review): both `status` fields in this message are declared int32, not Status; presumably they carry these values - confirm
53
+ enum Status {
54
+ STATUS_UNSPECIFIED = 0;
55
+ STATUS_FOUND = 1; // one or more findings
56
+ STATUS_FOUND_WITH_ERRORS = 2; // one or more findings along with some errors
57
+ STATUS_SCAN_TIMEOUT = 3; // whole scan timeout
58
+ STATUS_PAYLOAD_TIMEOUT = 4; // single payload timeout
59
+ STATUS_SCAN_ERROR = 5; // internal scan failure
60
+ STATUS_INPUT_ERROR = 6; // invalid input failure
61
+ STATUS_NOT_FOUND = 7; // zero findings
62
+ STATUS_AUTH_ERROR = 8; // authentication failure
63
+ }
64
+
65
+ repeated Finding results = 1;
66
+ int32 status = 2;
67
+ }
67
+
68
+ /* Scanner service that scans given payloads and returns findings */
69
+ service Scanner {
70
+ // Unary call: runs a secret detection scan for the given request and returns one response
71
+ rpc Scan(ScanRequest) returns (ScanResponse) {}
72
+
73
+ // Bi-directional streaming: accepts a stream of scan requests and returns a stream of responses
74
+ rpc ScanStream(stream ScanRequest) returns (stream ScanResponse) {}
75
+ }