sentry-sanitize 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/sentry/sanitize/processor/custom_sanitize_data.rb +7 -5
- data/lib/sentry/sanitize/processor/sanitize_data.rb +111 -109
- data/lib/sentry/sanitize/processor/utf8conversion.rb +45 -43
- data/lib/sentry/sanitize/processor.rb +12 -10
- data/lib/sentry/sanitize/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c334e976d9f260a013086dc61180d889e594d64cb32bebda6c5352610c3646fe
|
4
|
+
data.tar.gz: f763783ce3f4fdfc5d12cf126f487cb08d25108dd6bb84baf954b12a782d81f2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4f0390a5238cac85411e54293d6091e2b6c6ca1042c9cf701d27766afd8b77924ef4b7633aa71055a4fd236be797d2d489e1af8b575213e5962b029d6ada55f8
|
7
|
+
data.tar.gz: b282cc0b410b405a75765d1198ee2c0694e1834761a584f611fa5f215c4cd0d41a88a9fef1fe7967aba0d3e65ee9c053924202911e2d33e7383500b51dbec2f3
|
data/Gemfile.lock
CHANGED
@@ -5,11 +5,13 @@ require 'sentry/sanitize/processor/sanitize_data'
|
|
5
5
|
require 'sentry/sanitize/processor/utf8conversion'
|
6
6
|
|
7
7
|
module Sentry
  module Sanitize
    # A SanitizeData processor that is configured directly with a list of
    # field patterns instead of reading its settings from a client.
    #
    # Credit card scrubbing is always enabled and no default field is excluded.
    class Processor::CustomSanitizeData < Processor::SanitizeData
      # @param sanitize_fields [Array<String>, String, nil] extra field name
      #   patterns to mask; +nil+ means "no extra fields" and a single value is
      #   wrapped into a one-element list
      def initialize(sanitize_fields)
        # `super` is deliberately not called: SanitizeData#initialize pulls its
        # settings from `client.configuration`, and there is no client here.
        #
        # Array() keeps a nil/scalar argument from blowing up later, when the
        # list is unioned with the default fields.
        self.sanitize_fields = Array(sanitize_fields)
        self.sanitize_credit_cards = true
        self.sanitize_fields_excluded = []
      end
    end
  end
end
|
@@ -3,134 +3,136 @@
|
|
3
3
|
require 'json'
|
4
4
|
|
5
5
|
module Sentry
  module Sanitize
    # Masks sensitive values in event data.
    #
    # A value is masked when its key matches one of the configured field
    # patterns (DEFAULT_FIELDS plus +sanitize_fields+, minus
    # +sanitize_fields_excluded+) or, when +sanitize_credit_cards+ is enabled,
    # when the value itself matches CREDIT_CARD_RE. Hashes and arrays are
    # walked recursively; unfrozen containers are modified in place.
    class Processor::SanitizeData < Processor
      DEFAULT_FIELDS = %w(authorization password passwd secret ssn social(.*)?sec).freeze
      CREDIT_CARD_RE = /\b(?:3[47]\d|(?:4\d|5[1-5]|65)\d{2}|6011)\d{12}\b/.freeze
      QUERY_STRING = ['query_string', :query_string].freeze
      JSON_STARTS_WITH = ["[", "{"].freeze
      # A pattern that can never match. Used when no field pattern is left,
      # because an empty alternation (`//`) would match *every* key.
      MATCH_NOTHING = /(?!)/.freeze

      attr_accessor :sanitize_fields, :sanitize_credit_cards, :sanitize_fields_excluded

      # @param client [#configuration] object whose configuration supplies
      #   sanitize_fields, sanitize_credit_cards and sanitize_fields_excluded
      def initialize(client)
        super
        self.sanitize_fields = client.configuration.sanitize_fields
        self.sanitize_credit_cards = client.configuration.sanitize_credit_cards
        self.sanitize_fields_excluded = client.configuration.sanitize_fields_excluded
      end

      # Returns the sanitized form of +value+.
      #
      # @param value [Object] the data to sanitize
      # @param key [Object, nil] the key +value+ was stored under, if any
      # @return [Object] STRING_MASK / INT_MASK for sensitive scalars, the
      #   (recursively sanitized) container for hashes and arrays, and the
      #   value itself for every other type
      def process(value, key = nil)
        case value
        when Hash
          sanitize_hash_value(key, value)
        when Array
          sanitize_array_value(key, value)
        when Integer
          matches_regexes?(key, value.to_s) ? INT_MASK : value
        when String
          sanitize_string_value(key, value)
        else
          value
        end
      end

      private

      # CGI.parse takes our nice UTF-8 strings and converts them back to ASCII,
      # so we have to convert them back, again.
      def utf8_processor
        @utf8_processor ||= Processor::UTF8Conversion.new
      end

      # Masks the whole hash when it sits under a sensitive key; otherwise
      # sanitizes each entry (in place unless the hash is frozen).
      def sanitize_hash_value(key, value)
        if sensitive_key?(key)
          STRING_MASK
        elsif value.frozen?
          value.merge(value) { |k, v| process v, k }
        else
          value.merge!(value) { |k, v| process v, k }
        end
      end

      # Sanitizes every element using the array's own key (in place unless the
      # array is frozen).
      def sanitize_array_value(key, value)
        if value.frozen?
          value.map { |v| process v, key }
        else
          value.map! { |v| process v, key }
        end
      end

      # Sanitizes a string, trying in order: embedded JSON document, sensitive
      # key / credit card number, query string, sensitive content in free text.
      def sanitize_string_value(key, value)
        if value =~ sensitive_fields && (json = parse_json_or_nil(value))
          # if this string is actually a json obj, convert and sanitize
          process(json).to_json
        elsif matches_regexes?(key, value)
          STRING_MASK
        elsif QUERY_STRING.include?(key)
          sanitize_query_string(value)
        elsif value =~ sensitive_fields
          sanitize_sensitive_string_content(value)
        else
          value
        end
      end

      # Parses a query string, sanitizes its parameters and re-encodes it.
      def sanitize_query_string(query_string)
        query_hash = CGI.parse(query_string)
        sanitized = utf8_processor.process(query_hash)
        processed_query_hash = process(sanitized)
        URI.encode_www_form(processed_query_hash)
      end

      # this scrubs some sensitive info from the string content. for example:
      #
      # ```
      # unexpected token at '{
      # "role": "admin","password": "Abc@123","foo": "bar"
      # }'
      # ```
      #
      # will become
      #
      # ```
      # unexpected token at '{
      # "role": "admin","password": ********,"foo": "bar"
      # }'
      # ```
      #
      # it's particularly useful in hash or param-parsing related errors
      def sanitize_sensitive_string_content(value)
        value.gsub(/(#{sensitive_fields}['":]\s?(:|=>)?\s?)(".*?"|'.*?')/, '\1' + STRING_MASK)
      end

      # Truthy when +v+ looks like a credit card number (and credit card
      # scrubbing is enabled) or when +k+ is a sensitive key.
      def matches_regexes?(k, v)
        (sanitize_credit_cards && v =~ CREDIT_CARD_RE) || sensitive_key?(k)
      end

      # Truthy when +key+ is a String or Symbol matching a sensitive field.
      #
      # Keys of any other type (nil, Integer, ...) are never sensitive. They
      # must not be sent `=~`: Object#=~ no longer exists on Ruby 3.2+, so an
      # Integer hash key would raise NoMethodError there.
      def sensitive_key?(key)
        (key.is_a?(String) || key.is_a?(Symbol)) && key =~ sensitive_fields
      end

      # Builds (once) the case-insensitive pattern matching every sensitive
      # field name.
      def sensitive_fields
        return @sensitive_fields if instance_variable_defined?(:@sensitive_fields)

        # Array()/to_s tolerate nil settings and Symbol field names; blank
        # names are dropped because an empty alternative matches everything.
        fields = DEFAULT_FIELDS | Array(sanitize_fields).map(&:to_s)
        fields -= Array(sanitize_fields_excluded).map(&:to_s)
        patterns = fields.reject(&:empty?).map do |f|
          use_boundary?(f) ? "\\b#{f}\\b" : f
        end

        @sensitive_fields = patterns.empty? ? MATCH_NOTHING : /#{patterns.join("|")}/i
      end

      # Custom plain-text fields are wrapped in word boundaries; default fields
      # and fields that already contain regex syntax are used verbatim.
      def use_boundary?(string)
        !DEFAULT_FIELDS.include?(string) && !special_characters?(string)
      end

      # True when +string+ contains any regex metacharacter.
      def special_characters?(string)
        REGEX_SPECIAL_CHARACTERS.any? { |r| string.include?(r) }
      end

      # Returns the parsed JSON document, or nil when +string+ does not start
      # like a JSON array/object or cannot be parsed.
      def parse_json_or_nil(string)
        return unless string.start_with?(*JSON_STARTS_WITH)

        JSON.parse(string)
      rescue JSON::ParserError, NoMethodError
        nil
      end
    end
  end
end
|
@@ -1,53 +1,55 @@
|
|
1
1
|
module Sentry
  module Sanitize
    # Slightly misnamed - actually just removes any bytes with invalid encoding
    # Previously, our JSON backend required UTF-8. Since we now use the built-in
    # JSON, we can use any encoding, but it must be valid anyway so we can do
    # things like call #match and #slice on strings
    class Processor::UTF8Conversion < Processor
      REPLACE = "".freeze

      # Returns +value+ with invalidly-encoded bytes removed.
      #
      # Hashes and arrays are cleaned recursively (in place unless frozen),
      # exceptions are rebuilt with a cleaned message, strings are scrubbed,
      # and every other object is returned untouched.
      def process(value)
        case value
        when Hash then clean_hash(value)
        when Array then clean_array(value)
        when Exception then clean_exception(value)
        when String then clean_string(value)
        else value
        end
      end

      private

      # Cleans every value of the hash, mutating it only when it is not frozen.
      def clean_hash(hash)
        if hash.frozen?
          hash.merge(hash) { |_, v| process v }
        else
          hash.merge!(hash) { |_, v| process v }
        end
      end

      # Cleans every element of the array, mutating it only when it is not frozen.
      def clean_array(array)
        if array.frozen?
          array.map { |v| process v }
        else
          array.map! { |v| process v }
        end
      end

      # Returns the exception itself when its message is validly encoded,
      # otherwise a new exception of the same class carrying the cleaned
      # message and the original backtrace.
      def clean_exception(exception)
        return exception if exception.message.valid_encoding?

        replacement = exception.class.new(remove_invalid_bytes(exception.message))
        replacement.set_backtrace(exception.backtrace)
        replacement
      end

      # Returns the string itself when validly encoded, otherwise a scrubbed copy.
      def clean_string(string)
        candidate = string
        # Encoding::BINARY / Encoding::ASCII_8BIT is a special binary encoding.
        # valid_encoding? will always return true because it contains all codepoints,
        # so instead we check if it only contains actual ASCII codepoints, and if
        # not we assume it's actually just UTF8 and scrub accordingly.
        if string.encoding == Encoding::BINARY && !string.ascii_only?
          candidate = string.dup.force_encoding(Encoding::UTF_8)
        end

        candidate.valid_encoding? ? candidate : remove_invalid_bytes(candidate)
      end

      # Stolen from RSpec
      # https://github.com/rspec/rspec-support/blob/f0af3fd74a94ff7bb700f6ba06dbdc67bba17fbf/lib/rspec/support/encoded_string.rb#L120-L139
      if String.method_defined?(:scrub) # 2.1+
        def remove_invalid_bytes(string)
          string.scrub(REPLACE)
        end
      else
        def remove_invalid_bytes(string)
          string.each_char.map { |char| char.valid_encoding? ? char : REPLACE }.join
        end
      end
    end
  end
end
|
@@ -1,15 +1,17 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
1
|
+
module Sentry
  module Sanitize
    # Abstract base class for data processors.
    #
    # Subclasses implement #process; the base implementation only raises.
    class Processor
      # Replacement for masked string values.
      STRING_MASK = '********'.freeze
      # Replacement for masked integer values.
      INT_MASK = 0
      # Characters that mark a field name as containing regex syntax.
      REGEX_SPECIAL_CHARACTERS = [
        '.', '$', '^', '{', '[', '(', '|', ')', '*', '+', '?'
      ].freeze

      # @param client [Object, nil] optional client, stored in @client
      def initialize(client = nil)
        @client = client
      end

      # Processes a piece of data. Must be overridden by subclasses.
      #
      # @raise [NotImplementedError] always, in this base class
      def process(_data)
        raise NotImplementedError
      end
    end
  end
end
|