sentry-sanitize 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/sentry/sanitize/processor/custom_sanitize_data.rb +7 -5
- data/lib/sentry/sanitize/processor/sanitize_data.rb +111 -109
- data/lib/sentry/sanitize/processor/utf8conversion.rb +45 -43
- data/lib/sentry/sanitize/processor.rb +12 -10
- data/lib/sentry/sanitize/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c334e976d9f260a013086dc61180d889e594d64cb32bebda6c5352610c3646fe
|
4
|
+
data.tar.gz: f763783ce3f4fdfc5d12cf126f487cb08d25108dd6bb84baf954b12a782d81f2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4f0390a5238cac85411e54293d6091e2b6c6ca1042c9cf701d27766afd8b77924ef4b7633aa71055a4fd236be797d2d489e1af8b575213e5962b029d6ada55f8
|
7
|
+
data.tar.gz: b282cc0b410b405a75765d1198ee2c0694e1834761a584f611fa5f215c4cd0d41a88a9fef1fe7967aba0d3e65ee9c053924202911e2d33e7383500b51dbec2f3
|
data/Gemfile.lock
CHANGED
@@ -5,11 +5,13 @@ require 'sentry/sanitize/processor/sanitize_data'
|
|
5
5
|
require 'sentry/sanitize/processor/utf8conversion'
|
6
6
|
|
7
7
|
module Sentry
  module Sanitize
    # SanitizeData variant that is configured directly from a list of field
    # names instead of from a client configuration object.
    #
    # The parent initializer is deliberately not invoked: it expects a client
    # and reads its settings from `client.configuration`, which is not
    # available here.
    class Processor::CustomSanitizeData < Processor::SanitizeData
      # sanitize_fields - extra field names/patterns to mask, in addition to
      #                   the defaults declared by the parent class.
      #
      # Credit card scrubbing is always switched on and no field is excluded.
      def initialize(sanitize_fields)
        self.sanitize_fields_excluded = []
        self.sanitize_credit_cards = true
        self.sanitize_fields = sanitize_fields
      end
    end
  end
end
|
@@ -3,134 +3,136 @@
|
|
3
3
|
require 'json'
|
4
4
|
|
5
5
|
module Sentry
  module Sanitize
    # Masks sensitive information inside event data.
    #
    # Hashes and arrays are walked recursively. A value is masked when its key
    # matches one of the sensitive field patterns or, if credit card scrubbing
    # is enabled, when the value itself looks like a credit card number.
    # Strings are additionally inspected for embedded JSON, query strings and
    # `"field": "value"` style fragments.
    class Processor::SanitizeData < Processor
      # Built-in field patterns. They are inserted into the regex verbatim
      # (no word boundaries), see #use_boundary?.
      DEFAULT_FIELDS = %w(authorization password passwd secret ssn social(.*)?sec).freeze
      CREDIT_CARD_RE = /\b(?:3[47]\d|(?:4\d|5[1-5]|65)\d{2}|6011)\d{12}\b/.freeze
      QUERY_STRING = ['query_string', :query_string].freeze
      JSON_STARTS_WITH = ["[", "{"].freeze
      # Regex that can never match. Used when no sensitive field is left,
      # because an empty alternation (`//i`) would match *every* input.
      NO_MATCH_RE = /(?!)/.freeze

      attr_accessor :sanitize_fields, :sanitize_credit_cards, :sanitize_fields_excluded

      # client - object whose `configuration` provides `sanitize_fields`,
      #          `sanitize_credit_cards` and `sanitize_fields_excluded`.
      def initialize(client)
        super
        self.sanitize_fields = client.configuration.sanitize_fields
        self.sanitize_credit_cards = client.configuration.sanitize_credit_cards
        self.sanitize_fields_excluded = client.configuration.sanitize_fields_excluded
      end

      # Returns a sanitized version of `value`.
      #
      # value - Hash, Array, Integer, String or any other object (returned as is).
      # key   - the hash key `value` was stored under, if any.
      #
      # Unfrozen hashes and arrays are modified in place; frozen ones are
      # copied.
      def process(value, key = nil)
        case value
        when Hash
          sanitize_hash_value(key, value)
        when Array
          sanitize_array_value(key, value)
        when Integer
          matches_regexes?(key, value.to_s) ? INT_MASK : value
        when String
          sanitize_string_value(key, value)
        else
          value
        end
      end

      private

      # CGI.parse takes our nice UTF-8 strings and converts them back to ASCII,
      # so we have to convert them back, again.
      def utf8_processor
        @utf8_processor ||= Processor::UTF8Conversion.new
      end

      # Masks the whole hash when it is stored under a sensitive key,
      # otherwise sanitizes every entry.
      def sanitize_hash_value(key, value)
        if sensitive_key?(key)
          STRING_MASK
        elsif value.frozen?
          value.merge(value) { |k, v| process v, k }
        else
          value.merge!(value) { |k, v| process v, k }
        end
      end

      # Sanitizes every element, treating each one as if stored under `key`.
      def sanitize_array_value(key, value)
        if value.frozen?
          value.map { |v| process v, key }
        else
          value.map! { |v| process v, key }
        end
      end

      def sanitize_string_value(key, value)
        if value =~ sensitive_fields && (json = parse_json_or_nil(value))
          # if this string is actually a json obj, convert and sanitize
          process(json).to_json
        elsif matches_regexes?(key, value)
          STRING_MASK
        elsif QUERY_STRING.include?(key)
          sanitize_query_string(value)
        elsif value =~ sensitive_fields
          sanitize_sensitive_string_content(value)
        else
          value
        end
      end

      # Parses a query string, sanitizes its parameters and re-encodes it.
      def sanitize_query_string(query_string)
        query_hash = CGI.parse(query_string)
        sanitized = utf8_processor.process(query_hash)
        processed_query_hash = process(sanitized)
        URI.encode_www_form(processed_query_hash)
      end

      # this scrubs some sensitive info from the string content. for example:
      #
      # ```
      # unexpected token at '{
      #   "role": "admin","password": "Abc@123","foo": "bar"
      # }'
      # ```
      #
      # will become
      #
      # ```
      # unexpected token at '{
      #   "role": "admin","password": ********,"foo": "bar"
      # }'
      # ```
      #
      # it's particularly useful in hash or param-parsing related errors
      def sanitize_sensitive_string_content(value)
        value.gsub(/(#{sensitive_fields}['":]\s?(:|=>)?\s?)(".*?"|'.*?')/, '\1' + STRING_MASK)
      end

      # True when `v` looks like a credit card number (and that check is
      # enabled) or when `k` is a sensitive key.
      def matches_regexes?(k, v)
        (sanitize_credit_cards && v =~ CREDIT_CARD_RE) ||
          sensitive_key?(k)
      end

      # True when `key` is a String or Symbol matching a sensitive field.
      #
      # Other key types (nil, Integer, ...) are never sensitive. They must not
      # be sent `=~`: `Object#=~` was removed in Ruby 3.2, so e.g. an Integer
      # hash key would raise NoMethodError.
      def sensitive_key?(key)
        case key
        when String, Symbol
          !(key =~ sensitive_fields).nil?
        else
          false
        end
      end

      # Case-insensitive regex matching any sensitive field. Built once from
      # the defaults plus `sanitize_fields`, minus `sanitize_fields_excluded`,
      # and memoized; later changes to those settings are not picked up.
      def sensitive_fields
        return @sensitive_fields if instance_variable_defined?(:@sensitive_fields)

        # Array() tolerates nil settings; to_s lets callers pass symbols.
        fields = DEFAULT_FIELDS | Array(sanitize_fields).map(&:to_s)
        fields -= Array(sanitize_fields_excluded).map(&:to_s)
        patterns = fields.map do |f|
          use_boundary?(f) ? "\\b#{f}\\b" : f
        end
        @sensitive_fields = patterns.empty? ? NO_MATCH_RE : /#{patterns.join("|")}/i
      end

      # Custom plain-word fields are wrapped in word boundaries; default
      # fields and fields containing regex syntax are used verbatim.
      def use_boundary?(string)
        !DEFAULT_FIELDS.include?(string) && !special_characters?(string)
      end

      def special_characters?(string)
        REGEX_SPECIAL_CHARACTERS.any? { |r| string.include?(r) }
      end

      # Returns the parsed JSON document, or nil when `string` does not start
      # like a JSON array/object or cannot be parsed.
      def parse_json_or_nil(string)
        return unless string.start_with?(*JSON_STARTS_WITH)

        JSON.parse(string)
      rescue JSON::ParserError, NoMethodError
        nil
      end
    end
  end
end
|
@@ -1,53 +1,55 @@
|
|
1
1
|
module Sentry
  module Sanitize
    # Removes bytes with an invalid encoding from strings, recursing into
    # hashes (values only), arrays and exception messages.
    class Processor::UTF8Conversion < Processor
      # Slightly misnamed - actually just removes any bytes with invalid encoding
      # Previously, our JSON backend required UTF-8. Since we now use the built-in
      # JSON, we can use any encoding, but it must be valid anyway so we can do
      # things like call #match and #slice on strings
      REPLACE = "".freeze

      # Returns `value` with invalid bytes removed. Unfrozen hashes and arrays
      # are cleaned in place, frozen ones are copied. Objects of any other
      # type are returned untouched.
      def process(value)
        case value
        when Hash then clean_hash(value)
        when Array then clean_array(value)
        when Exception then clean_exception(value)
        when String then clean_string(value)
        else value
        end
      end

      private

      def clean_hash(hash)
        return hash.merge(hash) { |_, v| process v } if hash.frozen?

        hash.merge!(hash) { |_, v| process v }
      end

      def clean_array(array)
        return array.map { |v| process v } if array.frozen?

        array.map! { |v| process v }
      end

      # Rebuilds the exception with a cleaned message (keeping class and
      # backtrace) when its message contains invalid bytes.
      def clean_exception(exception)
        message = exception.message
        return exception if message.valid_encoding?

        rebuilt = exception.class.new(remove_invalid_bytes(message))
        rebuilt.set_backtrace(exception.backtrace)
        rebuilt
      end

      def clean_string(string)
        # Encoding::BINARY / Encoding::ASCII_8BIT is a special binary encoding.
        # valid_encoding? will always return true because it contains all codepoints,
        # so instead we check if it only contains actual ASCII codepoints, and if
        # not we assume it's actually just UTF8 and scrub accordingly.
        if string.encoding == Encoding::BINARY && !string.ascii_only?
          string = string.dup
          string.force_encoding(Encoding::UTF_8)
        end

        string.valid_encoding? ? string : remove_invalid_bytes(string)
      end

      # Stolen from RSpec
      # https://github.com/rspec/rspec-support/blob/f0af3fd74a94ff7bb700f6ba06dbdc67bba17fbf/lib/rspec/support/encoded_string.rb#L120-L139
      if String.method_defined?(:scrub) # 2.1+
        def remove_invalid_bytes(string)
          string.scrub(REPLACE)
        end
      else
        def remove_invalid_bytes(string)
          cleaned = string.chars.map { |char| char.valid_encoding? ? char : REPLACE }
          cleaned.join
        end
      end
    end
  end
end
|
@@ -1,15 +1,17 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
1
|
+
module Sentry
  module Sanitize
    # Abstract base class for processors. Subclasses are expected to
    # override #process; the base implementation always raises.
    class Processor
      # Replacement values for masked integers and strings.
      INT_MASK = 0
      STRING_MASK = '********'.freeze
      # Characters that carry special meaning inside a regular expression.
      REGEX_SPECIAL_CHARACTERS = [
        '.', '$', '^', '{', '[', '(', '|', ')', '*', '+', '?'
      ].freeze

      # client - optional client object, stored in @client.
      def initialize(client = nil)
        @client = client
      end

      # Must be implemented by subclasses.
      #
      # Raises NotImplementedError.
      def process(_data)
        raise NotImplementedError
      end
    end
  end
end
|