fluent-plugin-sanitizer 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +32 -3
- data/fluent-plugin-sanitizer.gemspec +1 -1
- data/lib/fluent/plugin/filter_sanitizer.rb +81 -47
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a773fb4d2f1fb7c00d11982f92f15fc4803135af5a6a5c3433783fa12d0811c
|
4
|
+
data.tar.gz: 8ae5cc7723cab184885ebebd401824700c9ffde8fdf5ab81463f3b67be6aed95
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e5fe885cf146981c70346b7acd1126eed88cba19f090131f56fcdb19f0ef0f68c1a97d039105e59b997ef9662f2db73d8f6dad8407a056ca9fcdcf9da4dfa261
|
7
|
+
data.tar.gz: 87810f31b6f307779e785291ca1c4ca6d7c17b9cf62235e5a22bfdd99399e1f551c98cb0750add509a69f6ecf2b1134cb593c3175c46abf7b4f20cf894d821e5
|
data/README.md
CHANGED
@@ -18,10 +18,11 @@ td-agent-gem install fluent-plugin-sanitizer
|
|
18
18
|
- keys (mandatory) : Name of keys whose values will be masked. You can specify multiple keys. When keys are nested, you can use {parent key}.{child key} like "kubernetes.master_url".
|
19
19
|
- pattern_ipv4 (optional) : Mask IP addresses in IPv4 format. You can use “true” or “false”. (default: false)
|
20
20
|
- pattern_fqdn (optional) : Mask hostname in FQDN style. You can use “true” or “false”. (default: false)
|
21
|
-
- pattern_regex (optional) : Mask values that match a custom regular expression.
|
22
|
-
|
21
|
+
- pattern_regex (optional) : Mask values that match a custom regular expression.
|
22
|
+
- regex_capture_group (optional) : If you define a capture group in the regular expression, you can specify the name of the capture group to be masked.
|
23
|
+
- pattern_regex_prefix (optional) : Define the prefix used for masked values. (default: Regex)
|
23
24
|
- pattern_keywords (optional) : Mask values that match custom keywords. You can specify multiple keywords.
|
24
|
-
|
25
|
+
- pattern_keywords_prefix (optional) : Define the prefix used for masked values. (default: Keywords)
|
25
26
|
|
26
27
|
You can specify multiple rules in a single configuration. It is also possible to define multiple pattern options in a single rule like the following sample.
|
27
28
|
|
@@ -129,6 +130,34 @@ In case log messages including sensitive information such as SSN and phone numbe
|
|
129
130
|
}
|
130
131
|
}
|
131
132
|
```
|
133
|
+
From v0.1.2, the "regex_capture_group" option is available. With the "regex_capture_group" option, it is possible to mask a specific part of the original message.
|
134
|
+
|
135
|
+
**Configuration sample**
|
136
|
+
```
|
137
|
+
<rule>
|
138
|
+
keys user.email
|
139
|
+
pattern_regex /(?<user>\w+)\@\w+.\w+/
|
140
|
+
regex_capture_group "user"
|
141
|
+
pattern_regex_prefix "USER"
|
142
|
+
</rule>
|
143
|
+
```
|
144
|
+
**Input sample**
|
145
|
+
```
|
146
|
+
{
|
147
|
+
"user" : {
|
148
|
+
"email" : "user1@demo.com"
|
149
|
+
}
|
150
|
+
}
|
151
|
+
```
|
152
|
+
**Output sample**
|
153
|
+
```
|
154
|
+
{
|
155
|
+
"user" : {
|
156
|
+
"email" : "USER_321865df6f0ce6bdf3ea16f74623534a@demo.com"
|
157
|
+
}
|
158
|
+
}
|
159
|
+
```
|
160
|
+
|
132
161
|
### Tips : Debug how sanitizer works
|
133
162
|
When you design custom rules in a configuration file, you might need information about how Sanitizer masks original values into hash values for debugging purposes. You can check that information if you run td-agent/Fluentd with debug option enabled. The debug information is shown in the log file of td-agent/Fluentd like the following log message sample.
|
134
163
|
|
@@ -23,44 +23,58 @@ module Fluent
|
|
23
23
|
|
24
24
|
helpers :event_emitter, :record_accessor
|
25
25
|
|
26
|
-
desc "Hash salt to be used to generate hash values with
|
26
|
+
desc "Hash salt to be used to generate hash values with specified hash(optional)"
|
27
27
|
config_param :hash_salt, :string, default: ""
|
28
|
-
|
29
|
-
|
30
|
-
|
28
|
+
|
29
|
+
desc "Hash scheme to use for generating hash value (supported schemes are md5,sha1,sha256,sha384,sha512) (optional)"
|
30
|
+
config_param :hash_scheme, :enum, list: [:md5, :sha1, :sha256, :sha384, :sha512], default: :md5
|
31
|
+
|
32
|
+
config_section :rule, param_name: :rules, multi: true do
|
33
|
+
desc "Name of keys whose values are to be sanitized"
|
31
34
|
config_param :keys, :array, default: []
|
32
35
|
desc "Sanitize if values contain IPv4 (optional)"
|
33
36
|
config_param :pattern_ipv4, :bool, default: false
|
34
|
-
desc "Sanitize if values contain Hostname in FQDN style (
|
37
|
+
desc "Sanitize if values contain Hostname in FQDN style (optional)"
|
35
38
|
config_param :pattern_fqdn, :bool, default: false
|
36
|
-
desc "Sanitize if values
|
39
|
+
desc "Sanitize if values match custom regular expression (optional)"
|
37
40
|
config_param :pattern_regex, :regexp, default: /^$/
|
38
|
-
desc "Prefix for pattern_regex (optional)"
|
39
|
-
config_param :regex_capture_group, :string, default:""
|
40
41
|
desc "Target capture group name to be masked (optional)"
|
42
|
+
config_param :regex_capture_group, :string, default:""
|
43
|
+
desc "Prefix for pattern_regex (optional)"
|
41
44
|
config_param :pattern_regex_prefix, :string, default: "Regex"
|
42
|
-
desc "Sanitize if values
|
45
|
+
desc "Sanitize if values match custom keywords (optional)"
|
43
46
|
config_param :pattern_keywords, :array, default: []
|
44
47
|
desc "Prefix for pattern_keywords (optional)"
|
45
48
|
config_param :pattern_keywords_prefix, :string, default: "Keywords"
|
46
49
|
end
|
47
50
|
|
48
51
|
def configure(conf)
|
49
|
-
super
|
50
|
-
|
52
|
+
super
|
51
53
|
@salt = conf['hash_salt']
|
52
|
-
|
54
|
+
@salt = "" if @salt.nil?
|
55
|
+
@hash_scheme = conf['hash_scheme']
|
56
|
+
@sanitize_func =
|
57
|
+
case @hash_scheme
|
58
|
+
when "sha1"
|
59
|
+
Proc.new { |str| Digest::SHA1.hexdigest(@salt + str) }
|
60
|
+
when "sha256"
|
61
|
+
Proc.new { |str| Digest::SHA256.hexdigest(@salt +str) }
|
62
|
+
when "sha384"
|
63
|
+
Proc.new { |str| Digest::SHA384.hexdigest(@salt +str) }
|
64
|
+
when "sha512"
|
65
|
+
Proc.new { |str| Digest::SHA512.hexdigest(@salt +str) }
|
66
|
+
else
|
67
|
+
Proc.new { |str| Digest::MD5.hexdigest(@salt +str) }
|
68
|
+
end
|
69
|
+
|
53
70
|
@sanitizerules = []
|
54
71
|
@rules.each do |rule|
|
55
72
|
if rule.keys.empty?
|
56
73
|
raise Fluent::ConfigError, "You need to specify at least one key in rule statement."
|
57
74
|
else
|
58
|
-
#keys = record_accessor_create(rule.keys)
|
59
75
|
keys = rule.keys
|
60
76
|
end
|
61
77
|
|
62
|
-
#record_accessor_create(rule.keys)
|
63
|
-
|
64
78
|
if rule.pattern_ipv4 || !rule.pattern_ipv4
|
65
79
|
pattern_ipv4 = rule.pattern_ipv4
|
66
80
|
else
|
@@ -77,16 +91,11 @@ module Fluent
|
|
77
91
|
pattern_regex = rule.pattern_regex
|
78
92
|
regex_capture_group = rule.regex_capture_group
|
79
93
|
else
|
80
|
-
raise Fluent::ConfigError, "Your need to specify Regexp for
|
81
|
-
end
|
82
|
-
|
94
|
+
raise Fluent::ConfigError, "Your need to specify Regexp for pattern_regex option."
|
95
|
+
end
|
96
|
+
|
83
97
|
pattern_keywords = rule.pattern_keywords
|
84
98
|
|
85
|
-
case [pattern_ipv4, pattern_fqdn, pattern_regex, pattern_keywords.empty?]
|
86
|
-
when [false, false, /^$/, true]
|
87
|
-
raise Fluent::ConfigError, "You need to specify at least one pattern option in the rule statement."
|
88
|
-
end
|
89
|
-
|
90
99
|
regex_prefix = rule.pattern_regex_prefix
|
91
100
|
keywords_prefix = rule.pattern_keywords_prefix
|
92
101
|
|
@@ -98,22 +107,27 @@ module Fluent
|
|
98
107
|
@sanitizerules.each do |keys, pattern_ipv4, pattern_fqdn, pattern_regex, regex_capture_group, pattern_keywords, regex_prefix, keywords_prefix|
|
99
108
|
keys.each do |key|
|
100
109
|
accessor = record_accessor_create("$."+key.to_s)
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
110
|
+
begin
|
111
|
+
if pattern_ipv4 && accessor.call(record)
|
112
|
+
accessor.set(record, sanitize_ipv4_val(accessor.call(record).to_s))
|
113
|
+
end
|
114
|
+
if pattern_fqdn && accessor.call(record)
|
115
|
+
accessor.set(record, sanitize_fqdn_val(accessor.call(record).to_s))
|
116
|
+
end
|
117
|
+
if !pattern_regex.to_s.eql?("(?-mix:^$)") && accessor.call(record)
|
118
|
+
if regex_capture_group.empty?
|
119
|
+
accessor.set(record, sanitize_regex_val(accessor.call(record), regex_prefix, pattern_regex))
|
120
|
+
else
|
121
|
+
accessor.set(record, sanitize_regex_val_capture(accessor.call(record), regex_prefix, pattern_regex, regex_capture_group))
|
122
|
+
end
|
123
|
+
#end
|
124
|
+
end
|
125
|
+
if !pattern_keywords.empty? && accessor.call(record)
|
126
|
+
accessor.set(record, sanitize_keywords_val(accessor.call(record).to_s, pattern_keywords, keywords_prefix))
|
127
|
+
end
|
128
|
+
rescue => e
|
129
|
+
log.warn "Skipping this key", error_class: e.class, error: e.message
|
112
130
|
end
|
113
|
-
end
|
114
|
-
if !pattern_keywords.empty? && accessor.call(record)
|
115
|
-
accessor.set(record, sanitize_keywords_val(accessor.call(record).to_s, pattern_keywords, keywords_prefix))
|
116
|
-
end
|
117
131
|
end
|
118
132
|
end
|
119
133
|
record
|
@@ -152,16 +166,32 @@ module Fluent
|
|
152
166
|
end
|
153
167
|
|
154
168
|
def sanitize_ipv4(str)
|
155
|
-
return "IPv4_"+
|
169
|
+
return "IPv4_"+ @sanitize_func.call(str)
|
156
170
|
end
|
157
171
|
|
158
172
|
def sanitize_fqdn(str)
|
159
|
-
return "FQDN_"+
|
173
|
+
return "FQDN_"+ @sanitize_func.call(str)
|
174
|
+
end
|
175
|
+
|
176
|
+
def sanitize_val(str, prefix)
|
177
|
+
s = prefix + "_" + @sanitize_func.call(str)
|
178
|
+
$log.debug "[pattern_regex] sanitize '#{str}' to '#{s}'" if str != s
|
179
|
+
return s
|
160
180
|
end
|
161
181
|
|
162
182
|
def sanitize_regex(str, prefix, regex)
|
163
|
-
|
164
|
-
|
183
|
+
regex_p = Regexp.new(regex)
|
184
|
+
if str =~ regex_p
|
185
|
+
scans = str.scan(regex).flatten
|
186
|
+
if scans.any?{ |e| e.nil? }
|
187
|
+
return prefix + "_" + @sanitize_func.call(str)
|
188
|
+
else
|
189
|
+
scans.each do |s|
|
190
|
+
mask = prefix + "_" + @sanitize_func.call(str)
|
191
|
+
str = str.gsub(s, mask)
|
192
|
+
end
|
193
|
+
end
|
194
|
+
return str
|
165
195
|
else
|
166
196
|
$log.debug "[pattern_regex] #{str} does not match given regex #{regex}. skip this rule."
|
167
197
|
return str
|
@@ -169,11 +199,15 @@ module Fluent
|
|
169
199
|
end
|
170
200
|
|
171
201
|
def sanitize_regex_capture(str, prefix, regex, capture_group)
|
172
|
-
|
202
|
+
regex_p = Regexp.new(regex)
|
203
|
+
if str =~ regex_p
|
173
204
|
if str.match(regex).names.include?(capture_group)
|
174
|
-
|
175
|
-
|
176
|
-
|
205
|
+
scans = str.scan(regex).flatten
|
206
|
+
scans.each do |s|
|
207
|
+
mask = prefix + "_" + @sanitize_func.call(str)
|
208
|
+
str = str.gsub(s, mask)
|
209
|
+
end
|
210
|
+
return str
|
177
211
|
else
|
178
212
|
$log.debug "[pattern_regex] regex pattern matched but capture group '#{capture_group}' does not exist. Skip this rule."
|
179
213
|
return str
|
@@ -185,7 +219,7 @@ module Fluent
|
|
185
219
|
end
|
186
220
|
|
187
221
|
def sanitize_keyword(str, prefix)
|
188
|
-
return prefix + "_" +
|
222
|
+
return prefix + "_" + @sanitize_func.call(str)
|
189
223
|
end
|
190
224
|
|
191
225
|
def sanitize_ipv4_port(str)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TK Kubota
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-11-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|