fluent-plugin-sanitizer 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/fluent-plugin-sanitizer.gemspec +1 -1
- data/lib/fluent/plugin/filter_sanitizer.rb +88 -97
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: be5c045333ced653e07d2dcaf634ac05d2896441778e29a5d7baf60e7eee6cc8
|
4
|
+
data.tar.gz: 23b29b983f14c2f43b2460eaaf0ab0284ade49e1289ae4db741f16d5b2c00a3c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '09818b00aea38d0afc90a34660b6df04be062cef758fa09aa62b893c6fc17ccf44e466fc30dfe1559c9d32a341c8fa31496d1f4d214f4c82ac135130b17cd998'
|
7
|
+
data.tar.gz: d7a609f1afc62e3118d311bb4b5708880e4e40ff88e07a945cb485f23240f91a05b9781f22279229fbef32563084eb2ee9850fbdd1a054a587f8cff9abaa28ef
|
@@ -24,7 +24,7 @@ module Fluent
|
|
24
24
|
helpers :event_emitter, :record_accessor
|
25
25
|
|
26
26
|
desc "Hash salt to be used to generate hash values with MD5(optional)"
|
27
|
-
config_param :hash_salt, :string, default:
|
27
|
+
config_param :hash_salt, :string, default: ""
|
28
28
|
|
29
29
|
config_section :rule, param_name: :rules, multi: true do
|
30
30
|
desc "Name of keys whose valuse are to be sanitized"
|
@@ -35,210 +35,192 @@ module Fluent
|
|
35
35
|
config_param :pattern_fqdn, :bool, default: false
|
36
36
|
desc "Sanitize if values mactch custom regular expression (optional)"
|
37
37
|
config_param :pattern_regex, :regexp, default: /^$/
|
38
|
+
desc "Prefix for pattern_regex (optional)"
|
39
|
+
config_param :pattern_regex_prefix, :string, default: "Regex"
|
38
40
|
desc "Sanitize if values mactch custom keywords (optional)"
|
39
41
|
config_param :pattern_keywords, :array, default: []
|
42
|
+
desc "Prefix for pattern_keywords (optional)"
|
43
|
+
config_param :pattern_keywords_prefix, :string, default: "Keywords"
|
40
44
|
end
|
41
45
|
|
42
46
|
def configure(conf)
|
43
47
|
super
|
44
48
|
|
45
|
-
@salt =
|
46
|
-
@salt = conf['hash_salt'] if conf['hash_salt'] != nil
|
49
|
+
@salt = conf['hash_salt']
|
47
50
|
|
48
51
|
@sanitizerules = []
|
49
52
|
@rules.each do |rule|
|
50
53
|
if rule.keys.empty?
|
51
54
|
raise Fluent::ConfigError, "You need to specify at least one key in rule statement."
|
52
55
|
else
|
56
|
+
#keys = record_accessor_create(rule.keys)
|
53
57
|
keys = rule.keys
|
54
58
|
end
|
55
59
|
|
56
|
-
|
60
|
+
#record_accessor_create(rule.keys)
|
61
|
+
|
62
|
+
if rule.pattern_ipv4 || !rule.pattern_ipv4
|
57
63
|
pattern_ipv4 = rule.pattern_ipv4
|
58
64
|
else
|
59
65
|
raise Fluent::ConfigError, "true or false is available for pattern_ipv4 option."
|
60
66
|
end
|
61
67
|
|
62
|
-
if rule.pattern_fqdn
|
68
|
+
if rule.pattern_fqdn || !rule.pattern_fqdn
|
63
69
|
pattern_fqdn = rule.pattern_fqdn
|
64
70
|
else
|
65
71
|
raise Fluent::ConfigError, "true or false is available for pattern_fqdn option."
|
66
72
|
end
|
67
|
-
|
68
|
-
|
73
|
+
|
74
|
+
if rule.pattern_regex.class == Regexp
|
75
|
+
pattern_regex = rule.pattern_regex
|
76
|
+
else
|
77
|
+
raise Fluent::ConfigError, "Your need to specify Regexp for pattern_fqdn option."
|
78
|
+
end
|
79
|
+
|
69
80
|
pattern_keywords = rule.pattern_keywords
|
70
81
|
|
71
82
|
case [pattern_ipv4, pattern_fqdn, pattern_regex, pattern_keywords.empty?]
|
72
83
|
when [false, false, /^$/, true]
|
73
84
|
raise Fluent::ConfigError, "You need to specify at least one pattern option in the rule statement."
|
74
85
|
end
|
75
|
-
|
86
|
+
|
87
|
+
regex_prefix = rule.pattern_regex_prefix
|
88
|
+
keywords_prefix = rule.pattern_keywords_prefix
|
89
|
+
|
90
|
+
@sanitizerules.push([keys, pattern_ipv4, pattern_fqdn, pattern_regex, pattern_keywords, regex_prefix, keywords_prefix])
|
76
91
|
end
|
77
92
|
end
|
78
93
|
|
79
94
|
def filter(tag, time, record)
|
80
|
-
@sanitizerules.each do |keys, pattern_ipv4, pattern_fqdn, pattern_regex, pattern_keywords|
|
95
|
+
@sanitizerules.each do |keys, pattern_ipv4, pattern_fqdn, pattern_regex, pattern_keywords, regex_prefix, keywords_prefix|
|
81
96
|
keys.each do |key|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
record[nkey[0]][nkey[1]] = sanitize_ipv4_val(@salt, record[nkey[0]][nkey[1]]) if pattern_ipv4 == true
|
88
|
-
record[nkey[0]][nkey[1]] = sanitize_fqdn_val(@salt, record[nkey[0]][nkey[1]]) if pattern_fqdn == true
|
89
|
-
record[nkey[0]][nkey[1]] = sanitize_regex(@salt, record[nkey[0]][nkey[1]]) if is_regex?(pattern_regex) && !!(pattern_regex =~ record[nkey[0]][nkey[1]])
|
90
|
-
record[nkey[0]][nkey[1]] = sanitize_keyword(@salt, pattern_keywords, record[nkey[0]][nkey[1]]) if pattern_keywords.empty? == false
|
91
|
-
else
|
92
|
-
$log.error "no such nested key found : key name = #{key}"
|
93
|
-
end
|
94
|
-
elsif nkey.length ==3
|
95
|
-
if record[nkey[0]][nkey[1]].key?(nkey[2])
|
96
|
-
v = record[nkey[0]][nkey[1]][nkey[2]]
|
97
|
-
record[nkey[0]][nkey[1]][nkey[2]] = sanitize_ipv4_val(@salt, record[nkey[0]][nkey[1]][nkey[2]]) if pattern_ipv4 == true
|
98
|
-
record[nkey[0]][nkey[1]][nkey[2]] = sanitize_fqdn_val(@salt, record[nkey[0]][nkey[1]][nkey[2]]) if pattern_fqdn == true
|
99
|
-
record[nkey[0]][nkey[1]][nkey[2]] = sanitize_regex(@salt, record[nkey[0]][nkey[1]][nkey[2]]) if is_regex?(pattern_regex) && !!(pattern_regex =~ record[nkey[0]][nkey[1]][nkey[2]])
|
100
|
-
record[nkey[0]][nkey[1]][nkey[2]] = sanitize_keyword(@salt, pattern_keywords, record[nkey[0]][nkey[1]][nkey[2]]) if pattern_keywords.empty? == false
|
101
|
-
else
|
102
|
-
$log.error "no such nested key found : key name = #{key}"
|
103
|
-
end
|
104
|
-
end
|
105
|
-
else
|
106
|
-
if record.key?(key)
|
107
|
-
v = record[key]
|
108
|
-
record[key] = sanitize_ipv4_val(@salt, record[key]) if pattern_ipv4 == true
|
109
|
-
record[key] = sanitize_fqdn_val(@salt, record[key]) if pattern_fqdn == true
|
110
|
-
record[key] = sanitize_regex(@salt, v) if is_regex?(pattern_regex) && !!(pattern_regex =~ record[key])
|
111
|
-
record[key] = sanitize_keyword_val(@salt, pattern_keywords, v) if pattern_keywords.empty? == false
|
112
|
-
else
|
113
|
-
$log.error "no such key found : key name = #{key}"
|
114
|
-
end
|
115
|
-
end
|
97
|
+
accessor = record_accessor_create("$."+key.to_s)
|
98
|
+
accessor.set(record, sanitize_ipv4_val(accessor.call(record).to_s)) if pattern_ipv4
|
99
|
+
accessor.set(record, sanitize_fqdn_val(accessor.call(record).to_s)) if pattern_fqdn
|
100
|
+
accessor.set(record, sanitize_regex_val(accessor.call(record).to_s, regex_prefix)) if accessor.call(record).to_s.match?(pattern_regex)
|
101
|
+
accessor.set(record, sanitize_keywords_val(accessor.call(record).to_s, pattern_keywords, keywords_prefix)) if !pattern_keywords.empty?
|
116
102
|
end
|
117
103
|
end
|
118
104
|
record
|
119
105
|
end
|
120
106
|
|
121
107
|
def include_ipv4?(str)
|
122
|
-
|
108
|
+
str.match?(/^.*\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}.*$/)
|
123
109
|
end
|
124
110
|
|
125
111
|
def is_ipv4?(str)
|
126
|
-
|
112
|
+
str.match?(/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/)
|
127
113
|
end
|
128
114
|
|
129
115
|
def is_ipv4_port?(str)
|
130
|
-
|
116
|
+
str.match?(/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:[0-9]{1,5}$/)
|
131
117
|
end
|
132
118
|
|
133
119
|
def include_fqdn?(str)
|
134
|
-
|
120
|
+
str.match?(/^.*\b(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.){2,}([A-Za-z]|[A-Za-z][A-Za-z\-]*[A-Za-z]){2,}.*$/)
|
135
121
|
end
|
136
122
|
|
137
123
|
def is_fqdn?(str)
|
138
|
-
|
124
|
+
str.match?(/^\b(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.){2,}([A-Za-z]|[A-Za-z][A-Za-z\-]*[A-Za-z]){2,}$/)
|
139
125
|
end
|
140
126
|
|
141
127
|
def is_fqdn_port?(str)
|
142
|
-
|
128
|
+
str.match?(/^\b(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.){2,}([A-Za-z]|[A-Za-z][A-Za-z\-]*[A-Za-z]){2,}:[0-9]{1,5}$/)
|
143
129
|
end
|
144
130
|
|
145
131
|
def is_url?(str)
|
146
|
-
|
147
|
-
end
|
148
|
-
|
149
|
-
def is_regex?(regex)
|
150
|
-
return regex.class == Regexp
|
132
|
+
str.match?(/^[a-zA-Z0-9]{2,}:\/\/.*$/)
|
151
133
|
end
|
152
134
|
|
153
135
|
def subtract_quotations(str)
|
154
|
-
|
136
|
+
str.gsub(/\\\"|\'|\"|\\\'/,'')
|
155
137
|
end
|
156
138
|
|
157
|
-
def sanitize_ipv4(
|
158
|
-
return
|
139
|
+
def sanitize_ipv4(str)
|
140
|
+
return "IPv4_"+Digest::MD5.hexdigest(@salt + str)
|
159
141
|
end
|
160
142
|
|
161
|
-
def sanitize_fqdn(
|
162
|
-
return
|
143
|
+
def sanitize_fqdn(str)
|
144
|
+
return "FQDN_"+Digest::MD5.hexdigest(@salt + str)
|
163
145
|
end
|
164
146
|
|
165
|
-
def sanitize_regex(
|
166
|
-
return "
|
147
|
+
def sanitize_regex(str, prefix)
|
148
|
+
return prefix + "_" + Digest::MD5.hexdigest(@salt + str)
|
167
149
|
end
|
168
150
|
|
169
|
-
def sanitize_keyword(
|
170
|
-
return "
|
151
|
+
def sanitize_keyword(str, prefix)
|
152
|
+
return prefix + "_" + Digest::MD5.hexdigest(@salt + str)
|
171
153
|
end
|
172
154
|
|
173
|
-
def sanitize_ipv4_port(
|
155
|
+
def sanitize_ipv4_port(str)
|
174
156
|
ip_port = []
|
175
157
|
str.split(":").each do |s|
|
176
|
-
|
158
|
+
s = sanitize_ipv4(s) if is_ipv4?(s)
|
177
159
|
ip_port.push(s)
|
178
160
|
end
|
179
|
-
return
|
161
|
+
return ip_port.join(":")
|
180
162
|
end
|
181
163
|
|
182
|
-
def sanitize_fqdn_port(
|
164
|
+
def sanitize_fqdn_port(str)
|
183
165
|
fqdn_port = []
|
184
166
|
str.split(":").each do |s|
|
185
|
-
|
167
|
+
s = sanitize_fqdn(s) if is_fqdn?(s)
|
186
168
|
fqdn_port.push(s)
|
187
169
|
end
|
188
|
-
return
|
170
|
+
return fqdn_port.join(":")
|
189
171
|
end
|
190
172
|
|
191
|
-
def sanitize_ipv4_url(
|
173
|
+
def sanitize_ipv4_url(str)
|
192
174
|
ip_url = []
|
193
175
|
str.split("://").each do |s|
|
194
176
|
if s.include?("/")
|
195
177
|
url_slash = []
|
196
178
|
s.split("/").each do |ss|
|
197
|
-
|
198
|
-
|
179
|
+
ss = sanitize_ipv4(ss) if is_ipv4?(ss)
|
180
|
+
ss = sanitize_ipv4_port(ss) if is_ipv4_port?(ss)
|
199
181
|
url_slash.push(ss)
|
200
182
|
end
|
201
183
|
s = url_slash.join("/")
|
202
184
|
else
|
203
|
-
|
204
|
-
|
185
|
+
s = sanitize_ipv4_port(s) if is_ipv4_port?(s)
|
186
|
+
s = sanitize_ipv4_port(s) if is_ipv4_port?(s)
|
205
187
|
end
|
206
188
|
ip_url.push(s)
|
207
189
|
end
|
208
|
-
return
|
190
|
+
return ip_url.join("://")
|
209
191
|
end
|
210
192
|
|
211
|
-
def sanitize_fqdn_url(
|
193
|
+
def sanitize_fqdn_url(str)
|
212
194
|
fqdn_url = []
|
213
195
|
str.split("://").each do |s|
|
214
196
|
if s.include?("/")
|
215
197
|
url_slash = []
|
216
198
|
s.split("/").each do |ss|
|
217
|
-
|
218
|
-
|
199
|
+
ss = sanitize_fqdn(ss) if is_fqdn?(ss)
|
200
|
+
ss = sanitize_fqdn_port(ss) if is_fqdn_port?(ss)
|
219
201
|
url_slash.push(ss)
|
220
202
|
end
|
221
203
|
s = url_slash.join("/")
|
222
204
|
else
|
223
|
-
|
224
|
-
|
205
|
+
s = sanitize_fqdn(s) if is_fqdn?(s)
|
206
|
+
s = sanitize_fqdn_port(s) if is_fqdn_port?(s)
|
225
207
|
end
|
226
208
|
fqdn_url.push(s)
|
227
209
|
end
|
228
|
-
return
|
210
|
+
return fqdn_url.join("://")
|
229
211
|
end
|
230
212
|
|
231
|
-
def sanitize_ipv4_val(
|
213
|
+
def sanitize_ipv4_val(v)
|
232
214
|
line = []
|
233
215
|
if v.include?(",")
|
234
216
|
v.split(",").each do |s|
|
235
217
|
s = subtract_quotations(s)
|
236
218
|
if include_ipv4?(s)
|
237
219
|
if is_url?(s)
|
238
|
-
|
220
|
+
s = sanitize_ipv4_url(s)
|
239
221
|
else
|
240
|
-
|
241
|
-
|
222
|
+
s = sanitize_ipv4(s) if is_ipv4?(s)
|
223
|
+
s = sanitize_ipv4_port(s) if is_ipv4_port?(s)
|
242
224
|
end
|
243
225
|
end
|
244
226
|
line.push(s)
|
@@ -249,29 +231,30 @@ module Fluent
|
|
249
231
|
s = subtract_quotations(s)
|
250
232
|
if include_ipv4?(s)
|
251
233
|
if is_url?(s)
|
252
|
-
|
234
|
+
s = sanitize_ipv4_url(s)
|
253
235
|
else
|
254
|
-
|
255
|
-
|
236
|
+
s = sanitize_ipv4(s) if is_ipv4?(s)
|
237
|
+
s = sanitize_ipv4_port(s) if is_ipv4_port?(s)
|
256
238
|
end
|
257
239
|
end
|
258
240
|
line.push(s)
|
259
241
|
end
|
242
|
+
$log.debug "[pattern_ipv4] sanitize '#{v}' to '#{line.join(" ")}'" if v != line.join(" ")
|
260
243
|
return line.join(" ")
|
261
244
|
end
|
262
245
|
end
|
263
246
|
|
264
|
-
def sanitize_fqdn_val(
|
247
|
+
def sanitize_fqdn_val(v)
|
265
248
|
line = []
|
266
249
|
if v.include?(",")
|
267
250
|
v.split(",").each do |s|
|
268
251
|
s = subtract_quotations(s)
|
269
252
|
if include_fqdn?(s)
|
270
253
|
if is_url?(s)
|
271
|
-
|
254
|
+
s = sanitize_fqdn_url(s)
|
272
255
|
else
|
273
|
-
|
274
|
-
|
256
|
+
s = sanitize_fqdn(s) if is_fqdn?(s)
|
257
|
+
s = sanitize_fqdn_port(s) if is_fqdn_port?(s)
|
275
258
|
end
|
276
259
|
end
|
277
260
|
line.push(s)
|
@@ -282,27 +265,35 @@ module Fluent
|
|
282
265
|
s = subtract_quotations(s)
|
283
266
|
if include_fqdn?(s)
|
284
267
|
if is_url?(s)
|
285
|
-
|
268
|
+
s = sanitize_fqdn_url(s)
|
286
269
|
else
|
287
|
-
|
288
|
-
|
270
|
+
s = sanitize_fqdn(s) if is_fqdn?(s)
|
271
|
+
s = sanitize_fqdn_port(s) if is_fqdn_port?(s)
|
289
272
|
end
|
290
273
|
end
|
291
274
|
line.push(s)
|
292
275
|
end
|
276
|
+
$log.debug "[pattern_fqdn] sanitize '#{v}' to '#{line.join(" ")}'" if v != line.join(" ")
|
293
277
|
return line.join(" ")
|
294
278
|
end
|
295
279
|
end
|
296
280
|
|
297
|
-
def
|
281
|
+
def sanitize_regex_val(v, prefix)
|
282
|
+
s = sanitize_regex(v, prefix)
|
283
|
+
$log.debug "[pattern_keywords] sanitize '#{v}' to '#{s}'" if v != s
|
284
|
+
return s
|
285
|
+
end
|
286
|
+
|
287
|
+
def sanitize_keywords_val(v, keywords, prefix)
|
298
288
|
line = []
|
299
289
|
v.split().each do |vv|
|
300
290
|
if keywords.include?(vv)
|
301
|
-
line.push(sanitize_keyword(
|
291
|
+
line.push(sanitize_keyword(vv, prefix))
|
302
292
|
else
|
303
293
|
line.push(vv)
|
304
294
|
end
|
305
295
|
end
|
296
|
+
$log.debug "[pattern_keywords] sanitize '#{v}' to '#{line.join(" ")}'" if v != line.join(" ")
|
306
297
|
return line.join(" ")
|
307
298
|
end
|
308
299
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TK Kubota
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-02-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|