fluent-plugin-rewrite-tag-filter 1.2.1 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
data/README.md
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
### RewriteTagFilterOutput
|
6
6
|
|
7
7
|
Fluentd Output filter plugin. It is designed to rewrite tags like mod_rewrite.
|
8
|
-
Re-emmit a record with rewrited tag when a value matches with the regular expression.
|
8
|
+
Re-emits a record with a rewritten tag when a value matches (or does not match) the regular expression.
|
9
9
|
Also you can change a tag from apache log by domain, status-code(ex. 500 error),
|
10
10
|
user-agent, request-uri, regex-backreference and so on with regular expression.
|
11
11
|
|
@@ -24,15 +24,15 @@ gem install fluent-plugin-rewrite-tag-filter
|
|
24
24
|
### Syntax
|
25
25
|
|
26
26
|
```
|
27
|
-
|
27
|
+
rewriterule<num:1-200> <attribute> <regex_pattern> <new_tag>
|
28
28
|
|
29
29
|
# Optional: Capitalize every matched regex backreference. (ex: $1, $2)
|
30
30
|
capitalize_regex_backreference <yes/no> (default no)
|
31
31
|
|
32
|
-
# Optional: remove tag prefix for tag placeholder.
|
32
|
+
# Optional: remove tag prefix for tag placeholder. (see the section of "Tag placeholder")
|
33
33
|
remove_tag_prefix <string>
|
34
34
|
|
35
|
-
# Optional:
|
35
|
+
# Optional: override hostname command for placeholder. (see the section of "Tag placeholder")
|
36
36
|
hostname_command <string>
|
37
37
|
```
|
38
38
|
|
@@ -50,15 +50,19 @@ It's a sample to exclude some static file log before split tag by domain.
|
|
50
50
|
pos_file /var/log/td-agent/apache_access.pos
|
51
51
|
</source>
|
52
52
|
|
53
|
-
# At
|
53
|
+
# At rewriterule2, records whose status code is not 200 are redirected to the tag named "clear".
|
54
|
+
# At rewriterule3, records whose domain does not end with ".com" are redirected to the tag named "clear".
|
55
|
+
# At rewriterule6, "site.$2$1" becomes "site.ExampleMail" because of the capitalize_regex_backreference option.
|
54
56
|
<match td.apache.access>
|
55
57
|
type rewrite_tag_filter
|
56
58
|
capitalize_regex_backreference yes
|
57
59
|
rewriterule1 path \.(gif|jpe?g|png|pdf|zip)$ clear
|
58
|
-
rewriterule2
|
59
|
-
rewriterule3 domain
|
60
|
-
rewriterule4 domain ^
|
61
|
-
rewriterule5 domain
|
60
|
+
rewriterule2 status !^200$ clear
|
61
|
+
rewriterule3 domain !^.+\.com$ clear
|
62
|
+
rewriterule4 domain ^maps\.example\.com$ site.ExampleMaps
|
63
|
+
rewriterule5 domain ^news\.example\.com$ site.ExampleNews
|
64
|
+
rewriterule6 domain ^(mail)\.(example)\.com$ site.$2$1
|
65
|
+
rewriterule7 domain .+ site.unmatched
|
62
66
|
</match>
|
63
67
|
|
64
68
|
<match site.*>
|
@@ -106,14 +110,22 @@ $ tailf /var/log/td-agent/td-agent.log
|
|
106
110
|
|
107
111
|
### Tag placeholder
|
108
112
|
|
109
|
-
|
113
|
+
The following placeholders are supported in new_tag (the rewritten tag).
|
114
|
+
|
115
|
+
- `${tag}`
|
116
|
+
- `__TAG__`
|
117
|
+
|
110
118
|
This placeholder can be used together with the `remove_tag_prefix` option.
|
111
|
-
This
|
119
|
+
This option removes the given tag prefix from the value substituted for `${tag}` or `__TAG__`.
|
112
120
|
|
113
121
|
- `${hostname}`
|
114
122
|
- `__HOSTNAME__`
|
115
|
-
|
116
|
-
|
123
|
+
|
124
|
+
By default, the `hostname` command is executed to get the full hostname.
|
125
|
+
Also, you can override hostname command using `hostname_command` option.
|
126
|
+
For example, specifying `hostname_command hostname -s` yields the short hostname.
|
127
|
+
|
128
|
+
#### Placeholder Usage
|
117
129
|
|
118
130
|
Here is a sample that rewrites a tag using a placeholder.
|
119
131
|
|
@@ -3,11 +3,11 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "fluent-plugin-rewrite-tag-filter"
|
6
|
-
s.version = "1.
|
6
|
+
s.version = "1.3.0"
|
7
7
|
s.authors = ["Kentaro Yoshida"]
|
8
8
|
s.email = ["y.ken.studio@gmail.com"]
|
9
9
|
s.homepage = "https://github.com/y-ken/fluent-plugin-rewrite-tag-filter"
|
10
|
-
s.summary = %q{Fluentd Output filter plugin. It has designed to rewrite tag like mod_rewrite. Re-emmit a record with rewrited tag when a value matches with the regular expression. Also you can change a tag from apache log by domain, status-code(ex. 500 error), user-agent, request-uri, regex-backreference and so on with regular expression.}
|
10
|
+
s.summary = %q{Fluentd Output filter plugin. It has designed to rewrite tag like mod_rewrite. Re-emmit a record with rewrited tag when a value matches/unmatches with the regular expression. Also you can change a tag from apache log by domain, status-code(ex. 500 error), user-agent, request-uri, regex-backreference and so on with regular expression.}
|
11
11
|
|
12
12
|
s.files = `git ls-files`.split("\n")
|
13
13
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
@@ -1,16 +1,12 @@
|
|
1
1
|
class Fluent::RewriteTagFilterOutput < Fluent::Output
|
2
2
|
Fluent::Plugin.register_output('rewrite_tag_filter', self)
|
3
3
|
|
4
|
-
PATTERN_MAX_NUM = 200
|
5
|
-
|
6
|
-
config_param :rewriterule1, :string # string: NAME REGEXP
|
7
|
-
(2..PATTERN_MAX_NUM).each do |i|
|
8
|
-
config_param ('rewriterule' + i.to_s).to_sym, :string, :default => nil # NAME REGEXP
|
9
|
-
end
|
10
4
|
config_param :capitalize_regex_backreference, :bool, :default => false
|
11
5
|
config_param :remove_tag_prefix, :string, :default => nil
|
12
6
|
config_param :hostname_command, :string, :default => 'hostname'
|
13
7
|
|
8
|
+
MATCH_OPERATOR_EXCLUDE = '!'
|
9
|
+
|
14
10
|
def configure(conf)
|
15
11
|
super
|
16
12
|
|
@@ -18,24 +14,20 @@ class Fluent::RewriteTagFilterOutput < Fluent::Output
|
|
18
14
|
rewriterule_names = []
|
19
15
|
@hostname = `#{@hostname_command}`.chomp
|
20
16
|
|
21
|
-
|
22
|
-
|
23
|
-
$log.warn "invalid number rewriterules (valid rewriterule number:1-{PATTERN_MAX_NUM}): #{invalids.join(",")}"
|
24
|
-
end
|
25
|
-
(1..PATTERN_MAX_NUM).each do |i|
|
26
|
-
next unless conf["rewriterule#{i}"]
|
27
|
-
rewritekey,regexp,rewritetag = conf["rewriterule#{i}"].match(/^([^\s]+)\s+(.+?)\s+([^\s]+)$/).captures
|
17
|
+
conf.keys.select{|k| k =~ /^rewriterule(\d+)$/}.sort_by{|i| i.sub('rewriterule', '').to_i}.each do |key|
|
18
|
+
rewritekey,regexp,rewritetag = parse_rewriterule(conf[key])
|
28
19
|
if regexp.nil? || rewritetag.nil?
|
29
|
-
raise Fluent::ConfigError, "
|
20
|
+
raise Fluent::ConfigError, "failed to parse rewriterules at #{key} #{conf[key]}"
|
30
21
|
end
|
31
|
-
@rewriterules.push([
|
22
|
+
@rewriterules.push([rewritekey, Regexp.new(trim_regex_quote(regexp)), get_match_operator(regexp), rewritetag])
|
32
23
|
rewriterule_names.push(rewritekey + regexp)
|
33
|
-
$log.info "adding rewrite_tag_filter rule: #{@rewriterules.last}"
|
24
|
+
$log.info "adding rewrite_tag_filter rule: #{key} #{@rewriterules.last}"
|
34
25
|
end
|
35
|
-
|
36
|
-
unless
|
37
|
-
raise Fluent::ConfigError, "
|
26
|
+
|
27
|
+
unless @rewriterules.length > 0
|
28
|
+
raise Fluent::ConfigError, "missing rewriterules"
|
38
29
|
end
|
30
|
+
|
39
31
|
unless @rewriterules.length == rewriterule_names.uniq.length
|
40
32
|
raise Fluent::ConfigError, "duplicated rewriterules found #{@rewriterules.inspect}"
|
41
33
|
end
|
@@ -46,34 +38,58 @@ class Fluent::RewriteTagFilterOutput < Fluent::Output
|
|
46
38
|
end
|
47
39
|
|
48
40
|
def emit(tag, es, chain)
|
49
|
-
placeholder = get_placeholder(tag)
|
50
41
|
es.each do |time,record|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
next if rewritevalue.nil?
|
55
|
-
next unless (regexp && regexp.match(rewritevalue))
|
56
|
-
backreference_table = map_regex_table($~.captures)
|
57
|
-
rewrite = true
|
58
|
-
rewritetag.gsub!(/(\${[a-z]+}|__[A-Z]+__)/, placeholder)
|
59
|
-
tag = rewritetag.gsub(/\$\d+/, backreference_table)
|
60
|
-
break
|
61
|
-
end
|
62
|
-
Fluent::Engine.emit(tag, time, record) if (rewrite)
|
42
|
+
rewrited_tag = rewrite_tag(tag, record)
|
43
|
+
next if rewrited_tag.nil? || tag == rewrited_tag
|
44
|
+
Fluent::Engine.emit(rewrited_tag, time, record)
|
63
45
|
end
|
64
46
|
|
65
47
|
chain.next
|
66
48
|
end
|
67
49
|
|
50
|
+
def rewrite_tag(tag, record)
|
51
|
+
placeholder = get_placeholder(tag)
|
52
|
+
@rewriterules.each do |rewritekey, regexp, match_operator, rewritetag|
|
53
|
+
rewritevalue = record[rewritekey].to_s
|
54
|
+
next if rewritevalue.empty? && match_operator != MATCH_OPERATOR_EXCLUDE
|
55
|
+
matched = regexp && regexp.match(rewritevalue)
|
56
|
+
case match_operator
|
57
|
+
when MATCH_OPERATOR_EXCLUDE
|
58
|
+
next if matched
|
59
|
+
else
|
60
|
+
next if !matched
|
61
|
+
backreference_table = get_backreference_table($~.captures)
|
62
|
+
rewritetag.gsub!(/\$\d+/, backreference_table)
|
63
|
+
end
|
64
|
+
rewritetag.gsub!(/(\${[a-z]+}|__[A-Z]+__)/, placeholder)
|
65
|
+
return rewritetag
|
66
|
+
end
|
67
|
+
return nil
|
68
|
+
end
|
69
|
+
|
70
|
+
def parse_rewriterule(rule)
|
71
|
+
if rule.match(/^([^\s]+)\s+(.+?)\s+([^\s]+)$/)
|
72
|
+
return $~.captures
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
68
76
|
def trim_regex_quote(regexp)
|
69
77
|
if regexp.start_with?('"') && regexp.end_with?('"')
|
70
78
|
$log.info "rewrite_tag_filter: [DEPRECATED] Use ^....$ pattern for partial word match instead of double-quote-delimiter. #{regexp}"
|
71
|
-
|
79
|
+
regexp = regexp[1..-2]
|
80
|
+
end
|
81
|
+
if regexp.start_with?('!')
|
82
|
+
regexp = regexp[1, regexp.length]
|
72
83
|
end
|
73
84
|
return regexp
|
74
85
|
end
|
75
86
|
|
76
|
-
def
|
87
|
+
def get_match_operator(regexp)
|
88
|
+
return '!' if regexp.start_with?('!')
|
89
|
+
return ''
|
90
|
+
end
|
91
|
+
|
92
|
+
def get_backreference_table(elements)
|
77
93
|
hash_table = Hash.new
|
78
94
|
elements.each.with_index(1) do |value, index|
|
79
95
|
hash_table["$#{index}"] = @capitalize_regex_backreference ? value.capitalize : value
|
@@ -16,7 +16,7 @@ class RewriteTagFilterOutputTest < Test::Unit::TestCase
|
|
16
16
|
# aggressive test
|
17
17
|
# indentation, comment, capitalize_regex_backreference, regex with space aside.
|
18
18
|
# [DEPRECATED] Use ^....$ pattern for partial word match instead of double-quote-delimiter.
|
19
|
-
|
19
|
+
CONFIG_INDENT_SPACE_AND_CAPITALIZE_OPTION = %[
|
20
20
|
capitalize_regex_backreference yes
|
21
21
|
rewriterule1 domain ^www\.google\.com$ site.Google # some comment
|
22
22
|
rewriterule2 domain ^(news)\.(google)\.com$ site.$2$1
|
@@ -25,18 +25,30 @@ class RewriteTagFilterOutputTest < Test::Unit::TestCase
|
|
25
25
|
]
|
26
26
|
|
27
27
|
# remove_tag_prefix test
|
28
|
-
|
28
|
+
CONFIG_REMOVE_TAG_PREFIX = %[
|
29
29
|
rewriterule1 domain ^www\.google\.com$ ${tag}
|
30
30
|
remove_tag_prefix input
|
31
31
|
]
|
32
32
|
|
33
33
|
# hostname placeholder test
|
34
|
-
|
34
|
+
CONFIG_SHORT_HOSTNAME = %[
|
35
35
|
rewriterule1 domain ^www\.google\.com$ ${hostname}
|
36
36
|
remove_tag_prefix input
|
37
37
|
hostname_command hostname -s
|
38
38
|
]
|
39
39
|
|
40
|
+
# '!' character (exclamation mark) to specify a non-matching pattern
|
41
|
+
CONFIG_NON_MATCHING = %[
|
42
|
+
rewriterule1 domain !^www\..+$ not_start_with_www
|
43
|
+
rewriterule2 domain ^www\..+$ start_with_www
|
44
|
+
]
|
45
|
+
|
46
|
+
# jump of index
|
47
|
+
CONFIG_JUMP_INDEX = %[
|
48
|
+
rewriterule10 domain ^www\.google\.com$ site.Google
|
49
|
+
rewriterule20 domain ^news\.google\.com$ site.GoogleNews
|
50
|
+
]
|
51
|
+
|
40
52
|
def create_driver(conf=CONFIG,tag='test')
|
41
53
|
Fluent::Test::OutputTestDriver.new(Fluent::RewriteTagFilterOutput, tag).configure(conf)
|
42
54
|
end
|
@@ -45,13 +57,19 @@ class RewriteTagFilterOutputTest < Test::Unit::TestCase
|
|
45
57
|
assert_raise(Fluent::ConfigError) {
|
46
58
|
d = create_driver('')
|
47
59
|
}
|
60
|
+
assert_raise(Fluent::ConfigError) {
|
61
|
+
d = create_driver('rewriterule1 foo')
|
62
|
+
}
|
63
|
+
assert_raise(Fluent::ConfigError) {
|
64
|
+
d = create_driver('rewriterule1 foo foo')
|
65
|
+
}
|
48
66
|
d = create_driver %[
|
49
67
|
rewriterule1 domain ^www.google.com$ site.Google
|
50
68
|
rewriterule2 domain ^news.google.com$ site.GoogleNews
|
51
69
|
]
|
52
|
-
d.instance.inspect
|
53
|
-
assert_equal 'domain ^www.google.com$ site.Google', d.instance.rewriterule1
|
54
|
-
assert_equal 'domain ^news.google.com$ site.GoogleNews', d.instance.rewriterule2
|
70
|
+
puts d.instance.inspect
|
71
|
+
assert_equal 'domain ^www.google.com$ site.Google', d.instance.config['rewriterule1']
|
72
|
+
assert_equal 'domain ^news.google.com$ site.GoogleNews', d.instance.config['rewriterule2']
|
55
73
|
end
|
56
74
|
|
57
75
|
def test_emit
|
@@ -62,6 +80,7 @@ class RewriteTagFilterOutputTest < Test::Unit::TestCase
|
|
62
80
|
d1.emit({'domain' => 'map.google.com', 'path' => '/', 'agent' => 'Macintosh; Intel Mac OS X 10_7_4', 'response_time' => 900000})
|
63
81
|
d1.emit({'domain' => 'labs.google.com', 'path' => '/', 'agent' => 'Mozilla/5.0 Googlebot-FooBar/2.1', 'response_time' => 900000})
|
64
82
|
d1.emit({'domain' => 'tagtest.google.com', 'path' => '/', 'agent' => 'Googlebot', 'response_time' => 900000})
|
83
|
+
d1.emit({'domain' => 'noop.example.com'}) # to be ignored
|
65
84
|
end
|
66
85
|
emits = d1.emits
|
67
86
|
assert_equal 5, emits.length
|
@@ -78,8 +97,8 @@ class RewriteTagFilterOutputTest < Test::Unit::TestCase
|
|
78
97
|
assert_equal 'site.input.access.tagtest', emits[4][0] #tag
|
79
98
|
end
|
80
99
|
|
81
|
-
def
|
82
|
-
d1 = create_driver(
|
100
|
+
def test_emit2_indent_and_capitalize_option
|
101
|
+
d1 = create_driver(CONFIG_INDENT_SPACE_AND_CAPITALIZE_OPTION, 'input.access')
|
83
102
|
d1.run do
|
84
103
|
d1.emit({'domain' => 'www.google.com', 'path' => '/foo/bar?key=value', 'agent' => 'Googlebot', 'response_time' => 1000000})
|
85
104
|
d1.emit({'domain' => 'news.google.com', 'path' => '/', 'agent' => 'Googlebot-Mobile', 'response_time' => 900000})
|
@@ -99,8 +118,8 @@ class RewriteTagFilterOutputTest < Test::Unit::TestCase
|
|
99
118
|
assert_equal 'agent.Googlebot-Foobar', emits[3][0] #tag
|
100
119
|
end
|
101
120
|
|
102
|
-
def
|
103
|
-
d1 = create_driver(
|
121
|
+
def test_emit3_remove_tag_prefix
|
122
|
+
d1 = create_driver(CONFIG_REMOVE_TAG_PREFIX, 'input.access')
|
104
123
|
d1.run do
|
105
124
|
d1.emit({'domain' => 'www.google.com', 'path' => '/foo/bar?key=value', 'agent' => 'Googlebot', 'response_time' => 1000000})
|
106
125
|
end
|
@@ -110,8 +129,8 @@ class RewriteTagFilterOutputTest < Test::Unit::TestCase
|
|
110
129
|
assert_equal 'access', emits[0][0] # tag
|
111
130
|
end
|
112
131
|
|
113
|
-
def
|
114
|
-
d1 = create_driver(
|
132
|
+
def test_emit4_short_hostname
|
133
|
+
d1 = create_driver(CONFIG_SHORT_HOSTNAME, 'input.access')
|
115
134
|
d1.run do
|
116
135
|
d1.emit({'domain' => 'www.google.com', 'path' => '/foo/bar?key=value', 'agent' => 'Googlebot', 'response_time' => 1000000})
|
117
136
|
end
|
@@ -120,5 +139,37 @@ class RewriteTagFilterOutputTest < Test::Unit::TestCase
|
|
120
139
|
p emits[0]
|
121
140
|
assert_equal `hostname -s`.chomp, emits[0][0] # tag
|
122
141
|
end
|
142
|
+
|
143
|
+
def test_emit5_non_matching
|
144
|
+
d1 = create_driver(CONFIG_NON_MATCHING, 'input.access')
|
145
|
+
d1.run do
|
146
|
+
d1.emit({'domain' => 'www.google.com'})
|
147
|
+
d1.emit({'path' => '/'})
|
148
|
+
d1.emit({'domain' => 'maps.google.com'})
|
149
|
+
end
|
150
|
+
emits = d1.emits
|
151
|
+
assert_equal 3, emits.length
|
152
|
+
p emits[0]
|
153
|
+
assert_equal 'start_with_www', emits[0][0] # tag
|
154
|
+
p emits[1]
|
155
|
+
assert_equal 'not_start_with_www', emits[1][0] # tag
|
156
|
+
p emits[2]
|
157
|
+
assert_equal 'not_start_with_www', emits[2][0] # tag
|
158
|
+
end
|
159
|
+
|
160
|
+
def test_emit6_jump_index
|
161
|
+
d1 = create_driver(CONFIG_JUMP_INDEX, 'input.access')
|
162
|
+
d1.run do
|
163
|
+
d1.emit({'domain' => 'www.google.com', 'path' => '/', 'agent' => 'Googlebot', 'response_time' => 1000000})
|
164
|
+
d1.emit({'domain' => 'news.google.com', 'path' => '/', 'agent' => 'Googlebot', 'response_time' => 900000})
|
165
|
+
end
|
166
|
+
emits = d1.emits
|
167
|
+
assert_equal 2, emits.length
|
168
|
+
p emits[0]
|
169
|
+
assert_equal 'site.Google', emits[0][0] # tag
|
170
|
+
p emits[1]
|
171
|
+
assert_equal 'site.GoogleNews', emits[1][0] # tag
|
172
|
+
end
|
173
|
+
|
123
174
|
end
|
124
175
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-rewrite-tag-filter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-07-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -85,9 +85,10 @@ rubygems_version: 1.8.23
|
|
85
85
|
signing_key:
|
86
86
|
specification_version: 3
|
87
87
|
summary: Fluentd Output filter plugin. It has designed to rewrite tag like mod_rewrite.
|
88
|
-
Re-emmit a record with rewrited tag when a value matches with the regular
|
89
|
-
Also you can change a tag from apache log by domain, status-code(ex.
|
90
|
-
user-agent, request-uri, regex-backreference and so on with regular
|
88
|
+
Re-emmit a record with rewrited tag when a value matches/unmatches with the regular
|
89
|
+
expression. Also you can change a tag from apache log by domain, status-code(ex.
|
90
|
+
500 error), user-agent, request-uri, regex-backreference and so on with regular
|
91
|
+
expression.
|
91
92
|
test_files:
|
92
93
|
- test/helper.rb
|
93
94
|
- test/plugin/test_out_rewrite_tag_filter.rb
|