fluent-plugin-rewrite-tag-filter 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
### RewriteTagFilterOutput
|
6
6
|
|
7
7
|
Fluentd Output filter plugin. It is designed to rewrite tags like mod_rewrite.
|
8
|
-
Re-emmit a record with rewrited tag when a value matches with the regular expression.
|
8
|
+
Re-emit a record with a rewritten tag when a value matches (or does not match) the regular expression.
|
9
9
|
Also you can change a tag from apache log by domain, status-code(ex. 500 error),
|
10
10
|
user-agent, request-uri, regex-backreference and so on with regular expression.
|
11
11
|
|
@@ -24,15 +24,15 @@ gem install fluent-plugin-rewrite-tag-filter
|
|
24
24
|
### Syntax
|
25
25
|
|
26
26
|
```
|
27
|
-
|
27
|
+
rewriterule<num:1-200> <attribute> <regex_pattern> <new_tag>
|
28
28
|
|
29
29
|
# Optional: Capitalize every matched regex backreference. (ex: $1, $2)
|
30
30
|
capitalize_regex_backreference <yes/no> (default no)
|
31
31
|
|
32
|
-
# Optional: remove tag prefix for tag placeholder.
|
32
|
+
# Optional: remove tag prefix for tag placeholder. (see the section of "Tag placeholder")
|
33
33
|
remove_tag_prefix <string>
|
34
34
|
|
35
|
-
# Optional:
|
35
|
+
# Optional: override hostname command for placeholder. (see the section of "Tag placeholder")
|
36
36
|
hostname_command <string>
|
37
37
|
```
|
38
38
|
|
@@ -50,15 +50,19 @@ It's a sample to exclude some static file log before split tag by domain.
|
|
50
50
|
pos_file /var/log/td-agent/apache_access.pos
|
51
51
|
</source>
|
52
52
|
|
53
|
-
# At
|
53
|
+
# At rewriterule2, records whose status code is not 200 are redirected to the tag named "clear".
|
54
|
+
# At rewriterule3, records whose domain does not end with ".com" are redirected to the tag named "clear".
|
55
|
+
# At rewriterule6, "site.$2$1" becomes "site.ExampleMail" because of the capitalize_regex_backreference option.
|
54
56
|
<match td.apache.access>
|
55
57
|
type rewrite_tag_filter
|
56
58
|
capitalize_regex_backreference yes
|
57
59
|
rewriterule1 path \.(gif|jpe?g|png|pdf|zip)$ clear
|
58
|
-
rewriterule2
|
59
|
-
rewriterule3 domain
|
60
|
-
rewriterule4 domain ^
|
61
|
-
rewriterule5 domain
|
60
|
+
rewriterule2 status !^200$ clear
|
61
|
+
rewriterule3 domain !^.+\.com$ clear
|
62
|
+
rewriterule4 domain ^maps\.example\.com$ site.ExampleMaps
|
63
|
+
rewriterule5 domain ^news\.example\.com$ site.ExampleNews
|
64
|
+
rewriterule6 domain ^(mail)\.(example)\.com$ site.$2$1
|
65
|
+
rewriterule7 domain .+ site.unmatched
|
62
66
|
</match>
|
63
67
|
|
64
68
|
<match site.*>
|
@@ -106,14 +110,22 @@ $ tailf /var/log/td-agent/td-agent.log
|
|
106
110
|
|
107
111
|
### Tag placeholder
|
108
112
|
|
109
|
-
|
113
|
+
The following placeholders are supported in new_tag (the rewritten tag).
|
114
|
+
|
115
|
+
- `${tag}`
|
116
|
+
- `__TAG__`
|
117
|
+
|
110
118
|
It's available to use this placeholder with `remove_tag_prefix` option.
|
111
|
-
This
|
119
|
+
This option removes the given tag prefix from the value substituted for the `${tag}` or `__TAG__` placeholder.
|
112
120
|
|
113
121
|
- `${hostname}`
|
114
122
|
- `__HOSTNAME__`
|
115
|
-
|
116
|
-
|
123
|
+
|
124
|
+
By default, the `hostname` command is executed to get the full hostname.
|
125
|
+
Also, you can override hostname command using `hostname_command` option.
|
126
|
+
For example, specifying `hostname_command hostname -s` yields the short hostname.
|
127
|
+
|
128
|
+
#### Placeholder Usage
|
117
129
|
|
118
130
|
It's a sample to rewrite a tag with placeholder.
|
119
131
|
|
@@ -3,11 +3,11 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "fluent-plugin-rewrite-tag-filter"
|
6
|
-
s.version = "1.2.1"
|
6
|
+
s.version = "1.3.0"
|
7
7
|
s.authors = ["Kentaro Yoshida"]
|
8
8
|
s.email = ["y.ken.studio@gmail.com"]
|
9
9
|
s.homepage = "https://github.com/y-ken/fluent-plugin-rewrite-tag-filter"
|
10
|
-
s.summary = %q{Fluentd Output filter plugin. It has designed to rewrite tag like mod_rewrite. Re-emmit a record with rewrited tag when a value matches with the regular expression. Also you can change a tag from apache log by domain, status-code(ex. 500 error), user-agent, request-uri, regex-backreference and so on with regular expression.}
|
10
|
+
s.summary = %q{Fluentd Output filter plugin. It has designed to rewrite tag like mod_rewrite. Re-emmit a record with rewrited tag when a value matches/unmatches with the regular expression. Also you can change a tag from apache log by domain, status-code(ex. 500 error), user-agent, request-uri, regex-backreference and so on with regular expression.}
|
11
11
|
|
12
12
|
s.files = `git ls-files`.split("\n")
|
13
13
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
@@ -1,16 +1,12 @@
|
|
1
1
|
class Fluent::RewriteTagFilterOutput < Fluent::Output
|
2
2
|
Fluent::Plugin.register_output('rewrite_tag_filter', self)
|
3
3
|
|
4
|
-
PATTERN_MAX_NUM = 200
|
5
|
-
|
6
|
-
config_param :rewriterule1, :string # string: NAME REGEXP
|
7
|
-
(2..PATTERN_MAX_NUM).each do |i|
|
8
|
-
config_param ('rewriterule' + i.to_s).to_sym, :string, :default => nil # NAME REGEXP
|
9
|
-
end
|
10
4
|
config_param :capitalize_regex_backreference, :bool, :default => false
|
11
5
|
config_param :remove_tag_prefix, :string, :default => nil
|
12
6
|
config_param :hostname_command, :string, :default => 'hostname'
|
13
7
|
|
8
|
+
MATCH_OPERATOR_EXCLUDE = '!'
|
9
|
+
|
14
10
|
def configure(conf)
|
15
11
|
super
|
16
12
|
|
@@ -18,24 +14,20 @@ class Fluent::RewriteTagFilterOutput < Fluent::Output
|
|
18
14
|
rewriterule_names = []
|
19
15
|
@hostname = `#{@hostname_command}`.chomp
|
20
16
|
|
21
|
-
|
22
|
-
|
23
|
-
$log.warn "invalid number rewriterules (valid rewriterule number:1-{PATTERN_MAX_NUM}): #{invalids.join(",")}"
|
24
|
-
end
|
25
|
-
(1..PATTERN_MAX_NUM).each do |i|
|
26
|
-
next unless conf["rewriterule#{i}"]
|
27
|
-
rewritekey,regexp,rewritetag = conf["rewriterule#{i}"].match(/^([^\s]+)\s+(.+?)\s+([^\s]+)$/).captures
|
17
|
+
conf.keys.select{|k| k =~ /^rewriterule(\d+)$/}.sort_by{|i| i.sub('rewriterule', '').to_i}.each do |key|
|
18
|
+
rewritekey,regexp,rewritetag = parse_rewriterule(conf[key])
|
28
19
|
if regexp.nil? || rewritetag.nil?
|
29
|
-
raise Fluent::ConfigError, "
|
20
|
+
raise Fluent::ConfigError, "failed to parse rewriterules at #{key} #{conf[key]}"
|
30
21
|
end
|
31
|
-
@rewriterules.push([
|
22
|
+
@rewriterules.push([rewritekey, Regexp.new(trim_regex_quote(regexp)), get_match_operator(regexp), rewritetag])
|
32
23
|
rewriterule_names.push(rewritekey + regexp)
|
33
|
-
$log.info "adding rewrite_tag_filter rule: #{@rewriterules.last}"
|
24
|
+
$log.info "adding rewrite_tag_filter rule: #{key} #{@rewriterules.last}"
|
34
25
|
end
|
35
|
-
|
36
|
-
unless
|
37
|
-
raise Fluent::ConfigError, "
|
26
|
+
|
27
|
+
unless @rewriterules.length > 0
|
28
|
+
raise Fluent::ConfigError, "missing rewriterules"
|
38
29
|
end
|
30
|
+
|
39
31
|
unless @rewriterules.length == rewriterule_names.uniq.length
|
40
32
|
raise Fluent::ConfigError, "duplicated rewriterules found #{@rewriterules.inspect}"
|
41
33
|
end
|
@@ -46,34 +38,58 @@ class Fluent::RewriteTagFilterOutput < Fluent::Output
|
|
46
38
|
end
|
47
39
|
|
48
40
|
def emit(tag, es, chain)
|
49
|
-
placeholder = get_placeholder(tag)
|
50
41
|
es.each do |time,record|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
next if rewritevalue.nil?
|
55
|
-
next unless (regexp && regexp.match(rewritevalue))
|
56
|
-
backreference_table = map_regex_table($~.captures)
|
57
|
-
rewrite = true
|
58
|
-
rewritetag.gsub!(/(\${[a-z]+}|__[A-Z]+__)/, placeholder)
|
59
|
-
tag = rewritetag.gsub(/\$\d+/, backreference_table)
|
60
|
-
break
|
61
|
-
end
|
62
|
-
Fluent::Engine.emit(tag, time, record) if (rewrite)
|
42
|
+
rewrited_tag = rewrite_tag(tag, record)
|
43
|
+
next if rewrited_tag.nil? || tag == rewrited_tag
|
44
|
+
Fluent::Engine.emit(rewrited_tag, time, record)
|
63
45
|
end
|
64
46
|
|
65
47
|
chain.next
|
66
48
|
end
|
67
49
|
|
50
|
+
def rewrite_tag(tag, record)
|
51
|
+
placeholder = get_placeholder(tag)
|
52
|
+
@rewriterules.each do |rewritekey, regexp, match_operator, rewritetag|
|
53
|
+
rewritevalue = record[rewritekey].to_s
|
54
|
+
next if rewritevalue.empty? && match_operator != MATCH_OPERATOR_EXCLUDE
|
55
|
+
matched = regexp && regexp.match(rewritevalue)
|
56
|
+
case match_operator
|
57
|
+
when MATCH_OPERATOR_EXCLUDE
|
58
|
+
next if matched
|
59
|
+
else
|
60
|
+
next if !matched
|
61
|
+
backreference_table = get_backreference_table($~.captures)
|
62
|
+
rewritetag.gsub!(/\$\d+/, backreference_table)
|
63
|
+
end
|
64
|
+
rewritetag.gsub!(/(\${[a-z]+}|__[A-Z]+__)/, placeholder)
|
65
|
+
return rewritetag
|
66
|
+
end
|
67
|
+
return nil
|
68
|
+
end
|
69
|
+
|
70
|
+
def parse_rewriterule(rule)
|
71
|
+
if rule.match(/^([^\s]+)\s+(.+?)\s+([^\s]+)$/)
|
72
|
+
return $~.captures
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
68
76
|
def trim_regex_quote(regexp)
|
69
77
|
if regexp.start_with?('"') && regexp.end_with?('"')
|
70
78
|
$log.info "rewrite_tag_filter: [DEPRECATED] Use ^....$ pattern for partial word match instead of double-quote-delimiter. #{regexp}"
|
71
|
-
|
79
|
+
regexp = regexp[1..-2]
|
80
|
+
end
|
81
|
+
if regexp.start_with?('!')
|
82
|
+
regexp = regexp[1, regexp.length]
|
72
83
|
end
|
73
84
|
return regexp
|
74
85
|
end
|
75
86
|
|
76
|
-
def
|
87
|
+
def get_match_operator(regexp)
|
88
|
+
return '!' if regexp.start_with?('!')
|
89
|
+
return ''
|
90
|
+
end
|
91
|
+
|
92
|
+
def get_backreference_table(elements)
|
77
93
|
hash_table = Hash.new
|
78
94
|
elements.each.with_index(1) do |value, index|
|
79
95
|
hash_table["$#{index}"] = @capitalize_regex_backreference ? value.capitalize : value
|
@@ -16,7 +16,7 @@ class RewriteTagFilterOutputTest < Test::Unit::TestCase
|
|
16
16
|
# aggressive test
|
17
17
|
# indentation, comment, capitalize_regex_backreference, regex with space aside.
|
18
18
|
# [DEPRECATED] Use ^....$ pattern for partial word match instead of double-quote-delimiter.
|
19
|
-
|
19
|
+
CONFIG_INDENT_SPACE_AND_CAPITALIZE_OPTION = %[
|
20
20
|
capitalize_regex_backreference yes
|
21
21
|
rewriterule1 domain ^www\.google\.com$ site.Google # some comment
|
22
22
|
rewriterule2 domain ^(news)\.(google)\.com$ site.$2$1
|
@@ -25,18 +25,30 @@ class RewriteTagFilterOutputTest < Test::Unit::TestCase
|
|
25
25
|
]
|
26
26
|
|
27
27
|
# remove_tag_prefix test
|
28
|
-
|
28
|
+
CONFIG_REMOVE_TAG_PREFIX = %[
|
29
29
|
rewriterule1 domain ^www\.google\.com$ ${tag}
|
30
30
|
remove_tag_prefix input
|
31
31
|
]
|
32
32
|
|
33
33
|
# hostname placeholder test
|
34
|
-
|
34
|
+
CONFIG_SHORT_HOSTNAME = %[
|
35
35
|
rewriterule1 domain ^www\.google\.com$ ${hostname}
|
36
36
|
remove_tag_prefix input
|
37
37
|
hostname_command hostname -s
|
38
38
|
]
|
39
39
|
|
40
|
+
# '!' character (exclamation mark) to specify a non-matching pattern
|
41
|
+
CONFIG_NON_MATCHING = %[
|
42
|
+
rewriterule1 domain !^www\..+$ not_start_with_www
|
43
|
+
rewriterule2 domain ^www\..+$ start_with_www
|
44
|
+
]
|
45
|
+
|
46
|
+
# jump of index
|
47
|
+
CONFIG_JUMP_INDEX = %[
|
48
|
+
rewriterule10 domain ^www\.google\.com$ site.Google
|
49
|
+
rewriterule20 domain ^news\.google\.com$ site.GoogleNews
|
50
|
+
]
|
51
|
+
|
40
52
|
def create_driver(conf=CONFIG,tag='test')
|
41
53
|
Fluent::Test::OutputTestDriver.new(Fluent::RewriteTagFilterOutput, tag).configure(conf)
|
42
54
|
end
|
@@ -45,13 +57,19 @@ class RewriteTagFilterOutputTest < Test::Unit::TestCase
|
|
45
57
|
assert_raise(Fluent::ConfigError) {
|
46
58
|
d = create_driver('')
|
47
59
|
}
|
60
|
+
assert_raise(Fluent::ConfigError) {
|
61
|
+
d = create_driver('rewriterule1 foo')
|
62
|
+
}
|
63
|
+
assert_raise(Fluent::ConfigError) {
|
64
|
+
d = create_driver('rewriterule1 foo foo')
|
65
|
+
}
|
48
66
|
d = create_driver %[
|
49
67
|
rewriterule1 domain ^www.google.com$ site.Google
|
50
68
|
rewriterule2 domain ^news.google.com$ site.GoogleNews
|
51
69
|
]
|
52
|
-
d.instance.inspect
|
53
|
-
assert_equal 'domain ^www.google.com$ site.Google', d.instance.rewriterule1
|
54
|
-
assert_equal 'domain ^news.google.com$ site.GoogleNews', d.instance.rewriterule2
|
70
|
+
puts d.instance.inspect
|
71
|
+
assert_equal 'domain ^www.google.com$ site.Google', d.instance.config['rewriterule1']
|
72
|
+
assert_equal 'domain ^news.google.com$ site.GoogleNews', d.instance.config['rewriterule2']
|
55
73
|
end
|
56
74
|
|
57
75
|
def test_emit
|
@@ -62,6 +80,7 @@ class RewriteTagFilterOutputTest < Test::Unit::TestCase
|
|
62
80
|
d1.emit({'domain' => 'map.google.com', 'path' => '/', 'agent' => 'Macintosh; Intel Mac OS X 10_7_4', 'response_time' => 900000})
|
63
81
|
d1.emit({'domain' => 'labs.google.com', 'path' => '/', 'agent' => 'Mozilla/5.0 Googlebot-FooBar/2.1', 'response_time' => 900000})
|
64
82
|
d1.emit({'domain' => 'tagtest.google.com', 'path' => '/', 'agent' => 'Googlebot', 'response_time' => 900000})
|
83
|
+
d1.emit({'domain' => 'noop.example.com'}) # to be ignored
|
65
84
|
end
|
66
85
|
emits = d1.emits
|
67
86
|
assert_equal 5, emits.length
|
@@ -78,8 +97,8 @@ class RewriteTagFilterOutputTest < Test::Unit::TestCase
|
|
78
97
|
assert_equal 'site.input.access.tagtest', emits[4][0] #tag
|
79
98
|
end
|
80
99
|
|
81
|
-
def
|
82
|
-
d1 = create_driver(
|
100
|
+
def test_emit2_indent_and_capitalize_option
|
101
|
+
d1 = create_driver(CONFIG_INDENT_SPACE_AND_CAPITALIZE_OPTION, 'input.access')
|
83
102
|
d1.run do
|
84
103
|
d1.emit({'domain' => 'www.google.com', 'path' => '/foo/bar?key=value', 'agent' => 'Googlebot', 'response_time' => 1000000})
|
85
104
|
d1.emit({'domain' => 'news.google.com', 'path' => '/', 'agent' => 'Googlebot-Mobile', 'response_time' => 900000})
|
@@ -99,8 +118,8 @@ class RewriteTagFilterOutputTest < Test::Unit::TestCase
|
|
99
118
|
assert_equal 'agent.Googlebot-Foobar', emits[3][0] #tag
|
100
119
|
end
|
101
120
|
|
102
|
-
def
|
103
|
-
d1 = create_driver(
|
121
|
+
def test_emit3_remove_tag_prefix
|
122
|
+
d1 = create_driver(CONFIG_REMOVE_TAG_PREFIX, 'input.access')
|
104
123
|
d1.run do
|
105
124
|
d1.emit({'domain' => 'www.google.com', 'path' => '/foo/bar?key=value', 'agent' => 'Googlebot', 'response_time' => 1000000})
|
106
125
|
end
|
@@ -110,8 +129,8 @@ class RewriteTagFilterOutputTest < Test::Unit::TestCase
|
|
110
129
|
assert_equal 'access', emits[0][0] # tag
|
111
130
|
end
|
112
131
|
|
113
|
-
def
|
114
|
-
d1 = create_driver(
|
132
|
+
def test_emit4_short_hostname
|
133
|
+
d1 = create_driver(CONFIG_SHORT_HOSTNAME, 'input.access')
|
115
134
|
d1.run do
|
116
135
|
d1.emit({'domain' => 'www.google.com', 'path' => '/foo/bar?key=value', 'agent' => 'Googlebot', 'response_time' => 1000000})
|
117
136
|
end
|
@@ -120,5 +139,37 @@ class RewriteTagFilterOutputTest < Test::Unit::TestCase
|
|
120
139
|
p emits[0]
|
121
140
|
assert_equal `hostname -s`.chomp, emits[0][0] # tag
|
122
141
|
end
|
142
|
+
|
143
|
+
def test_emit5_non_matching
|
144
|
+
d1 = create_driver(CONFIG_NON_MATCHING, 'input.access')
|
145
|
+
d1.run do
|
146
|
+
d1.emit({'domain' => 'www.google.com'})
|
147
|
+
d1.emit({'path' => '/'})
|
148
|
+
d1.emit({'domain' => 'maps.google.com'})
|
149
|
+
end
|
150
|
+
emits = d1.emits
|
151
|
+
assert_equal 3, emits.length
|
152
|
+
p emits[0]
|
153
|
+
assert_equal 'start_with_www', emits[0][0] # tag
|
154
|
+
p emits[1]
|
155
|
+
assert_equal 'not_start_with_www', emits[1][0] # tag
|
156
|
+
p emits[2]
|
157
|
+
assert_equal 'not_start_with_www', emits[2][0] # tag
|
158
|
+
end
|
159
|
+
|
160
|
+
def test_emit6_jump_index
|
161
|
+
d1 = create_driver(CONFIG_JUMP_INDEX, 'input.access')
|
162
|
+
d1.run do
|
163
|
+
d1.emit({'domain' => 'www.google.com', 'path' => '/', 'agent' => 'Googlebot', 'response_time' => 1000000})
|
164
|
+
d1.emit({'domain' => 'news.google.com', 'path' => '/', 'agent' => 'Googlebot', 'response_time' => 900000})
|
165
|
+
end
|
166
|
+
emits = d1.emits
|
167
|
+
assert_equal 2, emits.length
|
168
|
+
p emits[0]
|
169
|
+
assert_equal 'site.Google', emits[0][0] # tag
|
170
|
+
p emits[1]
|
171
|
+
assert_equal 'site.GoogleNews', emits[1][0] # tag
|
172
|
+
end
|
173
|
+
|
123
174
|
end
|
124
175
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-rewrite-tag-filter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.1
|
4
|
+
version: 1.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-07-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -85,9 +85,10 @@ rubygems_version: 1.8.23
|
|
85
85
|
signing_key:
|
86
86
|
specification_version: 3
|
87
87
|
summary: Fluentd Output filter plugin. It has designed to rewrite tag like mod_rewrite.
|
88
|
-
Re-emmit a record with rewrited tag when a value matches with the regular
|
89
|
-
Also you can change a tag from apache log by domain, status-code(ex.
|
90
|
-
user-agent, request-uri, regex-backreference and so on with regular
|
88
|
+
Re-emmit a record with rewrited tag when a value matches/unmatches with the regular
|
89
|
+
expression. Also you can change a tag from apache log by domain, status-code(ex.
|
90
|
+
500 error), user-agent, request-uri, regex-backreference and so on with regular
|
91
|
+
expression.
|
91
92
|
test_files:
|
92
93
|
- test/helper.rb
|
93
94
|
- test/plugin/test_out_rewrite_tag_filter.rb
|