fluent-plugin-woothee 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +13 -0
- data/README.md +79 -0
- data/Rakefile +11 -0
- data/fluent-plugin-woothee.gemspec +22 -0
- data/lib/fluent/plugin/out_woothee.rb +142 -0
- data/test/helper.rb +28 -0
- data/test/plugin/test_out_woothee.rb +352 -0
- metadata +136 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright (c) 2012- TAGOMORI Satoshi
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
data/README.md
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
# fluent-plugin-woothee
|
2
|
+
|
3
|
+
## WootheeOutput
|
4
|
+
|
5
|
+
'fluent-plugin-woothee' is a Fluentd plugin to parse UserAgent strings and to filter/drop specified categories of user terminals (like 'pc', 'smartphone' and so on).
|
6
|
+
|
7
|
+
'woothee' is a multi-language user-agent string parser project. See: https://github.com/tagomoris/woothee
|
8
|
+
|
9
|
+
## Configuration
|
10
|
+
|
11
|
+
To add the woothee parser result to matched messages:
|
12
|
+
|
13
|
+
<match input.**>
|
14
|
+
type woothee
|
15
|
+
key_name agent
|
16
|
+
remove_prefix input
|
17
|
+
add_prefix merged
|
18
|
+
merge_agent_info yes
|
19
|
+
</match>
|
20
|
+
|
21
|
+
Output messages with tag 'merged.**' have attributes like 'agent\_name', 'agent\_category' and 'agent\_os' from the woothee parser result. If you want to change the attribute names, or also want to merge the browser vendor and version attributes, write the configuration as below:
|
22
|
+
|
23
|
+
<match input.**>
|
24
|
+
type woothee
|
25
|
+
key_name agent
|
26
|
+
remove_prefix input
|
27
|
+
add_prefix merged
|
28
|
+
merge_agent_info yes
|
29
|
+
out_key_name ua_name
|
30
|
+
out_key_category ua_category
|
31
|
+
out_key_os ua_os
|
32
|
+
out_key_version ua_version
|
33
|
+
out_key_vendor ua_vendor
|
34
|
+
</match>
|
35
|
+
|
36
|
+
To re-emit messages with specified user-agent categories (and merge woothee parser result), configure like this:
|
37
|
+
|
38
|
+
<match input.**>
|
39
|
+
type woothee
|
40
|
+
key_name agent
|
41
|
+
filter_categories pc,smartphone,mobilephone,appliance
|
42
|
+
remove_prefix input
|
43
|
+
add_prefix merged
|
44
|
+
merge_agent_info yes
|
45
|
+
</match>
|
46
|
+
|
47
|
+
Or, you can specify categories to drop (and not to merge woothee result):
|
48
|
+
|
49
|
+
<match input.**>
|
50
|
+
type woothee
|
51
|
+
key_name agent
|
52
|
+
drop_categories crawler
|
53
|
+
remove_prefix input
|
54
|
+
add_prefix merged
|
55
|
+
merge_agent_info false # default
|
56
|
+
</match>
|
57
|
+
|
58
|
+
### Fast Crawler Filter
|
59
|
+
|
60
|
+
If you want to drop __almost__ all messages with a crawler's user-agent, and not merge the woothee result, just specify the plugin type:
|
61
|
+
|
62
|
+
<match input.**>
|
63
|
+
type woothee_fast_crawler_filter
|
64
|
+
key_name useragent
|
65
|
+
tag filtered
|
66
|
+
</match>
|
67
|
+
|
68
|
+
With this configuration, 'fluent-plugin-woothee' uses 'Woothee.is_crawler' of woothee, a fast but incomplete method to judge whether a user-agent is a crawler or not.
|
69
|
+
If you want to drop all of crawlers completely, specify 'type woothee' and 'drop_categories crawler'.
|
70
|
+
|
71
|
+
## TODO
|
72
|
+
|
73
|
+
* patches welcome!
|
74
|
+
|
75
|
+
## Copyright
|
76
|
+
|
77
|
+
* Copyright (c) 2012- TAGOMORI Satoshi (tagomoris)
|
78
|
+
* License
|
79
|
+
* Apache License, Version 2.0
|
data/Rakefile
ADDED
data/fluent-plugin-woothee.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for fluent-plugin-woothee.
Gem::Specification.new do |gem|
  gem.name          = "fluent-plugin-woothee"
  gem.version       = "0.0.1"
  gem.authors       = ["TAGOMORI Satoshi"]
  gem.email         = ["tagomoris@gmail.com"]
  gem.description   = %q{parsing by Project Woothee. See https://github.com/tagomoris/woothee }
  gem.summary       = %q{Fluentd plugin to parse UserAgent strings}
  gem.homepage      = "https://github.com/tagomoris/fluent-plugin-woothee"

  # Split the file list on newlines only. The previous `$\` (output record
  # separator) is nil by default, which makes String#split break on any
  # whitespace and therefore mangles file names that contain spaces.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependencies are also available during development, so fluentd
  # and woothee are declared once (as runtime) instead of twice.
  gem.add_development_dependency "rake"
  gem.add_runtime_dependency "fluentd"
  gem.add_runtime_dependency "woothee", ">= 0.2.4"
end
|
data/lib/fluent/plugin/out_woothee.rb
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
# Fluentd output plugin that inspects the user-agent string stored in each
# record with Woothee and re-emits the record under a rewritten tag.
#
# It is registered under two type names:
#
# * 'woothee' - parses the user-agent with Woothee.parse, optionally keeps
#   (filter_categories) or discards (drop_categories) records by category,
#   and optionally merges the parser result into the record
#   (merge_agent_info).
# * 'woothee_fast_crawler_filter' - only discards records for which
#   Woothee.is_crawler is true; nothing is merged into the record.
class Fluent::WootheeOutput < Fluent::Output
  Fluent::Plugin.register_output('woothee', self)
  Fluent::Plugin.register_output('woothee_fast_crawler_filter', self)

  # Output tag: either a fixed 'tag', or the input tag with 'remove_prefix'
  # stripped and/or 'add_prefix' prepended. 'tag' cannot be combined with the
  # prefix options, and at least one of the three must be given.
  config_param :tag, :string, :default => nil
  config_param :remove_prefix, :string, :default => nil
  config_param :add_prefix, :string, :default => nil

  # Forced to true by #configure when the plugin type is
  # 'woothee_fast_crawler_filter'.
  config_param :fast_crawler_filter_mode, :bool, :default => false

  # Record key that holds the user-agent string (required).
  config_param :key_name, :string

  # Comma separated category names, converted to arrays of symbols.
  config_param :filter_categories, :default => [] do |val|
    val.split(',').map(&:to_sym)
  end
  config_param :drop_categories, :default => [] do |val|
    val.split(',').map(&:to_sym)
  end

  # :filter, :drop or :through; set by #configure (left unset in fast crawler
  # filter mode).
  attr_accessor :mode

  # Whether to merge the parser result into emitted records, and under which
  # keys. A nil key suppresses output of that attribute.
  config_param :merge_agent_info, :bool, :default => false
  config_param :out_key_name, :string, :default => 'agent_name'
  config_param :out_key_category, :string, :default => 'agent_category'
  config_param :out_key_os, :string, :default => 'agent_os'
  config_param :out_key_version, :string, :default => nil # suppress output
  config_param :out_key_vendor, :string, :default => nil # suppress output

  def initialize
    super
    require 'woothee'
  end

  # Validates the configuration and derives the tag-rewriting strings and the
  # operating mode.
  #
  # Raises Fluent::ConfigError when the tag options are missing or
  # conflicting, when fast crawler filter mode is combined with
  # filter/drop/merge options, when both filter and drop categories are
  # given, when a category name is not in Woothee::CATEGORY_LIST, or when the
  # configuration would neither filter/drop nor merge anything.
  def configure(conf)
    super

    # tag ->
    unless @tag || @remove_prefix || @add_prefix
      raise Fluent::ConfigError, "missing both of remove_prefix and add_prefix"
    end
    if @tag && (@remove_prefix || @add_prefix)
      raise Fluent::ConfigError, "both of tag and remove_prefix/add_prefix must not be specified"
    end
    if @remove_prefix
      @removed_prefix_string = @remove_prefix + '.'
      @removed_length = @removed_prefix_string.length
    end
    @added_prefix_string = @add_prefix + '.' if @add_prefix
    # <- tag

    filtering = !@filter_categories.empty?
    dropping = !@drop_categories.empty?

    if conf['type'] == 'woothee_fast_crawler_filter' || @fast_crawler_filter_mode
      @fast_crawler_filter_mode = true

      if filtering || dropping || @merge_agent_info
        raise Fluent::ConfigError, "fast_crawler_filter cannot be specified with filter/drop/merge options"
      end

      return
    end

    if filtering && dropping
      raise Fluent::ConfigError, "both of 'filter' and 'drop' categories specified"
    end

    if filtering
      ensure_known_categories('filter_categories', @filter_categories)
      @mode = :filter
    elsif dropping
      ensure_known_categories('drop_categories', @drop_categories)
      @mode = :drop
    else
      @mode = :through
    end

    if @mode == :through && !@merge_agent_info
      raise Fluent::ConfigError, "configured to do nothing (neither filter/drop nor addition of parser result)"
    end
  end

  # Returns the tag to emit under for an incoming +tag+.
  #
  # With a fixed 'tag' configured, that tag is returned. Otherwise the
  # 'remove_prefix' (plus its trailing dot) is stripped when the tag starts
  # with it or equals it, and 'add_prefix' is prepended; if nothing remains
  # after stripping, the bare 'add_prefix' is used.
  def tag_mangle(tag)
    return @tag if @tag

    if @remove_prefix &&
       ((tag.start_with?(@removed_prefix_string) && tag.length > @removed_length) || tag == @remove_prefix)
      # When tag == @remove_prefix the slice starts past the end of the
      # string and yields nil; fall back to '' so the result is always a
      # String, even when no add_prefix is configured.
      tag = tag[@removed_length..-1] || ''
    end

    if @add_prefix
      tag = if tag && !tag.empty?
              @added_prefix_string + tag
            else
              @add_prefix
            end
    end

    tag
  end

  # Re-emits under +tag+ every record of +es+ whose user-agent is not judged
  # a crawler by Woothee.is_crawler. A missing user-agent is treated as ''.
  def fast_crawler_filter_emit(tag, es)
    es.each do |time, record|
      next if Woothee.is_crawler(record[@key_name] || '')

      Fluent::Engine.emit(tag, time, record)
    end
  end

  # Parses the user-agent of every record of +es+, skips records according to
  # the filter/drop mode, optionally merges the parser result into a copy of
  # the record, and re-emits it under +tag+.
  def normal_emit(tag, es)
    es.each do |time, record|
      parsed = Woothee.parse(record[@key_name] || '')

      category = parsed[Woothee::ATTRIBUTE_CATEGORY]
      next if @mode == :filter && !@filter_categories.include?(category)
      next if @mode == :drop && @drop_categories.include?(category)

      if @merge_agent_info
        # Hash#merge returns a new hash, so the incoming record is not mutated.
        record = record.merge({
          @out_key_name => parsed[Woothee::ATTRIBUTE_NAME],
          @out_key_category => parsed[Woothee::ATTRIBUTE_CATEGORY].to_s,
          @out_key_os => parsed[Woothee::ATTRIBUTE_OS]
        })
        record[@out_key_version] = parsed[Woothee::ATTRIBUTE_VERSION] if @out_key_version
        record[@out_key_vendor] = parsed[Woothee::ATTRIBUTE_VENDOR] if @out_key_vendor
      end

      Fluent::Engine.emit(tag, time, record)
    end
  end

  # Output entry point: rewrites the tag, dispatches to the emit routine for
  # the configured mode, then advances the chain.
  def emit(tag, es, chain)
    tag = tag_mangle(tag)

    if @fast_crawler_filter_mode
      fast_crawler_filter_emit(tag, es)
    else
      normal_emit(tag, es)
    end

    chain.next
  end

  private

  # Raises Fluent::ConfigError unless every entry of +categories+ is listed in
  # Woothee::CATEGORY_LIST. +param_name+ is used in the error message.
  def ensure_known_categories(param_name, categories)
    return if categories.all? { |c| Woothee::CATEGORY_LIST.include?(c) }

    raise Fluent::ConfigError, "#{param_name} has invalid category name"
  end
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'rubygems'
require 'bundler'

# Activate the default and development gem groups; abort with Bundler's own
# status code when gems are missing.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => err
  $stderr.puts err.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit err.status_code
end

require 'test/unit'

# Put lib/ and then the test directory itself at the front of the load path.
[File.join(File.dirname(__FILE__), '..', 'lib'), File.dirname(__FILE__)].each do |dir|
  $LOAD_PATH.unshift(dir)
end

require 'fluent/test'

# Unless VERBOSE is set, replace the global logger with an object that
# accepts and ignores every message.
unless ENV.key?('VERBOSE')
  silent_logger = Object.new
  def silent_logger.method_missing(method, *args)
    # pass
  end
  $log = silent_logger
end

require 'fluent/plugin/out_woothee'

class Test::Unit::TestCase
end
|
data/test/plugin/test_out_woothee.rb
ADDED
@@ -0,0 +1,352 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Tests Fluent::WootheeOutput in its four configurations: fast crawler
# filter, through & merge, filter & merge, and drop without merge.
class Fluent::WootheeOutputTest < Test::Unit::TestCase
  # fast crawler filter
  CONFIG0 = %[
type woothee_fast_crawler_filter
key_name useragent
tag filtered
]

  # through & merge
  CONFIG1 = %[
type woothee
key_name agent
remove_prefix test
add_prefix merged
merge_agent_info yes
]

  # filter & merge
  CONFIG2 = %[
type woothee
key_name agent
filter_categories pc,smartphone,mobilephone,appliance
remove_prefix test
add_prefix merged
merge_agent_info yes
out_key_name ua_name
out_key_category ua_category
out_key_os ua_os
out_key_version ua_version
out_key_vendor ua_vendor
]

  # drop & non-merge
  CONFIG3 = %[
type woothee
key_name user_agent
drop_categories crawler,misc
tag selected
]

  # User-agents emitted (with 'value' 1..8) by the fast crawler filter test.
  FAST_FILTER_AGENTS = [
    'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
    'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)',
    'Mozilla/5.0 (iPad; U; CPU OS 4_3_2 like Mac OS X; ja-jp) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5',
    'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)',
    'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)',
    'Mozilla/5.0 (compatible; Rakutenbot/1.0; +http://dynamic.rakuten.co.jp/bot.html)',
    'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4; ja-jp) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1',
    'Yeti/1.0 (NHN Corp.; http://help.naver.com/robots/)'
  ]

  # User-agents emitted (with 'value' 0..7) by the through/filter/drop tests.
  SAMPLE_AGENTS = [
    'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)',
    'Mozilla/5.0 (Windows NT 6.0; rv:9.0.1) Gecko/20100101 Firefox/9.0.1',
    'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:9.0.1) Gecko/20100101 Firefox/9.0.1',
    'Mozilla/5.0 (Linux; U; Android 3.1; ja-jp; L-06C Build/HMJ37) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13',
    'DoCoMo/1.0/N505i/c20/TB/W24H12',
    'Mozilla/5.0 (PlayStation Vita 1.51) AppleWebKit/531.22.8 (KHTML, like Gecko) Silk/3.2',
    'Mozilla/5.0 (compatible; Google Desktop/5.9.1005.12335; http://desktop.google.com/)',
    'msnbot/1.1 (+http://search.msn.com/msnbot.htm)'
  ]

  # Expected parser output for SAMPLE_AGENTS, same order:
  # [name, category, os, vendor, version]. Vendor and version are listed only
  # for the entries that pass the CONFIG2 category filter.
  EXPECTED_PARSED = [
    ['Internet Explorer', 'pc', 'Windows 8', 'Microsoft', '10.0'],
    ['Firefox', 'pc', 'Windows Vista', 'Mozilla', '9.0.1'],
    ['Firefox', 'pc', 'Linux', 'Mozilla', '9.0.1'],
    ['Safari', 'smartphone', 'Android', 'Apple', '4.0'],
    ['docomo', 'mobilephone', 'docomo', 'docomo', 'N505i'],
    ['PlayStation Vita', 'appliance', 'PlayStation Vita', 'Sony', 'UNKNOWN'],
    ['Google Desktop', 'misc', 'UNKNOWN'],
    ['msnbot', 'crawler', 'UNKNOWN']
  ]

  def create_driver(conf=CONFIG1,tag='test')
    Fluent::Test::OutputTestDriver.new(Fluent::WootheeOutput, tag).configure(conf)
  end

  def test_configure
    # fast_crawler_filter
    plugin = create_driver(CONFIG0).instance
    assert_equal true, plugin.fast_crawler_filter_mode
    assert_equal 'useragent', plugin.key_name
    assert_equal 'filtered', plugin.tag

    # through & merge
    plugin = create_driver(CONFIG1).instance
    assert_equal false, plugin.fast_crawler_filter_mode
    assert_equal 'agent', plugin.key_name
    assert_equal 'test', plugin.remove_prefix
    assert_equal 'merged', plugin.add_prefix

    assert_equal 0, plugin.filter_categories.size
    assert_equal 0, plugin.drop_categories.size
    assert_equal :through, plugin.mode

    assert_equal true, plugin.merge_agent_info
    assert_equal 'agent_name', plugin.out_key_name
    assert_equal 'agent_category', plugin.out_key_category
    assert_equal 'agent_os', plugin.out_key_os
    assert_nil plugin.out_key_version
    assert_nil plugin.out_key_vendor

    # filter & merge
    plugin = create_driver(CONFIG2).instance
    assert_equal false, plugin.fast_crawler_filter_mode
    assert_equal 'agent', plugin.key_name
    assert_equal 'test', plugin.remove_prefix
    assert_equal 'merged', plugin.add_prefix

    assert_equal 4, plugin.filter_categories.size
    assert_equal [:pc,:smartphone,:mobilephone,:appliance], plugin.filter_categories
    assert_equal 0, plugin.drop_categories.size
    assert_equal :filter, plugin.mode

    assert_equal true, plugin.merge_agent_info
    assert_equal 'ua_name', plugin.out_key_name
    assert_equal 'ua_category', plugin.out_key_category
    assert_equal 'ua_os', plugin.out_key_os
    assert_equal 'ua_version', plugin.out_key_version
    assert_equal 'ua_vendor', plugin.out_key_vendor

    # drop & non-merge
    plugin = create_driver(CONFIG3).instance
    assert_equal false, plugin.fast_crawler_filter_mode
    assert_equal 'user_agent', plugin.key_name
    assert_equal 'selected', plugin.tag

    assert_equal 0, plugin.filter_categories.size
    assert_equal 2, plugin.drop_categories.size
    assert_equal [:crawler,:misc], plugin.drop_categories
    assert_equal :drop, plugin.mode

    assert_equal false, plugin.merge_agent_info
  end

  def test_tag_mangle
    inputs = ['data', 'test.data', 'test.test.data', 'test']
    expectations = [
      [CONFIG0, ['filtered', 'filtered', 'filtered', 'filtered']],
      [CONFIG1, ['merged.data', 'merged.data', 'merged.test.data', 'merged']],
      [CONFIG3, ['selected', 'selected', 'selected', 'selected']]
    ]

    expectations.each do |conf, expected_tags|
      plugin = create_driver(conf).instance
      inputs.zip(expected_tags).each do |input, expected|
        assert_equal expected, plugin.tag_mangle(input)
      end
    end
  end

  def test_emit_fast_crawler_filter
    driver = create_driver CONFIG0
    time = Time.parse('2012-07-20 16:19:00').to_i
    driver.run do
      FAST_FILTER_AGENTS.each_with_index do |ua, i|
        driver.emit({'useragent' => ua, 'value' => i + 1}, time)
      end
    end

    emits = driver.emits
    assert_equal 4, emits.size

    first_tag, first_time, first_record = emits[0]
    assert_equal 'filtered', first_tag
    assert_equal time, first_time
    assert_equal FAST_FILTER_AGENTS[2], first_record['useragent']
    assert_equal 3, first_record['value']
    assert_equal 2, first_record.keys.size

    # The remaining survivors, in emit order.
    [4, 6, 7].each_with_index do |value, i|
      assert_equal value, emits[i + 1][2]['value']
    end
  end

  # # through & merge
  def test_emit_through
    driver = create_driver(CONFIG1, 'test.message')
    time = Time.parse('2012-07-20 16:40:30').to_i
    emit_sample_agents(driver, 'agent', time)

    emits = driver.emits
    assert_equal 8, emits.size
    assert_equal 'merged.message', emits[0][0]
    assert_equal time, emits[0][1]
    # value, agent and the three merged attributes
    assert_equal 5, emits[0][2].keys.size

    EXPECTED_PARSED.each_with_index do |(name, category, os), i|
      record = emits[i][2]
      assert_equal i, record['value']
      assert_equal name, record['agent_name']
      assert_equal category, record['agent_category']
      assert_equal os, record['agent_os']
    end
  end

  # # filter & merge
  def test_emit_filter
    driver = create_driver(CONFIG2, 'test.message')
    time = Time.parse('2012-07-20 16:40:30').to_i
    emit_sample_agents(driver, 'agent', time)

    emits = driver.emits
    assert_equal 6, emits.size
    assert_equal 'merged.message', emits[0][0]
    assert_equal time, emits[0][1]
    # value, agent and the five merged attributes
    assert_equal 7, emits[0][2].keys.size

    # Only the first six sample agents belong to the filtered categories.
    EXPECTED_PARSED.first(6).each_with_index do |(name, category, os, vendor, version), i|
      record = emits[i][2]
      assert_equal i, record['value']
      assert_equal name, record['ua_name']
      assert_equal category, record['ua_category']
      assert_equal os, record['ua_os']
      assert_equal vendor, record['ua_vendor']
      assert_equal version, record['ua_version']
    end
  end

  # # drop & non-merge
  def test_emit_drop
    driver = create_driver(CONFIG3, 'test.message')
    time = Time.parse('2012-07-20 16:40:30').to_i
    emit_sample_agents(driver, 'user_agent', time)

    emits = driver.emits
    assert_equal 6, emits.size
    assert_equal 'selected', emits[0][0]
    assert_equal time, emits[0][1]
    # nothing merged: only value and user_agent
    assert_equal 2, emits[0][2].keys.size

    # The misc and crawler agents (values 6 and 7) are dropped.
    (0..5).each do |i|
      assert_equal i, emits[i][2]['value']
    end
  end

  private

  # Emits every SAMPLE_AGENTS entry through +driver+ as
  # {'value' => index, key => user-agent} at +time+.
  def emit_sample_agents(driver, key, time)
    driver.run do
      SAMPLE_AGENTS.each_with_index do |ua, i|
        driver.emit({'value' => i, key => ua}, time)
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,136 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fluent-plugin-woothee
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- TAGOMORI Satoshi
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-07-20 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: fluentd
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: woothee
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 0.2.4
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.2.4
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: fluentd
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: woothee
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: 0.2.4
|
86
|
+
type: :runtime
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: 0.2.4
|
94
|
+
description: ! 'parsing by Project Woothee. See https://github.com/tagomoris/woothee '
|
95
|
+
email:
|
96
|
+
- tagomoris@gmail.com
|
97
|
+
executables: []
|
98
|
+
extensions: []
|
99
|
+
extra_rdoc_files: []
|
100
|
+
files:
|
101
|
+
- .gitignore
|
102
|
+
- Gemfile
|
103
|
+
- LICENSE.txt
|
104
|
+
- README.md
|
105
|
+
- Rakefile
|
106
|
+
- fluent-plugin-woothee.gemspec
|
107
|
+
- lib/fluent/plugin/out_woothee.rb
|
108
|
+
- test/helper.rb
|
109
|
+
- test/plugin/test_out_woothee.rb
|
110
|
+
homepage: https://github.com/tagomoris/fluent-plugin-woothee
|
111
|
+
licenses: []
|
112
|
+
post_install_message:
|
113
|
+
rdoc_options: []
|
114
|
+
require_paths:
|
115
|
+
- lib
|
116
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
117
|
+
none: false
|
118
|
+
requirements:
|
119
|
+
- - ! '>='
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: '0'
|
122
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
|
+
none: false
|
124
|
+
requirements:
|
125
|
+
- - ! '>='
|
126
|
+
- !ruby/object:Gem::Version
|
127
|
+
version: '0'
|
128
|
+
requirements: []
|
129
|
+
rubyforge_project:
|
130
|
+
rubygems_version: 1.8.21
|
131
|
+
signing_key:
|
132
|
+
specification_version: 3
|
133
|
+
summary: Fluentd plugin to parse UserAgent strings
|
134
|
+
test_files:
|
135
|
+
- test/helper.rb
|
136
|
+
- test/plugin/test_out_woothee.rb
|