fluent-plugin-woothee 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +59 -0
- data/fluent-plugin-woothee.gemspec +2 -2
- data/lib/fluent/plugin/filter_woothee.rb +107 -0
- data/lib/fluent/plugin/out_woothee.rb +7 -2
- data/test/helper.rb +1 -0
- data/test/plugin/test_filter_woothee.rb +338 -0
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7d68a5e296eb04df8c7a1234cf96db2a097c52f8
|
4
|
+
data.tar.gz: cd1d52ccf24f32dc9e3cb5f47c774e654da3ed17
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 06a22c57d945e94c124a64a066fd202784d489f5e5b46d55277c1289edacf404a018a9002ab93758c5df2c8c49cc1f1838159110aefbe1884e70362bb75bf0b6
|
7
|
+
data.tar.gz: fce1881e0f0c538e48f4d86de8f07db25df84445381a086927e03c96a67f477fd7e7518d18c452ab5ccc4758edec4778a6869929b442529cb1db9c6a7f754fb3
|
data/README.md
CHANGED
@@ -69,6 +69,65 @@ If you want to drop __almost__ all of messages with crawler's user-agent, and no
|
|
69
69
|
With this configuration, 'fluent-plugin-woothee' uses 'Woothee.is_crawler' from woothee, a fast but incomplete method of judging whether a user-agent is a crawler.
|
70
70
|
If you want to drop all crawlers completely, specify 'type woothee' and 'drop_categories crawler'.
|
71
71
|
|
72
|
+
## WootheeFilter
|
73
|
+
|
74
|
+
This is the filter version of 'fluent-plugin-woothee'.
|
75
|
+
Note that this filter version does not have rewrite tag functionality.
|
76
|
+
|
77
|
+
## Configuration
|
78
|
+
|
79
|
+
To add the woothee parser result to filtered messages:
|
80
|
+
|
81
|
+
<filter input.**>
|
82
|
+
type woothee
|
83
|
+
key_name agent
|
84
|
+
merge_agent_info yes
|
85
|
+
</filter>
|
86
|
+
|
87
|
+
Filtered messages keep their original tag and have attributes such as 'agent\_name', 'agent\_category' and 'agent\_os' from the woothee parser result. If you want to change the attribute names, or also want to merge the OS version, browser version and vendor attributes, write a configuration like the one below:
|
88
|
+
|
89
|
+
<filter input.**>
|
90
|
+
type woothee
|
91
|
+
key_name agent
|
92
|
+
merge_agent_info yes
|
93
|
+
out_key_name ua_name
|
94
|
+
out_key_category ua_category
|
95
|
+
out_key_os ua_os
|
96
|
+
out_key_os_version ua_os_version
|
97
|
+
out_key_version ua_version
|
98
|
+
out_key_vendor ua_vendor
|
99
|
+
</filter>
|
100
|
+
|
101
|
+
To pass through only messages whose user-agent belongs to the specified categories (and merge the woothee parser result), configure the filter like this:
|
102
|
+
|
103
|
+
<filter input.**>
|
104
|
+
type woothee
|
105
|
+
key_name agent
|
106
|
+
filter_categories pc,smartphone,mobilephone,appliance
|
107
|
+
merge_agent_info yes
|
108
|
+
</filter>
|
109
|
+
|
110
|
+
Alternatively, you can specify categories to drop (without merging the woothee result):
|
111
|
+
|
112
|
+
<filter input.**>
|
113
|
+
type woothee
|
114
|
+
key_name agent
|
115
|
+
drop_categories crawler
|
116
|
+
merge_agent_info false # default
|
117
|
+
</filter>
|
118
|
+
|
119
|
+
### Fast Crawler Filter
|
120
|
+
|
121
|
+
If you want to drop __almost__ all messages with a crawler's user-agent, without merging the woothee result, just specify this plugin type:
|
122
|
+
|
123
|
+
<filter input.**>
|
124
|
+
type woothee_fast_crawler_filter
|
125
|
+
key_name useragent
|
126
|
+
</filter>
|
127
|
+
|
128
|
+
With this configuration, 'fluent-plugin-woothee' uses 'Woothee.is_crawler' from woothee, a fast but incomplete method of judging whether a user-agent is a crawler.
|
129
|
+
If you want to drop all crawlers completely, specify 'type woothee' and 'drop_categories crawler'.
|
130
|
+
|
72
131
|
## TODO
|
73
132
|
|
74
133
|
* patches welcome!
|
@@ -2,13 +2,13 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "fluent-plugin-woothee"
|
5
|
-
gem.version = "0.1.1"
|
5
|
+
gem.version = "0.2.0"
|
6
6
|
gem.authors = ["TAGOMORI Satoshi"]
|
7
7
|
gem.email = ["tagomoris@gmail.com"]
|
8
8
|
gem.description = %q{parsing by Project Woothee. See https://github.com/woothee/woothee }
|
9
9
|
gem.summary = %q{Fluentd plugin to parse UserAgent strings with woothee parser. It adds device information or filter records with specific device types.}
|
10
10
|
gem.homepage = "https://github.com/tagomoris/fluent-plugin-woothee"
|
11
|
-
gem.license = "
|
11
|
+
gem.license = "Apache-2.0"
|
12
12
|
|
13
13
|
gem.files = `git ls-files`.split($\)
|
14
14
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
@@ -0,0 +1,107 @@
|
|
1
|
+
class Fluent::WootheeFilter < Fluent::Filter
|
2
|
+
Fluent::Plugin.register_filter('woothee', self)
|
3
|
+
Fluent::Plugin.register_filter('woothee_fast_crawler_filter', self)
|
4
|
+
|
5
|
+
config_param :fast_crawler_filter_mode, :bool, :default => false
|
6
|
+
|
7
|
+
config_param :key_name, :string
|
8
|
+
|
9
|
+
config_param :filter_categories, :default => [] do |val|
|
10
|
+
val.split(',').map(&:to_sym)
|
11
|
+
end
|
12
|
+
config_param :drop_categories, :default => [] do |val|
|
13
|
+
val.split(',').map(&:to_sym)
|
14
|
+
end
|
15
|
+
attr_accessor :mode
|
16
|
+
|
17
|
+
config_param :merge_agent_info, :bool, :default => false
|
18
|
+
config_param :out_key_name, :string, :default => 'agent_name'
|
19
|
+
config_param :out_key_category, :string, :default => 'agent_category'
|
20
|
+
config_param :out_key_os, :string, :default => 'agent_os'
|
21
|
+
config_param :out_key_os_version, :string, :default => nil # supress output
|
22
|
+
config_param :out_key_version, :string, :default => nil # supress output
|
23
|
+
config_param :out_key_vendor, :string, :default => nil # supress output
|
24
|
+
|
25
|
+
def initialize
|
26
|
+
super
|
27
|
+
require 'woothee'
|
28
|
+
end
|
29
|
+
|
30
|
+
def configure(conf)
|
31
|
+
super
|
32
|
+
|
33
|
+
if conf['type'] == 'woothee_fast_crawler_filter' or @fast_crawler_filter_mode
|
34
|
+
@fast_crawler_filter_mode = true
|
35
|
+
|
36
|
+
if @filter_categories.size > 0 or @drop_categories.size > 0 or @merge_agent_info
|
37
|
+
raise Fluent::ConfigError, "fast_crawler_filter cannot be specified with filter/drop/merge options"
|
38
|
+
end
|
39
|
+
|
40
|
+
return
|
41
|
+
end
|
42
|
+
|
43
|
+
if @filter_categories.size > 0 and @drop_categories.size > 0
|
44
|
+
raise Fluent::ConfigError, "both of 'filter' and 'drop' categories specified"
|
45
|
+
elsif @filter_categories.size > 0
|
46
|
+
unless @filter_categories.reduce(true){|r,i| r and Woothee::CATEGORY_LIST.include?(i)}
|
47
|
+
raise Fluent::ConfigError, "filter_categories has invalid category name"
|
48
|
+
end
|
49
|
+
@mode = :filter
|
50
|
+
elsif @drop_categories.size > 0
|
51
|
+
unless @drop_categories.reduce(true){|r,i| r and Woothee::CATEGORY_LIST.include?(i)}
|
52
|
+
raise Fluent::ConfigError, "drop_categories has invalid category name"
|
53
|
+
end
|
54
|
+
@mode = :drop
|
55
|
+
else
|
56
|
+
@mode = :through
|
57
|
+
end
|
58
|
+
|
59
|
+
if @mode == :through and not @merge_agent_info
|
60
|
+
raise Fluent::ConfigError, "configured not to do nothing (not to do either filter/drop nor addition of parser result)"
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def fast_crawler_filter_stream(tag, es)
|
65
|
+
new_es = Fluent::MultiEventStream.new
|
66
|
+
|
67
|
+
es.each do |time,record|
|
68
|
+
unless Woothee.is_crawler(record[@key_name] || '')
|
69
|
+
new_es.add(time, record.dup)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
new_es
|
73
|
+
end
|
74
|
+
|
75
|
+
def normal_filter_stream(tag, es)
|
76
|
+
new_es = Fluent::MultiEventStream.new
|
77
|
+
|
78
|
+
es.each do |time,record|
|
79
|
+
parsed = Woothee.parse(record[@key_name] || '')
|
80
|
+
|
81
|
+
category = parsed[Woothee::ATTRIBUTE_CATEGORY]
|
82
|
+
next if @mode == :filter and not @filter_categories.include?(category)
|
83
|
+
next if @mode == :drop and @drop_categories.include?(category)
|
84
|
+
|
85
|
+
if @merge_agent_info
|
86
|
+
record = record.merge({
|
87
|
+
@out_key_name => parsed[Woothee::ATTRIBUTE_NAME],
|
88
|
+
@out_key_category => parsed[Woothee::ATTRIBUTE_CATEGORY].to_s,
|
89
|
+
@out_key_os => parsed[Woothee::ATTRIBUTE_OS]
|
90
|
+
})
|
91
|
+
record[@out_key_os_version] = parsed[Woothee::ATTRIBUTE_OS_VERSION] if @out_key_os_version
|
92
|
+
record[@out_key_version] = parsed[Woothee::ATTRIBUTE_VERSION] if @out_key_version
|
93
|
+
record[@out_key_vendor] = parsed[Woothee::ATTRIBUTE_VENDOR] if @out_key_vendor
|
94
|
+
end
|
95
|
+
new_es.add(time, record.dup)
|
96
|
+
end
|
97
|
+
new_es
|
98
|
+
end
|
99
|
+
|
100
|
+
def filter_stream(tag, es)
|
101
|
+
if @fast_crawler_filter_mode
|
102
|
+
fast_crawler_filter_stream(tag, es)
|
103
|
+
else
|
104
|
+
normal_filter_stream(tag, es)
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end if defined?(Fluent::Filter)
|
@@ -2,6 +2,11 @@ class Fluent::WootheeOutput < Fluent::Output
|
|
2
2
|
Fluent::Plugin.register_output('woothee', self)
|
3
3
|
Fluent::Plugin.register_output('woothee_fast_crawler_filter', self)
|
4
4
|
|
5
|
+
# Define `router` method of v0.12 to support v0.10 or earlier
|
6
|
+
unless method_defined?(:router)
|
7
|
+
define_method("router") { Fluent::Engine }
|
8
|
+
end
|
9
|
+
|
5
10
|
config_param :tag, :string, :default => nil
|
6
11
|
config_param :remove_prefix, :string, :default => nil
|
7
12
|
config_param :add_prefix, :string, :default => nil
|
@@ -103,7 +108,7 @@ class Fluent::WootheeOutput < Fluent::Output
|
|
103
108
|
def fast_crawler_filter_emit(tag, es)
|
104
109
|
es.each do |time,record|
|
105
110
|
unless Woothee.is_crawler(record[@key_name] || '')
|
106
|
-
|
111
|
+
router.emit(tag, time, record)
|
107
112
|
end
|
108
113
|
end
|
109
114
|
end
|
@@ -126,7 +131,7 @@ class Fluent::WootheeOutput < Fluent::Output
|
|
126
131
|
record[@out_key_version] = parsed[Woothee::ATTRIBUTE_VERSION] if @out_key_version
|
127
132
|
record[@out_key_vendor] = parsed[Woothee::ATTRIBUTE_VENDOR] if @out_key_vendor
|
128
133
|
end
|
129
|
-
|
134
|
+
router.emit(tag, time, record)
|
130
135
|
end
|
131
136
|
end
|
132
137
|
|
data/test/helper.rb
CHANGED
@@ -0,0 +1,338 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class Fluent::WootheeFilterTest < Test::Unit::TestCase
|
4
|
+
# fast crawler filter
|
5
|
+
CONFIG0 = %[
|
6
|
+
type woothee_fast_crawler_filter
|
7
|
+
key_name useragent
|
8
|
+
]
|
9
|
+
|
10
|
+
# through & merge
|
11
|
+
CONFIG1 = %[
|
12
|
+
type woothee
|
13
|
+
key_name agent
|
14
|
+
merge_agent_info yes
|
15
|
+
]
|
16
|
+
|
17
|
+
# filter & merge
|
18
|
+
CONFIG2 = %[
|
19
|
+
type woothee
|
20
|
+
key_name agent
|
21
|
+
filter_categories pc,smartphone,mobilephone,appliance
|
22
|
+
merge_agent_info yes
|
23
|
+
out_key_name ua_name
|
24
|
+
out_key_category ua_category
|
25
|
+
out_key_os ua_os
|
26
|
+
out_key_os_version ua_os_version
|
27
|
+
out_key_version ua_version
|
28
|
+
out_key_vendor ua_vendor
|
29
|
+
]
|
30
|
+
|
31
|
+
# drop & non-merge
|
32
|
+
CONFIG3 = %[
|
33
|
+
type woothee
|
34
|
+
key_name user_agent
|
35
|
+
drop_categories crawler,misc
|
36
|
+
]
|
37
|
+
|
38
|
+
def setup
|
39
|
+
omit("Use fluentd v0.12 or later") unless defined?(Fluent::Filter)
|
40
|
+
|
41
|
+
Fluent::Test.setup
|
42
|
+
end
|
43
|
+
|
44
|
+
def create_driver(conf=CONFIG1,tag='test')
|
45
|
+
Fluent::Test::FilterTestDriver.new(Fluent::WootheeFilter, tag).configure(conf)
|
46
|
+
end
|
47
|
+
|
48
|
+
class TestConfigure < self
|
49
|
+
def test_fast_crawer_filter
|
50
|
+
d = create_driver CONFIG0
|
51
|
+
assert_equal true, d.instance.fast_crawler_filter_mode
|
52
|
+
assert_equal 'useragent', d.instance.key_name
|
53
|
+
end
|
54
|
+
|
55
|
+
def test_through_and_merge
|
56
|
+
d = create_driver CONFIG1
|
57
|
+
assert_equal false, d.instance.fast_crawler_filter_mode
|
58
|
+
assert_equal 'agent', d.instance.key_name
|
59
|
+
|
60
|
+
assert_equal 0, d.instance.filter_categories.size
|
61
|
+
assert_equal 0, d.instance.drop_categories.size
|
62
|
+
assert_equal :through, d.instance.mode
|
63
|
+
|
64
|
+
assert_equal true, d.instance.merge_agent_info
|
65
|
+
assert_equal 'agent_name', d.instance.out_key_name
|
66
|
+
assert_equal 'agent_category', d.instance.out_key_category
|
67
|
+
assert_equal 'agent_os', d.instance.out_key_os
|
68
|
+
assert_nil d.instance.out_key_version
|
69
|
+
assert_nil d.instance.out_key_vendor
|
70
|
+
end
|
71
|
+
|
72
|
+
def test_filter_and_merge
|
73
|
+
d = create_driver CONFIG2
|
74
|
+
assert_equal false, d.instance.fast_crawler_filter_mode
|
75
|
+
assert_equal 'agent', d.instance.key_name
|
76
|
+
|
77
|
+
assert_equal 4, d.instance.filter_categories.size
|
78
|
+
assert_equal [:pc,:smartphone,:mobilephone,:appliance], d.instance.filter_categories
|
79
|
+
assert_equal 0, d.instance.drop_categories.size
|
80
|
+
assert_equal :filter, d.instance.mode
|
81
|
+
|
82
|
+
assert_equal true, d.instance.merge_agent_info
|
83
|
+
assert_equal 'ua_name', d.instance.out_key_name
|
84
|
+
assert_equal 'ua_category', d.instance.out_key_category
|
85
|
+
assert_equal 'ua_os', d.instance.out_key_os
|
86
|
+
assert_equal 'ua_os_version', d.instance.out_key_os_version
|
87
|
+
assert_equal 'ua_version', d.instance.out_key_version
|
88
|
+
assert_equal 'ua_vendor', d.instance.out_key_vendor
|
89
|
+
end
|
90
|
+
|
91
|
+
def test_drop_and_non_merge
|
92
|
+
d = create_driver CONFIG3
|
93
|
+
assert_equal false, d.instance.fast_crawler_filter_mode
|
94
|
+
assert_equal 'user_agent', d.instance.key_name
|
95
|
+
|
96
|
+
assert_equal 0, d.instance.filter_categories.size
|
97
|
+
assert_equal 2, d.instance.drop_categories.size
|
98
|
+
assert_equal [:crawler,:misc], d.instance.drop_categories
|
99
|
+
assert_equal :drop, d.instance.mode
|
100
|
+
|
101
|
+
assert_equal false, d.instance.merge_agent_info
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
def test_filter_fast_crawler_filter_stream
|
106
|
+
d = create_driver CONFIG0
|
107
|
+
time = Time.parse('2012-07-20 16:19:00').to_i
|
108
|
+
d.run do
|
109
|
+
d.filter({'useragent' => 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', 'value' => 1}, time)
|
110
|
+
d.filter({'useragent' => 'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)', 'value' => 2}, time)
|
111
|
+
d.filter({'useragent' => 'Mozilla/5.0 (iPad; U; CPU OS 4_3_2 like Mac OS X; ja-jp) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5', 'value' => 3}, time)
|
112
|
+
d.filter({'useragent' => 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)', 'value' => 4}, time)
|
113
|
+
d.filter({'useragent' => 'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)', 'value' => 5}, time)
|
114
|
+
d.filter({'useragent' => 'Mozilla/5.0 (compatible; Rakutenbot/1.0; +http://dynamic.rakuten.co.jp/bot.html)', 'value' => 6}, time)
|
115
|
+
d.filter({'useragent' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4; ja-jp) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1', 'value' => 7}, time)
|
116
|
+
d.filter({'useragent' => 'Yeti/1.0 (NHN Corp.; http://help.naver.com/robots/)', 'value' => 8}, time)
|
117
|
+
end
|
118
|
+
|
119
|
+
filtered = d.filtered_as_array
|
120
|
+
assert_equal 4, filtered.size
|
121
|
+
|
122
|
+
assert_equal 'test', filtered[0][0]
|
123
|
+
assert_equal time, filtered[0][1]
|
124
|
+
assert_equal 'Mozilla/5.0 (iPad; U; CPU OS 4_3_2 like Mac OS X; ja-jp) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5', filtered[0][2]['useragent']
|
125
|
+
assert_equal 3, filtered[0][2]['value']
|
126
|
+
assert_equal 2, filtered[0][2].keys.size
|
127
|
+
|
128
|
+
assert_equal 4, filtered[1][2]['value']
|
129
|
+
assert_equal 6, filtered[2][2]['value']
|
130
|
+
assert_equal 7, filtered[3][2]['value']
|
131
|
+
end
|
132
|
+
|
133
|
+
# through & merge
|
134
|
+
def test_filter_through
|
135
|
+
d = create_driver(CONFIG1, 'test.message')
|
136
|
+
time = Time.parse('2012-07-20 16:40:30').to_i
|
137
|
+
d.run do
|
138
|
+
d.filter({'value' => 0, 'agent' => 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)'}, time)
|
139
|
+
d.filter({'value' => 1, 'agent' => 'Mozilla/5.0 (Windows NT 6.0; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'}, time)
|
140
|
+
d.filter({'value' => 2, 'agent' => 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'}, time)
|
141
|
+
d.filter({'value' => 3, 'agent' => 'Mozilla/5.0 (Linux; U; Android 3.1; ja-jp; L-06C Build/HMJ37) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13'}, time)
|
142
|
+
d.filter({'value' => 4, 'agent' => 'DoCoMo/1.0/N505i/c20/TB/W24H12'}, time)
|
143
|
+
d.filter({'value' => 5, 'agent' => 'Mozilla/5.0 (PlayStation Vita 1.51) AppleWebKit/531.22.8 (KHTML, like Gecko) Silk/3.2'}, time)
|
144
|
+
d.filter({'value' => 6, 'agent' => 'Mozilla/5.0 (compatible; Google Desktop/5.9.1005.12335; http://desktop.google.com/)'}, time)
|
145
|
+
d.filter({'value' => 7, 'agent' => 'msnbot/1.1 (+http://search.msn.com/msnbot.htm)'}, time)
|
146
|
+
end
|
147
|
+
|
148
|
+
filtered = d.filtered_as_array
|
149
|
+
assert_equal 8, filtered.size
|
150
|
+
assert_equal 'test.message', filtered[0][0]
|
151
|
+
assert_equal time, filtered[0][1]
|
152
|
+
|
153
|
+
# 'agent' => 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)'
|
154
|
+
m = filtered[0][2]
|
155
|
+
assert_equal 0, m['value']
|
156
|
+
assert_equal 'Internet Explorer', m['agent_name']
|
157
|
+
assert_equal 'pc', m['agent_category']
|
158
|
+
assert_equal 'Windows 8', m['agent_os']
|
159
|
+
assert_equal 5, m.keys.size
|
160
|
+
|
161
|
+
# 'agent' => 'Mozilla/5.0 (Windows NT 6.0; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'
|
162
|
+
m = filtered[1][2]
|
163
|
+
assert_equal 1, m['value']
|
164
|
+
assert_equal 'Firefox', m['agent_name']
|
165
|
+
assert_equal 'pc', m['agent_category']
|
166
|
+
assert_equal 'Windows Vista', m['agent_os']
|
167
|
+
|
168
|
+
# 'agent' => 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'
|
169
|
+
m = filtered[2][2]
|
170
|
+
assert_equal 2, m['value']
|
171
|
+
assert_equal 'Firefox', m['agent_name']
|
172
|
+
assert_equal 'pc', m['agent_category']
|
173
|
+
assert_equal 'Linux', m['agent_os']
|
174
|
+
|
175
|
+
# 'agent' => 'Mozilla/5.0 (Linux; U; Android 3.1; ja-jp; L-06C Build/HMJ37) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13'
|
176
|
+
m = filtered[3][2]
|
177
|
+
assert_equal 3, m['value']
|
178
|
+
assert_equal 'Safari', m['agent_name']
|
179
|
+
assert_equal 'smartphone', m['agent_category']
|
180
|
+
assert_equal 'Android', m['agent_os']
|
181
|
+
|
182
|
+
# 'agent' => 'DoCoMo/1.0/N505i/c20/TB/W24H12'
|
183
|
+
m = filtered[4][2]
|
184
|
+
assert_equal 4, m['value']
|
185
|
+
assert_equal 'docomo', m['agent_name']
|
186
|
+
assert_equal 'mobilephone', m['agent_category']
|
187
|
+
assert_equal 'docomo', m['agent_os']
|
188
|
+
|
189
|
+
# 'agent' => 'Mozilla/5.0 (PlayStation Vita 1.51) AppleWebKit/531.22.8 (KHTML, like Gecko) Silk/3.2'
|
190
|
+
m = filtered[5][2]
|
191
|
+
assert_equal 5, m['value']
|
192
|
+
assert_equal 'PlayStation Vita', m['agent_name']
|
193
|
+
assert_equal 'appliance', m['agent_category']
|
194
|
+
assert_equal 'PlayStation Vita', m['agent_os']
|
195
|
+
|
196
|
+
# 'agent' => 'Mozilla/5.0 (compatible; Google Desktop/5.9.1005.12335; http://desktop.google.com/)'
|
197
|
+
m = filtered[6][2]
|
198
|
+
assert_equal 6, m['value']
|
199
|
+
assert_equal 'Google Desktop', m['agent_name']
|
200
|
+
assert_equal 'misc', m['agent_category']
|
201
|
+
assert_equal 'UNKNOWN', m['agent_os']
|
202
|
+
|
203
|
+
# 'agent' => 'msnbot/1.1 (+http://search.msn.com/msnbot.htm)'
|
204
|
+
m = filtered[7][2]
|
205
|
+
assert_equal 7, m['value']
|
206
|
+
assert_equal 'msnbot', m['agent_name']
|
207
|
+
assert_equal 'crawler', m['agent_category']
|
208
|
+
assert_equal 'UNKNOWN', m['agent_os']
|
209
|
+
end
|
210
|
+
|
211
|
+
# filter & merge
|
212
|
+
def test_filter_stream
|
213
|
+
d = create_driver(CONFIG2, 'test.message')
|
214
|
+
time = Time.parse('2012-07-20 16:40:30').to_i
|
215
|
+
d.run do
|
216
|
+
d.filter({'value' => 0, 'agent' => 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)'}, time)
|
217
|
+
d.filter({'value' => 1, 'agent' => 'Mozilla/5.0 (Windows NT 6.0; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'}, time)
|
218
|
+
d.filter({'value' => 2, 'agent' => 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'}, time)
|
219
|
+
d.filter({'value' => 3, 'agent' => 'Mozilla/5.0 (Linux; U; Android 3.1; ja-jp; L-06C Build/HMJ37) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13'}, time)
|
220
|
+
d.filter({'value' => 4, 'agent' => 'DoCoMo/1.0/N505i/c20/TB/W24H12'}, time)
|
221
|
+
d.filter({'value' => 5, 'agent' => 'Mozilla/5.0 (PlayStation Vita 1.51) AppleWebKit/531.22.8 (KHTML, like Gecko) Silk/3.2'}, time)
|
222
|
+
d.filter({'value' => 6, 'agent' => 'Mozilla/5.0 (compatible; Google Desktop/5.9.1005.12335; http://desktop.google.com/)'}, time)
|
223
|
+
d.filter({'value' => 7, 'agent' => 'msnbot/1.1 (+http://search.msn.com/msnbot.htm)'}, time)
|
224
|
+
end
|
225
|
+
|
226
|
+
filtered = d.filtered_as_array
|
227
|
+
assert_equal 6, filtered.size
|
228
|
+
assert_equal 'test.message', filtered[0][0]
|
229
|
+
assert_equal time, filtered[0][1]
|
230
|
+
|
231
|
+
# 'agent' => 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)'
|
232
|
+
m = filtered[0][2]
|
233
|
+
assert_equal 8, m.keys.size
|
234
|
+
assert_equal 0, m['value']
|
235
|
+
assert_equal 'Internet Explorer', m['ua_name']
|
236
|
+
assert_equal 'pc', m['ua_category']
|
237
|
+
assert_equal 'Windows 8', m['ua_os']
|
238
|
+
assert_equal 'NT 6.2', m['ua_os_version']
|
239
|
+
assert_equal 'Microsoft', m['ua_vendor']
|
240
|
+
assert_equal '10.0', m['ua_version']
|
241
|
+
|
242
|
+
# 'agent' => 'Mozilla/5.0 (Windows NT 6.0; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'
|
243
|
+
m = filtered[1][2]
|
244
|
+
assert_equal 1, m['value']
|
245
|
+
assert_equal 'Firefox', m['ua_name']
|
246
|
+
assert_equal 'pc', m['ua_category']
|
247
|
+
assert_equal 'Windows Vista', m['ua_os']
|
248
|
+
assert_equal 'NT 6.0', m['ua_os_version']
|
249
|
+
assert_equal 'Mozilla', m['ua_vendor']
|
250
|
+
assert_equal '9.0.1', m['ua_version']
|
251
|
+
|
252
|
+
# 'agent' => 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'
|
253
|
+
m = filtered[2][2]
|
254
|
+
assert_equal 2, m['value']
|
255
|
+
assert_equal 'Firefox', m['ua_name']
|
256
|
+
assert_equal 'pc', m['ua_category']
|
257
|
+
assert_equal 'Linux', m['ua_os']
|
258
|
+
assert_equal 'UNKNOWN', m['ua_os_version']
|
259
|
+
assert_equal 'Mozilla', m['ua_vendor']
|
260
|
+
assert_equal '9.0.1', m['ua_version']
|
261
|
+
|
262
|
+
# 'agent' => 'Mozilla/5.0 (Linux; U; Android 3.1; ja-jp; L-06C Build/HMJ37) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13'
|
263
|
+
m = filtered[3][2]
|
264
|
+
assert_equal 3, m['value']
|
265
|
+
assert_equal 'Safari', m['ua_name']
|
266
|
+
assert_equal 'smartphone', m['ua_category']
|
267
|
+
assert_equal 'Android', m['ua_os']
|
268
|
+
assert_equal '3.1', m['ua_os_version']
|
269
|
+
assert_equal 'Apple', m['ua_vendor']
|
270
|
+
assert_equal '4.0', m['ua_version']
|
271
|
+
|
272
|
+
# 'agent' => 'DoCoMo/1.0/N505i/c20/TB/W24H12'
|
273
|
+
m = filtered[4][2]
|
274
|
+
assert_equal 4, m['value']
|
275
|
+
assert_equal 'docomo', m['ua_name']
|
276
|
+
assert_equal 'mobilephone', m['ua_category']
|
277
|
+
assert_equal 'docomo', m['ua_os']
|
278
|
+
assert_equal 'UNKNOWN', m['ua_os_version']
|
279
|
+
assert_equal 'docomo', m['ua_vendor']
|
280
|
+
assert_equal 'N505i', m['ua_version']
|
281
|
+
|
282
|
+
# 'agent' => 'Mozilla/5.0 (PlayStation Vita 1.51) AppleWebKit/531.22.8 (KHTML, like Gecko) Silk/3.2'
|
283
|
+
m = filtered[5][2]
|
284
|
+
assert_equal 5, m['value']
|
285
|
+
assert_equal 'PlayStation Vita', m['ua_name']
|
286
|
+
assert_equal 'appliance', m['ua_category']
|
287
|
+
assert_equal 'PlayStation Vita', m['ua_os']
|
288
|
+
assert_equal '1.51', m['ua_os_version']
|
289
|
+
assert_equal 'Sony', m['ua_vendor']
|
290
|
+
assert_equal 'UNKNOWN', m['ua_version']
|
291
|
+
end
|
292
|
+
|
293
|
+
# drop & non-merge
|
294
|
+
def test_filter_drop
|
295
|
+
d = create_driver(CONFIG3, 'test.message')
|
296
|
+
time = Time.parse('2012-07-20 16:40:30').to_i
|
297
|
+
d.run do
|
298
|
+
d.filter({'value' => 0, 'user_agent' => 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)'}, time)
|
299
|
+
d.filter({'value' => 1, 'user_agent' => 'Mozilla/5.0 (Windows NT 6.0; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'}, time)
|
300
|
+
d.filter({'value' => 2, 'user_agent' => 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'}, time)
|
301
|
+
d.filter({'value' => 3, 'user_agent' => 'Mozilla/5.0 (Linux; U; Android 3.1; ja-jp; L-06C Build/HMJ37) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13'}, time)
|
302
|
+
d.filter({'value' => 4, 'user_agent' => 'DoCoMo/1.0/N505i/c20/TB/W24H12'}, time)
|
303
|
+
d.filter({'value' => 5, 'user_agent' => 'Mozilla/5.0 (PlayStation Vita 1.51) AppleWebKit/531.22.8 (KHTML, like Gecko) Silk/3.2'}, time)
|
304
|
+
d.filter({'value' => 6, 'user_agent' => 'Mozilla/5.0 (compatible; Google Desktop/5.9.1005.12335; http://desktop.google.com/)'}, time)
|
305
|
+
d.filter({'value' => 7, 'user_agent' => 'msnbot/1.1 (+http://search.msn.com/msnbot.htm)'}, time)
|
306
|
+
end
|
307
|
+
|
308
|
+
filtered = d.filtered_as_array
|
309
|
+
assert_equal 6, filtered.size
|
310
|
+
assert_equal 'test.message', filtered[0][0]
|
311
|
+
assert_equal time, filtered[0][1]
|
312
|
+
|
313
|
+
# 'agent' => 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)'
|
314
|
+
m = filtered[0][2]
|
315
|
+
assert_equal 0, m['value']
|
316
|
+
assert_equal 2, m.keys.size
|
317
|
+
|
318
|
+
# 'agent' => 'Mozilla/5.0 (Windows NT 6.0; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'
|
319
|
+
m = filtered[1][2]
|
320
|
+
assert_equal 1, m['value']
|
321
|
+
|
322
|
+
# 'agent' => 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'
|
323
|
+
m = filtered[2][2]
|
324
|
+
assert_equal 2, m['value']
|
325
|
+
|
326
|
+
# 'agent' => 'Mozilla/5.0 (Linux; U; Android 3.1; ja-jp; L-06C Build/HMJ37) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13'
|
327
|
+
m = filtered[3][2]
|
328
|
+
assert_equal 3, m['value']
|
329
|
+
|
330
|
+
# 'agent' => 'DoCoMo/1.0/N505i/c20/TB/W24H12'
|
331
|
+
m = filtered[4][2]
|
332
|
+
assert_equal 4, m['value']
|
333
|
+
|
334
|
+
# 'agent' => 'Mozilla/5.0 (PlayStation Vita 1.51) AppleWebKit/531.22.8 (KHTML, like Gecko) Silk/3.2'
|
335
|
+
m = filtered[5][2]
|
336
|
+
assert_equal 5, m['value']
|
337
|
+
end
|
338
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-woothee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAGOMORI Satoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -80,12 +80,14 @@ files:
|
|
80
80
|
- README.md
|
81
81
|
- Rakefile
|
82
82
|
- fluent-plugin-woothee.gemspec
|
83
|
+
- lib/fluent/plugin/filter_woothee.rb
|
83
84
|
- lib/fluent/plugin/out_woothee.rb
|
84
85
|
- test/helper.rb
|
86
|
+
- test/plugin/test_filter_woothee.rb
|
85
87
|
- test/plugin/test_out_woothee.rb
|
86
88
|
homepage: https://github.com/tagomoris/fluent-plugin-woothee
|
87
89
|
licenses:
|
88
|
-
-
|
90
|
+
- Apache-2.0
|
89
91
|
metadata: {}
|
90
92
|
post_install_message:
|
91
93
|
rdoc_options: []
|
@@ -110,5 +112,6 @@ summary: Fluentd plugin to parse UserAgent strings with woothee parser. It adds
|
|
110
112
|
information or filter records with specific device types.
|
111
113
|
test_files:
|
112
114
|
- test/helper.rb
|
115
|
+
- test/plugin/test_filter_woothee.rb
|
113
116
|
- test/plugin/test_out_woothee.rb
|
114
117
|
has_rdoc:
|