fluent-plugin-woothee 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fluent-plugin-woothee.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2012- TAGOMORI Satoshi
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
data/README.md ADDED
@@ -0,0 +1,79 @@
1
+ # fluent-plugin-woothee
2
+
3
+ ## WootheeOutput
4
+
5
+ 'fluent-plugin-woothee' is a Fluentd plugin to parse UserAgent strings and to filter/drop specified categories of user terminals (like 'pc', 'smartphone' and so on).
6
+
7
+ 'woothee' is a multi-language user-agent string parser project. See: https://github.com/tagomoris/woothee
8
+
9
+ ## Configuration
10
+
11
+ To add woothee parser result into matched messages:
12
+
13
+ <match input.**>
14
+ type woothee
15
+ key_name agent
16
+ remove_prefix input
17
+ add_prefix merged
18
+ merge_agent_info yes
19
+ </match>
20
+
21
+ Output messages with tag 'merged.**' have attributes like 'agent\_name', 'agent\_category' and 'agent\_os' taken from the woothee parser result. If you want to change the attribute names, or also want to merge the browser vendor and version attributes, write the configuration as below:
22
+
23
+ <match input.**>
24
+ type woothee
25
+ key_name agent
26
+ remove_prefix input
27
+ add_prefix merged
28
+ merge_agent_info yes
29
+ out_key_name ua_name
30
+ out_key_category ua_category
31
+ out_key_os ua_os
32
+ out_key_version ua_version
33
+ out_key_vendor ua_vendor
34
+ </match>
35
+
36
+ To re-emit messages with specified user-agent categories (and merge woothee parser result), configure like this:
37
+
38
+ <match input.**>
39
+ type woothee
40
+ key_name agent
41
+ filter_categories pc,smartphone,mobilephone,appliance
42
+ remove_prefix input
43
+ add_prefix merged
44
+ merge_agent_info yes
45
+ </match>
46
+
47
+ Or, you can specify categories to drop (and not to merge woothee result):
48
+
49
+ <match input.**>
50
+ type woothee
51
+ key_name agent
52
+ drop_categories crawler
53
+ remove_prefix input
54
+ add_prefix merged
55
+ merge_agent_info false # default
56
+ </match>
57
+
58
+ ### Fast Crawler Filter
59
+
60
+ If you want to drop __almost__ all messages that have a crawler's user-agent, without merging the woothee result, just specify this plugin type:
61
+
62
+ <match input.**>
63
+ type woothee_fast_crawler_filter
64
+ key_name useragent
65
+ tag filtered
66
+ </match>
67
+
68
+ With this configuration, 'fluent-plugin-woothee' uses 'Woothee.is_crawler', a fast but incomplete method to judge whether a user-agent is a crawler or not.
69
+ If you want to drop all of crawlers completely, specify 'type woothee' and 'drop_categories crawler'.
70
+
71
+ ## TODO
72
+
73
+ * patches welcome!
74
+
75
+ ## Copyright
76
+
77
+ * Copyright (c) 2012- TAGOMORI Satoshi (tagomoris)
78
+ * License
79
+ * Apache License, Version 2.0
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+
4
+ require 'rake/testtask'
# `rake test` runs every test/**/test_*.rb file with lib/ and test/ on the
# load path, printing the commands it executes.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
end

# Plain `rake` runs the test suite.
task :default => :test
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+
# Gem specification for fluent-plugin-woothee.
Gem::Specification.new do |gem|
  gem.name          = "fluent-plugin-woothee"
  gem.version       = "0.0.1"
  gem.authors       = ["TAGOMORI Satoshi"]
  gem.email         = ["tagomoris@gmail.com"]
  gem.description   = %q{parsing by Project Woothee. See https://github.com/tagomoris/woothee }
  gem.summary       = %q{Fluentd plugin to parse UserAgent strings}
  gem.homepage      = "https://github.com/tagomoris/fluent-plugin-woothee"
  # Matches LICENSE.txt (Apache License, Version 2.0).
  gem.license       = "Apache-2.0"

  # `git ls-files` prints one path per line. Split on newlines explicitly:
  # `$\` is nil unless someone sets it, and split(nil) splits on any
  # whitespace, which would break paths that contain spaces.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # fluentd and woothee are runtime dependencies, so they do not need to be
  # declared a second time as development dependencies.
  gem.add_development_dependency "rake"
  gem.add_runtime_dependency "fluentd"
  gem.add_runtime_dependency "woothee", ">= 0.2.4"
end
@@ -0,0 +1,142 @@
# Fluentd output plugin that parses the user-agent string found in each record
# with Woothee and re-emits the record under a rewritten tag.
#
# It is registered under two type names:
# * 'woothee' - keep (filter_categories) or discard (drop_categories) records
#   by Woothee category, and optionally merge the parser result into the record.
# * 'woothee_fast_crawler_filter' - discard records for which
#   Woothee.is_crawler is true; nothing is merged into the record.
class Fluent::WootheeOutput < Fluent::Output
  Fluent::Plugin.register_output('woothee', self)
  Fluent::Plugin.register_output('woothee_fast_crawler_filter', self)

  # Output tag: either a fixed 'tag', or remove_prefix/add_prefix rewriting of
  # the incoming tag. The two styles are mutually exclusive (see #configure).
  config_param :tag, :string, :default => nil
  config_param :remove_prefix, :string, :default => nil
  config_param :add_prefix, :string, :default => nil

  config_param :fast_crawler_filter_mode, :bool, :default => false

  # Record key that holds the user-agent string.
  config_param :key_name, :string

  # Comma-separated category names, converted to symbols. Whitespace around
  # each name is ignored so that "pc, smartphone" is accepted.
  config_param :filter_categories, :default => [] do |val|
    val.split(',').map { |name| name.strip.to_sym }
  end
  config_param :drop_categories, :default => [] do |val|
    val.split(',').map { |name| name.strip.to_sym }
  end
  # :filter, :drop or :through. Assigned by #configure; not assigned in fast
  # crawler filter mode, where #configure returns early.
  attr_accessor :mode

  # Keys under which the parser result is merged when merge_agent_info is on.
  config_param :merge_agent_info, :bool, :default => false
  config_param :out_key_name, :string, :default => 'agent_name'
  config_param :out_key_category, :string, :default => 'agent_category'
  config_param :out_key_os, :string, :default => 'agent_os'
  config_param :out_key_version, :string, :default => nil # suppress output
  config_param :out_key_vendor, :string, :default => nil # suppress output

  def initialize
    super
    # Loaded here rather than at file load time, so the library is only
    # required once a plugin instance is created.
    require 'woothee'
  end

  # Validates the configuration and derives the tag-rewriting strings and the
  # operating mode.
  #
  # Raises Fluent::ConfigError when
  # * none of tag/remove_prefix/add_prefix is given,
  # * tag is combined with remove_prefix/add_prefix,
  # * fast crawler filter mode is combined with filter/drop/merge options,
  # * both filter_categories and drop_categories are given,
  # * a category name is not in Woothee::CATEGORY_LIST,
  # * neither filtering/dropping nor merging is requested.
  def configure(conf)
    super

    # tag ->
    if !@tag && !@remove_prefix && !@add_prefix
      raise Fluent::ConfigError, "missing both of remove_prefix and add_prefix"
    end
    if @tag && (@remove_prefix || @add_prefix)
      raise Fluent::ConfigError, "both of tag and remove_prefix/add_prefix must not be specified"
    end
    if @remove_prefix
      @removed_prefix_string = @remove_prefix + '.'
      @removed_length = @removed_prefix_string.length
    end
    if @add_prefix
      @added_prefix_string = @add_prefix + '.'
    end
    # <- tag

    if conf['type'] == 'woothee_fast_crawler_filter' || @fast_crawler_filter_mode
      @fast_crawler_filter_mode = true

      if !@filter_categories.empty? || !@drop_categories.empty? || @merge_agent_info
        raise Fluent::ConfigError, "fast_crawler_filter cannot be specified with filter/drop/merge options"
      end

      return
    end

    if !@filter_categories.empty? && !@drop_categories.empty?
      raise Fluent::ConfigError, "both of 'filter' and 'drop' categories specified"
    elsif !@filter_categories.empty?
      unless @filter_categories.all? { |category| Woothee::CATEGORY_LIST.include?(category) }
        raise Fluent::ConfigError, "filter_categories has invalid category name"
      end
      @mode = :filter
    elsif !@drop_categories.empty?
      unless @drop_categories.all? { |category| Woothee::CATEGORY_LIST.include?(category) }
        raise Fluent::ConfigError, "drop_categories has invalid category name"
      end
      @mode = :drop
    else
      @mode = :through
    end

    if @mode == :through && !@merge_agent_info
      raise Fluent::ConfigError, "configured to do nothing (neither filter/drop nor addition of parser result)"
    end
  end

  # Returns the tag to emit under for an incoming +tag+.
  #
  # With a fixed 'tag' configured, that tag is returned. Otherwise
  # remove_prefix is stripped from the front (when the incoming tag is exactly
  # the prefix or starts with "prefix.") and add_prefix is prepended.
  def tag_mangle(tag)
    if @tag
      @tag
    else
      if @remove_prefix &&
          ((tag.start_with?(@removed_prefix_string) && tag.length > @removed_length) || tag == @remove_prefix)
        # When tag == @remove_prefix the slice starts past the end of the
        # string and yields nil; add_prefix below then stands in for the tag.
        # NOTE(review): with remove_prefix set but add_prefix unset, this
        # method returns nil for such a tag - confirm whether that is intended.
        tag = tag[@removed_length..-1]
      end
      if @add_prefix
        tag = if tag && tag.length > 0
                @added_prefix_string + tag
              else
                @add_prefix
              end
      end
      tag
    end
  end

  # Re-emits, unchanged, every record whose user-agent is not judged a crawler
  # by Woothee.is_crawler. A missing user-agent is treated as ''.
  def fast_crawler_filter_emit(tag, es)
    es.each do |time,record|
      unless Woothee.is_crawler(record[@key_name] || '')
        Fluent::Engine.emit(tag, time, record)
      end
    end
  end

  # Parses each record's user-agent, skips records according to the
  # filter/drop mode, optionally merges the parser result, and re-emits.
  def normal_emit(tag, es)
    es.each do |time,record|
      parsed = Woothee.parse(record[@key_name] || '')

      category = parsed[Woothee::ATTRIBUTE_CATEGORY]
      next if @mode == :filter && !@filter_categories.include?(category)
      next if @mode == :drop && @drop_categories.include?(category)

      if @merge_agent_info
        # Hash#merge returns a new hash, so the incoming record is not mutated.
        record = record.merge({
            @out_key_name => parsed[Woothee::ATTRIBUTE_NAME],
            @out_key_category => parsed[Woothee::ATTRIBUTE_CATEGORY].to_s,
            @out_key_os => parsed[Woothee::ATTRIBUTE_OS]
          })
        # Version and vendor are only added when an output key is configured.
        record[@out_key_version] = parsed[Woothee::ATTRIBUTE_VERSION] if @out_key_version
        record[@out_key_vendor] = parsed[Woothee::ATTRIBUTE_VENDOR] if @out_key_vendor
      end
      Fluent::Engine.emit(tag, time, record)
    end
  end

  # Output entry point: rewrites the tag, dispatches the event stream to the
  # emit path for the configured mode, then calls chain.next.
  def emit(tag, es, chain)
    tag = tag_mangle(tag)

    if @fast_crawler_filter_mode
      fast_crawler_filter_emit(tag, es)
    else
      normal_emit(tag, es)
    end

    chain.next
  end
end
data/test/helper.rb ADDED
@@ -0,0 +1,28 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+
12
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
13
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
14
+ require 'fluent/test'
15
+ unless ENV.has_key?('VERBOSE')
16
+ nulllogger = Object.new
17
+ nulllogger.instance_eval {|obj|
18
+ def method_missing(method, *args)
19
+ # pass
20
+ end
21
+ }
22
+ $log = nulllogger
23
+ end
24
+
25
+ require 'fluent/plugin/out_woothee'
26
+
27
+ class Test::Unit::TestCase
28
+ end
@@ -0,0 +1,352 @@
1
+ require 'helper'
2
+
require 'time' # Time.parse is provided by the 'time' library

# Tests for Fluent::WootheeOutput: configuration parsing, tag rewriting, and
# the four emit behaviours (fast crawler filter, through, filter, drop).
class Fluent::WootheeOutputTest < Test::Unit::TestCase
  # fast crawler filter
  CONFIG0 = %[
type woothee_fast_crawler_filter
key_name useragent
tag filtered
]

  # through & merge
  CONFIG1 = %[
type woothee
key_name agent
remove_prefix test
add_prefix merged
merge_agent_info yes
]

  # filter & merge
  CONFIG2 = %[
type woothee
key_name agent
filter_categories pc,smartphone,mobilephone,appliance
remove_prefix test
add_prefix merged
merge_agent_info yes
out_key_name ua_name
out_key_category ua_category
out_key_os ua_os
out_key_version ua_version
out_key_vendor ua_vendor
]

  # drop & non-merge
  CONFIG3 = %[
type woothee
key_name user_agent
drop_categories crawler,misc
tag selected
]

  # User-agent strings shared by the through/filter/drop emit tests, in emit
  # order. The index of each entry is used as the record's 'value'.
  AGENTS = [
    'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)',
    'Mozilla/5.0 (Windows NT 6.0; rv:9.0.1) Gecko/20100101 Firefox/9.0.1',
    'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:9.0.1) Gecko/20100101 Firefox/9.0.1',
    'Mozilla/5.0 (Linux; U; Android 3.1; ja-jp; L-06C Build/HMJ37) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13',
    'DoCoMo/1.0/N505i/c20/TB/W24H12',
    'Mozilla/5.0 (PlayStation Vita 1.51) AppleWebKit/531.22.8 (KHTML, like Gecko) Silk/3.2',
    'Mozilla/5.0 (compatible; Google Desktop/5.9.1005.12335; http://desktop.google.com/)',
    'msnbot/1.1 (+http://search.msn.com/msnbot.htm)',
  ]

  def create_driver(conf=CONFIG1,tag='test')
    Fluent::Test::OutputTestDriver.new(Fluent::WootheeOutput, tag).configure(conf)
  end

  # Emits one record per AGENTS entry through driver +d+, storing the
  # user-agent under +key+ and the entry's index under 'value'.
  def emit_agents(d, key, time)
    d.run do
      AGENTS.each_with_index do |agent, i|
        d.emit({'value' => i, key => agent}, time)
      end
    end
  end

  def test_configure
    # fast_crawler_filter
    d = create_driver CONFIG0
    assert_equal true, d.instance.fast_crawler_filter_mode
    assert_equal 'useragent', d.instance.key_name
    assert_equal 'filtered', d.instance.tag

    # through & merge
    d = create_driver CONFIG1
    assert_equal false, d.instance.fast_crawler_filter_mode
    assert_equal 'agent', d.instance.key_name
    assert_equal 'test', d.instance.remove_prefix
    assert_equal 'merged', d.instance.add_prefix

    assert_equal 0, d.instance.filter_categories.size
    assert_equal 0, d.instance.drop_categories.size
    assert_equal :through, d.instance.mode

    assert_equal true, d.instance.merge_agent_info
    assert_equal 'agent_name', d.instance.out_key_name
    assert_equal 'agent_category', d.instance.out_key_category
    assert_equal 'agent_os', d.instance.out_key_os
    assert_nil d.instance.out_key_version
    assert_nil d.instance.out_key_vendor

    # filter & merge
    d = create_driver CONFIG2
    assert_equal false, d.instance.fast_crawler_filter_mode
    assert_equal 'agent', d.instance.key_name
    assert_equal 'test', d.instance.remove_prefix
    assert_equal 'merged', d.instance.add_prefix

    assert_equal 4, d.instance.filter_categories.size
    assert_equal [:pc,:smartphone,:mobilephone,:appliance], d.instance.filter_categories
    assert_equal 0, d.instance.drop_categories.size
    assert_equal :filter, d.instance.mode

    assert_equal true, d.instance.merge_agent_info
    assert_equal 'ua_name', d.instance.out_key_name
    assert_equal 'ua_category', d.instance.out_key_category
    assert_equal 'ua_os', d.instance.out_key_os
    assert_equal 'ua_version', d.instance.out_key_version
    assert_equal 'ua_vendor', d.instance.out_key_vendor

    # drop & non-merge
    d = create_driver CONFIG3
    assert_equal false, d.instance.fast_crawler_filter_mode
    assert_equal 'user_agent', d.instance.key_name
    assert_equal 'selected', d.instance.tag

    assert_equal 0, d.instance.filter_categories.size
    assert_equal 2, d.instance.drop_categories.size
    assert_equal [:crawler,:misc], d.instance.drop_categories
    assert_equal :drop, d.instance.mode

    assert_equal false, d.instance.merge_agent_info
  end

  def test_tag_mangle
    # fixed tag: the incoming tag is ignored
    p = create_driver(CONFIG0).instance
    assert_equal 'filtered', p.tag_mangle('data')
    assert_equal 'filtered', p.tag_mangle('test.data')
    assert_equal 'filtered', p.tag_mangle('test.test.data')
    assert_equal 'filtered', p.tag_mangle('test')

    # remove_prefix 'test' + add_prefix 'merged'
    p = create_driver(CONFIG1).instance
    assert_equal 'merged.data', p.tag_mangle('data')
    assert_equal 'merged.data', p.tag_mangle('test.data')
    assert_equal 'merged.test.data', p.tag_mangle('test.test.data')
    assert_equal 'merged', p.tag_mangle('test')

    # fixed tag again
    p = create_driver(CONFIG3).instance
    assert_equal 'selected', p.tag_mangle('data')
    assert_equal 'selected', p.tag_mangle('test.data')
    assert_equal 'selected', p.tag_mangle('test.test.data')
    assert_equal 'selected', p.tag_mangle('test')
  end

  def test_emit_fast_crawler_filter
    d = create_driver CONFIG0
    time = Time.parse('2012-07-20 16:19:00').to_i
    d.run do
      d.emit({'useragent' => 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', 'value' => 1}, time)
      d.emit({'useragent' => 'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)', 'value' => 2}, time)
      d.emit({'useragent' => 'Mozilla/5.0 (iPad; U; CPU OS 4_3_2 like Mac OS X; ja-jp) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5', 'value' => 3}, time)
      d.emit({'useragent' => 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)', 'value' => 4}, time)
      d.emit({'useragent' => 'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)', 'value' => 5}, time)
      d.emit({'useragent' => 'Mozilla/5.0 (compatible; Rakutenbot/1.0; +http://dynamic.rakuten.co.jp/bot.html)', 'value' => 6}, time)
      d.emit({'useragent' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4; ja-jp) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1', 'value' => 7}, time)
      d.emit({'useragent' => 'Yeti/1.0 (NHN Corp.; http://help.naver.com/robots/)', 'value' => 8}, time)
    end

    emits = d.emits
    assert_equal 4, emits.size

    assert_equal 'filtered', emits[0][0]
    assert_equal time, emits[0][1]
    assert_equal 'Mozilla/5.0 (iPad; U; CPU OS 4_3_2 like Mac OS X; ja-jp) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5', emits[0][2]['useragent']
    assert_equal 3, emits[0][2]['value']
    assert_equal 2, emits[0][2].keys.size

    # Record 6 (Rakutenbot) is expected to pass this filter.
    assert_equal 4, emits[1][2]['value']
    assert_equal 6, emits[2][2]['value']
    assert_equal 7, emits[3][2]['value']
  end

  # through & merge
  def test_emit_through
    d = create_driver(CONFIG1, 'test.message')
    time = Time.parse('2012-07-20 16:40:30').to_i
    emit_agents(d, 'agent', time)

    emits = d.emits
    assert_equal 8, emits.size
    assert_equal 'merged.message', emits[0][0]
    assert_equal time, emits[0][1]

    # AGENTS[0]: MSIE 10.0 on Windows NT 6.2
    m = emits[0][2]
    assert_equal 0, m['value']
    assert_equal 'Internet Explorer', m['agent_name']
    assert_equal 'pc', m['agent_category']
    assert_equal 'Windows 8', m['agent_os']
    assert_equal 5, m.keys.size

    # AGENTS[1]: Firefox 9.0.1 on Windows NT 6.0
    m = emits[1][2]
    assert_equal 1, m['value']
    assert_equal 'Firefox', m['agent_name']
    assert_equal 'pc', m['agent_category']
    assert_equal 'Windows Vista', m['agent_os']

    # AGENTS[2]: Firefox 9.0.1 on Linux
    m = emits[2][2]
    assert_equal 2, m['value']
    assert_equal 'Firefox', m['agent_name']
    assert_equal 'pc', m['agent_category']
    assert_equal 'Linux', m['agent_os']

    # AGENTS[3]: Android 3.1 browser
    m = emits[3][2]
    assert_equal 3, m['value']
    assert_equal 'Safari', m['agent_name']
    assert_equal 'smartphone', m['agent_category']
    assert_equal 'Android', m['agent_os']

    # AGENTS[4]: DoCoMo N505i
    m = emits[4][2]
    assert_equal 4, m['value']
    assert_equal 'docomo', m['agent_name']
    assert_equal 'mobilephone', m['agent_category']
    assert_equal 'docomo', m['agent_os']

    # AGENTS[5]: PlayStation Vita
    m = emits[5][2]
    assert_equal 5, m['value']
    assert_equal 'PlayStation Vita', m['agent_name']
    assert_equal 'appliance', m['agent_category']
    assert_equal 'PlayStation Vita', m['agent_os']

    # AGENTS[6]: Google Desktop
    m = emits[6][2]
    assert_equal 6, m['value']
    assert_equal 'Google Desktop', m['agent_name']
    assert_equal 'misc', m['agent_category']
    assert_equal 'UNKNOWN', m['agent_os']

    # AGENTS[7]: msnbot
    m = emits[7][2]
    assert_equal 7, m['value']
    assert_equal 'msnbot', m['agent_name']
    assert_equal 'crawler', m['agent_category']
    assert_equal 'UNKNOWN', m['agent_os']
  end

  # filter & merge
  def test_emit_filter
    d = create_driver(CONFIG2, 'test.message')
    time = Time.parse('2012-07-20 16:40:30').to_i
    emit_agents(d, 'agent', time)

    # The misc (AGENTS[6]) and crawler (AGENTS[7]) records are filtered out.
    emits = d.emits
    assert_equal 6, emits.size
    assert_equal 'merged.message', emits[0][0]
    assert_equal time, emits[0][1]

    # AGENTS[0]: MSIE 10.0 on Windows NT 6.2
    m = emits[0][2]
    assert_equal 7, m.keys.size
    assert_equal 0, m['value']
    assert_equal 'Internet Explorer', m['ua_name']
    assert_equal 'pc', m['ua_category']
    assert_equal 'Windows 8', m['ua_os']
    assert_equal 'Microsoft', m['ua_vendor']
    assert_equal '10.0', m['ua_version']

    # AGENTS[1]: Firefox 9.0.1 on Windows NT 6.0
    m = emits[1][2]
    assert_equal 1, m['value']
    assert_equal 'Firefox', m['ua_name']
    assert_equal 'pc', m['ua_category']
    assert_equal 'Windows Vista', m['ua_os']
    assert_equal 'Mozilla', m['ua_vendor']
    assert_equal '9.0.1', m['ua_version']

    # AGENTS[2]: Firefox 9.0.1 on Linux
    m = emits[2][2]
    assert_equal 2, m['value']
    assert_equal 'Firefox', m['ua_name']
    assert_equal 'pc', m['ua_category']
    assert_equal 'Linux', m['ua_os']
    assert_equal 'Mozilla', m['ua_vendor']
    assert_equal '9.0.1', m['ua_version']

    # AGENTS[3]: Android 3.1 browser
    m = emits[3][2]
    assert_equal 3, m['value']
    assert_equal 'Safari', m['ua_name']
    assert_equal 'smartphone', m['ua_category']
    assert_equal 'Android', m['ua_os']
    assert_equal 'Apple', m['ua_vendor']
    assert_equal '4.0', m['ua_version']

    # AGENTS[4]: DoCoMo N505i
    m = emits[4][2]
    assert_equal 4, m['value']
    assert_equal 'docomo', m['ua_name']
    assert_equal 'mobilephone', m['ua_category']
    assert_equal 'docomo', m['ua_os']
    assert_equal 'docomo', m['ua_vendor']
    assert_equal 'N505i', m['ua_version']

    # AGENTS[5]: PlayStation Vita
    m = emits[5][2]
    assert_equal 5, m['value']
    assert_equal 'PlayStation Vita', m['ua_name']
    assert_equal 'appliance', m['ua_category']
    assert_equal 'PlayStation Vita', m['ua_os']
    assert_equal 'Sony', m['ua_vendor']
    assert_equal 'UNKNOWN', m['ua_version']
  end

  # drop & non-merge
  def test_emit_drop
    d = create_driver(CONFIG3, 'test.message')
    time = Time.parse('2012-07-20 16:40:30').to_i
    emit_agents(d, 'user_agent', time)

    # The misc (AGENTS[6]) and crawler (AGENTS[7]) records are dropped.
    emits = d.emits
    assert_equal 6, emits.size
    assert_equal 'selected', emits[0][0]
    assert_equal time, emits[0][1]

    # AGENTS[0]; the record keeps only 'value' and 'user_agent'
    m = emits[0][2]
    assert_equal 0, m['value']
    assert_equal 2, m.keys.size

    # AGENTS[1]
    m = emits[1][2]
    assert_equal 1, m['value']

    # AGENTS[2]
    m = emits[2][2]
    assert_equal 2, m['value']

    # AGENTS[3]
    m = emits[3][2]
    assert_equal 3, m['value']

    # AGENTS[4]
    m = emits[4][2]
    assert_equal 4, m['value']

    # AGENTS[5]
    m = emits[5][2]
    assert_equal 5, m['value']
  end
end
metadata ADDED
@@ -0,0 +1,136 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fluent-plugin-woothee
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - TAGOMORI Satoshi
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-07-20 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: fluentd
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: woothee
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: 0.2.4
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: 0.2.4
62
+ - !ruby/object:Gem::Dependency
63
+ name: fluentd
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: woothee
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: 0.2.4
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: 0.2.4
94
+ description: ! 'parsing by Project Woothee. See https://github.com/tagomoris/woothee '
95
+ email:
96
+ - tagomoris@gmail.com
97
+ executables: []
98
+ extensions: []
99
+ extra_rdoc_files: []
100
+ files:
101
+ - .gitignore
102
+ - Gemfile
103
+ - LICENSE.txt
104
+ - README.md
105
+ - Rakefile
106
+ - fluent-plugin-woothee.gemspec
107
+ - lib/fluent/plugin/out_woothee.rb
108
+ - test/helper.rb
109
+ - test/plugin/test_out_woothee.rb
110
+ homepage: https://github.com/tagomoris/fluent-plugin-woothee
111
+ licenses: []
112
+ post_install_message:
113
+ rdoc_options: []
114
+ require_paths:
115
+ - lib
116
+ required_ruby_version: !ruby/object:Gem::Requirement
117
+ none: false
118
+ requirements:
119
+ - - ! '>='
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
122
+ required_rubygems_version: !ruby/object:Gem::Requirement
123
+ none: false
124
+ requirements:
125
+ - - ! '>='
126
+ - !ruby/object:Gem::Version
127
+ version: '0'
128
+ requirements: []
129
+ rubyforge_project:
130
+ rubygems_version: 1.8.21
131
+ signing_key:
132
+ specification_version: 3
133
+ summary: Fluentd plugin to parse UserAgent strings
134
+ test_files:
135
+ - test/helper.rb
136
+ - test/plugin/test_out_woothee.rb