fluent-plugin-woothee 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fluent-plugin-woothee.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2012- TAGOMORI Satoshi
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
data/README.md ADDED
@@ -0,0 +1,79 @@
1
+ # fluent-plugin-woothee
2
+
3
+ ## WootheeOutput
4
+
5
+ 'fluent-plugin-woothee' is a Fluentd plugin to parse UserAgent strings and to filter/drop specified categories of user terminals (like 'pc', 'smartphone' and so on).
6
+
7
+ 'woothee' is a multi-language user-agent string parser project. See: https://github.com/tagomoris/woothee
8
+
9
+ ## Configuration
10
+
11
+ To add woothee parser result into matched messages:
12
+
13
+ <match input.**>
14
+ type woothee
15
+ key_name agent
16
+ remove_prefix input
17
+ add_prefix merged
18
+ merge_agent_info yes
19
+ </match>
20
+
21
+ Output messages with tag 'merged.**' have attributes like 'agent\_name', 'agent\_category' and 'agent\_os' from the woothee parser result. If you want to change the attribute names, or want to also merge the browser vendor and version attributes, write a configuration like the one below:
22
+
23
+ <match input.**>
24
+ type woothee
25
+ key_name agent
26
+ remove_prefix input
27
+ add_prefix merged
28
+ merge_agent_info yes
29
+ out_key_name ua_name
30
+ out_key_category ua_category
31
+ out_key_os ua_os
32
+ out_key_version ua_version
33
+ out_key_vendor ua_vendor
34
+ </match>
35
+
36
+ To re-emit messages with specified user-agent categories (and merge woothee parser result), configure like this:
37
+
38
+ <match input.**>
39
+ type woothee
40
+ key_name agent
41
+ filter_categories pc,smartphone,mobilephone,appliance
42
+ remove_prefix input
43
+ add_prefix merged
44
+ merge_agent_info yes
45
+ </match>
46
+
47
+ Or, you can specify categories to drop (and not to merge woothee result):
48
+
49
+ <match input.**>
50
+ type woothee
51
+ key_name agent
52
+ drop_categories crawler
53
+ remove_prefix input
54
+ add_prefix merged
55
+ merge_agent_info false # default
56
+ </match>
57
+
58
+ ### Fast Crawler Filter
59
+
60
+ If you want to drop __almost__ all messages that have a crawler's user-agent, without merging the woothee result, just specify this plugin type:
61
+
62
+ <match input.**>
63
+ type woothee_fast_crawler_filter
64
+ key_name useragent
65
+ tag filtered
66
+ </match>
67
+
68
+ With this configuration, 'fluent-plugin-woothee' uses 'Woothee.is_crawler' of woothee, a fast but incomplete method to judge whether a user-agent is a crawler or not.
69
+ If you want to drop all of crawlers completely, specify 'type woothee' and 'drop_categories crawler'.
70
+
71
+ ## TODO
72
+
73
+ * patches welcome!
74
+
75
+ ## Copyright
76
+
77
+ * Copyright (c) 2012- TAGOMORI Satoshi (tagomoris)
78
+ * License
79
+ * Apache License, Version 2.0
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+
4
+ require 'rake/testtask'
5
+ Rake::TestTask.new(:test) do |test|
6
+ test.libs << 'lib' << 'test'
7
+ test.pattern = 'test/**/test_*.rb'
8
+ test.verbose = true
9
+ end
10
+
11
+ task :default => :test
@@ -0,0 +1,22 @@
# -*- encoding: utf-8 -*-

# Gem specification for fluent-plugin-woothee.
Gem::Specification.new do |gem|
  gem.name          = "fluent-plugin-woothee"
  gem.version       = "0.0.1"
  gem.authors       = ["TAGOMORI Satoshi"]
  gem.email         = ["tagomoris@gmail.com"]
  gem.description   = %q{parsing by Project Woothee. See https://github.com/tagomoris/woothee }
  gem.summary       = %q{Fluentd plugin to parse UserAgent strings}
  gem.homepage      = "https://github.com/tagomoris/fluent-plugin-woothee"
  # Matches the license text shipped in LICENSE.txt.
  gem.license       = "Apache-2.0"

  # Split on newlines only, so that tracked paths containing spaces stay intact.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_development_dependency "rake"
  # fluentd and woothee are needed at runtime, so they are declared once as
  # runtime dependencies. Declaring the same gem a second time as a
  # development dependency is a duplicate dependency.
  gem.add_runtime_dependency "fluentd"
  gem.add_runtime_dependency "woothee", ">= 0.2.4"
end
@@ -0,0 +1,142 @@
# Fluentd output plugin that inspects the user-agent string stored in each
# record (under +key_name+) with Woothee and re-emits the record under a
# rewritten tag.
#
# Two operating styles are supported:
#
# * normal mode ('type woothee'): every record is parsed with Woothee.parse;
#   records may be kept only for listed categories (filter_categories),
#   discarded for listed categories (drop_categories), and/or enriched with
#   the parser result (merge_agent_info).
# * fast crawler filter mode ('type woothee_fast_crawler_filter' or
#   'fast_crawler_filter_mode true'): records for which Woothee.is_crawler
#   returns a truthy value are discarded; nothing is merged.
class Fluent::WootheeOutput < Fluent::Output
  Fluent::Plugin.register_output('woothee', self)
  Fluent::Plugin.register_output('woothee_fast_crawler_filter', self)

  # Output tag: either a fixed tag, or prefix removal/addition on the input tag.
  config_param :tag, :string, :default => nil
  config_param :remove_prefix, :string, :default => nil
  config_param :add_prefix, :string, :default => nil

  config_param :fast_crawler_filter_mode, :bool, :default => false

  # Name of the record field that holds the user-agent string.
  config_param :key_name, :string

  # Comma-separated category names, converted to symbols. Surrounding
  # whitespace is stripped so that "pc, smartphone" is accepted.
  config_param :filter_categories, :default => [] do |val|
    val.split(',').map { |name| name.strip.to_sym }
  end
  config_param :drop_categories, :default => [] do |val|
    val.split(',').map { |name| name.strip.to_sym }
  end
  # One of :filter, :drop or :through; set by #configure in normal mode.
  attr_accessor :mode

  config_param :merge_agent_info, :bool, :default => false
  config_param :out_key_name, :string, :default => 'agent_name'
  config_param :out_key_category, :string, :default => 'agent_category'
  config_param :out_key_os, :string, :default => 'agent_os'
  config_param :out_key_version, :string, :default => nil # suppress output
  config_param :out_key_vendor, :string, :default => nil # suppress output

  def initialize
    super
    require 'woothee'
  end

  # Validates the configuration and derives the tag-rewriting strings and
  # the operating mode.
  #
  # Raises Fluent::ConfigError when no output tag rule is given, when 'tag'
  # is combined with prefix options, when fast crawler filter mode is
  # combined with filter/drop/merge options, when both filter and drop
  # categories are given, when a category name is not in
  # Woothee::CATEGORY_LIST, or when the configuration would neither
  # filter/drop nor merge anything.
  def configure(conf)
    super

    configure_tag_rewriting

    if conf['type'] == 'woothee_fast_crawler_filter' || @fast_crawler_filter_mode
      @fast_crawler_filter_mode = true

      if !@filter_categories.empty? || !@drop_categories.empty? || @merge_agent_info
        raise Fluent::ConfigError, "fast_crawler_filter cannot be specified with filter/drop/merge options"
      end

      # @mode is not used in fast crawler filter mode.
      return
    end

    @mode = select_mode

    if @mode == :through && !@merge_agent_info
      raise Fluent::ConfigError, "configured to do nothing (neither filter/drop nor addition of parser result)"
    end
  end

  # Returns the tag to emit under for an incoming +tag+.
  #
  # With 'tag' configured, that fixed tag is always returned. Otherwise
  # remove_prefix is stripped (when the tag starts with "<prefix>." or equals
  # the prefix) and add_prefix is prepended.
  #
  # NOTE(review): when the incoming tag equals remove_prefix and no add_prefix
  # is configured, the slice below yields nil and nil is returned. This is the
  # long-standing behavior and is kept as is -- confirm whether such a
  # configuration should instead be rejected in #configure.
  def tag_mangle(tag)
    return @tag if @tag

    if @remove_prefix &&
        ((tag.start_with?(@removed_prefix_string) && tag.length > @removed_length) || tag == @remove_prefix)
      tag = tag[@removed_length..-1]
    end
    if @add_prefix
      tag = if tag && !tag.empty?
              @added_prefix_string + tag
            else
              @add_prefix
            end
    end
    tag
  end

  # Re-emits every record of +es+ under +tag+ unless Woothee.is_crawler
  # reports its user-agent as a crawler. A missing field is treated as ''.
  def fast_crawler_filter_emit(tag, es)
    es.each do |time, record|
      next if Woothee.is_crawler(record[@key_name] || '')

      Fluent::Engine.emit(tag, time, record)
    end
  end

  # Parses the user-agent of every record of +es+, applies the filter/drop
  # rule selected by @mode, optionally merges the parser result, and re-emits
  # the record under +tag+. A missing field is parsed as ''.
  def normal_emit(tag, es)
    es.each do |time, record|
      parsed = Woothee.parse(record[@key_name] || '')

      category = parsed[Woothee::ATTRIBUTE_CATEGORY]
      next if @mode == :filter && !@filter_categories.include?(category)
      next if @mode == :drop && @drop_categories.include?(category)

      record = merge_parsed(record, parsed) if @merge_agent_info
      Fluent::Engine.emit(tag, time, record)
    end
  end

  # Fluentd entry point: rewrites the tag, dispatches to the emit routine for
  # the active mode, then hands control to the next element of +chain+.
  def emit(tag, es, chain)
    tag = tag_mangle(tag)

    if @fast_crawler_filter_mode
      fast_crawler_filter_emit(tag, es)
    else
      normal_emit(tag, es)
    end

    chain.next
  end

  private

  # Checks the tag/remove_prefix/add_prefix combination and precomputes the
  # dotted prefix strings used by #tag_mangle.
  def configure_tag_rewriting
    if !@tag && !@remove_prefix && !@add_prefix
      raise Fluent::ConfigError, "one of tag, remove_prefix or add_prefix must be specified"
    end
    if @tag && (@remove_prefix || @add_prefix)
      raise Fluent::ConfigError, "both of tag and remove_prefix/add_prefix must not be specified"
    end
    if @remove_prefix
      @removed_prefix_string = @remove_prefix + '.'
      @removed_length = @removed_prefix_string.length
    end
    @added_prefix_string = @add_prefix + '.' if @add_prefix
  end

  # Returns :filter, :drop or :through depending on which category list is
  # configured, validating the configured names on the way.
  def select_mode
    filtering = !@filter_categories.empty?
    dropping = !@drop_categories.empty?

    if filtering && dropping
      raise Fluent::ConfigError, "both of 'filter' and 'drop' categories specified"
    elsif filtering
      validate_categories('filter_categories', @filter_categories)
      :filter
    elsif dropping
      validate_categories('drop_categories', @drop_categories)
      :drop
    else
      :through
    end
  end

  # Raises Fluent::ConfigError naming every entry of +categories+ that is not
  # listed in Woothee::CATEGORY_LIST.
  def validate_categories(param_name, categories)
    unknown = categories.reject { |category| Woothee::CATEGORY_LIST.include?(category) }
    return if unknown.empty?

    raise Fluent::ConfigError, "#{param_name} has invalid category name: #{unknown.join(', ')}"
  end

  # Returns a copy of +record+ extended with the parser result; the incoming
  # record is not modified. Version and vendor are added only when their
  # output keys are configured.
  def merge_parsed(record, parsed)
    merged = record.merge({
        @out_key_name => parsed[Woothee::ATTRIBUTE_NAME],
        @out_key_category => parsed[Woothee::ATTRIBUTE_CATEGORY].to_s,
        @out_key_os => parsed[Woothee::ATTRIBUTE_OS]
      })
    merged[@out_key_version] = parsed[Woothee::ATTRIBUTE_VERSION] if @out_key_version
    merged[@out_key_vendor] = parsed[Woothee::ATTRIBUTE_VENDOR] if @out_key_vendor
    merged
  end
end
data/test/helper.rb ADDED
@@ -0,0 +1,28 @@
require 'rubygems'
require 'bundler'

# Activate the gems of the default and development groups; on failure print
# the Bundler message plus a hint and exit with Bundler's status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => error
  $stderr.puts error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit error.status_code
end

require 'test/unit'

# Make both the plugin sources (../lib) and this test directory requirable.
test_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(test_dir, '..', 'lib'))
$LOAD_PATH.unshift(test_dir)

require 'fluent/test'

# Unless VERBOSE is set in the environment, replace the global logger with an
# object that silently accepts any method call.
unless ENV.has_key?('VERBOSE')
  $log = Object.new.tap do |silent|
    def silent.method_missing(method, *args)
      # pass
    end
  end
end

require 'fluent/plugin/out_woothee'

class Test::Unit::TestCase
end
@@ -0,0 +1,352 @@
require 'helper'

# Tests for Fluent::WootheeOutput: configuration parsing, tag rewriting, and
# the emitted records in fast-crawler-filter, through, filter and drop setups.
class Fluent::WootheeOutputTest < Test::Unit::TestCase
  # fast crawler filter
  CONFIG0 = %[
type woothee_fast_crawler_filter
key_name useragent
tag filtered
]

  # through & merge
  CONFIG1 = %[
type woothee
key_name agent
remove_prefix test
add_prefix merged
merge_agent_info yes
]

  # filter & merge
  CONFIG2 = %[
type woothee
key_name agent
filter_categories pc,smartphone,mobilephone,appliance
remove_prefix test
add_prefix merged
merge_agent_info yes
out_key_name ua_name
out_key_category ua_category
out_key_os ua_os
out_key_version ua_version
out_key_vendor ua_vendor
]

  # drop & non-merge
  CONFIG3 = %[
type woothee
key_name user_agent
drop_categories crawler,misc
tag selected
]

  # User-agent strings fed, in this order, to the through/filter/drop tests.
  # The record for index i carries 'value' => i.
  SAMPLE_AGENTS = [
    'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)',
    'Mozilla/5.0 (Windows NT 6.0; rv:9.0.1) Gecko/20100101 Firefox/9.0.1',
    'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:9.0.1) Gecko/20100101 Firefox/9.0.1',
    'Mozilla/5.0 (Linux; U; Android 3.1; ja-jp; L-06C Build/HMJ37) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13',
    'DoCoMo/1.0/N505i/c20/TB/W24H12',
    'Mozilla/5.0 (PlayStation Vita 1.51) AppleWebKit/531.22.8 (KHTML, like Gecko) Silk/3.2',
    'Mozilla/5.0 (compatible; Google Desktop/5.9.1005.12335; http://desktop.google.com/)',
    'msnbot/1.1 (+http://search.msn.com/msnbot.htm)'
  ]

  # User-agent strings for the fast crawler filter test.
  # The record for index i carries 'value' => i + 1.
  FAST_FILTER_AGENTS = [
    'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
    'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)',
    'Mozilla/5.0 (iPad; U; CPU OS 4_3_2 like Mac OS X; ja-jp) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5',
    'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)',
    'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)',
    'Mozilla/5.0 (compatible; Rakutenbot/1.0; +http://dynamic.rakuten.co.jp/bot.html)',
    'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4; ja-jp) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1',
    'Yeti/1.0 (NHN Corp.; http://help.naver.com/robots/)'
  ]

  def create_driver(conf=CONFIG1,tag='test')
    Fluent::Test::OutputTestDriver.new(Fluent::WootheeOutput, tag).configure(conf)
  end

  # Runs a driver configured with +conf+ on input tag 'test.message', emitting
  # one record per SAMPLE_AGENTS entry with the user agent stored under +key+.
  # Returns the emitted events together with the timestamp used.
  def emit_samples(conf, key)
    driver = create_driver(conf, 'test.message')
    time = Time.parse('2012-07-20 16:40:30').to_i
    driver.run do
      SAMPLE_AGENTS.each_with_index do |agent, value|
        driver.emit({'value' => value, key => agent}, time)
      end
    end
    [driver.emits, time]
  end

  def test_configure
    # fast_crawler_filter
    plugin = create_driver(CONFIG0).instance
    assert_equal true, plugin.fast_crawler_filter_mode
    assert_equal 'useragent', plugin.key_name
    assert_equal 'filtered', plugin.tag

    # through & merge
    plugin = create_driver(CONFIG1).instance
    assert_equal false, plugin.fast_crawler_filter_mode
    assert_equal 'agent', plugin.key_name
    assert_equal 'test', plugin.remove_prefix
    assert_equal 'merged', plugin.add_prefix

    assert_equal 0, plugin.filter_categories.size
    assert_equal 0, plugin.drop_categories.size
    assert_equal :through, plugin.mode

    assert_equal true, plugin.merge_agent_info
    assert_equal 'agent_name', plugin.out_key_name
    assert_equal 'agent_category', plugin.out_key_category
    assert_equal 'agent_os', plugin.out_key_os
    assert_nil plugin.out_key_version
    assert_nil plugin.out_key_vendor

    # filter & merge
    plugin = create_driver(CONFIG2).instance
    assert_equal false, plugin.fast_crawler_filter_mode
    assert_equal 'agent', plugin.key_name
    assert_equal 'test', plugin.remove_prefix
    assert_equal 'merged', plugin.add_prefix

    assert_equal 4, plugin.filter_categories.size
    assert_equal [:pc,:smartphone,:mobilephone,:appliance], plugin.filter_categories
    assert_equal 0, plugin.drop_categories.size
    assert_equal :filter, plugin.mode

    assert_equal true, plugin.merge_agent_info
    assert_equal 'ua_name', plugin.out_key_name
    assert_equal 'ua_category', plugin.out_key_category
    assert_equal 'ua_os', plugin.out_key_os
    assert_equal 'ua_version', plugin.out_key_version
    assert_equal 'ua_vendor', plugin.out_key_vendor

    # drop & non-merge
    plugin = create_driver(CONFIG3).instance
    assert_equal false, plugin.fast_crawler_filter_mode
    assert_equal 'user_agent', plugin.key_name
    assert_equal 'selected', plugin.tag

    assert_equal 0, plugin.filter_categories.size
    assert_equal 2, plugin.drop_categories.size
    assert_equal [:crawler,:misc], plugin.drop_categories
    assert_equal :drop, plugin.mode

    assert_equal false, plugin.merge_agent_info
  end

  def test_tag_mangle
    input_tags = ['data', 'test.data', 'test.test.data', 'test']
    # Each configuration paired with the expected output tag per input tag.
    cases = [
      [CONFIG0, ['filtered', 'filtered', 'filtered', 'filtered']],
      [CONFIG1, ['merged.data', 'merged.data', 'merged.test.data', 'merged']],
      [CONFIG3, ['selected', 'selected', 'selected', 'selected']]
    ]

    cases.each do |conf, expected_tags|
      plugin = create_driver(conf).instance
      input_tags.zip(expected_tags).each do |input, expected|
        assert_equal expected, plugin.tag_mangle(input)
      end
    end
  end

  def test_emit_fast_crawler_filter
    driver = create_driver CONFIG0
    time = Time.parse('2012-07-20 16:19:00').to_i
    driver.run do
      FAST_FILTER_AGENTS.each_with_index do |agent, index|
        driver.emit({'useragent' => agent, 'value' => index + 1}, time)
      end
    end

    emits = driver.emits
    assert_equal 4, emits.size

    tag, emitted_at, first = emits[0]
    assert_equal 'filtered', tag
    assert_equal time, emitted_at
    assert_equal 'Mozilla/5.0 (iPad; U; CPU OS 4_3_2 like Mac OS X; ja-jp) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5', first['useragent']
    assert_equal 3, first['value']
    assert_equal 2, first.keys.size

    # Remaining survivors, identified by their 'value'.
    [4, 6, 7].each_with_index do |value, offset|
      assert_equal value, emits[offset + 1][2]['value']
    end
  end

  # # through & merge
  def test_emit_through
    emits, time = emit_samples(CONFIG1, 'agent')
    assert_equal 8, emits.size
    assert_equal 'merged.message', emits[0][0]
    assert_equal time, emits[0][1]

    # Expected [agent_name, agent_category, agent_os] per SAMPLE_AGENTS entry.
    expected = [
      ['Internet Explorer', 'pc',          'Windows 8'],
      ['Firefox',           'pc',          'Windows Vista'],
      ['Firefox',           'pc',          'Linux'],
      ['Safari',            'smartphone',  'Android'],
      ['docomo',            'mobilephone', 'docomo'],
      ['PlayStation Vita',  'appliance',   'PlayStation Vita'],
      ['Google Desktop',    'misc',        'UNKNOWN'],
      ['msnbot',            'crawler',     'UNKNOWN']
    ]

    expected.each_with_index do |(name, category, os), index|
      m = emits[index][2]
      assert_equal index, m['value']
      assert_equal name, m['agent_name']
      assert_equal category, m['agent_category']
      assert_equal os, m['agent_os']
      assert_equal 5, m.keys.size if index == 0
    end
  end

  # # filter & merge
  def test_emit_filter
    emits, time = emit_samples(CONFIG2, 'agent')
    assert_equal 6, emits.size
    assert_equal 'merged.message', emits[0][0]
    assert_equal time, emits[0][1]

    # Expected [ua_name, ua_category, ua_os, ua_vendor, ua_version] for the
    # first six SAMPLE_AGENTS entries (the misc and crawler ones are filtered out).
    expected = [
      ['Internet Explorer', 'pc',          'Windows 8',        'Microsoft', '10.0'],
      ['Firefox',           'pc',          'Windows Vista',    'Mozilla',   '9.0.1'],
      ['Firefox',           'pc',          'Linux',            'Mozilla',   '9.0.1'],
      ['Safari',            'smartphone',  'Android',          'Apple',     '4.0'],
      ['docomo',            'mobilephone', 'docomo',           'docomo',    'N505i'],
      ['PlayStation Vita',  'appliance',   'PlayStation Vita', 'Sony',      'UNKNOWN']
    ]

    expected.each_with_index do |(name, category, os, vendor, version), index|
      m = emits[index][2]
      assert_equal 7, m.keys.size if index == 0
      assert_equal index, m['value']
      assert_equal name, m['ua_name']
      assert_equal category, m['ua_category']
      assert_equal os, m['ua_os']
      assert_equal vendor, m['ua_vendor']
      assert_equal version, m['ua_version']
    end
  end

  # # drop & non-merge
  def test_emit_drop
    emits, time = emit_samples(CONFIG3, 'user_agent')
    assert_equal 6, emits.size
    assert_equal 'selected', emits[0][0]
    assert_equal time, emits[0][1]

    # The first six SAMPLE_AGENTS entries pass through unchanged, in order.
    6.times do |index|
      m = emits[index][2]
      assert_equal index, m['value']
      assert_equal 2, m.keys.size if index == 0
    end
  end
end
metadata ADDED
@@ -0,0 +1,136 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fluent-plugin-woothee
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - TAGOMORI Satoshi
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-07-20 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: fluentd
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: woothee
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: 0.2.4
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: 0.2.4
62
+ - !ruby/object:Gem::Dependency
63
+ name: fluentd
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: woothee
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: 0.2.4
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: 0.2.4
94
+ description: ! 'parsing by Project Woothee. See https://github.com/tagomoris/woothee '
95
+ email:
96
+ - tagomoris@gmail.com
97
+ executables: []
98
+ extensions: []
99
+ extra_rdoc_files: []
100
+ files:
101
+ - .gitignore
102
+ - Gemfile
103
+ - LICENSE.txt
104
+ - README.md
105
+ - Rakefile
106
+ - fluent-plugin-woothee.gemspec
107
+ - lib/fluent/plugin/out_woothee.rb
108
+ - test/helper.rb
109
+ - test/plugin/test_out_woothee.rb
110
+ homepage: https://github.com/tagomoris/fluent-plugin-woothee
111
+ licenses: []
112
+ post_install_message:
113
+ rdoc_options: []
114
+ require_paths:
115
+ - lib
116
+ required_ruby_version: !ruby/object:Gem::Requirement
117
+ none: false
118
+ requirements:
119
+ - - ! '>='
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
122
+ required_rubygems_version: !ruby/object:Gem::Requirement
123
+ none: false
124
+ requirements:
125
+ - - ! '>='
126
+ - !ruby/object:Gem::Version
127
+ version: '0'
128
+ requirements: []
129
+ rubyforge_project:
130
+ rubygems_version: 1.8.21
131
+ signing_key:
132
+ specification_version: 3
133
+ summary: Fluentd plugin to parse UserAgent strings
134
+ test_files:
135
+ - test/helper.rb
136
+ - test/plugin/test_out_woothee.rb