fluent-plugin-woothee 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a023e5af86e6a0c8e61e478271210b67f49a0cb9
4
- data.tar.gz: 47c03e7e2554c15a2839b4e78f207cce867ebdd2
3
+ metadata.gz: 7d68a5e296eb04df8c7a1234cf96db2a097c52f8
4
+ data.tar.gz: cd1d52ccf24f32dc9e3cb5f47c774e654da3ed17
5
5
  SHA512:
6
- metadata.gz: 4d4ec145c669b50764a57b546e2697d25237789c0e5e9339f6e35c9908db5bfcfd99cfe55225e9b305e80464e5becdbb1fabbffff21d5ae8c936ef66f75f3503
7
- data.tar.gz: 629c34ed3cc1c175df26a3f06409fd1c0c3923163f0f7673b72a1fb967dc14db38f16ee3fb675b85320bcfb2d49f319742e5bd02457b50444d92348d21bcfa3b
6
+ metadata.gz: 06a22c57d945e94c124a64a066fd202784d489f5e5b46d55277c1289edacf404a018a9002ab93758c5df2c8c49cc1f1838159110aefbe1884e70362bb75bf0b6
7
+ data.tar.gz: fce1881e0f0c538e48f4d86de8f07db25df84445381a086927e03c96a67f477fd7e7518d18c452ab5ccc4758edec4778a6869929b442529cb1db9c6a7f754fb3
data/README.md CHANGED
@@ -69,6 +69,65 @@ If you want to drop __almost__ all of messages with crawler's user-agent, and no
69
69
  With this configuration, 'fluent-plugin-woothee' uses 'Woothee.is_crawler' from woothee, a fast but incomplete method of judging whether a user-agent is a crawler.
70
70
  If you want to drop all crawlers completely, specify 'type woothee' and 'drop_categories crawler'.
71
71
 
72
+ ## WootheeFilter
73
+
74
+ This is the filter version of 'fluent-plugin-woothee'.
75
+ Note that this filter version does not have tag-rewriting functionality.
76
+
77
+ ## Configuration
78
+
79
+ To add the woothee parser result to filtered messages:
80
+
81
+ <filter input.**>
82
+ type woothee
83
+ key_name agent
84
+ merge_agent_info yes
85
+ </filter>
86
+
87
+ Filtered messages keep their original tag and gain attributes such as 'agent\_name', 'agent\_category' and 'agent\_os' from the woothee parser result. If you want to change the attribute names, or also merge attributes for the browser vendor and its version, write the configuration as below:
88
+
89
+ <filter input.**>
90
+ type woothee
91
+ key_name agent
92
+ merge_agent_info yes
93
+ out_key_name ua_name
94
+ out_key_category ua_category
95
+ out_key_os ua_os
96
+ out_key_os_version ua_os_version
97
+ out_key_version ua_version
98
+ out_key_vendor ua_vendor
99
+ </filter>
100
+
101
+ To keep only messages whose user-agent belongs to the specified categories (and merge the woothee parser result), configure like this:
102
+
103
+ <filter input.**>
104
+ type woothee
105
+ key_name agent
106
+ filter_categories pc,smartphone,mobilephone,appliance
107
+ merge_agent_info yes
108
+ </filter>
109
+
110
+ Or, you can specify categories to drop (without merging the woothee result):
111
+
112
+ <filter input.**>
113
+ type woothee
114
+ key_name agent
115
+ drop_categories crawler
116
+ merge_agent_info false # default
117
+ </filter>
118
+
119
+ ### Fast Crawler Filter
120
+
121
+ If you want to drop __almost__ all messages with a crawler's user-agent, without merging the woothee result, just specify the plugin type:
122
+
123
+ <filter input.**>
124
+ type woothee_fast_crawler_filter
125
+ key_name useragent
126
+ </filter>
127
+
128
+ With this configuration, 'fluent-plugin-woothee' uses 'Woothee.is_crawler' from woothee, a fast but incomplete method of judging whether a user-agent is a crawler.
129
+ If you want to drop all crawlers completely, specify 'type woothee' and 'drop_categories crawler'.
130
+
72
131
  ## TODO
73
132
 
74
133
  * patches welcome!
@@ -2,13 +2,13 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "fluent-plugin-woothee"
5
- gem.version = "0.1.1"
5
+ gem.version = "0.2.0"
6
6
  gem.authors = ["TAGOMORI Satoshi"]
7
7
  gem.email = ["tagomoris@gmail.com"]
8
8
  gem.description = %q{parsing by Project Woothee. See https://github.com/woothee/woothee }
9
9
  gem.summary = %q{Fluentd plugin to parse UserAgent strings with woothee parser. It adds device information or filter records with specific device types.}
10
10
  gem.homepage = "https://github.com/tagomoris/fluent-plugin-woothee"
11
- gem.license = "APLv2"
11
+ gem.license = "Apache-2.0"
12
12
 
13
13
  gem.files = `git ls-files`.split($\)
14
14
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
@@ -0,0 +1,107 @@
1
+ class Fluent::WootheeFilter < Fluent::Filter
2
+ Fluent::Plugin.register_filter('woothee', self)
3
+ Fluent::Plugin.register_filter('woothee_fast_crawler_filter', self)
4
+
5
+ config_param :fast_crawler_filter_mode, :bool, :default => false
6
+
7
+ config_param :key_name, :string
8
+
9
+ config_param :filter_categories, :default => [] do |val|
10
+ val.split(',').map(&:to_sym)
11
+ end
12
+ config_param :drop_categories, :default => [] do |val|
13
+ val.split(',').map(&:to_sym)
14
+ end
15
+ attr_accessor :mode
16
+
17
+ config_param :merge_agent_info, :bool, :default => false
18
+ config_param :out_key_name, :string, :default => 'agent_name'
19
+ config_param :out_key_category, :string, :default => 'agent_category'
20
+ config_param :out_key_os, :string, :default => 'agent_os'
21
+ config_param :out_key_os_version, :string, :default => nil # suppress output
22
+ config_param :out_key_version, :string, :default => nil # suppress output
23
+ config_param :out_key_vendor, :string, :default => nil # suppress output
24
+
25
+ def initialize
26
+ super
27
+ require 'woothee'
28
+ end
29
+
30
+ def configure(conf)
31
+ super
32
+
33
+ if conf['type'] == 'woothee_fast_crawler_filter' or @fast_crawler_filter_mode
34
+ @fast_crawler_filter_mode = true
35
+
36
+ if @filter_categories.size > 0 or @drop_categories.size > 0 or @merge_agent_info
37
+ raise Fluent::ConfigError, "fast_crawler_filter cannot be specified with filter/drop/merge options"
38
+ end
39
+
40
+ return
41
+ end
42
+
43
+ if @filter_categories.size > 0 and @drop_categories.size > 0
44
+ raise Fluent::ConfigError, "both of 'filter' and 'drop' categories specified"
45
+ elsif @filter_categories.size > 0
46
+ unless @filter_categories.reduce(true){|r,i| r and Woothee::CATEGORY_LIST.include?(i)}
47
+ raise Fluent::ConfigError, "filter_categories has invalid category name"
48
+ end
49
+ @mode = :filter
50
+ elsif @drop_categories.size > 0
51
+ unless @drop_categories.reduce(true){|r,i| r and Woothee::CATEGORY_LIST.include?(i)}
52
+ raise Fluent::ConfigError, "drop_categories has invalid category name"
53
+ end
54
+ @mode = :drop
55
+ else
56
+ @mode = :through
57
+ end
58
+
59
+ if @mode == :through and not @merge_agent_info
60
+ raise Fluent::ConfigError, "configured not to do nothing (not to do either filter/drop nor addition of parser result)"
61
+ end
62
+ end
63
+
64
+ def fast_crawler_filter_stream(tag, es)
65
+ new_es = Fluent::MultiEventStream.new
66
+
67
+ es.each do |time,record|
68
+ unless Woothee.is_crawler(record[@key_name] || '')
69
+ new_es.add(time, record.dup)
70
+ end
71
+ end
72
+ new_es
73
+ end
74
+
75
+ def normal_filter_stream(tag, es)
76
+ new_es = Fluent::MultiEventStream.new
77
+
78
+ es.each do |time,record|
79
+ parsed = Woothee.parse(record[@key_name] || '')
80
+
81
+ category = parsed[Woothee::ATTRIBUTE_CATEGORY]
82
+ next if @mode == :filter and not @filter_categories.include?(category)
83
+ next if @mode == :drop and @drop_categories.include?(category)
84
+
85
+ if @merge_agent_info
86
+ record = record.merge({
87
+ @out_key_name => parsed[Woothee::ATTRIBUTE_NAME],
88
+ @out_key_category => parsed[Woothee::ATTRIBUTE_CATEGORY].to_s,
89
+ @out_key_os => parsed[Woothee::ATTRIBUTE_OS]
90
+ })
91
+ record[@out_key_os_version] = parsed[Woothee::ATTRIBUTE_OS_VERSION] if @out_key_os_version
92
+ record[@out_key_version] = parsed[Woothee::ATTRIBUTE_VERSION] if @out_key_version
93
+ record[@out_key_vendor] = parsed[Woothee::ATTRIBUTE_VENDOR] if @out_key_vendor
94
+ end
95
+ new_es.add(time, record.dup)
96
+ end
97
+ new_es
98
+ end
99
+
100
+ def filter_stream(tag, es)
101
+ if @fast_crawler_filter_mode
102
+ fast_crawler_filter_stream(tag, es)
103
+ else
104
+ normal_filter_stream(tag, es)
105
+ end
106
+ end
107
+ end if defined?(Fluent::Filter)
@@ -2,6 +2,11 @@ class Fluent::WootheeOutput < Fluent::Output
2
2
  Fluent::Plugin.register_output('woothee', self)
3
3
  Fluent::Plugin.register_output('woothee_fast_crawler_filter', self)
4
4
 
5
+ # Define `router` method of v0.12 to support v0.10 or earlier
6
+ unless method_defined?(:router)
7
+ define_method("router") { Fluent::Engine }
8
+ end
9
+
5
10
  config_param :tag, :string, :default => nil
6
11
  config_param :remove_prefix, :string, :default => nil
7
12
  config_param :add_prefix, :string, :default => nil
@@ -103,7 +108,7 @@ class Fluent::WootheeOutput < Fluent::Output
103
108
  def fast_crawler_filter_emit(tag, es)
104
109
  es.each do |time,record|
105
110
  unless Woothee.is_crawler(record[@key_name] || '')
106
- Fluent::Engine.emit(tag, time, record)
111
+ router.emit(tag, time, record)
107
112
  end
108
113
  end
109
114
  end
@@ -126,7 +131,7 @@ class Fluent::WootheeOutput < Fluent::Output
126
131
  record[@out_key_version] = parsed[Woothee::ATTRIBUTE_VERSION] if @out_key_version
127
132
  record[@out_key_vendor] = parsed[Woothee::ATTRIBUTE_VENDOR] if @out_key_vendor
128
133
  end
129
- Fluent::Engine.emit(tag, time, record)
134
+ router.emit(tag, time, record)
130
135
  end
131
136
  end
132
137
 
data/test/helper.rb CHANGED
@@ -23,6 +23,7 @@ unless ENV.has_key?('VERBOSE')
23
23
  end
24
24
 
25
25
  require 'fluent/plugin/out_woothee'
26
+ require 'fluent/plugin/filter_woothee'
26
27
 
27
28
  class Test::Unit::TestCase
28
29
  end
@@ -0,0 +1,338 @@
1
+ require 'helper'
2
+
3
+ class Fluent::WootheeFilterTest < Test::Unit::TestCase
4
+ # fast crawler filter
5
+ CONFIG0 = %[
6
+ type woothee_fast_crawler_filter
7
+ key_name useragent
8
+ ]
9
+
10
+ # through & merge
11
+ CONFIG1 = %[
12
+ type woothee
13
+ key_name agent
14
+ merge_agent_info yes
15
+ ]
16
+
17
+ # filter & merge
18
+ CONFIG2 = %[
19
+ type woothee
20
+ key_name agent
21
+ filter_categories pc,smartphone,mobilephone,appliance
22
+ merge_agent_info yes
23
+ out_key_name ua_name
24
+ out_key_category ua_category
25
+ out_key_os ua_os
26
+ out_key_os_version ua_os_version
27
+ out_key_version ua_version
28
+ out_key_vendor ua_vendor
29
+ ]
30
+
31
+ # drop & non-merge
32
+ CONFIG3 = %[
33
+ type woothee
34
+ key_name user_agent
35
+ drop_categories crawler,misc
36
+ ]
37
+
38
+ def setup
39
+ omit("Use fluentd v0.12 or later") unless defined?(Fluent::Filter)
40
+
41
+ Fluent::Test.setup
42
+ end
43
+
44
+ def create_driver(conf=CONFIG1,tag='test')
45
+ Fluent::Test::FilterTestDriver.new(Fluent::WootheeFilter, tag).configure(conf)
46
+ end
47
+
48
+ class TestConfigure < self
49
+ def test_fast_crawer_filter
50
+ d = create_driver CONFIG0
51
+ assert_equal true, d.instance.fast_crawler_filter_mode
52
+ assert_equal 'useragent', d.instance.key_name
53
+ end
54
+
55
+ def test_through_and_merge
56
+ d = create_driver CONFIG1
57
+ assert_equal false, d.instance.fast_crawler_filter_mode
58
+ assert_equal 'agent', d.instance.key_name
59
+
60
+ assert_equal 0, d.instance.filter_categories.size
61
+ assert_equal 0, d.instance.drop_categories.size
62
+ assert_equal :through, d.instance.mode
63
+
64
+ assert_equal true, d.instance.merge_agent_info
65
+ assert_equal 'agent_name', d.instance.out_key_name
66
+ assert_equal 'agent_category', d.instance.out_key_category
67
+ assert_equal 'agent_os', d.instance.out_key_os
68
+ assert_nil d.instance.out_key_version
69
+ assert_nil d.instance.out_key_vendor
70
+ end
71
+
72
+ def test_filter_and_merge
73
+ d = create_driver CONFIG2
74
+ assert_equal false, d.instance.fast_crawler_filter_mode
75
+ assert_equal 'agent', d.instance.key_name
76
+
77
+ assert_equal 4, d.instance.filter_categories.size
78
+ assert_equal [:pc,:smartphone,:mobilephone,:appliance], d.instance.filter_categories
79
+ assert_equal 0, d.instance.drop_categories.size
80
+ assert_equal :filter, d.instance.mode
81
+
82
+ assert_equal true, d.instance.merge_agent_info
83
+ assert_equal 'ua_name', d.instance.out_key_name
84
+ assert_equal 'ua_category', d.instance.out_key_category
85
+ assert_equal 'ua_os', d.instance.out_key_os
86
+ assert_equal 'ua_os_version', d.instance.out_key_os_version
87
+ assert_equal 'ua_version', d.instance.out_key_version
88
+ assert_equal 'ua_vendor', d.instance.out_key_vendor
89
+ end
90
+
91
+ def test_drop_and_non_merge
92
+ d = create_driver CONFIG3
93
+ assert_equal false, d.instance.fast_crawler_filter_mode
94
+ assert_equal 'user_agent', d.instance.key_name
95
+
96
+ assert_equal 0, d.instance.filter_categories.size
97
+ assert_equal 2, d.instance.drop_categories.size
98
+ assert_equal [:crawler,:misc], d.instance.drop_categories
99
+ assert_equal :drop, d.instance.mode
100
+
101
+ assert_equal false, d.instance.merge_agent_info
102
+ end
103
+ end
104
+
105
+ def test_filter_fast_crawler_filter_stream
106
+ d = create_driver CONFIG0
107
+ time = Time.parse('2012-07-20 16:19:00').to_i
108
+ d.run do
109
+ d.filter({'useragent' => 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', 'value' => 1}, time)
110
+ d.filter({'useragent' => 'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)', 'value' => 2}, time)
111
+ d.filter({'useragent' => 'Mozilla/5.0 (iPad; U; CPU OS 4_3_2 like Mac OS X; ja-jp) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5', 'value' => 3}, time)
112
+ d.filter({'useragent' => 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)', 'value' => 4}, time)
113
+ d.filter({'useragent' => 'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)', 'value' => 5}, time)
114
+ d.filter({'useragent' => 'Mozilla/5.0 (compatible; Rakutenbot/1.0; +http://dynamic.rakuten.co.jp/bot.html)', 'value' => 6}, time)
115
+ d.filter({'useragent' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4; ja-jp) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1', 'value' => 7}, time)
116
+ d.filter({'useragent' => 'Yeti/1.0 (NHN Corp.; http://help.naver.com/robots/)', 'value' => 8}, time)
117
+ end
118
+
119
+ filtered = d.filtered_as_array
120
+ assert_equal 4, filtered.size
121
+
122
+ assert_equal 'test', filtered[0][0]
123
+ assert_equal time, filtered[0][1]
124
+ assert_equal 'Mozilla/5.0 (iPad; U; CPU OS 4_3_2 like Mac OS X; ja-jp) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5', filtered[0][2]['useragent']
125
+ assert_equal 3, filtered[0][2]['value']
126
+ assert_equal 2, filtered[0][2].keys.size
127
+
128
+ assert_equal 4, filtered[1][2]['value']
129
+ assert_equal 6, filtered[2][2]['value']
130
+ assert_equal 7, filtered[3][2]['value']
131
+ end
132
+
133
+ # through & merge
134
+ def test_filter_through
135
+ d = create_driver(CONFIG1, 'test.message')
136
+ time = Time.parse('2012-07-20 16:40:30').to_i
137
+ d.run do
138
+ d.filter({'value' => 0, 'agent' => 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)'}, time)
139
+ d.filter({'value' => 1, 'agent' => 'Mozilla/5.0 (Windows NT 6.0; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'}, time)
140
+ d.filter({'value' => 2, 'agent' => 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'}, time)
141
+ d.filter({'value' => 3, 'agent' => 'Mozilla/5.0 (Linux; U; Android 3.1; ja-jp; L-06C Build/HMJ37) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13'}, time)
142
+ d.filter({'value' => 4, 'agent' => 'DoCoMo/1.0/N505i/c20/TB/W24H12'}, time)
143
+ d.filter({'value' => 5, 'agent' => 'Mozilla/5.0 (PlayStation Vita 1.51) AppleWebKit/531.22.8 (KHTML, like Gecko) Silk/3.2'}, time)
144
+ d.filter({'value' => 6, 'agent' => 'Mozilla/5.0 (compatible; Google Desktop/5.9.1005.12335; http://desktop.google.com/)'}, time)
145
+ d.filter({'value' => 7, 'agent' => 'msnbot/1.1 (+http://search.msn.com/msnbot.htm)'}, time)
146
+ end
147
+
148
+ filtered = d.filtered_as_array
149
+ assert_equal 8, filtered.size
150
+ assert_equal 'test.message', filtered[0][0]
151
+ assert_equal time, filtered[0][1]
152
+
153
+ # 'agent' => 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)'
154
+ m = filtered[0][2]
155
+ assert_equal 0, m['value']
156
+ assert_equal 'Internet Explorer', m['agent_name']
157
+ assert_equal 'pc', m['agent_category']
158
+ assert_equal 'Windows 8', m['agent_os']
159
+ assert_equal 5, m.keys.size
160
+
161
+ # 'agent' => 'Mozilla/5.0 (Windows NT 6.0; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'
162
+ m = filtered[1][2]
163
+ assert_equal 1, m['value']
164
+ assert_equal 'Firefox', m['agent_name']
165
+ assert_equal 'pc', m['agent_category']
166
+ assert_equal 'Windows Vista', m['agent_os']
167
+
168
+ # 'agent' => 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'
169
+ m = filtered[2][2]
170
+ assert_equal 2, m['value']
171
+ assert_equal 'Firefox', m['agent_name']
172
+ assert_equal 'pc', m['agent_category']
173
+ assert_equal 'Linux', m['agent_os']
174
+
175
+ # 'agent' => 'Mozilla/5.0 (Linux; U; Android 3.1; ja-jp; L-06C Build/HMJ37) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13'
176
+ m = filtered[3][2]
177
+ assert_equal 3, m['value']
178
+ assert_equal 'Safari', m['agent_name']
179
+ assert_equal 'smartphone', m['agent_category']
180
+ assert_equal 'Android', m['agent_os']
181
+
182
+ # 'agent' => 'DoCoMo/1.0/N505i/c20/TB/W24H12'
183
+ m = filtered[4][2]
184
+ assert_equal 4, m['value']
185
+ assert_equal 'docomo', m['agent_name']
186
+ assert_equal 'mobilephone', m['agent_category']
187
+ assert_equal 'docomo', m['agent_os']
188
+
189
+ # 'agent' => 'Mozilla/5.0 (PlayStation Vita 1.51) AppleWebKit/531.22.8 (KHTML, like Gecko) Silk/3.2'
190
+ m = filtered[5][2]
191
+ assert_equal 5, m['value']
192
+ assert_equal 'PlayStation Vita', m['agent_name']
193
+ assert_equal 'appliance', m['agent_category']
194
+ assert_equal 'PlayStation Vita', m['agent_os']
195
+
196
+ # 'agent' => 'Mozilla/5.0 (compatible; Google Desktop/5.9.1005.12335; http://desktop.google.com/)'
197
+ m = filtered[6][2]
198
+ assert_equal 6, m['value']
199
+ assert_equal 'Google Desktop', m['agent_name']
200
+ assert_equal 'misc', m['agent_category']
201
+ assert_equal 'UNKNOWN', m['agent_os']
202
+
203
+ # 'agent' => 'msnbot/1.1 (+http://search.msn.com/msnbot.htm)'
204
+ m = filtered[7][2]
205
+ assert_equal 7, m['value']
206
+ assert_equal 'msnbot', m['agent_name']
207
+ assert_equal 'crawler', m['agent_category']
208
+ assert_equal 'UNKNOWN', m['agent_os']
209
+ end
210
+
211
+ # filter & merge
212
+ def test_filter_stream
213
+ d = create_driver(CONFIG2, 'test.message')
214
+ time = Time.parse('2012-07-20 16:40:30').to_i
215
+ d.run do
216
+ d.filter({'value' => 0, 'agent' => 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)'}, time)
217
+ d.filter({'value' => 1, 'agent' => 'Mozilla/5.0 (Windows NT 6.0; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'}, time)
218
+ d.filter({'value' => 2, 'agent' => 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'}, time)
219
+ d.filter({'value' => 3, 'agent' => 'Mozilla/5.0 (Linux; U; Android 3.1; ja-jp; L-06C Build/HMJ37) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13'}, time)
220
+ d.filter({'value' => 4, 'agent' => 'DoCoMo/1.0/N505i/c20/TB/W24H12'}, time)
221
+ d.filter({'value' => 5, 'agent' => 'Mozilla/5.0 (PlayStation Vita 1.51) AppleWebKit/531.22.8 (KHTML, like Gecko) Silk/3.2'}, time)
222
+ d.filter({'value' => 6, 'agent' => 'Mozilla/5.0 (compatible; Google Desktop/5.9.1005.12335; http://desktop.google.com/)'}, time)
223
+ d.filter({'value' => 7, 'agent' => 'msnbot/1.1 (+http://search.msn.com/msnbot.htm)'}, time)
224
+ end
225
+
226
+ filtered = d.filtered_as_array
227
+ assert_equal 6, filtered.size
228
+ assert_equal 'test.message', filtered[0][0]
229
+ assert_equal time, filtered[0][1]
230
+
231
+ # 'agent' => 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)'
232
+ m = filtered[0][2]
233
+ assert_equal 8, m.keys.size
234
+ assert_equal 0, m['value']
235
+ assert_equal 'Internet Explorer', m['ua_name']
236
+ assert_equal 'pc', m['ua_category']
237
+ assert_equal 'Windows 8', m['ua_os']
238
+ assert_equal 'NT 6.2', m['ua_os_version']
239
+ assert_equal 'Microsoft', m['ua_vendor']
240
+ assert_equal '10.0', m['ua_version']
241
+
242
+ # 'agent' => 'Mozilla/5.0 (Windows NT 6.0; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'
243
+ m = filtered[1][2]
244
+ assert_equal 1, m['value']
245
+ assert_equal 'Firefox', m['ua_name']
246
+ assert_equal 'pc', m['ua_category']
247
+ assert_equal 'Windows Vista', m['ua_os']
248
+ assert_equal 'NT 6.0', m['ua_os_version']
249
+ assert_equal 'Mozilla', m['ua_vendor']
250
+ assert_equal '9.0.1', m['ua_version']
251
+
252
+ # 'agent' => 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'
253
+ m = filtered[2][2]
254
+ assert_equal 2, m['value']
255
+ assert_equal 'Firefox', m['ua_name']
256
+ assert_equal 'pc', m['ua_category']
257
+ assert_equal 'Linux', m['ua_os']
258
+ assert_equal 'UNKNOWN', m['ua_os_version']
259
+ assert_equal 'Mozilla', m['ua_vendor']
260
+ assert_equal '9.0.1', m['ua_version']
261
+
262
+ # 'agent' => 'Mozilla/5.0 (Linux; U; Android 3.1; ja-jp; L-06C Build/HMJ37) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13'
263
+ m = filtered[3][2]
264
+ assert_equal 3, m['value']
265
+ assert_equal 'Safari', m['ua_name']
266
+ assert_equal 'smartphone', m['ua_category']
267
+ assert_equal 'Android', m['ua_os']
268
+ assert_equal '3.1', m['ua_os_version']
269
+ assert_equal 'Apple', m['ua_vendor']
270
+ assert_equal '4.0', m['ua_version']
271
+
272
+ # 'agent' => 'DoCoMo/1.0/N505i/c20/TB/W24H12'
273
+ m = filtered[4][2]
274
+ assert_equal 4, m['value']
275
+ assert_equal 'docomo', m['ua_name']
276
+ assert_equal 'mobilephone', m['ua_category']
277
+ assert_equal 'docomo', m['ua_os']
278
+ assert_equal 'UNKNOWN', m['ua_os_version']
279
+ assert_equal 'docomo', m['ua_vendor']
280
+ assert_equal 'N505i', m['ua_version']
281
+
282
+ # 'agent' => 'Mozilla/5.0 (PlayStation Vita 1.51) AppleWebKit/531.22.8 (KHTML, like Gecko) Silk/3.2'
283
+ m = filtered[5][2]
284
+ assert_equal 5, m['value']
285
+ assert_equal 'PlayStation Vita', m['ua_name']
286
+ assert_equal 'appliance', m['ua_category']
287
+ assert_equal 'PlayStation Vita', m['ua_os']
288
+ assert_equal '1.51', m['ua_os_version']
289
+ assert_equal 'Sony', m['ua_vendor']
290
+ assert_equal 'UNKNOWN', m['ua_version']
291
+ end
292
+
293
+ # drop & non-merge
294
+ def test_filter_drop
295
+ d = create_driver(CONFIG3, 'test.message')
296
+ time = Time.parse('2012-07-20 16:40:30').to_i
297
+ d.run do
298
+ d.filter({'value' => 0, 'user_agent' => 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)'}, time)
299
+ d.filter({'value' => 1, 'user_agent' => 'Mozilla/5.0 (Windows NT 6.0; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'}, time)
300
+ d.filter({'value' => 2, 'user_agent' => 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'}, time)
301
+ d.filter({'value' => 3, 'user_agent' => 'Mozilla/5.0 (Linux; U; Android 3.1; ja-jp; L-06C Build/HMJ37) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13'}, time)
302
+ d.filter({'value' => 4, 'user_agent' => 'DoCoMo/1.0/N505i/c20/TB/W24H12'}, time)
303
+ d.filter({'value' => 5, 'user_agent' => 'Mozilla/5.0 (PlayStation Vita 1.51) AppleWebKit/531.22.8 (KHTML, like Gecko) Silk/3.2'}, time)
304
+ d.filter({'value' => 6, 'user_agent' => 'Mozilla/5.0 (compatible; Google Desktop/5.9.1005.12335; http://desktop.google.com/)'}, time)
305
+ d.filter({'value' => 7, 'user_agent' => 'msnbot/1.1 (+http://search.msn.com/msnbot.htm)'}, time)
306
+ end
307
+
308
+ filtered = d.filtered_as_array
309
+ assert_equal 6, filtered.size
310
+ assert_equal 'test.message', filtered[0][0]
311
+ assert_equal time, filtered[0][1]
312
+
313
+ # 'agent' => 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)'
314
+ m = filtered[0][2]
315
+ assert_equal 0, m['value']
316
+ assert_equal 2, m.keys.size
317
+
318
+ # 'agent' => 'Mozilla/5.0 (Windows NT 6.0; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'
319
+ m = filtered[1][2]
320
+ assert_equal 1, m['value']
321
+
322
+ # 'agent' => 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'
323
+ m = filtered[2][2]
324
+ assert_equal 2, m['value']
325
+
326
+ # 'agent' => 'Mozilla/5.0 (Linux; U; Android 3.1; ja-jp; L-06C Build/HMJ37) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13'
327
+ m = filtered[3][2]
328
+ assert_equal 3, m['value']
329
+
330
+ # 'agent' => 'DoCoMo/1.0/N505i/c20/TB/W24H12'
331
+ m = filtered[4][2]
332
+ assert_equal 4, m['value']
333
+
334
+ # 'agent' => 'Mozilla/5.0 (PlayStation Vita 1.51) AppleWebKit/531.22.8 (KHTML, like Gecko) Silk/3.2'
335
+ m = filtered[5][2]
336
+ assert_equal 5, m['value']
337
+ end
338
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-woothee
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAGOMORI Satoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-26 00:00:00.000000000 Z
11
+ date: 2015-10-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -80,12 +80,14 @@ files:
80
80
  - README.md
81
81
  - Rakefile
82
82
  - fluent-plugin-woothee.gemspec
83
+ - lib/fluent/plugin/filter_woothee.rb
83
84
  - lib/fluent/plugin/out_woothee.rb
84
85
  - test/helper.rb
86
+ - test/plugin/test_filter_woothee.rb
85
87
  - test/plugin/test_out_woothee.rb
86
88
  homepage: https://github.com/tagomoris/fluent-plugin-woothee
87
89
  licenses:
88
- - APLv2
90
+ - Apache-2.0
89
91
  metadata: {}
90
92
  post_install_message:
91
93
  rdoc_options: []
@@ -110,5 +112,6 @@ summary: Fluentd plugin to parse UserAgent strings with woothee parser. It adds
110
112
  information or filter records with specific device types.
111
113
  test_files:
112
114
  - test/helper.rb
115
+ - test/plugin/test_filter_woothee.rb
113
116
  - test/plugin/test_out_woothee.rb
114
117
  has_rdoc: