fluent-plugin-referer-parser 0.0.10 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c5c18c51dd917efdde31a1b3569afc85f60186c7
4
- data.tar.gz: 67bf6e6d8771b84fdf83d13f14e1fa81a2c5389d
3
+ metadata.gz: 7e2b1770c1c3d1a3fc00efce8fc3b2fd88496891
4
+ data.tar.gz: ee51c634a8b0cbf538e310c858d6bea6d512007e
5
5
  SHA512:
6
- metadata.gz: e9ea39d3e47f6490fbed05d20a26ebf024e484c86b910328b45eed881886a8f2a3f405eb21e8647a690b3f2307a18f4328b97ed42868fb2ba832cc0fc2b8b505
7
- data.tar.gz: 3883ba94f1f12970e9edf3ba8c6dd73c17fb980e137dfcf534bfa73d4162527b1b99e37081d84302a00db9dafb2d9577b53ac03ac1affde475e2ec03784f18ec
6
+ metadata.gz: eebfc2c7ceca2a7107d6b10a08f966ac7aa10826f25552b2b79073f970100d8045e9c6140b46c790004621ab19e9a8f91a966f6f70ee54bb6e73feafc3b52dcb
7
+ data.tar.gz: 1b283b06e7d82231b84b6ebab648c7f342150e562f325c456f503709e290cf01aed39d24dea99e225e1b4e0d2782db94904068dfb2af4204bf6c41c66ee466da
data/README.md CHANGED
@@ -1,29 +1,31 @@
1
1
  # fluent-plugin-referer-parser, a plugin for [Fluentd](http://fluentd.org)
2
2
 
3
- ## RefererParserOutput
3
+ ## RefererParserFilter
4
4
 
5
5
  'fluent-plugin-referer-parser' is a Fluentd plugin to parse Referer strings, based on [tagomoris/fluent-plugin-woothee](https://github.com/tagomoris/fluent-plugin-woothee).
6
6
  'fluent-plugin-referer-parser' uses [snowplow/referer-parser](https://github.com/snowplow/referer-parser).
7
7
 
8
+ ## Requirements
9
+
10
+ | fluent-plugin-referer-parser | fluentd | ruby |
11
+ |------------------------------|-------------|--------|
12
+ | >= 0.1.0 | >= v0.14.15 | >= 2.1 |
13
+ | < 0.1.0 | >= v0.12.0 | >= 1.9 |
8
14
 
9
15
  ## Configuration
10
16
 
11
17
  To add referer-parser result into matched messages:
12
18
 
13
- <match input.**>
19
+ <filter>
14
20
  @type referer_parser
15
21
  key_name referer
16
- remove_prefix input
17
- add_prefix merged
18
22
  </filter>
19
23
 
20
24
  Output messages with tag 'merged.**' have 'referer_known', 'referer_referer' and 'referer_search_term' attributes. If you want to change the attribute names, write the configuration as below:
21
25
 
22
- <match input.**>
26
+ <filter>
23
27
  @type referer_parser
24
28
  key_name ref
25
- remove_prefix input
26
- add_prefix merged
27
29
  out_key_known ref_known
28
30
  out_key_referer ref_referer
29
31
  out_key_host ref_host
@@ -3,8 +3,8 @@ require 'English'
3
3
 
4
4
  Gem::Specification.new do |gem|
5
5
  gem.name = 'fluent-plugin-referer-parser'
6
- gem.version = '0.0.10'
7
- gem.authors = ['TAGOMORI Satoshi', 'HARUYAMA Seigo']
6
+ gem.version = '0.1.0'
7
+ gem.authors = ['HARUYAMA Seigo']
8
8
  gem.email = ['haruyama@unixuser.org']
9
9
  gem.description = %q(parsing by referer-parser. See: https://github.com/snowplow/referer-parser)
10
10
  gem.summary = %q(Fluentd plugin to parse UserAgent strings)
@@ -18,6 +18,6 @@ Gem::Specification.new do |gem|
18
18
 
19
19
  gem.add_development_dependency 'rake'
20
20
  gem.add_development_dependency 'test-unit', '>= 3.2'
21
- gem.add_runtime_dependency 'fluentd', '~> 0.12.0'
21
+ gem.add_runtime_dependency 'fluentd', '>= 0.14.15', '< 2'
22
22
  gem.add_runtime_dependency 'referer-parser', '~> 0.3.0'
23
23
  end
@@ -2,7 +2,9 @@ require 'cgi'
2
2
  require 'yaml'
3
3
  require 'referer-parser'
4
4
 
5
- class Fluent::RefererParserFilter < Fluent::Filter
5
+ require 'fluent/plugin/input'
6
+
7
+ class Fluent::Plugin::RefererParserFilter < Fluent::Plugin::Filter
6
8
  Fluent::Plugin.register_filter('referer_parser', self)
7
9
 
8
10
  config_param :key_name, :string
data/test/helper.rb CHANGED
@@ -1,26 +1,6 @@
1
- require 'rubygems'
2
- require 'bundler'
3
- begin
4
- Bundler.setup(:default, :development)
5
- rescue Bundler::BundlerError => e
6
- $stderr.puts e.message
7
- $stderr.puts 'Run `bundle install` to install missing gems'
8
- exit e.status_code
9
- end
1
+ require 'bundler/setup'
10
2
  require 'test/unit'
11
3
 
12
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
13
- $LOAD_PATH.unshift(File.dirname(__FILE__))
4
+ $LOAD_PATH.unshift(File.join(__dir__, '..', 'lib'))
5
+ $LOAD_PATH.unshift(__dir__)
14
6
  require 'fluent/test'
15
- unless ENV.key?('VERBOSE')
16
- nulllogger = Object.new
17
- nulllogger.instance_eval do |obj|
18
- def method_missing(method, *args)
19
- # pass
20
- end
21
- end
22
- $log = nulllogger
23
- end
24
-
25
- class Test::Unit::TestCase
26
- end
@@ -1,7 +1,8 @@
1
1
  require 'helper'
2
+ require 'fluent/test/driver/filter'
2
3
  require 'fluent/plugin/filter_referer_parser'
3
4
 
4
- class Fluent::RefererParserFilterTest < Test::Unit::TestCase
5
+ class RefererParserFilterTest < Test::Unit::TestCase
5
6
  # through & merge
6
7
  CONFIG1 = %(
7
8
  key_name referer
@@ -28,20 +29,19 @@ class Fluent::RefererParserFilterTest < Test::Unit::TestCase
28
29
  Fluent::Test.setup
29
30
  end
30
31
 
31
- def create_driver(conf = CONFIG1, tag = 'test')
32
- Fluent::Test::FilterTestDriver.new(Fluent::RefererParserFilter, tag).configure(conf)
32
+ def create_driver(conf = CONFIG1)
33
+ Fluent::Test::Driver::Filter.new(Fluent::Plugin::RefererParserFilter).configure(conf)
33
34
  end
34
35
 
35
36
  def filter(config, messages)
36
37
  d = create_driver(config)
37
38
  time = Time.parse('2012-07-20 16:40:30').to_i
38
- d.run do
39
+ d.run(default_tag: 'test') do
39
40
  messages.each do |message|
40
- d.emit(message, time)
41
+ d.feed(time, message)
41
42
  end
42
43
  end
43
- filtered = d.filtered_as_array
44
- filtered.map {|m| m[2] }
44
+ d.filtered_records
45
45
  end
46
46
 
47
47
  sub_test_case 'configure' do
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-referer-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.10
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
- - TAGOMORI Satoshi
8
7
  - HARUYAMA Seigo
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2017-05-30 00:00:00.000000000 Z
11
+ date: 2017-06-02 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: rake
@@ -43,16 +42,22 @@ dependencies:
43
42
  name: fluentd
44
43
  requirement: !ruby/object:Gem::Requirement
45
44
  requirements:
46
- - - "~>"
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 0.14.15
48
+ - - "<"
47
49
  - !ruby/object:Gem::Version
48
- version: 0.12.0
50
+ version: '2'
49
51
  type: :runtime
50
52
  prerelease: false
51
53
  version_requirements: !ruby/object:Gem::Requirement
52
54
  requirements:
53
- - - "~>"
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: 0.14.15
58
+ - - "<"
54
59
  - !ruby/object:Gem::Version
55
- version: 0.12.0
60
+ version: '2'
56
61
  - !ruby/object:Gem::Dependency
57
62
  name: referer-parser
58
63
  requirement: !ruby/object:Gem::Requirement
@@ -83,13 +88,11 @@ files:
83
88
  - Rakefile
84
89
  - fluent-plugin-referer-parser.gemspec
85
90
  - lib/fluent/plugin/filter_referer_parser.rb
86
- - lib/fluent/plugin/out_referer_parser.rb
87
91
  - test/.rubocop.yml
88
92
  - test/data/encodings.yaml
89
93
  - test/data/referers.yaml
90
94
  - test/helper.rb
91
95
  - test/plugin/test_filter_referer_parser.rb
92
- - test/plugin/test_out_referer_parser.rb
93
96
  homepage: https://github.com/haruyama/fluent-plugin-referer-parser
94
97
  licenses:
95
98
  - Apache-2.0
@@ -120,4 +123,3 @@ test_files:
120
123
  - test/data/referers.yaml
121
124
  - test/helper.rb
122
125
  - test/plugin/test_filter_referer_parser.rb
123
- - test/plugin/test_out_referer_parser.rb
@@ -1,106 +0,0 @@
1
- # referer parser output
2
- class Fluent::RefererParserOutput < Fluent::Output
3
- Fluent::Plugin.register_output('referer_parser', self)
4
-
5
- config_param :tag, :string, default: nil
6
- config_param :remove_prefix, :string, default: nil
7
- config_param :add_prefix, :string, default: nil
8
-
9
- config_param :key_name, :string
10
- config_param :referers_yaml, :string, default: nil
11
- config_param :encodings_yaml, :string, default: nil
12
-
13
- config_param :out_key_known, :string, default: 'referer_known'
14
- config_param :out_key_referer, :string, default: 'referer_referer'
15
- config_param :out_key_host, :string, default: 'referer_host'
16
- config_param :out_key_search_term, :string, default: 'referer_search_term'
17
-
18
- def initialize
19
- super
20
- require 'cgi'
21
- require 'yaml'
22
- require 'referer-parser'
23
- end
24
-
25
- def configure(conf)
26
- super
27
-
28
- @referer_parser = if @referers_yaml
29
- RefererParser::Parser.new(@referers_yaml)
30
- else
31
- RefererParser::Parser.new
32
- end
33
-
34
- if @encodings_yaml
35
- @encodings = YAML.load_file(@encodings_yaml)
36
- else
37
- @encodings = {}
38
- end
39
-
40
- if !@tag && !@remove_prefix && !@add_prefix
41
- fail Fluent::ConfigError, 'missing both of remove_prefix and add_prefix'
42
- end
43
- if @tag && (@remove_prefix || @add_prefix)
44
- fail Fluent::ConfigError, 'both of tag and remove_prefix/add_prefix must not be specified'
45
- end
46
- if @remove_prefix
47
- @removed_prefix_string = @remove_prefix + '.'
48
- @removed_length = @removed_prefix_string.length
49
- end
50
- @added_prefix_string = @add_prefix + '.' if @add_prefix
51
- end
52
-
53
- def tag_mangle(tag)
54
- if @tag
55
- @tag
56
- else
57
- if @remove_prefix &&
58
- ( (tag.start_with?(@removed_prefix_string) && tag.length > @removed_length) || tag == @remove_prefix)
59
- tag = tag[@removed_length..-1]
60
- end
61
- if @add_prefix
62
- tag = if tag && tag.length > 0
63
- @added_prefix_string + tag
64
- else
65
- @add_prefix
66
- end
67
- end
68
- tag
69
- end
70
- end
71
-
72
- def emit(tag, es, chain)
73
- tag = tag_mangle(tag)
74
- es.each do |time, record|
75
- is_valid = true
76
- parsed = begin
77
- @referer_parser.parse(record[@key_name])
78
- rescue
79
- is_valid = false
80
- {}
81
- end
82
- if is_valid && parsed[:known]
83
- search_term = parsed[:term]
84
- uri = URI.parse(parsed[:uri])
85
- host = uri.host
86
- parameters = CGI.parse(uri.query)
87
- input_encoding = @encodings[host] || parameters['ie'][0] || parameters['ei'][0]
88
- begin
89
- search_term = search_term.force_encoding(input_encoding).encode('utf-8') if input_encoding && /\Autf-?8\z/i !~ input_encoding
90
- rescue
91
- log.error('invalid referer: ' + uri.to_s)
92
- end
93
- record.merge!(
94
- @out_key_known => true,
95
- @out_key_referer => parsed[:source],
96
- @out_key_host => host,
97
- @out_key_search_term => search_term
98
- )
99
- else
100
- record[@out_key_known] = false
101
- end
102
- router.emit(tag, time, record)
103
- end
104
- chain.next
105
- end
106
- end
@@ -1,227 +0,0 @@
1
- require 'helper'
2
- require 'fluent/plugin/out_referer_parser'
3
-
4
- # RefererParserOutput test
5
- class Fluent::RefererParserOutputTest < Test::Unit::TestCase
6
- # through & merge
7
- CONFIG1 = %(
8
- type referer_parser
9
- key_name referer
10
- remove_prefix test
11
- add_prefix merged
12
- )
13
-
14
- CONFIG2 = %(
15
- type referer_parser
16
- key_name ref
17
- remove_prefix test
18
- add_prefix merged
19
- out_key_known ref_known
20
- out_key_referer ref_referer
21
- out_key_host ref_host
22
- out_key_search_term ref_search_term
23
- )
24
-
25
- CONFIG3 = %(
26
- type referer_parser
27
- key_name ref
28
- remove_prefix test
29
- add_prefix merged
30
- referers_yaml test/data/referers.yaml
31
- encodings_yaml test/data/encodings.yaml
32
- )
33
-
34
- def setup
35
- Fluent::Test.setup
36
- end
37
-
38
- def create_driver(conf = CONFIG1, tag = 'test')
39
- Fluent::Test::OutputTestDriver.new(Fluent::RefererParserOutput, tag).configure(conf)
40
- end
41
-
42
- def test_configure
43
- # through & merge
44
- d = create_driver CONFIG1
45
- assert_equal 'referer', d.instance.key_name
46
- assert_equal 'test', d.instance.remove_prefix
47
- assert_equal 'merged', d.instance.add_prefix
48
-
49
- assert_equal 'referer_known', d.instance.out_key_known
50
- assert_equal 'referer_referer', d.instance.out_key_referer
51
- assert_equal 'referer_search_term', d.instance.out_key_search_term
52
-
53
- # filter & merge
54
- d = create_driver CONFIG2
55
- assert_equal 'ref', d.instance.key_name
56
- assert_equal 'test', d.instance.remove_prefix
57
- assert_equal 'merged', d.instance.add_prefix
58
-
59
- assert_equal 'ref_known', d.instance.out_key_known
60
- assert_equal 'ref_referer', d.instance.out_key_referer
61
- assert_equal 'ref_search_term', d.instance.out_key_search_term
62
- end
63
-
64
- def test_tag_mangle
65
- p = create_driver(CONFIG1).instance
66
- assert_equal 'merged.data', p.tag_mangle('data')
67
- assert_equal 'merged.data', p.tag_mangle('test.data')
68
- assert_equal 'merged.test.data', p.tag_mangle('test.test.data')
69
- assert_equal 'merged', p.tag_mangle('test')
70
- end
71
-
72
- def test_emit1
73
- d = create_driver(CONFIG1, 'test.message')
74
- time = Time.parse('2012-07-20 16:40:30').to_i
75
- d.run do
76
- d.emit({ 'value' => 0 }, time)
77
- d.emit({ 'value' => 1, 'referer' => 'http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari' }, time)
78
- d.emit({ 'value' => 2, 'referer' => 'http://www.unixuser.org/' }, time)
79
- d.emit({ 'value' => 3, 'referer' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_JIS&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=' }, time)
80
- d.emit({ 'value' => 4, 'referer' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_J&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=' }, time)
81
- d.emit({ 'value' => 5, 'referer' => 'http://search.yahoo.co.jp/search?p=%E3%81%BB%E3%81%92&aq=-1&oq=&ei=UTF-8&fr=sfp_as&x=wrt' }, time)
82
- end
83
-
84
- emits = d.emits
85
- assert_equal 6, emits.size
86
- assert_equal 'merged.message', emits[0][0]
87
- assert_equal time, emits[0][1]
88
-
89
- m = emits[0][2]
90
- assert_equal 0, m['value']
91
- assert_equal false, m['referer_known']
92
- assert_nil m['referer_referer']
93
- assert_nil m['referer_search_term']
94
- assert_equal 2, m.keys.size
95
-
96
- m = emits[1][2]
97
- assert_equal 1, m['value']
98
- assert_equal true, m['referer_known']
99
- assert_equal 'Google', m['referer_referer']
100
- assert_equal 'www.google.com', m['referer_host']
101
- assert_equal 'gateway oracle cards denise linn', m['referer_search_term']
102
- assert_equal 6, m.keys.size
103
-
104
- m = emits[2][2]
105
- assert_equal 2, m['value']
106
- assert_equal false, m['referer_known']
107
- assert_nil m['referer_referer']
108
- assert_nil m['referer_search_term']
109
- assert_equal 3, m.keys.size
110
-
111
- m = emits[3][2]
112
- assert_equal 3, m['value']
113
- assert_equal true, m['referer_known']
114
- assert_equal 'Google', m['referer_referer']
115
- assert_equal 'www.google.co.jp', m['referer_host']
116
- assert_equal 'マルチキャスト 学士論文', m['referer_search_term']
117
- assert_equal 6, m.keys.size
118
-
119
- # invalid input_encoding
120
- m = emits[4][2]
121
- assert_equal 4, m['value']
122
- assert_equal true, m['referer_known']
123
- assert_equal 'Google', m['referer_referer']
124
- assert_equal 'www.google.co.jp', m['referer_host']
125
- assert_equal 6, m.keys.size
126
-
127
- m = emits[5][2]
128
- assert_equal 5, m['value']
129
- assert_equal true, m['referer_known']
130
- assert_equal 'Yahoo!', m['referer_referer']
131
- assert_equal 'search.yahoo.co.jp', m['referer_host']
132
- assert_equal 'ほげ', m['referer_search_term']
133
- assert_equal 6, m.keys.size
134
- end
135
-
136
- def test_emit2
137
- d = create_driver(CONFIG2, 'test.message')
138
- time = Time.parse('2012-07-20 16:40:30').to_i
139
- d.run do
140
- d.emit({ 'value' => 0 }, time)
141
- d.emit({ 'value' => 1, 'ref' => 'http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari' }, time)
142
- d.emit({ 'value' => 2, 'ref' => 'http://www.unixuser.org/' }, time)
143
- d.emit({ 'value' => 3, 'ref' => 'https://www.google.com/search?q=%E3%81%BB%E3%81%92&ie=utf-8&oe=utf-8' }, time)
144
- end
145
-
146
- emits = d.emits
147
- assert_equal 4, emits.size
148
- assert_equal 'merged.message', emits[0][0]
149
- assert_equal time, emits[0][1]
150
-
151
- m = emits[0][2]
152
- assert_equal 0, m['value']
153
- assert_equal false, m['ref_known']
154
- assert_nil m['ref_referer']
155
- assert_nil m['ref_search_term']
156
- assert_equal 2, m.keys.size
157
-
158
- m = emits[1][2]
159
- assert_equal 1, m['value']
160
- assert_equal true, m['ref_known']
161
- assert_equal 'Google', m['ref_referer']
162
- assert_equal 'www.google.com', m['ref_host']
163
- assert_equal 'gateway oracle cards denise linn', m['ref_search_term']
164
- assert_equal 6, m.keys.size
165
-
166
- m = emits[2][2]
167
- assert_equal 2, m['value']
168
- assert_equal false, m['ref_known']
169
- assert_nil m['ref_referer']
170
- assert_nil m['ref_host']
171
- assert_nil m['ref_search_term']
172
-
173
- m = emits[3][2]
174
- assert_equal 3, m['value']
175
- assert_equal true, m['ref_known']
176
- assert_equal 'Google', m['ref_referer']
177
- assert_equal 'www.google.com', m['ref_host']
178
- assert_equal 'ほげ', m['ref_search_term']
179
- end
180
-
181
- def test_emit3
182
- d = create_driver(CONFIG3, 'test.message')
183
- time = Time.parse('2012-07-20 16:40:30').to_i
184
- d.run do
185
- d.emit({ 'value' => 0 }, time)
186
- d.emit({ 'value' => 1, 'ref' => 'http://ezsch.ezweb.ne.jp/search/?sr=0101&query=aiueo%20%95a%93I' }, time)
187
- d.emit({ 'value' => 2, 'ref' => 'http://ezsch.ezweb.ne.jp/search/ezGoogleMain.php?query=%83%8D' }, time)
188
- d.emit({ 'value' => 3, 'ref' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_JIS&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=' }, time)
189
- end
190
-
191
- emits = d.emits
192
- assert_equal 4, emits.size
193
- assert_equal 'merged.message', emits[0][0]
194
- assert_equal time, emits[0][1]
195
-
196
- m = emits[0][2]
197
- assert_equal 0, m['value']
198
- assert_equal false, m['referer_known']
199
- assert_nil m['referer_referer']
200
- assert_nil m['referer_search_term']
201
- assert_equal 2, m.keys.size
202
-
203
- m = emits[1][2]
204
- assert_equal 1, m['value']
205
- assert_equal true, m['referer_known']
206
- assert_equal 'Ezweb', m['referer_referer']
207
- assert_equal 'ezsch.ezweb.ne.jp', m['referer_host']
208
- assert_equal 'aiueo 病的', m['referer_search_term']
209
- assert_equal 6, m.keys.size
210
-
211
- m = emits[2][2]
212
- assert_equal 2, m['value']
213
- assert_equal true, m['referer_known']
214
- assert_equal 'Ezweb', m['referer_referer']
215
- assert_equal 'ezsch.ezweb.ne.jp', m['referer_host']
216
- assert_equal 'ロ', m['referer_search_term']
217
- assert_equal 6, m.keys.size
218
-
219
- m = emits[3][2]
220
- assert_equal 3, m['value']
221
- assert_equal true, m['referer_known']
222
- assert_equal 'Google', m['referer_referer']
223
- assert_equal 'www.google.co.jp', m['referer_host']
224
- assert_equal 'マルチキャスト 学士論文', m['referer_search_term']
225
- assert_equal 6, m.keys.size
226
- end
227
- end