fluent-plugin-referer-parser 0.0.10 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c5c18c51dd917efdde31a1b3569afc85f60186c7
4
- data.tar.gz: 67bf6e6d8771b84fdf83d13f14e1fa81a2c5389d
3
+ metadata.gz: 7e2b1770c1c3d1a3fc00efce8fc3b2fd88496891
4
+ data.tar.gz: ee51c634a8b0cbf538e310c858d6bea6d512007e
5
5
  SHA512:
6
- metadata.gz: e9ea39d3e47f6490fbed05d20a26ebf024e484c86b910328b45eed881886a8f2a3f405eb21e8647a690b3f2307a18f4328b97ed42868fb2ba832cc0fc2b8b505
7
- data.tar.gz: 3883ba94f1f12970e9edf3ba8c6dd73c17fb980e137dfcf534bfa73d4162527b1b99e37081d84302a00db9dafb2d9577b53ac03ac1affde475e2ec03784f18ec
6
+ metadata.gz: eebfc2c7ceca2a7107d6b10a08f966ac7aa10826f25552b2b79073f970100d8045e9c6140b46c790004621ab19e9a8f91a966f6f70ee54bb6e73feafc3b52dcb
7
+ data.tar.gz: 1b283b06e7d82231b84b6ebab648c7f342150e562f325c456f503709e290cf01aed39d24dea99e225e1b4e0d2782db94904068dfb2af4204bf6c41c66ee466da
data/README.md CHANGED
@@ -1,29 +1,31 @@
1
1
  # fluent-plugin-referer-parser, a plugin for [Fluentd](http://fluentd.org)
2
2
 
3
- ## RefererParserOutput
3
+ ## RefererParserFilter
4
4
 
5
5
  'fluent-plugin-referer-parser' is a Fluentd plugin to parse Referer strings, based on [tagomoris/fluent-plugin-woothee](https://github.com/tagomoris/fluent-plugin-woothee).
6
6
  'fluent-plugin-referer-parser' uses [snowplow/referer-parser](https://github.com/snowplow/referer-parser).
7
7
 
8
+ ## Requirements
9
+
10
+ | fluent-plugin-referer-parser | fluentd | ruby |
11
+ |------------------------------|-------------|--------|
12
+ | >= 0.1.0 | >= v0.14.15 | >= 2.1 |
13
+ | < 0.1.0 | >= v0.12.0 | >= 1.9 |
8
14
 
9
15
  ## Configuration
10
16
 
11
17
  To add referer-parser result into matched messages:
12
18
 
13
- <match input.**>
19
+ <filter>
14
20
  @type referer_parser
15
21
  key_name referer
16
- remove_prefix input
17
- add_prefix merged
18
22
  </filter>
19
23
 
20
24
  Output messages with tag 'merged.**' have 'referer_known', 'referer_referer' and 'referer_search_term' attributes. If you want to change attribute names, write configurations as below:
21
25
 
22
- <match input.**>
26
+ <filter>
23
27
  @type referer_parser
24
28
  key_name ref
25
- remove_prefix input
26
- add_prefix merged
27
29
  out_key_known ref_known
28
30
  out_key_referer ref_referer
29
31
  out_key_host ref_host
@@ -3,8 +3,8 @@ require 'English'
3
3
 
4
4
  Gem::Specification.new do |gem|
5
5
  gem.name = 'fluent-plugin-referer-parser'
6
- gem.version = '0.0.10'
7
- gem.authors = ['TAGOMORI Satoshi', 'HARUYAMA Seigo']
6
+ gem.version = '0.1.0'
7
+ gem.authors = ['HARUYAMA Seigo']
8
8
  gem.email = ['haruyama@unixuser.org']
9
9
  gem.description = %q(parsing by referer-parser. See: https://github.com/snowplow/referer-parser)
10
10
  gem.summary = %q(Fluentd plugin to parse UserAgent strings)
@@ -18,6 +18,6 @@ Gem::Specification.new do |gem|
18
18
 
19
19
  gem.add_development_dependency 'rake'
20
20
  gem.add_development_dependency 'test-unit', '>= 3.2'
21
- gem.add_runtime_dependency 'fluentd', '~> 0.12.0'
21
+ gem.add_runtime_dependency 'fluentd', '>= 0.14.15', '< 2'
22
22
  gem.add_runtime_dependency 'referer-parser', '~> 0.3.0'
23
23
  end
@@ -2,7 +2,9 @@ require 'cgi'
2
2
  require 'yaml'
3
3
  require 'referer-parser'
4
4
 
5
- class Fluent::RefererParserFilter < Fluent::Filter
5
+ require 'fluent/plugin/input'
6
+
7
+ class Fluent::Plugin::RefererParserFilter < Fluent::Plugin::Filter
6
8
  Fluent::Plugin.register_filter('referer_parser', self)
7
9
 
8
10
  config_param :key_name, :string
data/test/helper.rb CHANGED
@@ -1,26 +1,6 @@
1
- require 'rubygems'
2
- require 'bundler'
3
- begin
4
- Bundler.setup(:default, :development)
5
- rescue Bundler::BundlerError => e
6
- $stderr.puts e.message
7
- $stderr.puts 'Run `bundle install` to install missing gems'
8
- exit e.status_code
9
- end
1
+ require 'bundler/setup'
10
2
  require 'test/unit'
11
3
 
12
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
13
- $LOAD_PATH.unshift(File.dirname(__FILE__))
4
+ $LOAD_PATH.unshift(File.join(__dir__, '..', 'lib'))
5
+ $LOAD_PATH.unshift(__dir__)
14
6
  require 'fluent/test'
15
- unless ENV.key?('VERBOSE')
16
- nulllogger = Object.new
17
- nulllogger.instance_eval do |obj|
18
- def method_missing(method, *args)
19
- # pass
20
- end
21
- end
22
- $log = nulllogger
23
- end
24
-
25
- class Test::Unit::TestCase
26
- end
@@ -1,7 +1,8 @@
1
1
  require 'helper'
2
+ require 'fluent/test/driver/filter'
2
3
  require 'fluent/plugin/filter_referer_parser'
3
4
 
4
- class Fluent::RefererParserFilterTest < Test::Unit::TestCase
5
+ class RefererParserFilterTest < Test::Unit::TestCase
5
6
  # through & merge
6
7
  CONFIG1 = %(
7
8
  key_name referer
@@ -28,20 +29,19 @@ class Fluent::RefererParserFilterTest < Test::Unit::TestCase
28
29
  Fluent::Test.setup
29
30
  end
30
31
 
31
- def create_driver(conf = CONFIG1, tag = 'test')
32
- Fluent::Test::FilterTestDriver.new(Fluent::RefererParserFilter, tag).configure(conf)
32
+ def create_driver(conf = CONFIG1)
33
+ Fluent::Test::Driver::Filter.new(Fluent::Plugin::RefererParserFilter).configure(conf)
33
34
  end
34
35
 
35
36
  def filter(config, messages)
36
37
  d = create_driver(config)
37
38
  time = Time.parse('2012-07-20 16:40:30').to_i
38
- d.run do
39
+ d.run(default_tag: 'test') do
39
40
  messages.each do |message|
40
- d.emit(message, time)
41
+ d.feed(time, message)
41
42
  end
42
43
  end
43
- filtered = d.filtered_as_array
44
- filtered.map {|m| m[2] }
44
+ d.filtered_records
45
45
  end
46
46
 
47
47
  sub_test_case 'configure' do
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-referer-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.10
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
- - TAGOMORI Satoshi
8
7
  - HARUYAMA Seigo
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2017-05-30 00:00:00.000000000 Z
11
+ date: 2017-06-02 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: rake
@@ -43,16 +42,22 @@ dependencies:
43
42
  name: fluentd
44
43
  requirement: !ruby/object:Gem::Requirement
45
44
  requirements:
46
- - - "~>"
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 0.14.15
48
+ - - "<"
47
49
  - !ruby/object:Gem::Version
48
- version: 0.12.0
50
+ version: '2'
49
51
  type: :runtime
50
52
  prerelease: false
51
53
  version_requirements: !ruby/object:Gem::Requirement
52
54
  requirements:
53
- - - "~>"
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: 0.14.15
58
+ - - "<"
54
59
  - !ruby/object:Gem::Version
55
- version: 0.12.0
60
+ version: '2'
56
61
  - !ruby/object:Gem::Dependency
57
62
  name: referer-parser
58
63
  requirement: !ruby/object:Gem::Requirement
@@ -83,13 +88,11 @@ files:
83
88
  - Rakefile
84
89
  - fluent-plugin-referer-parser.gemspec
85
90
  - lib/fluent/plugin/filter_referer_parser.rb
86
- - lib/fluent/plugin/out_referer_parser.rb
87
91
  - test/.rubocop.yml
88
92
  - test/data/encodings.yaml
89
93
  - test/data/referers.yaml
90
94
  - test/helper.rb
91
95
  - test/plugin/test_filter_referer_parser.rb
92
- - test/plugin/test_out_referer_parser.rb
93
96
  homepage: https://github.com/haruyama/fluent-plugin-referer-parser
94
97
  licenses:
95
98
  - Apache-2.0
@@ -120,4 +123,3 @@ test_files:
120
123
  - test/data/referers.yaml
121
124
  - test/helper.rb
122
125
  - test/plugin/test_filter_referer_parser.rb
123
- - test/plugin/test_out_referer_parser.rb
@@ -1,106 +0,0 @@
1
- # referer parser output
2
- class Fluent::RefererParserOutput < Fluent::Output
3
- Fluent::Plugin.register_output('referer_parser', self)
4
-
5
- config_param :tag, :string, default: nil
6
- config_param :remove_prefix, :string, default: nil
7
- config_param :add_prefix, :string, default: nil
8
-
9
- config_param :key_name, :string
10
- config_param :referers_yaml, :string, default: nil
11
- config_param :encodings_yaml, :string, default: nil
12
-
13
- config_param :out_key_known, :string, default: 'referer_known'
14
- config_param :out_key_referer, :string, default: 'referer_referer'
15
- config_param :out_key_host, :string, default: 'referer_host'
16
- config_param :out_key_search_term, :string, default: 'referer_search_term'
17
-
18
- def initialize
19
- super
20
- require 'cgi'
21
- require 'yaml'
22
- require 'referer-parser'
23
- end
24
-
25
- def configure(conf)
26
- super
27
-
28
- @referer_parser = if @referers_yaml
29
- RefererParser::Parser.new(@referers_yaml)
30
- else
31
- RefererParser::Parser.new
32
- end
33
-
34
- if @encodings_yaml
35
- @encodings = YAML.load_file(@encodings_yaml)
36
- else
37
- @encodings = {}
38
- end
39
-
40
- if !@tag && !@remove_prefix && !@add_prefix
41
- fail Fluent::ConfigError, 'missing both of remove_prefix and add_prefix'
42
- end
43
- if @tag && (@remove_prefix || @add_prefix)
44
- fail Fluent::ConfigError, 'both of tag and remove_prefix/add_prefix must not be specified'
45
- end
46
- if @remove_prefix
47
- @removed_prefix_string = @remove_prefix + '.'
48
- @removed_length = @removed_prefix_string.length
49
- end
50
- @added_prefix_string = @add_prefix + '.' if @add_prefix
51
- end
52
-
53
- def tag_mangle(tag)
54
- if @tag
55
- @tag
56
- else
57
- if @remove_prefix &&
58
- ( (tag.start_with?(@removed_prefix_string) && tag.length > @removed_length) || tag == @remove_prefix)
59
- tag = tag[@removed_length..-1]
60
- end
61
- if @add_prefix
62
- tag = if tag && tag.length > 0
63
- @added_prefix_string + tag
64
- else
65
- @add_prefix
66
- end
67
- end
68
- tag
69
- end
70
- end
71
-
72
- def emit(tag, es, chain)
73
- tag = tag_mangle(tag)
74
- es.each do |time, record|
75
- is_valid = true
76
- parsed = begin
77
- @referer_parser.parse(record[@key_name])
78
- rescue
79
- is_valid = false
80
- {}
81
- end
82
- if is_valid && parsed[:known]
83
- search_term = parsed[:term]
84
- uri = URI.parse(parsed[:uri])
85
- host = uri.host
86
- parameters = CGI.parse(uri.query)
87
- input_encoding = @encodings[host] || parameters['ie'][0] || parameters['ei'][0]
88
- begin
89
- search_term = search_term.force_encoding(input_encoding).encode('utf-8') if input_encoding && /\Autf-?8\z/i !~ input_encoding
90
- rescue
91
- log.error('invalid referer: ' + uri.to_s)
92
- end
93
- record.merge!(
94
- @out_key_known => true,
95
- @out_key_referer => parsed[:source],
96
- @out_key_host => host,
97
- @out_key_search_term => search_term
98
- )
99
- else
100
- record[@out_key_known] = false
101
- end
102
- router.emit(tag, time, record)
103
- end
104
- chain.next
105
- end
106
- end
@@ -1,227 +0,0 @@
1
- require 'helper'
2
- require 'fluent/plugin/out_referer_parser'
3
-
4
- # RefererParserOutput test
5
- class Fluent::RefererParserOutputTest < Test::Unit::TestCase
6
- # through & merge
7
- CONFIG1 = %(
8
- type referer_parser
9
- key_name referer
10
- remove_prefix test
11
- add_prefix merged
12
- )
13
-
14
- CONFIG2 = %(
15
- type referer_parser
16
- key_name ref
17
- remove_prefix test
18
- add_prefix merged
19
- out_key_known ref_known
20
- out_key_referer ref_referer
21
- out_key_host ref_host
22
- out_key_search_term ref_search_term
23
- )
24
-
25
- CONFIG3 = %(
26
- type referer_parser
27
- key_name ref
28
- remove_prefix test
29
- add_prefix merged
30
- referers_yaml test/data/referers.yaml
31
- encodings_yaml test/data/encodings.yaml
32
- )
33
-
34
- def setup
35
- Fluent::Test.setup
36
- end
37
-
38
- def create_driver(conf = CONFIG1, tag = 'test')
39
- Fluent::Test::OutputTestDriver.new(Fluent::RefererParserOutput, tag).configure(conf)
40
- end
41
-
42
- def test_configure
43
- # through & merge
44
- d = create_driver CONFIG1
45
- assert_equal 'referer', d.instance.key_name
46
- assert_equal 'test', d.instance.remove_prefix
47
- assert_equal 'merged', d.instance.add_prefix
48
-
49
- assert_equal 'referer_known', d.instance.out_key_known
50
- assert_equal 'referer_referer', d.instance.out_key_referer
51
- assert_equal 'referer_search_term', d.instance.out_key_search_term
52
-
53
- # filter & merge
54
- d = create_driver CONFIG2
55
- assert_equal 'ref', d.instance.key_name
56
- assert_equal 'test', d.instance.remove_prefix
57
- assert_equal 'merged', d.instance.add_prefix
58
-
59
- assert_equal 'ref_known', d.instance.out_key_known
60
- assert_equal 'ref_referer', d.instance.out_key_referer
61
- assert_equal 'ref_search_term', d.instance.out_key_search_term
62
- end
63
-
64
- def test_tag_mangle
65
- p = create_driver(CONFIG1).instance
66
- assert_equal 'merged.data', p.tag_mangle('data')
67
- assert_equal 'merged.data', p.tag_mangle('test.data')
68
- assert_equal 'merged.test.data', p.tag_mangle('test.test.data')
69
- assert_equal 'merged', p.tag_mangle('test')
70
- end
71
-
72
- def test_emit1
73
- d = create_driver(CONFIG1, 'test.message')
74
- time = Time.parse('2012-07-20 16:40:30').to_i
75
- d.run do
76
- d.emit({ 'value' => 0 }, time)
77
- d.emit({ 'value' => 1, 'referer' => 'http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari' }, time)
78
- d.emit({ 'value' => 2, 'referer' => 'http://www.unixuser.org/' }, time)
79
- d.emit({ 'value' => 3, 'referer' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_JIS&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=' }, time)
80
- d.emit({ 'value' => 4, 'referer' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_J&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=' }, time)
81
- d.emit({ 'value' => 5, 'referer' => 'http://search.yahoo.co.jp/search?p=%E3%81%BB%E3%81%92&aq=-1&oq=&ei=UTF-8&fr=sfp_as&x=wrt' }, time)
82
- end
83
-
84
- emits = d.emits
85
- assert_equal 6, emits.size
86
- assert_equal 'merged.message', emits[0][0]
87
- assert_equal time, emits[0][1]
88
-
89
- m = emits[0][2]
90
- assert_equal 0, m['value']
91
- assert_equal false, m['referer_known']
92
- assert_nil m['referer_referer']
93
- assert_nil m['referer_search_term']
94
- assert_equal 2, m.keys.size
95
-
96
- m = emits[1][2]
97
- assert_equal 1, m['value']
98
- assert_equal true, m['referer_known']
99
- assert_equal 'Google', m['referer_referer']
100
- assert_equal 'www.google.com', m['referer_host']
101
- assert_equal 'gateway oracle cards denise linn', m['referer_search_term']
102
- assert_equal 6, m.keys.size
103
-
104
- m = emits[2][2]
105
- assert_equal 2, m['value']
106
- assert_equal false, m['referer_known']
107
- assert_nil m['referer_referer']
108
- assert_nil m['referer_search_term']
109
- assert_equal 3, m.keys.size
110
-
111
- m = emits[3][2]
112
- assert_equal 3, m['value']
113
- assert_equal true, m['referer_known']
114
- assert_equal 'Google', m['referer_referer']
115
- assert_equal 'www.google.co.jp', m['referer_host']
116
- assert_equal 'マルチキャスト 学士論文', m['referer_search_term']
117
- assert_equal 6, m.keys.size
118
-
119
- # invalid input_encoding
120
- m = emits[4][2]
121
- assert_equal 4, m['value']
122
- assert_equal true, m['referer_known']
123
- assert_equal 'Google', m['referer_referer']
124
- assert_equal 'www.google.co.jp', m['referer_host']
125
- assert_equal 6, m.keys.size
126
-
127
- m = emits[5][2]
128
- assert_equal 5, m['value']
129
- assert_equal true, m['referer_known']
130
- assert_equal 'Yahoo!', m['referer_referer']
131
- assert_equal 'search.yahoo.co.jp', m['referer_host']
132
- assert_equal 'ほげ', m['referer_search_term']
133
- assert_equal 6, m.keys.size
134
- end
135
-
136
- def test_emit2
137
- d = create_driver(CONFIG2, 'test.message')
138
- time = Time.parse('2012-07-20 16:40:30').to_i
139
- d.run do
140
- d.emit({ 'value' => 0 }, time)
141
- d.emit({ 'value' => 1, 'ref' => 'http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari' }, time)
142
- d.emit({ 'value' => 2, 'ref' => 'http://www.unixuser.org/' }, time)
143
- d.emit({ 'value' => 3, 'ref' => 'https://www.google.com/search?q=%E3%81%BB%E3%81%92&ie=utf-8&oe=utf-8' }, time)
144
- end
145
-
146
- emits = d.emits
147
- assert_equal 4, emits.size
148
- assert_equal 'merged.message', emits[0][0]
149
- assert_equal time, emits[0][1]
150
-
151
- m = emits[0][2]
152
- assert_equal 0, m['value']
153
- assert_equal false, m['ref_known']
154
- assert_nil m['ref_referer']
155
- assert_nil m['ref_search_term']
156
- assert_equal 2, m.keys.size
157
-
158
- m = emits[1][2]
159
- assert_equal 1, m['value']
160
- assert_equal true, m['ref_known']
161
- assert_equal 'Google', m['ref_referer']
162
- assert_equal 'www.google.com', m['ref_host']
163
- assert_equal 'gateway oracle cards denise linn', m['ref_search_term']
164
- assert_equal 6, m.keys.size
165
-
166
- m = emits[2][2]
167
- assert_equal 2, m['value']
168
- assert_equal false, m['ref_known']
169
- assert_nil m['ref_referer']
170
- assert_nil m['ref_host']
171
- assert_nil m['ref_search_term']
172
-
173
- m = emits[3][2]
174
- assert_equal 3, m['value']
175
- assert_equal true, m['ref_known']
176
- assert_equal 'Google', m['ref_referer']
177
- assert_equal 'www.google.com', m['ref_host']
178
- assert_equal 'ほげ', m['ref_search_term']
179
- end
180
-
181
- def test_emit3
182
- d = create_driver(CONFIG3, 'test.message')
183
- time = Time.parse('2012-07-20 16:40:30').to_i
184
- d.run do
185
- d.emit({ 'value' => 0 }, time)
186
- d.emit({ 'value' => 1, 'ref' => 'http://ezsch.ezweb.ne.jp/search/?sr=0101&query=aiueo%20%95a%93I' }, time)
187
- d.emit({ 'value' => 2, 'ref' => 'http://ezsch.ezweb.ne.jp/search/ezGoogleMain.php?query=%83%8D' }, time)
188
- d.emit({ 'value' => 3, 'ref' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_JIS&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=' }, time)
189
- end
190
-
191
- emits = d.emits
192
- assert_equal 4, emits.size
193
- assert_equal 'merged.message', emits[0][0]
194
- assert_equal time, emits[0][1]
195
-
196
- m = emits[0][2]
197
- assert_equal 0, m['value']
198
- assert_equal false, m['referer_known']
199
- assert_nil m['referer_referer']
200
- assert_nil m['referer_search_term']
201
- assert_equal 2, m.keys.size
202
-
203
- m = emits[1][2]
204
- assert_equal 1, m['value']
205
- assert_equal true, m['referer_known']
206
- assert_equal 'Ezweb', m['referer_referer']
207
- assert_equal 'ezsch.ezweb.ne.jp', m['referer_host']
208
- assert_equal 'aiueo 病的', m['referer_search_term']
209
- assert_equal 6, m.keys.size
210
-
211
- m = emits[2][2]
212
- assert_equal 2, m['value']
213
- assert_equal true, m['referer_known']
214
- assert_equal 'Ezweb', m['referer_referer']
215
- assert_equal 'ezsch.ezweb.ne.jp', m['referer_host']
216
- assert_equal 'ロ', m['referer_search_term']
217
- assert_equal 6, m.keys.size
218
-
219
- m = emits[3][2]
220
- assert_equal 3, m['value']
221
- assert_equal true, m['referer_known']
222
- assert_equal 'Google', m['referer_referer']
223
- assert_equal 'www.google.co.jp', m['referer_host']
224
- assert_equal 'マルチキャスト 学士論文', m['referer_search_term']
225
- assert_equal 6, m.keys.size
226
- end
227
- end