fluent-plugin-referer-parser 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: dd83b311f03254625e7b916003dec4f6035cc61a
4
- data.tar.gz: ece5d27318aa190985fef7e3cc10d85214210884
3
+ metadata.gz: b2e28d942761a6c6393cd106b2a64ff5be802e4a
4
+ data.tar.gz: c03bf0ed0ebbe8bf97b4368d609f7ad1f0866186
5
5
  SHA512:
6
- metadata.gz: 451ebb2c50be591dc417bcfbffd33bd10f5dd919707f510ce4dfbff4dfde3befd5c39f44b2628f1369f094f8033f622be3fa1b7477832b5fb1bda0aa0adbe291
7
- data.tar.gz: 6df8bb5c8542c145dc5517724a06e6bae3d443092e87f96370c6eac331a02a263f76090d15440054b5d7c349dc3830536b342754e3bfa71f235a36227ffb7d62
6
+ metadata.gz: 137841368528697ede2578449cb931f0ff92f4dcb63202dbd3680312403e80c124be3d9ec3fabbd885d3277c05747ebb1cf09536bfd42562dc8f5d47c6f670be
7
+ data.tar.gz: 5a2841b7f3160bb25dd032c39c7ec484f34e8fb773fd079976c18d3913656448415a15856a4b4f6a9618fe918063897bf098d442e718a276e6cb83e7fbec3bd3
data/README.md CHANGED
@@ -17,7 +17,7 @@ To add referer-parser result into matched messages:
17
17
  add_prefix merged
18
18
  </match>
19
19
 
20
- Output messages with tag 'merged.**' has 'referer\_known', 'referer\_referer' and 'referer\_search\_term' attributes. If you want to change attribute names, write configurations as below:
20
+ Output messages with tag 'merged.**' have 'referer_known', 'referer_referer' and 'referer_search_term' attributes. If you want to change the attribute names, write the configuration as below:
21
21
 
22
22
  <match input.**>
23
23
  type referer_parser
@@ -26,9 +26,14 @@ Output messages with tag 'merged.**' has 'referer\_known', 'referer\_referer' an
26
26
  add_prefix merged
27
27
  out_key_known ref_known
28
28
  out_key_referer ref_referer
29
+ out_key_host ref_host
29
30
  out_key_search_term ref_search_term
30
31
  </match>
31
32
 
33
+ If you want to use your own referer definitions, set the 'referers_yaml' attribute.
34
+ The file given by 'referers_yaml' should be in the referers.yaml format of [snowplow/referer-parser](https://github.com/snowplow/referer-parser).
35
+
36
+ * [Sample](test/data/referers.yaml)
32
37
 
33
38
  ## Copyright
34
39
 
@@ -2,9 +2,9 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = 'fluent-plugin-referer-parser'
5
- gem.version = '0.0.5'
5
+ gem.version = '0.0.6'
6
6
  gem.authors = ['TAGOMORI Satoshi', 'HARUYAMA Seigo']
7
- gem.email = ['tagomoris@gmail.com', 'haruyama@unixuser.org']
7
+ gem.email = ['haruyama@unixuser.org']
8
8
  gem.description = %q{parsing by referer-parser. See: https://github.com/snowplow/referer-parser}
9
9
  gem.summary = %q{Fluentd plugin to parse UserAgent strings}
10
10
  gem.homepage = 'https://github.com/haruyama/fluent-plugin-referer-parser'
@@ -1,5 +1,3 @@
1
- require 'referer-parser'
2
-
3
1
  # referer parser output
4
2
  class Fluent::RefererParserOutput < Fluent::Output
5
3
  Fluent::Plugin.register_output('referer_parser', self)
@@ -8,22 +6,33 @@ class Fluent::RefererParserOutput < Fluent::Output
8
6
  config_param :remove_prefix, :string, default: nil
9
7
  config_param :add_prefix, :string, default: nil
10
8
 
11
- config_param :key_name, :string
9
+ config_param :key_name, :string
10
+ config_param :referers_yaml, :string, default: nil
11
+ config_param :encodings_yaml, :string, default: nil
12
12
 
13
13
  config_param :out_key_known, :string, default: 'referer_known'
14
14
  config_param :out_key_referer, :string, default: 'referer_referer'
15
+ config_param :out_key_host, :string, default: 'referer_host'
15
16
  config_param :out_key_search_term, :string, default: 'referer_search_term'
16
17
 
17
- REFERER = RefererParser::Referer.new('http://example.org/')
18
-
19
18
  def initialize
20
19
  super
21
20
  require 'cgi'
21
+ require 'yaml'
22
+ require 'referer-parser'
22
23
  end
23
24
 
24
25
  def configure(conf)
25
26
  super
26
27
 
28
+ @referer_parser = RefererParser::Referer.new('http://example.org/', @referers_yaml)
29
+
30
+ if @encodings_yaml
31
+ @encodings = YAML.load_file(@encodings_yaml)
32
+ else
33
+ @encodings = {}
34
+ end
35
+
27
36
  if !@tag && !@remove_prefix && !@add_prefix
28
37
  fail Fluent::ConfigError, 'missing both of remove_prefix and add_prefix'
29
38
  end
@@ -59,24 +68,26 @@ class Fluent::RefererParserOutput < Fluent::Output
59
68
  def emit(tag, es, chain)
60
69
  tag = tag_mangle(tag)
61
70
  es.each do |time, record|
62
- valid = true
71
+ is_valid = true
63
72
  begin
64
- REFERER.parse(record[@key_name])
73
+ @referer_parser.parse(record[@key_name])
65
74
  rescue
66
- valid = false
75
+ is_valid = false
67
76
  end
68
- if valid && REFERER.known?
69
- search_term = REFERER.search_term
70
- parameters = CGI.parse(REFERER.uri.query)
71
- input_encoding = parameters['ie'][0] || parameters['ei'][0]
77
+ if is_valid && @referer_parser.known?
78
+ search_term = @referer_parser.search_term
79
+ host = @referer_parser.uri.host
80
+ parameters = CGI.parse(@referer_parser.uri.query)
81
+ input_encoding = @encodings[host] || parameters['ie'][0] || parameters['ei'][0]
72
82
  begin
73
83
  search_term = search_term.force_encoding(input_encoding).encode('utf-8') if input_encoding && /\Autf-?8\z/i !~ input_encoding
74
84
  rescue
75
- $log.error('invalid referer: ' + REFERER.uri.to_s)
85
+ $log.error('invalid referer: ' + @referer_parser.uri.to_s)
76
86
  end
77
87
  record.merge!(
78
88
  @out_key_known => true,
79
- @out_key_referer => REFERER.referer,
89
+ @out_key_referer => @referer_parser.referer,
90
+ @out_key_host => host,
80
91
  @out_key_search_term => search_term,
81
92
  )
82
93
  else
@@ -0,0 +1,2 @@
1
+ ---
2
+ ezsch.ezweb.ne.jp: Shift_JIS
@@ -0,0 +1,16 @@
1
+ search:
2
+
3
+ Google:
4
+ parameters:
5
+ - q
6
+ domains:
7
+ - www.google.com
8
+ - www.google.co.jp
9
+
10
+ Ezweb:
11
+ parameters:
12
+ - query
13
+ domains:
14
+ - ezsch.ezweb.ne.jp
15
+
16
+
@@ -17,7 +17,17 @@ remove_prefix test
17
17
  add_prefix merged
18
18
  out_key_known ref_known
19
19
  out_key_referer ref_referer
20
+ out_key_host ref_host
20
21
  out_key_search_term ref_search_term
22
+ ]
23
+
24
+ CONFIG3 = %[
25
+ type referer_parser
26
+ key_name ref
27
+ remove_prefix test
28
+ add_prefix merged
29
+ referers_yaml test/data/referers.yaml
30
+ encodings_yaml test/data/encodings.yaml
21
31
  ]
22
32
 
23
33
  def create_driver(conf = CONFIG1, tag = 'test')
@@ -82,8 +92,9 @@ out_key_search_term ref_search_term
82
92
  assert_equal 1, m['value']
83
93
  assert_equal true, m['referer_known']
84
94
  assert_equal 'Google', m['referer_referer']
95
+ assert_equal 'www.google.com', m['referer_host']
85
96
  assert_equal 'gateway oracle cards denise linn', m['referer_search_term']
86
- assert_equal 5, m.keys.size
97
+ assert_equal 6, m.keys.size
87
98
 
88
99
  m = emits[2][2]
89
100
  assert_equal 2, m['value']
@@ -96,22 +107,25 @@ out_key_search_term ref_search_term
96
107
  assert_equal 3, m['value']
97
108
  assert_equal true, m['referer_known']
98
109
  assert_equal 'Google', m['referer_referer']
110
+ assert_equal 'www.google.co.jp', m['referer_host']
99
111
  assert_equal 'マルチキャスト 学士論文', m['referer_search_term']
100
- assert_equal 5, m.keys.size
112
+ assert_equal 6, m.keys.size
101
113
 
102
114
  # invalid input_encoding
103
115
  m = emits[4][2]
104
116
  assert_equal 4, m['value']
105
117
  assert_equal true, m['referer_known']
106
118
  assert_equal 'Google', m['referer_referer']
107
- assert_equal 5, m.keys.size
119
+ assert_equal 'www.google.co.jp', m['referer_host']
120
+ assert_equal 6, m.keys.size
108
121
 
109
122
  m = emits[5][2]
110
- assert_equal 5, m['value']
111
- assert_equal true, m['referer_known']
112
- assert_equal 'Yahoo!', m['referer_referer']
113
- assert_equal 'ほげ', m['referer_search_term']
114
- assert_equal 5, m.keys.size
123
+ assert_equal 5, m['value']
124
+ assert_equal true, m['referer_known']
125
+ assert_equal 'Yahoo!', m['referer_referer']
126
+ assert_equal 'search.yahoo.co.jp', m['referer_host']
127
+ assert_equal 'ほげ', m['referer_search_term']
128
+ assert_equal 6, m.keys.size
115
129
  end
116
130
 
117
131
  def test_emit2
@@ -132,27 +146,77 @@ out_key_search_term ref_search_term
132
146
  m = emits[0][2]
133
147
  assert_equal 0, m['value']
134
148
  assert_equal false, m['ref_known']
135
- assert_nil m['referer_referer']
136
- assert_nil m['referer_search_term']
149
+ assert_nil m['ref_referer']
150
+ assert_nil m['ref_search_term']
137
151
  assert_equal 2, m.keys.size
138
152
 
139
153
  m = emits[1][2]
140
154
  assert_equal 1, m['value']
141
155
  assert_equal true, m['ref_known']
142
156
  assert_equal 'Google', m['ref_referer']
157
+ assert_equal 'www.google.com', m['ref_host']
143
158
  assert_equal 'gateway oracle cards denise linn', m['ref_search_term']
144
- assert_equal 5, m.keys.size
159
+ assert_equal 6, m.keys.size
145
160
 
146
161
  m = emits[2][2]
147
162
  assert_equal 2, m['value']
148
163
  assert_equal false, m['ref_known']
149
- assert_nil m['referer_referer']
150
- assert_nil m['referer_search_term']
164
+ assert_nil m['ref_referer']
165
+ assert_nil m['ref_host']
166
+ assert_nil m['ref_search_term']
151
167
 
152
168
  m = emits[3][2]
153
- assert_equal 3, m['value']
154
- assert_equal true, m['ref_known']
155
- assert_equal 'Google', m['ref_referer']
156
- assert_equal 'ほげ', m['ref_search_term']
169
+ assert_equal 3, m['value']
170
+ assert_equal true, m['ref_known']
171
+ assert_equal 'Google', m['ref_referer']
172
+ assert_equal 'www.google.com', m['ref_host']
173
+ assert_equal 'ほげ', m['ref_search_term']
174
+ end
175
+
176
+ def test_emit3
177
+ d = create_driver(CONFIG3, 'test.message')
178
+ time = Time.parse('2012-07-20 16:40:30').to_i
179
+ d.run do
180
+ d.emit({ 'value' => 0 }, time)
181
+ d.emit({ 'value' => 1, 'ref' => 'http://ezsch.ezweb.ne.jp/search/?sr=0101&query=aiueo%20%95a%93I' }, time)
182
+ d.emit({ 'value' => 2, 'ref' => 'http://ezsch.ezweb.ne.jp/search/ezGoogleMain.php?query=%83%8D' }, time)
183
+ d.emit({ 'value' => 3, 'ref' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_JIS&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=' }, time)
184
+ end
185
+
186
+ emits = d.emits
187
+ assert_equal 4, emits.size
188
+ assert_equal 'merged.message', emits[0][0]
189
+ assert_equal time, emits[0][1]
190
+
191
+ m = emits[0][2]
192
+ assert_equal 0, m['value']
193
+ assert_equal false, m['referer_known']
194
+ assert_nil m['referer_referer']
195
+ assert_nil m['referer_search_term']
196
+ assert_equal 2, m.keys.size
197
+
198
+ m = emits[1][2]
199
+ assert_equal 1, m['value']
200
+ assert_equal true, m['referer_known']
201
+ assert_equal 'Ezweb', m['referer_referer']
202
+ assert_equal 'ezsch.ezweb.ne.jp', m['referer_host']
203
+ assert_equal 'aiueo 病的', m['referer_search_term']
204
+ assert_equal 6, m.keys.size
205
+
206
+ m = emits[2][2]
207
+ assert_equal 2, m['value']
208
+ assert_equal true, m['referer_known']
209
+ assert_equal 'Ezweb', m['referer_referer']
210
+ assert_equal 'ezsch.ezweb.ne.jp', m['referer_host']
211
+ assert_equal 'ロ', m['referer_search_term']
212
+ assert_equal 6, m.keys.size
213
+
214
+ m = emits[3][2]
215
+ assert_equal 3, m['value']
216
+ assert_equal true, m['referer_known']
217
+ assert_equal 'Google', m['referer_referer']
218
+ assert_equal 'www.google.co.jp', m['referer_host']
219
+ assert_equal 'マルチキャスト 学士論文', m['referer_search_term']
220
+ assert_equal 6, m.keys.size
157
221
  end
158
222
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-referer-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAGOMORI Satoshi
@@ -9,60 +9,59 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-01-07 00:00:00.000000000 Z
12
+ date: 2014-02-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
- - - '>='
18
+ - - ">="
19
19
  - !ruby/object:Gem::Version
20
20
  version: '0'
21
21
  type: :development
22
22
  prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
25
- - - '>='
25
+ - - ">="
26
26
  - !ruby/object:Gem::Version
27
27
  version: '0'
28
28
  - !ruby/object:Gem::Dependency
29
29
  name: fluentd
30
30
  requirement: !ruby/object:Gem::Requirement
31
31
  requirements:
32
- - - '>='
32
+ - - ">="
33
33
  - !ruby/object:Gem::Version
34
34
  version: '0'
35
35
  type: :runtime
36
36
  prerelease: false
37
37
  version_requirements: !ruby/object:Gem::Requirement
38
38
  requirements:
39
- - - '>='
39
+ - - ">="
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  - !ruby/object:Gem::Dependency
43
43
  name: referer-parser
44
44
  requirement: !ruby/object:Gem::Requirement
45
45
  requirements:
46
- - - '>='
46
+ - - ">="
47
47
  - !ruby/object:Gem::Version
48
48
  version: 0.2.0
49
49
  type: :runtime
50
50
  prerelease: false
51
51
  version_requirements: !ruby/object:Gem::Requirement
52
52
  requirements:
53
- - - '>='
53
+ - - ">="
54
54
  - !ruby/object:Gem::Version
55
55
  version: 0.2.0
56
56
  description: 'parsing by referer-parser. See: https://github.com/snowplow/referer-parser'
57
57
  email:
58
- - tagomoris@gmail.com
59
58
  - haruyama@unixuser.org
60
59
  executables: []
61
60
  extensions: []
62
61
  extra_rdoc_files: []
63
62
  files:
64
- - .gitignore
65
- - .travis.yml
63
+ - ".gitignore"
64
+ - ".travis.yml"
66
65
  - Gemfile
67
66
  - LICENSE.txt
68
67
  - README.md
@@ -70,6 +69,8 @@ files:
70
69
  - fluent-plugin-referer-parser.gemspec
71
70
  - lib/fluent/plugin/.rubocop.yml
72
71
  - lib/fluent/plugin/out_referer_parser.rb
72
+ - test/data/encodings.yaml
73
+ - test/data/referers.yaml
73
74
  - test/helper.rb
74
75
  - test/plugin/.rubocop.yml
75
76
  - test/plugin/test_out_referer_parser.rb
@@ -83,12 +84,12 @@ require_paths:
83
84
  - lib
84
85
  required_ruby_version: !ruby/object:Gem::Requirement
85
86
  requirements:
86
- - - '>='
87
+ - - ">="
87
88
  - !ruby/object:Gem::Version
88
89
  version: '0'
89
90
  required_rubygems_version: !ruby/object:Gem::Requirement
90
91
  requirements:
91
- - - '>='
92
+ - - ">="
92
93
  - !ruby/object:Gem::Version
93
94
  version: '0'
94
95
  requirements: []
@@ -98,6 +99,8 @@ signing_key:
98
99
  specification_version: 4
99
100
  summary: Fluentd plugin to parse UserAgent strings
100
101
  test_files:
102
+ - test/data/encodings.yaml
103
+ - test/data/referers.yaml
101
104
  - test/helper.rb
102
105
  - test/plugin/.rubocop.yml
103
106
  - test/plugin/test_out_referer_parser.rb