fluent-plugin-referer-parser 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: dd83b311f03254625e7b916003dec4f6035cc61a
4
- data.tar.gz: ece5d27318aa190985fef7e3cc10d85214210884
3
+ metadata.gz: b2e28d942761a6c6393cd106b2a64ff5be802e4a
4
+ data.tar.gz: c03bf0ed0ebbe8bf97b4368d609f7ad1f0866186
5
5
  SHA512:
6
- metadata.gz: 451ebb2c50be591dc417bcfbffd33bd10f5dd919707f510ce4dfbff4dfde3befd5c39f44b2628f1369f094f8033f622be3fa1b7477832b5fb1bda0aa0adbe291
7
- data.tar.gz: 6df8bb5c8542c145dc5517724a06e6bae3d443092e87f96370c6eac331a02a263f76090d15440054b5d7c349dc3830536b342754e3bfa71f235a36227ffb7d62
6
+ metadata.gz: 137841368528697ede2578449cb931f0ff92f4dcb63202dbd3680312403e80c124be3d9ec3fabbd885d3277c05747ebb1cf09536bfd42562dc8f5d47c6f670be
7
+ data.tar.gz: 5a2841b7f3160bb25dd032c39c7ec484f34e8fb773fd079976c18d3913656448415a15856a4b4f6a9618fe918063897bf098d442e718a276e6cb83e7fbec3bd3
data/README.md CHANGED
@@ -17,7 +17,7 @@ To add referer-parser result into matched messages:
17
17
  add_prefix merged
18
18
  </match>
19
19
 
20
- Output messages with tag 'merged.**' has 'referer\_known', 'referer\_referer' and 'referer\_search\_term' attributes. If you want to change attribute names, write configurations as below:
20
+ Output messages with tag 'merged.**' have the 'referer_known', 'referer_referer' and 'referer_search_term' attributes. If you want to change the attribute names, write the configuration as below:
21
21
 
22
22
  <match input.**>
23
23
  type referer_parser
@@ -26,9 +26,14 @@ Output messages with tag 'merged.**' has 'referer\_known', 'referer\_referer' an
26
26
  add_prefix merged
27
27
  out_key_known ref_known
28
28
  out_key_referer ref_referer
29
+ out_key_host ref_host
29
30
  out_key_search_term ref_search_term
30
31
  </match>
31
32
 
33
+ If you want to use your own referer definitions, you can use the 'referers_yaml' attribute.
34
+ The file given by 'referers_yaml' should be in the referers.yaml format of [snowplow/referer-parser](https://github.com/snowplow/referer-parser).
35
+
36
+ * [Sample](test/data/referers.yaml)
32
37
 
33
38
  ## Copyright
34
39
 
@@ -2,9 +2,9 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = 'fluent-plugin-referer-parser'
5
- gem.version = '0.0.5'
5
+ gem.version = '0.0.6'
6
6
  gem.authors = ['TAGOMORI Satoshi', 'HARUYAMA Seigo']
7
- gem.email = ['tagomoris@gmail.com', 'haruyama@unixuser.org']
7
+ gem.email = ['haruyama@unixuser.org']
8
8
  gem.description = %q{parsing by referer-parser. See: https://github.com/snowplow/referer-parser}
9
9
  gem.summary = %q{Fluentd plugin to parse UserAgent strings}
10
10
  gem.homepage = 'https://github.com/haruyama/fluent-plugin-referer-parser'
@@ -1,5 +1,3 @@
1
- require 'referer-parser'
2
-
3
1
  # referer parser output
4
2
  class Fluent::RefererParserOutput < Fluent::Output
5
3
  Fluent::Plugin.register_output('referer_parser', self)
@@ -8,22 +6,33 @@ class Fluent::RefererParserOutput < Fluent::Output
8
6
  config_param :remove_prefix, :string, default: nil
9
7
  config_param :add_prefix, :string, default: nil
10
8
 
11
- config_param :key_name, :string
9
+ config_param :key_name, :string
10
+ config_param :referers_yaml, :string, default: nil
11
+ config_param :encodings_yaml, :string, default: nil
12
12
 
13
13
  config_param :out_key_known, :string, default: 'referer_known'
14
14
  config_param :out_key_referer, :string, default: 'referer_referer'
15
+ config_param :out_key_host, :string, default: 'referer_host'
15
16
  config_param :out_key_search_term, :string, default: 'referer_search_term'
16
17
 
17
- REFERER = RefererParser::Referer.new('http://example.org/')
18
-
19
18
  def initialize
20
19
  super
21
20
  require 'cgi'
21
+ require 'yaml'
22
+ require 'referer-parser'
22
23
  end
23
24
 
24
25
  def configure(conf)
25
26
  super
26
27
 
28
+ @referer_parser = RefererParser::Referer.new('http://example.org/', @referers_yaml)
29
+
30
+ if @encodings_yaml
31
+ @encodings = YAML.load_file(@encodings_yaml)
32
+ else
33
+ @encodings = {}
34
+ end
35
+
27
36
  if !@tag && !@remove_prefix && !@add_prefix
28
37
  fail Fluent::ConfigError, 'missing both of remove_prefix and add_prefix'
29
38
  end
@@ -59,24 +68,26 @@ class Fluent::RefererParserOutput < Fluent::Output
59
68
  def emit(tag, es, chain)
60
69
  tag = tag_mangle(tag)
61
70
  es.each do |time, record|
62
- valid = true
71
+ is_valid = true
63
72
  begin
64
- REFERER.parse(record[@key_name])
73
+ @referer_parser.parse(record[@key_name])
65
74
  rescue
66
- valid = false
75
+ is_valid = false
67
76
  end
68
- if valid && REFERER.known?
69
- search_term = REFERER.search_term
70
- parameters = CGI.parse(REFERER.uri.query)
71
- input_encoding = parameters['ie'][0] || parameters['ei'][0]
77
+ if is_valid && @referer_parser.known?
78
+ search_term = @referer_parser.search_term
79
+ host = @referer_parser.uri.host
80
+ parameters = CGI.parse(@referer_parser.uri.query)
81
+ input_encoding = @encodings[host] || parameters['ie'][0] || parameters['ei'][0]
72
82
  begin
73
83
  search_term = search_term.force_encoding(input_encoding).encode('utf-8') if input_encoding && /\Autf-?8\z/i !~ input_encoding
74
84
  rescue
75
- $log.error('invalid referer: ' + REFERER.uri.to_s)
85
+ $log.error('invalid referer: ' + @referer_parser.uri.to_s)
76
86
  end
77
87
  record.merge!(
78
88
  @out_key_known => true,
79
- @out_key_referer => REFERER.referer,
89
+ @out_key_referer => @referer_parser.referer,
90
+ @out_key_host => host,
80
91
  @out_key_search_term => search_term,
81
92
  )
82
93
  else
@@ -0,0 +1,2 @@
1
+ ---
2
+ ezsch.ezweb.ne.jp: Shift_JIS
@@ -0,0 +1,16 @@
1
+ search:
2
+
3
+ Google:
4
+ parameters:
5
+ - q
6
+ domains:
7
+ - www.google.com
8
+ - www.google.co.jp
9
+
10
+ Ezweb:
11
+ parameters:
12
+ - query
13
+ domains:
14
+ - ezsch.ezweb.ne.jp
15
+
16
+
@@ -17,7 +17,17 @@ remove_prefix test
17
17
  add_prefix merged
18
18
  out_key_known ref_known
19
19
  out_key_referer ref_referer
20
+ out_key_host ref_host
20
21
  out_key_search_term ref_search_term
22
+ ]
23
+
24
+ CONFIG3 = %[
25
+ type referer_parser
26
+ key_name ref
27
+ remove_prefix test
28
+ add_prefix merged
29
+ referers_yaml test/data/referers.yaml
30
+ encodings_yaml test/data/encodings.yaml
21
31
  ]
22
32
 
23
33
  def create_driver(conf = CONFIG1, tag = 'test')
@@ -82,8 +92,9 @@ out_key_search_term ref_search_term
82
92
  assert_equal 1, m['value']
83
93
  assert_equal true, m['referer_known']
84
94
  assert_equal 'Google', m['referer_referer']
95
+ assert_equal 'www.google.com', m['referer_host']
85
96
  assert_equal 'gateway oracle cards denise linn', m['referer_search_term']
86
- assert_equal 5, m.keys.size
97
+ assert_equal 6, m.keys.size
87
98
 
88
99
  m = emits[2][2]
89
100
  assert_equal 2, m['value']
@@ -96,22 +107,25 @@ out_key_search_term ref_search_term
96
107
  assert_equal 3, m['value']
97
108
  assert_equal true, m['referer_known']
98
109
  assert_equal 'Google', m['referer_referer']
110
+ assert_equal 'www.google.co.jp', m['referer_host']
99
111
  assert_equal 'マルチキャスト 学士論文', m['referer_search_term']
100
- assert_equal 5, m.keys.size
112
+ assert_equal 6, m.keys.size
101
113
 
102
114
  # invalid input_encoding
103
115
  m = emits[4][2]
104
116
  assert_equal 4, m['value']
105
117
  assert_equal true, m['referer_known']
106
118
  assert_equal 'Google', m['referer_referer']
107
- assert_equal 5, m.keys.size
119
+ assert_equal 'www.google.co.jp', m['referer_host']
120
+ assert_equal 6, m.keys.size
108
121
 
109
122
  m = emits[5][2]
110
- assert_equal 5, m['value']
111
- assert_equal true, m['referer_known']
112
- assert_equal 'Yahoo!', m['referer_referer']
113
- assert_equal 'ほげ', m['referer_search_term']
114
- assert_equal 5, m.keys.size
123
+ assert_equal 5, m['value']
124
+ assert_equal true, m['referer_known']
125
+ assert_equal 'Yahoo!', m['referer_referer']
126
+ assert_equal 'search.yahoo.co.jp', m['referer_host']
127
+ assert_equal 'ほげ', m['referer_search_term']
128
+ assert_equal 6, m.keys.size
115
129
  end
116
130
 
117
131
  def test_emit2
@@ -132,27 +146,77 @@ out_key_search_term ref_search_term
132
146
  m = emits[0][2]
133
147
  assert_equal 0, m['value']
134
148
  assert_equal false, m['ref_known']
135
- assert_nil m['referer_referer']
136
- assert_nil m['referer_search_term']
149
+ assert_nil m['ref_referer']
150
+ assert_nil m['ref_search_term']
137
151
  assert_equal 2, m.keys.size
138
152
 
139
153
  m = emits[1][2]
140
154
  assert_equal 1, m['value']
141
155
  assert_equal true, m['ref_known']
142
156
  assert_equal 'Google', m['ref_referer']
157
+ assert_equal 'www.google.com', m['ref_host']
143
158
  assert_equal 'gateway oracle cards denise linn', m['ref_search_term']
144
- assert_equal 5, m.keys.size
159
+ assert_equal 6, m.keys.size
145
160
 
146
161
  m = emits[2][2]
147
162
  assert_equal 2, m['value']
148
163
  assert_equal false, m['ref_known']
149
- assert_nil m['referer_referer']
150
- assert_nil m['referer_search_term']
164
+ assert_nil m['ref_referer']
165
+ assert_nil m['ref_host']
166
+ assert_nil m['ref_search_term']
151
167
 
152
168
  m = emits[3][2]
153
- assert_equal 3, m['value']
154
- assert_equal true, m['ref_known']
155
- assert_equal 'Google', m['ref_referer']
156
- assert_equal 'ほげ', m['ref_search_term']
169
+ assert_equal 3, m['value']
170
+ assert_equal true, m['ref_known']
171
+ assert_equal 'Google', m['ref_referer']
172
+ assert_equal 'www.google.com', m['ref_host']
173
+ assert_equal 'ほげ', m['ref_search_term']
174
+ end
175
+
176
+ def test_emit3
177
+ d = create_driver(CONFIG3, 'test.message')
178
+ time = Time.parse('2012-07-20 16:40:30').to_i
179
+ d.run do
180
+ d.emit({ 'value' => 0 }, time)
181
+ d.emit({ 'value' => 1, 'ref' => 'http://ezsch.ezweb.ne.jp/search/?sr=0101&query=aiueo%20%95a%93I' }, time)
182
+ d.emit({ 'value' => 2, 'ref' => 'http://ezsch.ezweb.ne.jp/search/ezGoogleMain.php?query=%83%8D' }, time)
183
+ d.emit({ 'value' => 3, 'ref' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_JIS&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=' }, time)
184
+ end
185
+
186
+ emits = d.emits
187
+ assert_equal 4, emits.size
188
+ assert_equal 'merged.message', emits[0][0]
189
+ assert_equal time, emits[0][1]
190
+
191
+ m = emits[0][2]
192
+ assert_equal 0, m['value']
193
+ assert_equal false, m['referer_known']
194
+ assert_nil m['referer_referer']
195
+ assert_nil m['referer_search_term']
196
+ assert_equal 2, m.keys.size
197
+
198
+ m = emits[1][2]
199
+ assert_equal 1, m['value']
200
+ assert_equal true, m['referer_known']
201
+ assert_equal 'Ezweb', m['referer_referer']
202
+ assert_equal 'ezsch.ezweb.ne.jp', m['referer_host']
203
+ assert_equal 'aiueo 病的', m['referer_search_term']
204
+ assert_equal 6, m.keys.size
205
+
206
+ m = emits[2][2]
207
+ assert_equal 2, m['value']
208
+ assert_equal true, m['referer_known']
209
+ assert_equal 'Ezweb', m['referer_referer']
210
+ assert_equal 'ezsch.ezweb.ne.jp', m['referer_host']
211
+ assert_equal 'ロ', m['referer_search_term']
212
+ assert_equal 6, m.keys.size
213
+
214
+ m = emits[3][2]
215
+ assert_equal 3, m['value']
216
+ assert_equal true, m['referer_known']
217
+ assert_equal 'Google', m['referer_referer']
218
+ assert_equal 'www.google.co.jp', m['referer_host']
219
+ assert_equal 'マルチキャスト 学士論文', m['referer_search_term']
220
+ assert_equal 6, m.keys.size
157
221
  end
158
222
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-referer-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAGOMORI Satoshi
@@ -9,60 +9,59 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-01-07 00:00:00.000000000 Z
12
+ date: 2014-02-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
- - - '>='
18
+ - - ">="
19
19
  - !ruby/object:Gem::Version
20
20
  version: '0'
21
21
  type: :development
22
22
  prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
25
- - - '>='
25
+ - - ">="
26
26
  - !ruby/object:Gem::Version
27
27
  version: '0'
28
28
  - !ruby/object:Gem::Dependency
29
29
  name: fluentd
30
30
  requirement: !ruby/object:Gem::Requirement
31
31
  requirements:
32
- - - '>='
32
+ - - ">="
33
33
  - !ruby/object:Gem::Version
34
34
  version: '0'
35
35
  type: :runtime
36
36
  prerelease: false
37
37
  version_requirements: !ruby/object:Gem::Requirement
38
38
  requirements:
39
- - - '>='
39
+ - - ">="
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  - !ruby/object:Gem::Dependency
43
43
  name: referer-parser
44
44
  requirement: !ruby/object:Gem::Requirement
45
45
  requirements:
46
- - - '>='
46
+ - - ">="
47
47
  - !ruby/object:Gem::Version
48
48
  version: 0.2.0
49
49
  type: :runtime
50
50
  prerelease: false
51
51
  version_requirements: !ruby/object:Gem::Requirement
52
52
  requirements:
53
- - - '>='
53
+ - - ">="
54
54
  - !ruby/object:Gem::Version
55
55
  version: 0.2.0
56
56
  description: 'parsing by referer-parser. See: https://github.com/snowplow/referer-parser'
57
57
  email:
58
- - tagomoris@gmail.com
59
58
  - haruyama@unixuser.org
60
59
  executables: []
61
60
  extensions: []
62
61
  extra_rdoc_files: []
63
62
  files:
64
- - .gitignore
65
- - .travis.yml
63
+ - ".gitignore"
64
+ - ".travis.yml"
66
65
  - Gemfile
67
66
  - LICENSE.txt
68
67
  - README.md
@@ -70,6 +69,8 @@ files:
70
69
  - fluent-plugin-referer-parser.gemspec
71
70
  - lib/fluent/plugin/.rubocop.yml
72
71
  - lib/fluent/plugin/out_referer_parser.rb
72
+ - test/data/encodings.yaml
73
+ - test/data/referers.yaml
73
74
  - test/helper.rb
74
75
  - test/plugin/.rubocop.yml
75
76
  - test/plugin/test_out_referer_parser.rb
@@ -83,12 +84,12 @@ require_paths:
83
84
  - lib
84
85
  required_ruby_version: !ruby/object:Gem::Requirement
85
86
  requirements:
86
- - - '>='
87
+ - - ">="
87
88
  - !ruby/object:Gem::Version
88
89
  version: '0'
89
90
  required_rubygems_version: !ruby/object:Gem::Requirement
90
91
  requirements:
91
- - - '>='
92
+ - - ">="
92
93
  - !ruby/object:Gem::Version
93
94
  version: '0'
94
95
  requirements: []
@@ -98,6 +99,8 @@ signing_key:
98
99
  specification_version: 4
99
100
  summary: Fluentd plugin to parse UserAgent strings
100
101
  test_files:
102
+ - test/data/encodings.yaml
103
+ - test/data/referers.yaml
101
104
  - test/helper.rb
102
105
  - test/plugin/.rubocop.yml
103
106
  - test/plugin/test_out_referer_parser.rb