fluent-plugin-referer-parser 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d258ae6d39fefa38045afceb356ddef7b0b63bd3
4
- data.tar.gz: aa497f178a1d9bfae485102a05b315a021c5232e
3
+ metadata.gz: c5c18c51dd917efdde31a1b3569afc85f60186c7
4
+ data.tar.gz: 67bf6e6d8771b84fdf83d13f14e1fa81a2c5389d
5
5
  SHA512:
6
- metadata.gz: 16e4fc677a4f6251c4815e091a0a1f8bffb632c454d47d5d5f49c5bca757655ff4b58509d445833e4787d319c05a9669aefba12e74f6c05d719c81dd5aac3784
7
- data.tar.gz: 3928930d0b7158f3b46c12934c3ef23c4f38ee16a550892b607e45ac0248783d3d0371cec3ef8801656a26f7af26dc5e1da72769d25e3a83b934d3aecb208c9c
6
+ metadata.gz: e9ea39d3e47f6490fbed05d20a26ebf024e484c86b910328b45eed881886a8f2a3f405eb21e8647a690b3f2307a18f4328b97ed42868fb2ba832cc0fc2b8b505
7
+ data.tar.gz: 3883ba94f1f12970e9edf3ba8c6dd73c17fb980e137dfcf534bfa73d4162527b1b99e37081d84302a00db9dafb2d9577b53ac03ac1affde475e2ec03784f18ec
@@ -3,7 +3,7 @@ require 'English'
3
3
 
4
4
  Gem::Specification.new do |gem|
5
5
  gem.name = 'fluent-plugin-referer-parser'
6
- gem.version = '0.0.9'
6
+ gem.version = '0.0.10'
7
7
  gem.authors = ['TAGOMORI Satoshi', 'HARUYAMA Seigo']
8
8
  gem.email = ['haruyama@unixuser.org']
9
9
  gem.description = %q(parsing by referer-parser. See: https://github.com/snowplow/referer-parser)
@@ -0,0 +1,60 @@
1
+ require 'cgi'
2
+ require 'yaml'
3
+ require 'referer-parser'
4
+
5
+ class Fluent::RefererParserFilter < Fluent::Filter
6
+ Fluent::Plugin.register_filter('referer_parser', self)
7
+
8
+ config_param :key_name, :string
9
+ config_param :referers_yaml, :string, default: nil
10
+ config_param :encodings_yaml, :string, default: nil
11
+
12
+ config_param :out_key_known, :string, default: 'referer_known'
13
+ config_param :out_key_referer, :string, default: 'referer_referer'
14
+ config_param :out_key_host, :string, default: 'referer_host'
15
+ config_param :out_key_search_term, :string, default: 'referer_search_term'
16
+
17
+ def configure(conf)
18
+ super
19
+
20
+ @referer_parser = if @referers_yaml
21
+ RefererParser::Parser.new(@referers_yaml)
22
+ else
23
+ RefererParser::Parser.new
24
+ end
25
+
26
+ if @encodings_yaml
27
+ @encodings = YAML.load_file(@encodings_yaml)
28
+ else
29
+ @encodings = {}
30
+ end
31
+ end
32
+
33
+ def filter(tag, time, record)
34
+ begin
35
+ parsed = @referer_parser.parse(record[@key_name])
36
+ record[@out_key_known] = parsed[:known]
37
+ if parsed[:known]
38
+ search_term = parsed[:term]
39
+ uri = URI.parse(parsed[:uri])
40
+ host = uri.host
41
+ parameters = CGI.parse(uri.query)
42
+ input_encoding = @encodings[host] || parameters['ie'][0] || parameters['ei'][0]
43
+ begin
44
+ search_term = search_term.force_encoding(input_encoding).encode('utf-8') if input_encoding && /\Autf-?8\z/i !~ input_encoding
45
+ rescue
46
+ log.error('invalid referer: ' + uri.to_s)
47
+ end
48
+ record.merge!(
49
+ @out_key_known => true,
50
+ @out_key_referer => parsed[:source],
51
+ @out_key_host => host,
52
+ @out_key_search_term => search_term
53
+ )
54
+ end
55
+ rescue
56
+ record[@out_key_known] = false
57
+ end
58
+ record
59
+ end
60
+ end
data/test/helper.rb CHANGED
@@ -22,7 +22,5 @@ unless ENV.key?('VERBOSE')
22
22
  $log = nulllogger
23
23
  end
24
24
 
25
- require 'fluent/plugin/out_referer_parser'
26
-
27
25
  class Test::Unit::TestCase
28
26
  end
@@ -0,0 +1,204 @@
1
+ require 'helper'
2
+ require 'fluent/plugin/filter_referer_parser'
3
+
4
+ class Fluent::RefererParserFilterTest < Test::Unit::TestCase
5
+ # through & merge
6
+ CONFIG1 = %(
7
+ key_name referer
8
+ remove_prefix test
9
+ add_prefix merged
10
+ )
11
+
12
+ CONFIG2 = %(
13
+ key_name ref
14
+ out_key_known ref_known
15
+ out_key_referer ref_referer
16
+ out_key_host ref_host
17
+ out_key_search_term ref_search_term
18
+ )
19
+
20
+ CONFIG3 = %(
21
+ type referer_parser
22
+ key_name ref
23
+ referers_yaml test/data/referers.yaml
24
+ encodings_yaml test/data/encodings.yaml
25
+ )
26
+
27
+ def setup
28
+ Fluent::Test.setup
29
+ end
30
+
31
+ def create_driver(conf = CONFIG1, tag = 'test')
32
+ Fluent::Test::FilterTestDriver.new(Fluent::RefererParserFilter, tag).configure(conf)
33
+ end
34
+
35
+ def filter(config, messages)
36
+ d = create_driver(config)
37
+ time = Time.parse('2012-07-20 16:40:30').to_i
38
+ d.run do
39
+ messages.each do |message|
40
+ d.emit(message, time)
41
+ end
42
+ end
43
+ filtered = d.filtered_as_array
44
+ filtered.map {|m| m[2] }
45
+ end
46
+
47
+ sub_test_case 'configure' do
48
+ test 'through & merge' do
49
+ d = create_driver CONFIG1
50
+ assert_equal 'referer', d.instance.key_name
51
+
52
+ assert_equal 'referer_known', d.instance.out_key_known
53
+ assert_equal 'referer_referer', d.instance.out_key_referer
54
+ assert_equal 'referer_search_term', d.instance.out_key_search_term
55
+ end
56
+
57
+ test 'filter & merge' do
58
+ d = create_driver CONFIG2
59
+ assert_equal 'ref', d.instance.key_name
60
+
61
+ assert_equal 'ref_known', d.instance.out_key_known
62
+ assert_equal 'ref_referer', d.instance.out_key_referer
63
+ assert_equal 'ref_search_term', d.instance.out_key_search_term
64
+ end
65
+ end
66
+
67
+ sub_test_case 'filter' do
68
+ test 'through & merge' do
69
+ messages = [
70
+ { 'value' => 0 },
71
+ { 'value' => 1, 'referer' => 'http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari' },
72
+ { 'value' => 2, 'referer' => 'http://www.unixuser.org/' },
73
+ { 'value' => 3, 'referer' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_JIS&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=' },
74
+ { 'value' => 4, 'referer' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_J&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=' },
75
+ { 'value' => 5, 'referer' => 'http://search.yahoo.co.jp/search?p=%E3%81%BB%E3%81%92&aq=-1&oq=&ei=UTF-8&fr=sfp_as&x=wrt' },
76
+ ]
77
+ expected = [
78
+ {
79
+ 'value' => 0,
80
+ 'referer_known' => false
81
+ },
82
+ {
83
+ 'value' => 1,
84
+ 'referer' => 'http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari',
85
+ 'referer_known' => true,
86
+ 'referer_referer' => 'Google',
87
+ 'referer_host' => 'www.google.com',
88
+ 'referer_search_term' => 'gateway oracle cards denise linn'
89
+ },
90
+ {
91
+ 'value' => 2,
92
+ 'referer' => 'http://www.unixuser.org/',
93
+ 'referer_known' => false
94
+ },
95
+ {
96
+ 'value' => 3,
97
+ 'referer' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_JIS&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=',
98
+ 'referer_known' => true,
99
+ 'referer_referer' => 'Google',
100
+ 'referer_host' => 'www.google.co.jp',
101
+ 'referer_search_term' => 'マルチキャスト 学士論文'
102
+ },
103
+ # invalid input_encoding
104
+ {
105
+ 'value' => 4,
106
+ 'referer' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_J&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=',
107
+ 'referer_known' => true,
108
+ 'referer_referer' => 'Google',
109
+ 'referer_host' => 'www.google.co.jp',
110
+ 'referer_search_term' => 'マルチキャスト 学士論文'.encode("Shift_JIS").force_encoding("US-ASCII")
111
+ },
112
+ {
113
+ 'value' => 5,
114
+ 'referer' => 'http://search.yahoo.co.jp/search?p=%E3%81%BB%E3%81%92&aq=-1&oq=&ei=UTF-8&fr=sfp_as&x=wrt',
115
+ 'referer_known' => true,
116
+ 'referer_referer' => 'Yahoo!',
117
+ 'referer_host' => 'search.yahoo.co.jp',
118
+ 'referer_search_term' => 'ほげ'
119
+ }
120
+ ]
121
+ filtered = filter(CONFIG1, messages)
122
+ assert_equal(expected, filtered)
123
+ end
124
+
125
+ test 'filter & merge' do
126
+ messages = [
127
+ { 'value' => 0 },
128
+ { 'value' => 1, 'ref' => 'http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari' },
129
+ { 'value' => 2, 'ref' => 'http://www.unixuser.org/' },
130
+ { 'value' => 3, 'ref' => 'https://www.google.com/search?q=%E3%81%BB%E3%81%92&ie=utf-8&oe=utf-8' }
131
+ ]
132
+ expected = [
133
+ {
134
+ 'value' => 0,
135
+ 'ref_known' => false
136
+ },
137
+ {
138
+ 'value' => 1,
139
+ 'ref' => 'http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari',
140
+ 'ref_known' => true,
141
+ 'ref_referer' => 'Google',
142
+ 'ref_host' => 'www.google.com',
143
+ 'ref_search_term' => 'gateway oracle cards denise linn'
144
+ },
145
+ {
146
+ 'value' => 2,
147
+ 'ref' => 'http://www.unixuser.org/',
148
+ 'ref_known' => false,
149
+ },
150
+ {
151
+ 'value' => 3,
152
+ 'ref' => 'https://www.google.com/search?q=%E3%81%BB%E3%81%92&ie=utf-8&oe=utf-8',
153
+ 'ref_known' => true,
154
+ 'ref_referer' => 'Google',
155
+ 'ref_host' => 'www.google.com',
156
+ 'ref_search_term' => 'ほげ'
157
+ }
158
+ ]
159
+ filtered = filter(CONFIG2, messages)
160
+ assert_equal(expected, filtered)
161
+ end
162
+
163
+ test 'file' do
164
+ messages = [
165
+ { 'value' => 0 },
166
+ { 'value' => 1, 'ref' => 'http://ezsch.ezweb.ne.jp/search/?sr=0101&query=aiueo%20%95a%93I' },
167
+ { 'value' => 2, 'ref' => 'http://ezsch.ezweb.ne.jp/search/ezGoogleMain.php?query=%83%8D' },
168
+ { 'value' => 3, 'ref' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_JIS&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=' }
169
+ ]
170
+ expected = [
171
+ {
172
+ 'value' => 0,
173
+ 'referer_known' => false
174
+ },
175
+ {
176
+ 'value' => 1,
177
+ 'ref' => 'http://ezsch.ezweb.ne.jp/search/?sr=0101&query=aiueo%20%95a%93I',
178
+ 'referer_known' => true,
179
+ 'referer_referer' => 'Ezweb',
180
+ 'referer_host' => 'ezsch.ezweb.ne.jp',
181
+ 'referer_search_term' => 'aiueo 病的'
182
+ },
183
+ {
184
+ 'value' => 2,
185
+ 'ref' => 'http://ezsch.ezweb.ne.jp/search/ezGoogleMain.php?query=%83%8D',
186
+ 'referer_known' => true,
187
+ 'referer_referer' => 'Ezweb',
188
+ 'referer_host' => 'ezsch.ezweb.ne.jp',
189
+ 'referer_search_term' => 'ロ'
190
+ },
191
+ {
192
+ 'value' => 3,
193
+ 'ref' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_JIS&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=',
194
+ 'referer_known' => true,
195
+ 'referer_referer' => 'Google',
196
+ 'referer_host' => 'www.google.co.jp',
197
+ 'referer_search_term' => 'マルチキャスト 学士論文'
198
+ }
199
+ ]
200
+ filtered = filter(CONFIG3, messages)
201
+ assert_equal(expected, filtered)
202
+ end
203
+ end
204
+ end
@@ -1,4 +1,5 @@
1
1
  require 'helper'
2
+ require 'fluent/plugin/out_referer_parser'
2
3
 
3
4
  # RefererParserOutput test
4
5
  class Fluent::RefererParserOutputTest < Test::Unit::TestCase
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-referer-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAGOMORI Satoshi
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2017-05-29 00:00:00.000000000 Z
12
+ date: 2017-05-30 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -82,11 +82,13 @@ files:
82
82
  - README.md
83
83
  - Rakefile
84
84
  - fluent-plugin-referer-parser.gemspec
85
+ - lib/fluent/plugin/filter_referer_parser.rb
85
86
  - lib/fluent/plugin/out_referer_parser.rb
86
87
  - test/.rubocop.yml
87
88
  - test/data/encodings.yaml
88
89
  - test/data/referers.yaml
89
90
  - test/helper.rb
91
+ - test/plugin/test_filter_referer_parser.rb
90
92
  - test/plugin/test_out_referer_parser.rb
91
93
  homepage: https://github.com/haruyama/fluent-plugin-referer-parser
92
94
  licenses:
@@ -117,4 +119,5 @@ test_files:
117
119
  - test/data/encodings.yaml
118
120
  - test/data/referers.yaml
119
121
  - test/helper.rb
122
+ - test/plugin/test_filter_referer_parser.rb
120
123
  - test/plugin/test_out_referer_parser.rb