fluent-plugin-referer-parser 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5c18c51dd917efdde31a1b3569afc85f60186c7
|
4
|
+
data.tar.gz: 67bf6e6d8771b84fdf83d13f14e1fa81a2c5389d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e9ea39d3e47f6490fbed05d20a26ebf024e484c86b910328b45eed881886a8f2a3f405eb21e8647a690b3f2307a18f4328b97ed42868fb2ba832cc0fc2b8b505
|
7
|
+
data.tar.gz: 3883ba94f1f12970e9edf3ba8c6dd73c17fb980e137dfcf534bfa73d4162527b1b99e37081d84302a00db9dafb2d9577b53ac03ac1affde475e2ec03784f18ec
|
@@ -3,7 +3,7 @@ require 'English'
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |gem|
|
5
5
|
gem.name = 'fluent-plugin-referer-parser'
|
6
|
-
gem.version = '0.0.9'
|
6
|
+
gem.version = '0.0.10'
|
7
7
|
gem.authors = ['TAGOMORI Satoshi', 'HARUYAMA Seigo']
|
8
8
|
gem.email = ['haruyama@unixuser.org']
|
9
9
|
gem.description = %q(parsing by referer-parser. See: https://github.com/snowplow/referer-parser)
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
require 'yaml'
|
3
|
+
require 'referer-parser'
|
4
|
+
|
5
|
+
class Fluent::RefererParserFilter < Fluent::Filter
|
6
|
+
Fluent::Plugin.register_filter('referer_parser', self)
|
7
|
+
|
8
|
+
config_param :key_name, :string
|
9
|
+
config_param :referers_yaml, :string, default: nil
|
10
|
+
config_param :encodings_yaml, :string, default: nil
|
11
|
+
|
12
|
+
config_param :out_key_known, :string, default: 'referer_known'
|
13
|
+
config_param :out_key_referer, :string, default: 'referer_referer'
|
14
|
+
config_param :out_key_host, :string, default: 'referer_host'
|
15
|
+
config_param :out_key_search_term, :string, default: 'referer_search_term'
|
16
|
+
|
17
|
+
def configure(conf)
|
18
|
+
super
|
19
|
+
|
20
|
+
@referer_parser = if @referers_yaml
|
21
|
+
RefererParser::Parser.new(@referers_yaml)
|
22
|
+
else
|
23
|
+
RefererParser::Parser.new
|
24
|
+
end
|
25
|
+
|
26
|
+
if @encodings_yaml
|
27
|
+
@encodings = YAML.load_file(@encodings_yaml)
|
28
|
+
else
|
29
|
+
@encodings = {}
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def filter(tag, time, record)
|
34
|
+
begin
|
35
|
+
parsed = @referer_parser.parse(record[@key_name])
|
36
|
+
record[@out_key_known] = parsed[:known]
|
37
|
+
if parsed[:known]
|
38
|
+
search_term = parsed[:term]
|
39
|
+
uri = URI.parse(parsed[:uri])
|
40
|
+
host = uri.host
|
41
|
+
parameters = CGI.parse(uri.query)
|
42
|
+
input_encoding = @encodings[host] || parameters['ie'][0] || parameters['ei'][0]
|
43
|
+
begin
|
44
|
+
search_term = search_term.force_encoding(input_encoding).encode('utf-8') if input_encoding && /\Autf-?8\z/i !~ input_encoding
|
45
|
+
rescue
|
46
|
+
log.error('invalid referer: ' + uri.to_s)
|
47
|
+
end
|
48
|
+
record.merge!(
|
49
|
+
@out_key_known => true,
|
50
|
+
@out_key_referer => parsed[:source],
|
51
|
+
@out_key_host => host,
|
52
|
+
@out_key_search_term => search_term
|
53
|
+
)
|
54
|
+
end
|
55
|
+
rescue
|
56
|
+
record[@out_key_known] = false
|
57
|
+
end
|
58
|
+
record
|
59
|
+
end
|
60
|
+
end
|
data/test/helper.rb
CHANGED
@@ -0,0 +1,204 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'fluent/plugin/filter_referer_parser'
|
3
|
+
|
4
|
+
class Fluent::RefererParserFilterTest < Test::Unit::TestCase
|
5
|
+
# through & merge
|
6
|
+
CONFIG1 = %(
|
7
|
+
key_name referer
|
8
|
+
remove_prefix test
|
9
|
+
add_prefix merged
|
10
|
+
)
|
11
|
+
|
12
|
+
CONFIG2 = %(
|
13
|
+
key_name ref
|
14
|
+
out_key_known ref_known
|
15
|
+
out_key_referer ref_referer
|
16
|
+
out_key_host ref_host
|
17
|
+
out_key_search_term ref_search_term
|
18
|
+
)
|
19
|
+
|
20
|
+
CONFIG3 = %(
|
21
|
+
type referer_parser
|
22
|
+
key_name ref
|
23
|
+
referers_yaml test/data/referers.yaml
|
24
|
+
encodings_yaml test/data/encodings.yaml
|
25
|
+
)
|
26
|
+
|
27
|
+
def setup
|
28
|
+
Fluent::Test.setup
|
29
|
+
end
|
30
|
+
|
31
|
+
def create_driver(conf = CONFIG1, tag = 'test')
|
32
|
+
Fluent::Test::FilterTestDriver.new(Fluent::RefererParserFilter, tag).configure(conf)
|
33
|
+
end
|
34
|
+
|
35
|
+
def filter(config, messages)
|
36
|
+
d = create_driver(config)
|
37
|
+
time = Time.parse('2012-07-20 16:40:30').to_i
|
38
|
+
d.run do
|
39
|
+
messages.each do |message|
|
40
|
+
d.emit(message, time)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
filtered = d.filtered_as_array
|
44
|
+
filtered.map {|m| m[2] }
|
45
|
+
end
|
46
|
+
|
47
|
+
sub_test_case 'configure' do
|
48
|
+
test 'through & merge' do
|
49
|
+
d = create_driver CONFIG1
|
50
|
+
assert_equal 'referer', d.instance.key_name
|
51
|
+
|
52
|
+
assert_equal 'referer_known', d.instance.out_key_known
|
53
|
+
assert_equal 'referer_referer', d.instance.out_key_referer
|
54
|
+
assert_equal 'referer_search_term', d.instance.out_key_search_term
|
55
|
+
end
|
56
|
+
|
57
|
+
test 'filter & merge' do
|
58
|
+
d = create_driver CONFIG2
|
59
|
+
assert_equal 'ref', d.instance.key_name
|
60
|
+
|
61
|
+
assert_equal 'ref_known', d.instance.out_key_known
|
62
|
+
assert_equal 'ref_referer', d.instance.out_key_referer
|
63
|
+
assert_equal 'ref_search_term', d.instance.out_key_search_term
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
sub_test_case 'filter' do
|
68
|
+
test 'through & merge' do
|
69
|
+
messages = [
|
70
|
+
{ 'value' => 0 },
|
71
|
+
{ 'value' => 1, 'referer' => 'http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari' },
|
72
|
+
{ 'value' => 2, 'referer' => 'http://www.unixuser.org/' },
|
73
|
+
{ 'value' => 3, 'referer' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_JIS&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=' },
|
74
|
+
{ 'value' => 4, 'referer' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_J&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=' },
|
75
|
+
{ 'value' => 5, 'referer' => 'http://search.yahoo.co.jp/search?p=%E3%81%BB%E3%81%92&aq=-1&oq=&ei=UTF-8&fr=sfp_as&x=wrt' },
|
76
|
+
]
|
77
|
+
expected = [
|
78
|
+
{
|
79
|
+
'value' => 0,
|
80
|
+
'referer_known' => false
|
81
|
+
},
|
82
|
+
{
|
83
|
+
'value' => 1,
|
84
|
+
'referer' => 'http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari',
|
85
|
+
'referer_known' => true,
|
86
|
+
'referer_referer' => 'Google',
|
87
|
+
'referer_host' => 'www.google.com',
|
88
|
+
'referer_search_term' => 'gateway oracle cards denise linn'
|
89
|
+
},
|
90
|
+
{
|
91
|
+
'value' => 2,
|
92
|
+
'referer' => 'http://www.unixuser.org/',
|
93
|
+
'referer_known' => false
|
94
|
+
},
|
95
|
+
{
|
96
|
+
'value' => 3,
|
97
|
+
'referer' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_JIS&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=',
|
98
|
+
'referer_known' => true,
|
99
|
+
'referer_referer' => 'Google',
|
100
|
+
'referer_host' => 'www.google.co.jp',
|
101
|
+
'referer_search_term' => 'マルチキャスト 学士論文'
|
102
|
+
},
|
103
|
+
# invalid input_encoding
|
104
|
+
{
|
105
|
+
'value' => 4,
|
106
|
+
'referer' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_J&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=',
|
107
|
+
'referer_known' => true,
|
108
|
+
'referer_referer' => 'Google',
|
109
|
+
'referer_host' => 'www.google.co.jp',
|
110
|
+
'referer_search_term' => 'マルチキャスト 学士論文'.encode("Shift_JIS").force_encoding("US-ASCII")
|
111
|
+
},
|
112
|
+
{
|
113
|
+
'value' => 5,
|
114
|
+
'referer' => 'http://search.yahoo.co.jp/search?p=%E3%81%BB%E3%81%92&aq=-1&oq=&ei=UTF-8&fr=sfp_as&x=wrt',
|
115
|
+
'referer_known' => true,
|
116
|
+
'referer_referer' => 'Yahoo!',
|
117
|
+
'referer_host' => 'search.yahoo.co.jp',
|
118
|
+
'referer_search_term' => 'ほげ'
|
119
|
+
}
|
120
|
+
]
|
121
|
+
filtered = filter(CONFIG1, messages)
|
122
|
+
assert_equal(expected, filtered)
|
123
|
+
end
|
124
|
+
|
125
|
+
test 'filter & merge' do
|
126
|
+
messages = [
|
127
|
+
{ 'value' => 0 },
|
128
|
+
{ 'value' => 1, 'ref' => 'http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari' },
|
129
|
+
{ 'value' => 2, 'ref' => 'http://www.unixuser.org/' },
|
130
|
+
{ 'value' => 3, 'ref' => 'https://www.google.com/search?q=%E3%81%BB%E3%81%92&ie=utf-8&oe=utf-8' }
|
131
|
+
]
|
132
|
+
expected = [
|
133
|
+
{
|
134
|
+
'value' => 0,
|
135
|
+
'ref_known' => false
|
136
|
+
},
|
137
|
+
{
|
138
|
+
'value' => 1,
|
139
|
+
'ref' => 'http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari',
|
140
|
+
'ref_known' => true,
|
141
|
+
'ref_referer' => 'Google',
|
142
|
+
'ref_host' => 'www.google.com',
|
143
|
+
'ref_search_term' => 'gateway oracle cards denise linn'
|
144
|
+
},
|
145
|
+
{
|
146
|
+
'value' => 2,
|
147
|
+
'ref' => 'http://www.unixuser.org/',
|
148
|
+
'ref_known' => false,
|
149
|
+
},
|
150
|
+
{
|
151
|
+
'value' => 3,
|
152
|
+
'ref' => 'https://www.google.com/search?q=%E3%81%BB%E3%81%92&ie=utf-8&oe=utf-8',
|
153
|
+
'ref_known' => true,
|
154
|
+
'ref_referer' => 'Google',
|
155
|
+
'ref_host' => 'www.google.com',
|
156
|
+
'ref_search_term' => 'ほげ'
|
157
|
+
}
|
158
|
+
]
|
159
|
+
filtered = filter(CONFIG2, messages)
|
160
|
+
assert_equal(expected, filtered)
|
161
|
+
end
|
162
|
+
|
163
|
+
test 'file' do
|
164
|
+
messages = [
|
165
|
+
{ 'value' => 0 },
|
166
|
+
{ 'value' => 1, 'ref' => 'http://ezsch.ezweb.ne.jp/search/?sr=0101&query=aiueo%20%95a%93I' },
|
167
|
+
{ 'value' => 2, 'ref' => 'http://ezsch.ezweb.ne.jp/search/ezGoogleMain.php?query=%83%8D' },
|
168
|
+
{ 'value' => 3, 'ref' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_JIS&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=' }
|
169
|
+
]
|
170
|
+
expected = [
|
171
|
+
{
|
172
|
+
'value' => 0,
|
173
|
+
'referer_known' => false
|
174
|
+
},
|
175
|
+
{
|
176
|
+
'value' => 1,
|
177
|
+
'ref' => 'http://ezsch.ezweb.ne.jp/search/?sr=0101&query=aiueo%20%95a%93I',
|
178
|
+
'referer_known' => true,
|
179
|
+
'referer_referer' => 'Ezweb',
|
180
|
+
'referer_host' => 'ezsch.ezweb.ne.jp',
|
181
|
+
'referer_search_term' => 'aiueo 病的'
|
182
|
+
},
|
183
|
+
{
|
184
|
+
'value' => 2,
|
185
|
+
'ref' => 'http://ezsch.ezweb.ne.jp/search/ezGoogleMain.php?query=%83%8D',
|
186
|
+
'referer_known' => true,
|
187
|
+
'referer_referer' => 'Ezweb',
|
188
|
+
'referer_host' => 'ezsch.ezweb.ne.jp',
|
189
|
+
'referer_search_term' => 'ロ'
|
190
|
+
},
|
191
|
+
{
|
192
|
+
'value' => 3,
|
193
|
+
'ref' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_JIS&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=',
|
194
|
+
'referer_known' => true,
|
195
|
+
'referer_referer' => 'Google',
|
196
|
+
'referer_host' => 'www.google.co.jp',
|
197
|
+
'referer_search_term' => 'マルチキャスト 学士論文'
|
198
|
+
}
|
199
|
+
]
|
200
|
+
filtered = filter(CONFIG3, messages)
|
201
|
+
assert_equal(expected, filtered)
|
202
|
+
end
|
203
|
+
end
|
204
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-referer-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.0.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAGOMORI Satoshi
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-05-
|
12
|
+
date: 2017-05-30 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -82,11 +82,13 @@ files:
|
|
82
82
|
- README.md
|
83
83
|
- Rakefile
|
84
84
|
- fluent-plugin-referer-parser.gemspec
|
85
|
+
- lib/fluent/plugin/filter_referer_parser.rb
|
85
86
|
- lib/fluent/plugin/out_referer_parser.rb
|
86
87
|
- test/.rubocop.yml
|
87
88
|
- test/data/encodings.yaml
|
88
89
|
- test/data/referers.yaml
|
89
90
|
- test/helper.rb
|
91
|
+
- test/plugin/test_filter_referer_parser.rb
|
90
92
|
- test/plugin/test_out_referer_parser.rb
|
91
93
|
homepage: https://github.com/haruyama/fluent-plugin-referer-parser
|
92
94
|
licenses:
|
@@ -117,4 +119,5 @@ test_files:
|
|
117
119
|
- test/data/encodings.yaml
|
118
120
|
- test/data/referers.yaml
|
119
121
|
- test/helper.rb
|
122
|
+
- test/plugin/test_filter_referer_parser.rb
|
120
123
|
- test/plugin/test_out_referer_parser.rb
|