fluent-plugin-referer-parser 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +6 -1
- data/fluent-plugin-referer-parser.gemspec +2 -2
- data/lib/fluent/plugin/out_referer_parser.rb +25 -14
- data/test/data/encodings.yaml +2 -0
- data/test/data/referers.yaml +16 -0
- data/test/plugin/test_out_referer_parser.rb +81 -17
- metadata +16 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b2e28d942761a6c6393cd106b2a64ff5be802e4a
|
4
|
+
data.tar.gz: c03bf0ed0ebbe8bf97b4368d609f7ad1f0866186
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 137841368528697ede2578449cb931f0ff92f4dcb63202dbd3680312403e80c124be3d9ec3fabbd885d3277c05747ebb1cf09536bfd42562dc8f5d47c6f670be
|
7
|
+
data.tar.gz: 5a2841b7f3160bb25dd032c39c7ec484f34e8fb773fd079976c18d3913656448415a15856a4b4f6a9618fe918063897bf098d442e718a276e6cb83e7fbec3bd3
|
data/README.md
CHANGED
@@ -17,7 +17,7 @@ To add referer-parser result into matched messages:
|
|
17
17
|
add_prefix merged
|
18
18
|
</match>
|
19
19
|
|
20
|
-
Output messages with tag 'merged.**' has '
|
20
|
+
Output messages with tag 'merged.**' has 'referer_known', 'referer_referer' and 'referer_search_term' attributes. If you want to change attribute names, write configurations as below:
|
21
21
|
|
22
22
|
<match input.**>
|
23
23
|
type referer_parser
|
@@ -26,9 +26,14 @@ Output messages with tag 'merged.**' has 'referer\_known', 'referer\_referer' an
|
|
26
26
|
add_prefix merged
|
27
27
|
out_key_known ref_known
|
28
28
|
out_key_referer ref_referer
|
29
|
+
out_key_host ref_host
|
29
30
|
out_key_search_term ref_search_term
|
30
31
|
</match>
|
31
32
|
|
33
|
+
If you want to use your own referers definition, you can use 'referers_yaml' attribute.
|
34
|
+
'referers_yaml' should be referers.yaml format of [snowplow/referer-parser](https://github.com/snowplow/referer-parser).
|
35
|
+
|
36
|
+
* [Sample](test/data/referers.yaml)
|
32
37
|
|
33
38
|
## Copyright
|
34
39
|
|
@@ -2,9 +2,9 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = 'fluent-plugin-referer-parser'
|
5
|
-
gem.version = '0.0.
|
5
|
+
gem.version = '0.0.6'
|
6
6
|
gem.authors = ['TAGOMORI Satoshi', 'HARUYAMA Seigo']
|
7
|
-
gem.email = ['
|
7
|
+
gem.email = ['haruyama@unixuser.org']
|
8
8
|
gem.description = %q{parsing by referer-parser. See: https://github.com/snowplow/referer-parser}
|
9
9
|
gem.summary = %q{Fluentd plugin to parse UserAgent strings}
|
10
10
|
gem.homepage = 'https://github.com/haruyama/fluent-plugin-referer-parser'
|
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'referer-parser'
|
2
|
-
|
3
1
|
# referer parser output
|
4
2
|
class Fluent::RefererParserOutput < Fluent::Output
|
5
3
|
Fluent::Plugin.register_output('referer_parser', self)
|
@@ -8,22 +6,33 @@ class Fluent::RefererParserOutput < Fluent::Output
|
|
8
6
|
config_param :remove_prefix, :string, default: nil
|
9
7
|
config_param :add_prefix, :string, default: nil
|
10
8
|
|
11
|
-
config_param :key_name,
|
9
|
+
config_param :key_name, :string
|
10
|
+
config_param :referers_yaml, :string, default: nil
|
11
|
+
config_param :encodings_yaml, :string, default: nil
|
12
12
|
|
13
13
|
config_param :out_key_known, :string, default: 'referer_known'
|
14
14
|
config_param :out_key_referer, :string, default: 'referer_referer'
|
15
|
+
config_param :out_key_host, :string, default: 'referer_host'
|
15
16
|
config_param :out_key_search_term, :string, default: 'referer_search_term'
|
16
17
|
|
17
|
-
REFERER = RefererParser::Referer.new('http://example.org/')
|
18
|
-
|
19
18
|
def initialize
|
20
19
|
super
|
21
20
|
require 'cgi'
|
21
|
+
require 'yaml'
|
22
|
+
require 'referer-parser'
|
22
23
|
end
|
23
24
|
|
24
25
|
def configure(conf)
|
25
26
|
super
|
26
27
|
|
28
|
+
@referer_parser = RefererParser::Referer.new('http://example.org/', @referers_yaml)
|
29
|
+
|
30
|
+
if @encodings_yaml
|
31
|
+
@encodings = YAML.load_file(@encodings_yaml)
|
32
|
+
else
|
33
|
+
@encodings = {}
|
34
|
+
end
|
35
|
+
|
27
36
|
if !@tag && !@remove_prefix && !@add_prefix
|
28
37
|
fail Fluent::ConfigError, 'missing both of remove_prefix and add_prefix'
|
29
38
|
end
|
@@ -59,24 +68,26 @@ class Fluent::RefererParserOutput < Fluent::Output
|
|
59
68
|
def emit(tag, es, chain)
|
60
69
|
tag = tag_mangle(tag)
|
61
70
|
es.each do |time, record|
|
62
|
-
|
71
|
+
is_valid = true
|
63
72
|
begin
|
64
|
-
|
73
|
+
@referer_parser.parse(record[@key_name])
|
65
74
|
rescue
|
66
|
-
|
75
|
+
is_valid = false
|
67
76
|
end
|
68
|
-
if
|
69
|
-
search_term =
|
70
|
-
|
71
|
-
|
77
|
+
if is_valid && @referer_parser.known?
|
78
|
+
search_term = @referer_parser.search_term
|
79
|
+
host = @referer_parser.uri.host
|
80
|
+
parameters = CGI.parse(@referer_parser.uri.query)
|
81
|
+
input_encoding = @encodings[host] || parameters['ie'][0] || parameters['ei'][0]
|
72
82
|
begin
|
73
83
|
search_term = search_term.force_encoding(input_encoding).encode('utf-8') if input_encoding && /\Autf-?8\z/i !~ input_encoding
|
74
84
|
rescue
|
75
|
-
$log.error('invalid referer: ' +
|
85
|
+
$log.error('invalid referer: ' + @referer_parser.uri.to_s)
|
76
86
|
end
|
77
87
|
record.merge!(
|
78
88
|
@out_key_known => true,
|
79
|
-
@out_key_referer =>
|
89
|
+
@out_key_referer => @referer_parser.referer,
|
90
|
+
@out_key_host => host,
|
80
91
|
@out_key_search_term => search_term,
|
81
92
|
)
|
82
93
|
else
|
@@ -17,7 +17,17 @@ remove_prefix test
|
|
17
17
|
add_prefix merged
|
18
18
|
out_key_known ref_known
|
19
19
|
out_key_referer ref_referer
|
20
|
+
out_key_host ref_host
|
20
21
|
out_key_search_term ref_search_term
|
22
|
+
]
|
23
|
+
|
24
|
+
CONFIG3 = %[
|
25
|
+
type referer_parser
|
26
|
+
key_name ref
|
27
|
+
remove_prefix test
|
28
|
+
add_prefix merged
|
29
|
+
referers_yaml test/data/referers.yaml
|
30
|
+
encodings_yaml test/data/encodings.yaml
|
21
31
|
]
|
22
32
|
|
23
33
|
def create_driver(conf = CONFIG1, tag = 'test')
|
@@ -82,8 +92,9 @@ out_key_search_term ref_search_term
|
|
82
92
|
assert_equal 1, m['value']
|
83
93
|
assert_equal true, m['referer_known']
|
84
94
|
assert_equal 'Google', m['referer_referer']
|
95
|
+
assert_equal 'www.google.com', m['referer_host']
|
85
96
|
assert_equal 'gateway oracle cards denise linn', m['referer_search_term']
|
86
|
-
assert_equal
|
97
|
+
assert_equal 6, m.keys.size
|
87
98
|
|
88
99
|
m = emits[2][2]
|
89
100
|
assert_equal 2, m['value']
|
@@ -96,22 +107,25 @@ out_key_search_term ref_search_term
|
|
96
107
|
assert_equal 3, m['value']
|
97
108
|
assert_equal true, m['referer_known']
|
98
109
|
assert_equal 'Google', m['referer_referer']
|
110
|
+
assert_equal 'www.google.co.jp', m['referer_host']
|
99
111
|
assert_equal 'マルチキャスト 学士論文', m['referer_search_term']
|
100
|
-
assert_equal
|
112
|
+
assert_equal 6, m.keys.size
|
101
113
|
|
102
114
|
# invalid input_encoding
|
103
115
|
m = emits[4][2]
|
104
116
|
assert_equal 4, m['value']
|
105
117
|
assert_equal true, m['referer_known']
|
106
118
|
assert_equal 'Google', m['referer_referer']
|
107
|
-
assert_equal
|
119
|
+
assert_equal 'www.google.co.jp', m['referer_host']
|
120
|
+
assert_equal 6, m.keys.size
|
108
121
|
|
109
122
|
m = emits[5][2]
|
110
|
-
assert_equal 5,
|
111
|
-
assert_equal true,
|
112
|
-
assert_equal 'Yahoo!',
|
113
|
-
assert_equal '
|
114
|
-
assert_equal
|
123
|
+
assert_equal 5, m['value']
|
124
|
+
assert_equal true, m['referer_known']
|
125
|
+
assert_equal 'Yahoo!', m['referer_referer']
|
126
|
+
assert_equal 'search.yahoo.co.jp', m['referer_host']
|
127
|
+
assert_equal 'ほげ', m['referer_search_term']
|
128
|
+
assert_equal 6, m.keys.size
|
115
129
|
end
|
116
130
|
|
117
131
|
def test_emit2
|
@@ -132,27 +146,77 @@ out_key_search_term ref_search_term
|
|
132
146
|
m = emits[0][2]
|
133
147
|
assert_equal 0, m['value']
|
134
148
|
assert_equal false, m['ref_known']
|
135
|
-
assert_nil m['
|
136
|
-
assert_nil m['
|
149
|
+
assert_nil m['ref_referer']
|
150
|
+
assert_nil m['ref_search_term']
|
137
151
|
assert_equal 2, m.keys.size
|
138
152
|
|
139
153
|
m = emits[1][2]
|
140
154
|
assert_equal 1, m['value']
|
141
155
|
assert_equal true, m['ref_known']
|
142
156
|
assert_equal 'Google', m['ref_referer']
|
157
|
+
assert_equal 'www.google.com', m['ref_host']
|
143
158
|
assert_equal 'gateway oracle cards denise linn', m['ref_search_term']
|
144
|
-
assert_equal
|
159
|
+
assert_equal 6, m.keys.size
|
145
160
|
|
146
161
|
m = emits[2][2]
|
147
162
|
assert_equal 2, m['value']
|
148
163
|
assert_equal false, m['ref_known']
|
149
|
-
assert_nil m['
|
150
|
-
assert_nil m['
|
164
|
+
assert_nil m['ref_referer']
|
165
|
+
assert_nil m['ref_host']
|
166
|
+
assert_nil m['ref_search_term']
|
151
167
|
|
152
168
|
m = emits[3][2]
|
153
|
-
assert_equal 3,
|
154
|
-
assert_equal true,
|
155
|
-
assert_equal 'Google',
|
156
|
-
assert_equal '
|
169
|
+
assert_equal 3, m['value']
|
170
|
+
assert_equal true, m['ref_known']
|
171
|
+
assert_equal 'Google', m['ref_referer']
|
172
|
+
assert_equal 'www.google.com', m['ref_host']
|
173
|
+
assert_equal 'ほげ', m['ref_search_term']
|
174
|
+
end
|
175
|
+
|
176
|
+
def test_emit3
|
177
|
+
d = create_driver(CONFIG3, 'test.message')
|
178
|
+
time = Time.parse('2012-07-20 16:40:30').to_i
|
179
|
+
d.run do
|
180
|
+
d.emit({ 'value' => 0 }, time)
|
181
|
+
d.emit({ 'value' => 1, 'ref' => 'http://ezsch.ezweb.ne.jp/search/?sr=0101&query=aiueo%20%95a%93I' }, time)
|
182
|
+
d.emit({ 'value' => 2, 'ref' => 'http://ezsch.ezweb.ne.jp/search/ezGoogleMain.php?query=%83%8D' }, time)
|
183
|
+
d.emit({ 'value' => 3, 'ref' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_JIS&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=' }, time)
|
184
|
+
end
|
185
|
+
|
186
|
+
emits = d.emits
|
187
|
+
assert_equal 4, emits.size
|
188
|
+
assert_equal 'merged.message', emits[0][0]
|
189
|
+
assert_equal time, emits[0][1]
|
190
|
+
|
191
|
+
m = emits[0][2]
|
192
|
+
assert_equal 0, m['value']
|
193
|
+
assert_equal false, m['referer_known']
|
194
|
+
assert_nil m['referer_referer']
|
195
|
+
assert_nil m['referer_search_term']
|
196
|
+
assert_equal 2, m.keys.size
|
197
|
+
|
198
|
+
m = emits[1][2]
|
199
|
+
assert_equal 1, m['value']
|
200
|
+
assert_equal true, m['referer_known']
|
201
|
+
assert_equal 'Ezweb', m['referer_referer']
|
202
|
+
assert_equal 'ezsch.ezweb.ne.jp', m['referer_host']
|
203
|
+
assert_equal 'aiueo 病的', m['referer_search_term']
|
204
|
+
assert_equal 6, m.keys.size
|
205
|
+
|
206
|
+
m = emits[2][2]
|
207
|
+
assert_equal 2, m['value']
|
208
|
+
assert_equal true, m['referer_known']
|
209
|
+
assert_equal 'Ezweb', m['referer_referer']
|
210
|
+
assert_equal 'ezsch.ezweb.ne.jp', m['referer_host']
|
211
|
+
assert_equal 'ロ', m['referer_search_term']
|
212
|
+
assert_equal 6, m.keys.size
|
213
|
+
|
214
|
+
m = emits[3][2]
|
215
|
+
assert_equal 3, m['value']
|
216
|
+
assert_equal true, m['referer_known']
|
217
|
+
assert_equal 'Google', m['referer_referer']
|
218
|
+
assert_equal 'www.google.co.jp', m['referer_host']
|
219
|
+
assert_equal 'マルチキャスト 学士論文', m['referer_search_term']
|
220
|
+
assert_equal 6, m.keys.size
|
157
221
|
end
|
158
222
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-referer-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAGOMORI Satoshi
|
@@ -9,60 +9,59 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-
|
12
|
+
date: 2014-02-05 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
-
- -
|
18
|
+
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
20
|
version: '0'
|
21
21
|
type: :development
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- -
|
25
|
+
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: '0'
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: fluentd
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
|
-
- -
|
32
|
+
- - ">="
|
33
33
|
- !ruby/object:Gem::Version
|
34
34
|
version: '0'
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
|
-
- -
|
39
|
+
- - ">="
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '0'
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
43
|
name: referer-parser
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
45
45
|
requirements:
|
46
|
-
- -
|
46
|
+
- - ">="
|
47
47
|
- !ruby/object:Gem::Version
|
48
48
|
version: 0.2.0
|
49
49
|
type: :runtime
|
50
50
|
prerelease: false
|
51
51
|
version_requirements: !ruby/object:Gem::Requirement
|
52
52
|
requirements:
|
53
|
-
- -
|
53
|
+
- - ">="
|
54
54
|
- !ruby/object:Gem::Version
|
55
55
|
version: 0.2.0
|
56
56
|
description: 'parsing by referer-parser. See: https://github.com/snowplow/referer-parser'
|
57
57
|
email:
|
58
|
-
- tagomoris@gmail.com
|
59
58
|
- haruyama@unixuser.org
|
60
59
|
executables: []
|
61
60
|
extensions: []
|
62
61
|
extra_rdoc_files: []
|
63
62
|
files:
|
64
|
-
- .gitignore
|
65
|
-
- .travis.yml
|
63
|
+
- ".gitignore"
|
64
|
+
- ".travis.yml"
|
66
65
|
- Gemfile
|
67
66
|
- LICENSE.txt
|
68
67
|
- README.md
|
@@ -70,6 +69,8 @@ files:
|
|
70
69
|
- fluent-plugin-referer-parser.gemspec
|
71
70
|
- lib/fluent/plugin/.rubocop.yml
|
72
71
|
- lib/fluent/plugin/out_referer_parser.rb
|
72
|
+
- test/data/encodings.yaml
|
73
|
+
- test/data/referers.yaml
|
73
74
|
- test/helper.rb
|
74
75
|
- test/plugin/.rubocop.yml
|
75
76
|
- test/plugin/test_out_referer_parser.rb
|
@@ -83,12 +84,12 @@ require_paths:
|
|
83
84
|
- lib
|
84
85
|
required_ruby_version: !ruby/object:Gem::Requirement
|
85
86
|
requirements:
|
86
|
-
- -
|
87
|
+
- - ">="
|
87
88
|
- !ruby/object:Gem::Version
|
88
89
|
version: '0'
|
89
90
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
91
|
requirements:
|
91
|
-
- -
|
92
|
+
- - ">="
|
92
93
|
- !ruby/object:Gem::Version
|
93
94
|
version: '0'
|
94
95
|
requirements: []
|
@@ -98,6 +99,8 @@ signing_key:
|
|
98
99
|
specification_version: 4
|
99
100
|
summary: Fluentd plugin to parse UserAgent strings
|
100
101
|
test_files:
|
102
|
+
- test/data/encodings.yaml
|
103
|
+
- test/data/referers.yaml
|
101
104
|
- test/helper.rb
|
102
105
|
- test/plugin/.rubocop.yml
|
103
106
|
- test/plugin/test_out_referer_parser.rb
|