fluent-plugin-referer-parser 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -1
- data/fluent-plugin-referer-parser.gemspec +2 -2
- data/lib/fluent/plugin/out_referer_parser.rb +25 -14
- data/test/data/encodings.yaml +2 -0
- data/test/data/referers.yaml +16 -0
- data/test/plugin/test_out_referer_parser.rb +81 -17
- metadata +16 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b2e28d942761a6c6393cd106b2a64ff5be802e4a
|
4
|
+
data.tar.gz: c03bf0ed0ebbe8bf97b4368d609f7ad1f0866186
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 137841368528697ede2578449cb931f0ff92f4dcb63202dbd3680312403e80c124be3d9ec3fabbd885d3277c05747ebb1cf09536bfd42562dc8f5d47c6f670be
|
7
|
+
data.tar.gz: 5a2841b7f3160bb25dd032c39c7ec484f34e8fb773fd079976c18d3913656448415a15856a4b4f6a9618fe918063897bf098d442e718a276e6cb83e7fbec3bd3
|
data/README.md
CHANGED
@@ -17,7 +17,7 @@ To add referer-parser result into matched messages:
|
|
17
17
|
add_prefix merged
|
18
18
|
</match>
|
19
19
|
|
20
|
-
Output messages with tag 'merged.**' has '
|
20
|
+
Output messages with tag 'merged.**' have 'referer_known', 'referer_referer' and 'referer_search_term' attributes. If you want to change the attribute names, write the configuration as below:
|
21
21
|
|
22
22
|
<match input.**>
|
23
23
|
type referer_parser
|
@@ -26,9 +26,14 @@ Output messages with tag 'merged.**' has 'referer\_known', 'referer\_referer' an
|
|
26
26
|
add_prefix merged
|
27
27
|
out_key_known ref_known
|
28
28
|
out_key_referer ref_referer
|
29
|
+
out_key_host ref_host
|
29
30
|
out_key_search_term ref_search_term
|
30
31
|
</match>
|
31
32
|
|
33
|
+
If you want to use your own referer definitions, set the 'referers_yaml' attribute.
|
34
|
+
'referers_yaml' should point to a file in the referers.yaml format of [snowplow/referer-parser](https://github.com/snowplow/referer-parser).
|
35
|
+
|
36
|
+
* [Sample](test/data/referers.yaml)
|
32
37
|
|
33
38
|
## Copyright
|
34
39
|
|
@@ -2,9 +2,9 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = 'fluent-plugin-referer-parser'
|
5
|
-
gem.version = '0.0.
|
5
|
+
gem.version = '0.0.6'
|
6
6
|
gem.authors = ['TAGOMORI Satoshi', 'HARUYAMA Seigo']
|
7
|
-
gem.email = ['
|
7
|
+
gem.email = ['haruyama@unixuser.org']
|
8
8
|
gem.description = %q{parsing by referer-parser. See: https://github.com/snowplow/referer-parser}
|
9
9
|
gem.summary = %q{Fluentd plugin to parse UserAgent strings}
|
10
10
|
gem.homepage = 'https://github.com/haruyama/fluent-plugin-referer-parser'
|
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'referer-parser'
|
2
|
-
|
3
1
|
# referer parser output
|
4
2
|
class Fluent::RefererParserOutput < Fluent::Output
|
5
3
|
Fluent::Plugin.register_output('referer_parser', self)
|
@@ -8,22 +6,33 @@ class Fluent::RefererParserOutput < Fluent::Output
|
|
8
6
|
config_param :remove_prefix, :string, default: nil
|
9
7
|
config_param :add_prefix, :string, default: nil
|
10
8
|
|
11
|
-
config_param :key_name,
|
9
|
+
config_param :key_name, :string
|
10
|
+
config_param :referers_yaml, :string, default: nil
|
11
|
+
config_param :encodings_yaml, :string, default: nil
|
12
12
|
|
13
13
|
config_param :out_key_known, :string, default: 'referer_known'
|
14
14
|
config_param :out_key_referer, :string, default: 'referer_referer'
|
15
|
+
config_param :out_key_host, :string, default: 'referer_host'
|
15
16
|
config_param :out_key_search_term, :string, default: 'referer_search_term'
|
16
17
|
|
17
|
-
REFERER = RefererParser::Referer.new('http://example.org/')
|
18
|
-
|
19
18
|
def initialize
|
20
19
|
super
|
21
20
|
require 'cgi'
|
21
|
+
require 'yaml'
|
22
|
+
require 'referer-parser'
|
22
23
|
end
|
23
24
|
|
24
25
|
def configure(conf)
|
25
26
|
super
|
26
27
|
|
28
|
+
@referer_parser = RefererParser::Referer.new('http://example.org/', @referers_yaml)
|
29
|
+
|
30
|
+
if @encodings_yaml
|
31
|
+
@encodings = YAML.load_file(@encodings_yaml)
|
32
|
+
else
|
33
|
+
@encodings = {}
|
34
|
+
end
|
35
|
+
|
27
36
|
if !@tag && !@remove_prefix && !@add_prefix
|
28
37
|
fail Fluent::ConfigError, 'missing both of remove_prefix and add_prefix'
|
29
38
|
end
|
@@ -59,24 +68,26 @@ class Fluent::RefererParserOutput < Fluent::Output
|
|
59
68
|
def emit(tag, es, chain)
|
60
69
|
tag = tag_mangle(tag)
|
61
70
|
es.each do |time, record|
|
62
|
-
|
71
|
+
is_valid = true
|
63
72
|
begin
|
64
|
-
|
73
|
+
@referer_parser.parse(record[@key_name])
|
65
74
|
rescue
|
66
|
-
|
75
|
+
is_valid = false
|
67
76
|
end
|
68
|
-
if
|
69
|
-
search_term =
|
70
|
-
|
71
|
-
|
77
|
+
if is_valid && @referer_parser.known?
|
78
|
+
search_term = @referer_parser.search_term
|
79
|
+
host = @referer_parser.uri.host
|
80
|
+
parameters = CGI.parse(@referer_parser.uri.query)
|
81
|
+
input_encoding = @encodings[host] || parameters['ie'][0] || parameters['ei'][0]
|
72
82
|
begin
|
73
83
|
search_term = search_term.force_encoding(input_encoding).encode('utf-8') if input_encoding && /\Autf-?8\z/i !~ input_encoding
|
74
84
|
rescue
|
75
|
-
$log.error('invalid referer: ' +
|
85
|
+
$log.error('invalid referer: ' + @referer_parser.uri.to_s)
|
76
86
|
end
|
77
87
|
record.merge!(
|
78
88
|
@out_key_known => true,
|
79
|
-
@out_key_referer =>
|
89
|
+
@out_key_referer => @referer_parser.referer,
|
90
|
+
@out_key_host => host,
|
80
91
|
@out_key_search_term => search_term,
|
81
92
|
)
|
82
93
|
else
|
@@ -17,7 +17,17 @@ remove_prefix test
|
|
17
17
|
add_prefix merged
|
18
18
|
out_key_known ref_known
|
19
19
|
out_key_referer ref_referer
|
20
|
+
out_key_host ref_host
|
20
21
|
out_key_search_term ref_search_term
|
22
|
+
]
|
23
|
+
|
24
|
+
CONFIG3 = %[
|
25
|
+
type referer_parser
|
26
|
+
key_name ref
|
27
|
+
remove_prefix test
|
28
|
+
add_prefix merged
|
29
|
+
referers_yaml test/data/referers.yaml
|
30
|
+
encodings_yaml test/data/encodings.yaml
|
21
31
|
]
|
22
32
|
|
23
33
|
def create_driver(conf = CONFIG1, tag = 'test')
|
@@ -82,8 +92,9 @@ out_key_search_term ref_search_term
|
|
82
92
|
assert_equal 1, m['value']
|
83
93
|
assert_equal true, m['referer_known']
|
84
94
|
assert_equal 'Google', m['referer_referer']
|
95
|
+
assert_equal 'www.google.com', m['referer_host']
|
85
96
|
assert_equal 'gateway oracle cards denise linn', m['referer_search_term']
|
86
|
-
assert_equal
|
97
|
+
assert_equal 6, m.keys.size
|
87
98
|
|
88
99
|
m = emits[2][2]
|
89
100
|
assert_equal 2, m['value']
|
@@ -96,22 +107,25 @@ out_key_search_term ref_search_term
|
|
96
107
|
assert_equal 3, m['value']
|
97
108
|
assert_equal true, m['referer_known']
|
98
109
|
assert_equal 'Google', m['referer_referer']
|
110
|
+
assert_equal 'www.google.co.jp', m['referer_host']
|
99
111
|
assert_equal 'マルチキャスト 学士論文', m['referer_search_term']
|
100
|
-
assert_equal
|
112
|
+
assert_equal 6, m.keys.size
|
101
113
|
|
102
114
|
# invalid input_encoding
|
103
115
|
m = emits[4][2]
|
104
116
|
assert_equal 4, m['value']
|
105
117
|
assert_equal true, m['referer_known']
|
106
118
|
assert_equal 'Google', m['referer_referer']
|
107
|
-
assert_equal
|
119
|
+
assert_equal 'www.google.co.jp', m['referer_host']
|
120
|
+
assert_equal 6, m.keys.size
|
108
121
|
|
109
122
|
m = emits[5][2]
|
110
|
-
assert_equal 5,
|
111
|
-
assert_equal true,
|
112
|
-
assert_equal 'Yahoo!',
|
113
|
-
assert_equal '
|
114
|
-
assert_equal
|
123
|
+
assert_equal 5, m['value']
|
124
|
+
assert_equal true, m['referer_known']
|
125
|
+
assert_equal 'Yahoo!', m['referer_referer']
|
126
|
+
assert_equal 'search.yahoo.co.jp', m['referer_host']
|
127
|
+
assert_equal 'ほげ', m['referer_search_term']
|
128
|
+
assert_equal 6, m.keys.size
|
115
129
|
end
|
116
130
|
|
117
131
|
def test_emit2
|
@@ -132,27 +146,77 @@ out_key_search_term ref_search_term
|
|
132
146
|
m = emits[0][2]
|
133
147
|
assert_equal 0, m['value']
|
134
148
|
assert_equal false, m['ref_known']
|
135
|
-
assert_nil m['
|
136
|
-
assert_nil m['
|
149
|
+
assert_nil m['ref_referer']
|
150
|
+
assert_nil m['ref_search_term']
|
137
151
|
assert_equal 2, m.keys.size
|
138
152
|
|
139
153
|
m = emits[1][2]
|
140
154
|
assert_equal 1, m['value']
|
141
155
|
assert_equal true, m['ref_known']
|
142
156
|
assert_equal 'Google', m['ref_referer']
|
157
|
+
assert_equal 'www.google.com', m['ref_host']
|
143
158
|
assert_equal 'gateway oracle cards denise linn', m['ref_search_term']
|
144
|
-
assert_equal
|
159
|
+
assert_equal 6, m.keys.size
|
145
160
|
|
146
161
|
m = emits[2][2]
|
147
162
|
assert_equal 2, m['value']
|
148
163
|
assert_equal false, m['ref_known']
|
149
|
-
assert_nil m['
|
150
|
-
assert_nil m['
|
164
|
+
assert_nil m['ref_referer']
|
165
|
+
assert_nil m['ref_host']
|
166
|
+
assert_nil m['ref_search_term']
|
151
167
|
|
152
168
|
m = emits[3][2]
|
153
|
-
assert_equal 3,
|
154
|
-
assert_equal true,
|
155
|
-
assert_equal 'Google',
|
156
|
-
assert_equal '
|
169
|
+
assert_equal 3, m['value']
|
170
|
+
assert_equal true, m['ref_known']
|
171
|
+
assert_equal 'Google', m['ref_referer']
|
172
|
+
assert_equal 'www.google.com', m['ref_host']
|
173
|
+
assert_equal 'ほげ', m['ref_search_term']
|
174
|
+
end
|
175
|
+
|
176
|
+
def test_emit3
|
177
|
+
d = create_driver(CONFIG3, 'test.message')
|
178
|
+
time = Time.parse('2012-07-20 16:40:30').to_i
|
179
|
+
d.run do
|
180
|
+
d.emit({ 'value' => 0 }, time)
|
181
|
+
d.emit({ 'value' => 1, 'ref' => 'http://ezsch.ezweb.ne.jp/search/?sr=0101&query=aiueo%20%95a%93I' }, time)
|
182
|
+
d.emit({ 'value' => 2, 'ref' => 'http://ezsch.ezweb.ne.jp/search/ezGoogleMain.php?query=%83%8D' }, time)
|
183
|
+
d.emit({ 'value' => 3, 'ref' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_JIS&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=' }, time)
|
184
|
+
end
|
185
|
+
|
186
|
+
emits = d.emits
|
187
|
+
assert_equal 4, emits.size
|
188
|
+
assert_equal 'merged.message', emits[0][0]
|
189
|
+
assert_equal time, emits[0][1]
|
190
|
+
|
191
|
+
m = emits[0][2]
|
192
|
+
assert_equal 0, m['value']
|
193
|
+
assert_equal false, m['referer_known']
|
194
|
+
assert_nil m['referer_referer']
|
195
|
+
assert_nil m['referer_search_term']
|
196
|
+
assert_equal 2, m.keys.size
|
197
|
+
|
198
|
+
m = emits[1][2]
|
199
|
+
assert_equal 1, m['value']
|
200
|
+
assert_equal true, m['referer_known']
|
201
|
+
assert_equal 'Ezweb', m['referer_referer']
|
202
|
+
assert_equal 'ezsch.ezweb.ne.jp', m['referer_host']
|
203
|
+
assert_equal 'aiueo 病的', m['referer_search_term']
|
204
|
+
assert_equal 6, m.keys.size
|
205
|
+
|
206
|
+
m = emits[2][2]
|
207
|
+
assert_equal 2, m['value']
|
208
|
+
assert_equal true, m['referer_known']
|
209
|
+
assert_equal 'Ezweb', m['referer_referer']
|
210
|
+
assert_equal 'ezsch.ezweb.ne.jp', m['referer_host']
|
211
|
+
assert_equal 'ロ', m['referer_search_term']
|
212
|
+
assert_equal 6, m.keys.size
|
213
|
+
|
214
|
+
m = emits[3][2]
|
215
|
+
assert_equal 3, m['value']
|
216
|
+
assert_equal true, m['referer_known']
|
217
|
+
assert_equal 'Google', m['referer_referer']
|
218
|
+
assert_equal 'www.google.co.jp', m['referer_host']
|
219
|
+
assert_equal 'マルチキャスト 学士論文', m['referer_search_term']
|
220
|
+
assert_equal 6, m.keys.size
|
157
221
|
end
|
158
222
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-referer-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAGOMORI Satoshi
|
@@ -9,60 +9,59 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-
|
12
|
+
date: 2014-02-05 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
-
- -
|
18
|
+
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
20
|
version: '0'
|
21
21
|
type: :development
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- -
|
25
|
+
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: '0'
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: fluentd
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
|
-
- -
|
32
|
+
- - ">="
|
33
33
|
- !ruby/object:Gem::Version
|
34
34
|
version: '0'
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
|
-
- -
|
39
|
+
- - ">="
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '0'
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
43
|
name: referer-parser
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
45
45
|
requirements:
|
46
|
-
- -
|
46
|
+
- - ">="
|
47
47
|
- !ruby/object:Gem::Version
|
48
48
|
version: 0.2.0
|
49
49
|
type: :runtime
|
50
50
|
prerelease: false
|
51
51
|
version_requirements: !ruby/object:Gem::Requirement
|
52
52
|
requirements:
|
53
|
-
- -
|
53
|
+
- - ">="
|
54
54
|
- !ruby/object:Gem::Version
|
55
55
|
version: 0.2.0
|
56
56
|
description: 'parsing by referer-parser. See: https://github.com/snowplow/referer-parser'
|
57
57
|
email:
|
58
|
-
- tagomoris@gmail.com
|
59
58
|
- haruyama@unixuser.org
|
60
59
|
executables: []
|
61
60
|
extensions: []
|
62
61
|
extra_rdoc_files: []
|
63
62
|
files:
|
64
|
-
- .gitignore
|
65
|
-
- .travis.yml
|
63
|
+
- ".gitignore"
|
64
|
+
- ".travis.yml"
|
66
65
|
- Gemfile
|
67
66
|
- LICENSE.txt
|
68
67
|
- README.md
|
@@ -70,6 +69,8 @@ files:
|
|
70
69
|
- fluent-plugin-referer-parser.gemspec
|
71
70
|
- lib/fluent/plugin/.rubocop.yml
|
72
71
|
- lib/fluent/plugin/out_referer_parser.rb
|
72
|
+
- test/data/encodings.yaml
|
73
|
+
- test/data/referers.yaml
|
73
74
|
- test/helper.rb
|
74
75
|
- test/plugin/.rubocop.yml
|
75
76
|
- test/plugin/test_out_referer_parser.rb
|
@@ -83,12 +84,12 @@ require_paths:
|
|
83
84
|
- lib
|
84
85
|
required_ruby_version: !ruby/object:Gem::Requirement
|
85
86
|
requirements:
|
86
|
-
- -
|
87
|
+
- - ">="
|
87
88
|
- !ruby/object:Gem::Version
|
88
89
|
version: '0'
|
89
90
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
91
|
requirements:
|
91
|
-
- -
|
92
|
+
- - ">="
|
92
93
|
- !ruby/object:Gem::Version
|
93
94
|
version: '0'
|
94
95
|
requirements: []
|
@@ -98,6 +99,8 @@ signing_key:
|
|
98
99
|
specification_version: 4
|
99
100
|
summary: Fluentd plugin to parse UserAgent strings
|
100
101
|
test_files:
|
102
|
+
- test/data/encodings.yaml
|
103
|
+
- test/data/referers.yaml
|
101
104
|
- test/helper.rb
|
102
105
|
- test/plugin/.rubocop.yml
|
103
106
|
- test/plugin/test_out_referer_parser.rb
|