fluent-plugin-referer-parser 0.0.1 → 0.0.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 56f8bf0b194d231cd18480ccb8eb565b3f96da29
4
- data.tar.gz: 653fbea2b2c809b8c027070ef1f86be948b9f728
3
+ metadata.gz: 83be55d1792cd02e85a4a9b5bba63d745ea4355c
4
+ data.tar.gz: 9243e20f0358b066528a72fe37d5d643f81e621e
5
5
  SHA512:
6
- metadata.gz: af9bd61a4764e99dbc3ee371650bed74c4d26cbea11affb67bd01d82731dcb1e642367dda4d9c6c872fcf0330b70bd50137d39af6f8533f9f79e3c9a5c047cdb
7
- data.tar.gz: c8f624d327d091b6b55535c044e8de72dca2d349ca176dd92f925a348098ae676ef65254e173b7e74747b15d26572f8fa9c26db06b126866af2daae6aa55ec52
6
+ metadata.gz: 68f5e42275147e0283258a26c9dbbaa5c805c98cd95d4b29325ad4fd36a415ef79304450e9d2f7eb159317f9c45055ffc664d8efc191a2a9e5289df39d294b90
7
+ data.tar.gz: 5f80384e7b38990169ae9534f5d2b871ff705e04fafcc954827532b5ff75c9556a5eb4fb645f0a8c2e8b0df74f3f789da057427d5d744b270e76d59d239702b7
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = 'fluent-plugin-referer-parser'
5
- gem.version = '0.0.1'
5
+ gem.version = '0.0.2'
6
6
  gem.authors = ['TAGOMORI Satoshi', 'HARUYAMA Seigo']
7
7
  gem.email = ['tagomoris@gmail.com', 'haruyama@unixuser.org']
8
8
  gem.description = %q{parsing by referer-parser. See: https://github.com/snowplow/referer-parser}
@@ -6,4 +6,7 @@ CyclomaticComplexity:
6
6
 
7
7
  MethodLength:
8
8
  CountComments: false # count full line comments?
9
- Max: 20
9
+ Max: 50
10
+
11
+ GlobalVars:
12
+ Enabled: false
@@ -15,11 +15,11 @@ class Fluent::RefererParserOutput < Fluent::Output
15
15
  config_param :out_key_referer, :string, default: 'referer_referer'
16
16
  config_param :out_key_search_term, :string, default: 'referer_search_term'
17
17
 
18
- UNKOWWN_STRING = 'UNKNOWN'
19
- PARSE_ERROR_STRUCT = OpenStruct.new(known: false)
18
+ PARSE_ERROR_STRUCT = OpenStruct.new(known?: false)
20
19
 
21
20
  def initialize
22
21
  super
22
+ require 'cgi'
23
23
  require 'referer-parser'
24
24
  end
25
25
 
@@ -61,17 +61,29 @@ class Fluent::RefererParserOutput < Fluent::Output
61
61
  def emit(tag, es, chain)
62
62
  tag = tag_mangle(tag)
63
63
  es.each do |time, record|
64
- parsed =
64
+ referer =
65
65
  begin
66
66
  RefererParser::Referer.new(record[@key_name])
67
67
  rescue
68
68
  PARSE_ERROR_STRUCT
69
69
  end
70
- record.merge!(
71
- @out_key_known => parsed.known,
72
- @out_key_referer => parsed.referer || UNKOWWN_STRING,
73
- @out_key_search_term => parsed.search_term || UNKOWWN_STRING,
74
- )
70
+ if referer.known?
71
+ search_term = referer.search_term
72
+ parameters = CGI.parse(referer.uri.query)
73
+ input_encoding = parameters['ie'][0] || parameters['ei'][0]
74
+ begin
75
+ search_term = search_term.force_encoding(input_encoding).encode('utf-8') if input_encoding && /utf-?8/i !~ input_encoding
76
+ rescue
77
+ $log.error('invalid referer: ' + referer.uri.to_s)
78
+ end
79
+ record.merge!(
80
+ @out_key_known => true,
81
+ @out_key_referer => referer.referer,
82
+ @out_key_search_term => search_term,
83
+ )
84
+ else
85
+ record.merge!(@out_key_known => false)
86
+ end
75
87
  Fluent::Engine.emit(tag, time, record)
76
88
  end
77
89
  chain.next
@@ -61,31 +61,57 @@ out_key_search_term ref_search_term
61
61
  d.emit({ 'value' => 0 }, time)
62
62
  d.emit({ 'value' => 1, 'referer' => 'http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari' }, time)
63
63
  d.emit({ 'value' => 2, 'referer' => 'http://www.unixuser.org/' }, time)
64
+ d.emit({ 'value' => 3, 'referer' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_JIS&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=' }, time)
65
+ d.emit({ 'value' => 4, 'referer' => 'http://www.google.co.jp/search?hl=ja&ie=Shift_J&c2coff=1&q=%83%7D%83%8B%83%60%83L%83%83%83X%83g%81@%8Aw%8Em%98_%95%B6&lr=' }, time)
66
+ d.emit({ 'value' => 5, 'referer' => 'http://search.yahoo.co.jp/search?p=%E3%81%BB%E3%81%92&aq=-1&oq=&ei=UTF-8&fr=sfp_as&x=wrt' }, time)
64
67
  end
65
68
 
66
69
  emits = d.emits
67
- assert_equal 3, emits.size
70
+ assert_equal 6, emits.size
68
71
  assert_equal 'merged.message', emits[0][0]
69
72
  assert_equal time, emits[0][1]
70
73
 
71
74
  m = emits[0][2]
72
75
  assert_equal 0, m['value']
73
76
  assert_equal false, m['referer_known']
74
- assert_equal 'UNKNOWN', m['referer_referer']
75
- assert_equal 'UNKNOWN', m['referer_search_term']
76
- assert_equal 4, m.keys.size
77
+ assert_nil m['referer_referer']
78
+ assert_nil m['referer_search_term']
79
+ assert_equal 2, m.keys.size
77
80
 
78
81
  m = emits[1][2]
79
82
  assert_equal 1, m['value']
80
83
  assert_equal true, m['referer_known']
81
84
  assert_equal 'Google', m['referer_referer']
82
85
  assert_equal 'gateway oracle cards denise linn', m['referer_search_term']
86
+ assert_equal 5, m.keys.size
83
87
 
84
88
  m = emits[2][2]
85
89
  assert_equal 2, m['value']
86
90
  assert_equal false, m['referer_known']
87
- assert_equal 'UNKNOWN', m['referer_referer']
88
- assert_equal 'UNKNOWN', m['referer_search_term']
91
+ assert_nil m['referer_referer']
92
+ assert_nil m['referer_search_term']
93
+ assert_equal 3, m.keys.size
94
+
95
+ m = emits[3][2]
96
+ assert_equal 3, m['value']
97
+ assert_equal true, m['referer_known']
98
+ assert_equal 'Google', m['referer_referer']
99
+ assert_equal 'マルチキャスト 学士論文', m['referer_search_term']
100
+ assert_equal 5, m.keys.size
101
+
102
+ # invalid input_encoding
103
+ m = emits[4][2]
104
+ assert_equal 4, m['value']
105
+ assert_equal true, m['referer_known']
106
+ assert_equal 'Google', m['referer_referer']
107
+ assert_equal 5, m.keys.size
108
+
109
+ m = emits[5][2]
110
+ assert_equal 5, m['value']
111
+ assert_equal true, m['referer_known']
112
+ assert_equal 'Yahoo!', m['referer_referer']
113
+ assert_equal 'ほげ', m['referer_search_term']
114
+ assert_equal 5, m.keys.size
89
115
  end
90
116
 
91
117
  def test_emit2
@@ -106,21 +132,22 @@ out_key_search_term ref_search_term
106
132
  m = emits[0][2]
107
133
  assert_equal 0, m['value']
108
134
  assert_equal false, m['ref_known']
109
- assert_equal 'UNKNOWN', m['ref_referer']
110
- assert_equal 'UNKNOWN', m['ref_search_term']
111
- assert_equal 4, m.keys.size
135
+ assert_nil m['referer_referer']
136
+ assert_nil m['referer_search_term']
137
+ assert_equal 2, m.keys.size
112
138
 
113
139
  m = emits[1][2]
114
140
  assert_equal 1, m['value']
115
141
  assert_equal true, m['ref_known']
116
142
  assert_equal 'Google', m['ref_referer']
117
143
  assert_equal 'gateway oracle cards denise linn', m['ref_search_term']
144
+ assert_equal 5, m.keys.size
118
145
 
119
146
  m = emits[2][2]
120
147
  assert_equal 2, m['value']
121
148
  assert_equal false, m['ref_known']
122
- assert_equal 'UNKNOWN', m['ref_referer']
123
- assert_equal 'UNKNOWN', m['ref_search_term']
149
+ assert_nil m['referer_referer']
150
+ assert_nil m['referer_search_term']
124
151
 
125
152
  m = emits[3][2]
126
153
  assert_equal 3, m['value']
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-referer-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAGOMORI Satoshi