logstash-codec-snowplow 0.4 → 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/lib/logstash/codecs/snowplow.rb +8 -143
- data/lib/logstash/serializer.rb +161 -0
- data/logstash-codec-snowplow.gemspec +2 -2
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 90da0028f8f79c9b026078966eef30d3930196b7
|
4
|
+
data.tar.gz: 21dd115d541c8efef29524b306fef1124b5d5345
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fc7c35086824551cadbcaa3f6eeda78f3c80706ed659278bf1c3505f84383cae494c1c25c0703fcc0b34d2aad733eb90bbc0ef70c138bb17a553029aca2f9a43
|
7
|
+
data.tar.gz: 8eaf58bd27921ccb2d86c273914394f1e7526c7bc58459e8907b9f09ff8b42781975e9153dc899a42cbe7612e2ce4bd83c04b8b428dd4e42fd6e35549cb9a09f
|
data/Gemfile
CHANGED
data/lib/logstash/codecs/snowplow.rb
CHANGED
@@ -4,6 +4,7 @@ require 'json'
|
|
4
4
|
require 'logger'
|
5
5
|
require 'logstash/codecs/base'
|
6
6
|
require 'logstash/namespace'
|
7
|
+
require 'logstash/serializer'
|
7
8
|
|
8
9
|
# Read serialized Thrift Snowplow enriched-event records as Logstash events
|
9
10
|
#
|
@@ -39,156 +40,20 @@ require 'logstash/namespace'
|
|
39
40
|
# }
|
40
41
|
# ----------------------------------
|
41
42
|
|
42
|
-
ENRICHED_EVENT = %w[
|
43
|
-
app_id
|
44
|
-
platform
|
45
|
-
etl_tstamp
|
46
|
-
collector_tstamp
|
47
|
-
dvce_created_tstamp
|
48
|
-
event
|
49
|
-
event_id
|
50
|
-
txn_id
|
51
|
-
name_tracker
|
52
|
-
v_tracker
|
53
|
-
v_collector
|
54
|
-
v_etl
|
55
|
-
user_id
|
56
|
-
user_ipaddress
|
57
|
-
user_fingerprint
|
58
|
-
domain_userid
|
59
|
-
domain_sessionidx
|
60
|
-
network_userid
|
61
|
-
geo_country
|
62
|
-
geo_region
|
63
|
-
geo_city
|
64
|
-
geo_zipcode
|
65
|
-
geo_latitude
|
66
|
-
geo_longitude
|
67
|
-
geo_region_name
|
68
|
-
ip_isp
|
69
|
-
ip_organization
|
70
|
-
ip_domain
|
71
|
-
ip_netspeed
|
72
|
-
page_url
|
73
|
-
page_title
|
74
|
-
page_referrer
|
75
|
-
page_urlscheme
|
76
|
-
page_urlhost
|
77
|
-
page_urlport
|
78
|
-
page_urlpath
|
79
|
-
page_urlquery
|
80
|
-
page_urlfragment
|
81
|
-
refr_urlscheme
|
82
|
-
refr_urlhost
|
83
|
-
refr_urlport
|
84
|
-
refr_urlpath
|
85
|
-
refr_urlquery
|
86
|
-
refr_urlfragment
|
87
|
-
refr_medium
|
88
|
-
refr_source
|
89
|
-
refr_term
|
90
|
-
mkt_medium
|
91
|
-
mkt_source
|
92
|
-
mkt_term
|
93
|
-
mkt_content
|
94
|
-
mkt_campaign
|
95
|
-
contexts
|
96
|
-
se_category
|
97
|
-
se_action
|
98
|
-
se_label
|
99
|
-
se_property
|
100
|
-
se_value
|
101
|
-
unstruct_event
|
102
|
-
tr_orderid
|
103
|
-
tr_affiliation
|
104
|
-
tr_total
|
105
|
-
tr_tax
|
106
|
-
tr_shipping
|
107
|
-
tr_city
|
108
|
-
tr_state
|
109
|
-
tr_country
|
110
|
-
ti_orderid
|
111
|
-
ti_sku
|
112
|
-
ti_name
|
113
|
-
ti_category
|
114
|
-
ti_price
|
115
|
-
ti_quantity
|
116
|
-
pp_xoffset_min
|
117
|
-
pp_xoffset_max
|
118
|
-
pp_yoffset_min
|
119
|
-
pp_yoffset_max
|
120
|
-
useragent
|
121
|
-
br_name
|
122
|
-
br_family
|
123
|
-
br_version
|
124
|
-
br_type
|
125
|
-
br_renderengine
|
126
|
-
br_lang
|
127
|
-
br_features_pdf
|
128
|
-
br_features_flash
|
129
|
-
br_features_java
|
130
|
-
br_features_director
|
131
|
-
br_features_quicktime
|
132
|
-
br_features_realplayer
|
133
|
-
br_features_windowsmedia
|
134
|
-
br_features_gears
|
135
|
-
br_features_silverlight
|
136
|
-
br_cookies
|
137
|
-
br_colordepth
|
138
|
-
br_viewwidth
|
139
|
-
br_viewheight
|
140
|
-
os_name
|
141
|
-
os_family
|
142
|
-
os_manufacturer
|
143
|
-
os_timezone
|
144
|
-
dvce_type
|
145
|
-
dvce_ismobile
|
146
|
-
dvce_screenwidth
|
147
|
-
dvce_screenheight
|
148
|
-
doc_charset
|
149
|
-
doc_width
|
150
|
-
doc_height
|
151
|
-
tr_currency
|
152
|
-
tr_total_base
|
153
|
-
tr_tax_base
|
154
|
-
tr_shipping_base
|
155
|
-
ti_currency
|
156
|
-
ti_price_base
|
157
|
-
base_currency
|
158
|
-
geo_timezone
|
159
|
-
mkt_clickid
|
160
|
-
mkt_network
|
161
|
-
etl_tags
|
162
|
-
dvce_sent_tstamp
|
163
|
-
refr_domain_userid
|
164
|
-
refr_dvce_tstamp
|
165
|
-
derived_contexts
|
166
|
-
domain_sessionid
|
167
|
-
derived_tstamp
|
168
|
-
event_vendor
|
169
|
-
event_name
|
170
|
-
event_format
|
171
|
-
event_version
|
172
|
-
event_fingerprint
|
173
|
-
].freeze
|
174
|
-
|
175
43
|
class LogStash::Codecs::Snowplow < LogStash::Codecs::Base
|
176
44
|
config_name 'snowplow'
|
45
|
+
config :json_fields, validate: :array, default: %w[contexts derived_contexts se_property unstruct_event], required: true
|
177
46
|
|
178
47
|
public
|
179
|
-
|
180
48
|
def register
|
181
|
-
@logger.info(
|
49
|
+
@logger.info("Initializing logstash snowplow codec: json fields: #{@json_fields}!")
|
182
50
|
end
|
183
51
|
|
184
52
|
public
|
185
53
|
def decode(data)
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
rescue
|
191
|
-
@logger.error("Fail to decode: #{data.to_s}")
|
192
|
-
end
|
54
|
+
event = Serializer.deserialize(data, @json_fields)
|
55
|
+
yield LogStash::Event.new(event)
|
56
|
+
rescue
|
57
|
+
@logger.error("Fail to decode: #{data}")
|
193
58
|
end
|
194
|
-
end
|
59
|
+
end
|
data/lib/logstash/serializer.rb
ADDED
@@ -0,0 +1,161 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
ENRICHED_EVENT = %w[
|
6
|
+
app_id
|
7
|
+
platform
|
8
|
+
etl_tstamp
|
9
|
+
collector_tstamp
|
10
|
+
dvce_created_tstamp
|
11
|
+
event
|
12
|
+
event_id
|
13
|
+
txn_id
|
14
|
+
name_tracker
|
15
|
+
v_tracker
|
16
|
+
v_collector
|
17
|
+
v_etl
|
18
|
+
user_id
|
19
|
+
user_ipaddress
|
20
|
+
user_fingerprint
|
21
|
+
domain_userid
|
22
|
+
domain_sessionidx
|
23
|
+
network_userid
|
24
|
+
geo_country
|
25
|
+
geo_region
|
26
|
+
geo_city
|
27
|
+
geo_zipcode
|
28
|
+
geo_latitude
|
29
|
+
geo_longitude
|
30
|
+
geo_region_name
|
31
|
+
ip_isp
|
32
|
+
ip_organization
|
33
|
+
ip_domain
|
34
|
+
ip_netspeed
|
35
|
+
page_url
|
36
|
+
page_title
|
37
|
+
page_referrer
|
38
|
+
page_urlscheme
|
39
|
+
page_urlhost
|
40
|
+
page_urlport
|
41
|
+
page_urlpath
|
42
|
+
page_urlquery
|
43
|
+
page_urlfragment
|
44
|
+
refr_urlscheme
|
45
|
+
refr_urlhost
|
46
|
+
refr_urlport
|
47
|
+
refr_urlpath
|
48
|
+
refr_urlquery
|
49
|
+
refr_urlfragment
|
50
|
+
refr_medium
|
51
|
+
refr_source
|
52
|
+
refr_term
|
53
|
+
mkt_medium
|
54
|
+
mkt_source
|
55
|
+
mkt_term
|
56
|
+
mkt_content
|
57
|
+
mkt_campaign
|
58
|
+
contexts
|
59
|
+
se_category
|
60
|
+
se_action
|
61
|
+
se_label
|
62
|
+
se_property
|
63
|
+
se_value
|
64
|
+
unstruct_event
|
65
|
+
tr_orderid
|
66
|
+
tr_affiliation
|
67
|
+
tr_total
|
68
|
+
tr_tax
|
69
|
+
tr_shipping
|
70
|
+
tr_city
|
71
|
+
tr_state
|
72
|
+
tr_country
|
73
|
+
ti_orderid
|
74
|
+
ti_sku
|
75
|
+
ti_name
|
76
|
+
ti_category
|
77
|
+
ti_price
|
78
|
+
ti_quantity
|
79
|
+
pp_xoffset_min
|
80
|
+
pp_xoffset_max
|
81
|
+
pp_yoffset_min
|
82
|
+
pp_yoffset_max
|
83
|
+
useragent
|
84
|
+
br_name
|
85
|
+
br_family
|
86
|
+
br_version
|
87
|
+
br_type
|
88
|
+
br_renderengine
|
89
|
+
br_lang
|
90
|
+
br_features_pdf
|
91
|
+
br_features_flash
|
92
|
+
br_features_java
|
93
|
+
br_features_director
|
94
|
+
br_features_quicktime
|
95
|
+
br_features_realplayer
|
96
|
+
br_features_windowsmedia
|
97
|
+
br_features_gears
|
98
|
+
br_features_silverlight
|
99
|
+
br_cookies
|
100
|
+
br_colordepth
|
101
|
+
br_viewwidth
|
102
|
+
br_viewheight
|
103
|
+
os_name
|
104
|
+
os_family
|
105
|
+
os_manufacturer
|
106
|
+
os_timezone
|
107
|
+
dvce_type
|
108
|
+
dvce_ismobile
|
109
|
+
dvce_screenwidth
|
110
|
+
dvce_screenheight
|
111
|
+
doc_charset
|
112
|
+
doc_width
|
113
|
+
doc_height
|
114
|
+
tr_currency
|
115
|
+
tr_total_base
|
116
|
+
tr_tax_base
|
117
|
+
tr_shipping_base
|
118
|
+
ti_currency
|
119
|
+
ti_price_base
|
120
|
+
base_currency
|
121
|
+
geo_timezone
|
122
|
+
mkt_clickid
|
123
|
+
mkt_network
|
124
|
+
etl_tags
|
125
|
+
dvce_sent_tstamp
|
126
|
+
refr_domain_userid
|
127
|
+
refr_dvce_tstamp
|
128
|
+
derived_contexts
|
129
|
+
domain_sessionid
|
130
|
+
derived_tstamp
|
131
|
+
event_vendor
|
132
|
+
event_name
|
133
|
+
event_format
|
134
|
+
event_version
|
135
|
+
event_fingerprint
|
136
|
+
true_tstamp
|
137
|
+
].freeze
|
138
|
+
|
139
|
+
class Serializer
|
140
|
+
# @param [String] event
|
141
|
+
# @param [Array] json_fields
|
142
|
+
# @return [Hash]
|
143
|
+
def self.deserialize(event, json_fields)
|
144
|
+
event_values = event.to_s.split("\t")
|
145
|
+
event = Hash[*ENRICHED_EVENT.zip(event_values).flatten]
|
146
|
+
json_fields.each do |field|
|
147
|
+
event = parse(event, field) if event.has_key? field
|
148
|
+
end
|
149
|
+
event
|
150
|
+
end
|
151
|
+
|
152
|
+
# @param [Hash] event
|
153
|
+
# @param [String] field
|
154
|
+
# @return [Hash]
|
155
|
+
def self.parse(event, field)
|
156
|
+
event[field] = JSON.parse(event[field])
|
157
|
+
return event
|
158
|
+
rescue JSON::ParserError => _
|
159
|
+
return event
|
160
|
+
end
|
161
|
+
end
|
data/logstash-codec-snowplow.gemspec
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'logstash-codec-snowplow'
|
3
|
-
s.version = '0.4'
|
3
|
+
s.version = '0.5'
|
4
4
|
s.licenses = ['Apache-2.0']
|
5
|
-
s.summary = 'This
|
5
|
+
s.summary = 'This codec gets a enrich snowplow event and generates a logstash event.'
|
6
6
|
s.description = 'This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program'
|
7
7
|
s.authors = ['Robson Júnior']
|
8
8
|
s.email = 'bsao@cerebello.co'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-codec-snowplow
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.4'
|
4
|
+
version: '0.5'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Robson Júnior
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-08-
|
11
|
+
date: 2017-08-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -62,6 +62,7 @@ files:
|
|
62
62
|
- LICENSE.md
|
63
63
|
- README.md
|
64
64
|
- lib/logstash/codecs/snowplow.rb
|
65
|
+
- lib/logstash/serializer.rb
|
65
66
|
- lib/thrift/event.thrift
|
66
67
|
- lib/thrift/snowplow.thrift
|
67
68
|
- logstash-codec-snowplow.gemspec
|
@@ -90,5 +91,5 @@ rubyforge_project:
|
|
90
91
|
rubygems_version: 2.6.8
|
91
92
|
signing_key:
|
92
93
|
specification_version: 4
|
93
|
-
summary: This
|
94
|
+
summary: This codec gets a enrich snowplow event and generates a logstash event.
|
94
95
|
test_files: []
|