logstash-codec-snowplow 0.4 → 0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 735f80f0427aa930ac4d0a1222cdfb8466513011
4
- data.tar.gz: a1d55fd670e17f55ca995a84fd5bc9d5207bbd66
3
+ metadata.gz: 90da0028f8f79c9b026078966eef30d3930196b7
4
+ data.tar.gz: 21dd115d541c8efef29524b306fef1124b5d5345
5
5
  SHA512:
6
- metadata.gz: 25a549cc5cfe8299dd82c637736fd3c5220946b2c480089881003395d33e200dd5ac1dc2543e6063d5b511d7b9f9a3a295ead519a9e36394ce755f0da4650bf7
7
- data.tar.gz: c561c0b8d91ac8878616335b62a8ec0c6a316cff77a6edcaf2164e6709b79a1d4310d9bea30f3ec413c2714fa53f989e6e1a0adecf2a09efc67d3cee723c817f
6
+ metadata.gz: fc7c35086824551cadbcaa3f6eeda78f3c80706ed659278bf1c3505f84383cae494c1c25c0703fcc0b34d2aad733eb90bbc0ef70c138bb17a553029aca2f9a43
7
+ data.tar.gz: 8eaf58bd27921ccb2d86c273914394f1e7526c7bc58459e8907b9f09ff8b42781975e9153dc899a42cbe7612e2ce4bd83c04b8b428dd4e42fd6e35549cb9a09f
data/Gemfile CHANGED
@@ -1,2 +1,4 @@
1
1
  source 'https://rubygems.org'
2
2
  gemspec
3
+ gem 'rspec', :require => false, :group => :test
4
+ gem 'simplecov', :require => false, :group => :test
@@ -4,6 +4,7 @@ require 'json'
4
4
  require 'logger'
5
5
  require 'logstash/codecs/base'
6
6
  require 'logstash/namespace'
7
+ require 'logstash/serializer'
7
8
 
8
9
  # Read serialized Thrift Snowplow enriched-event records as Logstash events
9
10
  #
@@ -39,156 +40,20 @@ require 'logstash/namespace'
39
40
  # }
40
41
  # ----------------------------------
41
42
 
42
- ENRICHED_EVENT = %w[
43
- app_id
44
- platform
45
- etl_tstamp
46
- collector_tstamp
47
- dvce_created_tstamp
48
- event
49
- event_id
50
- txn_id
51
- name_tracker
52
- v_tracker
53
- v_collector
54
- v_etl
55
- user_id
56
- user_ipaddress
57
- user_fingerprint
58
- domain_userid
59
- domain_sessionidx
60
- network_userid
61
- geo_country
62
- geo_region
63
- geo_city
64
- geo_zipcode
65
- geo_latitude
66
- geo_longitude
67
- geo_region_name
68
- ip_isp
69
- ip_organization
70
- ip_domain
71
- ip_netspeed
72
- page_url
73
- page_title
74
- page_referrer
75
- page_urlscheme
76
- page_urlhost
77
- page_urlport
78
- page_urlpath
79
- page_urlquery
80
- page_urlfragment
81
- refr_urlscheme
82
- refr_urlhost
83
- refr_urlport
84
- refr_urlpath
85
- refr_urlquery
86
- refr_urlfragment
87
- refr_medium
88
- refr_source
89
- refr_term
90
- mkt_medium
91
- mkt_source
92
- mkt_term
93
- mkt_content
94
- mkt_campaign
95
- contexts
96
- se_category
97
- se_action
98
- se_label
99
- se_property
100
- se_value
101
- unstruct_event
102
- tr_orderid
103
- tr_affiliation
104
- tr_total
105
- tr_tax
106
- tr_shipping
107
- tr_city
108
- tr_state
109
- tr_country
110
- ti_orderid
111
- ti_sku
112
- ti_name
113
- ti_category
114
- ti_price
115
- ti_quantity
116
- pp_xoffset_min
117
- pp_xoffset_max
118
- pp_yoffset_min
119
- pp_yoffset_max
120
- useragent
121
- br_name
122
- br_family
123
- br_version
124
- br_type
125
- br_renderengine
126
- br_lang
127
- br_features_pdf
128
- br_features_flash
129
- br_features_java
130
- br_features_director
131
- br_features_quicktime
132
- br_features_realplayer
133
- br_features_windowsmedia
134
- br_features_gears
135
- br_features_silverlight
136
- br_cookies
137
- br_colordepth
138
- br_viewwidth
139
- br_viewheight
140
- os_name
141
- os_family
142
- os_manufacturer
143
- os_timezone
144
- dvce_type
145
- dvce_ismobile
146
- dvce_screenwidth
147
- dvce_screenheight
148
- doc_charset
149
- doc_width
150
- doc_height
151
- tr_currency
152
- tr_total_base
153
- tr_tax_base
154
- tr_shipping_base
155
- ti_currency
156
- ti_price_base
157
- base_currency
158
- geo_timezone
159
- mkt_clickid
160
- mkt_network
161
- etl_tags
162
- dvce_sent_tstamp
163
- refr_domain_userid
164
- refr_dvce_tstamp
165
- derived_contexts
166
- domain_sessionid
167
- derived_tstamp
168
- event_vendor
169
- event_name
170
- event_format
171
- event_version
172
- event_fingerprint
173
- ].freeze
174
-
175
43
  class LogStash::Codecs::Snowplow < LogStash::Codecs::Base
176
44
  config_name 'snowplow'
45
+ config :json_fields, validate: :array, default: %w[contexts derived_contexts se_property unstruct_event], required: true
177
46
 
178
47
  public
179
-
180
48
  def register
181
- @logger.info('Initializing logstash Snowplow enriched-event codec')
49
+ @logger.info("Initializing logstash snowplow codec: json fields: #{@json_fields}!")
182
50
  end
183
51
 
184
52
  public
185
53
  def decode(data)
186
- begin
187
- values = data.to_s.split("\t")
188
- hash = Hash[*ENRICHED_EVENT.zip(values).flatten] if values.length == ENRICHED_EVENT.length
189
- yield LogStash::Event.new(hash)
190
- rescue
191
- @logger.error("Fail to decode: #{data.to_s}")
192
- end
54
+ event = Serializer.deserialize(data, @json_fields)
55
+ yield LogStash::Event.new(event)
56
+ rescue
57
+ @logger.error("Fail to decode: #{data}")
193
58
  end
194
- end
59
+ end
@@ -0,0 +1,161 @@
1
+ # encoding: utf-8
2
+
3
+ require 'json'
4
+
5
+ ENRICHED_EVENT = %w[
6
+ app_id
7
+ platform
8
+ etl_tstamp
9
+ collector_tstamp
10
+ dvce_created_tstamp
11
+ event
12
+ event_id
13
+ txn_id
14
+ name_tracker
15
+ v_tracker
16
+ v_collector
17
+ v_etl
18
+ user_id
19
+ user_ipaddress
20
+ user_fingerprint
21
+ domain_userid
22
+ domain_sessionidx
23
+ network_userid
24
+ geo_country
25
+ geo_region
26
+ geo_city
27
+ geo_zipcode
28
+ geo_latitude
29
+ geo_longitude
30
+ geo_region_name
31
+ ip_isp
32
+ ip_organization
33
+ ip_domain
34
+ ip_netspeed
35
+ page_url
36
+ page_title
37
+ page_referrer
38
+ page_urlscheme
39
+ page_urlhost
40
+ page_urlport
41
+ page_urlpath
42
+ page_urlquery
43
+ page_urlfragment
44
+ refr_urlscheme
45
+ refr_urlhost
46
+ refr_urlport
47
+ refr_urlpath
48
+ refr_urlquery
49
+ refr_urlfragment
50
+ refr_medium
51
+ refr_source
52
+ refr_term
53
+ mkt_medium
54
+ mkt_source
55
+ mkt_term
56
+ mkt_content
57
+ mkt_campaign
58
+ contexts
59
+ se_category
60
+ se_action
61
+ se_label
62
+ se_property
63
+ se_value
64
+ unstruct_event
65
+ tr_orderid
66
+ tr_affiliation
67
+ tr_total
68
+ tr_tax
69
+ tr_shipping
70
+ tr_city
71
+ tr_state
72
+ tr_country
73
+ ti_orderid
74
+ ti_sku
75
+ ti_name
76
+ ti_category
77
+ ti_price
78
+ ti_quantity
79
+ pp_xoffset_min
80
+ pp_xoffset_max
81
+ pp_yoffset_min
82
+ pp_yoffset_max
83
+ useragent
84
+ br_name
85
+ br_family
86
+ br_version
87
+ br_type
88
+ br_renderengine
89
+ br_lang
90
+ br_features_pdf
91
+ br_features_flash
92
+ br_features_java
93
+ br_features_director
94
+ br_features_quicktime
95
+ br_features_realplayer
96
+ br_features_windowsmedia
97
+ br_features_gears
98
+ br_features_silverlight
99
+ br_cookies
100
+ br_colordepth
101
+ br_viewwidth
102
+ br_viewheight
103
+ os_name
104
+ os_family
105
+ os_manufacturer
106
+ os_timezone
107
+ dvce_type
108
+ dvce_ismobile
109
+ dvce_screenwidth
110
+ dvce_screenheight
111
+ doc_charset
112
+ doc_width
113
+ doc_height
114
+ tr_currency
115
+ tr_total_base
116
+ tr_tax_base
117
+ tr_shipping_base
118
+ ti_currency
119
+ ti_price_base
120
+ base_currency
121
+ geo_timezone
122
+ mkt_clickid
123
+ mkt_network
124
+ etl_tags
125
+ dvce_sent_tstamp
126
+ refr_domain_userid
127
+ refr_dvce_tstamp
128
+ derived_contexts
129
+ domain_sessionid
130
+ derived_tstamp
131
+ event_vendor
132
+ event_name
133
+ event_format
134
+ event_version
135
+ event_fingerprint
136
+ true_tstamp
137
+ ].freeze
138
+
139
+ class Serializer
140
+ # @param [String] event
141
+ # @param [Array] json_fields
142
+ # @return [Hash]
143
+ def self.deserialize(event, json_fields)
144
+ event_values = event.to_s.split("\t")
145
+ event = Hash[*ENRICHED_EVENT.zip(event_values).flatten]
146
+ json_fields.each do |field|
147
+ event = parse(event, field) if event.has_key? field
148
+ end
149
+ event
150
+ end
151
+
152
+ # @param [Hash] event
153
+ # @param [String] field
154
+ # @return [Hash]
155
+ def self.parse(event, field)
156
+ event[field] = JSON.parse(event[field])
157
+ return event
158
+ rescue JSON::ParserError => _
159
+ return event
160
+ end
161
+ end
@@ -1,8 +1,8 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-codec-snowplow'
3
- s.version = '0.4'
3
+ s.version = '0.5'
4
4
  s.licenses = ['Apache-2.0']
5
- s.summary = 'This example input streams a string at a definable interval.'
5
+ s.summary = 'This codec gets a enrich snowplow event and generates a logstash event.'
6
6
  s.description = 'This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program'
7
7
  s.authors = ['Robson Júnior']
8
8
  s.email = 'bsao@cerebello.co'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-codec-snowplow
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.4'
4
+ version: '0.5'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robson Júnior
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-08-16 00:00:00.000000000 Z
11
+ date: 2017-08-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -62,6 +62,7 @@ files:
62
62
  - LICENSE.md
63
63
  - README.md
64
64
  - lib/logstash/codecs/snowplow.rb
65
+ - lib/logstash/serializer.rb
65
66
  - lib/thrift/event.thrift
66
67
  - lib/thrift/snowplow.thrift
67
68
  - logstash-codec-snowplow.gemspec
@@ -90,5 +91,5 @@ rubyforge_project:
90
91
  rubygems_version: 2.6.8
91
92
  signing_key:
92
93
  specification_version: 4
93
- summary: This example input streams a string at a definable interval.
94
+ summary: This codec gets a enrich snowplow event and generates a logstash event.
94
95
  test_files: []