logstash-codec-snowplow 0.4 → 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 735f80f0427aa930ac4d0a1222cdfb8466513011
4
- data.tar.gz: a1d55fd670e17f55ca995a84fd5bc9d5207bbd66
3
+ metadata.gz: 90da0028f8f79c9b026078966eef30d3930196b7
4
+ data.tar.gz: 21dd115d541c8efef29524b306fef1124b5d5345
5
5
  SHA512:
6
- metadata.gz: 25a549cc5cfe8299dd82c637736fd3c5220946b2c480089881003395d33e200dd5ac1dc2543e6063d5b511d7b9f9a3a295ead519a9e36394ce755f0da4650bf7
7
- data.tar.gz: c561c0b8d91ac8878616335b62a8ec0c6a316cff77a6edcaf2164e6709b79a1d4310d9bea30f3ec413c2714fa53f989e6e1a0adecf2a09efc67d3cee723c817f
6
+ metadata.gz: fc7c35086824551cadbcaa3f6eeda78f3c80706ed659278bf1c3505f84383cae494c1c25c0703fcc0b34d2aad733eb90bbc0ef70c138bb17a553029aca2f9a43
7
+ data.tar.gz: 8eaf58bd27921ccb2d86c273914394f1e7526c7bc58459e8907b9f09ff8b42781975e9153dc899a42cbe7612e2ce4bd83c04b8b428dd4e42fd6e35549cb9a09f
data/Gemfile CHANGED
@@ -1,2 +1,4 @@
1
1
  source 'https://rubygems.org'
2
2
  gemspec
3
+ gem 'rspec', :require => false, :group => :test
4
+ gem 'simplecov', :require => false, :group => :test
@@ -4,6 +4,7 @@ require 'json'
4
4
  require 'logger'
5
5
  require 'logstash/codecs/base'
6
6
  require 'logstash/namespace'
7
+ require 'logstash/serializer'
7
8
 
8
9
  # Read serialized Thrift Snowplow enriched-event records as Logstash events
9
10
  #
@@ -39,156 +40,20 @@ require 'logstash/namespace'
39
40
  # }
40
41
  # ----------------------------------
41
42
 
42
- ENRICHED_EVENT = %w[
43
- app_id
44
- platform
45
- etl_tstamp
46
- collector_tstamp
47
- dvce_created_tstamp
48
- event
49
- event_id
50
- txn_id
51
- name_tracker
52
- v_tracker
53
- v_collector
54
- v_etl
55
- user_id
56
- user_ipaddress
57
- user_fingerprint
58
- domain_userid
59
- domain_sessionidx
60
- network_userid
61
- geo_country
62
- geo_region
63
- geo_city
64
- geo_zipcode
65
- geo_latitude
66
- geo_longitude
67
- geo_region_name
68
- ip_isp
69
- ip_organization
70
- ip_domain
71
- ip_netspeed
72
- page_url
73
- page_title
74
- page_referrer
75
- page_urlscheme
76
- page_urlhost
77
- page_urlport
78
- page_urlpath
79
- page_urlquery
80
- page_urlfragment
81
- refr_urlscheme
82
- refr_urlhost
83
- refr_urlport
84
- refr_urlpath
85
- refr_urlquery
86
- refr_urlfragment
87
- refr_medium
88
- refr_source
89
- refr_term
90
- mkt_medium
91
- mkt_source
92
- mkt_term
93
- mkt_content
94
- mkt_campaign
95
- contexts
96
- se_category
97
- se_action
98
- se_label
99
- se_property
100
- se_value
101
- unstruct_event
102
- tr_orderid
103
- tr_affiliation
104
- tr_total
105
- tr_tax
106
- tr_shipping
107
- tr_city
108
- tr_state
109
- tr_country
110
- ti_orderid
111
- ti_sku
112
- ti_name
113
- ti_category
114
- ti_price
115
- ti_quantity
116
- pp_xoffset_min
117
- pp_xoffset_max
118
- pp_yoffset_min
119
- pp_yoffset_max
120
- useragent
121
- br_name
122
- br_family
123
- br_version
124
- br_type
125
- br_renderengine
126
- br_lang
127
- br_features_pdf
128
- br_features_flash
129
- br_features_java
130
- br_features_director
131
- br_features_quicktime
132
- br_features_realplayer
133
- br_features_windowsmedia
134
- br_features_gears
135
- br_features_silverlight
136
- br_cookies
137
- br_colordepth
138
- br_viewwidth
139
- br_viewheight
140
- os_name
141
- os_family
142
- os_manufacturer
143
- os_timezone
144
- dvce_type
145
- dvce_ismobile
146
- dvce_screenwidth
147
- dvce_screenheight
148
- doc_charset
149
- doc_width
150
- doc_height
151
- tr_currency
152
- tr_total_base
153
- tr_tax_base
154
- tr_shipping_base
155
- ti_currency
156
- ti_price_base
157
- base_currency
158
- geo_timezone
159
- mkt_clickid
160
- mkt_network
161
- etl_tags
162
- dvce_sent_tstamp
163
- refr_domain_userid
164
- refr_dvce_tstamp
165
- derived_contexts
166
- domain_sessionid
167
- derived_tstamp
168
- event_vendor
169
- event_name
170
- event_format
171
- event_version
172
- event_fingerprint
173
- ].freeze
174
-
175
43
  class LogStash::Codecs::Snowplow < LogStash::Codecs::Base
176
44
  config_name 'snowplow'
45
+ config :json_fields, validate: :array, default: %w[contexts derived_contexts se_property unstruct_event], required: true
177
46
 
178
47
  public
179
-
180
48
  def register
181
- @logger.info('Initializing logstash Snowplow enriched-event codec')
49
+ @logger.info("Initializing logstash snowplow codec: json fields: #{@json_fields}!")
182
50
  end
183
51
 
184
52
  public
185
53
  def decode(data)
186
- begin
187
- values = data.to_s.split("\t")
188
- hash = Hash[*ENRICHED_EVENT.zip(values).flatten] if values.length == ENRICHED_EVENT.length
189
- yield LogStash::Event.new(hash)
190
- rescue
191
- @logger.error("Fail to decode: #{data.to_s}")
192
- end
54
+ event = Serializer.deserialize(data, @json_fields)
55
+ yield LogStash::Event.new(event)
56
+ rescue
57
+ @logger.error("Fail to decode: #{data}")
193
58
  end
194
- end
59
+ end
@@ -0,0 +1,161 @@
1
+ # encoding: utf-8
2
+
3
+ require 'json'
4
+
5
+ ENRICHED_EVENT = %w[
6
+ app_id
7
+ platform
8
+ etl_tstamp
9
+ collector_tstamp
10
+ dvce_created_tstamp
11
+ event
12
+ event_id
13
+ txn_id
14
+ name_tracker
15
+ v_tracker
16
+ v_collector
17
+ v_etl
18
+ user_id
19
+ user_ipaddress
20
+ user_fingerprint
21
+ domain_userid
22
+ domain_sessionidx
23
+ network_userid
24
+ geo_country
25
+ geo_region
26
+ geo_city
27
+ geo_zipcode
28
+ geo_latitude
29
+ geo_longitude
30
+ geo_region_name
31
+ ip_isp
32
+ ip_organization
33
+ ip_domain
34
+ ip_netspeed
35
+ page_url
36
+ page_title
37
+ page_referrer
38
+ page_urlscheme
39
+ page_urlhost
40
+ page_urlport
41
+ page_urlpath
42
+ page_urlquery
43
+ page_urlfragment
44
+ refr_urlscheme
45
+ refr_urlhost
46
+ refr_urlport
47
+ refr_urlpath
48
+ refr_urlquery
49
+ refr_urlfragment
50
+ refr_medium
51
+ refr_source
52
+ refr_term
53
+ mkt_medium
54
+ mkt_source
55
+ mkt_term
56
+ mkt_content
57
+ mkt_campaign
58
+ contexts
59
+ se_category
60
+ se_action
61
+ se_label
62
+ se_property
63
+ se_value
64
+ unstruct_event
65
+ tr_orderid
66
+ tr_affiliation
67
+ tr_total
68
+ tr_tax
69
+ tr_shipping
70
+ tr_city
71
+ tr_state
72
+ tr_country
73
+ ti_orderid
74
+ ti_sku
75
+ ti_name
76
+ ti_category
77
+ ti_price
78
+ ti_quantity
79
+ pp_xoffset_min
80
+ pp_xoffset_max
81
+ pp_yoffset_min
82
+ pp_yoffset_max
83
+ useragent
84
+ br_name
85
+ br_family
86
+ br_version
87
+ br_type
88
+ br_renderengine
89
+ br_lang
90
+ br_features_pdf
91
+ br_features_flash
92
+ br_features_java
93
+ br_features_director
94
+ br_features_quicktime
95
+ br_features_realplayer
96
+ br_features_windowsmedia
97
+ br_features_gears
98
+ br_features_silverlight
99
+ br_cookies
100
+ br_colordepth
101
+ br_viewwidth
102
+ br_viewheight
103
+ os_name
104
+ os_family
105
+ os_manufacturer
106
+ os_timezone
107
+ dvce_type
108
+ dvce_ismobile
109
+ dvce_screenwidth
110
+ dvce_screenheight
111
+ doc_charset
112
+ doc_width
113
+ doc_height
114
+ tr_currency
115
+ tr_total_base
116
+ tr_tax_base
117
+ tr_shipping_base
118
+ ti_currency
119
+ ti_price_base
120
+ base_currency
121
+ geo_timezone
122
+ mkt_clickid
123
+ mkt_network
124
+ etl_tags
125
+ dvce_sent_tstamp
126
+ refr_domain_userid
127
+ refr_dvce_tstamp
128
+ derived_contexts
129
+ domain_sessionid
130
+ derived_tstamp
131
+ event_vendor
132
+ event_name
133
+ event_format
134
+ event_version
135
+ event_fingerprint
136
+ true_tstamp
137
+ ].freeze
138
+
139
# Maps a tab-separated Snowplow enriched-event record onto the field names
# listed in ENRICHED_EVENT (a constant defined outside this class).
class Serializer
  # Splits the record on tabs, pairs each value positionally with the
  # corresponding ENRICHED_EVENT name, then JSON-decodes the requested fields.
  #
  # Columns missing from a short record map to nil; values beyond the last
  # known field name are ignored.
  #
  # @param [String] event tab-separated record (converted with #to_s)
  # @param [Array] json_fields names of the fields to JSON-decode
  # @return [Hash] field name => value
  def self.deserialize(event, json_fields)
    # A limit of -1 keeps trailing empty columns; the default split would
    # drop them and leave those fields nil instead of "".
    values = event.to_s.split("\t", -1)
    # Hash[] accepts the array of pairs directly, so no flatten/splat needed.
    record = Hash[ENRICHED_EVENT.zip(values)]
    json_fields.each do |field|
      parse(record, field) if record.key?(field)
    end
    record
  end

  # Replaces event[field] with its parsed JSON value, in place. The value is
  # left untouched when it is not a String (e.g. nil for a column absent from
  # a short record) or when it is not valid JSON.
  #
  # @param [Hash] event
  # @param [String] field
  # @return [Hash] the same hash that was passed in
  def self.parse(event, field)
    raw = event[field]
    # JSON.parse(nil) raises TypeError, not JSON::ParserError, so guard first.
    return event unless raw.is_a?(String)

    event[field] = JSON.parse(raw)
    event
  rescue JSON::ParserError
    event
  end
end
@@ -1,8 +1,8 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-codec-snowplow'
3
- s.version = '0.4'
3
+ s.version = '0.5'
4
4
  s.licenses = ['Apache-2.0']
5
- s.summary = 'This example input streams a string at a definable interval.'
5
+ s.summary = 'This codec gets a enrich snowplow event and generates a logstash event.'
6
6
  s.description = 'This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program'
7
7
  s.authors = ['Robson Júnior']
8
8
  s.email = 'bsao@cerebello.co'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-codec-snowplow
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.4'
4
+ version: '0.5'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robson Júnior
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-08-16 00:00:00.000000000 Z
11
+ date: 2017-08-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -62,6 +62,7 @@ files:
62
62
  - LICENSE.md
63
63
  - README.md
64
64
  - lib/logstash/codecs/snowplow.rb
65
+ - lib/logstash/serializer.rb
65
66
  - lib/thrift/event.thrift
66
67
  - lib/thrift/snowplow.thrift
67
68
  - logstash-codec-snowplow.gemspec
@@ -90,5 +91,5 @@ rubyforge_project:
90
91
  rubygems_version: 2.6.8
91
92
  signing_key:
92
93
  specification_version: 4
93
- summary: This example input streams a string at a definable interval.
94
+ summary: This codec gets a enrich snowplow event and generates a logstash event.
94
95
  test_files: []