logstash-codec-snowplow 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: cbabf39fbd655f440e460086dafdb072f5adf009c20d688cb104edcb0f9c70d2
4
+ data.tar.gz: c71c78e4fa66d8ab25ff673deb8d3c9a66725dbfc0edae0b52c6b682b0f16725
5
+ SHA512:
6
+ metadata.gz: 56cb875ad7d37040599c891ea2b4211fc5a7d89dff0d3d634611c8e61c8cb8abc560eafbb828bda61b8b03d3a97dcb397a91f844b73b4cce9990e1ac12b2892d
7
+ data.tar.gz: b4c5f79d1974f3cf1980b2795d2e276a06839aa2f09128bb5af8a614cfa199ffabeb3c209ff64d25a0235774619f6406711daffc95eb13e065048b56aaa10fb5
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org' # fetch gems from the public RubyGems registry
2
+ gemspec # take the dependency list from the .gemspec next to this Gemfile
data/LICENSE.md ADDED
@@ -0,0 +1,13 @@
1
+ Copyright 2016 Active Agent AG
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
data/README.md ADDED
@@ -0,0 +1,41 @@
1
+ # Logstash Codec snowplow
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/logstash-codec-snowplow.svg)](http://badge.fury.io/rb/logstash-codec-snowplow) [![Build Status](https://travis-ci.org/active-agent/logstash-codec-snowplow.svg?branch=master)](https://travis-ci.org/active-agent/logstash-codec-snowplow)
4
+
5
+ ## Install
6
+
7
+ $ ./bin/logstash-plugin install logstash-codec-snowplow
8
+
9
+ ## Usage
10
+
11
+ ```ruby
12
+ input {
13
+ kafka {
14
+ codec => snowplow {}
15
+ }
16
+ }
17
+ ```
18
+
19
+ ## Install dependencies
20
+
21
+ Install jruby:
22
+
23
+ $ brew install jruby
24
+
25
+ Install jruby bundler:
26
+
27
+ $ jruby -S gem install bundler
28
+
29
+ Install plugin dependencies:
30
+
31
+ $ jruby -S bundler install
32
+
33
+ ## Build & Install
34
+
35
+ Build Gem:
36
+
37
+ $ gem build logstash-codec-snowplow.gemspec
38
+
39
+ Install Gem:
40
+
41
+ $ ./bin/logstash-plugin install /your/local/plugin/logstash-codec-snowplow.gem
@@ -0,0 +1,191 @@
1
+ # encoding: utf-8
2
+
3
+ require 'json'
4
+ require 'logger'
5
+ require 'logstash/codecs/base'
6
+
7
+ # Read tab-separated Snowplow enriched-event records as Logstash events
8
+ #
9
+ # This plugin is used to deserialize Snowplow enriched-event
10
+ # records into Logstash events.
11
+ #
12
+ # ==== Encoding
13
+ #
14
+ # Encoding is not implemented in this file: the codec defines only `decode`.
15
+ #
16
+ #
17
+ # ==== Decoding
18
+ #
19
+ # This codec deserializes individual tab-separated Snowplow enriched-event records into Logstash events.
20
+ #
21
+ #
22
+ # ==== Usage
23
+ # Example usage with kafka input.
24
+ #
25
+ # [source,ruby]
26
+ # ----------------------------------
27
+ # input {
28
+ # kafka {
29
+ # codec => snowplow {
30
+ # }
31
+ # }
32
+ # }
33
+ # filter {
34
+ # ...
35
+ # }
36
+ # output {
37
+ # ...
38
+ # }
39
+ # ----------------------------------
40
+
41
# Column names of a tab-separated Snowplow enriched-event record, in positional
# order. `decode` pairs the Nth tab-separated value of a record with the Nth
# name listed here, so the order must not be changed.
ENRICHED_EVENT = %w[
  app_id platform
  etl_tstamp collector_tstamp dvce_created_tstamp
  event event_id txn_id
  name_tracker v_tracker v_collector v_etl
  user_id user_ipaddress user_fingerprint domain_userid domain_sessionidx network_userid
  geo_country geo_region geo_city geo_zipcode geo_latitude geo_longitude geo_region_name
  ip_isp ip_organization ip_domain ip_netspeed
  page_url page_title page_referrer
  page_urlscheme page_urlhost page_urlport page_urlpath page_urlquery page_urlfragment
  refr_urlscheme refr_urlhost refr_urlport refr_urlpath refr_urlquery refr_urlfragment
  refr_medium refr_source refr_term
  mkt_medium mkt_source mkt_term mkt_content mkt_campaign
  contexts
  se_category se_action se_label se_property se_value
  unstruct_event
  tr_orderid tr_affiliation tr_total tr_tax tr_shipping tr_city tr_state tr_country
  ti_orderid ti_sku ti_name ti_category ti_price ti_quantity
  pp_xoffset_min pp_xoffset_max pp_yoffset_min pp_yoffset_max
  useragent
  br_name br_family br_version br_type br_renderengine
  br_lang
  br_features_pdf br_features_flash br_features_java br_features_director
  br_features_quicktime br_features_realplayer br_features_windowsmedia
  br_features_gears br_features_silverlight
  br_cookies br_colordepth br_viewwidth br_viewheight
  os_name os_family os_manufacturer os_timezone
  dvce_type dvce_ismobile dvce_screenwidth dvce_screenheight
  doc_charset doc_width doc_height
  tr_currency tr_total_base tr_tax_base tr_shipping_base ti_currency ti_price_base base_currency
  geo_timezone
  mkt_clickid mkt_network
  etl_tags
  dvce_sent_tstamp
  refr_domain_userid refr_dvce_tstamp
  derived_contexts
  domain_sessionid
  derived_tstamp
  event_vendor event_name event_format event_version
  event_fingerprint
].freeze
173
+
174
# Logstash codec that maps one tab-separated Snowplow enriched-event record
# onto one Logstash event, naming each value after the ENRICHED_EVENT entry at
# the same position.
class LogStash::Codecs::Snowplow < LogStash::Codecs::Base
  config_name 'snowplow'

  # Tag set on events built from records whose column count does not match
  # ENRICHED_EVENT.
  PARSE_FAILURE_TAG = '_snowplowparsefailure'.freeze

  # Logs that the codec is being initialised; no other setup is performed.
  def register
    @logger.info('Initializing logstash Snowplow enriched-event codec')
  end

  # Decodes a single record and yields the resulting event.
  #
  # A record with exactly ENRICHED_EVENT.length tab-separated values yields an
  # event with one field per column. A record with any other column count
  # yields an event that carries the raw text in `message` and
  # PARSE_FAILURE_TAG in `tags`, so malformed input stays visible downstream
  # instead of becoming an event built from nil.
  #
  # @param data [#to_s] raw record; columns are separated by "\t"
  # @yield [LogStash::Event] the decoded (or failure-tagged) event
  def decode(data)
    event = build_event(data)
    # Yield outside the rescue in build_event so that errors raised by the
    # caller's block are not swallowed and misreported as decode failures.
    yield event if event
  end

  private

  # Builds the event for +data+. Returns nil, after logging, when an error is
  # raised while splitting the record or constructing the event.
  def build_event(data)
    line = data.to_s
    # A negative limit keeps trailing empty columns; without it a record whose
    # last columns are empty would come up short of ENRICHED_EVENT.length.
    values = line.split("\t", -1)
    fields =
      if values.length == ENRICHED_EVENT.length
        Hash[ENRICHED_EVENT.zip(values)]
      else
        @logger.warn("Snowplow record has #{values.length} fields, expected #{ENRICHED_EVENT.length}")
        { 'message' => line, 'tags' => [PARSE_FAILURE_TAG] }
      end
    LogStash::Event.new(fields)
  rescue => e
    @logger.error("Fail to decode: #{data}: #{e.message}")
    nil
  end
end
@@ -0,0 +1,6 @@
1
+ struct event { // Event record: timestamp and version strings are required, host and message are optional
2
+ 1: required string timestamp, # timestamp as an ISO 8601 string
3
+ 2: required string version,
4
+ 3: optional string host,
5
+ 4: optional string message
6
+ }
@@ -0,0 +1,206 @@
1
+ struct SchemaSniffer { // One optional field per Snowplow enriched-event column; field names follow the same order as the codec's ENRICHED_EVENT list
2
+
3
+ // The application (site, game, app etc) this event belongs to, and the tracker platform
4
+ 1: optional string app_id,
5
+ 2: optional string platform,
6
+
7
+ // Date/time
8
+ 3: optional string etl_tstamp,
9
+ 4: optional string collector_tstamp,
10
+ 5: optional string dvce_created_tstamp,
11
+
12
+ // Transaction (i.e. this logging event)
13
+ 6: optional string event,
14
+ 7: optional string event_id,
15
+ 8: optional string txn_id,
16
+
17
+ // Versioning
18
+ 9: optional string name_tracker,
19
+ 10: optional string v_tracker,
20
+ 11: optional string v_collector,
21
+ 12: optional string v_etl,
22
+
23
+ // User and visit
24
+ 13: optional string user_id,
25
+ 14: optional string user_ipaddress,
26
+ 15: optional string user_fingerprint,
27
+ 17: optional string domain_userid, // NOTE(review): field id 16 is skipped - confirm this is intentional
28
+ 18: optional i16 domain_sessionidx,
29
+ 19: optional string network_userid,
30
+
31
+ // Location
32
+ 20: optional string geo_country,
33
+ 21: optional string geo_region,
34
+ 22: optional string geo_city,
35
+ 23: optional string geo_zipcode,
36
+ 24: optional double geo_latitude,
37
+ 25: optional double geo_longitude,
38
+ 26: optional string geo_region_name,
39
+
40
+ // Other IP lookups
41
+ 27: optional string ip_isp,
42
+ 28: optional string ip_organization,
43
+ 29: optional string ip_domain,
44
+ 30: optional string ip_netspeed,
45
+
46
+ // Page
47
+ 31: optional string page_url,
48
+ 32: optional string page_title,
49
+ 33: optional string page_referrer,
50
+
51
+ // Page URL components
52
+ 34: optional string page_urlscheme,
53
+ 35: optional string page_urlhost,
54
+ 36: optional string page_urlport,
55
+ 37: optional string page_urlpath,
56
+ 38: optional string page_urlquery,
57
+ 39: optional string page_urlfragment,
58
+
59
+ // Referrer URL components
60
+ 40: optional string refr_urlscheme,
61
+ 41: optional string refr_urlhost,
62
+ 42: optional string refr_urlport,
63
+ 43: optional string refr_urlpath,
64
+ 44: optional string refr_urlquery,
65
+ 45: optional string refr_urlfragment,
66
+
67
+ // Referrer details
68
+ 46: optional string refr_medium,
69
+ 47: optional string refr_source,
70
+ 48: optional string refr_term,
71
+
72
+ // Marketing
73
+ 49: optional string mkt_medium,
74
+ 50: optional string mkt_source,
75
+ 51: optional string mkt_term,
76
+ 52: optional string mkt_content,
77
+ 53: optional string mkt_campaign,
78
+
79
+ // Custom Contexts
80
+ 54: optional string contexts,
81
+
82
+ // Structured Event
83
+ 55: optional string se_category,
84
+ 56: optional string se_action,
85
+ 57: optional string se_label,
86
+ 58: optional string se_property,
87
+ 59: optional string se_value, // Technically should be a Double but may be rendered incorrectly by Cascading with scientific notation (which Redshift can't process)
88
+
89
+ // Unstructured Event
90
+ 60: optional string unstruct_event,
91
+
92
+ // Ecommerce transaction (from querystring)
93
+ 61: optional string tr_orderid,
94
+ 62: optional string tr_affiliation,
95
+ 63: optional string tr_total,
96
+ 64: optional string tr_tax,
97
+ 65: optional string tr_shipping,
98
+ 66: optional string tr_city,
99
+ 67: optional string tr_state,
100
+ 68: optional string tr_country,
101
+
102
+ // Ecommerce transaction item (from querystring)
103
+ 69: optional string ti_orderid,
104
+ 70: optional string ti_sku,
105
+ 71: optional string ti_name,
106
+ 72: optional string ti_category,
107
+ 73: optional string ti_price,
108
+ 74: optional string ti_quantity,
109
+
110
+ // Page Pings
111
+ 75: optional string pp_xoffset_min,
112
+ 76: optional string pp_xoffset_max,
113
+ 77: optional string pp_yoffset_min,
114
+ 78: optional string pp_yoffset_max,
115
+
116
+ // User Agent
117
+ 79: optional string useragent,
118
+
119
+ // Browser (from user-agent)
120
+ 80: optional string br_name,
121
+ 81: optional string br_family,
122
+ 82: optional string br_version,
123
+ 83: optional string br_type,
124
+ 84: optional string br_renderengine,
125
+
126
+ // Browser (from querystring)
127
+ 85: optional string br_lang,
128
+
129
+ // Individual feature fields for non-Hive targets (e.g. Infobright)
130
+ 86: optional i8 br_features_pdf,
131
+ 87: optional i8 br_features_flash,
132
+ 88: optional i8 br_features_java,
133
+ 89: optional i8 br_features_director,
134
+ 90: optional i8 br_features_quicktime,
135
+ 91: optional i8 br_features_realplayer,
136
+ 92: optional i8 br_features_windowsmedia,
137
+ 93: optional i8 br_features_gears,
138
+ 94: optional i8 br_features_silverlight,
139
+ 95: optional i8 br_cookies,
140
+ 96: optional string br_colordepth,
141
+ 97: optional i16 br_viewwidth,
142
+ 98: optional i16 br_viewheight,
143
+
144
+ // OS (from user-agent)
145
+ 99: optional string os_name,
146
+ 100: optional string os_family,
147
+ 101: optional string os_manufacturer,
148
+ 102: optional string os_timezone,
149
+
150
+ // Device/Hardware (from user-agent)
151
+ 103: optional string dvce_type,
152
+ 104: optional string dvce_ismobile,
153
+
154
+ // Device (from querystring)
155
+ 105: optional i16 dvce_screenwidth,
156
+ 106: optional i16 dvce_screenheight,
157
+
158
+ // Document
159
+ 107: optional string doc_charset,
160
+ 108: optional i16 doc_width,
161
+ 109: optional i16 doc_height,
162
+
163
+ // Currency
164
+ 110: optional string tr_currency,
165
+ 111: optional string tr_total_base,
166
+ 112: optional string tr_tax_base,
167
+ 113: optional string tr_shipping_base,
168
+ 114: optional string ti_currency,
169
+ 115: optional string ti_price_base,
170
+ 116: optional string base_currency,
171
+
172
+ // Geolocation
173
+ 117: optional string geo_timezone,
174
+
175
+ // Click ID
176
+ 118: optional string mkt_clickid,
177
+ 119: optional string mkt_network,
178
+
179
+ // ETL tags
180
+ 120: optional string etl_tags,
181
+
182
+ // Time event was sent
183
+ 121: optional string dvce_sent_tstamp,
184
+
185
+ // Referer
186
+ 122: optional string refr_domain_userid,
187
+ 123: optional string refr_dvce_tstamp,
188
+
189
+ // Derived contexts
190
+ 124: optional string derived_contexts,
191
+
192
+ // Session ID
193
+ 125: optional string domain_sessionid,
194
+
195
+ // Derived timestamp
196
+ 126: optional string derived_tstamp,
197
+
198
+ // Derived event vendor/name/format/version
199
+ 127: optional string event_vendor,
200
+ 128: optional string event_name,
201
+ 129: optional string event_format,
202
+ 130: optional string event_version,
203
+
204
+ // Event fingerprint
205
+ 131: optional string event_fingerprint
206
+ }
@@ -0,0 +1,25 @@
1
# Gem specification for the logstash-codec-snowplow Logstash plugin.
Gem::Specification.new do |s|
  s.name = 'logstash-codec-snowplow'
  s.version = '0.3'
  s.licenses = ['Apache-2.0']
  # One-line summary of what this gem provides: a codec, not an input.
  s.summary = 'Logstash codec that decodes tab-separated Snowplow enriched events.'
  s.description = 'This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program'
  s.authors = ['Robson Júnior']
  s.email = 'bsao@cerebello.co'
  s.homepage = 'http://cerebello.co'
  s.require_paths = ['lib']

  # Files packaged into the gem: everything under lib/ plus top-level
  # gemspec, Markdown files and the Gemfile.
  s.files = Dir['lib/**/*', '*.gemspec', '*.md', 'Gemfile']

  # Tests: the subset of packaged files living under test/, spec/ or features/.
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = {'logstash_plugin' => 'true', 'logstash_group' => 'codec'}

  # Gem dependencies
  s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.0'
  s.add_runtime_dependency 'logstash-codec-line'
  s.add_development_dependency 'logstash-devutils'
end
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: logstash-codec-snowplow
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.3'
5
+ platform: ruby
6
+ authors:
7
+ - Robson Júnior
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-08-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '2.0'
19
+ name: logstash-core-plugin-api
20
+ prerelease: false
21
+ type: :runtime
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ name: logstash-codec-line
34
+ prerelease: false
35
+ type: :runtime
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ name: logstash-devutils
48
+ prerelease: false
49
+ type: :development
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: This gem is a logstash plugin required to be installed on top of the
56
+ Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not
57
+ a stand-alone program
58
+ email: bsao@cerebello.co
59
+ executables: []
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - Gemfile
64
+ - LICENSE.md
65
+ - README.md
66
+ - lib/logstash/codecs/snowplow.rb
67
+ - lib/thrift/event.thrift
68
+ - lib/thrift/snowplow.thrift
69
+ - logstash-codec-snowplow.gemspec
70
+ homepage: http://cerebello.co
71
+ licenses:
72
+ - Apache-2.0
73
+ metadata:
74
+ logstash_plugin: 'true'
75
+ logstash_group: codec
76
+ post_install_message:
77
+ rdoc_options: []
78
+ require_paths:
79
+ - lib
80
+ required_ruby_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ required_rubygems_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirements: []
91
+ rubyforge_project:
92
+ rubygems_version: 2.6.11
93
+ signing_key:
94
+ specification_version: 4
95
+ summary: This example input streams a string at a definable interval.
96
+ test_files: []