logstash-codec-snowplow 0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: cbabf39fbd655f440e460086dafdb072f5adf009c20d688cb104edcb0f9c70d2
4
+ data.tar.gz: c71c78e4fa66d8ab25ff673deb8d3c9a66725dbfc0edae0b52c6b682b0f16725
5
+ SHA512:
6
+ metadata.gz: 56cb875ad7d37040599c891ea2b4211fc5a7d89dff0d3d634611c8e61c8cb8abc560eafbb828bda61b8b03d3a97dcb397a91f844b73b4cce9990e1ac12b2892d
7
+ data.tar.gz: b4c5f79d1974f3cf1980b2795d2e276a06839aa2f09128bb5af8a614cfa199ffabeb3c209ff64d25a0235774619f6406711daffc95eb13e065048b56aaa10fb5
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
data/LICENSE.md ADDED
@@ -0,0 +1,13 @@
1
+ Copyright 2016 Active Agent AG
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
data/README.md ADDED
@@ -0,0 +1,41 @@
1
+ # Logstash Codec snowplow
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/logstash-codec-snowplow.svg)](http://badge.fury.io/rb/logstash-codec-snowplow) [![Build Status](https://travis-ci.org/active-agent/logstash-codec-snowplow.svg?branch=master)](https://travis-ci.org/active-agent/logstash-codec-snowplow)
4
+
5
+ ## Install
6
+
7
+ $ ./bin/plugin install logstash-codec-snowplow
8
+
9
+ ## Usage
10
+
11
+ ```ruby
12
+ input {
13
+ kafka {
14
+ codec => snowplow {}
15
+ }
16
+ }
17
+ ```
18
+
19
+ ## Install dependencies
20
+
21
+ Install jruby:
22
+
23
+ $ brew install jruby
24
+
25
+ Install jruby bundler:
26
+
27
+ $ jruby -S gem install bundler
28
+
29
+ Install plugin dependencies:
30
+
31
+ $ jruby -S bundler install
32
+
33
+ ## Build & Install
34
+
35
+ Build Gem:
36
+
37
+ $ gem build logstash-codec-snowplow.gemspec
38
+
39
+ Install Gem:
40
+
41
+ $ ./bin/plugin install /your/local/plugin/logstash-codec-snowplow.gem
@@ -0,0 +1,191 @@
1
+ # encoding: utf-8
2
+
3
+ require 'json'
4
+ require 'logger'
5
+ require 'logstash/codecs/base'
6
+
7
+ # Read tab-separated Snowplow enriched-event records as Logstash events
8
+ #
9
+ # This plugin is used to deserialize Snowplow enriched-event
10
+ # records into Logstash events.
11
+ #
12
+ # ==== Encoding
13
+ #
14
+ # Not implemented by this class: it defines no encode method, so it cannot serialize Logstash events as Snowplow enriched-event records.
15
+ #
16
+ #
17
+ # ==== Decoding
18
+ #
19
+ # This codec is for deserializing individual Snowplow enriched-event serialized objects as Logstash events.
20
+ #
21
+ #
22
+ # ==== Usage
23
+ # Example usage with kafka input.
24
+ #
25
+ # [source,ruby]
26
+ # ----------------------------------
27
+ # input {
28
+ # kafka {
29
+ # codec => snowplow {
30
+ # }
31
+ # }
32
+ # }
33
+ # filter {
34
+ # ...
35
+ # }
36
+ # output {
37
+ # ...
38
+ # }
39
+ # ----------------------------------
40
+
41
# Field names of a Snowplow enriched event, listed in column order.
# The codec's decode method pairs the N-th tab-separated value of a record
# with the N-th name in this list, so the order must not change.
ENRICHED_EVENT = %w[
  app_id platform
  etl_tstamp collector_tstamp dvce_created_tstamp
  event event_id txn_id
  name_tracker v_tracker v_collector v_etl
  user_id user_ipaddress user_fingerprint
  domain_userid domain_sessionidx network_userid
  geo_country geo_region geo_city geo_zipcode
  geo_latitude geo_longitude geo_region_name
  ip_isp ip_organization ip_domain ip_netspeed
  page_url page_title page_referrer
  page_urlscheme page_urlhost page_urlport
  page_urlpath page_urlquery page_urlfragment
  refr_urlscheme refr_urlhost refr_urlport
  refr_urlpath refr_urlquery refr_urlfragment
  refr_medium refr_source refr_term
  mkt_medium mkt_source mkt_term mkt_content mkt_campaign
  contexts
  se_category se_action se_label se_property se_value
  unstruct_event
  tr_orderid tr_affiliation tr_total tr_tax
  tr_shipping tr_city tr_state tr_country
  ti_orderid ti_sku ti_name ti_category ti_price ti_quantity
  pp_xoffset_min pp_xoffset_max pp_yoffset_min pp_yoffset_max
  useragent
  br_name br_family br_version br_type br_renderengine
  br_lang
  br_features_pdf br_features_flash br_features_java
  br_features_director br_features_quicktime br_features_realplayer
  br_features_windowsmedia br_features_gears br_features_silverlight
  br_cookies br_colordepth br_viewwidth br_viewheight
  os_name os_family os_manufacturer os_timezone
  dvce_type dvce_ismobile
  dvce_screenwidth dvce_screenheight
  doc_charset doc_width doc_height
  tr_currency tr_total_base tr_tax_base tr_shipping_base
  ti_currency ti_price_base base_currency
  geo_timezone
  mkt_clickid mkt_network
  etl_tags
  dvce_sent_tstamp
  refr_domain_userid refr_dvce_tstamp
  derived_contexts
  domain_sessionid
  derived_tstamp
  event_vendor event_name event_format event_version
  event_fingerprint
].freeze
173
+
174
# Logstash codec that turns one tab-separated Snowplow enriched-event record
# into one Logstash event whose fields are named after ENRICHED_EVENT.
#
# Only decoding is implemented; this class defines no encode method.
class LogStash::Codecs::Snowplow < LogStash::Codecs::Base
  config_name 'snowplow'

  # Tag attached to events built from records that could not be mapped.
  PARSE_FAILURE_TAG = '_snowplowparsefailure'.freeze

  # Called once when the codec is set up; only logs that it is starting.
  def register
    @logger.info('Initializing logstash Snowplow enriched-event codec')
  end

  # Decodes a single enriched-event record.
  #
  # data - the raw record; converted with to_s and split on tab characters.
  #
  # Yields exactly one LogStash::Event per call unless an exception occurs:
  # * when the record has as many columns as ENRICHED_EVENT, an event whose
  #   keys are the ENRICHED_EVENT names and whose values are the column
  #   strings (empty columns stay empty strings);
  # * otherwise an event carrying the raw text in 'message' and tagged with
  #   PARSE_FAILURE_TAG, so the record is not silently lost.
  #
  # Any StandardError raised while decoding or inside the caller's block is
  # logged and swallowed, as the original implementation did.
  def decode(data)
    line = data.to_s
    # A negative limit keeps trailing empty columns. Without it a valid
    # record whose last fields are empty would come up short and be rejected.
    values = line.split("\t", -1)

    if values.length == ENRICHED_EVENT.length
      # Hash[] on an array of pairs avoids flatten/splat and also works on
      # Ruby 1.9-compatible runtimes, where Array#to_h is unavailable.
      yield LogStash::Event.new(Hash[ENRICHED_EVENT.zip(values)])
    else
      @logger.error(
        "Fail to decode: expected #{ENRICHED_EVENT.length} fields, got #{values.length}",
        :data => line
      )
      yield LogStash::Event.new('message' => line, 'tags' => [PARSE_FAILURE_TAG])
    end
  rescue StandardError => e
    @logger.error("Fail to decode: #{data}", :exception => e.message)
  end
end
@@ -0,0 +1,6 @@
1
// Generic event record with four string fields.
// NOTE(review): the codec source visible alongside this file does not
// reference this struct - confirm whether it is still needed.
struct event {
  1: required string timestamp,  // timestamp as iso8601
  2: required string version,
  3: optional string host,
  4: optional string message
}
@@ -0,0 +1,206 @@
1
// Thrift description of a Snowplow enriched event: one optional field per
// column. Field id 16 is not used. All ids, types and names are unchanged.
struct SchemaSniffer {

  // Application (site, game, app etc) the event belongs to, and the tracker platform
    1: optional string app_id,
    2: optional string platform,

  // Date/time
    3: optional string etl_tstamp,
    4: optional string collector_tstamp,
    5: optional string dvce_created_tstamp,

  // Transaction (i.e. this logging event)
    6: optional string event,
    7: optional string event_id,
    8: optional string txn_id,

  // Versioning
    9: optional string name_tracker,
   10: optional string v_tracker,
   11: optional string v_collector,
   12: optional string v_etl,

  // User and visit
   13: optional string user_id,
   14: optional string user_ipaddress,
   15: optional string user_fingerprint,
   17: optional string domain_userid,
   18: optional i16    domain_sessionidx,
   19: optional string network_userid,

  // Location
   20: optional string geo_country,
   21: optional string geo_region,
   22: optional string geo_city,
   23: optional string geo_zipcode,
   24: optional double geo_latitude,
   25: optional double geo_longitude,
   26: optional string geo_region_name,

  // Other IP lookups
   27: optional string ip_isp,
   28: optional string ip_organization,
   29: optional string ip_domain,
   30: optional string ip_netspeed,

  // Page
   31: optional string page_url,
   32: optional string page_title,
   33: optional string page_referrer,

  // Page URL components
   34: optional string page_urlscheme,
   35: optional string page_urlhost,
   36: optional string page_urlport,
   37: optional string page_urlpath,
   38: optional string page_urlquery,
   39: optional string page_urlfragment,

  // Referrer URL components
   40: optional string refr_urlscheme,
   41: optional string refr_urlhost,
   42: optional string refr_urlport,
   43: optional string refr_urlpath,
   44: optional string refr_urlquery,
   45: optional string refr_urlfragment,

  // Referrer details
   46: optional string refr_medium,
   47: optional string refr_source,
   48: optional string refr_term,

  // Marketing
   49: optional string mkt_medium,
   50: optional string mkt_source,
   51: optional string mkt_term,
   52: optional string mkt_content,
   53: optional string mkt_campaign,

  // Custom contexts
   54: optional string contexts,

  // Structured event
   55: optional string se_category,
   56: optional string se_action,
   57: optional string se_label,
   58: optional string se_property,
   // Technically should be a Double but may be rendered incorrectly by
   // Cascading with scientific notation (which Redshift can't process)
   59: optional string se_value,

  // Unstructured event
   60: optional string unstruct_event,

  // Ecommerce transaction (from querystring)
   61: optional string tr_orderid,
   62: optional string tr_affiliation,
   63: optional string tr_total,
   64: optional string tr_tax,
   65: optional string tr_shipping,
   66: optional string tr_city,
   67: optional string tr_state,
   68: optional string tr_country,

  // Ecommerce transaction item (from querystring)
   69: optional string ti_orderid,
   70: optional string ti_sku,
   71: optional string ti_name,
   72: optional string ti_category,
   73: optional string ti_price,
   74: optional string ti_quantity,

  // Page pings
   75: optional string pp_xoffset_min,
   76: optional string pp_xoffset_max,
   77: optional string pp_yoffset_min,
   78: optional string pp_yoffset_max,

  // User agent
   79: optional string useragent,

  // Browser (from user-agent)
   80: optional string br_name,
   81: optional string br_family,
   82: optional string br_version,
   83: optional string br_type,
   84: optional string br_renderengine,

  // Browser (from querystring)
   85: optional string br_lang,

  // Individual feature fields for non-Hive targets (e.g. Infobright)
   86: optional i8     br_features_pdf,
   87: optional i8     br_features_flash,
   88: optional i8     br_features_java,
   89: optional i8     br_features_director,
   90: optional i8     br_features_quicktime,
   91: optional i8     br_features_realplayer,
   92: optional i8     br_features_windowsmedia,
   93: optional i8     br_features_gears,
   94: optional i8     br_features_silverlight,
   95: optional i8     br_cookies,
   96: optional string br_colordepth,
   97: optional i16    br_viewwidth,
   98: optional i16    br_viewheight,

  // OS (from user-agent)
   99: optional string os_name,
  100: optional string os_family,
  101: optional string os_manufacturer,
  102: optional string os_timezone,

  // Device/hardware (from user-agent)
  103: optional string dvce_type,
  104: optional string dvce_ismobile,

  // Device (from querystring)
  105: optional i16    dvce_screenwidth,
  106: optional i16    dvce_screenheight,

  // Document
  107: optional string doc_charset,
  108: optional i16    doc_width,
  109: optional i16    doc_height,

  // Currency
  110: optional string tr_currency,
  111: optional string tr_total_base,
  112: optional string tr_tax_base,
  113: optional string tr_shipping_base,
  114: optional string ti_currency,
  115: optional string ti_price_base,
  116: optional string base_currency,

  // Geolocation
  117: optional string geo_timezone,

  // Click ID
  118: optional string mkt_clickid,
  119: optional string mkt_network,

  // ETL tags
  120: optional string etl_tags,

  // Time event was sent
  121: optional string dvce_sent_tstamp,

  // Referer
  122: optional string refr_domain_userid,
  123: optional string refr_dvce_tstamp,

  // Derived contexts
  124: optional string derived_contexts,

  // Session ID
  125: optional string domain_sessionid,

  // Derived timestamp
  126: optional string derived_tstamp,

  // Derived event vendor/name/format/version
  127: optional string event_vendor,
  128: optional string event_name,
  129: optional string event_format,
  130: optional string event_version,

  // Event fingerprint
  131: optional string event_fingerprint
}
@@ -0,0 +1,25 @@
1
# Gem specification for the logstash-codec-snowplow plugin.
Gem::Specification.new do |s|
  s.name          = 'logstash-codec-snowplow'
  s.version       = '0.3'
  s.licenses      = ['Apache-2.0']
  # The previous summary was leftover plugin-template text describing an
  # unrelated example input; this one describes what the codec does.
  s.summary       = 'Logstash codec that decodes tab-separated Snowplow enriched events into Logstash events.'
  s.description   = 'This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program'
  s.authors       = ['Robson Júnior']
  s.email         = 'bsao@cerebello.co'
  s.homepage      = 'http://cerebello.co'
  s.require_paths = ['lib']

  # Files packaged into the gem, resolved relative to the current directory
  s.files = Dir['lib/**/*', '*.gemspec', '*.md', 'Gemfile']

  # Tests: whichever packaged files live under test/, spec/ or features/
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = {'logstash_plugin' => 'true', 'logstash_group' => 'codec'}

  # Gem dependencies
  s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.0'
  s.add_runtime_dependency 'logstash-codec-line'
  s.add_development_dependency 'logstash-devutils'
end
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: logstash-codec-snowplow
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.3'
5
+ platform: ruby
6
+ authors:
7
+ - Robson Júnior
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-08-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '2.0'
19
+ name: logstash-core-plugin-api
20
+ prerelease: false
21
+ type: :runtime
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ name: logstash-codec-line
34
+ prerelease: false
35
+ type: :runtime
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ name: logstash-devutils
48
+ prerelease: false
49
+ type: :development
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: This gem is a logstash plugin required to be installed on top of the
56
+ Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not
57
+ a stand-alone program
58
+ email: bsao@cerebello.co
59
+ executables: []
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - Gemfile
64
+ - LICENSE.md
65
+ - README.md
66
+ - lib/logstash/codecs/snowplow.rb
67
+ - lib/thrift/event.thrift
68
+ - lib/thrift/snowplow.thrift
69
+ - logstash-codec-snowplow.gemspec
70
+ homepage: http://cerebello.co
71
+ licenses:
72
+ - Apache-2.0
73
+ metadata:
74
+ logstash_plugin: 'true'
75
+ logstash_group: codec
76
+ post_install_message:
77
+ rdoc_options: []
78
+ require_paths:
79
+ - lib
80
+ required_ruby_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ required_rubygems_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirements: []
91
+ rubyforge_project:
92
+ rubygems_version: 2.6.11
93
+ signing_key:
94
+ specification_version: 4
95
+ summary: This example input streams a string at a definable interval.
96
+ test_files: []