sourmix 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b57a705f8b8c54cbb0b5d9cd86921a8cd9fce4e7
4
+ data.tar.gz: a4d7d2e672f212479b4dc80a4786ab16511d598b
5
+ SHA512:
6
+ metadata.gz: 42a196ee0b5426b43bd50a903009679c74365562969196c426bb51c2451942d7fed355bde303c564d078754517e1ffea6e9f1732d89adab502bb3527e289a831
7
+ data.tar.gz: f67e6ff4da041e5e9ce33e687fd1c068a273f1e1197d0e1a13b9c33a91d1b67c21884848f4df0a390519202de1ad6a8ee909bd1f458c9203a4c1d76d668d404a
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2016 Sean Clemmer
2
+
3
+ Permission to use, copy, modify, and/or distribute this software for any
4
+ purpose with or without fee is hereby granted, provided that the above
5
+ copyright notice and this permission notice appear in all copies.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
8
+ REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
9
+ AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
10
+ INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
11
+ LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
12
+ OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
13
+ PERFORMANCE OF THIS SOFTWARE.
data/Readme.md ADDED
@@ -0,0 +1,48 @@
1
+ # SourMix ![Version](https://img.shields.io/gem/v/sourmix.svg?style=flat-square)
2
+
3
+ Shove [Mixpanel](http://mixpanel.com/) events into [Kafka](http://kafka.apache.org/)
4
+ by way of [Theon](https://github.com/sczizzo/theon).
5
+
6
+
7
+ ## Installation
8
+
9
+ You can build a gem from this repository, install Ubuntu packages from the
10
+ [Releases](https://github.com/sczizzo/sourmix/releases) page, or use RubyGems:
11
+
12
+ $ gem install sourmix
13
+
14
+ SourMix shells out to `curl` and `gzip`, so you'll also need those installed.
15
+
16
+
17
+ ## Usage
18
+
19
+ Just call for help!
20
+
21
+ $ sourmix help
22
+ Commands:
23
+ sourmix art ...
24
+ sourmix go -d, --date=DATE -k, --mixpanel-api-key=MIXPANEL_API_KEY -s, --mixpanel-api-secret=MIXPANEL_API_SECRET ...
25
+ sourmix help [COMMAND] ...
26
+ sourmix version
27
+
28
+ You're probably most interested in the `go` command:
29
+
30
+ Usage:
31
+ sourmix go -d, --date=DATE -k, --mixpanel-api-key=MIXPANEL_API_KEY -s, --mixpanel-api-secret=MIXPANEL_API_SECRET -t, --theon-uri=THEON_URI
32
+
33
+ Options:
34
+ -d, --date=DATE # Date to index (YYYY-mm-dd)
35
+ # Default: 2016-02-08
36
+ -k, --mixpanel-api-key=MIXPANEL_API_KEY # Mixpanel API key
37
+ -s, --mixpanel-api-secret=MIXPANEL_API_SECRET # Mixpanel API secret
38
+ -t, --theon-uri=THEON_URI # Theon URI (including auth)
39
+ -L, [--log=LOG] # Log to file instead of STDERR
40
+ -V, [--debug], [--no-debug] # Enable DEBUG-level logging
41
+
42
+ Do your thing, SourMix
43
+
44
+ ## Changelog
45
+
46
+ #### v1.0.0
47
+
48
+ - Initial implementation
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.0
data/bin/sourmix ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+ require 'sourmix' # lib/sourmix.rb, which in turn requires sourmix/main
3
+ SourMix::Main.start ARGV # hand the raw command-line arguments to the SourMix::Main CLI class
@@ -0,0 +1,56 @@
1
+ require_relative 'metadata'
2
+ require_relative 'mjolnir'
3
+ require_relative 'perform'
4
+
5
+ Thread.abort_on_exception = true
6
+
7
+
8
+ module SourMix
9
+ class Main < Mjolnir # command-line interface exposing the version, art and go commands
10
+ include Perform
11
+
12
+ # `version` command: print the VERSION constant
13
+ desc 'version', 'Echo the application version'
14
+ def version
15
+ puts VERSION
16
+ end
17
+
18
+ # `art` command: print the ART banner wrapped in newlines
19
+ desc 'art', 'View the application art'
20
+ def art
21
+ puts "\n%s\n" % ART
22
+ end
23
+
24
+ # `go` command: declares its options below, then delegates to Perform#perform
25
+ desc 'go', 'Do your thing, SourMix'
26
+ option :date, \
27
+ type: :string,
28
+ aliases: %w[ -d ],
29
+ desc: 'Date to index (YYYY-mm-dd)',
30
+ required: true,
31
+ default: Time.now.strftime('%Y-%m-%d') # computed once, when this class body is evaluated
32
+ option :mixpanel_api_key, \
33
+ type: :string,
34
+ aliases: %w[ -k ],
35
+ desc: 'Mixpanel API key',
36
+ required: true,
37
+ default: ENV['SOURMIX_MIXPANEL_API_KEY'] # nil when the variable is unset
38
+ option :mixpanel_api_secret, \
39
+ type: :string,
40
+ aliases: %w[ -s ],
41
+ desc: 'Mixpanel API secret',
42
+ required: true,
43
+ default: ENV['SOURMIX_MIXPANEL_API_SECRET'] # nil when the variable is unset
44
+ option :theon_uri, \
45
+ type: :string,
46
+ aliases: %w[ -t ],
47
+ desc: 'Theon URI (including auth)',
48
+ required: true,
49
+ default: ENV['SOURMIX_THEON_URI'] # nil when the variable is unset
50
+ include_common_options # declares each entry of Mjolnir::COMMON_OPTIONS via `option`
51
+ def go
52
+ perform
53
+ end
54
+
55
+ end
56
+ end
@@ -0,0 +1,42 @@
1
+ module SourMix
2
+
3
+ # We use a VERSION file to tie into our build pipeline
4
+ VERSION = File.read(File.join(File.dirname(__FILE__), '..', '..', 'VERSION')).strip
5
+
6
+ # We don't really do all that much, be humble
7
+ SUMMARY = 'Shove Mixpanel events into Kafka by way of Theon'
8
+
9
+ # Like the MIT license, but even simpler
10
+ LICENSE = 'ISC'
11
+
12
+ # Where you should look first
13
+ HOMEPAGE = 'https://github.com/sczizzo/sourmix'
14
+
15
+ # Your benevolent dictator for life
16
+ AUTHOR = 'Sean Clemmer'
17
+
18
+ # Turn here to strangle your dictator
19
+ EMAIL = 'sczizzo@gmail.com'
20
+
21
+ # Bundled extensions
22
+ TRAVELING_RUBY_BUCKET = 'http://d6r77u77i8pq3.cloudfront.net'
23
+ TRAVELING_RUBY_VERSION = '20150715-2.2.2'
24
+ JSON_VERSION = '1.8.2'
25
+
26
+ # Every project deserves its own ASCII art
27
+ ART = <<-'EOART' % VERSION
28
+
29
+ _ _ _ _ _ _ _ _ _
30
+ / /\ /\ \ /\_\ /\ \ /\_\/\_\ _ /\ \ /_/\ /\ \
31
+ / / \ / \ \ / / / _ / \ \ / / / / //\_\ \ \ \\ \ \ \ \_\
32
+ / / /\ \__ / /\ \ \\ \ \__ /\_\ / /\ \ \ /\ \/ \ \/ / / /\ \_\\ \ \__/ / /
33
+ / / /\ \___\ / / /\ \ \\ \___\ / / // / /\ \_\ / \____\__/ / / /\/_/ \ \__ \/_/
34
+ \ \ \ \/___// / / \ \_\\__ / / / // / /_/ / // /\/________/ / / / \/_/\__/\
35
+ \ \ \ / / / / / // / / / / // / /__\/ // / /\/_// / / / / / _/\/__\ \
36
+ _ \ \ \ / / / / / // / / / / // / /_____// / / / / / / / / / _/_/\ \ \
37
+ /_/\__/ / / / / /___/ / // / /___/ / // / /\ \ \ / / / / / /___/ / /__ / / / \ \ \
38
+ \ \/___/ / / / /____\/ // / /____\/ // / / \ \ \\/_/ / / //\__\/_/___\ / / / /_/ /
39
+ \_____\/ \/_________/ \/_________/ \/_/ \_\/ \/_/ \/_________/ \/_/ \_\/ v%s
40
+
41
+ EOART
42
+ end
@@ -0,0 +1,74 @@
1
+ require 'digest/md5'
2
+ require 'net/http'
3
+ require 'json'
4
+ require 'cgi'
5
+
6
+
7
+ module Mixpanel
8
+ class Client
9
+ attr_reader :api_key, :api_secret, :base_url, :data_url, :expire_at
10
+
11
+ def initialize options
12
+ @api_key = options.fetch :api_key
13
+ @api_secret = options.fetch :api_secret
14
+ @base_url = options.fetch :base_url, 'https://mixpanel.com/api/2.0'
15
+ @data_url = options.fetch :data_url, 'https://data.mixpanel.com/api/2.0'
16
+ @expire_at = options.fetch :expire_at, 90 # seconds
17
+ end
18
+
19
+ def request endpoint, params
20
+ resp = Net::HTTP.get request_uri(endpoint, params)
21
+ resp.lines.map { |l| JSON.parse l }
22
+ end
23
+
24
+ def request_uri endpoint, params
25
+ URI request_url(endpoint, params)
26
+ end
27
+
28
+
29
+ private
30
+
31
+ def request_url endpoint, params
32
+ api = request_api endpoint
33
+ url = File.join api, endpoint.to_s
34
+ sps = sign_params endpoint, params
35
+ qs = encode_params sps
36
+ "#{url}?#{qs}"
37
+ end
38
+
39
+ def request_api endpoint
40
+ return data_url if endpoint == 'export'
41
+ return base_url
42
+ end
43
+
44
+ def request_format endpoint
45
+ return :raw if endpoint == 'export'
46
+ return :json
47
+ end
48
+
49
+ def encode_params params
50
+ params.map { |k,v| "#{k}=#{CGI.escape(v.to_s)}" }.sort.join('&')
51
+ end
52
+
53
+ def sign_params endpoint, params
54
+ nps = normalize_params endpoint, params
55
+ jps = nps.map { |k,v| "#{k}=#{v}" }.sort.join('')
56
+ req = jps + api_secret
57
+ nps[:sig] = Digest::MD5.hexdigest req
58
+ nps
59
+ end
60
+
61
+ def normalize_params endpoint, params
62
+ params.merge \
63
+ api_key: api_key,
64
+ expire: request_expire(params),
65
+ format: request_format(endpoint)
66
+ end
67
+
68
+ def request_expire params
69
+ return params[:expire] if params[:expire]
70
+ Time.now.to_i + expire_at
71
+ end
72
+
73
+ end
74
+ end
@@ -0,0 +1,49 @@
1
+ require 'slog'
2
+ require 'thor'
3
+
4
+
5
+ # Thor's hammer! Like Thor with better logging
6
+ class Mjolnir < Thor
7
+
8
+ # Common options for Thor commands
9
+ COMMON_OPTIONS = {
10
+ log: {
11
+ type: :string,
12
+ aliases: %w[ -L ],
13
+ desc: 'Log to file instead of STDERR',
14
+ default: ENV['SOURMIX_LOG'] || nil # nil (no default) when SOURMIX_LOG is unset
15
+ },
16
+ debug: {
17
+ type: :boolean,
18
+ aliases: %w[ -V ],
19
+ desc: 'Enable DEBUG-level logging',
20
+ default: ENV['SOURMIX_DEBUG'] || false # NOTE(review): a set variable yields a String here, so even "false" is truthy in Ruby; confirm how Thor treats it
21
+ }
22
+ }
23
+
24
+ # Decorate Thor commands with the options above
25
+ def self.include_common_options
26
+ COMMON_OPTIONS.each do |name, spec|
27
+ option name, spec
28
+ end
29
+ end
30
+
31
+ # Helper methods; no_commands is the Thor block for methods that should not become CLI commands
32
+ no_commands do
33
+
34
+ # Build (and memoize) the Slog logger from the --log / --debug command-line options
35
+ def log
36
+ return @logger if defined? @logger # built once, reused afterwards
37
+ level = :info
38
+ level = :debug if options.debug?
39
+ device = options.log || $stderr
40
+ pretty = device.tty? rescue false # a log-file path is a String without #tty?; the rescue modifier maps that (or any StandardError) to false
41
+ @logger = Slog.new \
42
+ out: device,
43
+ level: level,
44
+ colorize: pretty,
45
+ prettify: pretty
46
+ end
47
+
48
+ end
49
+ end
@@ -0,0 +1,181 @@
1
+ require 'shellwords'
2
+ require 'digest/md5'
3
+ require 'tempfile'
4
+ require 'thread'
5
+ require 'logger'
6
+ require 'json'
7
+ require 'date'
8
+ require 'time'
9
+
10
+ require_relative 'mixpanel_client'
11
+
12
+
13
+
14
+ module SourMix
15
+ module Perform
16
+ PAGE_SIZE = 50_000
17
+
18
+ def perform
19
+ mixpanel = Mixpanel::Client.new \
20
+ api_key: options.mixpanel_api_key,
21
+ api_secret: options.mixpanel_api_secret
22
+ dataset = download_dataset mixpanel
23
+ results = process_dataset dataset
24
+ gzip_results results
25
+ report_results results
26
+ end
27
+
28
+
29
+
30
+ private
31
+
32
+ def download_dataset mixpanel
33
+ log.debug event: :download_dataset
34
+
35
+ dataset = Tempfile.new 'dataset'
36
+
37
+ request_uri = mixpanel.request_uri 'export', \
38
+ from_date: options.date, to_date: options.date
39
+
40
+ download_dataset = 'curl -Lf -o %{out} %{url}' % {
41
+ url: Shellwords.escape(request_uri),
42
+ out: Shellwords.escape(dataset.path)
43
+ }
44
+
45
+ _, elapsed_s = timed { sh download_dataset }
46
+
47
+ log.info event: :download_dataset, exited: $?.exitstatus, elapsed_s: elapsed_s
48
+
49
+ dataset
50
+ end
51
+
52
+
53
+ def process_dataset dataset
54
+ log.debug event: :process_dataset
55
+
56
+ results = []
57
+ nok, nerr = 0, 0
58
+
59
+ _, elapsed_s = timed do
60
+ dataset.each_line do |e|
61
+ if nok % PAGE_SIZE == 0
62
+ log.debug event: :process_dataset_page, okay: nok
63
+ results << Tempfile.new('results')
64
+ end
65
+
66
+ begin
67
+ event = JSON.generate(normalize_event(JSON.parse(e)))
68
+ if v2_api?
69
+ results.last.puts ':%s' % event
70
+ else
71
+ results.last.puts event
72
+ end
73
+ nok += 1
74
+ rescue => e
75
+ log.error event: :process_dataset_error, error: e.inspect, line: e
76
+ nerr += 1
77
+ end
78
+ end
79
+ end
80
+
81
+ dataset.close
82
+ dataset.unlink
83
+ results.map(&:close)
84
+
85
+ ops = 1.0 * (nok + nerr) / elapsed_s
86
+
87
+ log.info event: :process_dataset, okay: nok, errors: nerr, elapsed_s: elapsed_s, ops: ops, pages: results.size
88
+
89
+ results
90
+ end
91
+
92
+
93
+ def gzip_results results
94
+ log.debug event: :gzip_results
95
+ started, total_size = Time.now.to_f, 0
96
+
97
+ results.map do |result|
98
+ Thread.new do
99
+ gzip_result = 'gzip -k %s' % Shellwords.escape(result.path)
100
+ _, elapsed_s = timed { sh gzip_result }
101
+
102
+ size = File::Stat.new(result.path + '.gz').size
103
+ total_size += size
104
+
105
+ log.debug event: :gzip_result, exited: $?.exitstatus, elapsed_s: elapsed_s, size: size
106
+ end
107
+ end.map(&:join)
108
+
109
+ log.info event: :gzip_result, total_elapsed_s: (Time.now.to_f - started), total_size: total_size
110
+ end
111
+
112
+
113
+ def report_results results
114
+ log.debug event: :report_results
115
+
116
+ theon_uri = URI options.theon_uri
117
+ theon_auth = [ theon_uri.user, theon_uri.password ].join(':')
118
+ theon_url = '%s://%s:%s%s' % [
119
+ theon_uri.scheme, theon_uri.host, theon_uri.port, theon_uri.path
120
+ ]
121
+
122
+ auth_opt = theon_auth.nil? ? nil : '-u %s' % Shellwords.escape(theon_auth)
123
+ started = Time.now.to_f
124
+
125
+ results.map do |result|
126
+ Thread.new do
127
+ report_result = \
128
+ 'curl %{auth} -Lf %{url} -XPOST %{headers} --data-binary @%{out}.gz' % {
129
+ auth: auth_opt,
130
+ url: Shellwords.escape(theon_url),
131
+ out: Shellwords.escape(result.path),
132
+ headers: "-H 'Content-Encoding: gzip'"
133
+ }
134
+
135
+ _, elapsed_s = timed { sh report_result }
136
+ result.unlink
137
+
138
+ log.debug event: :report_result, exited: $?.exitstatus, elapsed_s: elapsed_s
139
+ end
140
+ end.map(&:join)
141
+
142
+ log.info event: :report_results, total_elapsed_s: (Time.now.to_f - started)
143
+ end
144
+
145
+
146
+ def v2_api?; options.theon_uri.include?('/v2/'); end # true when the configured Theon URI contains "/v2/"
147
+
148
+
149
+ def normalize_event e # Flatten a parsed event Hash (the argument is mutated) and return a new Hash with '@timestamp' and '@id' added
150
+ digest = Digest::MD5.hexdigest e.to_s # fingerprint of the event as received, taken before any mutation
151
+ e.merge! e.delete('properties') # hoist the nested properties to the top level; raises when the key is absent (merge! of nil)
152
+ e = e.inject({}) do |h,(k,v)|
153
+ k = k.gsub('.','_').gsub(/^\W+/,'') # dots become underscores; leading non-word characters are stripped
154
+ h[k] = v unless v.nil? || v == '' || v == [] || v == {} # drop empty values
155
+ h
156
+ end
157
+ e['@timestamp'] = Time.at(e['time']).utc.iso8601(3) # 'time' goes to Time.at, i.e. is treated as epoch seconds; millisecond precision
158
+ e['@id'] = digest
159
+ e
160
+ end
161
+
162
+
163
+ def sh command
164
+ if options.debug
165
+ puts command
166
+ system command
167
+ else
168
+ `#{command} 2>&1`
169
+ end
170
+ end
171
+
172
+
173
+ def timed &block
174
+ started = Time.now.to_f
175
+ rvalue = yield
176
+ elapsed = Time.now.to_f - started
177
+ return rvalue, elapsed
178
+ end
179
+
180
+ end
181
+ end
data/lib/sourmix.rb ADDED
@@ -0,0 +1 @@
1
+ require_relative 'sourmix/main'
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sourmix
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Sean Clemmer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-02-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: slog
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2'
27
+ - !ruby/object:Gem::Dependency
28
+ name: thor
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: json
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '='
46
+ - !ruby/object:Gem::Version
47
+ version: 1.8.2
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '='
53
+ - !ruby/object:Gem::Version
54
+ version: 1.8.2
55
+ description: Shove Mixpanel events into Kafka by way of Theon.
56
+ email: sczizzo@gmail.com
57
+ executables:
58
+ - sourmix
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - LICENSE
63
+ - Readme.md
64
+ - VERSION
65
+ - bin/sourmix
66
+ - lib/sourmix.rb
67
+ - lib/sourmix/main.rb
68
+ - lib/sourmix/metadata.rb
69
+ - lib/sourmix/mixpanel_client.rb
70
+ - lib/sourmix/mjolnir.rb
71
+ - lib/sourmix/perform.rb
72
+ homepage: https://github.com/sczizzo/sourmix
73
+ licenses:
74
+ - ISC
75
+ metadata: {}
76
+ post_install_message:
77
+ rdoc_options: []
78
+ require_paths:
79
+ - lib
80
+ required_ruby_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ required_rubygems_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirements: []
91
+ rubyforge_project:
92
+ rubygems_version: 2.4.5.1
93
+ signing_key:
94
+ specification_version: 4
95
+ summary: Shove Mixpanel events into Kafka by way of Theon
96
+ test_files: []