sourmix 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b57a705f8b8c54cbb0b5d9cd86921a8cd9fce4e7
4
+ data.tar.gz: a4d7d2e672f212479b4dc80a4786ab16511d598b
5
+ SHA512:
6
+ metadata.gz: 42a196ee0b5426b43bd50a903009679c74365562969196c426bb51c2451942d7fed355bde303c564d078754517e1ffea6e9f1732d89adab502bb3527e289a831
7
+ data.tar.gz: f67e6ff4da041e5e9ce33e687fd1c068a273f1e1197d0e1a13b9c33a91d1b67c21884848f4df0a390519202de1ad6a8ee909bd1f458c9203a4c1d76d668d404a
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2016 Sean Clemmer
2
+
3
+ Permission to use, copy, modify, and/or distribute this software for any
4
+ purpose with or without fee is hereby granted, provided that the above
5
+ copyright notice and this permission notice appear in all copies.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
8
+ REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
9
+ AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
10
+ INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
11
+ LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
12
+ OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
13
+ PERFORMANCE OF THIS SOFTWARE.
data/Readme.md ADDED
@@ -0,0 +1,48 @@
1
+ # SourMix ![Version](https://img.shields.io/gem/v/sourmix.svg?style=flat-square)
2
+
3
+ Shove [Mixpanel](http://mixpanel.com/) events into [Kafka](http://kafka.apache.org/)
4
+ by way of [Theon](https://github.com/sczizzo/theon).
5
+
6
+
7
+ ## Installation
8
+
9
+ You can build a gem from this repository, install Ubuntu packages from the
10
+ [Releases](https://github.com/sczizzo/sourmix/releases) page, or use RubyGems:
11
+
12
+ $ gem install sourmix
13
+
14
+ SourMix shells out to `curl` and `gzip`, so you'll also need those installed.
15
+
16
+
17
+ ## Usage
18
+
19
+ Just call for help!
20
+
21
+ $ sourmix help
22
+ Commands:
23
+ sourmix art ...
24
+ sourmix go -d, --date=DATE -k, --mixpanel-api-key=MIXPANEL_API_KEY -s, --mixpanel-api-secret=MIXPANEL_API_SECRET ...
25
+ sourmix help [COMMAND] ...
26
+ sourmix version
27
+
28
+ You're probably most interested in the `go` command:
29
+
30
+ Usage:
31
+ sourmix go -d, --date=DATE -k, --mixpanel-api-key=MIXPANEL_API_KEY -s, --mixpanel-api-secret=MIXPANEL_API_SECRET -t, --theon-uri=THEON_URI
32
+
33
+ Options:
34
+ -d, --date=DATE # Date to index (YYYY-mm-dd)
35
+ # Default: 2016-02-08
36
+ -k, --mixpanel-api-key=MIXPANEL_API_KEY # Mixpanel API key
37
+ -s, --mixpanel-api-secret=MIXPANEL_API_SECRET # Mixpanel API secret
38
+ -t, --theon-uri=THEON_URI # Theon URI (including auth)
39
+ -L, [--log=LOG] # Log to file instead of STDERR
40
+ -V, [--debug], [--no-debug] # Enable DEBUG-level logging
41
+
42
+ Do your thing, SourMix
43
+
44
+ ## Changelog
45
+
46
+ #### v1.0.0
47
+
48
+ - Initial implementation
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.0
data/bin/sourmix ADDED
@@ -0,0 +1,3 @@
1
#!/usr/bin/env ruby
# Executable entry point: load the library, then hand the raw command line
# to the CLI class.
require 'sourmix'

SourMix::Main.start(ARGV)
@@ -0,0 +1,56 @@
1
+ require_relative 'metadata'
2
+ require_relative 'mjolnir'
3
+ require_relative 'perform'
4
+
5
+ Thread.abort_on_exception = true
6
+
7
+
8
module SourMix
  # Command-line interface. Commands are declared with the Thor-style DSL
  # inherited from Mjolnir; the actual work of `go` lives in Perform.
  class Main < Mjolnir
    include Perform

    desc 'version', 'Echo the application version'
    def version
      puts VERSION
    end

    desc 'art', 'View the application art'
    def art
      puts "\n#{ART}\n"
    end

    # Each option below is marked required but takes its default from the
    # environment (or, for the date, from the clock at load time).
    desc 'go', 'Do your thing, SourMix'
    option(:date,
           type: :string,
           aliases: ['-d'],
           desc: 'Date to index (YYYY-mm-dd)',
           required: true,
           default: Time.now.strftime('%Y-%m-%d'))
    option(:mixpanel_api_key,
           type: :string,
           aliases: ['-k'],
           desc: 'Mixpanel API key',
           required: true,
           default: ENV['SOURMIX_MIXPANEL_API_KEY'])
    option(:mixpanel_api_secret,
           type: :string,
           aliases: ['-s'],
           desc: 'Mixpanel API secret',
           required: true,
           default: ENV['SOURMIX_MIXPANEL_API_SECRET'])
    option(:theon_uri,
           type: :string,
           aliases: ['-t'],
           desc: 'Theon URI (including auth)',
           required: true,
           default: ENV['SOURMIX_THEON_URI'])
    include_common_options
    def go
      perform
    end
  end
end
@@ -0,0 +1,42 @@
1
module SourMix
  # The version lives in a VERSION file two directories above this one so the
  # build pipeline can read it too
  version_file = File.join(File.dirname(__FILE__), '..', '..', 'VERSION')
  VERSION = File.read(version_file).strip

  # One-line description of the project
  SUMMARY = 'Shove Mixpanel events into Kafka by way of Theon'

  # License identifier
  LICENSE = 'ISC'

  # Project home
  HOMEPAGE = 'https://github.com/sczizzo/sourmix'

  # Maintainer
  AUTHOR = 'Sean Clemmer'

  # Maintainer contact
  EMAIL = 'sczizzo@gmail.com'

  # Bundled extensions
  TRAVELING_RUBY_BUCKET = 'http://d6r77u77i8pq3.cloudfront.net'
  TRAVELING_RUBY_VERSION = '20150715-2.2.2'
  JSON_VERSION = '1.8.2'

  # ASCII art banner; the trailing %s is filled in with VERSION
  ART = (<<-'EOART') % VERSION

_ _ _ _ _ _ _ _ _
/ /\ /\ \ /\_\ /\ \ /\_\/\_\ _ /\ \ /_/\ /\ \
/ / \ / \ \ / / / _ / \ \ / / / / //\_\ \ \ \\ \ \ \ \_\
/ / /\ \__ / /\ \ \\ \ \__ /\_\ / /\ \ \ /\ \/ \ \/ / / /\ \_\\ \ \__/ / /
/ / /\ \___\ / / /\ \ \\ \___\ / / // / /\ \_\ / \____\__/ / / /\/_/ \ \__ \/_/
\ \ \ \/___// / / \ \_\\__ / / / // / /_/ / // /\/________/ / / / \/_/\__/\
\ \ \ / / / / / // / / / / // / /__\/ // / /\/_// / / / / / _/\/__\ \
_ \ \ \ / / / / / // / / / / // / /_____// / / / / / / / / / _/_/\ \ \
/_/\__/ / / / / /___/ / // / /___/ / // / /\ \ \ / / / / / /___/ / /__ / / / \ \ \
\ \/___/ / / / /____\/ // / /____\/ // / / \ \ \\/_/ / / //\__\/_/___\ / / / /_/ /
\_____\/ \/_________/ \/_________/ \/_/ \_\/ \/_/ \/_________/ \/_/ \_\/ v%s

  EOART
end
@@ -0,0 +1,74 @@
1
+ require 'digest/md5'
2
+ require 'net/http'
3
+ require 'json'
4
+ require 'cgi'
5
+
6
+
7
module Mixpanel
  # Small client for Mixpanel's signed HTTP API.
  #
  # Every request carries api_key, expire and format parameters plus a "sig"
  # parameter: the MD5 of the sorted "key=value" pairs concatenated with the
  # API secret. The 'export' endpoint is served from data_url in :raw format;
  # every other endpoint is served from base_url in :json format.
  class Client
    attr_reader :api_key, :api_secret, :base_url, :data_url, :expire_at

    # options - Hash with Symbol keys:
    #           :api_key    - required
    #           :api_secret - required
    #           :base_url   - optional, defaults to the mixpanel.com 2.0 API
    #           :data_url   - optional, defaults to the data.mixpanel.com 2.0 API
    #           :expire_at  - optional request lifetime in seconds (default 90)
    #
    # Raises KeyError if :api_key or :api_secret is missing.
    def initialize options
      @api_key    = options.fetch :api_key
      @api_secret = options.fetch :api_secret
      @base_url   = options.fetch :base_url, 'https://mixpanel.com/api/2.0'
      @data_url   = options.fetch :data_url, 'https://data.mixpanel.com/api/2.0'
      @expire_at  = options.fetch :expire_at, 90 # seconds
    end

    # GET the endpoint and parse the body as newline-delimited JSON.
    #
    # Returns an Array with one parsed value per non-blank line.
    # Raises JSON::ParserError if a line is not valid JSON.
    def request endpoint, params = {}
      parse_response Net::HTTP.get(request_uri(endpoint, params))
    end

    # Build the signed URI for an endpoint without performing the request.
    #
    # endpoint - String or Symbol endpoint name, e.g. 'export'
    # params   - Hash of Symbol keys to values; an :expire entry overrides the
    #            computed expiry. The Hash is not modified.
    #
    # Returns a URI.
    def request_uri endpoint, params = {}
      URI request_url(endpoint, params)
    end


    private

    # One parsed JSON value per line; blank lines (e.g. a trailing newline
    # followed by whitespace) are skipped instead of raising.
    def parse_response body
      body.each_line.reject { |l| l.strip.empty? }.map { |l| JSON.parse l }
    end

    def request_url endpoint, params
      # File.join is used purely to glue the two parts with exactly one slash
      url = File.join request_api(endpoint), endpoint.to_s
      qs  = encode_params sign_params(endpoint, params)
      "#{url}?#{qs}"
    end

    # The endpoint is compared as a String so that :export and 'export' are
    # routed identically (the URL itself is already built from endpoint.to_s).
    def export? endpoint
      endpoint.to_s == 'export'
    end

    def request_api endpoint
      export?(endpoint) ? data_url : base_url
    end

    def request_format endpoint
      export?(endpoint) ? :raw : :json
    end

    # Query string with keys and values escaped, pairs sorted for stable output
    def encode_params params
      params.map { |k,v| "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}" }.sort.join('&')
    end

    # Returns a new Hash: the normalized params plus the :sig entry. The
    # signature is computed over the unescaped "key=value" pairs.
    def sign_params endpoint, params
      nps = normalize_params endpoint, params
      jps = nps.map { |k,v| "#{k}=#{v}" }.sort.join
      nps[:sig] = Digest::MD5.hexdigest(jps + api_secret)
      nps
    end

    def normalize_params endpoint, params
      params.merge \
        api_key: api_key,
        expire: request_expire(params),
        format: request_format(endpoint)
    end

    # Caller-supplied :expire wins; otherwise now + expire_at seconds
    def request_expire params
      params[:expire] || Time.now.to_i + expire_at
    end

  end
end
@@ -0,0 +1,49 @@
1
+ require 'slog'
2
+ require 'thor'
3
+
4
+
5
# Thor's hammer! Like Thor with better logging
class Mjolnir < Thor

  # SOURMIX_DEBUG values (case-insensitive) that leave debug logging off.
  # Unset and empty count as off; any other value turns it on. Previously the
  # raw String was used as the default of a boolean option, so "false" and
  # "0" enabled debug logging.
  DEBUG_OFF_VALUES = ['', '0', 'false', 'no', 'off'].freeze

  # Common options for Thor commands
  COMMON_OPTIONS = {
    log: {
      type: :string,
      aliases: %w[ -L ],
      desc: 'Log to file instead of STDERR',
      default: ENV['SOURMIX_LOG']
    }.freeze,
    debug: {
      type: :boolean,
      aliases: %w[ -V ],
      desc: 'Enable DEBUG-level logging',
      default: !DEBUG_OFF_VALUES.include?(ENV['SOURMIX_DEBUG'].to_s.strip.downcase)
    }.freeze
  }.freeze

  # Decorate Thor commands with the options above
  def self.include_common_options
    COMMON_OPTIONS.each do |name, spec|
      # Hand over a copy so the shared spec is never modified by the callee
      option name, spec.dup
    end
  end


  no_commands do

    # Construct a Logger given the command-line options. The logger is built
    # once and memoized; output is colorized/prettified only when the log
    # device is a terminal.
    def log
      return @logger if defined? @logger
      level  = options.debug? ? :debug : :info
      device = options.log || $stderr
      # options.log is a file path (String), which has no #tty?
      pretty = device.respond_to?(:tty?) && device.tty?
      @logger = Slog.new \
        out: device,
        level: level,
        colorize: pretty,
        prettify: pretty
    end

  end
end
@@ -0,0 +1,181 @@
1
+ require 'shellwords'
2
+ require 'digest/md5'
3
+ require 'tempfile'
4
+ require 'thread'
5
+ require 'logger'
6
+ require 'json'
7
+ require 'date'
8
+ require 'time'
9
+
10
+ require_relative 'mixpanel_client'
11
+
12
+
13
+
14
module SourMix
  # The pipeline behind `sourmix go`: download one day of Mixpanel events,
  # normalize them into pages of newline-delimited JSON, gzip each page and
  # POST the pages to Theon.
  #
  # The including object must provide `options` (responding to date,
  # mixpanel_api_key, mixpanel_api_secret, theon_uri and debug) and `log`.
  module Perform
    # Maximum number of events written to one results page
    PAGE_SIZE = 50_000

    # Run the whole pipeline for options.date
    def perform
      mixpanel = Mixpanel::Client.new \
        api_key: options.mixpanel_api_key,
        api_secret: options.mixpanel_api_secret
      dataset = download_dataset mixpanel
      results = process_dataset dataset
      gzip_results results
      report_results results
    end



    private

    # Download the raw export for options.date with curl.
    #
    # Returns the Tempfile curl wrote to. The curl exit status is logged but
    # not acted upon; a failed download yields an empty dataset.
    def download_dataset mixpanel
      log.debug event: :download_dataset

      dataset = Tempfile.new 'dataset'

      request_uri = mixpanel.request_uri 'export', \
        from_date: options.date, to_date: options.date

      download_dataset = 'curl -Lf -o %{out} %{url}' % {
        url: Shellwords.escape(request_uri.to_s),
        out: Shellwords.escape(dataset.path)
      }

      _, elapsed_s = timed { sh download_dataset }

      log.info event: :download_dataset, exited: $?.exitstatus, elapsed_s: elapsed_s

      dataset
    end


    # Normalize every line of the dataset and spread the results over pages
    # of at most PAGE_SIZE events. Lines that cannot be parsed or normalized
    # are logged and counted, never written.
    #
    # dataset - Tempfile of newline-delimited JSON; closed and unlinked here
    #
    # Returns an Array of closed Tempfiles, one per page (empty if no line
    # could be processed).
    def process_dataset dataset
      log.debug event: :process_dataset

      results = []
      nok, nerr = 0, 0
      page_fill = PAGE_SIZE # "full", so the first good event opens a page

      _, elapsed_s = timed do
        dataset.each_line do |line|
          begin
            event = JSON.generate(normalize_event(JSON.parse(line)))

            # Open a page only when there is an event to put in it, so bad
            # lines never create empty pages
            if page_fill >= PAGE_SIZE
              log.debug event: :process_dataset_page, okay: nok
              results << Tempfile.new('results')
              page_fill = 0
            end

            results.last.puts(v2_api? ? ':%s' % event : event)
            nok += 1
            page_fill += 1
          rescue => error
            log.error event: :process_dataset_error, error: error.inspect, line: line
            nerr += 1
          end
        end
      end

      dataset.close
      dataset.unlink
      results.each(&:close)

      # Guard the rate against a zero-length measurement
      ops = elapsed_s > 0 ? (nok + nerr) / elapsed_s : 0.0

      log.info event: :process_dataset, okay: nok, errors: nerr, elapsed_s: elapsed_s, ops: ops, pages: results.size

      results
    end


    # Gzip every page in its own thread, keeping the original next to the
    # new "<path>.gz" (gzip -k).
    def gzip_results results
      log.debug event: :gzip_results

      sizes, total_elapsed_s = timed do
        results.map do |result|
          Thread.new do
            gzip_result = 'gzip -k %s' % Shellwords.escape(result.path)
            _, elapsed_s = timed { sh gzip_result }

            size = File::Stat.new(result.path + '.gz').size

            log.debug event: :gzip_result, exited: $?.exitstatus, elapsed_s: elapsed_s, size: size

            # Hand the size back as the thread value instead of adding to a
            # variable shared between threads
            size
          end
        end.map(&:value)
      end

      log.info event: :gzip_result, total_elapsed_s: total_elapsed_s, total_size: sizes.inject(0, :+)
    end


    # POST every gzipped page to Theon with curl, one thread per page, then
    # remove the page and its .gz companion.
    def report_results results
      log.debug event: :report_results

      theon_uri = URI options.theon_uri
      theon_url = '%s://%s:%s%s' % [
        theon_uri.scheme, theon_uri.host, theon_uri.port, theon_uri.path
      ]
      auth_opt = theon_auth_option theon_uri

      _, total_elapsed_s = timed do
        results.map do |result|
          Thread.new do
            # Tempfile#path is nil once the file is unlinked; capture it first
            path    = result.path
            gz_path = path + '.gz'

            report_result = [
              'curl', auth_opt, '-Lf', Shellwords.escape(theon_url), '-XPOST',
              "-H 'Content-Encoding: gzip'",
              '--data-binary', '@%s.gz' % Shellwords.escape(path)
            ].compact.join(' ')

            _, elapsed_s = timed { sh report_result }
            exited = $?.exitstatus

            result.unlink
            File.delete gz_path if File.exist? gz_path

            log.debug event: :report_result, exited: exited, elapsed_s: elapsed_s
          end
        end.map(&:join)
      end

      log.info event: :report_results, total_elapsed_s: total_elapsed_s
    end


    # curl "-u user:password" argument for the credentials embedded in the
    # Theon URI, or nil when the URI carries no user.
    def theon_auth_option theon_uri
      return nil unless theon_uri.user
      '-u %s' % Shellwords.escape([ theon_uri.user, theon_uri.password ].join(':'))
    end


    # Pages for a "/v2/" Theon URI get a ':' prefix on every line
    def v2_api?
      options.theon_uri.include? '/v2/'
    end


    # Flatten a Mixpanel event: hoist 'properties' to the top level, replace
    # '.' in keys with '_', strip leading non-word characters from keys, drop
    # nil/empty values, and add '@timestamp' (ISO 8601 UTC, milliseconds,
    # from the 'time' epoch) and '@id' (MD5 of the input's #to_s).
    #
    # NOTE(review): '@id' depends on Hash#to_s, whose format is not
    # guaranteed to be identical across Ruby versions - confirm before
    # relying on ids produced by different interpreters.
    #
    # e - Hash with String keys; not modified
    #
    # Returns a new Hash. Raises TypeError when there is no usable 'time'.
    def normalize_event e
      digest = Digest::MD5.hexdigest e.to_s
      props  = e['properties'] || {}
      flat   = e.reject { |k,_| k == 'properties' }.merge(props)

      event = flat.each_with_object({}) do |(k,v),h|
        next if v.nil? || v == '' || v == [] || v == {}
        h[k.gsub('.','_').sub(/\A\W+/,'')] = v
      end

      event['@timestamp'] = Time.at(event['time']).utc.iso8601(3)
      event['@id'] = digest
      event
    end


    # Run a shell command. With --debug the command line is echoed to STDOUT
    # (credentials included) and its output passes through; otherwise stdout
    # and stderr are captured and returned.
    def sh command
      if options.debug
        puts command
        system command
      else
        `#{command} 2>&1`
      end
    end


    # Returns [block value, elapsed seconds], measured on the monotonic
    # clock so wall-clock adjustments cannot skew the duration.
    def timed
      started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      rvalue  = yield
      elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
      return rvalue, elapsed
    end

  end
end
data/lib/sourmix.rb ADDED
@@ -0,0 +1 @@
1
+ require_relative 'sourmix/main'
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sourmix
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Sean Clemmer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-02-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: slog
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2'
27
+ - !ruby/object:Gem::Dependency
28
+ name: thor
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: json
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '='
46
+ - !ruby/object:Gem::Version
47
+ version: 1.8.2
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '='
53
+ - !ruby/object:Gem::Version
54
+ version: 1.8.2
55
+ description: Shove Mixpanel events into Kafka by way of Theon.
56
+ email: sczizzo@gmail.com
57
+ executables:
58
+ - sourmix
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - LICENSE
63
+ - Readme.md
64
+ - VERSION
65
+ - bin/sourmix
66
+ - lib/sourmix.rb
67
+ - lib/sourmix/main.rb
68
+ - lib/sourmix/metadata.rb
69
+ - lib/sourmix/mixpanel_client.rb
70
+ - lib/sourmix/mjolnir.rb
71
+ - lib/sourmix/perform.rb
72
+ homepage: https://github.com/sczizzo/sourmix
73
+ licenses:
74
+ - ISC
75
+ metadata: {}
76
+ post_install_message:
77
+ rdoc_options: []
78
+ require_paths:
79
+ - lib
80
+ required_ruby_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ required_rubygems_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirements: []
91
+ rubyforge_project:
92
+ rubygems_version: 2.4.5.1
93
+ signing_key:
94
+ specification_version: 4
95
+ summary: Shove Mixpanel events into Kafka by way of Theon
96
+ test_files: []