sneaql-standard 0.0.1-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 93a14cbef7a60a4a02e32472240d9039a8299ec5
4
+ data.tar.gz: 025aa637977c8ec97d79283cd1e3a1366705265f
5
+ SHA512:
6
+ metadata.gz: c770787a6e7188206f0702f8c2934616a24949ef456338d38e813f38a7c7d73fc6c19273977c51bff0cf263587401b0172334eec6cd407f874c58e7b8178d6b5
7
+ data.tar.gz: 3c42b563381fcd46d67c97bd91fbe2a65f97e26b48944947a74c439dc9ee9fb05751968f55be600bbe067beaf38fb673d5ff3208bd646475fec28ea8028d798d
data/bin/sneaql ADDED
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'sneaql_standard'
4
+ require 'thor'
5
+
6
module Sneaql
  module Standard
    # thor based command line interface for sneaql standard.
    # every command builds a fresh Sneaql::SneaqlStandard and delegates to it.
    class CLI < Thor
      desc "run_transforms", "run sneaql transforms"
      long_desc <<-LONGDESC
      runs transforms based upon current ENV vars

      LONGDESC
      # cli command: calls SneaqlStandard#run
      def run_transforms
        standard = Sneaql::SneaqlStandard.new
        standard.run
      end

      desc "configure_db", "configure database"
      long_desc <<-LONGDESC
      creates the sneaql.transforms table

      LONGDESC
      # cli command: calls SneaqlStandard#create_db_objects with no arguments
      def configure_db
        standard = Sneaql::SneaqlStandard.new
        standard.create_db_objects
      end
    end
  end
end

# hand the command line arguments to thor for dispatch
Sneaql::Standard::CLI.start(ARGV)
@@ -0,0 +1,252 @@
1
+ require 'sneaql'
2
+ require 'jdbc_helpers'
3
+ require 'dotenv'
4
+ require 'thread'
5
+
6
+ require_relative 'sneaql_standard_lib/logging.rb'
7
+ require_relative 'sneaql_standard_lib/parallelize.rb'
8
+ require_relative 'sneaql_standard_lib/jdbc_drivers.rb'
9
+
10
+ Dotenv.load
11
+
12
module Sneaql
  # top level class for interacting with sneaql standard.
  # all configuration is read from environment variables by #set_params.
  class SneaqlStandard
    # table used when no transform table name is configured
    DEFAULT_TRANSFORM_TABLE = 'sneaql.transforms'.freeze

    # each of these hashes represents an env var that is required,
    # plus an optional regex the value has to match.
    # the driver jar location accepts http, https, file and s3 urls.
    REQUIRED_ENV_VARS = [
      { var: 'SNEAQL_JDBC_URL', sym: :jdbc_url, validation: /\Ajdbc:.+/i },
      { var: 'SNEAQL_DB_USER', sym: :db_user },
      { var: 'SNEAQL_DB_PASS', sym: :db_pass },
      {
        var: 'SNEAQL_JDBC_DRIVER_JAR',
        sym: :jdbc_driver_jar,
        validation: %r{\A(https?://.+|file://.+|s3://.+)}i
      },
      { var: 'SNEAQL_JDBC_DRIVER_CLASS', sym: :jdbc_driver_class }
    ].freeze

    # optional env vars have a default instead of a validation
    OPTIONAL_ENV_VARS = [
      { var: 'SNEAQL_JDBC_DRIVER_JAR_MD5', sym: :jdbc_driver_jar_md5, default: nil },
      { var: 'SNEAQL_METADATA_MANAGER_TYPE', sym: :step_metadata_manager_type, default: 'transform_steps_table' },
      { var: 'SNEAQL_REPO_BASE_DIR', sym: :repo_base_dir, default: '/tmp/sneaql/repos' },
      { var: 'SNEAQL_TRANSFORM_CONCURRENCY', sym: :concurrency, default: 1 },
      { var: 'SNEAQL_TRANSFORM_TABLE_NAME', sym: :transform_table_name, default: DEFAULT_TRANSFORM_TABLE }
    ].freeze

    # exposed for unit testing
    attr_reader :params
    attr_accessor :q

    # runs all transforms: reads the environment, loads the jdbc driver,
    # queues every active transform and then processes the queue.
    def run
      set_params
      configure_jdbc_driver
      build_transform_queue
      run_transforms
    end

    # creates database objects
    # @param [String] transform_table_name if provided will override the
    #   configured transform table (SNEAQL_TRANSFORM_TABLE_NAME, which itself
    #   defaults to sneaql.transforms)
    def create_db_objects(transform_table_name = nil)
      set_params
      configure_jdbc_driver
      create_transforms_table(transform_table_name)
    end

    # creates transform_table, creating its schema first when the name is
    # schema qualified and the database is not sqlite.
    # @param [String] transform_table_name if provided will override the
    #   configured transform table
    def create_transforms_table(transform_table_name = nil)
      # fall back to the configured table so that configure_db creates the
      # same table that get_transforms reads from
      transform_table_name ||= @params[:transform_table_name] || DEFAULT_TRANSFORM_TABLE

      connection = create_connection
      db_manager = create_db_manager

      # a dotted name indicates a schema qualified object...
      # make sure the db supports schemas before trying to create one
      if transform_table_name =~ /\w+\.\w+/ && !['sqlite'].include?(@params[:database])
        JDBCHelpers::Execute.new(
          connection,
          "create schema if not exists #{transform_table_name.match(/^\w+/)};"
        )
      end

      creator = Sneaql::Standard::DBObjectCreator.new(
        connection,
        db_manager,
        logger
      )
      creator.create_transforms_table(transform_table_name)
    ensure
      # connection is nil when create_connection itself raised; guarding here
      # keeps that original error from being replaced by a NoMethodError
      connection.close if connection
    end

    # processes environment variables into @params.
    # any failure is logged and then re-raised.
    # @raise [RuntimeError] when a required variable is missing or fails
    #   its validation
    def set_params
      @params = {}

      REQUIRED_ENV_VARS.each do |env_var|
        value = ENV[env_var[:var]]
        raise "required environment variable #{env_var[:var]} not provided" unless value

        # assign the value of the env_var to the symbol key of @params
        @params[env_var[:sym]] = value

        # validate if a validation is provided
        next unless env_var[:validation]
        unless value =~ env_var[:validation]
          raise "required environment variable #{env_var[:var]} looks invalid"
        end
      end

      OPTIONAL_ENV_VARS.each do |env_var|
        @params[env_var[:sym]] = ENV[env_var[:var]] || env_var[:default]
      end

      # numeric parameter provided by env var should be casted.
      # to_i yields 0 for non-numeric text, and a concurrency below 1 would
      # start no worker threads at all, so fall back to a single thread.
      @params[:concurrency] = @params[:concurrency].to_i
      if @params[:concurrency] < 1
        logger.warn('SNEAQL_TRANSFORM_CONCURRENCY must be at least 1, using 1')
        @params[:concurrency] = 1
      end

      # determine database type based jdbc url
      # while technically any jdbc driver should work
      # with sneaql, the database type allows for better
      # handling of transactions, boolean, etc.
      @params[:database] = Sneaql::Core.database_type(@params[:jdbc_url])
    rescue => e
      logger.error(e.message)
      raise
    end

    # creates a threadsafe queue with one parameter hash per active transform.
    # errors are logged but not re-raised, so a failure part way through
    # leaves the transforms queued so far in @q.
    # NOTE(review): #run carries on after a failure here - confirm intended.
    def build_transform_queue
      # creates a queue to hold all the transform parameter hashes
      @q = Queue.new

      transforms = get_transforms
      logger.info("#{transforms.length} transforms found in database...")

      # push transforms on to queue
      transforms.each do |t|
        # repo must be http or git https
        raise 'malformed transform definition' unless t['sql_repository'] =~ /^http.*/i

        tmp = @params.merge(
          transform_name: t['transform_name'],
          repo_url: t['sql_repository']
        )

        # determine repo type based upon the presence or absence of branch
        # this comes from sql which is why the casting and strip
        if t['sql_repository_branch'].to_s.strip == ''
          tmp[:repo_type] = 'http'
        else
          tmp[:repo_type] = 'git'
          tmp[:sql_repository_branch] = t['sql_repository_branch']
        end

        tmp[:compression] = 'zip' if tmp[:repo_url] =~ /.*\.zip$/

        # only step manager option
        tmp[:step_metadata_manager_type] = 'local_file'

        # must be sneaql.json in the base of the sneaql repo
        tmp[:step_metadata_file_path] = "#{@params[:repo_base_dir]}/#{tmp[:transform_name]}/sneaql.json"

        @q.push tmp
      end
    rescue => e
      logger.error(e.message)
      e.backtrace.each { |b| logger.error(b) }
    end

    # fetches the active transforms from the transform table.
    # the connection is always closed; query errors propagate to the caller.
    # @return [Array] the #results of JDBCHelpers::QueryResultsToArray
    def get_transforms
      # configure driver and db manager
      configure_jdbc_driver
      db_manager = create_db_manager

      # connect and retrieve transform list
      connection = create_connection

      # fetch an array of active transforms
      JDBCHelpers::QueryResultsToArray.new(
        connection,
        %(select
            transform_name
            ,sql_repository
            ,sql_repository_branch
          from
            #{@params[:transform_table_name]}
          where
            is_active = #{db_manager.has_boolean ? 'true' : 1}
          order by
            transform_name;),
        logger
      ).results
    ensure
      # no return here: returning from ensure would discard any exception
      connection.close if connection
    end

    # perform concurrent transform run.
    # constructing ParallelizeSneaqlTransforms starts the processing.
    def run_transforms
      ParallelizeSneaqlTransforms.new(
        @q,
        @params[:concurrency],
        logger
      )
    end

    # creates a jdbc connection based upon
    # current driver context
    # @return [JDBCHelpers::ConnectionFactory.connection]
    def create_connection
      JDBCHelpers::ConnectionFactory.new(
        @params[:jdbc_url],
        @params[:db_user],
        @params[:db_pass],
        logger
      ).connection
    end

    # creates a database manager
    # @return [Object] new instance of the class Sneaql::Core.find_class
    #   returns for the current database type
    def create_db_manager
      Sneaql::Core.find_class(
        :database,
        @params[:database]
      ).new
    end

    # configures the jdbc driver into the current context
    def configure_jdbc_driver
      j = Sneaql::JDBCDriverHandler.new(@params)
      j.confirm_jdbc_driver
      j.require_jdbc_driver
    end
  end
end
@@ -0,0 +1,112 @@
1
+ require 'digest'
2
+ require 'open-uri'
3
+ require 'aws-sdk'
4
+
5
module Sneaql
  # idempotent handling for jdbc driver
  class JDBCDriverHandler
    # local path that downloaded (http/s3) drivers are stored at
    DOWNLOAD_PATH = '/tmp/jdbc.jar'.freeze

    # exposed for unit tests
    attr_accessor :confirmed_path
    attr_accessor :target_path

    # stores the parameters; nothing is downloaded or loaded until
    # confirm_jdbc_driver / require_jdbc_driver are called
    # @param [Hash] params parameter hash; reads :jdbc_driver_jar,
    #   :jdbc_driver_jar_md5 and :jdbc_driver_class
    def initialize(params)
      @params = params
    end

    # driver info must be provided
    # jar file should be one of the following:
    # http store http://path/to/jarfile.jar
    # inside container file://path/to/jarfile.jar
    # s3 bucket s3://path/to/jarfile.jar requires aws credentials to be provided
    # this method confirms the existence of the jdbc driver jar file
    # if the file exists, no action is taken. if file does not exist
    # it is downloaded from the source location, either http or s3.
    # for file:// locations nothing is downloaded; confirmed_path is
    # left nil when the file is missing.
    # @raise [RuntimeError] when the location scheme is not supported or
    #   the md5 (if one was provided) does not match
    def confirm_jdbc_driver
      @confirmed_path = nil
      jar = @params[:jdbc_driver_jar]

      case jar
      when /\Ahttp/i
        @target_path = DOWNLOAD_PATH
        @confirmed_path = File.exist?(@target_path) ? @target_path : download_driver_http
      when /\Afile/i
        @target_path = jar.sub(%r{\Afile://}i, '')
        @confirmed_path = @target_path if File.exist?(@target_path)
      when /\As3/i
        @target_path = DOWNLOAD_PATH
        @confirmed_path = File.exist?(@target_path) ? @target_path : download_driver_s3
      else
        raise 'no suitable driver provided'
      end

      # rubocop says to turn this into a guard statement
      # but this needs the driver to be present before running
      if @params[:jdbc_driver_jar_md5]
        raise 'driver jar md5 mismatch' unless md5_check(
          @confirmed_path,
          @params[:jdbc_driver_jar_md5]
        )
      end
    end

    # downloads driver from an http source assuming no credentials
    # need to be provided
    # @return [String] path the jar was written to
    def download_driver_http
      # URI#open (from open-uri) only ever opens a url, unlike Kernel#open
      jar_bytes = URI.parse(@params[:jdbc_driver_jar]).open(&:read)
      # a jar is binary data, so write it without newline/encoding translation
      File.binwrite(@target_path, jar_bytes)
      # return the path rather than the byte count File.binwrite gives back,
      # because confirm_jdbc_driver stores this value as confirmed_path
      @target_path
    end

    # downloads jar file from s3 source
    # uses standard AWS environment variables
    # or instance profile for credentials
    # @return [String] path the jar was written to
    # @raise [RuntimeError] when the location is not s3://bucket/key
    def download_driver_s3
      location = @params[:jdbc_driver_jar].match(
        %r{\As3://([a-zA-Z0-9.\-]+)/(.+)\z}i
      )
      raise 'malformed s3 driver location, expected s3://bucket/key' unless location
      bucket_name = location[1]
      object_key = location[2]

      aws_creds =
        if ENV['AWS_ACCESS_KEY_ID']
          Aws::Credentials.new(
            ENV['AWS_ACCESS_KEY_ID'],
            ENV['AWS_SECRET_ACCESS_KEY']
          )
        else
          Aws::InstanceProfileCredentials.new
        end

      # Aws::S3 is a namespace module in aws-sdk v2; the api client
      # that provides get_object is Aws::S3::Client
      s3 = Aws::S3::Client.new(
        region: ENV['AWS_REGION'],
        credentials: aws_creds
      )

      s3.get_object(
        response_target: @target_path,
        bucket: bucket_name,
        key: object_key
      )
      # return the path, not the sdk response (see download_driver_http)
      @target_path
    end

    # confirms that file md5 matches value provided.
    # the comparison ignores letter case and surrounding whitespace
    # in the value provided.
    # @param [String] file_path path to file
    # @param [String] file_md5 known md5 of file
    # @return [Boolean]
    def md5_check(file_path, file_md5)
      Digest::MD5.file(file_path).hexdigest == file_md5.to_s.strip.downcase
    end

    # requires the jar file and jdbc driver class
    # into the current jruby context. after this
    # runs all jdbc connections will use this driver class.
    def require_jdbc_driver
      require @confirmed_path
      java_import @params[:jdbc_driver_class]
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'logger'
2
+
3
module SneaqlStandard
  # shared logger instance writing to standard output
  Log = Logger.new(STDOUT)
end

# top level accessor for the shared logger
# @return [Logger] SneaqlStandard::Log
def logger
  SneaqlStandard::Log
end

# custom formatter provides logging with thread id; a multi-line
# message is split so every line receives its own log prefix.
# a message that is empty after to_s produces no output at all.
logger.formatter = proc do |level, timestamp, _program, message|
  prefixed = message.to_s.split(/\n+/).map do |text|
    "[#{level}] #{timestamp} tid#{Thread.current.object_id}: #{text}\n"
  end
  prefixed.join
end
@@ -0,0 +1,48 @@
1
+ require 'thread'
2
+
3
+ # used to run concurrent sneaql transforms
4
+ # from a threadsafe queue.
5
class ParallelizeSneaqlTransforms
  # initialize object and run concurrent transforms.
  # note that construction blocks until every worker thread has finished.
  # @param [Queue] queue_to_process queue of hashes with all params needed for transform
  # @param [Fixnum] concurrency number of threads
  # @param [Logger] logger optional logger object
  def initialize(queue_to_process, concurrency, logger = nil)
    @logger = logger || Logger.new(STDOUT)
    @queue_to_process = queue_to_process
    @concurrency = concurrency
    parallelize
  end

  # performs the actual parallel execution: starts the worker threads
  # and waits for all of them to finish.
  def parallelize
    @logger.info(
      "processing #{@queue_to_process.length} transforms with a concurrency of #{@concurrency}..."
    )

    workers = @concurrency.times.map do
      Thread.new { drain_queue }
    end
    workers.each(&:join)
    nil
  end

  private

  # worker loop: takes items off the queue until it is empty
  def drain_queue
    loop do
      begin
        # non-blocking pop raises ThreadError once the queue is empty.
        # relying on that instead of a separate empty? check means two
        # threads can never race for the last item and end up with nil.
        item = @queue_to_process.pop(true)
      rescue ThreadError
        break
      end
      process(item)
    end
  end

  # runs a single transform; failures are logged and do not stop the worker
  def process(item)
    Sneaql::Transform.new(item, @logger).run
  rescue => e
    @logger.error(e.message)
    Array(e.backtrace).each { |b| @logger.error(b) }
  ensure
    @logger.info("finished processing #{transform_name(item)}")
  end

  # name used in log output. hashes built by SneaqlStandard carry the
  # symbol key, the string key is still honoured as a fallback.
  def transform_name(item)
    return nil unless item.is_a?(Hash)
    item[:transform_name] || item['transform_name']
  end
end
metadata ADDED
@@ -0,0 +1,119 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sneaql-standard
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: java
6
+ authors:
7
+ - jeremy winters
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-01-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: 0.0.4
19
+ name: sneaql
20
+ prerelease: false
21
+ type: :runtime
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.0.4
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '5.9'
33
+ name: minitest
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '5.9'
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '2.6'
47
+ name: aws-sdk
48
+ prerelease: false
49
+ type: :runtime
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.6'
55
+ - !ruby/object:Gem::Dependency
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '2.1'
61
+ name: dotenv
62
+ prerelease: false
63
+ type: :runtime
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '2.1'
69
+ - !ruby/object:Gem::Dependency
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '0.19'
75
+ name: thor
76
+ prerelease: false
77
+ type: :runtime
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.19'
83
+ description: provides a cli and runtime environment for sneaql
84
+ email: jeremy.winters@full360.com
85
+ executables:
86
+ - sneaql
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - bin/sneaql
91
+ - lib/sneaql_standard.rb
92
+ - lib/sneaql_standard_lib/jdbc_drivers.rb
93
+ - lib/sneaql_standard_lib/logging.rb
94
+ - lib/sneaql_standard_lib/parallelize.rb
95
+ homepage: https://www.full360.com
96
+ licenses:
97
+ - MIT
98
+ metadata: {}
99
+ post_install_message:
100
+ rdoc_options: []
101
+ require_paths:
102
+ - lib
103
+ required_ruby_version: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ version: '2.0'
108
+ required_rubygems_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ requirements: []
114
+ rubyforge_project:
115
+ rubygems_version: 2.6.6
116
+ signing_key:
117
+ specification_version: 4
118
+ summary: standard sneaql deployment
119
+ test_files: []