sneaql-standard 0.0.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 93a14cbef7a60a4a02e32472240d9039a8299ec5
4
+ data.tar.gz: 025aa637977c8ec97d79283cd1e3a1366705265f
5
+ SHA512:
6
+ metadata.gz: c770787a6e7188206f0702f8c2934616a24949ef456338d38e813f38a7c7d73fc6c19273977c51bff0cf263587401b0172334eec6cd407f874c58e7b8178d6b5
7
+ data.tar.gz: 3c42b563381fcd46d67c97bd91fbe2a65f97e26b48944947a74c439dc9ee9fb05751968f55be600bbe067beaf38fb673d5ff3208bd646475fec28ea8028d798d
data/bin/sneaql ADDED
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'sneaql_standard'
4
+ require 'thor'
5
+
6
module Sneaql
  module Standard
    # Thor command line interface for sneaql standard.
    # Each command builds a fresh Sneaql::SneaqlStandard instance and
    # delegates to it; configuration is taken from the environment by
    # that instance, not from command line options.
    class CLI < Thor
      desc "run_transforms", "run sneaql transforms"
      long_desc <<-LONGDESC
        runs transforms based upon current ENV vars

      LONGDESC
      # Entry point for `sneaql run_transforms`.
      def run_transforms
        standard = Sneaql::SneaqlStandard.new
        standard.run
      end

      desc "configure_db", "configure database"
      long_desc <<-LONGDESC
        creates the sneaql.transforms table

      LONGDESC
      # Entry point for `sneaql configure_db`.
      def configure_db
        standard = Sneaql::SneaqlStandard.new
        standard.create_db_objects
      end
    end
  end
end

# hand the raw command line arguments to thor for dispatch
Sneaql::Standard::CLI.start(ARGV)
@@ -0,0 +1,252 @@
1
+ require 'sneaql'
2
+ require 'jdbc_helpers'
3
+ require 'dotenv'
4
+ require 'thread'
5
+
6
+ require_relative 'sneaql_standard_lib/logging.rb'
7
+ require_relative 'sneaql_standard_lib/parallelize.rb'
8
+ require_relative 'sneaql_standard_lib/jdbc_drivers.rb'
9
+
10
+ Dotenv.load
11
+
12
module Sneaql
  # Top level class for interacting with sneaql standard.
  #
  # Configuration is read from SNEAQL_* environment variables (see
  # #set_params). With that configuration the class either runs every
  # active transform listed in the transforms table (#run) or creates
  # that table (#create_db_objects).
  class SneaqlStandard
    # Environment variables that must be present. :validation is an
    # optional regex the value has to match. Patterns are anchored with
    # \A so a match cannot start on a later line of a multi-line value.
    # Both http and https jar locations are accepted because the driver
    # handler treats any url starting with "http" as an http download.
    REQUIRED_ENV_VARS = [
      { var: 'SNEAQL_JDBC_URL', sym: :jdbc_url, validation: /\Ajdbc:.+/i },
      { var: 'SNEAQL_DB_USER', sym: :db_user },
      { var: 'SNEAQL_DB_PASS', sym: :db_pass },
      { var: 'SNEAQL_JDBC_DRIVER_JAR', sym: :jdbc_driver_jar, validation: %r{\A(https?|file|s3)://.+}i },
      { var: 'SNEAQL_JDBC_DRIVER_CLASS', sym: :jdbc_driver_class }
    ].freeze

    # Environment variables that fall back to :default when not set.
    OPTIONAL_ENV_VARS = [
      { var: 'SNEAQL_JDBC_DRIVER_JAR_MD5', sym: :jdbc_driver_jar_md5, default: nil },
      { var: 'SNEAQL_METADATA_MANAGER_TYPE', sym: :step_metadata_manager_type, default: 'transform_steps_table' },
      { var: 'SNEAQL_REPO_BASE_DIR', sym: :repo_base_dir, default: '/tmp/sneaql/repos' },
      { var: 'SNEAQL_TRANSFORM_CONCURRENCY', sym: :concurrency, default: 1 },
      { var: 'SNEAQL_TRANSFORM_TABLE_NAME', sym: :transform_table_name, default: 'sneaql.transforms' }
    ].freeze

    # Database types for which no "create schema" statement is issued.
    SCHEMALESS_DATABASES = ['sqlite'].freeze

    # exposed for unit testing
    attr_reader :params
    attr_accessor :q

    # Runs all transforms: reads the environment, loads the jdbc
    # driver, queues the active transforms and processes the queue.
    def run
      set_params
      configure_jdbc_driver
      build_transform_queue
      run_transforms
    end

    # Creates database objects.
    # @param [String] transform_table_name if provided will override sneaql.transforms
    def create_db_objects(transform_table_name = nil)
      set_params
      configure_jdbc_driver
      create_transforms_table(transform_table_name)
    end

    # Creates the transforms table, creating its schema first when the
    # name is schema qualified and the database is not schemaless.
    # @param [String] transform_table_name if provided will override sneaql.transforms
    def create_transforms_table(transform_table_name = nil)
      transform_table_name ||= 'sneaql.transforms'

      connection = create_connection
      db_manager = create_db_manager

      # a dotted name indicates a schema qualified object
      schema_qualified = transform_table_name =~ /\w+\.\w+/
      if schema_qualified && !SCHEMALESS_DATABASES.include?(@params[:database])
        # the schema is the leading word of the qualified name
        schema_name = transform_table_name[/\A\w+/]
        JDBCHelpers::Execute.new(
          connection,
          "create schema if not exists #{schema_name};"
        )
      end

      creator = Sneaql::Standard::DBObjectCreator.new(
        connection,
        db_manager,
        logger
      )
      creator.create_transforms_table(transform_table_name)
    ensure
      # connection is nil when create_connection itself raised; guarding
      # keeps that original error from being replaced by a NoMethodError
      connection.close if connection
    end

    # Processes environment variables into @params.
    # Any failure is logged and then re-raised.
    # @raise [RuntimeError] when a required variable is missing or fails validation
    def set_params
      @params = {}

      REQUIRED_ENV_VARS.each do |env_var|
        value = ENV[env_var[:var]]
        raise "required environment variable #{env_var[:var]} not provided" unless value

        if env_var[:validation] && value !~ env_var[:validation]
          raise "required environment variable #{env_var[:var]} looks invalid"
        end

        @params[env_var[:sym]] = value
      end

      OPTIONAL_ENV_VARS.each do |env_var|
        @params[env_var[:sym]] = ENV.fetch(env_var[:var], env_var[:default])
      end

      # numeric parameter provided by env var arrives as a string
      @params[:concurrency] = @params[:concurrency].to_i

      # determine database type based upon the jdbc url.
      # while technically any jdbc driver should work
      # with sneaql, the database type allows for better
      # handling of transactions, boolean, etc.
      @params[:database] = Sneaql::Core.database_type(@params[:jdbc_url])
    rescue => e
      logger.error(e.message)
      raise
    end

    # Creates a threadsafe queue (@q) holding one parameter hash per
    # active transform.
    #
    # NOTE: errors raised while building the queue are logged with
    # their backtrace and NOT re-raised, so the queue may end up empty
    # or partially filled.
    def build_transform_queue
      @q = Queue.new

      transforms = get_transforms
      logger.info("#{transforms.length} transforms found in database...")

      transforms.each do |t|
        # repo must be http or git https
        raise 'malformed transform definition' unless t['sql_repository'] =~ /\Ahttp/i

        # every transform starts from a copy of the global params
        entry = @params.dup
        entry[:transform_name] = t['transform_name']
        entry[:repo_url] = t['sql_repository']

        # repo type is determined by the presence or absence of a branch.
        # the value comes from sql which is why it is cast and stripped
        if t['sql_repository_branch'].to_s.strip.empty?
          entry[:repo_type] = 'http'
        else
          entry[:repo_type] = 'git'
          entry[:sql_repository_branch] = t['sql_repository_branch']
        end

        entry[:compression] = 'zip' if entry[:repo_url].end_with?('.zip')

        # only step manager option; overrides SNEAQL_METADATA_MANAGER_TYPE
        entry[:step_metadata_manager_type] = 'local_file'

        # must be sneaql.json in the base of the sneaql repo
        entry[:step_metadata_file_path] = "#{@params[:repo_base_dir]}/#{entry[:transform_name]}/sneaql.json"

        @q.push entry
      end
    rescue => e
      logger.error(e.message)
      e.backtrace.each { |b| logger.error(b) }
    end

    # Fetches the active transforms from the transforms table.
    # Query and connection errors propagate to the caller; the
    # connection is closed either way.
    # @return [Array] one row per active transform, ordered by name
    def get_transforms
      # configure driver and db manager
      configure_jdbc_driver
      db_manager = create_db_manager

      # connect and retrieve transform list
      connection = create_connection

      JDBCHelpers::QueryResultsToArray.new(
        connection,
        %(select
            transform_name
            ,sql_repository
            ,sql_repository_branch
          from
            #{@params[:transform_table_name]}
          where
            is_active = #{db_manager.has_boolean ? 'true' : 1}
          order by
            transform_name;),
        logger
      ).results
    ensure
      # no explicit return here: a return inside ensure would discard
      # any exception raised by the query above
      connection.close if connection
    end

    # Performs the concurrent transform run over @q.
    def run_transforms
      ParallelizeSneaqlTransforms.new(
        @q,
        @params[:concurrency],
        logger
      )
    end

    # Creates a jdbc connection based upon current driver context.
    # @return [JDBCHelpers::ConnectionFactory.connection]
    def create_connection
      JDBCHelpers::ConnectionFactory.new(
        @params[:jdbc_url],
        @params[:db_user],
        @params[:db_pass],
        logger
      ).connection
    end

    # Creates a database manager for the detected database type.
    # @return [Object] instance of the class registered for @params[:database]
    def create_db_manager
      Sneaql::Core.find_class(
        :database,
        @params[:database]
      ).new
    end

    # Configures the jdbc driver into the current context.
    def configure_jdbc_driver
      handler = Sneaql::JDBCDriverHandler.new(@params)
      handler.confirm_jdbc_driver
      handler.require_jdbc_driver
    end
  end
end
@@ -0,0 +1,112 @@
1
+ require 'digest'
2
+ require 'open-uri'
3
+ require 'aws-sdk'
4
+
5
module Sneaql
  # Idempotent handling for the jdbc driver jar: makes sure the jar is
  # available on the local filesystem (downloading it when necessary),
  # optionally verifies its md5, and loads it into the jruby context.
  class JDBCDriverHandler
    # local path that http and s3 drivers are downloaded to
    DOWNLOAD_TARGET_PATH = '/tmp/jdbc.jar'.freeze

    # s3://bucket/key, bucket limited to letters, digits, dots and dashes
    S3_URL_PATTERN = %r{\As3://(?<bucket>[a-z0-9.\-]+)/(?<key>.+)\z}i

    # exposed for unit tests
    attr_accessor :confirmed_path
    attr_accessor :target_path

    # @param [Hash] params parameter hash; reads :jdbc_driver_jar,
    #   :jdbc_driver_jar_md5 and :jdbc_driver_class
    def initialize(params)
      @params = params
    end

    # Confirms the existence of the jdbc driver jar file.
    # The jar location must be one of the following:
    #   http store        http://path/to/jarfile.jar
    #   inside container  file://path/to/jarfile.jar
    #   s3 bucket         s3://path/to/jarfile.jar  (requires aws credentials)
    # For http and s3 the jar is downloaded only when the local target
    # does not exist yet. When :jdbc_driver_jar_md5 is provided the
    # confirmed file is checked against it.
    # @raise [RuntimeError] when no suitable location is given, a
    #   file:// jar does not exist, or the md5 does not match
    def confirm_jdbc_driver
      @confirmed_path = nil
      jar_location = @params[:jdbc_driver_jar]

      case jar_location
      when /\Ahttp/i
        @target_path = DOWNLOAD_TARGET_PATH
        @confirmed_path = File.exist?(@target_path) ? @target_path : download_driver_http
      when /\Afile/i
        @target_path = jar_location.sub(%r{\Afile://}i, '')
        # fail here with a clear message rather than later on a nil path
        raise "jdbc driver jar not found at #{@target_path}" unless File.exist?(@target_path)
        @confirmed_path = @target_path
      when /\As3/i
        @target_path = DOWNLOAD_TARGET_PATH
        @confirmed_path = File.exist?(@target_path) ? @target_path : download_driver_s3
      else
        raise 'no suitable driver provided'
      end

      # the md5 check can only run once the driver is present locally
      return unless @params[:jdbc_driver_jar_md5]
      raise 'driver jar md5 mismatch' unless md5_check(
        @confirmed_path,
        @params[:jdbc_driver_jar_md5]
      )
    end

    # Downloads the driver from an http source assuming no credentials
    # need to be provided.
    # @return [String] path the jar was written to
    def download_driver_http
      # binary write so the jar bytes are stored untouched; going
      # through URI#read (open-uri) avoids Kernel#open on a url string
      File.binwrite(
        @target_path,
        URI.parse(@params[:jdbc_driver_jar]).read
      )
      @target_path
    end

    # Downloads the jar file from an s3 source. Uses the standard AWS
    # environment variables when AWS_ACCESS_KEY_ID is set, otherwise
    # instance profile credentials.
    # @return [String] path the jar was written to
    # @raise [RuntimeError] when the s3 url has no bucket or no object key
    def download_driver_s3
      location = S3_URL_PATTERN.match(@params[:jdbc_driver_jar])
      raise 'malformed s3 driver location' unless location

      aws_creds =
        if ENV['AWS_ACCESS_KEY_ID']
          Aws::Credentials.new(
            ENV['AWS_ACCESS_KEY_ID'],
            ENV['AWS_SECRET_ACCESS_KEY']
          )
        else
          Aws::InstanceProfileCredentials.new
        end

      # aws-sdk v2 exposes the api through Aws::S3::Client
      s3 = Aws::S3::Client.new(
        region: ENV['AWS_REGION'],
        credentials: aws_creds
      )

      s3.get_object(
        response_target: @target_path,
        bucket: location[:bucket],
        key: location[:key]
      )
      @target_path
    end

    # Confirms that file md5 matches value provided. The comparison
    # ignores letter case and surrounding whitespace of the given md5.
    # @param [String] file_path path to file
    # @param [String] file_md5 known md5 of file
    # @return [Boolean]
    def md5_check(file_path, file_md5)
      Digest::MD5.file(file_path).hexdigest == file_md5.to_s.strip.downcase
    end

    # Requires the jar file and jdbc driver class into the current
    # jruby context. After this runs all jdbc connections will use
    # this driver class.
    def require_jdbc_driver
      require @confirmed_path
      java_import @params[:jdbc_driver_class]
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'logger'
2
+
3
# Holds the single Logger instance (writing to STDOUT) used by
# sneaql standard.
module SneaqlStandard
  Log = Logger.new(STDOUT)
end

# Global accessor for the shared logger.
# @return [Logger]
def logger
  SneaqlStandard::Log
end

# custom formatter provides logging with thread id and multi-line
# entries each receiving their own log prefix. runs of newlines are
# collapsed; a message with no content still produces one prefixed
# line so that the log call is not silently dropped.
logger.formatter = proc do |severity, datetime, _progname, msg|
  lines = msg.to_s.split(/\n+/)
  lines = [''] if lines.empty?
  prefix = "[#{severity}] #{datetime} tid#{Thread.current.object_id}: "
  lines.map { |line| "#{prefix}#{line}\n" }.join
end
@@ -0,0 +1,48 @@
1
+ require 'thread'
2
+
3
# Used to run concurrent sneaql transforms from a threadsafe queue.
# Construction immediately starts the worker threads and blocks until
# the queue has been drained.
class ParallelizeSneaqlTransforms
  # Initializes the object and runs the concurrent transforms.
  # @param [Queue] queue_to_process queue of hashes with all params needed for transform
  # @param [Fixnum] concurrency number of threads
  # @param [Logger] logger optional logger object
  def initialize(queue_to_process, concurrency, logger = nil)
    @logger = logger || Logger.new(STDOUT)
    @queue_to_process = queue_to_process
    @concurrency = concurrency
    parallelize
  end

  # Performs the actual parallel execution: starts @concurrency worker
  # threads and waits for all of them to finish.
  def parallelize
    @logger.info(
      "processing #{@queue_to_process} with a concurrency of #{@concurrency}..."
    )

    workers = @concurrency.times.map do
      Thread.new { drain_queue }
    end
    workers.each(&:join)
    nil
  end

  private

  # Worker loop: processes entries until the queue reports empty.
  def drain_queue
    loop do
      transform_params = next_transform
      break unless transform_params
      run_transform(transform_params)
    end
  end

  # Non-blocking pop. The queue raises ThreadError when it is empty,
  # which is the only reliable "nothing left" signal when several
  # workers compete for the last entry.
  # @return [Hash, nil] next entry, or nil once the queue is empty
  def next_transform
    @queue_to_process.pop(true)
  rescue ThreadError
    nil
  end

  # Runs a single transform. Failures are logged with their backtrace
  # and do not stop the worker; completion is always logged.
  # @param [Hash] transform_params parameter hash for one transform
  def run_transform(transform_params)
    Sneaql::Transform.new(
      transform_params,
      @logger
    ).run
  rescue => e
    @logger.error(e.message)
    Array(e.backtrace).each { |b| @logger.error(b) }
  ensure
    # queue entries are keyed by symbol; string key kept as a fallback
    name = transform_params[:transform_name] || transform_params['transform_name']
    @logger.info("finished processing #{name}")
  end
end
metadata ADDED
@@ -0,0 +1,119 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sneaql-standard
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: java
6
+ authors:
7
+ - jeremy winters
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-01-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: 0.0.4
19
+ name: sneaql
20
+ prerelease: false
21
+ type: :runtime
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.0.4
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '5.9'
33
+ name: minitest
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '5.9'
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '2.6'
47
+ name: aws-sdk
48
+ prerelease: false
49
+ type: :runtime
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.6'
55
+ - !ruby/object:Gem::Dependency
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '2.1'
61
+ name: dotenv
62
+ prerelease: false
63
+ type: :runtime
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '2.1'
69
+ - !ruby/object:Gem::Dependency
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '0.19'
75
+ name: thor
76
+ prerelease: false
77
+ type: :runtime
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.19'
83
+ description: provides a cli and runtime environment for sneaql
84
+ email: jeremy.winters@full360.com
85
+ executables:
86
+ - sneaql
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - bin/sneaql
91
+ - lib/sneaql_standard.rb
92
+ - lib/sneaql_standard_lib/jdbc_drivers.rb
93
+ - lib/sneaql_standard_lib/logging.rb
94
+ - lib/sneaql_standard_lib/parallelize.rb
95
+ homepage: https://www.full360.com
96
+ licenses:
97
+ - MIT
98
+ metadata: {}
99
+ post_install_message:
100
+ rdoc_options: []
101
+ require_paths:
102
+ - lib
103
+ required_ruby_version: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ version: '2.0'
108
+ required_rubygems_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ requirements: []
114
+ rubyforge_project:
115
+ rubygems_version: 2.6.6
116
+ signing_key:
117
+ specification_version: 4
118
+ summary: standard sneaql deployment
119
+ test_files: []