sneaql 0.0.8-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 007ecaf278e3fd4e9fbed5c695fa84713d240e0c
4
+ data.tar.gz: bd7bb2e4dc227543ddf499ef7f8092463f4fb6f9
5
+ SHA512:
6
+ metadata.gz: bfcd8b46a3005f5d75b0b122063413ea537d20b5dec17221aec83d4ba7c4d7aed4ae7d51de29d0fd67f1d3c81c0fdff940baed2c5cf5b46fa6e2381f32d731d6
7
+ data.tar.gz: ed18029224f57ff17604ec1edb57754bd6a88ecd0319cbd15d8adc4e3ed86ea33c435d1dd1ca61c66e76067b1accf32b16a0e70572cd0dae402f7886931ffeb8
data/bin/sneaql ADDED
@@ -0,0 +1,273 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'sneaql'
4
+ require 'thor'
5
+ require 'dotenv'
6
+
7
# Global logger configuration: one Logger shared by the whole CLI process.
module SneaqlStandard
  # Logger instance writing to standard output.
  Log = ::Logger.new(STDOUT)
end
11
+
12
# Convenience accessor for the process-wide logger.
# @return [Logger] the instance stored in SneaqlStandard::Log
def logger
  ::SneaqlStandard::Log
end
15
+
16
# Installs a custom formatter on the global logger.
#
# Every line of a (possibly multi-line) message is emitted as its own log
# entry carrying the severity, timestamp and the current thread's object id.
# Timestamps are rendered in UTC when SNEAQL_LOG_UTC is set at the time this
# method is called; otherwise the local time zone is used.
def configure_logging_format
  use_utc = ENV['SNEAQL_LOG_UTC']

  logger.formatter = proc do |severity, datetime, _progname, msg|
    stamp = use_utc ? datetime.utc : datetime
    prefix = "[#{severity}] #{stamp} tid#{Thread.current.object_id}: "
    # runs of newlines are collapsed, so blank lines produce no entry
    msg.to_s.split(/\n+/).map { |line| "#{prefix}#{line}\n" }.join
  end
end
38
+
39
+
40
# Lists the locally installed gems.
# @return [Hash{String => Array<Gem::Specification>}] specifications keyed by
#   gem name, ordered by lower-cased name and then by version
def local_gems
  ordered_specs = Gem::Specification.sort_by { |spec| [spec.name.downcase, spec.version] }
  ordered_specs.group_by(&:name)
end
43
+
44
# Requires every sneaql extension gem named in the SNEAQL_EXTENSIONS
# environment variable (a comma separated list of gem names).
#
# Only names that match a locally installed gem are required; each one is
# reported at info level. Names that are requested but not installed are
# reported at warn level instead of being silently ignored. Surrounding
# whitespace and empty entries in the list are tolerated, and an unset
# variable is a no-op.
def require_sneaql_extensions
  requested = ENV['SNEAQL_EXTENSIONS'].to_s.split(',').map(&:strip).reject(&:empty?)
  return if requested.empty?

  installed = local_gems.keys

  # iterate in installed-gem order so the load order is deterministic
  installed.each do |gem_name|
    next unless requested.include?(gem_name)

    require gem_name
    logger.info "loading sneaql extension #{gem_name}"
  end

  (requested - installed).each do |gem_name|
    logger.warn "sneaql extension #{gem_name} is not installed, skipping"
  end
end
53
+
54
+ # sneaql
55
# sneaql
module Sneaql
  # standard implementation of sneaql
  module Standard
    # Command line interface for sneaql, built on Thor.
    #
    # Two commands are exposed: `exec` runs a transform and `dryrun` only
    # validates it. Both resolve their connection settings from command line
    # options first and fall back to SNEAQL_* environment variables (which may
    # be supplied through a dotenv file).
    #
    # Shared logic lives in private helper methods so that it is not
    # registered as additional CLI commands.
    class CLI < Thor
      desc "exec PATH_TO_TRANFORM_ROOT", "run sneaql, default action"

      # NOTE(review): "-url" is a multi-letter single-dash alias; verify that
      # the Thor version in use parses it as intended.
      method_option :step_file, :type => :string, :aliases => "-s"
      method_option :env_file, :type => :string, :aliases => "-e"
      method_option :jdbc_url, :type => :string, :aliases => "-url"
      method_option :db_user, :type => :string, :aliases => "-u"
      method_option :db_pass, :type => :string, :aliases => "-p"
      method_option :jdbc_driver_jar, :type => :string, :aliases => "-j"
      method_option :jdbc_driver_class, :type => :string, :aliases => "-c"
      method_option :debug, :default => false, :type => :boolean

      long_desc <<-LONGDESC
        executes the transform provided

        PARAMETERS

        you can provide the following parameters via an environment variable or by putting them in a sneaql.env file
        \n
        * export SNEAQL_JDBC_URL=jdbc:redshift://your-redshift-hostname:5439/dbname \n
        * export SNEAQL_DB_USER="dbadmin" \n
        * export SNEAQL_DB_PASS="password" \n
        * export SNEAQL_JDBC_DRIVER_JAR="java/RedshiftJDBC4-1.1.6.1006.jar" \n
        * export SNEAQL_JDBC_DRIVER_CLASS="com.amazon.redshift.jdbc4.Driver" \n
        \n
        note that if you provide an attribute on the command line it will override the environment variable value.
        \n
        EXAMPLE \n
        $ sneaql exec . \n
        the above will execute the transform in the current directory. note that sneaql.json file must be in this directory or provided via -s option.
      LONGDESC

      # Runs the transform rooted at +transform_path+ and exits the process
      # with the transform's exit code (1 on any unexpected error).
      # @param [String] transform_path path to the transform root directory
      def exec(transform_path)
        logger.level = options[:debug] ? Logger::DEBUG : Logger::INFO

        transform = build_transform(transform_path)
        transform.run

        exit transform.exit_code
      rescue => e
        log_error_and_exit(e)
      end

      desc "dryrun PATH_TO_TRANFORM_ROOT", "validate transform"

      method_option :step_file, :type => :string, :aliases => "-s"
      method_option :env_file, :type => :string, :aliases => "-e"
      method_option :jdbc_url, :type => :string, :aliases => "-url"
      method_option :db_user, :type => :string, :aliases => "-u"
      method_option :db_pass, :type => :string, :aliases => "-p"
      method_option :jdbc_driver_jar, :type => :string, :aliases => "-j"
      method_option :jdbc_driver_class, :type => :string, :aliases => "-c"

      long_desc <<-LONGDESC
        validates the steps of the transform provided without executing them

        PARAMETERS

        you can provide the following parameters via an environment variable or by putting them in a sneaql.env file
        \n
        * export SNEAQL_JDBC_URL=jdbc:redshift://your-redshift-hostname:5439/dbname \n
        * export SNEAQL_DB_USER="dbadmin" \n
        * export SNEAQL_DB_PASS="password" \n
        * export SNEAQL_JDBC_DRIVER_JAR="java/RedshiftJDBC4-1.1.6.1006.jar" \n
        * export SNEAQL_JDBC_DRIVER_CLASS="com.amazon.redshift.jdbc4.Driver" \n
        \n
        note that if you provide an attribute on the command line it will override the environment variable value.
        \n
        EXAMPLE \n
        $ sneaql dryrun . \n
        the above will validate the transform in the current directory. note that sneaql.json file must be in this directory or provided via -s option.
      LONGDESC

      # Validates the transform rooted at +transform_path+ (always logging at
      # debug level) and exits the process with the transform's exit code.
      # @param [String] transform_path path to the transform root directory
      def dryrun(transform_path)
        logger.level = Logger::DEBUG

        transform = build_transform(transform_path)
        transform.validate

        exit transform.exit_code
      rescue => e
        log_error_and_exit(e)
      end

      private

      # Resolves settings, prepares logging/extensions/driver and builds the
      # transform object shared by both commands.
      # @param [String] transform_path path to the transform root directory
      # @return [Sneaql::Transform]
      def build_transform(transform_path)
        settings = resolve_settings(transform_path)

        configure_logging_format
        require_sneaql_extensions if ENV['SNEAQL_EXTENSIONS']

        logger.debug("available sneaql resources: #{Sneaql::Core.class_map}")

        database = Sneaql::Core.database_type(settings[:jdbc_url])
        raise 'database specified in jdbc url is not supported, or malformed jdbc url' unless database

        load_jdbc_driver(settings[:jdbc_driver_jar], settings[:jdbc_driver_class])

        Sneaql::Transform.new(
          {
            transform_name: settings[:base_path],
            repo_base_dir: settings[:base_path],
            repo_type: 'local',
            database: database,
            jdbc_url: settings[:jdbc_url],
            db_user: settings[:db_user],
            db_pass: settings[:db_pass],
            step_metadata_manager_type: 'local_file',
            step_metadata_file_path: settings[:step_file]
          },
          logger
        )
      end

      # Loads the env file and merges command line options with SNEAQL_*
      # environment variables (command line wins). Prints the problem and
      # exits with status 1 when a required value is missing or resolution
      # fails.
      # @param [String] transform_path path to the transform root directory
      # @return [Hash] resolved settings keyed by symbol
      def resolve_settings(transform_path)
        Dotenv.load(options[:env_file] || 'sneaql.env')

        base_path = File.expand_path(transform_path)
        settings = {
          base_path: base_path,
          step_file: options[:step_file] || "#{base_path}/sneaql.json",
          jdbc_url: options[:jdbc_url] || ENV['SNEAQL_JDBC_URL'],
          db_user: options[:db_user] || ENV['SNEAQL_DB_USER'],
          db_pass: options[:db_pass] || ENV['SNEAQL_DB_PASS'],
          jdbc_driver_jar: options[:jdbc_driver_jar] || ENV['SNEAQL_JDBC_DRIVER_JAR'],
          jdbc_driver_class: options[:jdbc_driver_class] || ENV['SNEAQL_JDBC_DRIVER_CLASS']
        }

        # report the first missing value, in declaration order
        missing, = settings.find { |_name, value| value.nil? }
        raise "#{missing} not provided" if missing

        settings
      rescue => e
        puts e.message
        exit 1
      end

      # Loads the JDBC driver jar and imports the driver class into JRuby.
      # LoadError is rescued explicitly because it is not a StandardError.
      # NOTE(review): require_relative resolves the jar path relative to this
      # script's directory, not the working directory - confirm that is intended.
      # @param [String] jar path of the JDBC driver jar
      # @param [String] driver_class fully qualified driver class name
      # @raise [RuntimeError] when the jar or the class cannot be loaded
      def load_jdbc_driver(jar, driver_class)
        require_relative jar
        java_import driver_class
      rescue StandardError, LoadError => e
        raise "jdbc driver error: #{e.message}"
      end

      # Logs the error message and its backtrace, then exits with status 1.
      # @param [Exception] error
      def log_error_and_exit(error)
        logger.error error.message
        Array(error.backtrace).each { |b| logger.error b }
        exit 1
      end
    end
  end
end
272
+
273
# Entry point: hand the command line arguments to the Thor-based CLI class.
+ Sneaql::Standard::CLI.start(ARGV)
data/lib/sneaql.rb ADDED
@@ -0,0 +1,284 @@
1
+ require 'jdbc_helpers'
2
+ require 'logger'
3
+
4
+ #require_relative 'sneaql_lib/logging.rb'
5
+ require_relative 'sneaql_lib/exceptions.rb'
6
+ require_relative 'sneaql_lib/core.rb'
7
+ require_relative 'sneaql_lib/lock_manager.rb'
8
+ require_relative 'sneaql_lib/repo_manager.rb'
9
+ require_relative 'sneaql_lib/step_manager.rb'
10
+ require_relative 'sneaql_lib/parser.rb'
11
+ require_relative 'sneaql_lib/expressions.rb'
12
+ require_relative 'sneaql_lib/recordset.rb'
13
+ require_relative 'sneaql_lib/database_manager.rb'
14
+ require_relative 'sneaql_lib/standard_db_objects.rb'
15
+
16
+ # module for sneaql
17
# module for sneaql
module Sneaql
  # Manages and executes a SneaQL transform.
  class Transform
    attr_reader :current_step
    attr_reader :current_statement
    attr_reader :start_time
    attr_reader :end_time
    attr_reader :exit_code
    attr_reader :transform_error
    attr_reader :status

    # Valid transform statuses
    # :initializing, :connecting_to_database, :running, :completed, :error,
    # :validating, :validated
    # @return [Array<Symbol>] array of valid transform statuses
    def valid_statuses
      %i[initializing connecting_to_database running completed error validating validated]
    end

    # Sets the current status of the transform.
    # Must be a valid status type or it will not be set.
    # Override this if you want to implement a custom status communication to
    # an external target.
    # @param [Symbol] status
    # @see #valid_statuses
    def current_status(status)
      @status = status if valid_statuses.include?(status)
    end

    # Creates a SneaQL transform object.
    # @example
    #   t=Sneaql::Transform.new({
    #     transform_name: 'test-transform',
    #     repo_base_dir: "test/fixtures/test-transform",
    #     repo_type: 'local',
    #     database: 'sqlite',
    #     jdbc_url: 'jdbc:sqlite:memory',
    #     db_user: 'dbuser',
    #     db_pass: 'password',
    #     step_metadata_manager_type: 'local_file',
    #     step_metadata_file_path: "test/fixtures/test-transform/steps.json"
    #   }, logger)
    #
    #   t.run
    # @param [Hash] params various parameters are passed to define the transform;
    #   the transform is run immediately when params[:run] is true
    # @param [Logger] logger custom logger if provided (otherwise a logger writing to STDOUT is created)
    def initialize(params, logger = nil)
      @logger = logger || Logger.new(STDOUT)

      @start_time = Time.new.utc

      current_status(:initializing)
      @params = params

      @exit_code = 0

      @transform_name = @params[:transform_name]
      @transform_lock_id = @params[:transform_lock_id]
      @jdbc_url = @params[:jdbc_url]
      @db_user = @params[:db_user]
      @db_pass = @params[:db_pass]
      @database = @params[:database]

      run if @params[:run] == true
    end

    # Validates the transform: parses every step and checks the arguments of
    # all statements. No JDBC connection is created here.
    # Ends in status :validated, or :error when a transform error was recorded.
    def validate
      @expression_handler = create_expression_handler
      @recordset_manager = create_recordset_manager
      @repo_manager = create_repo_manager
      @steps = create_metadata_manager
      @parsed_steps = create_parsed_steps(@steps)
      current_status(:validating)
      validate_parsed_steps(@parsed_steps)
    rescue Sneaql::Exceptions::TransformIsLocked => e
      @transform_error = e
      @logger.info(e.message)
    rescue Sneaql::Exceptions::SQLTestExitCondition => e
      @transform_error = nil
      @logger.info(e.message)
    rescue => e
      record_failure(e)
    ensure
      record_completion(:validated, 'validation time')
    end

    # Runs the actual transform.
    # Unexpected errors set the exit code to 1; a locked transform is recorded
    # as the transform error without changing the exit code. Ends in status
    # :completed, or :error when a transform error was recorded.
    def run
      @expression_handler = create_expression_handler
      @recordset_manager = create_recordset_manager
      @repo_manager = create_repo_manager
      @lock_manager = create_lock_manager if @params[:locked_transform] == true
      @steps = create_metadata_manager
      @parsed_steps = create_parsed_steps(@steps)
      validate_parsed_steps(@parsed_steps)
      @jdbc_connection = create_jdbc_connection
      current_status(:running)
      iterate_steps_and_statements
    rescue Sneaql::Exceptions::TransformIsLocked => e
      @transform_error = e
      @logger.info(e.message)
    rescue Sneaql::Exceptions::SQLTestExitCondition => e
      @transform_error = nil
      @logger.info(e.message)
    rescue => e
      record_failure(e)
    ensure
      # @lock_manager is only assigned once the lock was acquired, so a nil
      # value means there is nothing to release (lock held elsewhere, or a
      # failure before the lock manager was created)
      @lock_manager.remove_lock if @lock_manager
      @jdbc_connection.close if @jdbc_connection
      record_completion(:completed, 'runtime')
    end

    # Creates an ExpressionHandler object
    # @return [Sneaql::Core::ExpressionHandler]
    def create_expression_handler
      Sneaql::Core::ExpressionHandler.new(ENV, @logger)
    end

    # Creates a RepoDownloadManager object
    # The actual object returned depends upon params[:repo_type] provided at initialize.
    # @return [Sneaql::Core::RepoDownloadManager]
    def create_repo_manager
      Sneaql::Core.find_class(:repo_manager, @params[:repo_type]).new(@params, @logger)
    end

    # Creates a TransformLockManager object and acquires the transform lock.
    # @raise [Sneaql::Exceptions::TransformIsLocked] if the lock can not be acquired
    # @return [Sneaql::TransformLockManager]
    def create_lock_manager
      # create a lock manager for this transform (uses a separate connection)
      lock_manager = Sneaql::TransformLockManager.new(@params, @logger)
      raise Sneaql::Exceptions::TransformIsLocked unless lock_manager.acquire_lock == true
      lock_manager
    end

    # Creates a StepMetadataManager object and returns its steps.
    # The actual object used depends upon params[:step_metadata_manager_type] provided at initialize.
    # @return the value of the metadata manager's +steps+ method
    def create_metadata_manager
      Sneaql::Core.find_class(
        :step_metadata_manager,
        @params[:step_metadata_manager_type]
      ).new(@params, @logger).steps
    end

    # Creates a StepParser object for each step file defined by the metadata manager.
    # @param [Array] steps takes an array of step definitions
    # @return [Array<Hash>] one hash per step with :parser (Sneaql::Core::StepParser)
    #   and :step_number
    def create_parsed_steps(steps)
      steps.map do |s|
        {
          parser: Sneaql::Core::StepParser.new(
            "#{@repo_manager.repo_base_dir}/#{s[:step_file]}",
            @expression_handler,
            @recordset_manager,
            @logger
          ),
          step_number: s[:step_number]
        }
      end
    end

    # Validates the arguments for all tags.
    # @param [Array<Hash>] steps parsed steps as returned by create_parsed_steps
    # @raise [Sneaql::Exceptions::StatementParsingError] if any step has invalid arguments
    def validate_parsed_steps(steps)
      steps.each do |s|
        raise Sneaql::Exceptions::StatementParsingError unless s[:parser].valid_arguments_in_all_statements?
      end
    end

    # Creates a RecordsetManager object
    # @return [Sneaql::Core::RecordsetManager]
    def create_recordset_manager
      Sneaql::Core::RecordsetManager.new(@expression_handler, @logger)
    end

    # Creates a JDBC connection
    # JDBC drivers must be loaded into jruby before this will work.
    def create_jdbc_connection
      # db specific driver should have been handled by the calling procedure
      current_status(:connecting_to_database)
      JDBCHelpers::ConnectionFactory.new(
        @jdbc_url,
        @db_user,
        @db_pass,
        @logger
      ).connection
    end

    # Performs the actual work of running the transform steps.
    # This method operates within the context of a single
    # database session across all steps. If it fails, it will
    # not rollback automatically unless that is the default RDBMS
    # behavior for a connection that closes before a commit.
    def iterate_steps_and_statements
      @parsed_steps.each do |this_step|
        # set this so that other processes can poll the state
        @current_step = this_step[:step_number]
        parser = this_step[:parser]

        # within a step... iterate through each statement
        parser.statements.each_with_index do |this_stmt, stmt_index|
          # set this so that other processes can poll the state
          @current_statement = stmt_index + 1

          # log some useful info
          @logger.info("step: #{@current_step} statement: #{@current_statement}")
          @expression_handler.output_all_session_variables

          # get the command hash for the current statement
          this_cmd = parser.command_at_index(stmt_index)
          @logger.debug(this_cmd)

          # evaluate any variable references in the arguments
          if this_cmd[:arguments]
            this_cmd[:arguments].map! { |a| @expression_handler.evaluate_expression(a) }
          end

          begin
            # instantiate a new instance of the command class
            # and call its action method with arguments
            c = Sneaql::Core.find_class(:command, this_cmd[:command]).new(
              @jdbc_connection,
              @expression_handler,
              @recordset_manager,
              @expression_handler.evaluate_all_expressions(this_stmt),
              @logger
            )

            c.action(*this_cmd[:arguments])
          rescue Sneaql::Exceptions::SQLTestStepExitCondition => e
            # a step exit condition abandons the remaining statements of this
            # step only; processing continues with the next step
            @logger.info e.message
            break
          end
        end
      end
    end

    private

    # Records an unexpected error: exit code 1, :error status, and the
    # message plus every backtrace line written to the log.
    # @param [Exception] error
    def record_failure(error)
      @exit_code = 1
      @transform_error = error
      current_status(:error)
      @logger.error(error.message)
      Array(error.backtrace).each { |b| @logger.error b }
    end

    # Stamps the end time, sets the final status and logs a summary.
    # @param [Symbol] success_status status to set when no transform error was recorded
    # @param [String] duration_label wording used for the elapsed time log line
    def record_completion(success_status, duration_label)
      @end_time = Time.new.utc
      current_status(@transform_error.nil? ? success_status : :error)

      @logger.info("#{@transform_name} #{duration_label} #{@end_time - @start_time}s")
      @logger.info("#{@transform_name} exit code: #{@exit_code} status: #{@status}")
    end
  end
end