sneaql 0.0.8-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 007ecaf278e3fd4e9fbed5c695fa84713d240e0c
4
+ data.tar.gz: bd7bb2e4dc227543ddf499ef7f8092463f4fb6f9
5
+ SHA512:
6
+ metadata.gz: bfcd8b46a3005f5d75b0b122063413ea537d20b5dec17221aec83d4ba7c4d7aed4ae7d51de29d0fd67f1d3c81c0fdff940baed2c5cf5b46fa6e2381f32d731d6
7
+ data.tar.gz: ed18029224f57ff17604ec1edb57754bd6a88ecd0319cbd15d8adc4e3ed86ea33c435d1dd1ca61c66e76067b1accf32b16a0e70572cd0dae402f7886931ffeb8
data/bin/sneaql ADDED
@@ -0,0 +1,273 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'sneaql'
4
+ require 'thor'
5
+ require 'dotenv'
6
+
7
# Holds the single, process-wide logger used by the sneaql command line tool.
module SneaqlStandard
  # every message from the cli goes through this Logger, which writes to STDOUT
  Log = ::Logger.new(STDOUT)
end
11
+
12
# Convenience accessor for the process-wide logger.
# @return [Logger] the object stored in SneaqlStandard::Log
def logger
  ::SneaqlStandard::Log
end
15
+
16
# Installs a custom formatter on the global logger.
#
# Every line of a (possibly multi-line) message is emitted as its own log
# entry, prefixed with the severity, the timestamp and the id of the
# current thread. Runs of consecutive newlines are treated as one separator.
#
# Timestamps are converted to UTC when the SNEAQL_LOG_UTC environment
# variable is set; otherwise they are printed as received (local TZ).
# The variable is read once, when this method is called.
def configure_logging_format
  utc_timestamps = ENV['SNEAQL_LOG_UTC'] ? true : false

  logger.formatter = proc do |severity, datetime, _progname, msg|
    entries = msg.to_s.split(/\n+/).map do |line|
      stamp = utc_timestamps ? datetime.utc : datetime
      "[#{severity}] #{stamp} tid#{Thread.current.object_id}: #{line}\n"
    end
    entries.join
  end
end
38
+
39
+
40
# Lists the gem specifications known to rubygems, grouped by gem name.
# Specifications are ordered by lower-cased name and then by version
# before grouping, so each group lists its versions in ascending order.
# @return [Hash] gem name => array of Gem::Specification
def local_gems
  ordered_specs = Gem::Specification.sort_by do |spec|
    [spec.name.downcase, spec.version]
  end
  ordered_specs.group_by(&:name)
end
43
+
44
# Requires every sneaql extension gem named in the SNEAQL_EXTENSIONS
# environment variable (a comma separated list of gem names).
#
# Whitespace around the names and empty entries are ignored, and nothing
# happens when the variable is not set. Only gems reported by local_gems
# are required; requested names that are not installed are reported with
# a warning instead of being skipped silently.
def require_sneaql_extensions
  requested = ENV['SNEAQL_EXTENSIONS'].to_s.split(',').map(&:strip).reject(&:empty?)
  return if requested.empty?

  installed = local_gems.keys

  # keep the order of the installed gem list when loading
  installed.each do |gem_name|
    next unless requested.include?(gem_name)
    require gem_name
    logger.info "loading sneaql extension #{gem_name}"
  end

  (requested - installed).each do |gem_name|
    logger.warn "sneaql extension #{gem_name} is not installed, skipping"
  end
end
53
+
54
# sneaql
module Sneaql
  # standard implementation of sneaql
  module Standard
    # Thor based command line interface.
    #
    # Provides two commands that share the same option handling:
    # * exec   - runs the transform found at the given path
    # * dryrun - only validates the transform found at the given path
    #
    # Connection settings come from command line options first and fall
    # back to SNEAQL_* environment variables (optionally loaded from an
    # env file via dotenv).
    class CLI < Thor

      desc "exec PATH_TO_TRANFORM_ROOT", "run sneaql, default action"

      method_option :step_file, :type => :string, :aliases => "-s"
      method_option :env_file, :type => :string, :aliases => "-e"
      method_option :jdbc_url, :type => :string, :aliases => "-url"
      method_option :db_user, :type => :string, :aliases => "-u"
      method_option :db_pass, :type => :string, :aliases => "-p"
      method_option :jdbc_driver_jar, :type => :string, :aliases => "-j"
      method_option :jdbc_driver_class, :type => :string, :aliases => "-c"
      method_option :debug , :default => false, :type => :boolean

      long_desc <<-LONGDESC
      executes the transform provided

      PARAMETERS

      you can provide the following parameters via an environment variable or by putting them in a sneaql.env file
      \n
      * export SNEAQL_JDBC_URL=jdbc:redshift://your-redshift-hostname:5439/dbname \n
      * export SNEAQL_DB_USER="dbadmin" \n
      * export SNEAQL_DB_PASS="password" \n
      * export SNEAQL_JDBC_DRIVER_JAR="java/RedshiftJDBC4-1.1.6.1006.jar" \n
      * export SNEAQL_JDBC_DRIVER_CLASS="com.amazon.redshift.jdbc4.Driver" \n
      \n
      note that if you provide an attribute on the command line it will override the environment variable value.
      \n
      EXAMPLE \n
      $ sneaql exec . \n
      the above will execute the transform in the current directory. note that sneaql.json file must be in this directory or provided via -s option.
      LONGDESC

      # Runs the transform located at transform_path and exits the process
      # with the exit code of the transform (1 on any unexpected error).
      # @param [String] transform_path root directory of the transform
      def exec(transform_path)
        logger.level = options[:debug] ? Logger::DEBUG : Logger::INFO
        run_transform(transform_path) { |transform| transform.run }
      end

      desc "dryrun PATH_TO_TRANFORM_ROOT", "validate transform"

      method_option :step_file, :type => :string, :aliases => "-s"
      method_option :env_file, :type => :string, :aliases => "-e"
      method_option :jdbc_url, :type => :string, :aliases => "-url"
      method_option :db_user, :type => :string, :aliases => "-u"
      method_option :db_pass, :type => :string, :aliases => "-p"
      method_option :jdbc_driver_jar, :type => :string, :aliases => "-j"
      method_option :jdbc_driver_class, :type => :string, :aliases => "-c"

      long_desc <<-LONGDESC
      validates the transform provided without running it

      PARAMETERS

      you can provide the following parameters via an environment variable or by putting them in a sneaql.env file
      \n
      * export SNEAQL_JDBC_URL=jdbc:redshift://your-redshift-hostname:5439/dbname \n
      * export SNEAQL_DB_USER="dbadmin" \n
      * export SNEAQL_DB_PASS="password" \n
      * export SNEAQL_JDBC_DRIVER_JAR="java/RedshiftJDBC4-1.1.6.1006.jar" \n
      * export SNEAQL_JDBC_DRIVER_CLASS="com.amazon.redshift.jdbc4.Driver" \n
      \n
      note that if you provide an attribute on the command line it will override the environment variable value.
      \n
      EXAMPLE \n
      $ sneaql dryrun . \n
      the above will validate the transform in the current directory. note that sneaql.json file must be in this directory or provided via -s option.
      LONGDESC

      # Validates the transform located at transform_path (always logging at
      # debug level) and exits the process with the exit code of the
      # transform (1 on any unexpected error).
      # @param [String] transform_path root directory of the transform
      def dryrun(transform_path)
        logger.level = Logger::DEBUG
        run_transform(transform_path) { |transform| transform.validate }
      end

      private

      # Shared body of the exec and dryrun commands: resolves the settings,
      # builds the transform, yields it to the caller and finally exits with
      # the exit code of the transform. Any StandardError is logged with its
      # backtrace and turns into exit code 1.
      # @param [String] transform_path root directory of the transform
      # @yield [Sneaql::Transform] the transform to run or validate
      def run_transform(transform_path)
        settings = load_settings(transform_path)
        transform = build_transform(settings)
        yield transform

        exit transform.exit_code
      rescue => e
        logger.error e.message
        e.backtrace.each { |b| logger.error b }
        exit 1
      end

      # Resolves every setting needed to run a transform. Command line
      # options win over environment variables; the env file (sneaql.env
      # unless -e is given) is loaded before the environment is consulted.
      # Prints the problem and exits with code 1 if anything is missing.
      # @param [String] transform_path root directory of the transform
      # @return [Hash] resolved settings keyed by symbol
      def load_settings(transform_path)
        Dotenv.load(options[:env_file] || 'sneaql.env')

        base_path = File.expand_path(transform_path)
        settings = {
          base_path: base_path,
          step_file: options[:step_file] || "#{base_path}/sneaql.json",
          jdbc_url: options[:jdbc_url] || ENV['SNEAQL_JDBC_URL'],
          db_user: options[:db_user] || ENV['SNEAQL_DB_USER'],
          db_pass: options[:db_pass] || ENV['SNEAQL_DB_PASS'],
          jdbc_driver_jar: options[:jdbc_driver_jar] || ENV['SNEAQL_JDBC_DRIVER_JAR'],
          jdbc_driver_class: options[:jdbc_driver_class] || ENV['SNEAQL_JDBC_DRIVER_CLASS']
        }

        # each setting is checked against its own value, in declaration order
        settings.each do |name, value|
          raise "#{name} not provided" if value.nil?
        end

        settings
      rescue => e
        puts e.message
        exit 1
      end

      # Prepares logging and extensions, loads the jdbc driver and creates
      # the transform object for the resolved settings.
      # @param [Hash] settings as returned by load_settings
      # @return [Sneaql::Transform]
      def build_transform(settings)
        configure_logging_format
        require_sneaql_extensions if ENV['SNEAQL_EXTENSIONS']

        logger.debug("available sneaql resources: #{Sneaql::Core.class_map}")

        database = Sneaql::Core.database_type(settings[:jdbc_url])
        unless database
          raise 'database specified in jdbc url is not supported, or malformed jdbc url'
        end

        load_jdbc_driver(settings[:jdbc_driver_jar], settings[:jdbc_driver_class])

        Sneaql::Transform.new(
          {
            transform_name: settings[:base_path],
            repo_base_dir: settings[:base_path],
            repo_type: 'local',
            database: database,
            jdbc_url: settings[:jdbc_url],
            db_user: settings[:db_user],
            db_pass: settings[:db_pass],
            step_metadata_manager_type: 'local_file',
            step_metadata_file_path: settings[:step_file]
          },
          logger
        )
      end

      # Loads the jdbc driver jar and imports the driver class.
      # LoadError is rescued explicitly because it is not a StandardError
      # and would otherwise escape the error handling of the commands.
      # @param [String] jar path of the driver jar (passed to require_relative)
      # @param [String] driver_class fully qualified java class name
      def load_jdbc_driver(jar, driver_class)
        require_relative jar
        java_import driver_class
      rescue LoadError, StandardError => e
        raise "jdbc driver error: #{e.message}"
      end
    end
  end
end
272
+
273
+ Sneaql::Standard::CLI.start(ARGV)
data/lib/sneaql.rb ADDED
@@ -0,0 +1,284 @@
1
+ require 'jdbc_helpers'
2
+ require 'logger'
3
+
4
+ #require_relative 'sneaql_lib/logging.rb'
5
+ require_relative 'sneaql_lib/exceptions.rb'
6
+ require_relative 'sneaql_lib/core.rb'
7
+ require_relative 'sneaql_lib/lock_manager.rb'
8
+ require_relative 'sneaql_lib/repo_manager.rb'
9
+ require_relative 'sneaql_lib/step_manager.rb'
10
+ require_relative 'sneaql_lib/parser.rb'
11
+ require_relative 'sneaql_lib/expressions.rb'
12
+ require_relative 'sneaql_lib/recordset.rb'
13
+ require_relative 'sneaql_lib/database_manager.rb'
14
+ require_relative 'sneaql_lib/standard_db_objects.rb'
15
+
16
# module for sneaql
module Sneaql
  # Manages and executes a SneaQL transform.
  #
  # A transform is created from a params hash, then either executed with
  # {#run} or checked with {#validate}. Progress and outcome can be polled
  # through the readers below.
  class Transform
    # step number currently being executed
    attr_reader :current_step
    # 1-based position of the statement currently being executed
    attr_reader :current_statement
    # UTC time the transform object was created
    attr_reader :start_time
    # UTC time run/validate finished
    attr_reader :end_time
    # 0 until an unexpected error occurs, then 1
    attr_reader :exit_code
    # exception that stopped the transform, nil if there was none
    attr_reader :transform_error
    # current status, one of valid_statuses
    attr_reader :status

    # Valid transform statuses
    # :initializing, :connecting_to_database, :running, :completed, :error,
    # :validating, :validated
    # @return [Array] array of valid transform statuses
    def valid_statuses
      [:initializing, :connecting_to_database, :running, :completed, :error, :validating, :validated]
    end

    # Sets the current status of the transform.
    # Must be a valid status type or it will not be set.
    # Override this if you want to implement a custom status communication to
    # an external target.
    # @param [Symbol] status
    # @see valid_statuses
    def current_status(status)
      @status = status if valid_statuses.include?(status)
    end

    # Creates a SneaQL transform object.
    # @example
    #   t=Sneaql::Transform.new({
    #     transform_name: 'test-transform',
    #     repo_base_dir: "test/fixtures/test-transform",
    #     repo_type: 'local',
    #     database: 'sqlite',
    #     jdbc_url: 'jdbc:sqlite:memory',
    #     db_user: 'dbuser',
    #     db_pass: 'password',
    #     step_metadata_manager_type: 'local_file',
    #     step_metadata_file_path: "test/fixtures/test-transform/steps.json"
    #   }, logger)
    #
    #   t.run
    # @param [Hash] params various parameters are passed to define the transform
    # @param [Logger] logger customer logger if provided (otherwise default logger is created)
    def initialize(params, logger = nil)
      @logger = logger ? logger : Logger.new(STDOUT)

      @start_time = Time.new.utc

      current_status(:initializing)
      @params = params

      @exit_code = 0

      @transform_name = @params[:transform_name]
      @transform_lock_id = @params[:transform_lock_id]
      @jdbc_url = @params[:jdbc_url]
      @db_user = @params[:db_user]
      @db_pass = @params[:db_pass]
      @database = @params[:database]

      # the transform starts immediately when params[:run] is true
      run if @params[:run] == true
    end

    # validate the transform.
    # Parses all steps and checks the arguments of every statement without
    # opening a jdbc connection. The outcome is available through
    # exit_code, status and transform_error.
    def validate
      @expression_handler = create_expression_handler
      @recordset_manager = create_recordset_manager
      @repo_manager = create_repo_manager
      @steps = create_metadata_manager
      @parsed_steps = create_parsed_steps(@steps)
      current_status(:validating)
      validate_parsed_steps(@parsed_steps)
    rescue Sneaql::Exceptions::TransformIsLocked => e
      @transform_error = e
      @logger.info(e.message)
    rescue Sneaql::Exceptions::SQLTestExitCondition => e
      @transform_error = nil
      @logger.info(e.message)
    rescue => e
      record_failure(e)
    ensure
      @end_time = Time.new.utc

      if @transform_error.nil?
        current_status(:validated)
      else
        current_status(:error)
      end

      @logger.info("#{@transform_name} validation time #{@end_time - @start_time}s")
      @logger.info("#{@transform_name} exit code: #{@exit_code} status: #{@status}")
    end

    # Runs the actual transform.
    # Steps are parsed and validated before the jdbc connection is opened.
    # The lock (if one was acquired) and the jdbc connection are always
    # released, whatever the outcome. The outcome is available through
    # exit_code, status and transform_error.
    def run
      @expression_handler = create_expression_handler
      @recordset_manager = create_recordset_manager
      @repo_manager = create_repo_manager
      @lock_manager = create_lock_manager if @params[:locked_transform] == true
      @steps = create_metadata_manager
      @parsed_steps = create_parsed_steps(@steps)
      validate_parsed_steps(@parsed_steps)
      @jdbc_connection = create_jdbc_connection
      current_status(:running)
      iterate_steps_and_statements
    rescue Sneaql::Exceptions::TransformIsLocked => e
      @transform_error = e
      @logger.info(e.message)
    rescue Sneaql::Exceptions::SQLTestExitCondition => e
      @transform_error = nil
      @logger.info(e.message)
    rescue => e
      record_failure(e)
    ensure
      # @lock_manager is only set once the lock has been acquired; it is nil
      # when the transform was already locked or failed before locking
      @lock_manager.remove_lock if @lock_manager
      @jdbc_connection.close if @jdbc_connection
      @end_time = Time.new.utc

      if @transform_error.nil?
        current_status(:completed)
      else
        current_status(:error)
      end

      @logger.info("#{@transform_name} runtime #{@end_time - @start_time}s")
      @logger.info("#{@transform_name} exit code: #{@exit_code} status: #{@status}")
    end

    # Creates an ExpressionHandler object
    # @return [Sneaql::Core::ExpressionHandler]
    def create_expression_handler
      Sneaql::Core::ExpressionHandler.new(ENV, @logger)
    end

    # Creates a RepoDownloadManager object
    # The actual object returns depends upon params[:repo_type] provided at initialize.
    # @return [Sneaql::Core::RepoDownloadManager]
    def create_repo_manager
      Sneaql::Core.find_class(:repo_manager, @params[:repo_type]).new(@params, @logger)
    end

    # Creates a TransformLockManager object and acquires the lock.
    # Called by run only when params[:locked_transform] is true.
    # @return [Sneaql::TransformLockManager]
    # @raise [Sneaql::Exceptions::TransformIsLocked] if the lock can not be acquired
    def create_lock_manager
      # create a lock manager for this transform (uses a separate connection)
      lock_manager = Sneaql::TransformLockManager.new(@params, @logger)
      raise Sneaql::Exceptions::TransformIsLocked unless lock_manager.acquire_lock == true
      lock_manager
    end

    # Creates a StepMetadataManager object and returns its steps.
    # The actual object returns depends upon params[:step_metadata_manager_type] provided at initialize.
    # @return the value of the manager's steps method
    def create_metadata_manager
      Sneaql::Core.find_class(
        :step_metadata_manager,
        @params[:step_metadata_manager_type]
      ).new(@params, @logger).steps
    end

    # Creates a StepParser object for each step file defined by the metadata manager.
    # @param [Array] steps takes an array of step definitions
    # @return [Array<Hash>] one hash per step with :parser and :step_number
    def create_parsed_steps(steps)
      steps.map do |s|
        {
          parser: Sneaql::Core::StepParser.new(
            "#{@repo_manager.repo_base_dir}/#{s[:step_file]}",
            @expression_handler,
            @recordset_manager,
            @logger
          ),
          step_number: s[:step_number]
        }
      end
    end

    # Validates the arguments for all tags.
    # @param [Array<Hash>] steps parsed steps as built by create_parsed_steps
    # @raise [Sneaql::Exceptions::StatementParsingError] on the first invalid step
    def validate_parsed_steps(steps)
      steps.each do |s|
        raise Sneaql::Exceptions::StatementParsingError unless s[:parser].valid_arguments_in_all_statements?
      end
    end

    # Creates an RecordsetManager object
    # @return [Sneaql::Core::RecordsetManager]
    def create_recordset_manager
      Sneaql::Core::RecordsetManager.new(@expression_handler, @logger)
    end

    # Creates a JDBC connection
    # JDBC drivers must loaded into jruby before this will work.
    def create_jdbc_connection
      # db specific driver should have been handled by the calling procedure
      current_status(:connecting_to_database)
      JDBCHelpers::ConnectionFactory.new(
        @jdbc_url,
        @db_user,
        @db_pass,
        @logger
      ).connection
    end

    # Performs the actual work of running the transform steps.
    # This method operates within the context of a single
    # database session across all steps. If it fails, it will
    # not rollback automatically unless that is the default RDBMS
    # behavior for a connection that closes before a commit.
    def iterate_steps_and_statements
      @parsed_steps.each do |this_step|
        # set this so that other processes can poll the state
        @current_step = this_step[:step_number]
        # within a step... iterate through each statement
        this_step[:parser].statements.each_with_index do |this_stmt, stmt_index|
          # set this so that other processes can poll the state
          @current_statement = stmt_index + 1

          # log some useful info
          @logger.info("step: #{@current_step} statement: #{@current_statement}")
          @expression_handler.output_all_session_variables

          # get the command hash for the current statement
          this_cmd = this_step[:parser].command_at_index(stmt_index)
          @logger.debug(this_cmd)

          # evaluate any variable references in the arguments
          if this_cmd[:arguments]
            this_cmd[:arguments].map! { |a| @expression_handler.evaluate_expression(a) }
          end

          begin
            # instantiate a new instance of the command class
            # and call it's action method with arguments
            c = Sneaql::Core.find_class(:command, this_cmd[:command]).new(
              @jdbc_connection,
              @expression_handler,
              @recordset_manager,
              @expression_handler.evaluate_all_expressions(this_stmt),
              @logger
            )

            c.action(*this_cmd[:arguments])
          rescue Sneaql::Exceptions::SQLTestStepExitCondition => e
            # a step exit condition only skips the remaining statements of
            # the current step; the following steps are still executed
            @logger.info e.message
            break
          end
        end
      end
    end

    private

    # Records an unexpected error: sets exit code 1 and the error status,
    # then logs the message followed by every line of the backtrace.
    # @param [StandardError] error the exception that stopped the transform
    def record_failure(error)
      @exit_code = 1
      @transform_error = error
      current_status(:error)
      @logger.error(error.message)
      # backtrace is nil for exceptions that were never raised
      (error.backtrace || []).each { |b| @logger.error b }
    end
  end
end