bricolage 5.8.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +4 -0
  3. data/bin/bricolage +6 -0
  4. data/bin/bricolage-jobnet +6 -0
  5. data/jobclass/create.rb +21 -0
  6. data/jobclass/exec.rb +17 -0
  7. data/jobclass/insert-delta.rb +31 -0
  8. data/jobclass/insert.rb +33 -0
  9. data/jobclass/load.rb +39 -0
  10. data/jobclass/my-export.rb +40 -0
  11. data/jobclass/my-migrate.rb +103 -0
  12. data/jobclass/noop.rb +13 -0
  13. data/jobclass/rebuild-drop.rb +37 -0
  14. data/jobclass/rebuild-rename.rb +49 -0
  15. data/jobclass/s3-put.rb +19 -0
  16. data/jobclass/sql.rb +29 -0
  17. data/jobclass/td-delete.rb +20 -0
  18. data/jobclass/td-export.rb +30 -0
  19. data/jobclass/unload.rb +30 -0
  20. data/jobclass/wait-file.rb +48 -0
  21. data/lib/bricolage/application.rb +260 -0
  22. data/lib/bricolage/commandutils.rb +52 -0
  23. data/lib/bricolage/configloader.rb +126 -0
  24. data/lib/bricolage/context.rb +108 -0
  25. data/lib/bricolage/datasource.rb +144 -0
  26. data/lib/bricolage/eventhandlers.rb +47 -0
  27. data/lib/bricolage/exception.rb +47 -0
  28. data/lib/bricolage/filedatasource.rb +42 -0
  29. data/lib/bricolage/filesystem.rb +165 -0
  30. data/lib/bricolage/genericdatasource.rb +37 -0
  31. data/lib/bricolage/job.rb +212 -0
  32. data/lib/bricolage/jobclass.rb +98 -0
  33. data/lib/bricolage/jobfile.rb +100 -0
  34. data/lib/bricolage/jobflow.rb +389 -0
  35. data/lib/bricolage/jobnetrunner.rb +264 -0
  36. data/lib/bricolage/jobresult.rb +74 -0
  37. data/lib/bricolage/logger.rb +52 -0
  38. data/lib/bricolage/mysqldatasource.rb +223 -0
  39. data/lib/bricolage/parameters.rb +653 -0
  40. data/lib/bricolage/postgresconnection.rb +78 -0
  41. data/lib/bricolage/psqldatasource.rb +449 -0
  42. data/lib/bricolage/resource.rb +68 -0
  43. data/lib/bricolage/rubyjobclass.rb +42 -0
  44. data/lib/bricolage/s3datasource.rb +144 -0
  45. data/lib/bricolage/script.rb +120 -0
  46. data/lib/bricolage/sqlstatement.rb +351 -0
  47. data/lib/bricolage/taskqueue.rb +156 -0
  48. data/lib/bricolage/tddatasource.rb +116 -0
  49. data/lib/bricolage/variables.rb +208 -0
  50. data/lib/bricolage/version.rb +4 -0
  51. data/lib/bricolage.rb +8 -0
  52. data/libexec/sqldump +9 -0
  53. data/libexec/sqldump.Darwin +0 -0
  54. data/libexec/sqldump.Linux +0 -0
  55. data/test/all.rb +3 -0
  56. data/test/home/config/development/database.yml +57 -0
  57. data/test/home/config/development/password.yml +2 -0
  58. data/test/home/subsys/separated.job +1 -0
  59. data/test/home/subsys/separated.sql +1 -0
  60. data/test/home/subsys/unified.jobnet +1 -0
  61. data/test/home/subsys/unified.sql.job +5 -0
  62. data/test/test_filesystem.rb +19 -0
  63. data/test/test_parameters.rb +401 -0
  64. data/test/test_variables.rb +114 -0
  65. metadata +192 -0
@@ -0,0 +1,264 @@
1
+ require 'bricolage/application'
2
+ require 'bricolage/context'
3
+ require 'bricolage/jobflow'
4
+ require 'bricolage/taskqueue'
5
+ require 'bricolage/job'
6
+ require 'bricolage/jobresult'
7
+ require 'bricolage/datasource'
8
+ require 'bricolage/variables'
9
+ require 'bricolage/eventhandlers'
10
+ require 'bricolage/logger'
11
+ require 'bricolage/exception'
12
+ require 'bricolage/version'
13
+ require 'pathname'
14
+ require 'optparse'
15
+
16
+ module Bricolage
17
+
18
# Command-line application that loads a jobnet file, queues the jobs it
# contains and executes them one after another.
class JobNetRunner
  # Entry point for the executable: builds a runner and invokes #main.
  def JobNetRunner.main
    new.main
  end

  def initialize
    Signal.trap('PIPE', 'IGNORE')
    # All run_*_hooks callbacks are dispatched through the Bricolage module.
    @hooks = ::Bricolage
    @flow_id = nil
    @flow_start_time = Time.now
    @log_path = nil
  end

  EXIT_SUCCESS = JobResult::EXIT_SUCCESS
  EXIT_FAILURE = JobResult::EXIT_FAILURE
  EXIT_ERROR = JobResult::EXIT_ERROR

  # Parses ARGV, loads the jobnet, fills the queue when it is not queued yet,
  # then lists, checks or runs the jobs depending on the options.
  # Always terminates the process via exit.
  def main
    opts = Options.new(self)
    @hooks.run_before_option_parsing_hooks(opts)
    opts.parse ARGV
    @ctx = Context.for_application(nil, opts.jobnet_file, environment: opts.environment, global_variables: opts.global_variables)
    # Flow ID has the form "<parent directory name>/<jobnet file name without .jobnet>".
    @flow_id = "#{opts.jobnet_file.dirname.basename}/#{opts.jobnet_file.basename('.jobnet')}"
    @log_path = opts.log_path
    flow = RootJobFlow.load(@ctx, opts.jobnet_file)
    queue = get_queue(opts)
    if queue.locked?
      raise ParameterError, "Job queue is still locked. If you are sure to restart jobnet, #{queue.unlock_help}"
    end
    unless queue.queued?
      enqueue_jobs flow, queue
      logger.info "jobs are queued." if opts.queue_exist?
    end
    if opts.list_jobs?
      list_jobs queue
      exit EXIT_SUCCESS
    end
    check_jobs queue
    if opts.check_only?
      puts "OK"
      exit EXIT_SUCCESS
    end
    run_queue queue
    exit EXIT_SUCCESS
  rescue OptionError => ex
    raise if $DEBUG
    usage_exit ex.message, opts.help
  rescue ApplicationError => ex
    raise if $DEBUG
    error_exit ex.message
  end

  # Logger of the current context (only available after #main built @ctx).
  def logger
    @ctx.logger
  end

  # Returns a file-backed queue when --queue was given, an in-memory one otherwise.
  def get_queue(opts)
    if opts.queue_path
      FileTaskQueue.restore_if_exist(opts.queue_path)
    else
      TaskQueue.new
    end
  end

  # Enqueues one JobTask per job of every subnet, numbered from 1 within
  # each subnet, then saves the queue.
  def enqueue_jobs(flow, queue)
    flow.each_subnet_sequence do |subnet|
      seq = 1
      subnet.sequential_jobs.each do |ref|
        queue.enq JobTask.new(subnet.ref, seq, ref)
        seq += 1
      end
    end
    queue.save
  end

  # Prints every queued job, with a header line whenever the jobnet changes.
  def list_jobs(queue)
    prev = nil
    queue.each do |task|
      if not prev or prev.jobnet != task.jobnet
        puts "---- jobnet #{task.jobnet} ---"
      end
      puts task.job
      prev = task
    end
  end

  # Loads and compiles every queued job without executing it.
  def check_jobs(queue)
    queue.each do |task|
      Job.load_ref(task.job, @ctx).compile
    end
  end

  # Executes the queued tasks in order. On the first unsuccessful result the
  # after-all-jobs hooks are run with a false flag and the process exits with
  # that result's status.
  def run_queue(queue)
    @hooks.run_before_all_jobs_hooks(BeforeAllJobsEvent.new(@flow_id, queue))
    queue.consume_each do |task|
      result = execute_job(task.job, queue)
      unless result.success?
        logger.elapsed_time 'jobnet total: ', (Time.now - @flow_start_time)
        logger.error "[job #{task.job}] #{result.message}"
        @hooks.run_after_all_jobs_hooks(AfterAllJobsEvent.new(false, queue))
        exit result.status
      end
    end
    @hooks.run_after_all_jobs_hooks(AfterAllJobsEvent.new(true, queue))
    logger.elapsed_time 'jobnet total: ', (Time.now - @flow_start_time)
    logger.info "status all green"
  end

  # Loads, compiles and executes a single job, surrounded by the per-job hooks.
  # Any exception is logged and converted into an error JobResult.
  def execute_job(ref, queue)
    logger.debug "job #{ref}"
    job = Job.load_ref(ref, @ctx)
    job.compile
    @hooks.run_before_job_hooks(BeforeJobEvent.new(ref))
    result = job.execute_in_process(make_log_path(ref))
    @hooks.run_after_job_hooks(AfterJobEvent.new(result))
    result
  rescue Exception => ex
    # NOTE(review): rescuing Exception also captures SystemExit and Interrupt;
    # kept as-is because run_queue relies on always receiving a JobResult.
    logger.exception ex
    logger.error "unexpected error: #{ref} (#{ex.class}: #{ex.message})"
    JobResult.error(ex)
  end

  # Expands the %{...} placeholders of the --log-path template for one job.
  # Returns nil when no template was given; raises ParameterError for an
  # unknown placeholder.
  def make_log_path(job_ref)
    return nil unless @log_path
    start_time = Time.now
    @log_path.gsub(/%\{\w+\}/) {|var|
      case var
      when '%{flow_start_date}' then @flow_start_time.strftime('%Y%m%d')
      when '%{flow_start_time}' then @flow_start_time.strftime('%Y%m%d_%H%M%S%L')
      when '%{job_start_date}' then start_time.strftime('%Y%m%d')
      when '%{job_start_time}' then start_time.strftime('%Y%m%d_%H%M%S%L')
      when '%{flow}', '%{flow_id}' then @flow_id.gsub('/', '::')
      when '%{subsystem}' then job_ref.subsystem
      when '%{job}', '%{job_id}' then job_ref.name
      else
        raise ParameterError, "bad log path variable: #{var}"
      end
    }
  end

  # Prints the error and the usage text to stderr, then exits with status 1.
  def usage_exit(msg, usage)
    print_error msg
    $stderr.puts usage
    exit 1
  end

  # Prints the error to stderr, then exits with status 1.
  def error_exit(msg)
    print_error msg
    exit 1
  end

  def print_error(msg)
    $stderr.puts "#{program_name}: error: #{msg}"
  end

  # Name of the running program without directory and extension.
  def program_name
    File.basename($PROGRAM_NAME, '.*')
  end

  # Command-line options of the jobnet runner.
  class Options
    def initialize(app)
      @app = app
      @environment = nil
      # Matches the attr_reader below (the original initialized @jobnet_files).
      @jobnet_file = nil
      @log_path = nil
      @queue_path = nil
      @check_only = false
      @list_jobs = false
      @global_variables = Variables.new
      @parser = OptionParser.new
      define_options @parser
    end

    attr_reader :environment
    attr_reader :jobnet_file
    attr_reader :log_path
    attr_reader :queue_path

    # True when a queue file path was given with --queue.
    def queue_exist?
      !!@queue_path
    end

    def check_only?
      @check_only
    end

    def list_jobs?
      @list_jobs
    end

    attr_reader :global_variables

    # Usage text generated by the option parser.
    def help
      @parser.help
    end

    # Registers the banner and all built-in options on the parser.
    def define_options(parser)
      parser.banner = <<-EndBanner
Synopsis:
  #{@app.program_name} [options] JOB_NET_FILE
Options:
      EndBanner
      parser.on('-e', '--environment=NAME', "Sets execution environment. [default: #{Context::DEFAULT_ENV}]") {|env|
        @environment = env
      }
      parser.on('--log-path=PATH', 'Log file path template.') {|path|
        @log_path = path
      }
      parser.on('--queue=PATH', 'Use job queue.') {|path|
        @queue_path = Pathname(path)
      }
      parser.on('-c', '--check-only', 'Checks job parameters and quit without executing.') {
        @check_only = true
      }
      parser.on('-l', '--list-jobs', 'Lists target jobs without executing.') {
        @list_jobs = true
      }
      parser.on('-v', '--variable=NAME=VALUE', 'Defines global variable.') {|name_value|
        name, value = name_value.split('=', 2)
        @global_variables[name] = value
      }
      # puts/exit are private Kernel methods: calling them with an explicit
      # receiver (@app.puts / @app.exit) raises NoMethodError, so they are
      # called directly here.
      parser.on('--help', 'Shows this message and quit.') {
        puts parser.help
        exit 0
      }
      parser.on('--version', 'Shows program version and quit.') {
        puts "#{APPLICATION_NAME} version #{VERSION}"
        exit 0
      }
    end

    # Lets callers holding this object register additional options.
    def on(*args, &block)
      @parser.on(*args, &block)
    end

    # Parses argv destructively; exactly one jobnet file must remain.
    # Raises OptionError on any parse problem.
    def parse(argv)
      @parser.parse! argv
      raise OptionError, "missing jobnet file" if argv.empty?
      raise OptionError, "too many jobnet file" if argv.size > 1
      @jobnet_file = Pathname(argv.first)
    rescue OptionParser::ParseError => ex
      raise OptionError, ex.message
    end
  end
end
263
+
264
+ end
@@ -0,0 +1,74 @@
1
+ require 'bricolage/exception'
2
+
3
module Bricolage

  # Outcome of a job: a type (:success, :failure or :error) plus an optional
  # exception, process status and message.
  class JobResult
    # Result of a job that finished normally.
    def JobResult.success
      new(:success)
    end

    # Result of a job that failed with the given exception.
    def JobResult.failure(ex)
      # Was :failiure, which made #status report EXIT_ERROR instead of EXIT_FAILURE.
      new(:failure, exception: ex)
    end

    # Result of a job that hit an error represented by the given exception.
    def JobResult.error(ex)
      new(:error, exception: ex)
    end

    # Success or failure result chosen by a boolean.
    def JobResult.for_bool(is_success, msg = nil)
      new((is_success ? :success : :failure), message: msg)
    end

    # Success or failure result chosen by st.success?; keeps st for #status.
    def JobResult.for_process_status(st, msg = nil)
      new((st.success? ? :success : :failure), process_status: st, message: msg)
    end

    EXIT_SUCCESS = 0
    EXIT_FAILURE = 1   # production time errors; expected / unavoidable job error
    EXIT_ERROR = 2     # development time errors (e.g. bad option, bad parameter, bad configuration)

    def initialize(type, exception: nil, process_status: nil, message: nil)
      @type = type
      @exception = exception
      @process_status = process_status
      @message = message
    end

    def success?
      @type == :success
    end

    attr_reader :exception
    attr_reader :process_status

    # Upper-cased type name, e.g. "SUCCESS".
    def status_string
      @type.to_s.upcase
    end

    # Integer exit status for this result. Uses the process exit status when
    # one is available, otherwise maps the result type to an EXIT_* constant.
    def status
      # exitstatus is nil when the process status carries no exit code; fall
      # through to the type mapping so callers always get an Integer.
      code = @process_status && @process_status.exitstatus
      return code if code
      case @type
      when :success then EXIT_SUCCESS
      when :failure then EXIT_FAILURE
      when :error then EXIT_ERROR
      else EXIT_ERROR
      end
    end

    alias to_i status

    # Explicit message if given, else the exception message, else a generic text.
    def message
      if @message
        @message
      elsif @exception
        @exception.message
      else
        success? ? 'succeeded' : 'failed'
      end
    end
  end

end
@@ -0,0 +1,52 @@
1
+ require 'stringio'
2
+ require 'logger'
3
+
4
module Bricolage
  # Standard-library Logger extended with exception and elapsed-time helpers.
  class Logger < ::Logger
    # Logger writing to stderr: DEBUG level on a terminal, INFO otherwise,
    # with a "time: severity: message" line format.
    def Logger.default
      logger = new($stderr)
      logger.level = $stderr.tty? ? Logger::DEBUG : Logger::INFO
      logger.formatter = -> (sev, time, prog, msg) {
        "#{time}: #{sev}: #{msg}\n"
      }
      logger
    end

    # Logs an exception at error level: "Class: message" followed by one
    # tab-indented line per backtrace entry.
    def exception(ex)
      buf = StringIO.new
      buf.puts "#{ex.class}: #{ex.message}"
      # backtrace is nil for an exception that was never raised.
      (ex.backtrace || []).each do |trace|
        buf.puts "\t" + trace
      end
      error buf.string
    end

    # Yields, returns the block's value and logs how long the block took,
    # even when the block raises.
    def with_elapsed_time(label = '')
      start_time = Time.now
      begin
        return yield
      ensure
        elapsed_time(label, Time.now - start_time)
      end
    end

    # Logs label followed by the interval t (seconds) at info level.
    def elapsed_time(label, t)
      info "#{label}#{pretty_interval(t)}"
    end

    private

    # Formats a number of seconds as hours/minutes, minutes/seconds or
    # fractional seconds depending on its magnitude.
    def pretty_interval(seconds)
      case
      when seconds > 60 * 60
        h, secs = seconds.divmod(60 * 60)
        m, s = secs.divmod(60)
        "%d hours %d minutes" % [h, m]
      when seconds > 60
        "%d minutes %d seconds" % seconds.divmod(60)
      else
        "%.2f secs" % seconds
      end
    end
  end
end
@@ -0,0 +1,223 @@
1
+ require 'bricolage/datasource'
2
+ require 'mysql2'
3
+ require 'json'
4
+ require 'csv'
5
+ require 'stringio'
6
+ require 'open3'
7
+
8
+ module Bricolage
9
+
10
# Data source of type 'mysql'. Holds the Mysql2 client options and exposes a
# client only for the duration of an #open block.
class MySQLDataSource < DataSource
  declare_type 'mysql'

  def initialize(**mysql_options)
    @mysql_options = mysql_options
    @client = nil
  end

  # Readers for individual connection options (nil when the option is absent).
  %i[host port username password database].each do |key|
    define_method(key) { @mysql_options[key] }
  end

  # Creates a task bound to this data source.
  def new_task
    MySQLTask.new(self)
  end

  # Connects, yields self, and always closes the connection afterwards.
  def open
    @client = Mysql2::Client.new(**@mysql_options)
    begin
      yield self
    ensure
      # Clear @client before closing so it is unset even if close raises.
      conn, @client = @client, nil
      conn.close
    end
  end

  # Logs the SQL text, then runs it on the open connection.
  # Raises FatalError when called outside of an #open block.
  def query(sql, **opts)
    logger.info "[SQL] #{sql}"
    connection_check
    @client.query(sql, **opts)
  end

  private

  def connection_check
    raise FatalError, "#{self.class} used outside of \#open block" unless @client
  end
end
67
+
68
# Task for MySQL data sources. Currently offers a single action, Export,
# which writes the result of a statement to a file.
class MySQLTask < DataSourceTask
  # Adds an Export action for stmt.
  # sqldump now defaults to false; the original default referred to the
  # parameter itself (a circular argument reference).
  def export(stmt, path: nil, format: nil, override: false, gzip: false, sqldump: false)
    add Export.new(stmt, path: path, format: format, override: override, gzip: gzip, sqldump: sqldump)
  end

  # Exports a statement's result set to @path, either through the bundled
  # sqldump program or through a pure-Ruby writer.
  class Export < Action
    # FIXME: parameterize
    GZIP_COMMAND = 'gzip'

    def initialize(stmt, path: nil, format: nil, override: false, gzip: false, sqldump: false)
      @statement = stmt
      @path = path
      @format = format
      @override = override
      @gzip = gzip
      @sqldump = sqldump
    end

    def bind(*args)
      @statement.bind(*args)
    end

    def source
      @statement.stripped_source
    end

    # Uses sqldump only when it was requested, is executable and supports the
    # format; otherwise exports with the Ruby writers.
    def run
      if @sqldump and sqldump_available? and sqldump_usable?
        export_by_sqldump
      else
        export_by_ruby
      end
      JobResult.success
    end

    # Runs sqldump (optionally piped into gzip) with stdout of the last
    # command redirected to @path. Raises JobFailure naming the first
    # command whose exit status is not successful.
    # NOTE(review): unlike export_by_ruby this path does not consult
    # @override before writing to @path -- confirm whether that is intended.
    def export_by_sqldump
      cmds = [[sqldump_path.to_s, "--#{@format}", ds.host, ds.port.to_s, ds.username, ds.password, ds.database, @statement.stripped_source]]
      cmds.push [GZIP_COMMAND] if @gzip
      cmds.last.push({out: @path.to_s})
      ds.logger.info '[CMD] ' + format_pipeline(cmds)
      statuses = Open3.pipeline(*cmds)
      statuses.each_with_index do |st, idx|
        unless st.success?
          cmd = cmds[idx].first
          raise JobFailure, "#{cmd} failed (status #{st.to_i})"
        end
      end
    end

    # Renders the pipeline for logging. Index 5 of the first command is the
    # password argument built by export_by_sqldump, so it is masked.
    def format_pipeline(cmds)
      cmds = cmds.map {|args| args.dup }
      cmds.first[5] = '****'
      cmds.map {|args| %Q("#{args.join('" "')}") }.join(' | ')
    end

    def sqldump_available?
      sqldump_path.executable?
    end

    # libexec/sqldump, located two directories above this file's directory.
    def sqldump_path
      Pathname(__dir__).parent.parent + "libexec/sqldump"
    end

    # Platform-specific sqldump path: "<sqldump_path>.<platform_name>".
    def sqldump_real_path
      Pathname("#{sqldump_path}.#{platform_name}")
    end

    # Output of `uname -s` without the trailing newline (the newline used to
    # end up inside sqldump_real_path).
    def platform_name
      @platform_name ||= `uname -s`.strip
    end

    # sqldump is only used for the json and tsv formats.
    def sqldump_usable?
      %w[json tsv].include?(@format)
    end

    # Streams the result set through the writer registered for @format,
    # logging progress every 100,000 records.
    # Raises ArgumentError for an unregistered format.
    def export_by_ruby
      ds.logger.info "exporting table into #{@path} ..."
      count = 0
      open_target_file(@path) {|f|
        writer_class = WRITER_CLASSES[@format] or raise ArgumentError, "unknown export format: #{@format.inspect}"
        writer = writer_class.new(f)
        rs = ds.query(@statement.stripped_source, as: writer_class.record_format, stream: true, cache_rows: false)
        ds.logger.info "got result set, writing..."
        rs.each do |values|
          writer.write_record values
          count += 1
          ds.logger.info "#{count} records exported..." if count % 100_000 == 0
        end
      }
      ds.logger.info "#{count} records exported; export finished"
    end

    private

    # Opens the destination for writing and passes the IO to the block.
    # Raises JobFailure when the file exists and override was not requested.
    # With gzip enabled the block writes to a gzip process whose stdout is
    # redirected to path.
    def open_target_file(path, &block)
      unless @override
        raise JobFailure, "destination file already exists: #{path}" if File.exist?(path)
      end
      if @gzip
        ds.logger.info "enable compression: gzip"
        # Argv form with an :out redirect: no shell is involved, so quotes or
        # metacharacters in path cannot break or alter the command.
        IO.popen([GZIP_COMMAND], 'w', out: path.to_s, &block)
      else
        File.open(path, 'w', &block)
      end
    end
  end

  # Maps a format name to the writer class used by Export#export_by_ruby.
  WRITER_CLASSES = {}

  # Writes each record (a Hash) as one JSON document per line.
  class JSONWriter
    def JSONWriter.record_format
      :hash
    end

    def initialize(f)
      @f = f
    end

    def write_record(values)
      @f.puts JSON.dump(values)
    end
  end
  WRITER_CLASSES['json'] = JSONWriter

  # Writes each record (an Array) as one tab-joined line; values are not escaped.
  class TSVWriter
    def TSVWriter.record_format
      :array
    end

    def initialize(f)
      @f = f
    end

    def write_record(values)
      @f.puts values.join("\t")
    end
  end
  WRITER_CLASSES['tsv'] = TSVWriter

  # Writes each record (an Array) as one CSV row.
  class CSVWriter
    def CSVWriter.record_format
      :array
    end

    def initialize(f)
      @csv = CSV.new(f)
    end

    def write_record(values)
      @csv.add_row values
    end
  end
  WRITER_CLASSES['csv'] = CSVWriter
end
222
+
223
+ end