bricolage 5.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +4 -0
  3. data/bin/bricolage +6 -0
  4. data/bin/bricolage-jobnet +6 -0
  5. data/jobclass/create.rb +21 -0
  6. data/jobclass/exec.rb +17 -0
  7. data/jobclass/insert-delta.rb +31 -0
  8. data/jobclass/insert.rb +33 -0
  9. data/jobclass/load.rb +39 -0
  10. data/jobclass/my-export.rb +40 -0
  11. data/jobclass/my-migrate.rb +103 -0
  12. data/jobclass/noop.rb +13 -0
  13. data/jobclass/rebuild-drop.rb +37 -0
  14. data/jobclass/rebuild-rename.rb +49 -0
  15. data/jobclass/s3-put.rb +19 -0
  16. data/jobclass/sql.rb +29 -0
  17. data/jobclass/td-delete.rb +20 -0
  18. data/jobclass/td-export.rb +30 -0
  19. data/jobclass/unload.rb +30 -0
  20. data/jobclass/wait-file.rb +48 -0
  21. data/lib/bricolage/application.rb +260 -0
  22. data/lib/bricolage/commandutils.rb +52 -0
  23. data/lib/bricolage/configloader.rb +126 -0
  24. data/lib/bricolage/context.rb +108 -0
  25. data/lib/bricolage/datasource.rb +144 -0
  26. data/lib/bricolage/eventhandlers.rb +47 -0
  27. data/lib/bricolage/exception.rb +47 -0
  28. data/lib/bricolage/filedatasource.rb +42 -0
  29. data/lib/bricolage/filesystem.rb +165 -0
  30. data/lib/bricolage/genericdatasource.rb +37 -0
  31. data/lib/bricolage/job.rb +212 -0
  32. data/lib/bricolage/jobclass.rb +98 -0
  33. data/lib/bricolage/jobfile.rb +100 -0
  34. data/lib/bricolage/jobflow.rb +389 -0
  35. data/lib/bricolage/jobnetrunner.rb +264 -0
  36. data/lib/bricolage/jobresult.rb +74 -0
  37. data/lib/bricolage/logger.rb +52 -0
  38. data/lib/bricolage/mysqldatasource.rb +223 -0
  39. data/lib/bricolage/parameters.rb +653 -0
  40. data/lib/bricolage/postgresconnection.rb +78 -0
  41. data/lib/bricolage/psqldatasource.rb +449 -0
  42. data/lib/bricolage/resource.rb +68 -0
  43. data/lib/bricolage/rubyjobclass.rb +42 -0
  44. data/lib/bricolage/s3datasource.rb +144 -0
  45. data/lib/bricolage/script.rb +120 -0
  46. data/lib/bricolage/sqlstatement.rb +351 -0
  47. data/lib/bricolage/taskqueue.rb +156 -0
  48. data/lib/bricolage/tddatasource.rb +116 -0
  49. data/lib/bricolage/variables.rb +208 -0
  50. data/lib/bricolage/version.rb +4 -0
  51. data/lib/bricolage.rb +8 -0
  52. data/libexec/sqldump +9 -0
  53. data/libexec/sqldump.Darwin +0 -0
  54. data/libexec/sqldump.Linux +0 -0
  55. data/test/all.rb +3 -0
  56. data/test/home/config/development/database.yml +57 -0
  57. data/test/home/config/development/password.yml +2 -0
  58. data/test/home/subsys/separated.job +1 -0
  59. data/test/home/subsys/separated.sql +1 -0
  60. data/test/home/subsys/unified.jobnet +1 -0
  61. data/test/home/subsys/unified.sql.job +5 -0
  62. data/test/test_filesystem.rb +19 -0
  63. data/test/test_parameters.rb +401 -0
  64. data/test/test_variables.rb +114 -0
  65. metadata +192 -0
@@ -0,0 +1,264 @@
1
+ require 'bricolage/application'
2
+ require 'bricolage/context'
3
+ require 'bricolage/jobflow'
4
+ require 'bricolage/taskqueue'
5
+ require 'bricolage/job'
6
+ require 'bricolage/jobresult'
7
+ require 'bricolage/datasource'
8
+ require 'bricolage/variables'
9
+ require 'bricolage/eventhandlers'
10
+ require 'bricolage/logger'
11
+ require 'bricolage/exception'
12
+ require 'bricolage/version'
13
+ require 'pathname'
14
+ require 'optparse'
15
+
16
+ module Bricolage
17
+
18
+ class JobNetRunner
19
# Class-level entry point: instantiates a runner and delegates to its
# instance-level #main.
def self.main
  runner = new
  runner.main
end
22
+
23
# Sets up the runner state.  The flow start time is captured here, at
# construction; flow id and log path stay nil until #main fills them in.
def initialize
  # SIGPIPE is ignored for the whole process.
  Signal.trap('PIPE', 'IGNORE')
  @hooks = ::Bricolage
  @flow_start_time = Time.now
  @flow_id = nil
  @log_path = nil
end
30
+
31
# Process exit codes, re-exported from JobResult under the same names.
EXIT_SUCCESS, EXIT_FAILURE, EXIT_ERROR =
  JobResult::EXIT_SUCCESS, JobResult::EXIT_FAILURE, JobResult::EXIT_ERROR
34
+
35
# Runs the jobnet named on the command line (ARGV).
#
# Steps, in order: parse options, build the context, load the job flow,
# obtain the task queue, enqueue the jobs unless the queue already holds
# some, then either list the jobs, check them, or execute them.
# Always terminates the process through Kernel#exit (or re-raises when
# $DEBUG is set and an OptionError / ApplicationError was caught).
def main
  opts = Options.new(self)
  @hooks.run_before_option_parsing_hooks(opts)
  opts.parse(ARGV)

  jobnet_path = opts.jobnet_file
  @ctx = Context.for_application(nil, jobnet_path, environment: opts.environment, global_variables: opts.global_variables)
  # Flow id has the form "<parent directory name>/<jobnet file name without .jobnet>".
  @flow_id = "#{jobnet_path.dirname.basename}/#{jobnet_path.basename('.jobnet')}"
  @log_path = opts.log_path
  flow = RootJobFlow.load(@ctx, jobnet_path)

  queue = get_queue(opts)
  raise ParameterError, "Job queue is still locked. If you are sure to restart jobnet, #{queue.unlock_help}" if queue.locked?

  unless queue.queued?
    enqueue_jobs(flow, queue)
    logger.info "jobs are queued." if opts.queue_exist?
  end

  if opts.list_jobs?
    list_jobs(queue)
    exit EXIT_SUCCESS
  end

  # Every queued job is loaded and compiled before anything is executed.
  check_jobs(queue)
  if opts.check_only?
    puts "OK"
    exit EXIT_SUCCESS
  end

  run_queue(queue)
  exit EXIT_SUCCESS
rescue OptionError => ex
  raise if $DEBUG
  usage_exit(ex.message, opts.help)
rescue ApplicationError => ex
  raise if $DEBUG
  error_exit(ex.message)
end
69
+
70
# Logger of the current context.  @ctx is assigned in #main, so this
# cannot be used before the context has been built.
def logger
  context = @ctx
  context.logger
end
73
+
74
# Returns the task queue to use: a file-backed queue restored from
# opts.queue_path when a path was given, otherwise a fresh TaskQueue.
def get_queue(opts)
  path = opts.queue_path
  return TaskQueue.new unless path

  FileTaskQueue.restore_if_exist(path)
end
81
+
82
# Puts one JobTask per job of every subnet sequence of +flow+ into +queue+,
# then saves the queue.  The sequence number restarts at 1 for each subnet.
# Returns whatever queue.save returns.
def enqueue_jobs(flow, queue)
  flow.each_subnet_sequence do |subnet|
    position = 0
    subnet.sequential_jobs.each do |job_ref|
      position += 1
      queue.enq(JobTask.new(subnet.ref, position, job_ref))
    end
  end
  queue.save
end
92
+
93
# Prints every queued job to standard output, one per line.  A header line
# is printed before the first task and whenever the jobnet differs from the
# previous task's jobnet.
def list_jobs(queue)
  last_task = nil
  queue.each do |task|
    header_needed = !last_task || last_task.jobnet != task.jobnet
    puts "---- jobnet #{task.jobnet} ---" if header_needed
    puts task.job
    last_task = task
  end
end
103
+
104
# Loads and compiles every queued job against the current context without
# executing it; any error raised by loading or compiling propagates.
def check_jobs(queue)
  queue.each do |task|
    job = Job.load_ref(task.job, @ctx)
    job.compile
  end
end
109
+
110
# Executes the queued tasks one by one, surrounded by the before-all /
# after-all hooks.  On the first unsuccessful result it logs the elapsed
# time and the error, fires the after-all hook with +false+, and exits the
# process with the result's status.
def run_queue(queue)
  @hooks.run_before_all_jobs_hooks(BeforeAllJobsEvent.new(@flow_id, queue))
  queue.consume_each do |task|
    result = execute_job(task.job, queue)
    next if result.success?

    logger.elapsed_time('jobnet total: ', Time.now - @flow_start_time)
    logger.error "[job #{task.job}] #{result.message}"
    @hooks.run_after_all_jobs_hooks(AfterAllJobsEvent.new(false, queue))
    exit result.status
  end
  @hooks.run_after_all_jobs_hooks(AfterAllJobsEvent.new(true, queue))
  logger.elapsed_time('jobnet total: ', Time.now - @flow_start_time)
  logger.info "status all green"
end
125
+
126
# Loads, compiles and executes the job referenced by +ref+, firing the
# before-job / after-job hooks around the execution.  Returns the job's
# result; any exception is logged and converted into JobResult.error.
# The +queue+ argument is not used by this method.
def execute_job(ref, queue)
  logger.debug "job #{ref}"
  job = Job.load_ref(ref, @ctx)
  job.compile
  @hooks.run_before_job_hooks(BeforeJobEvent.new(ref))
  log_path = make_log_path(ref)
  result = job.execute_in_process(log_path)
  @hooks.run_after_job_hooks(AfterJobEvent.new(result))
  result
# NOTE(review): rescuing Exception also catches SystemExit and Interrupt;
# presumably intentional so that every failure becomes a JobResult -- confirm.
rescue Exception => ex
  logger.exception(ex)
  logger.error "unexpected error: #{ref} (#{ex.class}: #{ex.message})"
  JobResult.error(ex)
end
139
+
140
# Expands the %{...} placeholders of the log path template for one job.
#
# Returns nil when no template was given.  Supported placeholders:
# flow_start_date, flow_start_time, job_start_date, job_start_time,
# flow / flow_id, subsystem, job / job_id.  Any other %{name} raises
# ParameterError.
def make_log_path(job_ref)
  template = @log_path
  return nil unless template

  job_started_at = Time.now
  template.gsub(/%\{\w+\}/) do |placeholder|
    # Strip the leading "%{" and the trailing "}".
    case placeholder[2...-1]
    when 'flow_start_date' then @flow_start_time.strftime('%Y%m%d')
    when 'flow_start_time' then @flow_start_time.strftime('%Y%m%d_%H%M%S%L')
    when 'job_start_date'  then job_started_at.strftime('%Y%m%d')
    when 'job_start_time'  then job_started_at.strftime('%Y%m%d_%H%M%S%L')
    when 'flow', 'flow_id' then @flow_id.gsub('/', '::')
    when 'subsystem'       then job_ref.subsystem
    when 'job', 'job_id'   then job_ref.name
    else
      raise ParameterError, "bad log path variable: #{placeholder}"
    end
  end
end
157
+
158
# Reports +msg+ as an error, prints the usage text to standard error and
# terminates the process with status 1.
def usage_exit(msg, usage)
  print_error(msg)
  $stderr.puts(usage)
  exit(1)
end
163
+
164
# Reports +msg+ as an error and terminates the process with status 1.
def error_exit(msg)
  print_error(msg)
  exit(1)
end
168
+
169
# Writes "<program name>: error: <msg>" to standard error.
def print_error(msg)
  prefix = program_name
  $stderr.puts "#{prefix}: error: #{msg}"
end
172
+
173
# Name of the running program: the base name of $0 without its extension.
def program_name
  script = $0
  File.basename(script, '.*')
end
176
+
177
# Command line options of the jobnet runner.
#
# Wraps an OptionParser; after #parse the readers below expose the selected
# values.  The application object is only asked for #program_name (used in
# the banner).
class Options
  def initialize(app)
    @app = app
    @environment = nil
    # Was misspelled as @jobnet_files, leaving the ivar read by #jobnet_file
    # uninitialized until #parse ran.
    @jobnet_file = nil
    @log_path = nil
    @queue_path = nil
    @check_only = false
    @list_jobs = false
    @global_variables = Variables.new
    @parser = OptionParser.new
    define_options @parser
  end

  attr_reader :environment
  attr_reader :jobnet_file
  attr_reader :log_path
  attr_reader :queue_path

  # True when a queue path was given with --queue.
  def queue_exist?
    !!@queue_path
  end

  def check_only?
    @check_only
  end

  def list_jobs?
    @list_jobs
  end

  attr_reader :global_variables

  # Usage text generated by the underlying OptionParser.
  def help
    @parser.help
  end

  # Registers all supported options on +parser+.
  def define_options(parser)
    parser.banner = <<-EndBanner
Synopsis:
  #{@app.program_name} [options] JOB_NET_FILE
Options:
    EndBanner
    parser.on('-e', '--environment=NAME', "Sets execution environment. [default: #{Context::DEFAULT_ENV}]") {|env|
      @environment = env
    }
    parser.on('--log-path=PATH', 'Log file path template.') {|path|
      @log_path = path
    }
    parser.on('--queue=PATH', 'Use job queue.') {|path|
      @queue_path = Pathname(path)
    }
    parser.on('-c', '--check-only', 'Checks job parameters and quit without executing.') {
      @check_only = true
    }
    parser.on('-l', '--list-jobs', 'Lists target jobs without executing.') {
      @list_jobs = true
    }
    parser.on('-v', '--variable=NAME=VALUE', 'Defines global variable.') {|name_value|
      # Split on the first "=" only, so the value itself may contain "=".
      name, value = name_value.split('=', 2)
      @global_variables[name] = value
    }
    # Kernel#puts and Kernel#exit are private methods, so they cannot be
    # called with an explicit receiver (@app.puts raised NoMethodError).
    # Call them as functions instead.
    parser.on('--help', 'Shows this message and quit.') {
      puts parser.help
      exit 0
    }
    parser.on('--version', 'Shows program version and quit.') {
      puts "#{APPLICATION_NAME} version #{VERSION}"
      exit 0
    }
  end

  # Registers an additional option on the underlying parser.
  def on(*args, &block)
    @parser.on(*args, &block)
  end

  # Parses +argv+ destructively; exactly one non-option argument (the jobnet
  # file) must remain.  Raises OptionError on any parse problem.
  def parse(argv)
    @parser.parse! argv
    raise OptionError, "missing jobnet file" if argv.empty?
    raise OptionError, "too many jobnet file" if argv.size > 1
    @jobnet_file = Pathname(argv.first)
  rescue OptionParser::ParseError => ex
    raise OptionError, ex.message
  end
end
262
+ end
263
+
264
+ end
@@ -0,0 +1,74 @@
1
+ require 'bricolage/exception'
2
+
3
+ module Bricolage
4
+
5
# Outcome of a job execution: a type (:success, :failure or :error) plus an
# optional exception, process status and message.
class JobResult
  def JobResult.success
    new(:success)
  end

  # Result for an expected job failure caused by +ex+.
  def JobResult.failure(ex)
    # The type used to be misspelled (:failiure), which made #status report
    # EXIT_ERROR instead of EXIT_FAILURE.
    new(:failure, exception: ex)
  end

  # Result for an unexpected error +ex+.
  def JobResult.error(ex)
    new(:error, exception: ex)
  end

  def JobResult.for_bool(is_success, msg = nil)
    new((is_success ? :success : :failure), message: msg)
  end

  # +st+ must respond to #success? (and #exitstatus, used by #status).
  def JobResult.for_process_status(st, msg = nil)
    new((st.success? ? :success : :failure), process_status: st, message: msg)
  end

  EXIT_SUCCESS = 0
  EXIT_FAILURE = 1   # production time errors; expected / unavoidable job error
  EXIT_ERROR = 2     # development time errors (e.g. bad option, bad parameter, bad configuration)

  def initialize(type, exception: nil, process_status: nil, message: nil)
    @type = type
    @exception = exception
    @process_status = process_status
    @message = message
  end

  def success?
    @type == :success
  end

  attr_reader :exception
  attr_reader :process_status

  # Upper-cased type name, e.g. "SUCCESS".
  def status_string
    @type.to_s.upcase
  end

  # Integer exit status for this result.  The exit status of the process
  # status wins when there is one; a process status without an exit status
  # (exitstatus is nil, e.g. a process terminated by a signal) falls back to
  # the code derived from the result type so that an Integer is always
  # returned.
  def status
    process_code = @process_status && @process_status.exitstatus
    return process_code if process_code

    case @type
    when :success then EXIT_SUCCESS
    when :failure then EXIT_FAILURE
    when :error then EXIT_ERROR
    else EXIT_ERROR
    end
  end

  alias to_i status

  # Explicit message if given, else the exception's message, else a generic
  # text depending on success.
  def message
    if @message
      @message
    elsif @exception
      @exception.message
    else
      success? ? 'suceeded' : 'failed'
    end
  end
end
73
+
74
+ end
@@ -0,0 +1,52 @@
1
+ require 'stringio'
2
+ require 'logger'
3
+
4
module Bricolage
  # Standard library Logger extended with exception and elapsed-time helpers.
  class Logger < ::Logger
    # Logger writing to standard error.  Level is DEBUG when standard error
    # is a terminal and INFO otherwise; lines look like "<time>: <severity>: <message>".
    def Logger.default
      logger = new($stderr)
      logger.level = $stderr.tty? ? Logger::DEBUG : Logger::INFO
      logger.formatter = -> (sev, time, prog, msg) {
        "#{time}: #{sev}: #{msg}\n"
      }
      logger
    end

    # Logs +ex+ at error level: class and message on the first line, then
    # one tab-indented line per backtrace entry.
    def exception(ex)
      buf = StringIO.new
      buf.puts "#{ex.class}: #{ex.message}"
      # Exception#backtrace is nil for an exception object that was never
      # raised; Array() turns that into "no backtrace lines" instead of a
      # NoMethodError.
      Array(ex.backtrace).each do |trace|
        buf.puts "\t" + trace
      end
      error buf.string
    end

    # Yields, returns the block's value, and logs the elapsed wall-clock
    # time with +label+ even when the block raises.
    def with_elapsed_time(label = '')
      start_time = Time.now
      begin
        return yield
      ensure
        elapsed_time(label, Time.now - start_time)
      end
    end

    # Logs "<label><formatted interval>" at info level; +t+ is in seconds.
    def elapsed_time(label, t)
      info "#{label}#{pretty_interval(t)}"
    end

    private

    # Formats a number of seconds as hours/minutes (more than one hour),
    # minutes/seconds (more than one minute) or seconds with two decimals.
    def pretty_interval(seconds)
      case
      when seconds > 60 * 60
        h, secs = seconds.divmod(60 * 60)
        m, _s = secs.divmod(60)
        "%d hours %d minutes" % [h, m]
      when seconds > 60
        "%d minutes %d seconds" % seconds.divmod(60)
      else
        "%.2f secs" % seconds
      end
    end
  end
end
@@ -0,0 +1,223 @@
1
+ require 'bricolage/datasource'
2
+ require 'mysql2'
3
+ require 'json'
4
+ require 'csv'
5
+ require 'stringio'
6
+ require 'open3'
7
+
8
+ module Bricolage
9
+
10
# Data source of type 'mysql', talking to the server through Mysql2::Client.
# A connection only exists inside an #open block.
class MySQLDataSource < DataSource
  declare_type 'mysql'

  # mysql_options:: keyword options, stored as given and splatted into
  #                 Mysql2::Client.new by #open.
  def initialize(**mysql_options)
    @mysql_options = mysql_options
    @client = nil
  end

  # Readers for single connection options; each returns nil when the option
  # was not supplied.
  %i[host port username password database].each do |key|
    define_method(key) { @mysql_options[key] }
  end

  def new_task
    MySQLTask.new(self)
  end

  # Connects, yields self, and closes the connection afterwards even when
  # the block raises.  @client is reset to nil before the connection is closed.
  def open
    @client = Mysql2::Client.new(**@mysql_options)
    begin
      yield self
    ensure
      connection, @client = @client, nil
      connection.close
    end
  end

  # Logs the statement, then runs it on the open connection.
  # Raises FatalError when called outside of an #open block.
  def query(sql, **opts)
    logger.info "[SQL] #{sql}"
    connection_check
    @client.query(sql, **opts)
  end

  private

  def connection_check
    raise FatalError, "#{self.class} used outside of \#open block" unless @client
  end
end
67
+
68
+ class MySQLTask < DataSourceTask
69
# Adds an Export action for +stmt+ to this task; all keyword options are
# passed through to Export.new unchanged.
#
# The default of +sqldump+ used to be written as `sqldump: sqldump`, a
# circular argument reference; it is now false, the same default that
# Export#initialize declares.
def export(stmt, path: nil, format: nil, override: false, gzip: false, sqldump: false)
  add Export.new(stmt, path: path, format: format, override: override, gzip: gzip, sqldump: sqldump)
end
72
+
73
# Action which runs a statement and writes its result set to a file, either
# through the external sqldump helper or with an in-process writer, with
# optional gzip compression.
class Export < Action
  def initialize(stmt, path: nil, format: nil, override: false, gzip: false, sqldump: false)
    @statement = stmt
    @path = path
    @format = format
    @override = override
    @gzip = gzip
    @sqldump = sqldump
  end

  def bind(*args)
    @statement.bind(*args)
  end

  def source
    @statement.stripped_source
  end

  # Uses sqldump only when it was requested, the helper is executable and
  # the format is one sqldump handles; otherwise exports in-process.
  def run
    if @sqldump && sqldump_available? && sqldump_usable?
      export_by_sqldump
    else
      export_by_ruby
    end
    JobResult.success
  end

  # Runs sqldump (piped into gzip when compression is enabled) with the
  # output of the last command redirected to @path.  Raises JobFailure when
  # any command of the pipeline is unsuccessful.
  def export_by_sqldump
    cmds = [[sqldump_path.to_s, "--#{@format}", ds.host, ds.port.to_s, ds.username, ds.password, ds.database, @statement.stripped_source]]
    cmds.push [GZIP_COMMAND] if @gzip
    cmds.last.push({out: @path.to_s})
    ds.logger.info '[CMD] ' + format_pipeline(cmds)
    statuses = Open3.pipeline(*cmds)
    statuses.each_with_index do |st, idx|
      unless st.success?
        cmd = cmds[idx].first
        raise JobFailure, "#{cmd} failed (status #{st.to_i})"
      end
    end
  end

  # Renders the pipeline for logging.  Works on copies of the argument
  # lists and masks index 5 of the first command, which is the password
  # in the sqldump command line built by #export_by_sqldump.
  def format_pipeline(cmds)
    cmds = cmds.map {|args| args.dup }
    cmds.first[5] = '****'
    cmds.map {|args| %Q("#{args.join('" "')}") }.join(' | ')
  end

  def sqldump_available?
    sqldump_path.executable?
  end

  # libexec/sqldump, located two directories above this file's directory.
  def sqldump_path
    Pathname(__dir__).parent.parent + "libexec/sqldump"
  end

  # Platform specific helper: "<sqldump_path>.<platform name>".
  def sqldump_real_path
    Pathname("#{sqldump_path}.#{platform_name}")
  end

  # Output of `uname -s` without its trailing newline.  The newline used
  # to be kept, which produced a path ending in "\n" in #sqldump_real_path.
  def platform_name
    @platform_name ||= `uname -s`.strip
  end

  def sqldump_usable?
    %w[json tsv].include?(@format)
  end

  # Streams the result set through the writer registered for @format and
  # logs progress every 100,000 records.
  # Raises ArgumentError for an unknown format.
  def export_by_ruby
    ds.logger.info "exporting table into #{@path} ..."
    count = 0
    open_target_file(@path) {|f|
      writer_class = WRITER_CLASSES[@format] or raise ArgumentError, "unknown export format: #{@format.inspect}"
      writer = writer_class.new(f)
      rs = ds.query(@statement.stripped_source, as: writer_class.record_format, stream: true, cache_rows: false)
      ds.logger.info "got result set, writing..."
      rs.each do |values|
        writer.write_record values
        count += 1
        ds.logger.info "#{count} records exported..." if count % 100_000 == 0
      end
    }
    ds.logger.info "#{count} records exported; export finished"
  end

  private

  # FIXME: parameterize
  GZIP_COMMAND = 'gzip'

  # Opens +path+ for writing and yields the IO (a pipe into gzip when
  # compression is enabled).  Raises JobFailure when the file already
  # exists and overriding is disabled, or when gzip exits unsuccessfully.
  def open_target_file(path, &block)
    unless @override
      raise JobFailure, "destination file already exists: #{path}" if File.exist?(path)
    end
    if @gzip
      ds.logger.info "enable compression: gzip"
      # Array form + :out redirection: gzip is started without a shell, so
      # quotes, "$" or backticks in the path cannot be interpreted by one.
      result = IO.popen([GZIP_COMMAND], 'w', out: path.to_s, &block)
      # The block form of IO.popen sets $? when the pipe is closed; a failed
      # compressor must not be reported as a finished export.
      unless $?.success?
        raise JobFailure, "#{GZIP_COMMAND} failed (status #{$?.to_i})"
      end
      result
    else
      File.open(path, 'w', &block)
    end
  end
end
174
+
175
# Writes each record as one line of JSON; records are requested as hashes.
class JSONWriter
  def self.record_format
    :hash
  end

  def initialize(f)
    @f = f
  end

  def write_record(values)
    line = JSON.dump(values)
    @f.puts line
  end
end

# Writes each record as one line of tab-joined values (no escaping is
# applied); records are requested as arrays.
class TSVWriter
  def self.record_format
    :array
  end

  def initialize(f)
    @f = f
  end

  def write_record(values)
    line = values.join("\t")
    @f.puts line
  end
end

# Writes each record as one CSV row through the csv library; records are
# requested as arrays.
class CSVWriter
  def self.record_format
    :array
  end

  def initialize(f)
    @csv = CSV.new(f)
  end

  def write_record(values)
    @csv << values
  end
end

# Export format name => writer class.  Left unfrozen so further formats
# can still be registered.
WRITER_CLASSES = {
  'json' => JSONWriter,
  'tsv' => TSVWriter,
  'csv' => CSVWriter
}
221
+ end
222
+
223
+ end