bricolage 5.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +4 -0
  3. data/bin/bricolage +6 -0
  4. data/bin/bricolage-jobnet +6 -0
  5. data/jobclass/create.rb +21 -0
  6. data/jobclass/exec.rb +17 -0
  7. data/jobclass/insert-delta.rb +31 -0
  8. data/jobclass/insert.rb +33 -0
  9. data/jobclass/load.rb +39 -0
  10. data/jobclass/my-export.rb +40 -0
  11. data/jobclass/my-migrate.rb +103 -0
  12. data/jobclass/noop.rb +13 -0
  13. data/jobclass/rebuild-drop.rb +37 -0
  14. data/jobclass/rebuild-rename.rb +49 -0
  15. data/jobclass/s3-put.rb +19 -0
  16. data/jobclass/sql.rb +29 -0
  17. data/jobclass/td-delete.rb +20 -0
  18. data/jobclass/td-export.rb +30 -0
  19. data/jobclass/unload.rb +30 -0
  20. data/jobclass/wait-file.rb +48 -0
  21. data/lib/bricolage/application.rb +260 -0
  22. data/lib/bricolage/commandutils.rb +52 -0
  23. data/lib/bricolage/configloader.rb +126 -0
  24. data/lib/bricolage/context.rb +108 -0
  25. data/lib/bricolage/datasource.rb +144 -0
  26. data/lib/bricolage/eventhandlers.rb +47 -0
  27. data/lib/bricolage/exception.rb +47 -0
  28. data/lib/bricolage/filedatasource.rb +42 -0
  29. data/lib/bricolage/filesystem.rb +165 -0
  30. data/lib/bricolage/genericdatasource.rb +37 -0
  31. data/lib/bricolage/job.rb +212 -0
  32. data/lib/bricolage/jobclass.rb +98 -0
  33. data/lib/bricolage/jobfile.rb +100 -0
  34. data/lib/bricolage/jobflow.rb +389 -0
  35. data/lib/bricolage/jobnetrunner.rb +264 -0
  36. data/lib/bricolage/jobresult.rb +74 -0
  37. data/lib/bricolage/logger.rb +52 -0
  38. data/lib/bricolage/mysqldatasource.rb +223 -0
  39. data/lib/bricolage/parameters.rb +653 -0
  40. data/lib/bricolage/postgresconnection.rb +78 -0
  41. data/lib/bricolage/psqldatasource.rb +449 -0
  42. data/lib/bricolage/resource.rb +68 -0
  43. data/lib/bricolage/rubyjobclass.rb +42 -0
  44. data/lib/bricolage/s3datasource.rb +144 -0
  45. data/lib/bricolage/script.rb +120 -0
  46. data/lib/bricolage/sqlstatement.rb +351 -0
  47. data/lib/bricolage/taskqueue.rb +156 -0
  48. data/lib/bricolage/tddatasource.rb +116 -0
  49. data/lib/bricolage/variables.rb +208 -0
  50. data/lib/bricolage/version.rb +4 -0
  51. data/lib/bricolage.rb +8 -0
  52. data/libexec/sqldump +9 -0
  53. data/libexec/sqldump.Darwin +0 -0
  54. data/libexec/sqldump.Linux +0 -0
  55. data/test/all.rb +3 -0
  56. data/test/home/config/development/database.yml +57 -0
  57. data/test/home/config/development/password.yml +2 -0
  58. data/test/home/subsys/separated.job +1 -0
  59. data/test/home/subsys/separated.sql +1 -0
  60. data/test/home/subsys/unified.jobnet +1 -0
  61. data/test/home/subsys/unified.sql.job +5 -0
  62. data/test/test_filesystem.rb +19 -0
  63. data/test/test_parameters.rb +401 -0
  64. data/test/test_variables.rb +114 -0
  65. metadata +192 -0
@@ -0,0 +1,47 @@
1
module Bricolage
  # An ordered list of hook callables that are all invoked for one event.
  class EventHandlers
    def initialize
      @handlers = []
    end

    # Appends +handler+ (an object responding to #call) to the list.
    def add(handler)
      @handlers << handler
    end

    # Calls each handler with +event+ in registration order.  A StandardError
    # raised by a handler is reported on $stderr (class and message first,
    # then the backtrace with one tab-indented frame per line) and the
    # remaining handlers still run.
    def run(event)
      @handlers.each do |hook|
        begin
          hook.call(event)
        rescue => ex
          $stderr.puts "hook error: #{ex.class}: #{ex.message}"
          ex.backtrace.each { |frame| $stderr.puts "\t#{frame}" }
        end
      end
    end
  end

  # For every hook type listed below, two module-level methods are defined:
  #
  #   Bricolage.<type> { |event| ... }    # registers the block as a handler
  #   Bricolage.run_<type>_hooks(event)   # runs all handlers of that type
  #
  # The EventHandlers object is also stored in the module's @<type>
  # instance variable.
  %w[before_option_parsing before_all_jobs before_job after_job after_all_jobs].each do |hook_name|
    registry = EventHandlers.new
    instance_variable_set :"@#{hook_name}", registry
    define_singleton_method(hook_name.to_sym) { |&action| registry.add(action) }
    define_singleton_method(:"run_#{hook_name}_hooks") { |event| registry.run(event) }
  end

  # Event value objects.  Presumably these are what callers hand to the
  # matching run_*_hooks method -- the call sites are not in this file.
  BeforeAllJobsEvent = Struct.new(:flow_id, :queue)
  BeforeJobEvent     = Struct.new(:job)
  AfterJobEvent      = Struct.new(:result)
  AfterAllJobsEvent  = Struct.new(:succeeded, :queue)
end
@@ -0,0 +1,47 @@
1
module Bricolage

  ##
  # Common super-class of the bricolage exceptions that can be handled.
  class ApplicationError < StandardError
  end

  ##
  # Job failure.
  # A temporary condition that may occur in a production environment,
  # e.g. a source data error or an SQL error.
  class JobFailure < ApplicationError
  end

  ##
  # Any SQL-related failure.  Optionally keeps the exception it was
  # built from, available through #original.
  class SQLException < JobFailure
    class << self
      # Builds an SQLException carrying +ex+'s message and +ex+ itself.
      def wrap(ex)
        new(ex.message, ex)
      end
    end

    attr_reader :original

    def initialize(msg, orig = nil)
      super(msg)
      @original = orig
    end
  end

  ##
  # Job error.
  # This exception should NOT be thrown in a production environment.
  # The developer must fix the source code or the configuration so that
  # this exception is never raised.
  class JobError < ApplicationError
  end

  ##
  # Command-line option errors (should NOT be thrown in production environment)
  class OptionError < JobError
  end

  ##
  # User parameter errors (should NOT be thrown in production environment)
  class ParameterError < JobError
  end

  ##
  # Bad code in bricolage core or job classes.
  # This exception should NOT be thrown in ANY user environment.
  # Note that it derives from Exception, so a bare `rescue` does not catch it.
  class FatalError < Exception
  end

end
@@ -0,0 +1,42 @@
1
require 'bricolage/datasource'
require 'fileutils'
require 'stringio'
3
+
4
module Bricolage

  # Data source registered under the type name 'file'.  Its tasks operate
  # on files through Dir.glob and FileUtils.
  class FileDataSource < DataSource
    declare_type 'file'

    # +opts+ is accepted and ignored; this data source keeps no settings.
    def initialize(opts)
    end

    # Returns a new FileTask bound to this data source.
    def new_task
      FileTask.new(self)
    end
  end

  class FileTask < DataSourceTask
    # Queues an action that removes every file matching the glob pattern +src+.
    def remove(src)
      add Remove.new(src)
    end

    # "rm -f" for a glob pattern.
    class Remove < Action
      def initialize(src)
        @src = src
      end

      # Paths currently matching the pattern; evaluated on each call.
      def source_files
        Dir.glob(@src)
      end

      # Shell-like description of the action (it is not executed as a command).
      def source
        "rm -f #{@src}"
      end

      # Deletes the matching files, ignoring missing ones, and returns nil.
      # FileUtils must be loaded for this call; this file did not require it.
      def run
        FileUtils.rm_f source_files, verbose: true
        nil
      end
    end
  end

end
@@ -0,0 +1,165 @@
1
+ require 'bricolage/resource'
2
+ require 'bricolage/exception'
3
+ require 'pathname'
4
+
5
module Bricolage

  # A view of a bricolage home directory (or, for ScopedFileSystem, of one
  # subsystem directory below it) that resolves names to Pathname objects.
  class FileSystem
    # Chooses the file system from the command-line inputs.
    #
    # home_path:: value of --home (String or Pathname) or nil
    # job_path::  path of a job file or nil
    # env::       environment name, kept as #environment
    #
    # With a job path, the home directory is taken from the job file's
    # location and the subsystem-scoped file system is returned; a --home that
    # points elsewhere raises OptionError.  Otherwise the home is --home,
    # then $BRICOLAGE_HOME, then the current directory.
    def FileSystem.for_option_pathes(home_path, job_path, env)
      if job_path
        home, subsys_id = extract_home_dirs(job_path)
        # Pathname() lets --home be a String here, as in the other branches.
        if home_path && Pathname(home_path).realpath.to_s != home.realpath.to_s
          raise OptionError, "--home option and job file is exclusive"
        end
        new(home, env).subsystem(subsys_id)
      elsif home_path
        new(home_path, env)
      elsif home = ENV['BRICOLAGE_HOME']
        new(home, env)
      else
        new(Pathname.getwd, env)
      end
    end

    # job_path -> [home_path, subsys_id]
    # Raises ParameterError when the job file cannot be resolved.
    def FileSystem.extract_home_dirs(job_path)
      subsys_path = Pathname(job_path).realpath.parent
      return subsys_path.parent, subsys_path.basename
    rescue SystemCallError => err
      raise ParameterError, "failed to access job file: #{err.message}"
    end

    def initialize(path, env)
      @path = Pathname(path)
      @environment = env
      @subsystems = {}   # subsystem id (String) -> ScopedFileSystem
    end

    # false for the home-level file system; ScopedFileSystem overrides this.
    def scoped?
      false
    end

    attr_reader :path
    attr_reader :environment

    def inspect
      "\#<#{self.class} #{@path}>"
    end

    # The home-level file system (self here).
    def root
      self
    end

    # Returns the (cached) ScopedFileSystem for subsystem +id+.
    # Raises ParameterError unless <home>/<id> is a directory.
    def subsystem(id)
      @subsystems[id.to_s] ||= begin
        unless root.relative(id).directory?
          raise ParameterError, "no such subsystem: #{id}"
        end
        ScopedFileSystem.new(root, id)
      end
    end

    # All subsystems: home subdirectories whose name consists of word
    # characters only, except 'config'.
    def subsystems
      root.subdirectories
          .map {|path| path.basename.to_s }
          .select {|name| /\A\w+\z/ =~ name }
          .reject {|name| name == 'config' }
          .map {|name| subsystem(name) }
    end

    def subdirectories
      @path.children(true).select {|path| path.directory? }
    end

    def home_path
      root.path
    end

    # Directory of the current subsystem, or nil when not scoped.
    def job_dir
      scoped? ? @path : nil
    end

    def exist?(name)
      relative(name).exist?
    end

    def file(name)
      FileResource.new(relative(name))
    end

    def root_relative_path(rel)
      root.relative_path(rel)
    end

    alias root_relative root_relative_path

    # Resolves +name+ against this file system's path; absolute names are
    # returned unchanged (as a Pathname).
    def relative_path(name)
      path = Pathname(name)
      if path.absolute?
        path
      else
        @path + path
      end
    end

    alias relative relative_path

    # typed_name("make_master", "sql") -> Pathname("$prefix/make_master.sql")
    # A name that already contains a dot is used as is.
    def typed_name(name, type)
      relative(name.count('.') > 0 ? name : "#{name}.#{type}")
    end

    # typed_file("make_master", "sql") -> FileResource("$prefix/make_master.sql")
    def typed_file(name, type)
      FileResource.new(typed_name(name, type))
    end

    # A name without '/' is looked up as a typed file in this scope;
    # any other name is resolved from the home directory.
    def parameter_file(name, type)
      name.count('/') == 0 ? typed_file(name, type) : root.file(name)
    end

    def parameter_file_loader
      ConfigLoader.new(home_path)
    end

    # Candidate locations of config file +name+: the common one first, then
    # the one specific to the current environment.
    def config_pathes(name)
      [ "config/#{name}", "config/#{@environment}/#{name}" ].map {|rel| root.relative(rel) }
    end

    # Path of the job file for job +id+: "<id>.job" if it exists, otherwise
    # the first "<id>.*.job" in sorted order, otherwise "<id>.job" again
    # (which then does not exist).
    def job_file(id)
      path = typed_name(id, 'job')
      return path if path.exist?
      glob("#{id}.*.job").first or path
    end

    # Pathnames matching +pattern+ below this file system's path, sorted.
    # The explicit sort keeps the order (and thus #job_file's choice) stable
    # on Ruby versions where Dir.glob does not sort by default.
    def glob(pattern)
      Dir.glob("#{@path}/#{pattern}").sort.map {|path| Pathname(path) }
    end

    ## */*.TYPE
    def all_typed_pathes(type)
      subsystems.map {|subsys| subsys.typed_pathes(type) }.flatten
    end

    # Regular files directly under this path whose extension is ".<type>".
    def typed_pathes(type)
      @path.children.select {|path| path.file? and path.extname.to_s == ".#{type}" }
    end
  end

  # File system rooted at one subsystem directory of +parent+.
  class ScopedFileSystem < FileSystem
    def initialize(parent, id)
      super parent.relative(id), parent.environment
      @parent = parent
      @id = id
    end

    def scoped?
      true
    end

    def root
      @parent.root
    end
  end

end
@@ -0,0 +1,37 @@
1
+ require 'bricolage/datasource'
2
+
3
module Bricolage
  # Data source registered under the type name 'generic'; its tasks run
  # arbitrary Ruby blocks.
  class GenericDataSource < DataSource
    declare_type 'generic'

    # FIXME: keyword argument placeholder is required
    # Any keyword options are accepted and discarded.
    def initialize(**)
    end

    # Returns a fresh GenericTask bound to this data source.
    def new_task
      GenericTask.new self
    end
  end

  class GenericTask < DataSourceTask
    # Queues +block+ as an action, optionally described by +label+.
    # Raises FatalError when called without a block.
    def action(label = nil, &block)
      unless block
        raise FatalError, "no block"
      end
      add AnyAction.new(label, block)
    end

    # An action whose body is a caller-supplied block.
    class AnyAction < Action
      def initialize(label, block)
        @label, @block = label, block
      end

      # The label when one was given, otherwise the block's #to_s.
      def source
        return @label if @label
        @block.to_s
      end

      # Invokes the block with +ds+ and returns the block's result.
      def run
        @block.call ds
      end
    end
  end
end
@@ -0,0 +1,212 @@
1
+ require 'bricolage/jobfile'
2
+ require 'bricolage/jobclass'
3
+ require 'bricolage/jobresult'
4
+ require 'bricolage/parameters'
5
+ require 'bricolage/variables'
6
+ require 'bricolage/configloader'
7
+ require 'bricolage/exception'
8
+ require 'fileutils'
9
+
10
module Bricolage

  # A job: a job class bound to a context, with parameter values that come
  # from a job file and/or command-line options.
  class Job
    # For JobNetRunner
    # Loads the job named by +ref+ from the subsystem +ref+ points to.
    def self.load_ref(ref, jobnet_context)
      subsys_ctx = jobnet_context.subsystem(ref.subsystem)
      load_file(subsys_ctx.job_file(ref.name), subsys_ctx)
    end

    # For standalone job (.job file mode)
    # Reads the job file at +path+ and returns a Job with its values bound.
    def self.load_file(path, ctx)
      job_file = JobFile.load(ctx, path)
      job = instantiate(job_file.job_id, job_file.class_id, ctx)
      job.bind_parameters job_file.values
      job
    end

    # For standalone job (command line mode)
    # Creates a Job of class +class_id+ and fixes its global variables.
    def self.instantiate(id, class_id, ctx)
      job = new(id, JobClass.get(class_id), ctx)
      job.init_global_variables
      job
    end

    def initialize(id, job_class, context)
      @id = id
      @job_class = job_class
      @context = context
      @global_variables = nil
      @param_decls = @job_class.get_parameters
      @param_vals = nil       # Parameters::IntermediateValues by *.job
      @param_vals_opt = nil   # Parameters::IntermediateValues by options
      @params = nil
      @variables = nil
    end

    attr_reader :id

    def class_id
      @job_class.id
    end

    def init_global_variables
      # Context#global_variables loads file on each call, fix global variables here.
      @global_variables = @context.global_variables
      @global_variables['bricolage_cwd'] = Dir.pwd
      @global_variables['bricolage_job_dir'] = @context.job_dir.to_s
    end

    attr_reader :params
    attr_reader :global_variables   # valid after #init_global_variables
    attr_reader :variables          # valid after #compile
    attr_reader :script             # valid after #compile

    # For job file
    def bind_parameters(values)
      @param_vals = @param_decls.parse_direct_values(values)
    end

    # For command line options
    def parsing_options(&block)
      @param_vals_opt = @param_decls.parsing_options(&block)
    end

    # Resolves parameters and variables, then builds and binds the script.
    # The steps depend on each other and must stay in this order.
    def compile
      @job_class.invoke_parameters_filter(self)

      file_vars = @param_vals ? @param_vals.variables : Variables.new
      option_vars = @param_vals_opt ? @param_vals_opt.variables : Variables.new

      # We use different variable set for parameter expansion and
      # SQL variable expansion.  Parameter expansion uses global
      # variables and "-v" option variables (both of global and job).
      # Arguments are listed from low to high precedence.
      param_scope = Variables.union(@global_variables, option_vars)
      merged_vals = @param_decls.union_intermediate_values(*[@param_vals, @param_vals_opt].compact)
      @params = merged_vals.resolve(@context, param_scope.resolve)

      # Then, expand SQL variables and check with declarations.
      # Again from low to high precedence.
      sql_scope = Variables.union(
        declarations.default_variables,
        @global_variables,
        @params.variables,   # Like $dest_table
        file_vars,
        option_vars
      )
      @variables = sql_scope.resolve
      @variables.bind_declarations declarations

      @script = @job_class.get_script(@params)
      @script.bind @context, @variables
    end

    # Called from jobclasses (parameters_filter)
    # Defaults the 'sql-file' job-file parameter to the job id.
    def provide_sql_file_by_job_id
      return unless @param_vals && @id
      @param_vals['sql-file'] ||= @id
    end

    # Declarations of the job class for the current parameters (memoized).
    def declarations
      @declarations ||= @job_class.get_declarations(@params)
    end

    def script_source
      @script or raise 'Job#script_source called before #compile'
      @script.source
    end

    def explain
      @script or raise 'Job#explain called before #compile'
      @script.run_explain
    end

    # Runs the compiled script and returns its result.  A JobFailure becomes
    # JobResult.failure and any other exception (FatalError included, hence
    # the rescue of Exception) becomes JobResult.error; both are logged.
    def execute
      logger = @context.logger
      logger.info "#{@context.environment} environment"
      outcome = logger.with_elapsed_time { script.run }
      logger.info outcome.status_string
      outcome
    rescue JobFailure => failure
      logger.error failure.message
      logger.error "failure: #{failure.message}"
      JobResult.failure(failure)
    rescue Exception => error
      logger.exception error
      logger.error "error: #{error.class}: #{error.message}"
      JobResult.error(error)
    end

    # Runs #execute in a forked child process; see #isolate.
    def execute_in_process(log_path)
      isolate(log_path) { execute }
    end

    private

    # Forks, runs the block in the child (stdout/stderr redirected to
    # +log_path+ when given) and exits the child with the result's status.
    # The parent waits and rebuilds a JobResult from the exit status and the
    # message the child left behind.
    def isolate(log_path)
      child = Process.fork do
        Process.setproctitle "bricolage [#{@id}]"
        redirect_stdouts_to log_path if log_path
        outcome = yield
        save_result outcome, log_path
        exit outcome.status
      end
      _pid, status = Process.waitpid2(child)
      restore_result(status, log_path)
    end

    def redirect_stdouts_to(path)
      FileUtils.mkdir_p File.dirname(path)
      # make readable for retrieve_last_match_from_stderr
      File.open(path, 'w+') do |log|
        $stdout.reopen log
        $stderr.reopen log
      end
    end

    # Best effort: writes the message of an unsuccessful result next to the
    # log file; errors while writing are ignored on purpose.
    def save_result(result, log_path)
      return if result.success?
      return if !log_path
      begin
        File.open(error_log_path(log_path), 'w') { |out| out.puts result.message }
      rescue
      end
    end

    def restore_result(st, log_path)
      JobResult.for_process_status(st, restore_message(log_path))
    end

    # Reads (and always deletes) the message file written by #save_result.
    def restore_message(log_path)
      return nil unless log_path
      text = read_if_exist(error_log_path(log_path))
      text && text.strip
    ensure
      FileUtils.rm_f error_log_path(log_path) if log_path
    end

    def error_log_path(log_path)
      "#{log_path}.error"
    end

    # Contents of +path+, or nil when it cannot be read.
    def read_if_exist(path)
      begin
        File.read(path)
      rescue
        nil
      end
    end
  end

end
@@ -0,0 +1,98 @@
1
+ require 'bricolage/parameters'
2
+ require 'bricolage/script'
3
+ require 'bricolage/sqlstatement'
4
+ require 'bricolage/exception'
5
+ require 'pathname'
6
+
7
module Bricolage

  # Definition of a kind of job.  A job class is declared with
  # JobClass.define; the block configures it through #parameters,
  # #parameters_filter, #declarations and #script.
  class JobClass
    # Registry of defined job classes: id (String) -> JobClass.
    CLASSES = {}

    # Defines and registers the job class +id+.  The block is evaluated with
    # the new JobClass as both self and its argument.
    # Raises FatalError when +id+ is already defined.
    def JobClass.define(id, &block)
      id = id.to_s
      # Report the duplicated id itself (the class-level @id is never set).
      raise FatalError, "duplicated job class: #{id.inspect}" if CLASSES[id]
      c = new(id)
      c.instance_exec(c, &block)
      CLASSES[id] = c
    end

    # Directories searched for "<id>.rb" job class files; by default the
    # "jobclass" directory three levels above this file.
    srcdir = Pathname(__FILE__).realpath.parent.parent.parent.cleanpath
    LOAD_PATH = [srcdir + 'jobclass']

    # Returns the job class +id+, loading "<id>.rb" from LOAD_PATH on first
    # use.  The file is evaluated inside the Bricolage module and is expected
    # to call JobClass.define.
    #
    # Raises ParameterError when no such file exists, and FatalError when the
    # file cannot be read or does not define the class.
    def JobClass.get(id)
      unless CLASSES[id.to_s]
        begin
          path = LOAD_PATH.map {|prefix| prefix + "#{id}.rb" }.detect(&:exist?)
          raise ParameterError, "no such job class: #{id}" unless path
          ::Bricolage.module_eval File.read(path), path.to_path, 1
        rescue SystemCallError => err
          raise FatalError, "could not load job class: #{id}: #{err.message}"
        end
        raise FatalError, "job class file loaded but required job class is not defined: #{id}" unless CLASSES[id.to_s]
      end
      CLASSES[id.to_s]
    end

    # Sorted, unique ids of all job class files found in LOAD_PATH
    # (whether or not they have been loaded).
    def JobClass.list
      LOAD_PATH.map {|dir|
        Dir.glob("#{dir}/*.rb").map {|path| File.basename(path, '.rb') }
      }.flatten.uniq.sort
    end

    # Yields every job class defined so far.
    def JobClass.each(&block)
      CLASSES.each_value(&block)
    end

    def initialize(id)
      @id = id
      @parameters = nil
      @parameters_filter = nil   # optional
      @declarations = nil
      @script = nil
    end

    attr_reader :id

    def inspect
      "\#<#{self.class} #{@id}>"
    end

    # Sets the block that declares the parameters (called by #get_parameters).
    def parameters(&block)
      @parameters = block
    end

    # Returns a new Parameters::Declarations filled in by the #parameters block.
    def get_parameters
      Parameters::Declarations.new.tap {|params|
        @parameters.call(params)
      }
    end

    # Sets the optional block that is run with the job before compilation.
    def parameters_filter(&block)
      @parameters_filter = block
    end

    def invoke_parameters_filter(job)
      @parameters_filter.call(job) if @parameters_filter
    end

    # Sets the block that returns the variable declarations for given params.
    def declarations(&block)
      @declarations = block
    end

    # Result of the #declarations block, or an empty Declarations without one.
    def get_declarations(params)
      @declarations ? @declarations.call(params) : Declarations.new
    end

    # Sets the block that builds the script from params.
    def script(&block)
      @script = block
    end

    # Returns a new Script populated by the #script block.
    def get_script(params)
      Script.new.tap {|script|
        @script.call(params, script)
      }
    end
  end

end