bricolage 5.8.7
Listing of files added in bricolage 5.8.7 (line counts show lines added / removed).
- checksums.yaml +7 -0
- data/README.md +4 -0
- data/bin/bricolage +6 -0
- data/bin/bricolage-jobnet +6 -0
- data/jobclass/create.rb +21 -0
- data/jobclass/exec.rb +17 -0
- data/jobclass/insert-delta.rb +31 -0
- data/jobclass/insert.rb +33 -0
- data/jobclass/load.rb +39 -0
- data/jobclass/my-export.rb +40 -0
- data/jobclass/my-migrate.rb +103 -0
- data/jobclass/noop.rb +13 -0
- data/jobclass/rebuild-drop.rb +37 -0
- data/jobclass/rebuild-rename.rb +49 -0
- data/jobclass/s3-put.rb +19 -0
- data/jobclass/sql.rb +29 -0
- data/jobclass/td-delete.rb +20 -0
- data/jobclass/td-export.rb +30 -0
- data/jobclass/unload.rb +30 -0
- data/jobclass/wait-file.rb +48 -0
- data/lib/bricolage/application.rb +260 -0
- data/lib/bricolage/commandutils.rb +52 -0
- data/lib/bricolage/configloader.rb +126 -0
- data/lib/bricolage/context.rb +108 -0
- data/lib/bricolage/datasource.rb +144 -0
- data/lib/bricolage/eventhandlers.rb +47 -0
- data/lib/bricolage/exception.rb +47 -0
- data/lib/bricolage/filedatasource.rb +42 -0
- data/lib/bricolage/filesystem.rb +165 -0
- data/lib/bricolage/genericdatasource.rb +37 -0
- data/lib/bricolage/job.rb +212 -0
- data/lib/bricolage/jobclass.rb +98 -0
- data/lib/bricolage/jobfile.rb +100 -0
- data/lib/bricolage/jobflow.rb +389 -0
- data/lib/bricolage/jobnetrunner.rb +264 -0
- data/lib/bricolage/jobresult.rb +74 -0
- data/lib/bricolage/logger.rb +52 -0
- data/lib/bricolage/mysqldatasource.rb +223 -0
- data/lib/bricolage/parameters.rb +653 -0
- data/lib/bricolage/postgresconnection.rb +78 -0
- data/lib/bricolage/psqldatasource.rb +449 -0
- data/lib/bricolage/resource.rb +68 -0
- data/lib/bricolage/rubyjobclass.rb +42 -0
- data/lib/bricolage/s3datasource.rb +144 -0
- data/lib/bricolage/script.rb +120 -0
- data/lib/bricolage/sqlstatement.rb +351 -0
- data/lib/bricolage/taskqueue.rb +156 -0
- data/lib/bricolage/tddatasource.rb +116 -0
- data/lib/bricolage/variables.rb +208 -0
- data/lib/bricolage/version.rb +4 -0
- data/lib/bricolage.rb +8 -0
- data/libexec/sqldump +9 -0
- data/libexec/sqldump.Darwin +0 -0
- data/libexec/sqldump.Linux +0 -0
- data/test/all.rb +3 -0
- data/test/home/config/development/database.yml +57 -0
- data/test/home/config/development/password.yml +2 -0
- data/test/home/subsys/separated.job +1 -0
- data/test/home/subsys/separated.sql +1 -0
- data/test/home/subsys/unified.jobnet +1 -0
- data/test/home/subsys/unified.sql.job +5 -0
- data/test/test_filesystem.rb +19 -0
- data/test/test_parameters.rb +401 -0
- data/test/test_variables.rb +114 -0
- metadata +192 -0
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'bricolage/exception'
|
2
|
+
require 'pathname'
|
3
|
+
|
4
|
+
module Bricolage
|
5
|
+
|
6
|
+
class JobFile
  # Loads a job definition file.  Plain *.job files are ERB-processed
  # YAML; *.sql.job files are SQL scripts carrying an embedded YAML job
  # definition in a leading /* ... */ comment block.
  def JobFile.load(ctx, path)
    values = if /\.sql\.job\z/ =~ path.to_s
      load_embedded_definition(ctx, path)
    else
      ctx.parameter_file_loader.load_eruby_yaml(path)
    end
    parse(values, path)
  end

  class << JobFile
    private

    # Extracts the YAML job definition embedded in the leading C-style
    # comment of a *.sql.job file and returns the parameter hash, with
    # the remaining SQL attached as the 'sql-file' parameter.
    def load_embedded_definition(ctx, path)
      sql = ctx.parameter_file_loader.read_file(path)
      block = sql.slice(%r{\A/\*.*?^\*/}m) or
          raise ParameterError, "missing embedded job definition block: #{path}"
      yaml = block.sub(%r{\A/\*}, '').sub(%r{^\*/\s*\z}, '')
      begin
        # NOTE(review): YAML.load accepts arbitrary YAML; job files are
        # trusted project files here, but confirm before ever loading
        # definitions from untrusted sources.
        values = YAML.load(yaml)
      rescue => err
        raise ParameterError, "#{path}: embedded job definition syntax error: #{err.message}"
      end
      # Blank out the comment block but keep its newlines so that SQL
      # error messages still report correct line numbers.
      stripped_sql = sql.sub(%r{\A/\*.*?^\*/}m, "\n" * block.count("\n"))
      decls = make_sql_declarations(stripped_sql, values, path)
      stmt = SQLStatement.new(StringResource.new(sql, path), decls)
      set_value values, 'sql-file', stmt, path
      values
    end

    # Sets values[name], rejecting a parameter given both in the YAML
    # block and implied by the embedded SQL script.
    def set_value(values, name, value, path)
      raise ParameterError, "#{path}: #{name} parameter and embedded SQL script is exclusive" if values[name]
      values[name] = value
    end

    # Builds variable declarations for the SQL script: 'dest_table' and
    # any src-tables entries are declared without a default; every other
    # variable defaults to its own name.
    def make_sql_declarations(sql, values, path)
      decls = Declarations.new
      vars = Variable.list(sql)
      # [FIX] the captured local `dest` was never used; plain
      # test-and-remove is enough here.
      if vars.delete('dest_table')
        decls.declare 'dest_table', nil
      end
      if values['src-tables']
        srcs = values['src-tables']
        case srcs
        when String
          decls.declare srcs, nil
          vars.delete srcs
        when Array
          srcs.each do |table|
            decls.declare table, nil
            vars.delete table
          end
        when Hash
          srcs.each_key do |table|
            decls.declare table, nil
            vars.delete table
          end
        else
          raise ParameterError, "unknown src-tables value type: #{srcs.class}"
        end
      end
      vars.each do |name|
        decls.declare name, name
      end
      decls
    end
  end

  # Builds a JobFile from a parameter hash; the 'class' entry selects
  # the job class and is removed from the remaining job parameters.
  def JobFile.parse(values, path)
    values = values.dup
    class_id = values.delete('class') or
        raise ParameterError, "missing job class: #{path}"
    new(class_id, values, path)
  end

  def initialize(class_id, values, path)
    @class_id = class_id
    @values = values
    @path = Pathname(path)
  end

  attr_reader :class_id   # job class identifier (e.g. "sql")
  attr_reader :values     # job parameter hash (without 'class')
  attr_reader :path       # Pathname of the source file

  # Job ID is the file basename without the .job suffix.
  def job_id
    @path.basename('.job').to_s
  end

  # Subsystem name is the parent directory name.
  def subsystem
    @path.parent.basename.to_s
  end
end
|
99
|
+
|
100
|
+
end
|
@@ -0,0 +1,389 @@
|
|
1
|
+
require 'bricolage/exception'
|
2
|
+
require 'tsort'
|
3
|
+
|
4
|
+
module Bricolage
|
5
|
+
|
6
|
+
class JobFlow
  # Reads and parses a jobnet file.  I/O failures are reported as
  # ParameterError so callers see a uniform configuration error.
  def self.load(path)
    File.open(path) do |f|
      parse_stream(f, make_node_ref(path))
    end
  rescue SystemCallError => err
    raise ParameterError, "could not load job flow: #{path} (#{err.message})"
  end

  # Derives this jobnet's reference (subsystem/name) from its file path.
  def self.make_node_ref(path)
    JobNetRef.new(path.parent.basename.to_s, path.basename('.jobnet').to_s, Location.dummy)
  end

  def self.parse_stream(f, ref)
    Parser.new(ref.subsystem).parse_stream(f, ref)
  end

  # NOTE(review): RootJobFlow#initialize takes a context argument, so
  # this no-argument call looks like it would raise ArgumentError —
  # confirm whether JobFlow.root is still used.
  def self.root
    RootJobFlow.new
  end

  ROOT_FLOW_NAME = '*'

  def initialize(ref, location)
    @ref = ref
    @location = location
    @flow = {}   # Ref => [Ref]  (src -> dest)
    @deps = {}   # Ref => [Ref]  (dest -> src)
    @subnets_resolved = false
  end

  attr_reader :ref

  def inspect
    "\#<#{self.class} #{ref}>"
  end

  def name
    ref.to_s
  end

  # Records an edge.  Net nodes are kept out of @flow (execution flow)
  # but always appear in @deps (dependency graph).
  def add_edge(src, dest)
    (@flow[src] ||= []).push dest unless dest.net?
    (@deps[dest] ||= []).push src
  end

  # String-keyed view of the execution flow (src -> [dest, ...]).
  def flow_tree
    @flow.each_with_object({}) do |(src, dests), tree|
      tree[src.to_s] = dests.map(&:to_s)
    end
  end

  # String-keyed view of the dependency graph (dest -> [src, ...]).
  def dependencies
    @deps.each_with_object({}) do |(node, srcs), table|
      table[node.to_s] = srcs.map(&:to_s)
    end
  end

  # Net-type prerequisites referenced anywhere in this flow.
  def dependent_flows
    @deps.values.flatten.select(&:net?).uniq
  end

  # Topologically ordered nodes, dummy nodes removed.
  def sequential_nodes
    tsort.reject(&:dummy?)
  end

  # Topologically ordered job nodes (net nodes removed as well).
  def sequential_jobs
    sequential_nodes.reject(&:net?)
  end

  include TSort

  def tsort_each_node(&block)
    @deps.each_key(&block)
  end

  def tsort_each_child(ref, &block)
    @deps.fetch(ref).each(&block)
  end

  # Finalizes the graph: materializes implicit nodes, then validates.
  def fix_graph
    close_graph
    check_cycle
    check_orphan
  end

  # Attaches loaded subnet flows to the net refs appearing in this flow.
  def resolve_subnets(root_flow)
    @deps.each_key do |node|
      node.flow = root_flow.subnet(node) if node.net?
    end
    @subnets_resolved = true
  end

  def subnets_resolved?
    @subnets_resolved
  end

  private

  # Ensures every referenced node has a (possibly empty) deps entry, so
  # tsort sees the whole graph.
  def close_graph
    @deps.values.flatten.uniq.each do |node|
      @deps[node] ||= []
    end
  end

  # Raises if the dependency graph contains a cycle.
  def check_cycle
    each_strongly_connected_component do |component|
      next if component.size == 1
      cycle = (component + [component.first]).reverse.join(' -> ')
      raise ParameterError, "found cycle in the flow: #{cycle}"
    end
  end

  # Raises if a real job node has no prerequisite at all (it would never
  # be reachable from the start node).
  def check_orphan
    orphan_nodes.each do |node|
      raise ParameterError, "found orphan job in the flow: #{node.location}: #{node}"
    end
  end

  def orphan_nodes
    @deps.select {|node, srcs| srcs.empty? && !node.dummy? && !node.net? }.keys
  end
end
|
137
|
+
|
138
|
+
class RootJobFlow < JobFlow
  # Loads the top-level jobnet and, transitively, every jobnet it
  # references.
  def RootJobFlow.load(ctx, path)
    flow = new(ctx)
    flow.add_subnet JobFlow.load(path)
    flow.fix
    flow
  end

  # [FIX] ctx now defaults to nil: JobFlow.root calls RootJobFlow.new
  # with no arguments, which previously always raised ArgumentError.
  # Without a context, subnets must be added explicitly via add_subnet
  # (on-demand loading in load_jobnet_auto needs a context).
  def initialize(ctx = nil)
    @ctx = ctx
    super ROOT_FLOW_NAME, Location.dummy
    @subnets = {}
  end

  def add_subnet(flow)
    raise ParameterError, "duplicated subnet definition: #{flow.name}" if @subnets.key?(flow.name)
    @subnets[flow.name] = flow
  end

  # Returns the subnet flow for ref, loading it on demand.
  # NOTE(review): @subnets is keyed by name strings while lookup uses a
  # Ref; this relies on Ref#hash / #eql? matching those strings — verify.
  def subnet(ref)
    if flow = @subnets[ref]
      flow
    else
      flow = load_jobnet_auto(ref)
      add_subnet flow
      flow
    end
  end

  def load_jobnet_auto(ref)
    path = @ctx.root_relative_path(ref.relative_path)
    raise ParameterError, "undefined subnet: #{ref}" unless path.file?
    JobFlow.load(path)
  end

  def each_subnet(&block)
    @subnets.values.each(&block)
  end

  # Yields self first, then every subnet flow.
  def each_flow(&block)
    yield self
    @subnets.each_value(&block)
  end

  # Yields subnets in topological execution order.
  def each_subnet_sequence
    sequential_nodes.each do |ref|
      yield subnet(ref)
    end
  end

  # Resolves all cross-jobnet references, then validates every graph.
  def fix
    resolve_subnet_references
    each_subnet do |flow|
      flow.fix_graph
    end
    add_jobnet_dependency_edges
    fix_graph
  end

  private

  # Repeats resolution until no flow remains unresolved, because
  # resolving one flow may auto-load further subnets.
  def resolve_subnet_references
    unresolved = true
    while unresolved
      unresolved = false
      ([self] + @subnets.values).each do |flow|
        unless flow.subnets_resolved?
          flow.resolve_subnets self
          unresolved = true
        end
      end
    end
  end

  def add_jobnet_dependency_edges
    each_subnet do |flow|
      # dummy dependency to ensure to execute all subnets
      add_edge Ref.dummy, flow.ref
      # jobnet -> jobnet dependency
      flow.dependent_flows.each do |dep_flow_ref|
        add_edge dep_flow_ref, flow.ref
      end
    end
  end

  # Orphan checking is meaningless at the root level.
  def check_orphan
  end
end
|
227
|
+
|
228
|
+
class JobFlow # reopen as namespace
|
229
|
+
|
230
|
+
# A node reference: a (subsystem, name) pair identifying a job or a
# jobnet within the flow graph.
class Ref
  START_NAME = '@start'

  # The implicit start node that every flow hangs from.
  def Ref.dummy
    JobRef.new(nil, START_NAME, Location.dummy)
  end

  # Parses "name", "subsys/name", "*subsys/name" (jobnet) or "*" (the
  # root flow) into the matching Ref subclass.
  def Ref.parse(ref, curr_subsys = nil, location = Location.dummy)
    return JobNetRef.new(nil, '', location) if ref == ROOT_FLOW_NAME
    m = %r<\A(\*)?(?:(\w[\w\-]*)/)?(@?\w[\w\-]*)\z>.match(ref) or
        raise ParameterError, "bad job name: #{ref.inspect}"
    is_net, subsys, name = m.captures
    klass = is_net ? JobNetRef : JobRef
    subsys ||= curr_subsys
    raise ParameterError, "missing subsystem: #{ref}" unless subsys
    klass.new(subsys, name, location)
  end

  def initialize(subsys, name, location)
    @subsystem = subsys
    @name = name
    @location = location
  end

  attr_reader :subsystem
  attr_reader :name
  attr_reader :location

  def inspect
    "\#<#{self.class} #{to_s}>"
  end

  # Canonical "subsystem/name" form (subsystem omitted when nil);
  # memoized because refs are used heavily as hash keys.
  def to_s
    @ref ||= [@subsystem, @name].compact.join('/')
  end

  # Refs compare equal when their canonical strings do.
  def ==(other)
    to_s == other.to_s
  end

  alias eql? ==

  def hash
    to_s.hash
  end

  # Dummy (pseudo) nodes are named with a leading '@'.
  def dummy?
    @name[0] == '@'
  end
end
|
282
|
+
|
283
|
+
# Reference to a single job (a leaf node of the flow).
class JobRef < Ref
  # Job refs never denote a jobnet.
  def net?
    false
  end
end
|
288
|
+
|
289
|
+
# Reference to a jobnet; carries the loaded JobFlow once resolved.
class JobNetRef < Ref
  def initialize(subsys, name, location)
    super
    @flow = nil
  end

  # Set by JobFlow#resolve_subnets once the subnet is loaded.
  attr_writer :flow

  def net?
    true
  end

  # Jobnet refs are prefixed with '*' in their canonical form.
  def to_s
    '*' + super
  end

  # Path of the defining file, relative to the project root.
  def relative_path
    "#{subsystem}/#{name}.jobnet"
  end
end
|
311
|
+
|
312
|
+
# A file:lineno position, used to point error messages at the jobnet
# source line that declared a node.
class Location
  # Placeholder position for synthesized nodes.
  def Location.dummy
    new('(dummy)', 0)
  end

  # Position of the current line of an open file handle.
  def Location.for_file(f)
    new(f.path, f.lineno)
  end

  attr_reader :file
  attr_reader :lineno

  def initialize(file, lineno)
    @file = file
    @lineno = lineno
  end

  def inspect
    "\#<#{self.class} #{to_s}>"
  end

  def to_s
    "#{@file}:#{@lineno}"
  end
end
|
337
|
+
|
338
|
+
# Parses jobnet source text into a JobFlow graph.
class Parser
  def initialize(subsys)
    @subsys = subsys
  end

  # Builds the flow for one jobnet file.
  def parse_stream(f, ref)
    flow = JobFlow.new(ref, Location.for_file(f))
    foreach_edge(f) do |src, dest|
      flow.add_edge src, dest
    end
    flow
  end

  private

  name = /\w[\w\-]*/
  node_ref = %r<[@*]?(?:#{name}/)?#{name}>
  START_PATTERN = /\A(#{node_ref})\z/
  DEPEND_PATTERN = /\A(#{node_ref})?\s*->\s*(#{node_ref})\z/

  # Yields [src, dest] for each edge in the stream.  A bare node starts
  # a new chain from the dummy start node; "a -> b" declares an edge,
  # and "-> b" continues from the previous destination.
  def foreach_edge(f)
    last_dest = Ref.dummy
    f.each do |line|
      stmt = line.sub(/\#.*/, '').strip   # strip comments and whitespace
      next if stmt.empty?
      pos = Location.for_file(f)
      if (match = DEPEND_PATTERN.match(stmt))
        origin = match[1] ? ref(match[1], pos) : last_dest
        target = ref(match[2], pos)
        yield origin, target
        last_dest = target
      elsif (match = START_PATTERN.match(stmt))
        target = ref(match[1], pos)
        yield Ref.dummy, target
        last_dest = target
      else
        raise ParameterError, "syntax error at #{pos}: #{line.strip.inspect}"
      end
    end
  end

  # Resolves a textual reference relative to this parser's subsystem.
  def ref(ref_str, location)
    Ref.parse(ref_str, @subsys, location)
  end
end
|
386
|
+
|
387
|
+
end
|
388
|
+
|
389
|
+
end
|