bricolage 5.8.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +4 -0
  3. data/bin/bricolage +6 -0
  4. data/bin/bricolage-jobnet +6 -0
  5. data/jobclass/create.rb +21 -0
  6. data/jobclass/exec.rb +17 -0
  7. data/jobclass/insert-delta.rb +31 -0
  8. data/jobclass/insert.rb +33 -0
  9. data/jobclass/load.rb +39 -0
  10. data/jobclass/my-export.rb +40 -0
  11. data/jobclass/my-migrate.rb +103 -0
  12. data/jobclass/noop.rb +13 -0
  13. data/jobclass/rebuild-drop.rb +37 -0
  14. data/jobclass/rebuild-rename.rb +49 -0
  15. data/jobclass/s3-put.rb +19 -0
  16. data/jobclass/sql.rb +29 -0
  17. data/jobclass/td-delete.rb +20 -0
  18. data/jobclass/td-export.rb +30 -0
  19. data/jobclass/unload.rb +30 -0
  20. data/jobclass/wait-file.rb +48 -0
  21. data/lib/bricolage/application.rb +260 -0
  22. data/lib/bricolage/commandutils.rb +52 -0
  23. data/lib/bricolage/configloader.rb +126 -0
  24. data/lib/bricolage/context.rb +108 -0
  25. data/lib/bricolage/datasource.rb +144 -0
  26. data/lib/bricolage/eventhandlers.rb +47 -0
  27. data/lib/bricolage/exception.rb +47 -0
  28. data/lib/bricolage/filedatasource.rb +42 -0
  29. data/lib/bricolage/filesystem.rb +165 -0
  30. data/lib/bricolage/genericdatasource.rb +37 -0
  31. data/lib/bricolage/job.rb +212 -0
  32. data/lib/bricolage/jobclass.rb +98 -0
  33. data/lib/bricolage/jobfile.rb +100 -0
  34. data/lib/bricolage/jobflow.rb +389 -0
  35. data/lib/bricolage/jobnetrunner.rb +264 -0
  36. data/lib/bricolage/jobresult.rb +74 -0
  37. data/lib/bricolage/logger.rb +52 -0
  38. data/lib/bricolage/mysqldatasource.rb +223 -0
  39. data/lib/bricolage/parameters.rb +653 -0
  40. data/lib/bricolage/postgresconnection.rb +78 -0
  41. data/lib/bricolage/psqldatasource.rb +449 -0
  42. data/lib/bricolage/resource.rb +68 -0
  43. data/lib/bricolage/rubyjobclass.rb +42 -0
  44. data/lib/bricolage/s3datasource.rb +144 -0
  45. data/lib/bricolage/script.rb +120 -0
  46. data/lib/bricolage/sqlstatement.rb +351 -0
  47. data/lib/bricolage/taskqueue.rb +156 -0
  48. data/lib/bricolage/tddatasource.rb +116 -0
  49. data/lib/bricolage/variables.rb +208 -0
  50. data/lib/bricolage/version.rb +4 -0
  51. data/lib/bricolage.rb +8 -0
  52. data/libexec/sqldump +9 -0
  53. data/libexec/sqldump.Darwin +0 -0
  54. data/libexec/sqldump.Linux +0 -0
  55. data/test/all.rb +3 -0
  56. data/test/home/config/development/database.yml +57 -0
  57. data/test/home/config/development/password.yml +2 -0
  58. data/test/home/subsys/separated.job +1 -0
  59. data/test/home/subsys/separated.sql +1 -0
  60. data/test/home/subsys/unified.jobnet +1 -0
  61. data/test/home/subsys/unified.sql.job +5 -0
  62. data/test/test_filesystem.rb +19 -0
  63. data/test/test_parameters.rb +401 -0
  64. data/test/test_variables.rb +114 -0
  65. metadata +192 -0
@@ -0,0 +1,100 @@
1
+ require 'bricolage/exception'
2
+ require 'pathname'
3
+
4
+ module Bricolage
5
+
6
# A job definition read from a job file.
#
# JobFile.load understands two layouts:
# * a path ending in ".sql.job": an SQL script whose leading /* ... */
#   comment block holds the job parameters as YAML;
# * any other path: handed to ctx.parameter_file_loader.load_eruby_yaml.
class JobFile
  # Loads +path+ and returns a new JobFile.
  #
  # Raises ParameterError when the definition has no 'class' entry, or when
  # an embedded definition block is missing, malformed or not a mapping.
  def JobFile.load(ctx, path)
    values =
      if /\.sql\.job\z/ =~ path.to_s
        load_embedded_definition(ctx, path)
      else
        ctx.parameter_file_loader.load_eruby_yaml(path)
      end
    parse(values, path)
  end

  class << JobFile
    private

    # Reads an SQL script with an embedded definition block and returns the
    # parameter Hash, with the SQL statement stored under 'sql-file'.
    def load_embedded_definition(ctx, path)
      sql = ctx.parameter_file_loader.read_file(path)
      block = sql.slice(%r{\A/\*.*?^\*/}m) or
          raise ParameterError, "missing embedded job definition block: #{path}"
      yaml = block.sub(%r{\A/\*}, '').sub(%r{^\*/\s*\z}, '')
      begin
        values = YAML.load(yaml)
      rescue => err
        raise ParameterError, "#{path}: embedded job definition syntax error: #{err.message}"
      end
      # An empty or scalar block parses to a non-Hash value; reject it here
      # instead of failing later with NoMethodError.
      unless values.kind_of?(Hash)
        raise ParameterError, "#{path}: embedded job definition must be a mapping (got #{values.class})"
      end
      # Swap the block for the same number of newlines, so the remaining
      # script keeps its line count.
      stripped_sql = sql.sub(%r{\A/\*.*?^\*/}m, "\n" * block.count("\n"))
      decls = make_sql_declarations(stripped_sql, values, path)
      # NOTE(review): the statement is built from the unstripped text;
      # stripped_sql is only used to discover variables -- confirm intended.
      stmt = SQLStatement.new(StringResource.new(sql, path), decls)
      set_value values, 'sql-file', stmt, path
      values
    end

    # Stores +value+ under +name+; raises ParameterError when the definition
    # already carries a truthy value for that name.
    def set_value(values, name, value, path)
      raise ParameterError, "#{path}: #{name} parameter and embedded SQL script is exclusive" if values[name]
      values[name] = value
    end

    # Builds the Declarations for the variables listed by Variable.list(sql).
    # 'dest_table' and every name in values['src-tables'] are declared with
    # nil; every remaining variable is declared with its own name.
    def make_sql_declarations(sql, values, path)
      decls = Declarations.new
      vars = Variable.list(sql)
      decls.declare 'dest_table', nil if vars.delete('dest_table')
      source_table_names(values['src-tables']).each do |table|
        decls.declare table, nil
        vars.delete table
      end
      vars.each do |name|
        decls.declare name, name
      end
      decls
    end

    # Normalises a 'src-tables' value (absent, String, Array or Hash) into a
    # list of names. Raises ParameterError for any other type.
    def source_table_names(srcs)
      return [] unless srcs
      case srcs
      when String then [srcs]
      when Array  then srcs
      when Hash   then srcs.keys
      else
        raise ParameterError, "unknown src-tables value type: #{srcs.class}"
      end
    end
  end

  # Builds a JobFile from an already loaded parameter Hash. The 'class'
  # entry is removed from a copy of +values+ and becomes the class ID.
  # Raises ParameterError when 'class' is missing.
  def JobFile.parse(values, path)
    values = values.dup
    class_id = values.delete('class') or
        raise ParameterError, "missing job class: #{path}"
    new(class_id, values, path)
  end

  def initialize(class_id, values, path)
    @class_id = class_id
    @values = values
    @path = Pathname(path)
  end

  attr_reader :class_id
  attr_reader :values
  attr_reader :path

  # File name of the job file with a trailing ".job" removed.
  def job_id
    @path.basename('.job').to_s
  end

  # Name of the directory that contains the job file.
  def subsystem
    @path.parent.basename.to_s
  end
end
99
+
100
+ end
@@ -0,0 +1,389 @@
1
+ require 'bricolage/exception'
2
+ require 'tsort'
3
+
4
+ module Bricolage
5
+
6
# A dependency graph of job and jobnet references.
#
# Edges are kept in both directions: @flow maps a source to its
# destinations and @deps maps a destination to its sources. TSort walks
# @deps, so sources are ordered before the nodes that depend on them.
class JobFlow
  # Reads the jobnet file at +path+ and returns the parsed JobFlow.
  # +path+ must respond to #parent and #basename (see make_node_ref).
  # Raises ParameterError when the file cannot be opened or read.
  def JobFlow.load(path)
    File.open(path) {|f|
      parse_stream(f, make_node_ref(path))
    }
  rescue SystemCallError => err
    raise ParameterError, "could not load job flow: #{path} (#{err.message})"
  end

  # Builds the JobNetRef for a jobnet file: the subsystem is the name of
  # the containing directory, the name is the file name without ".jobnet".
  def JobFlow.make_node_ref(path)
    subsys = path.parent.basename.to_s
    name = path.basename('.jobnet').to_s
    JobNetRef.new(subsys, name, Location.dummy)
  end

  def JobFlow.parse_stream(f, ref)
    Parser.new(ref.subsystem).parse_stream(f, ref)
  end

  # Returns a new, empty RootJobFlow. +ctx+ is forwarded to
  # RootJobFlow.new, which requires it; the previous zero-argument call
  # always raised ArgumentError.
  def JobFlow.root(ctx = nil)
    RootJobFlow.new(ctx)
  end

  ROOT_FLOW_NAME = '*'

  def initialize(ref, location)
    @ref = ref
    @location = location
    @flow = {}   # Ref => [Ref] (src->dest)
    @deps = {}   # Ref => [Ref] (dest->src)
    @subnets_resolved = false
  end

  def inspect
    "\#<#{self.class} #{ref}>"
  end

  attr_reader :ref

  def name
    ref.to_s
  end

  # Records the edge src -> dest. Jobnet destinations are recorded only in
  # @deps; they are not listed in the flow tree.
  def add_edge(src, dest)
    (@flow[src] ||= []).push dest unless dest.net?
    (@deps[dest] ||= []).push src
  end

  # The src -> [dest] table with every reference converted to a String.
  def flow_tree
    h = {}
    @flow.each do |src, dests|
      h[src.to_s] = dests.map {|d| d.to_s }
    end
    h
  end

  # The dest -> [src] table with every reference converted to a String.
  def dependencies
    h = {}
    @deps.each do |ref, deps|
      h[ref.to_s] = deps.map(&:to_s)
    end
    h
  end

  # Unique jobnet references that appear as a source of some edge.
  def dependent_flows
    @deps.values.flatten.select {|ref| ref.net? }.uniq
  end

  # Topologically sorted nodes, without dummy references.
  def sequential_nodes
    tsort.reject {|ref| ref.dummy? }
  end

  # Topologically sorted nodes, without dummy and jobnet references.
  def sequential_jobs
    sequential_nodes.reject {|ref| ref.net? }
  end

  include TSort

  def tsort_each_node(&block)
    @deps.each_key(&block)
  end

  # Uses Hash#fetch, so it raises KeyError for nodes that have no @deps
  # entry; close_graph (via fix_graph) creates the missing entries.
  def tsort_each_child(ref, &block)
    @deps.fetch(ref).each(&block)
  end

  # Completes the graph and validates it.
  # Raises ParameterError on a dependency cycle or an orphan job.
  def fix_graph
    close_graph
    check_cycle
    check_orphan
  end

  # Asks +root_flow+ for the flow of every jobnet reference among the
  # @deps keys, attaches it to the reference and marks this flow resolved.
  def resolve_subnets(root_flow)
    @deps.each_key do |ref|
      next unless ref.net?
      ref.flow = root_flow.subnet(ref)
    end
    @subnets_resolved = true
  end

  def subnets_resolved?
    @subnets_resolved
  end

  private

  # Gives every node that only appears as a source its own (empty) entry.
  def close_graph
    @deps.values.flatten.uniq.each do |ref|
      @deps[ref] ||= []
    end
  end

  # Raises ParameterError for any dependency cycle. A node that depends on
  # itself forms a strongly connected component of size one, so it needs
  # the explicit self_dependent? check.
  def check_cycle
    each_strongly_connected_component do |refs|
      next if refs.size == 1 && !self_dependent?(refs.first)
      cycle = (refs + [refs.first]).reverse.join(' -> ')
      raise ParameterError, "found cycle in the flow: #{cycle}"
    end
  end

  # True when +ref+ is listed among its own sources.
  def self_dependent?(ref)
    @deps.fetch(ref).include?(ref)
  end

  def check_orphan
    orphan_nodes.each do |ref|
      raise ParameterError, "found orphan job in the flow: #{ref.location}: #{ref}"
    end
  end

  # Nodes without any source that are neither dummy nor jobnet references.
  def orphan_nodes
    @deps.select {|ref, deps| deps.empty? && !ref.dummy? && !ref.net? }.keys
  end
end
137
+
138
# The top-level flow: it owns every loaded jobnet (subnet) and holds the
# jobnet -> jobnet dependency graph.
class RootJobFlow < JobFlow
  # Loads the jobnet at +path+, pulls in every jobnet it references
  # (directly or indirectly) and returns the fixed root flow.
  def RootJobFlow.load(ctx, path)
    flow = new(ctx)
    flow.add_subnet JobFlow.load(path)
    flow.fix
    flow
  end

  def initialize(ctx)
    @ctx = ctx
    super ROOT_FLOW_NAME, Location.dummy
    @subnets = {}   # flow name (String) => JobFlow
  end

  # Registers +flow+ under its name.
  # Raises ParameterError when a flow of the same name is already known.
  def add_subnet(flow)
    raise ParameterError, "duplicated subnet definition: #{flow.name}" if @subnets.key?(flow.name)
    @subnets[flow.name] = flow
  end

  # Returns the flow registered for +ref+, loading and registering it first
  # when it is not known yet. The table is keyed by String names (see
  # add_subnet), so the lookup key is normalised with #to_s instead of
  # relying on a Ref hashing like its String form.
  def subnet(ref)
    known = @subnets[ref.to_s]
    return known if known
    flow = load_jobnet_auto(ref)
    add_subnet flow
    flow
  end

  # Loads the jobnet file that ref.relative_path names, resolved through
  # @ctx.root_relative_path.
  # Raises ParameterError when that path is not a file.
  def load_jobnet_auto(ref)
    path = @ctx.root_relative_path(ref.relative_path)
    raise ParameterError, "undefined subnet: #{ref}" unless path.file?
    JobFlow.load(path)
  end

  # Yields every registered subnet (iterates over a snapshot of the table).
  def each_subnet(&block)
    @subnets.values.each(&block)
  end

  # Yields this root flow first, then every registered subnet.
  def each_flow(&block)
    yield self
    @subnets.each_value(&block)
  end

  # Yields the subnets in the order given by sequential_nodes.
  def each_subnet_sequence
    sequential_nodes.each do |ref|
      yield subnet(ref)
    end
  end

  # Resolves all subnet references, fixes every subnet graph, then builds
  # and fixes the jobnet-level graph of this root flow.
  def fix
    resolve_subnet_references
    each_subnet do |flow|
      flow.fix_graph
    end
    add_jobnet_dependency_edges
    fix_graph
  end

  private

  # Resolving one flow may register new subnets, so keep sweeping until a
  # whole pass finds nothing left to resolve.
  def resolve_subnet_references
    unresolved = true
    while unresolved
      unresolved = false
      ([self] + @subnets.values).each do |flow|
        unless flow.subnets_resolved?
          flow.resolve_subnets self
          unresolved = true
        end
      end
    end
  end

  def add_jobnet_dependency_edges
    each_subnet do |flow|
      # dummy dependency to ensure to execute all subnets
      add_edge Ref.dummy, flow.ref
      # jobnet -> jobnet dependency
      flow.dependent_flows.each do |dep_flow_ref|
        add_edge dep_flow_ref, flow.ref
      end
    end
  end

  def check_orphan
    # should not check orphan for root.
  end
end
227
+
228
+ class JobFlow # reopen as namespace
229
+
230
# Base class of node references. A reference is identified by its string
# form: "subsystem/name", or just the name when there is no subsystem.
class Ref
  START_NAME = '@start'

  # A job reference named START_NAME with no subsystem.
  def self.dummy
    JobRef.new(nil, START_NAME, Location.dummy)
  end

  # Parses a reference string of the form [*][subsystem/]name.
  #
  # A leading "*" selects JobNetRef, otherwise JobRef is built. When the
  # string carries no subsystem, +curr_subsys+ is used. ROOT_FLOW_NAME
  # yields a JobNetRef with no subsystem and an empty name.
  # Raises ParameterError for a malformed string or a missing subsystem.
  def self.parse(ref, curr_subsys = nil, location = Location.dummy)
    return JobNetRef.new(nil, '', location) if ref == ROOT_FLOW_NAME

    matched = %r<\A(\*)?(?:(\w[\w\-]*)/)?(@?\w[\w\-]*)\z>.match(ref)
    raise ParameterError, "bad job name: #{ref.inspect}" unless matched

    net_mark, explicit_subsys, name = matched.captures
    node_class = net_mark ? JobNetRef : JobRef
    effective_subsys = explicit_subsys || curr_subsys
    raise ParameterError, "missing subsystem: #{ref}" unless effective_subsys

    node_class.new(effective_subsys, name, location)
  end

  attr_reader :subsystem, :name, :location

  def initialize(subsys, name, location)
    @subsystem = subsys
    @name = name
    @location = location
  end

  def inspect
    "\#<#{self.class} #{self}>"
  end

  # String form of the reference; computed once and cached.
  def to_s
    @ref ||= [@subsystem, @name].compact.join('/')
  end

  # Two references are equal when their string forms are equal; the other
  # operand only has to respond to #to_s.
  def ==(other)
    other.to_s == to_s
  end

  alias eql? ==

  # Hash of the string form, consistent with #== / #eql?.
  def hash
    to_s.hash
  end

  # True when the name begins with "@".
  def dummy?
    '@' == @name[0]
  end
end
282
+
283
# Reference to a single job.
class JobRef < Ref
  # A job reference never denotes a jobnet.
  def net?; false; end
end
288
+
289
# Reference to a jobnet. Its string form is the base form prefixed with "*".
class JobNetRef < Ref
  # The flow attached to this reference; nil until assigned.
  attr_writer :flow

  def initialize(subsys, name, location)
    super
    @flow = nil
  end

  # A jobnet reference always denotes a jobnet.
  def net?; true; end

  def to_s
    "*#{super}"
  end

  # Path of the jobnet file, built as "<subsystem>/<name>.jobnet".
  def relative_path
    "#{subsystem}/#{name}.jobnet"
  end
end
311
+
312
# A position (file and line number) attached to a reference or a flow.
class Location
  class << self
    # Placeholder location: file "(dummy)", line 0.
    def dummy
      new('(dummy)', 0)
    end

    # Location taken from +f+ via its #path and #lineno.
    def for_file(f)
      new(f.path, f.lineno)
    end
  end

  attr_reader :file, :lineno

  def initialize(file, lineno)
    @file = file
    @lineno = lineno
  end

  def inspect
    "\#<#{self.class} #{self}>"
  end

  # Formats the location as "file:lineno".
  def to_s
    "#{file}:#{lineno}"
  end
end
337
+
338
# Parser for the jobnet text format. Every non-blank line (after removing
# a "#" comment) is either a lone node reference or "[src] -> dest".
class Parser
  # +subsys+ is handed to Ref.parse as the current subsystem.
  def initialize(subsys)
    @subsys = subsys
  end

  # Reads edges from +f+ and returns a new JobFlow for +ref+ holding them.
  # Raises ParameterError on a line that matches neither pattern.
  def parse_stream(f, ref)
    JobFlow.new(ref, Location.for_file(f)).tap do |flow|
      foreach_edge(f) {|src, dest| flow.add_edge src, dest }
    end
  end

  private

  name = /\w[\w\-]*/
  node_ref = %r<[@*]?(?:#{name}/)?#{name}>
  START_PATTERN = /\A(#{node_ref})\z/
  DEPEND_PATTERN = /\A(#{node_ref})?\s*->\s*(#{node_ref})\z/

  # Yields a (src, dest) pair for every meaningful line of +f+.
  #
  # "a -> b" yields (a, b). "-> b" uses the destination of the previous
  # line as its source (Ref.dummy on the first line). A lone "a" yields
  # (Ref.dummy, a).
  def foreach_edge(f)
    previous_dest = Ref.dummy
    f.each do |line|
      text = line.sub(/\#.*/, '').strip
      next if text.empty?
      loc = Location.for_file(f)

      if (depend = DEPEND_PATTERN.match(text))
        src = depend[1] ? ref(depend[1], loc) : previous_dest
        dest = ref(depend[2], loc)
      elsif (start = START_PATTERN.match(text))
        dest = ref(start[1], loc)
        src = Ref.dummy
      else
        raise ParameterError, "syntax error at #{loc}: #{line.strip.inspect}"
      end

      yield src, dest
      previous_dest = dest
    end
  end

  # Parses +ref_str+ relative to this parser's subsystem.
  def ref(ref_str, location)
    Ref.parse(ref_str, @subsys, location)
  end
end
386
+
387
+ end
388
+
389
+ end