bricolage 5.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65):
  1. checksums.yaml +7 -0
  2. data/README.md +4 -0
  3. data/bin/bricolage +6 -0
  4. data/bin/bricolage-jobnet +6 -0
  5. data/jobclass/create.rb +21 -0
  6. data/jobclass/exec.rb +17 -0
  7. data/jobclass/insert-delta.rb +31 -0
  8. data/jobclass/insert.rb +33 -0
  9. data/jobclass/load.rb +39 -0
  10. data/jobclass/my-export.rb +40 -0
  11. data/jobclass/my-migrate.rb +103 -0
  12. data/jobclass/noop.rb +13 -0
  13. data/jobclass/rebuild-drop.rb +37 -0
  14. data/jobclass/rebuild-rename.rb +49 -0
  15. data/jobclass/s3-put.rb +19 -0
  16. data/jobclass/sql.rb +29 -0
  17. data/jobclass/td-delete.rb +20 -0
  18. data/jobclass/td-export.rb +30 -0
  19. data/jobclass/unload.rb +30 -0
  20. data/jobclass/wait-file.rb +48 -0
  21. data/lib/bricolage/application.rb +260 -0
  22. data/lib/bricolage/commandutils.rb +52 -0
  23. data/lib/bricolage/configloader.rb +126 -0
  24. data/lib/bricolage/context.rb +108 -0
  25. data/lib/bricolage/datasource.rb +144 -0
  26. data/lib/bricolage/eventhandlers.rb +47 -0
  27. data/lib/bricolage/exception.rb +47 -0
  28. data/lib/bricolage/filedatasource.rb +42 -0
  29. data/lib/bricolage/filesystem.rb +165 -0
  30. data/lib/bricolage/genericdatasource.rb +37 -0
  31. data/lib/bricolage/job.rb +212 -0
  32. data/lib/bricolage/jobclass.rb +98 -0
  33. data/lib/bricolage/jobfile.rb +100 -0
  34. data/lib/bricolage/jobflow.rb +389 -0
  35. data/lib/bricolage/jobnetrunner.rb +264 -0
  36. data/lib/bricolage/jobresult.rb +74 -0
  37. data/lib/bricolage/logger.rb +52 -0
  38. data/lib/bricolage/mysqldatasource.rb +223 -0
  39. data/lib/bricolage/parameters.rb +653 -0
  40. data/lib/bricolage/postgresconnection.rb +78 -0
  41. data/lib/bricolage/psqldatasource.rb +449 -0
  42. data/lib/bricolage/resource.rb +68 -0
  43. data/lib/bricolage/rubyjobclass.rb +42 -0
  44. data/lib/bricolage/s3datasource.rb +144 -0
  45. data/lib/bricolage/script.rb +120 -0
  46. data/lib/bricolage/sqlstatement.rb +351 -0
  47. data/lib/bricolage/taskqueue.rb +156 -0
  48. data/lib/bricolage/tddatasource.rb +116 -0
  49. data/lib/bricolage/variables.rb +208 -0
  50. data/lib/bricolage/version.rb +4 -0
  51. data/lib/bricolage.rb +8 -0
  52. data/libexec/sqldump +9 -0
  53. data/libexec/sqldump.Darwin +0 -0
  54. data/libexec/sqldump.Linux +0 -0
  55. data/test/all.rb +3 -0
  56. data/test/home/config/development/database.yml +57 -0
  57. data/test/home/config/development/password.yml +2 -0
  58. data/test/home/subsys/separated.job +1 -0
  59. data/test/home/subsys/separated.sql +1 -0
  60. data/test/home/subsys/unified.jobnet +1 -0
  61. data/test/home/subsys/unified.sql.job +5 -0
  62. data/test/test_filesystem.rb +19 -0
  63. data/test/test_parameters.rb +401 -0
  64. data/test/test_variables.rb +114 -0
  65. metadata +192 -0
@@ -0,0 +1,100 @@
1
+ require 'bricolage/exception'
2
+ require 'pathname'
3
+
4
+ module Bricolage
5
+
6
+ class JobFile
7
+ def JobFile.load(ctx, path)
8
+ values = if /\.sql\.job\z/ =~ path.to_s
9
+ load_embedded_definition(ctx, path)
10
+ else
11
+ ctx.parameter_file_loader.load_eruby_yaml(path)
12
+ end
13
+ parse(values, path)
14
+ end
15
+
16
+ class << JobFile
17
+ private
18
+
19
+ def load_embedded_definition(ctx, path)
20
+ sql = ctx.parameter_file_loader.read_file(path)
21
+ block = sql.slice(%r{\A/\*.*?^\*/}m) or
22
+ raise ParameterError, "missing embedded job definition block: #{path}"
23
+ yaml = block.sub(%r{\A/\*}, '').sub(%r{^\*/\s*\z}, '')
24
+ begin
25
+ values = YAML.load(yaml)
26
+ rescue => err
27
+ raise ParameterError, "#{path}: embedded job definition syntax error: #{err.message}"
28
+ end
29
+ stripped_sql = sql.sub(%r{\A/\*.*?^\*/}m, "\n" * block.count("\n"))
30
+ decls = make_sql_declarations(stripped_sql, values, path)
31
+ stmt = SQLStatement.new(StringResource.new(sql, path), decls)
32
+ set_value values, 'sql-file', stmt, path
33
+ values
34
+ end
35
+
36
+ def set_value(values, name, value, path)
37
+ raise ParameterError, "#{path}: #{name} parameter and embedded SQL script is exclusive" if values[name]
38
+ values[name] = value
39
+ end
40
+
41
+ def make_sql_declarations(sql, values, path)
42
+ decls = Declarations.new
43
+ vars = Variable.list(sql)
44
+ if dest = vars.delete('dest_table')
45
+ decls.declare 'dest_table', nil
46
+ end
47
+ if values['src-tables']
48
+ srcs = values['src-tables']
49
+ case srcs
50
+ when String
51
+ decls.declare srcs, nil
52
+ vars.delete srcs
53
+ when Array
54
+ srcs.each do |table|
55
+ decls.declare table, nil
56
+ vars.delete table
57
+ end
58
+ when Hash
59
+ srcs.each_key do |table|
60
+ decls.declare table, nil
61
+ vars.delete table
62
+ end
63
+ else
64
+ raise ParameterError, "unknown src-tables value type: #{srcs.class}"
65
+ end
66
+ end
67
+ vars.each do |name|
68
+ decls.declare name, name
69
+ end
70
+ decls
71
+ end
72
+ end
73
+
74
+ def JobFile.parse(values, path)
75
+ values = values.dup
76
+ class_id = values.delete('class') or
77
+ raise ParameterError, "missing job class: #{path}"
78
+ new(class_id, values, path)
79
+ end
80
+
81
+ def initialize(class_id, values, path)
82
+ @class_id = class_id
83
+ @values = values
84
+ @path = Pathname(path)
85
+ end
86
+
87
+ attr_reader :class_id
88
+ attr_reader :values
89
+ attr_reader :path
90
+
91
+ def job_id
92
+ @path.basename('.job').to_s
93
+ end
94
+
95
+ def subsystem
96
+ @path.parent.basename.to_s
97
+ end
98
+ end
99
+
100
+ end
@@ -0,0 +1,389 @@
1
+ require 'bricolage/exception'
2
+ require 'tsort'
3
+
4
+ module Bricolage
5
+
6
+ class JobFlow
7
+ def JobFlow.load(path)
8
+ File.open(path) {|f|
9
+ parse_stream(f, make_node_ref(path))
10
+ }
11
+ rescue SystemCallError => err
12
+ raise ParameterError, "could not load job flow: #{path} (#{err.message})"
13
+ end
14
+
15
+ def JobFlow.make_node_ref(path)
16
+ subsys = path.parent.basename.to_s
17
+ name = path.basename('.jobnet').to_s
18
+ JobNetRef.new(subsys, name, Location.dummy)
19
+ end
20
+
21
+ def JobFlow.parse_stream(f, ref)
22
+ Parser.new(ref.subsystem).parse_stream(f, ref)
23
+ end
24
+
25
+ def JobFlow.root
26
+ RootJobFlow.new
27
+ end
28
+
29
+ ROOT_FLOW_NAME = '*'
30
+
31
+ def initialize(ref, location)
32
+ @ref = ref
33
+ @location = location
34
+ @flow = {} # Ref => [Ref] (src->dest)
35
+ @deps = {} # Ref => [Ref] (dest->src)
36
+ @subnets_resolved = false
37
+ end
38
+
39
+ def inspect
40
+ "\#<#{self.class} #{ref}>"
41
+ end
42
+
43
+ attr_reader :ref
44
+
45
+ def name
46
+ ref.to_s
47
+ end
48
+
49
+ def add_edge(src, dest)
50
+ (@flow[src] ||= []).push dest unless dest.net?
51
+ (@deps[dest] ||= []).push src
52
+ end
53
+
54
+ def flow_tree
55
+ h = {}
56
+ @flow.each do |src, dests|
57
+ h[src.to_s] = dests.map {|d| d.to_s }
58
+ end
59
+ h
60
+ end
61
+
62
+ def dependencies
63
+ h = {}
64
+ @deps.each do |ref, deps|
65
+ h[ref.to_s] = deps.map(&:to_s)
66
+ end
67
+ h
68
+ end
69
+
70
+ def dependent_flows
71
+ @deps.values.flatten.select {|ref| ref.net? }.uniq
72
+ end
73
+
74
+ def sequential_nodes
75
+ tsort.reject {|ref| ref.dummy? }
76
+ end
77
+
78
+ def sequential_jobs
79
+ sequential_nodes.reject {|ref| ref.net? }
80
+ end
81
+
82
+ include TSort
83
+
84
+ def tsort_each_node(&block)
85
+ @deps.each_key(&block)
86
+ end
87
+
88
+ def tsort_each_child(ref, &block)
89
+ @deps.fetch(ref).each(&block)
90
+ end
91
+
92
+ def fix_graph
93
+ close_graph
94
+ check_cycle
95
+ check_orphan
96
+ end
97
+
98
+ def resolve_subnets(root_flow)
99
+ @deps.each_key do |ref|
100
+ next unless ref.net?
101
+ ref.flow = root_flow.subnet(ref)
102
+ end
103
+ @subnets_resolved = true
104
+ end
105
+
106
+ def subnets_resolved?
107
+ @subnets_resolved
108
+ end
109
+
110
+ private
111
+
112
+ def close_graph
113
+ @deps.values.flatten.uniq.each do |ref|
114
+ @deps[ref] ||= []
115
+ end
116
+ end
117
+
118
+ def check_cycle
119
+ each_strongly_connected_component do |refs|
120
+ unless refs.size == 1
121
+ cycle = (refs + [refs.first]).reverse.join(' -> ')
122
+ raise ParameterError, "found cycle in the flow: #{cycle}"
123
+ end
124
+ end
125
+ end
126
+
127
+ def check_orphan
128
+ orphan_nodes.each do |ref|
129
+ raise ParameterError, "found orphan job in the flow: #{ref.location}: #{ref}"
130
+ end
131
+ end
132
+
133
+ def orphan_nodes
134
+ @deps.to_a.select {|ref, deps| deps.empty? and not ref.dummy? and not ref.net? }.map {|ref, *| ref }
135
+ end
136
+ end
137
+
138
+ class RootJobFlow < JobFlow
139
+ def RootJobFlow.load(ctx, path)
140
+ flow = new(ctx)
141
+ flow.add_subnet JobFlow.load(path)
142
+ flow.fix
143
+ flow
144
+ end
145
+
146
+ def initialize(ctx)
147
+ @ctx = ctx
148
+ super ROOT_FLOW_NAME, Location.dummy
149
+ @subnets = {}
150
+ end
151
+
152
+ def add_subnet(flow)
153
+ raise ParameterError, "duplicated subnet definition: #{flow.name}" if @subnets.key?(flow.name)
154
+ @subnets[flow.name] = flow
155
+ end
156
+
157
+ def subnet(ref)
158
+ if flow = @subnets[ref]
159
+ flow
160
+ else
161
+ flow = load_jobnet_auto(ref)
162
+ add_subnet flow
163
+ flow
164
+ end
165
+ end
166
+
167
+ def load_jobnet_auto(ref)
168
+ path = @ctx.root_relative_path(ref.relative_path)
169
+ raise ParameterError, "undefined subnet: #{ref}" unless path.file?
170
+ JobFlow.load(path)
171
+ end
172
+
173
+ def each_subnet(&block)
174
+ @subnets.values.each(&block)
175
+ end
176
+
177
+ def each_flow(&block)
178
+ yield self
179
+ @subnets.each_value(&block)
180
+ end
181
+
182
+ def each_subnet_sequence
183
+ sequential_nodes.each do |ref|
184
+ yield subnet(ref)
185
+ end
186
+ end
187
+
188
+ def fix
189
+ resolve_subnet_references
190
+ each_subnet do |flow|
191
+ flow.fix_graph
192
+ end
193
+ add_jobnet_dependency_edges
194
+ fix_graph
195
+ end
196
+
197
+ private
198
+
199
+ def resolve_subnet_references
200
+ unresolved = true
201
+ while unresolved
202
+ unresolved = false
203
+ ([self] + @subnets.values).each do |flow|
204
+ unless flow.subnets_resolved?
205
+ flow.resolve_subnets self
206
+ unresolved = true
207
+ end
208
+ end
209
+ end
210
+ end
211
+
212
+ def add_jobnet_dependency_edges
213
+ each_subnet do |flow|
214
+ # dummy dependency to ensure to execute all subnets
215
+ add_edge Ref.dummy, flow.ref
216
+ # jobnet -> jobnet dependency
217
+ flow.dependent_flows.each do |dep_flow_ref|
218
+ add_edge dep_flow_ref, flow.ref
219
+ end
220
+ end
221
+ end
222
+
223
+ def check_orphan
224
+ # should not check orphan for root.
225
+ end
226
+ end
227
+
228
+ class JobFlow # reopen as namespace
229
+
230
+ class Ref
231
+ START_NAME = '@start'
232
+
233
+ def Ref.dummy
234
+ JobRef.new(nil, START_NAME, Location.dummy)
235
+ end
236
+
237
+ def Ref.parse(ref, curr_subsys = nil, location = Location.dummy)
238
+ return JobNetRef.new(nil, '', location) if ref == ROOT_FLOW_NAME
239
+ m = %r<\A(\*)?(?:(\w[\w\-]*)/)?(@?\w[\w\-]*)\z>.match(ref) or
240
+ raise ParameterError, "bad job name: #{ref.inspect}"
241
+ is_net, subsys, name = m.captures
242
+ ref_class = (is_net ? JobNetRef : JobRef)
243
+ node_subsys = subsys || curr_subsys
244
+ unless node_subsys
245
+ raise ParameterError, "missing subsystem: #{ref}"
246
+ end
247
+ ref_class.new(node_subsys, name, location)
248
+ end
249
+
250
+ def initialize(subsys, name, location)
251
+ @subsystem = subsys
252
+ @name = name
253
+ @location = location
254
+ end
255
+
256
+ attr_reader :subsystem
257
+ attr_reader :name
258
+ attr_reader :location
259
+
260
+ def inspect
261
+ "\#<#{self.class} #{to_s}>"
262
+ end
263
+
264
+ def to_s
265
+ @ref ||= [@subsystem, @name].compact.join('/')
266
+ end
267
+
268
+ def ==(other)
269
+ to_s == other.to_s
270
+ end
271
+
272
+ alias eql? ==
273
+
274
+ def hash
275
+ to_s.hash
276
+ end
277
+
278
+ def dummy?
279
+ @name[0] == '@'
280
+ end
281
+ end
282
+
283
+ class JobRef < Ref
284
+ def net?
285
+ false
286
+ end
287
+ end
288
+
289
+ class JobNetRef < Ref
290
+ def initialize(subsys, name, location)
291
+ super
292
+ @flow = nil
293
+ end
294
+
295
+ def flow=(flow)
296
+ @flow = flow
297
+ end
298
+
299
+ def net?
300
+ true
301
+ end
302
+
303
+ def to_s
304
+ '*' + super
305
+ end
306
+
307
+ def relative_path
308
+ "#{subsystem}/#{name}.jobnet"
309
+ end
310
+ end
311
+
312
+ class Location
313
+ def Location.dummy
314
+ new('(dummy)', 0)
315
+ end
316
+
317
+ def Location.for_file(f)
318
+ new(f.path, f.lineno)
319
+ end
320
+
321
+ def initialize(file, lineno)
322
+ @file = file
323
+ @lineno = lineno
324
+ end
325
+
326
+ attr_reader :file
327
+ attr_reader :lineno
328
+
329
+ def inspect
330
+ "\#<#{self.class} #{to_s}>"
331
+ end
332
+
333
+ def to_s
334
+ "#{@file}:#{@lineno}"
335
+ end
336
+ end
337
+
338
+ class Parser
339
+ def initialize(subsys)
340
+ @subsys = subsys
341
+ end
342
+
343
+ def parse_stream(f, ref)
344
+ flow = JobFlow.new(ref, Location.for_file(f))
345
+ foreach_edge(f) do |src, dest|
346
+ flow.add_edge src, dest
347
+ end
348
+ flow
349
+ end
350
+
351
+ private
352
+
353
+ name = /\w[\w\-]*/
354
+ node_ref = %r<[@*]?(?:#{name}/)?#{name}>
355
+ START_PATTERN = /\A(#{node_ref})\z/
356
+ DEPEND_PATTERN = /\A(#{node_ref})?\s*->\s*(#{node_ref})\z/
357
+
358
+ def foreach_edge(f)
359
+ default_src = Ref.dummy
360
+ f.each do |line|
361
+ text = line.sub(/\#.*/, '').strip
362
+ next if text.empty?
363
+ loc = Location.for_file(f)
364
+
365
+ if m = DEPEND_PATTERN.match(text)
366
+ src = m[1] ? ref(m[1], loc) : default_src
367
+ dest = ref(m[2], loc)
368
+ yield src, dest
369
+ default_src = dest
370
+
371
+ elsif m = START_PATTERN.match(text)
372
+ dest = ref(m[1], loc)
373
+ yield Ref.dummy, dest
374
+ default_src = dest
375
+
376
+ else
377
+ raise ParameterError, "syntax error at #{loc}: #{line.strip.inspect}"
378
+ end
379
+ end
380
+ end
381
+
382
+ def ref(ref_str, location)
383
+ Ref.parse(ref_str, @subsys, location)
384
+ end
385
+ end
386
+
387
+ end
388
+
389
+ end