bricolage 5.30.0 → 6.0.0beta1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,158 @@
module Bricolage
  module DAO

    # Data access object for the "jobnets" table.
    #
    # Rows are exposed as JobNet::Attributes structs.  String values are
    # embedded into SQL through the `s` helper, which comes from SQLUtils
    # (not shown here; presumably it quotes a value as an SQL string
    # literal -- TODO confirm).  Numeric IDs are embedded through
    # #int_literal so that only a plain base-10 integer can reach the SQL text.
    class JobNet

      include SQLUtils

      Attributes = Struct.new(:id, :subsystem, :jobnet_name, keyword_init: true)

      # Converts one result row (indexable by column name) into Attributes.
      # A missing jobnet_id is kept as nil.
      def JobNet.for_record(r)
        Attributes.new(
          id: r['jobnet_id']&.to_i,
          subsystem: r['subsystem'],
          jobnet_name: r['jobnet_name']
        )
      end

      # Converts a collection of result rows into an array of Attributes.
      def JobNet.for_records(jobnets)
        jobnets.map {|jobnet| JobNet.for_record(jobnet) }
      end

      # datasource must respond to #open_shared_connection.
      def initialize(datasource)
        @datasource = datasource
      end

      private def connect(&block)
        @datasource.open_shared_connection(&block)
      end

      # Renders +value+ as an integer SQL literal.
      # Raises ArgumentError unless it is a plain base-10 integer, so an
      # arbitrary string can never be spliced into a statement.
      private def int_literal(value)
        Integer(value.to_s, 10)
      end

      # Returns the Attributes of the jobnet identified by +ref+
      # (ref.subsystem, ref.name), inserting the row when it does not exist.
      #
      # If the insert fails with UniqueViolationException (another process
      # presumably created the same row in between), the row is looked up
      # once more; a second miss is reported as a bug.
      def find_or_create(ref)
        connect {|conn|
          found = find(conn, ref)
          return found if found

          begin
            return create(conn, ref)
          rescue UniqueViolationException
            return find(conn, ref) || raise("[BUG] Could not create jobnet record: #{ref}")
          end
        }
      end

      # Inserts a jobnet row for +ref+ and returns its Attributes, using the
      # jobnet_id reported by the RETURNING clause.
      private def create(conn, ref)
        records = conn.execute_update(<<~SQL)
          insert into jobnets
              ( "subsystem"
              , jobnet_name
              )
          values
              ( #{s ref.subsystem}
              , #{s ref.name}
              )
          returning jobnet_id
          ;
        SQL

        Attributes.new(
          id: records.first['jobnet_id']&.to_i,
          subsystem: ref.subsystem,
          jobnet_name: ref.name
        )
      end

      # Looks up the jobnet row for +ref+.
      # Returns its Attributes, or nil when there is no such row.
      private def find(conn, ref)
        record = conn.query_row(<<~EndSQL)
          select
              jobnet_id
              , "subsystem"
              , jobnet_name
          from
              jobnets
          where
              "subsystem" = #{s ref.subsystem}
              and jobnet_name = #{s ref.name}
          ;
        EndSQL

        record ? JobNet.for_record(record) : nil
      end

      # Returns true when the jobnet row for +ref+ has a non-null executor_id.
      def locked?(ref)
        value = connect {|conn|
          conn.query_value(<<~EndSQL)
            select
                count(*)
            from
                jobnets
            where
                "subsystem" = #{s ref.subsystem}
                and jobnet_name = #{s ref.name}
                and executor_id is not null
            ;
          EndSQL
        }

        value.to_i > 0
      end

      # Sets executor_id on the jobnet row, but only while it is still null.
      # Raises DoubleLockError when no row was updated.
      # Raises ArgumentError when jobnet_id is not an integer.
      def lock(jobnet_id, executor_id)
        records = connect {|conn|
          conn.execute_update(<<~EndSQL)
            update jobnets
            set
                executor_id = #{s executor_id}
            where
                jobnet_id = #{int_literal jobnet_id}
                and executor_id is null
            returning jobnet_id
            ;
          EndSQL
        }
        if records.empty?
          raise DoubleLockError, "Could not lock jobnet: jobnet_id=#{jobnet_id}"
        end
      end

      # Unlock jobnet lock.
      # Only a row whose executor_id equals the given one is updated.
      # Returns true if unlocked successfully, otherwise false.
      # Raises ArgumentError when jobnet_id is not an integer.
      # FIXME: raise exception?
      def unlock(jobnet_id, executor_id)
        records = connect {|conn|
          conn.execute_update(<<~EndSQL)
            update jobnets
            set
                executor_id = null
            where
                jobnet_id = #{int_literal jobnet_id}
                and executor_id = #{s executor_id}
            returning jobnet_id
            ;
          EndSQL
        }

        !records.empty?
      end

      # Resets executor_id to null on the jobnet row, whichever executor
      # currently holds it.  Returns the result of execute_update.
      # Raises ArgumentError when jobnet_id is not an integer.
      def clear_lock(jobnet_id)
        connect {|conn|
          conn.execute_update(<<~EndSQL)
            update jobnets
            set
                executor_id = null
            where
                jobnet_id = #{int_literal jobnet_id}
            ;
          EndSQL
        }
      end

    end
  end
end
@@ -137,7 +137,7 @@ module Bricolage
137
137
 
138
138
  attr_reader :name
139
139
  attr_reader :context
140
- attr_reader :logger
140
+ attr_accessor :logger
141
141
 
142
142
  def open
143
143
  yield nil
@@ -24,12 +24,23 @@ module Bricolage
24
24
  # Various SQL exception, except connection problem.
25
25
  class SQLException < JobFailureByException; end
26
26
 
27
+ # SQL unique constraint violation
28
+ class UniqueViolationException < SQLException; end
29
+
27
30
  # Database connection problems (not established, closed unexpectedly, invalid state)
28
31
  class ConnectionError < JobFailureByException; end
29
32
 
30
33
  # Acquiring lock takes too long (e.g. VACUUM lock)
31
34
  class LockTimeout < JobFailure; end
32
35
 
36
+ # The executing jobnet or job is already locked.
37
+ # Wait until the other job execution releases the lock, or force-unlock it manually.
38
+ class DoubleLockError < JobFailure; end
39
+
40
+ # Unexpected job state transition tried.
41
+ # This error must be fixed by an operator.
42
+ class IllegalJobStateException < JobFailure; end
43
+
33
44
  # S3 related exceptions
34
45
  class S3Exception < JobFailureByException; end
35
46
 
@@ -9,7 +9,7 @@ require 'bricolage/exception'
9
9
  require 'fileutils'
10
10
 
11
11
  module Bricolage
12
-
12
+
13
13
  class Job
14
14
  # For JobNetRunner
15
15
  def Job.load_ref(ref, jobnet_context)
@@ -41,6 +41,7 @@ module Bricolage
41
41
  @job_class = job_class
42
42
  @context = context
43
43
  @global_variables = nil
44
+ @option_variables = @context.option_variables
44
45
  @param_decls = @job_class.get_parameters
45
46
  @param_vals = nil # Parameters::IntermediateValues by *.job
46
47
  @param_vals_opt = nil # Parameters::IntermediateValues by options
@@ -87,6 +88,7 @@ module Bricolage
87
88
 
88
89
  job_file_rest_vars = @param_vals ? @param_vals.variables : Variables.new
89
90
  job_v_opt_vars = @param_vals_opt ? @param_vals_opt.variables : Variables.new
91
+ cmd_v_opt_vars = @option_variables ? @option_variables : Variables.new
90
92
 
91
93
  # We use different variable sets for parameter expansion and
92
94
  # SQL variable expansion. Parameter expansion uses global
@@ -94,6 +96,7 @@ module Bricolage
94
96
  base_vars = Variables.union(
95
97
  # ^ Low precedence
96
98
  @global_variables,
99
+ cmd_v_opt_vars,
97
100
  job_v_opt_vars
98
101
  # v High precedence
99
102
  )
@@ -103,11 +106,12 @@ module Bricolage
103
106
  # Then, expand SQL variables and check with declarations.
104
107
  vars = Variables.union(
105
108
  # ^ Low precedence
106
- declarations.default_variables,
107
- @global_variables,
108
- @params.variables, # Like $dest_table
109
- job_file_rest_vars,
110
- job_v_opt_vars
109
+ declarations.default_variables, # default value written in *.sql
110
+ @global_variables, # from yaml file
111
+ @params.variables, # Like $dest_table in job file
112
+ job_file_rest_vars, # custom variable at header of job file
113
+ cmd_v_opt_vars, # -v option for bricolage/bricolage-jobnet command
114
+ job_v_opt_vars # -v option for bricolage command using jobclass
111
115
  # v High precedence
112
116
  )
113
117
  @variables = vars.resolve
@@ -51,6 +51,10 @@ module Bricolage
51
51
 
52
52
  attr_reader :start_jobnet
53
53
 
54
# Returns the ref of the start jobnet (plain delegation).
def ref
  start_jobnet.ref
end
57
+
54
58
  def each_jobnet(&block)
55
59
  @jobnets.each_value(&block)
56
60
  end
@@ -351,7 +355,7 @@ module Bricolage
351
355
  unless node_subsys
352
356
  raise ParameterError, "missing subsystem: #{ref}"
353
357
  end
354
- ref_class.new(node_subsys, name, location)
358
+ ref_class.new(node_subsys.to_s, name.to_s, location)
355
359
  end
356
360
 
357
361
  def initialize(subsys, name, location)
@@ -389,7 +393,7 @@ module Bricolage
389
393
 
390
394
  class JobRef < Ref
391
395
  def JobRef.for_path(path)
392
- new(path.parent.basename, JobRef.strip_exts(path), Location.dummy)
396
+ new(path.parent.basename.to_s, JobRef.strip_exts(path), Location.dummy)
393
397
  end
394
398
 
395
399
  def JobRef.strip_exts(path)
@@ -398,7 +402,7 @@ module Bricolage
398
402
  until (ext = basename.extname).empty?
399
403
  basename = basename.basename(ext)
400
404
  end
401
- basename
405
+ basename.to_s
402
406
  end
403
407
 
404
408
  def net?
@@ -408,11 +412,11 @@ module Bricolage
408
412
 
409
413
  class JobNetRef < Ref
410
414
  def JobNetRef.for_path(path)
411
- new(path.parent.basename, path.basename('.jobnet'), Location.dummy)
415
+ new(path.parent.basename.to_s, path.basename('.jobnet').to_s, Location.dummy)
412
416
  end
413
417
 
414
418
  def JobNetRef.for_job_path(path)
415
- new(path.parent.basename, JobRef.strip_exts(path), Location.dummy)
419
+ new(path.parent.basename.to_s, JobRef.strip_exts(path).to_s, Location.dummy)
416
420
  end
417
421
 
418
422
  def initialize(subsys, name, location)
@@ -444,7 +448,6 @@ module Bricolage
444
448
  @end ||= JobRef.new(subsystem, "@#{name}@end", location)
445
449
  end
446
450
 
447
-
448
451
  def start
449
452
  @jobnet.start
450
453
  end
@@ -15,6 +15,9 @@ require 'bricolage/version'
15
15
  require 'fileutils'
16
16
  require 'pathname'
17
17
  require 'optparse'
18
+ require 'socket'
19
+ require 'net/http'
20
+ require 'json'
18
21
 
19
22
  module Bricolage
20
23
 
@@ -39,7 +42,7 @@ module Bricolage
39
42
  @hooks.run_before_option_parsing_hooks(opts)
40
43
  opts.parse!(ARGV)
41
44
 
42
- @ctx = Context.for_application(job_path: opts.jobnet_files.first, environment: opts.environment, global_variables: opts.global_variables)
45
+ @ctx = Context.for_application(job_path: opts.jobnet_files.first, environment: opts.environment, option_variables: opts.option_variables)
43
46
  opts.merge_saved_options(@ctx.load_system_options)
44
47
 
45
48
  jobnet = RootJobNet.load_auto(@ctx, opts.jobnet_files)
@@ -54,16 +57,18 @@ module Bricolage
54
57
  exit EXIT_SUCCESS
55
58
  end
56
59
 
60
+ queue = make_queue(opts)
61
+ if queue.locked?(jobnet)
62
+ raise ParameterError, "Job queue is still locked. If you are sure to restart jobnet, #{queue.unlock_help(jobnet)}"
63
+ end
57
64
  if opts.clear_queue?
58
- clear_queue(opts)
65
+ queue.cancel_jobnet(jobnet, 'cancelled by --clear-queue')
66
+ logger.info "queue is unlocked and cleared"
59
67
  exit EXIT_SUCCESS
60
68
  end
61
- queue = get_queue(opts)
62
- if queue.locked?
63
- raise ParameterError, "Job queue is still locked. If you are sure to restart jobnet, #{queue.unlock_help}"
64
- end
65
- unless queue.queued?
66
- enqueue_jobs jobnet, queue
69
+ queue.restore_jobnet(jobnet)
70
+ if queue.empty?
71
+ queue.enqueue_jobnet(jobnet)
67
72
  end
68
73
 
69
74
  if opts.list_jobs?
@@ -91,18 +96,30 @@ module Bricolage
91
96
  @ctx.logger
92
97
  end
93
98
 
94
- def clear_queue(opts)
95
- if path = get_queue_file_path(opts)
96
- FileUtils.rm_f path
99
# Builds the task queue selected by the options:
# a DatabaseTaskQueue when opts.db_name is set, a FileTaskQueue when
# get_queue_file_path(opts) yields a path, otherwise a MemoryTaskQueue.
def make_queue(opts)
  if (db_name = opts.db_name)
    logger.info "Enables DB queue: datasource=#{db_name}"
    return DatabaseTaskQueue.new(
      datasource: @ctx.get_data_source('psql', db_name),
      executor_id: get_executor_id(opts.executor_type),
      enable_lock: false
    )
  end

  queue_path = get_queue_file_path(opts)
  return MemoryTaskQueue.new unless queue_path

  logger.info "Enables file queue: #{queue_path}"
  FileTaskQueue.new(path: queue_path)
end
99
112
 
100
- def get_queue(opts)
101
- if path = get_queue_file_path(opts)
102
- logger.info "queue path: #{path}"
103
- FileTaskQueue.restore_if_exist(path)
113
# Builds the executor ID that identifies this process.
#
# executor_id is 'TaskID:PID' or 'Hostname:PID'.
# For the 'ecs' executor type the task ID is the last path segment of the
# TaskARN reported by "$ECS_CONTAINER_METADATA_URI/task"; for any other
# type the local hostname is used.
#
# Raises ParameterError when the 'ecs' type is requested but
# ECS_CONTAINER_METADATA_URI is not set, the Net::HTTP error raised by
# Net::HTTPResponse#value when the metadata endpoint does not answer 2xx,
# and KeyError when the response carries no TaskARN.
def get_executor_id(executor_type)
  unless executor_type == 'ecs'
    return "#{Socket.gethostname}:#{$$}"
  end

  metadata_uri = ENV['ECS_CONTAINER_METADATA_URI']
  if metadata_uri.nil? || metadata_uri.empty?
    # Without this guard the request would silently target the URI "/task".
    raise ParameterError, "ECS_CONTAINER_METADATA_URI is not set; could not resolve ECS task ID"
  end

  response = Net::HTTP.get_response(URI.parse("#{metadata_uri}/task"))
  response.value   # raises unless the response is 2xx
  task_id = JSON.parse(response.body).fetch('TaskARN').split('/').last
  "#{task_id}:#{$$}"
end
108
125
 
@@ -124,45 +141,38 @@ module Bricolage
124
141
  path.basename.to_s
125
142
  end
126
143
 
127
- def enqueue_jobs(jobnet, queue)
128
- seq = 1
129
- jobnet.sequential_jobs.each do |ref|
130
- queue.enq JobTask.new(ref)
131
- seq += 1
132
- end
133
- queue.save
134
- end
135
-
136
144
  def list_jobs(queue)
137
- queue.each do |task|
138
- puts task.job
145
+ queue.each do |job|
146
+ puts job
139
147
  end
140
148
  end
141
149
 
142
150
  def check_jobs(queue)
143
- queue.each do |task|
144
- Job.load_ref(task.job, @ctx).compile
151
+ queue.each do |job|
152
+ Job.load_ref(job, @ctx).compile
145
153
  end
146
154
  end
147
155
 
148
156
  def run_queue(queue)
157
+ result = nil
158
+ job = nil
149
159
  @hooks.run_before_all_jobs_hooks(BeforeAllJobsEvent.new(@jobnet_id, queue))
150
- queue.consume_each do |task|
151
- result = execute_job(task.job, queue)
152
- unless result.success?
153
- logger.elapsed_time 'jobnet total: ', (Time.now - @jobnet_start_time)
154
- logger.error "[job #{task.job}] #{result.message}"
155
- @hooks.run_after_all_jobs_hooks(AfterAllJobsEvent.new(false, queue))
156
- exit result.status
157
- end
160
+ queue.consume_each do |job|
161
+ result = execute_job(job, queue)
158
162
  end
159
- @hooks.run_after_all_jobs_hooks(AfterAllJobsEvent.new(true, queue))
163
+ @hooks.run_after_all_jobs_hooks(AfterAllJobsEvent.new(result.success?, queue))
160
164
  logger.elapsed_time 'jobnet total: ', (Time.now - @jobnet_start_time)
161
- logger.info "status all green"
165
+
166
+ if result.success?
167
+ logger.info "status all green"
168
+ else
169
+ logger.error "[job #{job}] #{result.message}"
170
+ exit result.status
171
+ end
162
172
  end
163
173
 
164
174
  def execute_job(ref, queue)
165
- logger.debug "job #{ref}"
175
+ logger.info "job #{ref}"
166
176
  job_start_time = Time.now
167
177
  job = Job.load_ref(ref, @ctx)
168
178
  job.compile
@@ -211,7 +221,7 @@ module Bricolage
211
221
  def initialize(app)
212
222
  @app = app
213
223
  @environment = nil
214
- @global_variables = Variables.new
224
+ @option_variables = Variables.new
215
225
  @jobnet_files = nil
216
226
 
217
227
  @dump_options = false
@@ -226,7 +236,9 @@ module Bricolage
226
236
  super.merge({
227
237
  'local-state-dir' => OptionValue.new('default value', '/tmp/bricolage'),
228
238
  'enable-queue' => OptionValue.new('default value', false),
229
- 'queue-path' => OptionValue.new('default value', nil)
239
+ 'queue-path' => OptionValue.new('default value', nil),
240
+ 'db-name' => OptionValue.new('default value', nil),
241
+ 'ecs-executor' => OptionValue.new('default value', false)
230
242
  })
231
243
  end
232
244
  private :opts_default
@@ -277,15 +289,22 @@ Options:
277
289
  parser.on('--queue-path=PATH', 'Enables job queue with this path.') {|path|
278
290
  @opts_cmdline['queue-path'] = OptionValue.new('--queue-path option', path)
279
291
  }
292
+ parser.on('--db-name=DB_NAME', 'Enables job queue with this database.') {|db_name|
293
+ @opts_cmdline['db-name'] = OptionValue.new('--db-name option', db_name)
294
+ }
295
+ parser.on('--ecs-executor', 'Set executor type as ECS ') {
296
+ @opts_cmdline['ecs-executor'] = OptionValue.new('--ecs-executor option', true)
297
+ }
298
+
280
299
  parser.on('-c', '--check-only', 'Checks job parameters and quit without executing.') {
281
300
  @check_only = true
282
301
  }
283
302
  parser.on('-l', '--list-jobs', 'Lists target jobs without executing.') {
284
303
  @list_jobs = true
285
304
  }
286
- parser.on('-v', '--variable=NAME=VALUE', 'Defines global variable.') {|name_value|
305
+ parser.on('-v', '--variable=NAME=VALUE', 'Defines option variable.') {|name_value|
287
306
  name, value = name_value.split('=', 2)
288
- @global_variables[name] = value
307
+ @option_variables[name] = value
289
308
  }
290
309
  parser.on('--dump-options', 'Shows option parsing result and quit.') {
291
310
  @dump_options = true
@@ -317,7 +336,7 @@ Options:
317
336
 
318
337
  attr_reader :jobnet_files
319
338
 
320
- attr_reader :global_variables
339
+ attr_reader :option_variables
321
340
 
322
341
  def dump_options?
323
342
  @dump_options
@@ -352,6 +371,24 @@ Options:
352
371
  end
353
372
  end
354
373
 
374
# Value of the 'db-name' option, or nil when that value is nil or false.
def db_name
  @opts['db-name'].value || nil
end
382
+
383
# Executor type derived from the 'ecs-executor' option:
# 'ecs' when its value is truthy, otherwise 'ec2'.
def executor_type
  @opts['ecs-executor'].value ? 'ecs' : 'ec2'
end
391
+
355
392
  def clear_queue?
356
393
  @clear_queue
357
394
  end