bricolage 5.29.1 → 6.0.0beta3

Sign up to get free protection for your applications and to get access to all the features.
@@ -51,6 +51,10 @@ module Bricolage
51
51
 
52
52
  attr_reader :start_jobnet
53
53
 
54
+ def ref
55
+ @start_jobnet.ref
56
+ end
57
+
54
58
  def each_jobnet(&block)
55
59
  @jobnets.each_value(&block)
56
60
  end
@@ -351,7 +355,7 @@ module Bricolage
351
355
  unless node_subsys
352
356
  raise ParameterError, "missing subsystem: #{ref}"
353
357
  end
354
- ref_class.new(node_subsys, name, location)
358
+ ref_class.new(node_subsys.to_s, name.to_s, location)
355
359
  end
356
360
 
357
361
  def initialize(subsys, name, location)
@@ -389,7 +393,7 @@ module Bricolage
389
393
 
390
394
  class JobRef < Ref
391
395
  def JobRef.for_path(path)
392
- new(path.parent.basename, JobRef.strip_exts(path), Location.dummy)
396
+ new(path.parent.basename.to_s, JobRef.strip_exts(path), Location.dummy)
393
397
  end
394
398
 
395
399
  def JobRef.strip_exts(path)
@@ -398,7 +402,7 @@ module Bricolage
398
402
  until (ext = basename.extname).empty?
399
403
  basename = basename.basename(ext)
400
404
  end
401
- basename
405
+ basename.to_s
402
406
  end
403
407
 
404
408
  def net?
@@ -408,11 +412,11 @@ module Bricolage
408
412
 
409
413
  class JobNetRef < Ref
410
414
  def JobNetRef.for_path(path)
411
- new(path.parent.basename, path.basename('.jobnet'), Location.dummy)
415
+ new(path.parent.basename.to_s, path.basename('.jobnet').to_s, Location.dummy)
412
416
  end
413
417
 
414
418
  def JobNetRef.for_job_path(path)
415
- new(path.parent.basename, JobRef.strip_exts(path), Location.dummy)
419
+ new(path.parent.basename.to_s, JobRef.strip_exts(path).to_s, Location.dummy)
416
420
  end
417
421
 
418
422
  def initialize(subsys, name, location)
@@ -444,7 +448,6 @@ module Bricolage
444
448
  @end ||= JobRef.new(subsystem, "@#{name}@end", location)
445
449
  end
446
450
 
447
-
448
451
  def start
449
452
  @jobnet.start
450
453
  end
@@ -15,6 +15,9 @@ require 'bricolage/version'
15
15
  require 'fileutils'
16
16
  require 'pathname'
17
17
  require 'optparse'
18
+ require 'socket'
19
+ require 'net/http'
20
+ require 'json'
18
21
 
19
22
  module Bricolage
20
23
 
@@ -39,7 +42,7 @@ module Bricolage
39
42
  @hooks.run_before_option_parsing_hooks(opts)
40
43
  opts.parse!(ARGV)
41
44
 
42
- @ctx = Context.for_application(job_path: opts.jobnet_files.first, environment: opts.environment, global_variables: opts.global_variables)
45
+ @ctx = Context.for_application(job_path: opts.jobnet_files.first, environment: opts.environment, option_variables: opts.option_variables)
43
46
  opts.merge_saved_options(@ctx.load_system_options)
44
47
 
45
48
  jobnet = RootJobNet.load_auto(@ctx, opts.jobnet_files)
@@ -54,16 +57,18 @@ module Bricolage
54
57
  exit EXIT_SUCCESS
55
58
  end
56
59
 
60
+ queue = make_queue(opts)
61
+ if queue.locked?(jobnet)
62
+ raise ParameterError, "Job queue is still locked. If you are sure to restart jobnet, #{queue.unlock_help(jobnet)}"
63
+ end
57
64
  if opts.clear_queue?
58
- clear_queue(opts)
65
+ queue.cancel_jobnet(jobnet, 'cancelled by --clear-queue')
66
+ logger.info "queue is unlocked and cleared"
59
67
  exit EXIT_SUCCESS
60
68
  end
61
- queue = get_queue(opts)
62
- if queue.locked?
63
- raise ParameterError, "Job queue is still locked. If you are sure to restart jobnet, #{queue.unlock_help}"
64
- end
65
- unless queue.queued?
66
- enqueue_jobs jobnet, queue
69
+ queue.restore_jobnet(jobnet)
70
+ if queue.empty?
71
+ queue.enqueue_jobnet(jobnet)
67
72
  end
68
73
 
69
74
  if opts.list_jobs?
@@ -91,18 +96,30 @@ module Bricolage
91
96
  @ctx.logger
92
97
  end
93
98
 
94
- def clear_queue(opts)
95
- if path = get_queue_file_path(opts)
96
- FileUtils.rm_f path
99
+ def make_queue(opts)
100
+ if opts.db_name
101
+ logger.info "Enables DB queue: datasource=#{opts.db_name}"
102
+ datasource = @ctx.get_data_source('psql', opts.db_name)
103
+ executor_id = get_executor_id(opts.executor_type)
104
+ DatabaseTaskQueue.new(datasource: datasource, executor_id: executor_id, enable_lock: false)
105
+ elsif path = get_queue_file_path(opts)
106
+ logger.info "Enables file queue: #{path}"
107
+ FileTaskQueue.new(path: path)
108
+ else
109
+ MemoryTaskQueue.new
97
110
  end
98
111
  end
99
112
 
100
- def get_queue(opts)
101
- if path = get_queue_file_path(opts)
102
- logger.info "queue path: #{path}"
103
- FileTaskQueue.restore_if_exist(path)
113
+ def get_executor_id(executor_type)
114
+ # executor_id is 'TaskID:PID' or 'Hostname:PID'
115
+ if executor_type == 'ecs'
116
+ uri = URI.parse("#{ENV['ECS_CONTAINER_METADATA_URI']}/task")
117
+ response = Net::HTTP.get_response(uri)
118
+ task_id = JSON.parse(response.body)['TaskARN'].split('/').last
119
+ "#{task_id}:#{$$}"
104
120
  else
105
- TaskQueue.new
121
+ hostname = Socket.gethostname
122
+ "#{hostname}:#{$$}"
106
123
  end
107
124
  end
108
125
 
@@ -124,45 +141,38 @@ module Bricolage
124
141
  path.basename.to_s
125
142
  end
126
143
 
127
- def enqueue_jobs(jobnet, queue)
128
- seq = 1
129
- jobnet.sequential_jobs.each do |ref|
130
- queue.enq JobTask.new(ref)
131
- seq += 1
132
- end
133
- queue.save
134
- end
135
-
136
144
  def list_jobs(queue)
137
- queue.each do |task|
138
- puts task.job
145
+ queue.each do |job|
146
+ puts job
139
147
  end
140
148
  end
141
149
 
142
150
  def check_jobs(queue)
143
- queue.each do |task|
144
- Job.load_ref(task.job, @ctx).compile
151
+ queue.each do |job|
152
+ Job.load_ref(job, @ctx).compile
145
153
  end
146
154
  end
147
155
 
148
156
  def run_queue(queue)
157
+ result = nil
158
+ job = nil
149
159
  @hooks.run_before_all_jobs_hooks(BeforeAllJobsEvent.new(@jobnet_id, queue))
150
- queue.consume_each do |task|
151
- result = execute_job(task.job, queue)
152
- unless result.success?
153
- logger.elapsed_time 'jobnet total: ', (Time.now - @jobnet_start_time)
154
- logger.error "[job #{task.job}] #{result.message}"
155
- @hooks.run_after_all_jobs_hooks(AfterAllJobsEvent.new(false, queue))
156
- exit result.status
157
- end
160
+ queue.consume_each do |job|
161
+ result = execute_job(job, queue)
158
162
  end
159
- @hooks.run_after_all_jobs_hooks(AfterAllJobsEvent.new(true, queue))
163
+ @hooks.run_after_all_jobs_hooks(AfterAllJobsEvent.new(result.success?, queue))
160
164
  logger.elapsed_time 'jobnet total: ', (Time.now - @jobnet_start_time)
161
- logger.info "status all green"
165
+
166
+ if result.success?
167
+ logger.info "status all green"
168
+ else
169
+ logger.error "[job #{job}] #{result.message}"
170
+ exit result.status
171
+ end
162
172
  end
163
173
 
164
174
  def execute_job(ref, queue)
165
- logger.debug "job #{ref}"
175
+ logger.info "job #{ref}"
166
176
  job_start_time = Time.now
167
177
  job = Job.load_ref(ref, @ctx)
168
178
  job.compile
@@ -211,7 +221,7 @@ module Bricolage
211
221
  def initialize(app)
212
222
  @app = app
213
223
  @environment = nil
214
- @global_variables = Variables.new
224
+ @option_variables = Variables.new
215
225
  @jobnet_files = nil
216
226
 
217
227
  @dump_options = false
@@ -226,7 +236,9 @@ module Bricolage
226
236
  super.merge({
227
237
  'local-state-dir' => OptionValue.new('default value', '/tmp/bricolage'),
228
238
  'enable-queue' => OptionValue.new('default value', false),
229
- 'queue-path' => OptionValue.new('default value', nil)
239
+ 'queue-path' => OptionValue.new('default value', nil),
240
+ 'db-name' => OptionValue.new('default value', nil),
241
+ 'ecs-executor' => OptionValue.new('default value', false)
230
242
  })
231
243
  end
232
244
  private :opts_default
@@ -277,15 +289,22 @@ Options:
277
289
  parser.on('--queue-path=PATH', 'Enables job queue with this path.') {|path|
278
290
  @opts_cmdline['queue-path'] = OptionValue.new('--queue-path option', path)
279
291
  }
292
+ parser.on('--db-name=DB_NAME', 'Enables job queue with this database.') {|db_name|
293
+ @opts_cmdline['db-name'] = OptionValue.new('--db-name option', db_name)
294
+ }
295
+ parser.on('--ecs-executor', 'Set executor type as ECS ') {
296
+ @opts_cmdline['ecs-executor'] = OptionValue.new('--ecs-executor option', true)
297
+ }
298
+
280
299
  parser.on('-c', '--check-only', 'Checks job parameters and quit without executing.') {
281
300
  @check_only = true
282
301
  }
283
302
  parser.on('-l', '--list-jobs', 'Lists target jobs without executing.') {
284
303
  @list_jobs = true
285
304
  }
286
- parser.on('-v', '--variable=NAME=VALUE', 'Defines global variable.') {|name_value|
305
+ parser.on('-v', '--variable=NAME=VALUE', 'Defines option variable.') {|name_value|
287
306
  name, value = name_value.split('=', 2)
288
- @global_variables[name] = value
307
+ @option_variables[name] = value
289
308
  }
290
309
  parser.on('--dump-options', 'Shows option parsing result and quit.') {
291
310
  @dump_options = true
@@ -317,7 +336,7 @@ Options:
317
336
 
318
337
  attr_reader :jobnet_files
319
338
 
320
- attr_reader :global_variables
339
+ attr_reader :option_variables
321
340
 
322
341
  def dump_options?
323
342
  @dump_options
@@ -352,6 +371,24 @@ Options:
352
371
  end
353
372
  end
354
373
 
374
+ def db_name
375
+ opt = @opts['db-name']
376
+ if opt.value
377
+ opt.value
378
+ else
379
+ nil
380
+ end
381
+ end
382
+
383
+ def executor_type
384
+ opt = @opts['ecs-executor']
385
+ if opt.value
386
+ 'ecs'
387
+ else
388
+ 'ec2'
389
+ end
390
+ end
391
+
355
392
  def clear_queue?
356
393
  @clear_queue
357
394
  end
@@ -20,9 +20,9 @@ module Bricolage
20
20
 
21
21
  DEFAULT_ROTATION_SIZE = 1024 ** 2 * 100 # 100MB
22
22
 
23
- def Logger.new(device: $stderr, rotation_period: nil, rotation_size: DEFAULT_ROTATION_SIZE)
23
+ def Logger.new(device: $stderr, level: nil, rotation_period: nil, rotation_size: DEFAULT_ROTATION_SIZE)
24
24
  logger = super(device, (rotation_period || 0), rotation_size)
25
- logger.level = (device == $stderr && $stderr.tty?) ? Logger::DEBUG : Logger::INFO
25
+ logger.level = level || Logger::INFO
26
26
  logger.formatter = -> (sev, time, prog, msg) {
27
27
  "#{time}: #{sev}: #{msg}\n"
28
28
  }
@@ -1,3 +1,5 @@
1
+ require 'fileutils'
2
+
1
3
  module Bricolage
2
4
  class LogLocator
3
5
  def LogLocator.empty
@@ -63,9 +65,25 @@ module Bricolage
63
65
  puts "bricolage: S3 log: #{s3_url}"
64
66
  begin
65
67
  @s3_writer.upload(path)
68
+ # tmp: Prints & removes the local file if the S3 upload succeeded.
69
+ # Leaving local files seems to cause unexpected Docker failures, so try removing them here.
70
+ puts File.read(path)
71
+ FileUtils.rm_f(path)
72
+ cleanup_local_dirs(File.dirname(path))
66
73
  rescue => ex
67
- puts "warning: S3 upload failed: #{s3_url}"
74
+ $stderr.puts "warning: S3 upload failed: #{ex.class} #{ex.message}: #{s3_url}"
75
+ end
76
+ end
77
+
78
+ # Removes empty directories recursively
79
+ def cleanup_local_dirs(path)
80
+ dir_path = path
81
+ until dir_path == '/' or dir_path == '.'
82
+ Dir.rmdir(dir_path)
83
+ dir_path = File.dirname(dir_path)
68
84
  end
85
+ rescue SystemCallError
86
+ return # ignore
69
87
  end
70
88
  end
71
89
  end
@@ -87,7 +87,7 @@ module Bricolage
87
87
  private :querying
88
88
 
89
89
  def execute_update(query)
90
- log_query query
90
+ log_query query, @ds.update_sql_log_level
91
91
  rs = log_elapsed_time {
92
92
  querying {
93
93
  @connection.async_exec(query)
@@ -97,6 +97,8 @@ module Bricolage
97
97
  rescue PG::ConnectionBad, PG::UnableToSend => ex
98
98
  @connection_failed = true
99
99
  raise ConnectionError.wrap(ex)
100
+ rescue PG::UniqueViolation => ex
101
+ raise UniqueViolationException.wrap(ex)
100
102
  rescue PG::Error => ex
101
103
  raise PostgreSQLException.wrap(ex)
102
104
  ensure
@@ -128,7 +130,7 @@ module Bricolage
128
130
  end
129
131
 
130
132
  def execute_query(query, &block)
131
- log_query query
133
+ log_query query, @ds.query_sql_log_level
132
134
  rs = log_elapsed_time {
133
135
  querying {
134
136
  @connection.async_exec(query)
@@ -262,8 +264,8 @@ module Bricolage
262
264
  execute("lock #{table}")
263
265
  end
264
266
 
265
- def log_query(query)
266
- @logger.log(@ds.sql_log_level) { "[#{@ds.name}] #{mask_secrets query}" }
267
+ def log_query(query, log_level = @ds.sql_log_level)
268
+ @logger.log(log_level) { "[#{@ds.name}] #{mask_secrets query}" }
267
269
  end
268
270
 
269
271
  def mask_secrets(msg)
@@ -15,6 +15,8 @@ module Bricolage
15
15
 
16
16
  include CommandUtils
17
17
 
18
+ DEFAULT_RETRY_LIMIT = 3
19
+
18
20
  def initialize(
19
21
  host: 'localhost',
20
22
  port: 5439,
@@ -24,7 +26,9 @@ module Bricolage
24
26
  pgpass: nil,
25
27
  encoding: nil,
26
28
  psql: 'psql',
27
- sql_log_level: Logger::INFO,
29
+ sql_log_level: nil,
30
+ query_sql_log_level: nil,
31
+ update_sql_log_level: nil,
28
32
  tmpdir: Dir.tmpdir)
29
33
  @host = host
30
34
  @port = port
@@ -34,8 +38,11 @@ module Bricolage
34
38
  @pgpass = pgpass
35
39
  @encoding = encoding
36
40
  @psql = psql
37
- @sql_log_level = Logger.intern_severity(sql_log_level)
41
+ @sql_log_level = Logger.intern_severity(sql_log_level || Logger::DEBUG)
42
+ @query_sql_log_level = Logger.intern_severity(query_sql_log_level || sql_log_level || Logger::DEBUG)
43
+ @update_sql_log_level = Logger.intern_severity(update_sql_log_level || sql_log_level || Logger::INFO)
38
44
  @tmpdir = tmpdir
45
+ @connection_pool = []
39
46
  raise ParameterError, "missing psql host" unless @host
40
47
  raise ParameterError, "missing psql port" unless @port
41
48
  raise ParameterError, "missing psql database" unless @database
@@ -51,6 +58,8 @@ module Bricolage
51
58
  attr_reader :user
52
59
 
53
60
  attr_reader :sql_log_level
61
+ attr_reader :query_sql_log_level
62
+ attr_reader :update_sql_log_level
54
63
 
55
64
  def new_task
56
65
  PSQLTask.new(self)
@@ -109,7 +118,48 @@ module Bricolage
109
118
  end
110
119
 
111
120
  def open(&block)
112
- PostgresConnection.open_data_source(self, &block)
121
+ retries = (ENV['BRICOLAGE_OPEN_RETRY_LIMIT'] || DEFAULT_RETRY_LIMIT).to_i
122
+ begin
123
+ conn = PostgresConnection.open_data_source(self)
124
+ conn.execute_query('select 1'){}
125
+ rescue PG::ConnectionBad, PG::UnableToSend => ex
126
+ retries -= 1
127
+ if retries >= 0
128
+ logger.warn "Retry PG connection for execute query: #{ex.message}"
129
+ sleep 1
130
+ retry
131
+ end
132
+ end
133
+ if block_given?
134
+ yield conn
135
+ else
136
+ return conn
137
+ end
138
+ end
139
+
140
+ def open_shared_connection
141
+ raise ParameterError, 'open_shared_connection require block' unless block_given?
142
+ conn = nil
143
+ if @connection_pool.empty?
144
+ conn = open
145
+ else
146
+ begin
147
+ conn = @connection_pool.shift
148
+ conn.execute_query('select 1'){}
149
+ rescue
150
+ conn.close
151
+ conn = open
152
+ end
153
+ end
154
+
155
+ yield conn
156
+ ensure
157
+ @connection_pool.push(conn)
158
+ end
159
+
160
+ def clear_connection_pool
161
+ @connection_pool.map(&:close)
162
+ @connection_pool = []
113
163
  end
114
164
 
115
165
  def query_batch(query, batch_size = 5000, &block)
@@ -328,7 +378,6 @@ module Bricolage
328
378
  unless src_ds.redshift_loader_source?
329
379
  raise ParameterError, "input data source does not support redshift as bulk loading source: #{src_ds.name}"
330
380
  end
331
- opts.provide_defaults(src_ds)
332
381
  buf = StringIO.new
333
382
  buf.puts "copy #{dest_table}"
334
383
  buf.puts "from '#{src_ds.url(src_path)}'"
@@ -348,12 +397,23 @@ module Bricolage
348
397
  when 'csv'
349
398
  %q(delimiter ',')
350
399
  when 'json'
351
- jsonpath ? "json \'#{src_ds.url(jsonpath)}\'" : %q(json 'auto')
400
+ "json '#{json_param(jsonpath)}'"
352
401
  else
353
402
  raise ParameterError, "unsupported format: #{fmt}"
354
403
  end
355
404
  end
356
405
 
406
+ def json_param(jsonpath)
407
+ case jsonpath
408
+ when nil
409
+ 'auto'
410
+ when %r{\As3://}
411
+ jsonpath
412
+ else
413
+ src_ds.url(jsonpath)
414
+ end
415
+ end
416
+
357
417
  def unload(stmt, dest_ds, dest_path, format, opts)
358
418
  exec unload_statement(stmt, dest_ds, dest_path, format, opts)
359
419
  end
@@ -384,7 +444,7 @@ module Bricolage
384
444
  end
385
445
 
386
446
  def format_query(query)
387
- query.gsub(/^--.*/, '').strip.gsub(/[ \t]*\n[ \t]*/, ' ').gsub("'", "\\\\'")
447
+ query.gsub(/^--.*/, '').strip.gsub(/[ \t]*\n[ \t]*/, ' ').gsub(/\\/,"\\\\\\\\").gsub("'", "\\\\'")
388
448
  end
389
449
  end
390
450