bricolage 5.29.0 → 6.0.0beta2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 937a773a3be70440b08a33edea44480af5b5b4e9857081e5e9dfafeff7298cf0
-  data.tar.gz: 373f091f9ce32ade9862e6c9d114c0ed760166a050ba67b3fa25d29fa42183b2
+  metadata.gz: dd96a97704f116e03c432aaab65d8ee28e76a571415fc8a11ad6ce7c0d594530
+  data.tar.gz: 271b47cee82b89c910f5ea1c9804d2be272b1817dc1ee0c5e8d5125e194f13d7
 SHA512:
-  metadata.gz: 338d2113c8fc175a15f57fcaae506f83da82c60179bdbcf828fb68da45545f2988afec5eed0a70a79292030d93cb97d68862da2e716f6c409f80eb1cf1d0874f
-  data.tar.gz: afa065e5b744ae159a018a050a9e2bb693fce818bc799e85c762b42291597ab96e64223d8569d0c17559ed96e16fe8771b342f2b632f490a43e93e902dcd58aa
+  metadata.gz: 45cd7ee922e61980526c5b08729be5b9021ada7c8c13a32124b233d2d796a8a10d7bd23f920bcd6b46822828b7acd45b39941c8721c242f8aa127ecd062568ed
+  data.tar.gz: 26f8c7375451d2418f968fb537d6359f4bc16afb6a95ab0f489b3c73b5b02d4ffa44070771a204e9a3b6b26b63768842457fef977dd4b1a24de2b71f8d9b600b
data/.gitignore CHANGED
@@ -17,6 +17,4 @@ tmp
 _yardoc
 doc/
 
-/dev
-/test/home/config
-Gemfile.lock
+/Gemfile.lock
data/README.md CHANGED
@@ -13,6 +13,10 @@ See LICENSES file for details.
 
     % rake test
 
+## How to use
+
+See https://github.com/bricolages/bricolage/wiki
+
 ## Author
 
 Minero Aoki
data/RELEASE.md CHANGED
@@ -1,5 +1,27 @@
 # Bricolage Release Note
 
+## version 6.0.0 beta 2
+
+- [fix] Always transmit error messages from jobs in the jobnet.
+
+## version 6.0.0 beta 1
+
+- [new] Introduces the database queue, which saves job states in a PostgreSQL instance, so bricolage can now run in container environments.
+- [new] The default log level is DEBUG in the development environment and INFO in the production environment.
+- [new] Only updating queries are logged at INFO level; read-only queries are logged at DEBUG level.
+
+## version 5.30.0
+
+- [new] streaming_load: new option --ctl-ds to change the S3 data source for metadata files.
+
+## version 5.29.2
+
+- [new] load: Allows setting jsonpath by a direct S3 URL instead of a relative S3 key.
+
+## version 5.29.1
+
+- [fix] PSQLDataSource: slice_last_stderr may fail when $stderr is not a regular file.
+
 ## version 5.29.0
 
 - [new] bricolage-jobnet command accepts multiple jobs/jobnets and executes them sequentially.
@@ -23,4 +23,5 @@ Gem::Specification.new do |s|
   s.add_development_dependency 'test-unit'
   s.add_development_dependency 'rake'
   s.add_development_dependency 'mocha'
+  s.add_development_dependency 'pry-byebug'
 end
@@ -18,7 +18,7 @@ JobClass.define('create') {
       task.drop_force_if params['drop']
       task.exec params['table-def']
       task.analyze_if params['analyze']
-      task.grant_if params['grant'], params['dest-table']
+      task.grant_if params['grant'], '$dest_table'
     }
   }
 }
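This hunk (and the parallel ones in the job classes below) passes the literal string '$dest_table' instead of params['dest-table'], deferring resolution to bricolage's variable expansion so the table name is substituted together with the job's other variables. A minimal sketch of the idea; expand is a hypothetical stand-in, not the gem's actual API:

```ruby
# Hypothetical illustration of $var / ${var} expansion; bricolage's real
# substitution lives in its Variables machinery, not in this helper.
def expand(template, vars)
  template.gsub(/\$\{(\w+)\}|\$(\w+)/) { vars.fetch($1 || $2) }
end

vars = { 'dest_table' => 'public.users' }
expand('$dest_table', vars)        # => "public.users"
expand('${dest_table}_old', vars)  # => "public.users_old"
```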
@@ -17,7 +17,7 @@ JobClass.define('createview') {
     task.transaction {
       task.drop_view_force_if params['drop']
       task.exec params['sql-file']
-      task.grant_if params['grant'], params['dest-table']
+      task.grant_if params['grant'], '$dest_table'
     }
   }
 }
@@ -35,9 +35,9 @@ JobClass.define('load') {
     task.transaction {
       task.drop_force '${dest_table}'
       task.exec params['table-def']
-      task.load params['src-ds'], params['src-file'], params['dest-table'],
+      task.load params['src-ds'], params['src-file'], '$dest_table',
                 params['format'], params['jsonpath'], params['options']
-      task.grant_if params['grant'], params['dest-table']
+      task.grant_if params['grant'], '$dest_table'
     }
     # ANALYZE and VACUUM are needless for a newly loaded table; skip them always.
 
@@ -49,9 +49,9 @@ JobClass.define('load') {
 
     task.truncate_if params['truncate']
     task.transaction {
-      task.load params['src-ds'], params['src-file'], params['dest-table'],
+      task.load params['src-ds'], params['src-file'], '$dest_table',
                 params['format'], params['jsonpath'], params['options']
-      task.grant_if params['grant'], params['dest-table']
+      task.grant_if params['grant'], '$dest_table'
     }
     # ANALYZE and VACUUM are needless for a newly loaded table; skip them always.
 
@@ -59,9 +59,9 @@ JobClass.define('load') {
     # load only pattern
 
     task.transaction {
-      task.load params['src-ds'], params['src-file'], params['dest-table'],
+      task.load params['src-ds'], params['src-file'], '$dest_table',
                 params['format'], params['jsonpath'], params['options']
-      task.grant_if params['grant'], params['dest-table']
+      task.grant_if params['grant'], '$dest_table'
       task.analyze_if params['analyze']
     }
     # We cannot execute VACUUM inside a transaction.
@@ -23,19 +23,19 @@ JobClass.define('rebuild-drop') {
     script.task(params['data-source']) {|task|
       task.transaction {
         # CREATE
-        task.drop_force params['dest-table']
+        task.drop_force '$dest_table'
         task.exec params['table-def']
 
         # INSERT
         task.exec params['sql-file']
 
         # GRANT
-        task.grant_if params['grant'], params['dest-table']
+        task.grant_if params['grant'], '$dest_table'
       }
 
       # VACUUM, ANALYZE
-      task.vacuum_if params['vacuum'], params['vacuum-sort'], params['dest-table']
-      task.analyze_if params['analyze'], params['dest-table']
+      task.vacuum_if params['vacuum'], params['vacuum-sort'], '$dest_table'
+      task.analyze_if params['analyze'], '$dest_table'
     }
   }
 }
@@ -21,6 +21,7 @@ JobClass.define('rebuild-rename') {
 
   script {|params, script|
     script.task(params['data-source']) {|task|
+      dest_table = '$dest_table'
       prev_table = '${dest_table}_old'
       work_table = '${dest_table}_wk'
 
@@ -43,10 +44,9 @@ JobClass.define('rebuild-rename') {
 
       # RENAME
       task.transaction {
-        task.create_dummy_table '$dest_table'
-        dest_table = params['dest-table']
-        task.rename_table dest_table.to_s, "#{dest_table.name}_old"
-        task.rename_table "#{dest_table}_wk", dest_table.name
+        task.create_dummy_table dest_table
+        task.rename_table dest_table, prev_table
+        task.rename_table work_table, dest_table
      }
     }
   }
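Note how the rewritten RENAME step reuses the variables bound at the top of the task block: create a dummy destination first (so the first rename cannot fail when the destination does not yet exist), then swap the freshly built work table into place inside one transaction. A hedged sketch with the presumed SQL in comments, using 'users' as a hypothetical expanded name:

```ruby
task.transaction {
  task.create_dummy_table dest_table        # ensure "users" exists so the rename below succeeds
  task.rename_table dest_table, prev_table  # presumably: ALTER TABLE users RENAME TO users_old
  task.rename_table work_table, dest_table  # presumably: ALTER TABLE users_wk RENAME TO users
}
```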
@@ -25,7 +25,7 @@ JobClass.define('sql') {
       task.exec params['sql-file']
       task.vacuum_if params['vacuum'], params['vacuum-sort']
       task.analyze_if params['analyze']
-      task.grant_if params['grant'], params['dest-table']
+      task.grant_if params['grant'], '$dest_table'
     }
   }
 }
@@ -18,6 +18,7 @@ class StreamingLoadJobClass < RubyJobClass
                  optional: true, default: Bricolage::PSQLLoadOptions.new,
                  value_handler: lambda {|value, ctx, vars| Bricolage::PSQLLoadOptions.parse(value) })
       params.add Bricolage::DataSourceParam.new('s3', 's3-ds', 'S3 data source.')
+      params.add Bricolage::DataSourceParam.new('s3', 'ctl-ds', 'S3 data source for control files. (default: $s3-ds)', optional: true)
       params.add Bricolage::StringParam.new('ctl-prefix', 'S3_PREFIX', 'S3 object key prefix for control files. (default: ${queue-path}/ctl)', optional: true)
       params.add Bricolage::OptionalBoolParam.new('keep-ctl', 'Does not delete control files if true.')
       params.add Bricolage::StringParam.new('queue-path', 'S3_PATH', 'S3 path for data file queue.')
@@ -59,8 +60,6 @@ class StreamingLoadJobClass < RubyJobClass
 
     def make_loader(params)
       ds = params['redshift-ds']
-      load_opts = params['load-options']
-      load_opts.provide_defaults(params['s3-ds'])
       RedshiftStreamingLoader.new(
         data_source: ds,
         queue: make_s3_queue(params),
@@ -68,7 +67,7 @@ class StreamingLoadJobClass < RubyJobClass
         table: string(params['dest-table']),
         work_table: string(params['work-table']),
         log_table: string(params['log-table']),
-        load_options: load_opts,
+        load_options: params['load-options'],
         sql: params['sql-file'],
         logger: ds.logger,
         noop: params['noop'],
@@ -81,6 +80,7 @@ class StreamingLoadJobClass < RubyJobClass
       ds = params['s3-ds']
       S3Queue.new(
         data_source: ds,
+        ctl_ds: (params['ctl-ds'] || params['s3-ds']),
         ctl_prefix: (params['ctl-prefix'] || "#{params['queue-path']}/ctl"),
         queue_path: params['queue-path'],
         persistent_path: params['persistent-path'],
@@ -212,7 +212,7 @@ class StreamingLoadJobClass < RubyJobClass
       end
       @logger.info "creating manifest: #{manifest_name}"
       json = make_manifest_json(objects)
-      @logger.info "manifest:\n" + json
+      @logger.debug "manifest:\n" + json
       url = @src.put_control_file(manifest_name, json, noop: @noop)
       yield url
       @src.remove_control_file(File.basename(url), noop: @noop) unless @keep_ctl
@@ -241,7 +241,7 @@ class StreamingLoadJobClass < RubyJobClass
       log_name = "load_log-#{@job_process_id}.csv"
       @logger.info "creating tmp load log: #{log_name}"
       csv = make_load_log_csv(objects)
-      @logger.info "load_log:\n" + csv
+      @logger.debug "load_log:\n" + csv
       url = @src.put_control_file(log_name, csv, noop: @noop)
       begin
         yield url
@@ -362,8 +362,9 @@ class StreamingLoadJobClass < RubyJobClass
     class S3Queue
       extend Forwardable
 
-      def initialize(data_source:, ctl_prefix:, queue_path:, persistent_path:, file_name:, logger:)
+      def initialize(data_source:, ctl_ds:, ctl_prefix:, queue_path:, persistent_path:, file_name:, logger:)
         @ds = data_source
+        @ctl_ds = ctl_ds
         @ctl_prefix = ctl_prefix
         @queue_path = queue_path
         @persistent_path = persistent_path
@@ -388,18 +389,18 @@ class StreamingLoadJobClass < RubyJobClass
       end
 
       def control_file_url(name)
-        @ds.url(control_file_path(name))
+        @ctl_ds.url(control_file_path(name))
       end
 
       def put_control_file(name, data, noop: false)
         @logger.info "s3 put: #{control_file_url(name)}"
-        @ds.object(control_file_path(name)).put(body: data) unless noop
+        @ctl_ds.object(control_file_path(name)).put(body: data) unless noop
         control_file_url(name)
       end
 
       def remove_control_file(name, noop: false)
         @logger.info "s3 delete: #{control_file_url(name)}"
-        @ds.object(control_file_path(name)).delete unless noop
+        @ctl_ds.object(control_file_path(name)).delete unless noop
       end
 
       def control_file_path(name)
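Taken together, these hunks route every control-file operation (URL building, put, delete) through the new ctl_ds, while queued data files keep using the primary data source; when --ctl-ds is omitted, make_s3_queue falls back to s3-ds. A hedged construction sketch; all concrete values here are hypothetical:

```ruby
# Two S3 data sources: one for the data-file queue, one for control files
# (manifests and load logs). ctl_ds falls back to the queue's data source.
queue = S3Queue.new(
  data_source: s3_ds,                # queued data files
  ctl_ds: ctl_ds || s3_ds,           # control files only
  ctl_prefix: 'daily/queue/ctl',     # hypothetical prefix
  queue_path: 'daily/queue',
  persistent_path: 'daily/archive',
  file_name: 'data-%Y%m%d.json.gz',  # hypothetical pattern
  logger: logger
)

url = queue.put_control_file('manifest-0001.json', json)  # writes via ctl_ds
queue.remove_control_file(File.basename(url))             # deletes via ctl_ds
```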
@@ -39,7 +39,7 @@ module Bricolage
       @hooks.run_before_option_parsing_hooks(opts)
       opts.parse!(ARGV)
 
-      @ctx = Context.for_application(opts.home, opts.job_file, environment: opts.environment, global_variables: opts.global_variables)
+      @ctx = Context.for_application(opts.home, opts.job_file, environment: opts.environment, option_variables: opts.option_variables)
       opts.merge_saved_options(@ctx.load_system_options)
 
       if opts.dump_options?
@@ -294,7 +294,7 @@ module Bricolage
       @job_file = nil
       @environment = nil
       @home = nil
-      @global_variables = Variables.new
+      @option_variables = Variables.new
       @dry_run = false
       @explain = false
       @list_global_variables = false
@@ -351,9 +351,9 @@ Global Options:
       parser.on('-r', '--require=FEATURE', 'Requires ruby library.') {|feature|
         require feature
       }
-      parser.on('-v', '--variable=NAME=VALUE', 'Set global variable (is different from job-level -v !!).') {|name_value|
+      parser.on('-v', '--variable=NAME=VALUE', 'Set option variable.') {|name_value|
         name, value = name_value.split('=', 2)
-        @global_variables[name] = value
+        @option_variables[name] = value
       }
       parser.on('--dump-options', 'Shows option parsing result and quit.') {
         @dump_options = true
@@ -401,7 +401,7 @@ Global Options:
         @dump_options
       end
 
-      attr_reader :global_variables
+      attr_reader :option_variables
 
       def list_global_variables?
         @list_global_variables
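The renamed -v/--variable option still takes NAME=VALUE pairs, and split('=', 2) guarantees that only the first '=' separates the name from the value, so values may themselves contain '='. For example:

```ruby
'schema=public'.split('=', 2)  # => ["schema", "public"]
'expr=a=b+1'.split('=', 2)     # => ["expr", "a=b+1"]  (value keeps its '=')
```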
@@ -19,7 +19,7 @@ module Bricolage
       FileSystem.home_path(opt_path)
     end
 
-    def Context.for_application(home_path = nil, job_path_0 = nil, job_path: nil, environment: nil, global_variables: nil, logger: nil)
+    def Context.for_application(home_path = nil, job_path_0 = nil, job_path: nil, environment: nil, option_variables: nil, logger: nil)
       env = environment(environment)
       if (job_path ||= job_path_0)
         fs = FileSystem.for_job_path(job_path, env)
@@ -29,24 +29,28 @@ module Bricolage
       else
         fs = FileSystem.for_options(home_path, env)
       end
-      load(fs, env, global_variables: global_variables, logger: logger)
+      load(fs, env, option_variables: option_variables, logger: logger)
     end
 
-    def Context.load(fs, env, global_variables: nil, data_sources: nil, logger: nil)
-      new(fs, env, global_variables: global_variables, logger: logger).tap {|ctx|
+    def Context.load(fs, env, option_variables: nil, data_sources: nil, logger: nil)
+      new(fs, env, option_variables: option_variables, logger: logger).tap {|ctx|
         ctx.load_configurations
       }
     end
     private_class_method :load
 
-    def initialize(fs, env, global_variables: nil, data_sources: nil, logger: nil)
-      @logger = logger || Logger.default
+    def initialize(fs, env, option_variables: nil, data_sources: nil, logger: nil)
+      @logger = logger || default_logger(env)
       @filesystem = fs
       @environment = env
-      @opt_global_variables = global_variables || Variables.new
+      @option_variables = option_variables || Variables.new
       @data_sources = data_sources
     end
 
+    private def default_logger(env)
+      Logger.new(device: $stderr, level: Logger::INFO)
+    end
+
     def load_configurations
       @filesystem.config_pathes('prelude.rb').each do |path|
         EmbeddedCodeAPI.module_eval(File.read(path), path.to_s, 1) if path.exist?
@@ -56,6 +60,7 @@ module Bricolage
 
     attr_reader :environment
     attr_reader :logger
+    attr_reader :option_variables
 
     def get_data_source(type, name)
       @data_sources.get(type, name)
@@ -63,7 +68,7 @@ module Bricolage
 
     def subsystem(id)
       self.class.new(@filesystem.subsystem(id), @environment,
-                     global_variables: @opt_global_variables,
+                     option_variables: @option_variables,
                      data_sources: @data_sources,
                      logger: @logger)
     end
@@ -102,7 +107,6 @@ module Bricolage
       Variables.union(
         builtin_variables,
         load_global_variables,
-        @opt_global_variables
       )
     end
 
@@ -130,8 +134,11 @@ module Bricolage
 
     def load_variables(path)
       Variables.define {|vars|
-        @filesystem.config_file_loader.load_yaml(path).each do |name, value|
-          vars[name] = value
+        kvs = @filesystem.config_file_loader.load_yaml(path)
+        if kvs
+          kvs.each do |name, value|
+            vars[name] = value
+          end
         end
       }
     end
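The new guard matters because loading an empty variable file yields a falsy value rather than a Hash, and calling each on it raised NoMethodError before this fix. A minimal illustration with plain Psych, assuming load_yaml behaves like YAML.load here:

```ruby
require 'yaml'

YAML.load("name: value\n")  # => {"name"=>"value"}
kvs = YAML.load('')         # => false (Psych 3) or nil (Psych 4); falsy either way

# Without the guard, kvs.each would raise NoMethodError for an empty file.
if kvs
  kvs.each {|name, value| puts "#{name}=#{value}" }
end
```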
@@ -0,0 +1,184 @@
+require 'bricolage/exception'
+
+module Bricolage
+  module DAO
+
+    class Job
+      include SQLUtils
+
+      Attributes = Struct.new(:id, :subsystem, :job_name, :jobnet_id, :executor_id, keyword_init: true)
+
+      def Job.for_record(r)
+        Attributes.new(
+          id: r['job_id']&.to_i,
+          subsystem: r['subsystem'],
+          job_name: r['job_name'],
+          jobnet_id: r['jobnet_id']&.to_i,
+          executor_id: r['executor_id']
+        )
+      end
+
+      def initialize(datasource)
+        @datasource = datasource
+      end
+
+      private def connect(&block)
+        @datasource.open_shared_connection(&block)
+      end
+
+      def find_or_create(jobnet_id, job_ref)
+        connect {|conn|
+          job = find(conn, jobnet_id, job_ref)   # optimize the most frequent case
+          if job
+            job
+          else
+            begin
+              create(conn, jobnet_id, job_ref)
+            rescue UniqueViolationException
+              find(conn, jobnet_id, job_ref) or raise "[BUG] Could not find/create job record: jobnet_id=#{jobnet_id}, ref=#{job_ref}"
+            end
+          end
+        }
+      end
+
+      private def find(conn, jobnet_id, job_ref)
+        record = conn.query_row(<<~EndSQL)
+          select
+              "job_id"
+              , "subsystem"
+              , "job_name"
+              , "executor_id"
+              , jobnet_id
+          from
+              jobs
+          where
+              jobnet_id = #{jobnet_id}
+              and "subsystem" = #{s job_ref.subsystem}
+              and "job_name" = #{s job_ref.name}
+          ;
+        EndSQL
+
+        if record
+          Job.for_record(record)
+        else
+          nil
+        end
+      end
+
+      private def create(conn, jobnet_id, job_ref)
+        record = conn.query_row(<<~EndSQL)
+          insert into jobs
+              ( "subsystem"
+              , "job_name"
+              , jobnet_id
+              )
+          values
+              ( #{s job_ref.subsystem}
+              , #{s job_ref.name}
+              , #{jobnet_id}
+              )
+          returning "job_id", "subsystem", "job_name", jobnet_id
+          ;
+        EndSQL
+
+        Job.for_record(record)
+      end
+
+      def locked?(job_ids)
+        count = connect {|conn|
+          conn.query_value(<<~EndSQL)
+            select
+                count(job_id)
+            from
+                jobs
+            where
+                job_id in (#{job_ids.join(',')})
+                and executor_id is not null
+            ;
+          EndSQL
+        }
+
+        count.to_i > 0
+      end
+
+      def locked_jobs(jobnet_id)
+        records = connect {|conn|
+          conn.query_rows(<<~EndSQL)
+            select
+                "job_id"
+                , "subsystem"
+                , "job_name"
+                , jobnet_id
+                , "executor_id"
+            from
+                jobs
+            where
+                jobnet_id = #{jobnet_id}
+                and executor_id is not null
+            ;
+          EndSQL
+        }
+
+        if records.empty?
+          []
+        else
+          records.map {|r| Job.for_record(r) }
+        end
+      end
+
+      def lock(job_id, executor_id)
+        records = connect {|conn|
+          conn.execute_update(<<~EndSQL)
+            update jobs
+            set
+                executor_id = #{s executor_id}
+            where
+                job_id = #{job_id}
+                and executor_id is null
+            returning job_id
+            ;
+          EndSQL
+        }
+
+        if records.empty?
+          raise DoubleLockError, "Could not lock job: job_id=#{job_id}"
+        end
+      end
+
+      # Unlock the job.
+      # Returns true if successfully unlocked, otherwise false.
+      # FIXME: raise an exception on failing unlock?
+      def unlock(job_id, executor_id)
+        records = connect {|conn|
+          conn.execute_update(<<~EndSQL)
+            update jobs
+            set
+                executor_id = null
+            where
+                job_id = #{job_id}
+                and executor_id = #{s executor_id}
+            returning job_id
+            ;
+          EndSQL
+        }
+
+        not records.empty?
+      end
+
+      def clear_lock_all(jobnet_id)
+        connect {|conn|
+          conn.execute_update(<<~EndSQL)
+            update jobs
+            set
+                executor_id = null
+            where
+                jobnet_id = #{jobnet_id}
+            ;
+          EndSQL
+        }
+      end
+
+    end # class Job
+
+  end
+end
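Taken together, the DAO implements a simple lock protocol over the jobs table: find_or_create the job record, lock it with the executor's id before running (the "and executor_id is null" predicate makes the UPDATE a compare-and-set), unlock it afterwards, and keep clear_lock_all as the manual recovery hatch. A hedged usage sketch; the data source name, ids, and run_the_job are assumptions, not the gem's shipped wiring:

```ruby
# Hypothetical executor-side usage of Bricolage::DAO::Job.
dao = Bricolage::DAO::Job.new(ctx.get_data_source('psql', 'bricolage_db'))

job = dao.find_or_create(jobnet_id, job_ref)
begin
  dao.lock(job.id, executor_id)    # raises DoubleLockError if another executor holds it
  run_the_job(job)                 # placeholder for the actual job execution
ensure
  dao.unlock(job.id, executor_id)  # => true on success, false if we did not hold the lock
end
```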