bricolage 5.29.2 → 6.0.0beta4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,253 @@
1
module Bricolage
  module DAO

    # Data access object for the job_executions table and its history
    # table, job_execution_states.
    #
    # Each status change is applied to job_executions first and the
    # resulting row is then copied into job_execution_states.
    #
    # NOTE(review): string values are embedded into SQL through the `s`
    # helper (presumably provided by SQLUtils -- confirm), while numeric IDs
    # and sequences are interpolated as-is. Callers are expected to pass
    # integers for those.
    class JobExecution
      include SQLUtils

      STATUS_WAITING = 'waiting'.freeze
      STATUS_SUCCEEDED = 'succeeded'.freeze
      STATUS_RUNNING = 'running'.freeze
      STATUS_FAILED = 'failed'.freeze
      STATUS_CANCELED = 'canceled'.freeze

      # Value object returned to callers instead of raw result rows.
      Attributes = Struct.new(:job_id, :job_execution_id, :subsystem, :job_name, keyword_init: true)

      # Converts one result row (indexed by column name) into Attributes.
      # ID columns are converted with to_i; a missing (nil) ID stays nil.
      def JobExecution.for_record(r)
        Attributes.new(
          job_id: r['job_id']&.to_i,
          job_execution_id: r['job_execution_id']&.to_i,
          subsystem: r['subsystem'],
          job_name: r['job_name']
        )
      end

      # Builds a DAO that runs every statement on the given connection.
      # No datasource is set in this case.
      def JobExecution.for_connection(conn)
        new(nil, connection: conn)
      end

      # datasource:: object responding to open_shared_connection
      # connection:: optional connection; when given it is used instead of
      #              opening one from the datasource
      def initialize(datasource, connection: nil)
        @datasource = datasource
        @connection = connection
      end

      # Yields the connection given at construction when there is one,
      # otherwise a connection obtained from the datasource.
      private def connect
        if @connection
          yield @connection
        else
          @datasource.open_shared_connection {|conn|
            yield conn
          }
        end
      end

      # Returns Attributes for the executions of the given jobnet whose
      # status is waiting, running or failed, ordered by execution_sequence.
      def enqueued_jobs(jobnet_ref)
        records = connect {|conn|
          conn.query_rows(<<~EndSQL)
            select
                e.job_execution_id
                , e.job_id
                , j.subsystem
                , j.job_name
            from
                job_executions e
                inner join jobs j using (job_id)
                inner join jobnets n using (jobnet_id)
            where
                n.subsystem = #{s jobnet_ref.subsystem}
                and n.jobnet_name = #{s jobnet_ref.name}
                and e.status in (#{s STATUS_WAITING}, #{s STATUS_RUNNING}, #{s STATUS_FAILED})
            order by
                e.execution_sequence
            ;
          EndSQL
        }
        records.map {|r| JobExecution.for_record(r) }
      end

      # Inserts a new execution in waiting status for +job+ at the given
      # sequence position and records the initial state row.
      # Returns Attributes for the new execution; subsystem and job_name are
      # taken from +job+ because the insert returns only the IDs.
      def enqueue_job(job, execution_sequence)
        record = connect {|conn|
          records = conn.execute_update(<<~EndSQL)
            insert into job_executions
                ( job_id
                , execution_sequence
                , status
                , message
                , submitted_at
                )
            values
                ( #{job.id}
                , #{execution_sequence}
                , #{s STATUS_WAITING}
                , ''
                , now()
                )
            returning job_execution_id, job_id
            ;
          EndSQL

          inserted = records.first
          save_state_transition(conn, inserted['job_execution_id'], 'submitted_at')
          # Hand the inserted row back as the block value.
          inserted
        }

        exec = JobExecution.for_record(record)
        exec.subsystem = job.subsystem
        exec.job_name = job.job_name
        exec
      end

      # Marks every waiting, running or failed execution of the given jobnet
      # as canceled with +message+, then copies the affected rows into
      # job_execution_states. Does nothing more when no row was affected.
      def cancel_jobnet(jobnet_ref, message)
        connect {|conn|
          records = conn.execute_update(<<~EndSQL)
            update job_executions
            set
                status = #{s STATUS_CANCELED}
                , message = #{s message}
                , finished_at = now()
            where
                job_id in (
                    select
                        j.job_id
                    from
                        jobs j inner join jobnets n using (jobnet_id)
                    where
                        n.subsystem = #{s jobnet_ref.subsystem}
                        and n.jobnet_name = #{s jobnet_ref.name}
                )
                and status in (#{s STATUS_WAITING}, #{s STATUS_RUNNING}, #{s STATUS_FAILED})
            returning job_execution_id
            ;
          EndSQL

          job_execution_ids = records.map {|r| r['job_execution_id'].to_i }
          # An empty ID list would produce an invalid "in ()" clause.
          unless job_execution_ids.empty?
            conn.execute_update(<<~EndSQL)
              insert into job_execution_states
                  ( job_execution_id
                  , job_id
                  , created_at
                  , status
                  , message
                  )
              select
                  job_execution_id
                  , job_id
                  , finished_at
                  , status
                  , message
              from
                  job_executions
              where
                  job_execution_id in (#{job_execution_ids.join(', ')})
              ;
            EndSQL
          end
        }
      end

      # Moves a waiting or failed execution to running: clears the message,
      # sets started_at and resets finished_at.
      # Raises IllegalJobStateException when no row was updated, i.e. the
      # execution does not exist or is in any other status.
      def transition_to_running(job_execution_id)
        connect {|conn|
          records = conn.execute_update(<<~EndSQL)
            update job_executions
            set
                status = #{s STATUS_RUNNING}
                , message = ''
                , started_at = now()
                , finished_at = null
            where
                job_execution_id = #{job_execution_id}
                and status in (#{s STATUS_WAITING}, #{s STATUS_FAILED})
            returning job_execution_id
            ;
          EndSQL
          if records.empty?
            # The row may be running, succeeded, canceled or missing, so the
            # message does not claim one specific cause.
            raise IllegalJobStateException, "could not transition to running state: job_execution_id=#{job_execution_id}"
          end

          save_state_transition(conn, job_execution_id, 'started_at')
        }
      end

      # Moves a running execution to succeeded and sets finished_at.
      # Raises IllegalJobStateException when the execution is not running
      # (no row was updated).
      def transition_to_succeeded(job_execution_id)
        connect {|conn|
          records = conn.execute_update(<<~EndSQL)
            update job_executions
            set
                finished_at = now()
                , status = #{s STATUS_SUCCEEDED}
                , message = ''
            where
                job_execution_id = #{job_execution_id}
                and status = #{s STATUS_RUNNING}
            returning job_execution_id
            ;
          EndSQL
          if records.empty?
            raise IllegalJobStateException, "could not transition to succeeded state: job_execution_id=#{job_execution_id}"
          end

          save_state_transition(conn, job_execution_id, 'finished_at')
        }
      end

      # Moves a running execution to failed, storing +message+ and setting
      # finished_at.
      # Raises IllegalJobStateException when the execution is not running
      # (no row was updated).
      def transition_to_failed(job_execution_id, message)
        connect {|conn|
          records = conn.execute_update(<<~EndSQL)
            update job_executions
            set
                finished_at = now()
                , status = #{s STATUS_FAILED}
                , message = #{s message}
            where
                job_execution_id = #{job_execution_id}
                and status = #{s STATUS_RUNNING}
            returning job_execution_id
            ;
          EndSQL
          if records.empty?
            raise IllegalJobStateException, "could not transition to failed state: job_execution_id=#{job_execution_id}"
          end

          save_state_transition(conn, job_execution_id, 'finished_at')
        }
      end

      # Copies the current job_executions row into job_execution_states.
      # +time_expr+ is placed verbatim in the select list as the created_at
      # value; this class only passes the column names 'submitted_at',
      # 'started_at' and 'finished_at'. It must never carry external input.
      private def save_state_transition(conn, job_execution_id, time_expr)
        conn.execute_update(<<~EndSQL)
          insert into job_execution_states
              ( job_execution_id
              , job_id
              , created_at
              , status
              , message
              )
          select
              job_execution_id
              , job_id
              , #{time_expr}
              , status
              , message
          from
              job_executions
          where
              job_execution_id = #{job_execution_id}
          ;
        EndSQL
      end

      # For tests only
      # Empties job_execution_states, job_executions, jobs and jobnets,
      # in that order.
      def delete_all
        connect {|conn|
          conn.execute_update(<<~EndSQL)
            delete from job_execution_states;
            delete from job_executions;
            delete from jobs;
            delete from jobnets;
          EndSQL
        }
      end

    end   # class JobExecution

  end
end
@@ -0,0 +1,158 @@
1
module Bricolage
  module DAO
    # Data access object for the jobnets table, including the per-jobnet
    # lock stored in its executor_id column (null means unlocked).
    class JobNet

      include SQLUtils

      # Value object returned to callers instead of raw result rows.
      Attributes = Struct.new(:id, :subsystem, :jobnet_name, keyword_init: true)

      # Converts one result row (indexed by column name) into Attributes.
      # jobnet_id is converted with to_i; a missing (nil) ID stays nil.
      def JobNet.for_record(r)
        Attributes.new(
          id: r['jobnet_id']&.to_i,
          subsystem: r['subsystem'],
          jobnet_name: r['jobnet_name']
        )
      end

      # Converts a list of result rows into a list of Attributes.
      def JobNet.for_records(jobnets)
        jobnets.map {|jobnet| JobNet.for_record(jobnet) }
      end

      # datasource:: object responding to open_shared_connection
      def initialize(datasource)
        @datasource = datasource
      end

      # Runs the block with a connection obtained from the datasource.
      private def connect(&block)
        @datasource.open_shared_connection(&block)
      end

      # Returns Attributes for the jobnet identified by +ref+ (subsystem and
      # name), inserting the row first when it does not exist yet.
      def find_or_create(ref)
        connect {|conn|
          existing = find(conn, ref)
          return existing if existing

          begin
            return create(conn, ref)
          rescue UniqueViolationException
            # The row appeared between our find and insert (presumably
            # inserted by another session -- confirm), so read it back.
            jobnet = find(conn, ref) or raise "[BUG] Could not create jobnet record: #{ref}"
            return jobnet
          end
        }
      end

      # Inserts a jobnets row for +ref+ and returns its Attributes, built
      # from the returned jobnet_id and the values of +ref+.
      private def create(conn, ref)
        records = conn.execute_update(<<~SQL)
          insert into jobnets
              ( "subsystem"
              , jobnet_name
              )
          values
              ( #{s ref.subsystem}
              , #{s ref.name}
              )
          returning jobnet_id
          ;
        SQL

        Attributes.new(
          id: records.first['jobnet_id']&.to_i,
          subsystem: ref.subsystem,
          jobnet_name: ref.name
        )
      end

      # Looks up the jobnet for +ref+ on the given connection.
      # Returns Attributes, or nil when no row was returned.
      private def find(conn, ref)
        record = conn.query_row(<<~EndSQL)
          select
              jobnet_id
              , "subsystem"
              , jobnet_name
          from
              jobnets
          where
              "subsystem" = #{s ref.subsystem}
              and jobnet_name = #{s ref.name}
          ;
        EndSQL

        JobNet.for_record(record) if record
      end

      # Returns true when the jobnet for +ref+ has a non-null executor_id.
      def locked?(ref)
        value = connect {|conn|
          conn.query_value(<<~EndSQL)
            select
                count(*)
            from
                jobnets
            where
                "subsystem" = #{s ref.subsystem}
                and jobnet_name = #{s ref.name}
                and executor_id is not null
            ;
          EndSQL
        }

        value.to_i > 0
      end

      # Sets executor_id on the jobnet, but only while it is still null.
      # Raises DoubleLockError when no row was updated.
      def lock(jobnet_id, executor_id)
        records = connect {|conn|
          conn.execute_update(<<~EndSQL)
            update jobnets
            set
                executor_id = #{s executor_id}
            where
                jobnet_id = #{jobnet_id}
                and executor_id is null
            returning jobnet_id
            ;
          EndSQL
        }
        if records.empty?
          raise DoubleLockError, "Could not lock jobnet: jobnet_id=#{jobnet_id}"
        end
      end

      # Unlock jobnet lock.
      # Only a lock held by +executor_id+ is released.
      # Returns true if unlocked successfully, otherwise false.
      # FIXME: raise exception?
      def unlock(jobnet_id, executor_id)
        records = connect {|conn|
          conn.execute_update(<<~EndSQL)
            update jobnets
            set
                executor_id = null
            where
                jobnet_id = #{jobnet_id}
                and executor_id = #{s executor_id}
            returning jobnet_id
            ;
          EndSQL
        }

        !records.empty?
      end

      # Resets executor_id to null for the jobnet regardless of which
      # executor currently holds the lock.
      def clear_lock(jobnet_id)
        connect {|conn|
          conn.execute_update(<<~EndSQL)
            update jobnets
            set
                executor_id = null
            where
                jobnet_id = #{jobnet_id}
            ;
          EndSQL
        }
      end

    end
  end
end
@@ -137,7 +137,7 @@ module Bricolage
137
137
 
138
138
  attr_reader :name
139
139
  attr_reader :context
140
- attr_reader :logger
140
+ attr_accessor :logger
141
141
 
142
142
  def open
143
143
  yield nil
@@ -24,12 +24,23 @@ module Bricolage
24
24
  # Various SQL exception, except connection problem.
25
25
  class SQLException < JobFailureByException; end
26
26
 
27
+ # SQL unique constraint violation
28
+ class UniqueViolationException < SQLException; end
29
+
27
30
  # Database connection problems (not established, closed unexpectedly, invalid state)
28
31
  class ConnectionError < JobFailureByException; end
29
32
 
30
33
  # Acquiring lock takes too long (e.g. VACUUM lock)
31
34
  class LockTimeout < JobFailure; end
32
35
 
36
+ # The executing jobnet or job is already locked.
37
+ # Wait for the other job execution to release the lock, or force-unlock it manually.
38
+ class DoubleLockError < JobFailure; end
39
+
40
+ # An unexpected job state transition was attempted.
41
+ # This error must be fixed by an operator.
42
+ class IllegalJobStateException < JobFailure; end
43
+
33
44
  # S3 related exceptions
34
45
  class S3Exception < JobFailureByException; end
35
46
 
@@ -6,10 +6,11 @@ require 'bricolage/variables'
6
6
  require 'bricolage/configloader'
7
7
  require 'bricolage/loglocator'
8
8
  require 'bricolage/exception'
9
+ require 'tmpdir'
9
10
  require 'fileutils'
10
11
 
11
12
  module Bricolage
12
-
13
+
13
14
  class Job
14
15
  # For JobNetRunner
15
16
  def Job.load_ref(ref, jobnet_context)
@@ -41,6 +42,7 @@ module Bricolage
41
42
  @job_class = job_class
42
43
  @context = context
43
44
  @global_variables = nil
45
+ @option_variables = @context.option_variables
44
46
  @param_decls = @job_class.get_parameters
45
47
  @param_vals = nil # Parameters::IntermediateValues by *.job
46
48
  @param_vals_opt = nil # Parameters::IntermediateValues by options
@@ -87,6 +89,7 @@ module Bricolage
87
89
 
88
90
  job_file_rest_vars = @param_vals ? @param_vals.variables : Variables.new
89
91
  job_v_opt_vars = @param_vals_opt ? @param_vals_opt.variables : Variables.new
92
+ cmd_v_opt_vars = @option_variables ? @option_variables : Variables.new
90
93
 
91
94
  # We use a different variable set for parameter expansion and
92
95
  # SQL variable expansion. Parameter expansion uses global
@@ -94,6 +97,7 @@ module Bricolage
94
97
  base_vars = Variables.union(
95
98
  # ^ Low precedence
96
99
  @global_variables,
100
+ cmd_v_opt_vars,
97
101
  job_v_opt_vars
98
102
  # v High precedence
99
103
  )
@@ -103,11 +107,12 @@ module Bricolage
103
107
  # Then, expand SQL variables and check with declarations.
104
108
  vars = Variables.union(
105
109
  # ^ Low precedence
106
- declarations.default_variables,
107
- @global_variables,
108
- @params.variables, # Like $dest_table
109
- job_file_rest_vars,
110
- job_v_opt_vars
110
+ declarations.default_variables, # default value written in *.sql
111
+ @global_variables, # from yaml file
112
+ @params.variables, # Like $dest_table in job file
113
+ job_file_rest_vars, # custom variable at header of job file
114
+ cmd_v_opt_vars, # -v option for bricolage/bricolage-jobnet command
115
+ job_v_opt_vars # -v option for bricolage command using jobclass
111
116
  # v High precedence
112
117
  )
113
118
  @variables = vars.resolve
@@ -147,9 +152,7 @@ module Bricolage
147
152
  end
148
153
 
149
154
  def execute_in_process(log_locator:)
150
- # ??? FIXME: status_path should be independent from log_path.
151
- # Also, status_path should be defined regardless of log_path.
152
- status_path = log_locator.path ? "#{log_locator.path}.status" : nil
155
+ status_path = "#{Dir.tmpdir}/bricolage.#{$$}.status.#{"%010x" % rand(1000000000000)}"
153
156
  isolate_process(status_path) {
154
157
  log_locator.redirect_stdouts {
155
158
  do_execute