bricolage 5.30.0 → 6.0.0beta5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -3
- data/.ruby-version +1 -0
- data/README.md +3 -0
- data/RELEASE.md +22 -0
- data/Rakefile +11 -1
- data/bricolage.gemspec +9 -7
- data/config/test/datasource.yml +9 -0
- data/jobclass/rebuild-rename.rb +7 -7
- data/jobclass/streaming_load.rb +3 -3
- data/lib/bricolage/application.rb +5 -5
- data/lib/bricolage/configloader.rb +1 -1
- data/lib/bricolage/context.rb +18 -11
- data/lib/bricolage/dao/job.rb +184 -0
- data/lib/bricolage/dao/jobexecution.rb +253 -0
- data/lib/bricolage/dao/jobnet.rb +158 -0
- data/lib/bricolage/datasource.rb +1 -1
- data/lib/bricolage/exception.rb +11 -0
- data/lib/bricolage/filedatasource.rb +1 -1
- data/lib/bricolage/genericdatasource.rb +1 -2
- data/lib/bricolage/job.rb +14 -9
- data/lib/bricolage/jobnet.rb +9 -6
- data/lib/bricolage/jobnetrunner.rb +82 -45
- data/lib/bricolage/logger.rb +3 -3
- data/lib/bricolage/loglocator.rb +19 -1
- data/lib/bricolage/postgresconnection.rb +6 -4
- data/lib/bricolage/psqldatasource.rb +74 -5
- data/lib/bricolage/rubyjobclass.rb +1 -2
- data/lib/bricolage/sqlutils.rb +43 -1
- data/lib/bricolage/taskqueue.rb +221 -63
- data/lib/bricolage/vacuumlock.rb +2 -2
- data/lib/bricolage/version.rb +1 -1
- data/schema/Dockerfile +16 -0
- data/schema/Gemfile +5 -0
- data/schema/Gemfile.lock +38 -0
- data/schema/Schemafile +57 -0
- data/schema/database.yml +8 -0
- data/schema/ridgepole_dryrun.sh +2 -0
- data/schema/ridgepole_merge.sh +2 -0
- metadata +65 -25
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
module Bricolage
|
|
2
|
+
module DAO
|
|
3
|
+
|
|
4
|
+
class JobExecution
|
|
5
|
+
include SQLUtils
|
|
6
|
+
|
|
7
|
+
# Values written to and matched against job_executions.status by the
# queries in this class.
STATUS_WAITING = 'waiting'.freeze
|
|
8
|
+
STATUS_SUCCEEDED = 'succeeded'.freeze
|
|
9
|
+
STATUS_RUNNING = 'running'.freeze
|
|
10
|
+
STATUS_FAILED = 'failed'.freeze
|
|
11
|
+
STATUS_CANCELED = 'canceled'.freeze
|
|
12
|
+
|
|
13
|
+
# Value object for one job execution: its own ID, the ID of its job, and
# the job's subsystem and name. Built with keyword arguments.
Attributes = Struct.new(:job_id, :job_execution_id, :subsystem, :job_name, keyword_init: true)
|
|
14
|
+
|
|
15
|
+
# Converts one result row into an Attributes struct.
#
# r is indexed by column name. The job_id and job_execution_id columns are
# converted with to_i when present and stay nil when the row has no value
# for them; subsystem and job_name are copied as-is.
def JobExecution.for_record(r)
  attrs = Attributes.new
  attrs.job_id = r['job_id']&.to_i
  attrs.job_execution_id = r['job_execution_id']&.to_i
  attrs.subsystem = r['subsystem']
  attrs.job_name = r['job_name']
  attrs
end
|
|
23
|
+
|
|
24
|
+
# Builds a DAO that works on the given connection; the datasource is left
# nil, so #connect always yields conn.
def JobExecution.for_connection(conn)
|
|
25
|
+
new(nil, connection: conn)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# datasource:: object used by #connect to open a shared connection when no
#              explicit connection is given.
# connection:: optional connection; when set, #connect yields it instead
#              of opening one through the datasource.
def initialize(datasource, connection: nil)
|
|
29
|
+
@datasource = datasource
|
|
30
|
+
@connection = connection
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Yields a connection to the caller's block and returns the block's value.
#
# The connection passed at construction time wins; otherwise a shared
# connection is requested from the datasource.
private def connect
  return yield(@connection) if @connection

  @datasource.open_shared_connection do |shared|
    yield shared
  end
end
|
|
42
|
+
|
|
43
|
+
# Lists the executions of a jobnet that are still waiting, running or failed.
#
# jobnet_ref must respond to #subsystem and #name; both are used to select
# the jobnet. Rows are ordered by execution_sequence and each one is
# converted to an Attributes struct via JobExecution.for_record.
def enqueued_jobs(jobnet_ref)
  rows = connect do |conn|
    select_sql = <<~EndSQL
      select
          e.job_execution_id
          , e.job_id
          , j.subsystem
          , j.job_name
      from
          job_executions e
          inner join jobs j using (job_id)
          inner join jobnets n using (jobnet_id)
      where
          n.subsystem = #{s jobnet_ref.subsystem}
          and n.jobnet_name = #{s jobnet_ref.name}
          and e.status in (#{s STATUS_WAITING}, #{s STATUS_RUNNING}, #{s STATUS_FAILED})
      order by
          e.execution_sequence
      ;
    EndSQL
    conn.query_rows(select_sql)
  end

  rows.map { |row| JobExecution.for_record(row) }
end
|
|
66
|
+
|
|
67
|
+
# Registers a new execution of job in the waiting state.
#
# Inserts a job_executions row (status waiting, empty message, submitted_at
# = now()) with the given execution_sequence, then records that state in
# job_execution_states using the row's submitted_at. Returns an Attributes
# struct built from the inserted row, with subsystem and job_name taken
# from job (the insert only returns the two IDs).
def enqueue_job(job, execution_sequence)
  inserted = nil
  connect do |conn|
    insert_sql = <<~EndSQL
      insert into job_executions
          ( job_id
          , execution_sequence
          , status
          , message
          , submitted_at
          )
      values
          ( #{job.id}
          , #{execution_sequence}
          , #{s STATUS_WAITING}
          , ''
          , now()
          )
      returning job_execution_id, job_id
      ;
    EndSQL

    inserted = conn.execute_update(insert_sql).first
    save_state_transition(conn, inserted['job_execution_id'], 'submitted_at')
  end

  JobExecution.for_record(inserted).tap do |execution|
    execution.subsystem = job.subsystem
    execution.job_name = job.job_name
  end
end
|
|
98
|
+
|
|
99
|
+
# Cancels every unfinished execution of one jobnet.
#
# All job_executions rows that belong to the jobnet identified by
# jobnet_ref (#subsystem and #name) and whose status is waiting, running or
# failed are set to canceled with the given message and finished_at =
# now(). For the rows that were updated, the resulting state is copied into
# job_execution_states (created_at is taken from finished_at).
def cancel_jobnet(jobnet_ref, message)
  connect do |conn|
    cancel_sql = <<~EndSQL
      update job_executions
      set
          status = #{s STATUS_CANCELED}
          , message = #{s message}
          , finished_at = now()
      where
          job_id in (
              select
                  j.job_id
              from
                  jobs j inner join jobnets n using (jobnet_id)
              where
                  n.subsystem = #{s jobnet_ref.subsystem}
                  and n.jobnet_name = #{s jobnet_ref.name}
          )
          and status in (#{s STATUS_WAITING}, #{s STATUS_RUNNING}, #{s STATUS_FAILED})
      returning job_execution_id
      ;
    EndSQL

    canceled_ids = conn.execute_update(cancel_sql).map { |row| row['job_execution_id'].to_i }
    # Nothing was canceled, so there is no state change to record.
    next if canceled_ids.empty?

    history_sql = <<~EndSQL
      insert into job_execution_states
          ( job_execution_id
          , job_id
          , created_at
          , status
          , message
          )
      select
          job_execution_id
          , job_id
          , finished_at
          , status
          , message
      from
          job_executions
      where
          job_execution_id in (#{canceled_ids.join(', ')})
      ;
    EndSQL
    conn.execute_update(history_sql)
  end
end
|
|
147
|
+
|
|
148
|
+
# Moves an execution into the running state.
#
# Only a row that is currently waiting or failed is updated: status becomes
# running, message is cleared, started_at is set to now() and finished_at
# is reset to null. The new state is then recorded in job_execution_states
# using started_at.
#
# Raises IllegalJobStateException when the update matched no row.
def transition_to_running(job_execution_id)
  connect do |conn|
    update_sql = <<~EndSQL
      update job_executions
      set
          status = #{s STATUS_RUNNING}
          , message = ''
          , started_at = now()
          , finished_at = null
      where
          job_execution_id = #{job_execution_id}
          and status in (#{s STATUS_WAITING}, #{s STATUS_FAILED})
      returning job_execution_id
      ;
    EndSQL

    updated = conn.execute_update(update_sql)
    raise IllegalJobStateException, "Could not run already running job: job_execution_id=#{job_execution_id}" if updated.empty?

    save_state_transition(conn, job_execution_id, 'started_at')
  end
end
|
|
170
|
+
|
|
171
|
+
# Marks a running execution as succeeded.
#
# Only a row whose status is running is updated: finished_at is set to
# now(), status becomes succeeded and message is cleared. The new state is
# then recorded in job_execution_states using finished_at.
#
# Raises IllegalJobStateException when the update matched no row.
def transition_to_succeeded(job_execution_id)
  connect do |conn|
    update_sql = <<~EndSQL
      update job_executions
      set
          finished_at = now()
          , status = #{s STATUS_SUCCEEDED}
          , message = ''
      where
          job_execution_id = #{job_execution_id}
          and status = #{s STATUS_RUNNING}
      returning job_execution_id
      ;
    EndSQL

    updated = conn.execute_update(update_sql)
    raise IllegalJobStateException, "could not transition to succeeded state: job_execution_id=#{job_execution_id}" if updated.empty?

    save_state_transition(conn, job_execution_id, 'finished_at')
  end
end
|
|
192
|
+
|
|
193
|
+
# Marks a running execution as failed and stores the failure message.
#
# Only a row whose status is running is updated: finished_at is set to
# now(), status becomes failed and message is set to the given text. The
# new state is then recorded in job_execution_states using finished_at.
#
# Raises IllegalJobStateException when the update matched no row.
def transition_to_failed(job_execution_id, message)
  connect do |conn|
    update_sql = <<~EndSQL
      update job_executions
      set
          finished_at = now()
          , status = #{s STATUS_FAILED}
          , message = #{s message}
      where
          job_execution_id = #{job_execution_id}
          and status = #{s STATUS_RUNNING}
      returning job_execution_id
      ;
    EndSQL

    updated = conn.execute_update(update_sql)
    raise IllegalJobStateException, "could not transition to failed state: job_execution_id=#{job_execution_id}" if updated.empty?

    save_state_transition(conn, job_execution_id, 'finished_at')
  end
end
|
|
214
|
+
|
|
215
|
+
# Appends the current state of one execution to job_execution_states.
#
# The row is copied from job_executions (IDs, status, message).
# time_expr is spliced into the select list verbatim and supplies the
# created_at value; callers in this class pass a timestamp column name
# ('submitted_at', 'started_at' or 'finished_at').
private def save_state_transition(conn, job_execution_id, time_expr)
  history_sql = <<~EndSQL
    insert into job_execution_states
        ( job_execution_id
        , job_id
        , created_at
        , status
        , message
        )
    select
        job_execution_id
        , job_id
        , #{time_expr}
        , status
        , message
    from
        job_executions
    where
        job_execution_id = #{job_execution_id}
    ;
  EndSQL
  conn.execute_update(history_sql)
end
|
|
237
|
+
|
|
238
|
+
# For tests only
|
|
239
|
+
# Removes every row from job_execution_states, job_executions, jobs and
# jobnets, in that order, as one statement batch.
def delete_all
  connect do |conn|
    wipe_sql = <<~EndSQL
      delete from job_execution_states;
      delete from job_executions;
      delete from jobs;
      delete from jobnets;
    EndSQL
    conn.execute_update(wipe_sql)
  end
end
|
|
249
|
+
|
|
250
|
+
end # class JobExecution
|
|
251
|
+
|
|
252
|
+
end
|
|
253
|
+
end
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
module Bricolage
|
|
2
|
+
module DAO
|
|
3
|
+
class JobNet
|
|
4
|
+
|
|
5
|
+
include SQLUtils
|
|
6
|
+
|
|
7
|
+
# Value object for one jobnets row: its ID, subsystem and jobnet name.
# Built with keyword arguments.
Attributes = Struct.new(:id, :subsystem, :jobnet_name, keyword_init: true)
|
|
8
|
+
|
|
9
|
+
# Converts one result row into an Attributes struct.
#
# r is indexed by column name. jobnet_id is converted with to_i when
# present and stays nil otherwise; subsystem and jobnet_name are copied
# as-is.
def JobNet.for_record(r)
  attrs = Attributes.new
  attrs.id = r['jobnet_id']&.to_i
  attrs.subsystem = r['subsystem']
  attrs.jobnet_name = r['jobnet_name']
  attrs
end
|
|
16
|
+
|
|
17
|
+
# Converts a collection of result rows into Attributes structs, one per
# row, using JobNet.for_record.
def JobNet.for_records(jobnets)
  jobnets.map do |row|
    JobNet.for_record(row)
  end
end
|
|
20
|
+
|
|
21
|
+
# datasource:: object used by #connect to open a shared connection.
def initialize(datasource)
|
|
22
|
+
@datasource = datasource
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Forwards the given block unchanged to the datasource's
# open_shared_connection and returns whatever that call returns.
private def connect(&block)
|
|
26
|
+
@datasource.open_shared_connection(&block)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# Returns the jobnet record matching ref, inserting it first when missing.
#
# ref must respond to #subsystem and #name. If the insert fails with
# UniqueViolationException, the lookup is repeated once; a RuntimeError
# is raised when the row still cannot be found.
def find_or_create(ref)
  connect do |conn|
    existing = find(conn, ref)
    return existing if existing

    begin
      return create(conn, ref)
    rescue UniqueViolationException
      # The insert hit the unique constraint, so look the row up again.
      winner = find(conn, ref)
      raise "[BUG] Could not create jobnet record: #{ref}" unless winner
      return winner
    end
  end
end
|
|
44
|
+
|
|
45
|
+
# Inserts a jobnets row for ref and returns it as an Attributes struct.
#
# Only jobnet_id comes back from the database; subsystem and jobnet_name
# are taken from ref.
private def create(conn, ref)
  insert_sql = <<~SQL
    insert into jobnets
        ( "subsystem"
        , jobnet_name
        )
    values
        ( #{s ref.subsystem}
        , #{s ref.name}
        )
    returning jobnet_id
    ;
  SQL
  inserted = conn.execute_update(insert_sql)

  Attributes.new(
    id: inserted.first['jobnet_id']&.to_i,
    subsystem: ref.subsystem,
    jobnet_name: ref.name
  )
end
|
|
65
|
+
|
|
66
|
+
# Looks up the jobnets row whose subsystem and jobnet_name match ref.
# Returns an Attributes struct, or nil when the query yields no row.
private def find(conn, ref)
  select_sql = <<~EndSQL
    select
        jobnet_id
        , "subsystem"
        , jobnet_name
    from
        jobnets
    where
        "subsystem" = #{s ref.subsystem}
        and jobnet_name = #{s ref.name}
    ;
  EndSQL
  row = conn.query_row(select_sql)

  return nil unless row

  JobNet.for_record(row)
end
|
|
86
|
+
|
|
87
|
+
# Tells whether the jobnet identified by ref is currently locked, i.e.
# whether a matching jobnets row has a non-null executor_id.
def locked?(ref)
  lock_count = connect do |conn|
    count_sql = <<~EndSQL
      select
          count(*)
      from
          jobnets
      where
          "subsystem" = #{s ref.subsystem}
          and jobnet_name = #{s ref.name}
          and executor_id is not null
      ;
    EndSQL
    conn.query_value(count_sql)
  end

  lock_count.to_i.positive?
end
|
|
104
|
+
|
|
105
|
+
# Acquires the jobnet lock for executor_id.
#
# executor_id is stored on the jobnets row only while the column is
# currently null. Raises DoubleLockError when the update matched no row.
def lock(jobnet_id, executor_id)
  locked_rows = connect do |conn|
    lock_sql = <<~EndSQL
      update jobnets
      set
          executor_id = #{s executor_id}
      where
          jobnet_id = #{jobnet_id}
          and executor_id is null
      returning jobnet_id
      ;
    EndSQL
    conn.execute_update(lock_sql)
  end

  raise DoubleLockError, "Could not lock jobnet: jobnet_id=#{jobnet_id}" if locked_rows.empty?
end
|
|
122
|
+
|
|
123
|
+
# Releases the jobnet lock held by executor_id.
#
# executor_id is reset to null only when the row is currently locked by
# that same executor. Returns true if a row was unlocked, otherwise false.
# FIXME: raise exception?
def unlock(jobnet_id, executor_id)
  released_rows = connect do |conn|
    unlock_sql = <<~EndSQL
      update jobnets
      set
          executor_id = null
      where
          jobnet_id = #{jobnet_id}
          and executor_id = #{s executor_id}
      returning jobnet_id
      ;
    EndSQL
    conn.execute_update(unlock_sql)
  end

  !released_rows.empty?
end
|
|
142
|
+
|
|
143
|
+
# Resets executor_id to null on the given jobnet row, regardless of which
# executor (if any) currently holds the lock.
def clear_lock(jobnet_id)
  connect do |conn|
    clear_sql = <<~EndSQL
      update jobnets
      set
          executor_id = null
      where
          jobnet_id = #{jobnet_id}
      ;
    EndSQL
    conn.execute_update(clear_sql)
  end
end
|
|
155
|
+
|
|
156
|
+
end
|
|
157
|
+
end
|
|
158
|
+
end
|
data/lib/bricolage/datasource.rb
CHANGED
data/lib/bricolage/exception.rb
CHANGED
|
@@ -24,12 +24,23 @@ module Bricolage
|
|
|
24
24
|
# Various SQL exceptions, except connection problems.
|
|
25
25
|
class SQLException < JobFailureByException; end
|
|
26
26
|
|
|
27
|
+
# SQL unique constraint violation
|
|
28
|
+
class UniqueViolationException < SQLException; end
|
|
29
|
+
|
|
27
30
|
# Database connection problems (not established, closed unexpectedly, invalid state)
|
|
28
31
|
class ConnectionError < JobFailureByException; end
|
|
29
32
|
|
|
30
33
|
# Acquiring lock takes too long (e.g. VACUUM lock)
|
|
31
34
|
class LockTimeout < JobFailure; end
|
|
32
35
|
|
|
36
|
+
# The executing jobnet or job is already locked.
|
|
37
|
+
# Wait for the other job execution to release the lock, or force an unlock manually.
|
|
38
|
+
class DoubleLockError < JobFailure; end
|
|
39
|
+
|
|
40
|
+
# An unexpected job state transition was attempted.
|
|
41
|
+
# This error must be fixed by an operator.
|
|
42
|
+
class IllegalJobStateException < JobFailure; end
|
|
43
|
+
|
|
33
44
|
# S3 related exceptions
|
|
34
45
|
class S3Exception < JobFailureByException; end
|
|
35
46
|
|