ductwork 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 18ebf23ac85cb80300ecc0c35475e0a2364fa4296a6012fdc21e95b8c5943023
4
- data.tar.gz: e08b2cda1074255a7b7a79c5fbe789ac373909eef57b7836be49b3f7554a4350
3
+ metadata.gz: e10d600812e131829fe76b4a694bf755a707d4a4c66e29afd21701ee17e2fa8b
4
+ data.tar.gz: 101e1a25cac0c898f678b0da11f2c3e0d679bfb9c11741ed99c8e4f5bb8e3782
5
5
  SHA512:
6
- metadata.gz: 9d3b28265bfdf2f90210c43e2609d4255dda6d120ab597b74e54e9c020cbdc4be0b07235bb104873a62ae309511fc841c7d236f20dd52124150760c8dfaa94c8
7
- data.tar.gz: fb6db1e8385b880b942404a25aba9ef02637d0c1a2dd53b1a6ddd78a8c70e6494c6349b8d7bbf837727cee5d2ed2c2e2afd67fe506c0be193797fc2de8867619
6
+ metadata.gz: be40286349648fa967100abdcecc2fd3519e616881e635362903fb90656c6568e59a005e1015ac477bcae7eff61cda7b05378b35603302f2ca2ea08f3558de6d
7
+ data.tar.gz: ed9f573f3d6f2913c2d22dc0081064cb6d67eb743f92a1e573260d31c56592837d093292029a7de82b6ca40ebe022dc2092522167b84f52eb805ff9768fbe6f8
data/CHANGELOG.md CHANGED
@@ -1,5 +1,20 @@
1
1
  # Ductwork Changelog
2
2
 
3
+ ## [0.5.0]
4
+
5
+ - core: add "waiting" status to `Step` model
6
+ - core: add "waiting" status to `Pipeline` model
7
+ - fix: change `jobs.input_args` and `jobs.output_payload` column type to `text`
8
+ - fix: change `pipelines.definition` column type to `text` - this prevents larger definitions from being clipped if there is a size limit on the string column
9
+ - feat: add missing unique index on `ductwork_results` and `ductwork_runs` tables
10
+ - feat: add missing composite index on `ductwork_executions` table for `Ductwork::Job.claim_latest` method
11
+ - feat: add missing composite index on `ductwork_availabilities` table for `Ductwork::Job.claim_latest` method
12
+ - feat: use array instead of ActiveRecord relation when advancing pipelines - this has major performance benefits but comes with memory-usage implications (see comments)
13
+ - fix: add condition to query to return correct pipelines that need advancing
14
+ - fix: release pipeline claim only if successfully claimed
15
+ - chore: add pipeline ID to misc log lines
16
+ - feat: add missing composite indexes on `ductwork_steps` table
17
+
3
18
  ## [0.4.0]
4
19
 
5
20
  - chore: change job worker thread name format
@@ -14,6 +14,7 @@ module Ductwork
14
14
  enum :status,
15
15
  pending: "pending",
16
16
  in_progress: "in_progress",
17
+ waiting: "waiting",
17
18
  halted: "halted",
18
19
  completed: "completed"
19
20
 
@@ -87,22 +88,19 @@ module Ductwork
87
88
  end
88
89
 
89
90
  def advance!
90
- # NOTE: there could be A LOT of steps advancing for a single pipeline
91
- # so instead of loading everything into memory and using ruby collection
92
- # methods we make multiple queries. may need to revisist this once
93
- # we do extensive load testing
94
- advancing = steps.advancing
95
- edges = if advancing.exists?
96
- parsed_definition
97
- .fetch(:edges, {})
98
- .select { |k| k.in?(advancing.pluck(:klass)) }
99
- end
91
+ # NOTE: if we've expanded the pipeline there could be a lot of
92
+ # advancing records which may cause memory issues. something to
93
+ # watch out for here and maybe add in config to use AR relation
94
+ # at certain counts or even memory limits.
95
+ advancing_steps = steps.advancing.pluck(:id, :klass)
96
+ advancing_ids = advancing_steps.map(&:first)
97
+ edges = find_edges(advancing_steps)
100
98
 
101
99
  Ductwork::Record.transaction do
102
100
  if edges.nil? || edges.values.all?(&:empty?)
103
- conditionally_complete_pipeline(advancing)
101
+ conditionally_complete_pipeline(advancing_ids)
104
102
  else
105
- advance_to_next_steps_by_type(edges, advancing)
103
+ advance_to_next_steps_by_type(edges, advancing_ids)
106
104
  end
107
105
  end
108
106
  end
@@ -120,10 +118,25 @@ module Ductwork
120
118
  Ductwork::Job.enqueue(next_step, input_arg)
121
119
  end
122
120
 
123
- def conditionally_complete_pipeline(advancing)
124
- advancing.update!(status: :completed, completed_at: Time.current)
121
+ def find_edges(advancing_steps)
122
+ if advancing_steps.any?
123
+ klasses = advancing_steps.map(&:last)
125
124
 
126
- if steps.where(status: %w[in_progress pending]).none?
125
+ parsed_definition.fetch(:edges, {}).select { |k| k.in?(klasses) }
126
+ end
127
+ end
128
+
129
+ def conditionally_complete_pipeline(advancing_ids)
130
+ steps
131
+ .where(id: advancing_ids)
132
+ .update_all(status: :completed, completed_at: Time.current)
133
+
134
+ remaining = steps
135
+ .where(status: %w[in_progress pending advancing])
136
+ .where.not(id: advancing_ids)
137
+ .exists?
138
+
139
+ if !remaining
127
140
  update!(status: :completed, completed_at: Time.current)
128
141
 
129
142
  Ductwork.logger.info(
@@ -134,9 +147,11 @@ module Ductwork
134
147
  end
135
148
  end
136
149
 
137
- def advance_to_next_steps_by_type(edges, advancing)
150
+ def advance_to_next_steps_by_type(edges, advancing_ids)
151
+ steps.where(id: advancing_ids).update_all(status: :completed, completed_at: Time.current)
152
+
138
153
  if edges.all? { |_, v| v.dig(-1, :type) == "combine" }
139
- conditionally_combine_next_steps(edges, advancing)
154
+ conditionally_combine_next_steps(edges, advancing_ids)
140
155
  else
141
156
  edges.each do |step_klass, step_edges|
142
157
  edge = step_edges[-1]
@@ -145,27 +160,25 @@ module Ductwork
145
160
  step_type = edge[:type] == "chain" ? "default" : edge[:type]
146
161
 
147
162
  if step_type == "collapse"
148
- conditionally_collapse_next_steps(step_klass, edge, advancing)
163
+ conditionally_collapse_next_steps(step_klass, edge, advancing_ids)
149
164
  else
150
- advance_non_merging_steps(step_klass, edges, advancing)
165
+ advance_non_merging_steps(step_klass, edge, advancing_ids)
151
166
  end
152
167
  end
153
168
  end
154
- advancing.update!(status: :completed, completed_at: Time.current)
155
169
  log_pipeline_advanced(edges)
156
170
  end
157
171
 
158
- def advance_non_merging_steps(step_klass, edges, advancing)
159
- advancing.where(klass: step_klass).find_each do |step|
160
- edge = edges.dig(step.klass, -1)
161
- # NOTE: "chain" is used by ActiveRecord so we have to call
162
- # this enum value "default" :sad:
163
- step_type = edge[:type] == "chain" ? "default" : edge[:type]
172
+ def advance_non_merging_steps(step_klass, edge, advancing_ids)
173
+ # NOTE: "chain" is used by ActiveRecord so we have to call
174
+ # this enum value "default" :sad:
175
+ step_type = edge[:type] == "chain" ? "default" : edge[:type]
164
176
 
177
+ steps.where(id: advancing_ids, klass: step_klass).find_each do |step|
165
178
  if step_type.in?(%w[default divide])
166
- advance_to_next_steps(step_type, advancing, edge)
179
+ advance_to_next_steps(step_type, step.id, edge)
167
180
  elsif step_type == "expand"
168
- expand_to_next_steps(step_type, advancing, edge)
181
+ expand_to_next_steps(step_type, step.id, edge)
169
182
  else
170
183
  Ductwork.logger.error(
171
184
  msg: "Invalid step type",
@@ -177,7 +190,7 @@ module Ductwork
177
190
  end
178
191
  end
179
192
 
180
- def advance_to_next_steps(step_type, advancing, edge)
193
+ def advance_to_next_steps(step_type, step_id, edge)
181
194
  too_many = edge[:to].tally.any? do |to_klass, count|
182
195
  depth = Ductwork
183
196
  .configuration
@@ -196,14 +209,15 @@ module Ductwork
196
209
  step_type: step_type,
197
210
  started_at: Time.current
198
211
  )
199
- Ductwork::Job.enqueue(next_step, advancing.take.job.return_value)
212
+ return_value = Ductwork::Job.find_by(step_id:).return_value
213
+ Ductwork::Job.enqueue(next_step, return_value)
200
214
  end
201
215
  end
202
216
  end
203
217
 
204
- def conditionally_combine_next_steps(edges, advancing)
218
+ def conditionally_combine_next_steps(edges, advancing_ids)
205
219
  if steps.where(status: %w[pending in_progress], klass: edges.keys).none?
206
- combine_next_steps(edges, advancing)
220
+ combine_next_steps(edges, advancing_ids)
207
221
  else
208
222
  Ductwork.logger.debug(
209
223
  msg: "Not all divided steps have completed; not combining",
@@ -213,14 +227,15 @@ module Ductwork
213
227
  end
214
228
  end
215
229
 
216
- def combine_next_steps(edges, advancing)
230
+ def combine_next_steps(edges, advancing_ids)
217
231
  klass = edges.values.sample.dig(-1, :to).sole
218
232
  step_type = "combine"
219
- groups = advancing
233
+ groups = steps
234
+ .where(id: advancing_ids)
220
235
  .group(:klass)
221
236
  .count
222
237
  .keys
223
- .map { |k| advancing.where(klass: k) }
238
+ .map { |k| steps.where(id: advancing_ids).where(klass: k) }
224
239
 
225
240
  groups.first.zip(*groups[1..]).each do |group|
226
241
  input_arg = Ductwork::Job
@@ -230,9 +245,11 @@ module Ductwork
230
245
  end
231
246
  end
232
247
 
233
- def expand_to_next_steps(step_type, advancing, edge)
248
+ def expand_to_next_steps(step_type, step_id, edge)
234
249
  next_klass = edge[:to].sole
235
- return_value = advancing.take.job.return_value
250
+ return_value = Ductwork::Job
251
+ .find_by(step_id:)
252
+ .return_value
236
253
  max_depth = Ductwork.configuration.steps_max_depth(pipeline: klass, step: next_klass)
237
254
 
238
255
  if max_depth != -1 && return_value.count > max_depth
@@ -248,9 +265,9 @@ module Ductwork
248
265
  end
249
266
  end
250
267
 
251
- def conditionally_collapse_next_steps(step_klass, edge, advancing)
268
+ def conditionally_collapse_next_steps(step_klass, edge, advancing_ids)
252
269
  if steps.where(status: %w[pending in_progress], klass: step_klass).none?
253
- collapse_next_steps(edge[:to].sole, advancing)
270
+ collapse_next_steps(edge[:to].sole, advancing_ids)
254
271
  else
255
272
  Ductwork.logger.debug(
256
273
  msg: "Not all expanded steps have completed; not collapsing",
@@ -260,14 +277,11 @@ module Ductwork
260
277
  end
261
278
  end
262
279
 
263
- def collapse_next_steps(klass, advancing)
280
+ def collapse_next_steps(klass, advancing_ids)
264
281
  step_type = "collapse"
265
282
  input_arg = []
266
283
 
267
- # NOTE: because of expanding based on return values, there
268
- # could be A LOT of jobs so we want to use batch methods
269
- # to avoid creating too many in-memory objects
270
- Ductwork::Job.where(step_id: advancing.ids).find_each do |job|
284
+ Ductwork::Job.where(step_id: advancing_ids).find_each do |job|
271
285
  input_arg << job.return_value
272
286
  end
273
287
 
@@ -12,6 +12,7 @@ module Ductwork
12
12
  enum :status,
13
13
  pending: "pending",
14
14
  in_progress: "in_progress",
15
+ waiting: "waiting",
15
16
  advancing: "advancing",
16
17
  failed: "failed",
17
18
  completed: "completed"
@@ -15,6 +15,7 @@ module Ductwork
15
15
  Ductwork::Pipeline
16
16
  .in_progress
17
17
  .where(klass: klass, claimed_for_advancing_at: nil)
18
+ .where(steps: Ductwork::Step.where(status: :advancing))
18
19
  .where.not(steps: Ductwork::Step.where.not(status: %w[advancing completed]))
19
20
  .order(:last_advanced_at)
20
21
  .limit(1)
@@ -30,6 +31,7 @@ module Ductwork
30
31
  if rows_updated == 1
31
32
  Ductwork.logger.debug(
32
33
  msg: "Pipeline claimed",
34
+ pipeline_id: id,
33
35
  pipeline: klass,
34
36
  role: :pipeline_advancer
35
37
  )
@@ -39,28 +41,29 @@ module Ductwork
39
41
 
40
42
  Ductwork.logger.debug(
41
43
  msg: "Pipeline advanced",
44
+ pipeline_id: id,
42
45
  pipeline: klass,
43
46
  role: :pipeline_advancer
44
47
  )
48
+
49
+ # release the pipeline and set last advanced at so it doesnt block.
50
+ # we're not using a queue so we have to use a db timestamp
51
+ pipeline.update!(
52
+ claimed_for_advancing_at: nil,
53
+ last_advanced_at: Time.current
54
+ )
45
55
  else
46
56
  Ductwork.logger.debug(
47
57
  msg: "Did not claim pipeline, avoided race condition",
58
+ pipeline_id: id,
48
59
  pipeline: klass,
49
60
  role: :pipeline_advancer
50
61
  )
51
62
  end
52
-
53
- # release the pipeline and set last advanced at so it doesnt block.
54
- # we're not using a queue so we have to use a db timestamp
55
- Ductwork::Pipeline.find(id).update!(
56
- claimed_for_advancing_at: nil,
57
- last_advanced_at: Time.current
58
- )
59
63
  else
60
64
  Ductwork.logger.debug(
61
65
  msg: "No pipeline needs advancing",
62
66
  pipeline: klass,
63
- id: id,
64
67
  role: :pipeline_advancer
65
68
  )
66
69
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Ductwork
4
- VERSION = "0.4.0"
4
+ VERSION = "0.5.0"
5
5
  end
@@ -12,5 +12,8 @@ class CreateDuctworkAvailabilities < ActiveRecord::Migration[<%= Rails::VERSION:
12
12
 
13
13
  add_index :ductwork_availabilities, :execution_id, unique: true
14
14
  add_index :ductwork_availabilities, %i[id process_id]
15
+ add_index :ductwork_availabilities,
16
+ %i[completed_at started_at created_at],
17
+ name: "index_ductwork_availabilities_on_claim_latest"
15
18
  end
16
19
  end
@@ -10,5 +10,7 @@ class CreateDuctworkExecutions < ActiveRecord::Migration[<%= Rails::VERSION::MAJ
10
10
  table.integer :process_id
11
11
  table.timestamps null: false
12
12
  end
13
+
14
+ add_index :ductwork_executions, %i[job_id created_at]
13
15
  end
14
16
  end
@@ -7,8 +7,8 @@ class CreateDuctworkJobs < ActiveRecord::Migration[<%= Rails::VERSION::MAJOR %>.
7
7
  table.string :klass, null: false
8
8
  table.timestamp :started_at, null: false
9
9
  table.timestamp :completed_at
10
- table.string :input_args, null: false
11
- table.string :output_payload
10
+ table.text :input_args, null: false
11
+ table.text :output_payload
12
12
  table.timestamps null: false
13
13
  end
14
14
 
@@ -4,7 +4,7 @@ class CreateDuctworkPipelines < ActiveRecord::Migration[<%= Rails::VERSION::MAJO
4
4
  def change
5
5
  create_table :ductwork_pipelines do |table|
6
6
  table.string :klass, null: false
7
- table.string :definition, null: false
7
+ table.text :definition, null: false
8
8
  table.string :definition_sha1, null: false
9
9
  table.timestamp :triggered_at, null: false
10
10
  table.timestamp :completed_at
@@ -10,5 +10,7 @@ class CreateDuctworkResults < ActiveRecord::Migration[<%= Rails::VERSION::MAJOR
10
10
  table.text :error_backtrace
11
11
  table.timestamps null: false
12
12
  end
13
+
14
+ add_index :ductwork_results, :execution_id, unique: true
13
15
  end
14
16
  end
@@ -8,5 +8,7 @@ class CreateDuctworkRuns < ActiveRecord::Migration[<%= Rails::VERSION::MAJOR %>.
8
8
  table.timestamp :completed_at
9
9
  table.timestamps null: false
10
10
  end
11
+
12
+ add_index :ductwork_runs, :execution_id, unique: true
11
13
  end
12
14
  end
@@ -12,6 +12,9 @@ class CreateDuctworkSteps < ActiveRecord::Migration[<%= Rails::VERSION::MAJOR %>
12
12
  table.timestamps null: false
13
13
  end
14
14
 
15
+ add_index :ductwork_steps, %i[pipeline_id status klass]
16
+ add_index :ductwork_steps, %i[pipeline_id klass status]
17
+ add_index :ductwork_steps, %i[status klass]
15
18
  add_index :ductwork_steps, %i[pipeline_id status]
16
19
  end
17
20
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ductwork
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tyler Ewing