ductwork 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 18ebf23ac85cb80300ecc0c35475e0a2364fa4296a6012fdc21e95b8c5943023
4
- data.tar.gz: e08b2cda1074255a7b7a79c5fbe789ac373909eef57b7836be49b3f7554a4350
3
+ metadata.gz: fb1c47566dc19fe61a94c69c48cca984c8844952ce6c0b48917aa479f43f6709
4
+ data.tar.gz: d0a58e6426f2ca032b70ed3228e7e3de0a540f60693d8a2fdd4ad200235bb8ff
5
5
  SHA512:
6
- metadata.gz: 9d3b28265bfdf2f90210c43e2609d4255dda6d120ab597b74e54e9c020cbdc4be0b07235bb104873a62ae309511fc841c7d236f20dd52124150760c8dfaa94c8
7
- data.tar.gz: fb6db1e8385b880b942404a25aba9ef02637d0c1a2dd53b1a6ddd78a8c70e6494c6349b8d7bbf837727cee5d2ed2c2e2afd67fe506c0be193797fc2de8867619
6
+ metadata.gz: 1ff44e54ab30c89ebfadf3402ce79b11a8ca7704e8b07a06e8434f07af8216b18bf2db63c2d94cf352edcbc490545a8c3a5bb608f3807a37df35af6f5562e617
7
+ data.tar.gz: e1c09a239bb49b17143a3f4fb4f248dcbe25c9d5602e763db89bb7ff0411a6233b761ac59e30b5e03e7d0354253924871e3e1510757f3155bc56100954a1f7d3
data/CHANGELOG.md CHANGED
@@ -1,5 +1,26 @@
1
1
  # Ductwork Changelog
2
2
 
3
+ ## [0.6.0]
4
+
5
+ - feat: expose `Ductwork.eager_load` method for eager loading code via `zeitwerk`
6
+ - chore: let `zeitwerk` autoload models from `lib/models` directory instead of letting rails autoload them from the `app/models` directory via the rails engine
7
+ - feat: add `started_at` column to `ductwork_pipelines` table - for now, this will only be used in Pro features.
8
+
9
+ ## [0.5.0]
10
+
11
+ - chore: add "waiting" status to `Step` model
12
+ - chore: add "waiting" status to `Pipeline` model
13
+ - fix: change `jobs.input_args` and `jobs.output_payload` column type to `text`
14
+ - fix: change `pipelines.definition` column type to `text` - this prevents larger definitions from being clipped if there is a size limit on the string column
15
+ - feat: add missing unique index on `ductwork_results` and `ductwork_runs` tables
16
+ - feat: add missing composite index on `ductwork_executions` table for `Ductwork::Job.claim_latest` method
17
+ - feat: add missing composite index on `ductwork_availabilities` table for `Ductwork::Job.claim_latest` method
18
+ - feat: use array instead of ActiveRecord relation when advancing pipelines - this has major performance benefits but comes with memory-usage implications (see comments)
19
+ - fix: add condition to query to return correct pipelines that need advancing
20
+ - fix: release pipeline claim only if successfully claimed
21
+ - chore: add pipeline ID to misc log lines
22
+ - feat: add missing composite indexes on `ductwork_steps` table
23
+
3
24
  ## [0.4.0]
4
25
 
5
26
  - chore: change job worker thread name format
@@ -8,12 +8,14 @@ module Ductwork
8
8
  validates :definition, presence: true
9
9
  validates :definition_sha1, presence: true
10
10
  validates :status, presence: true
11
+ validates :started_at, presence: true
11
12
  validates :triggered_at, presence: true
12
13
  validates :last_advanced_at, presence: true
13
14
 
14
15
  enum :status,
15
16
  pending: "pending",
16
17
  in_progress: "in_progress",
18
+ waiting: "waiting",
17
19
  halted: "halted",
18
20
  completed: "completed"
19
21
 
@@ -63,6 +65,7 @@ module Ductwork
63
65
  definition: definition,
64
66
  definition_sha1: Digest::SHA1.hexdigest(definition),
65
67
  triggered_at: Time.current,
68
+ started_at: Time.current,
66
69
  last_advanced_at: Time.current
67
70
  )
68
71
  step = p.steps.create!(
@@ -87,22 +90,19 @@ module Ductwork
87
90
  end
88
91
 
89
92
  def advance!
90
- # NOTE: there could be A LOT of steps advancing for a single pipeline
91
- # so instead of loading everything into memory and using ruby collection
92
- # methods we make multiple queries. may need to revisist this once
93
- # we do extensive load testing
94
- advancing = steps.advancing
95
- edges = if advancing.exists?
96
- parsed_definition
97
- .fetch(:edges, {})
98
- .select { |k| k.in?(advancing.pluck(:klass)) }
99
- end
93
+ # NOTE: if we've expanded the pipeline there could be a lot of
94
+ # advancing records which may cause memory issues. something to
95
+ # watch out for here and maybe add in config to use AR relation
96
+ # at certain counts or even memory limits.
97
+ advancing_steps = steps.advancing.pluck(:id, :klass)
98
+ advancing_ids = advancing_steps.map(&:first)
99
+ edges = find_edges(advancing_steps)
100
100
 
101
101
  Ductwork::Record.transaction do
102
102
  if edges.nil? || edges.values.all?(&:empty?)
103
- conditionally_complete_pipeline(advancing)
103
+ conditionally_complete_pipeline(advancing_ids)
104
104
  else
105
- advance_to_next_steps_by_type(edges, advancing)
105
+ advance_to_next_steps_by_type(edges, advancing_ids)
106
106
  end
107
107
  end
108
108
  end
@@ -120,10 +120,25 @@ module Ductwork
120
120
  Ductwork::Job.enqueue(next_step, input_arg)
121
121
  end
122
122
 
123
- def conditionally_complete_pipeline(advancing)
124
- advancing.update!(status: :completed, completed_at: Time.current)
123
+ def find_edges(advancing_steps)
124
+ if advancing_steps.any?
125
+ klasses = advancing_steps.map(&:last)
125
126
 
126
- if steps.where(status: %w[in_progress pending]).none?
127
+ parsed_definition.fetch(:edges, {}).select { |k| k.in?(klasses) }
128
+ end
129
+ end
130
+
131
+ def conditionally_complete_pipeline(advancing_ids)
132
+ steps
133
+ .where(id: advancing_ids)
134
+ .update_all(status: :completed, completed_at: Time.current)
135
+
136
+ remaining = steps
137
+ .where(status: %w[in_progress pending advancing])
138
+ .where.not(id: advancing_ids)
139
+ .exists?
140
+
141
+ if !remaining
127
142
  update!(status: :completed, completed_at: Time.current)
128
143
 
129
144
  Ductwork.logger.info(
@@ -134,9 +149,11 @@ module Ductwork
134
149
  end
135
150
  end
136
151
 
137
- def advance_to_next_steps_by_type(edges, advancing)
152
+ def advance_to_next_steps_by_type(edges, advancing_ids)
153
+ steps.where(id: advancing_ids).update_all(status: :completed, completed_at: Time.current)
154
+
138
155
  if edges.all? { |_, v| v.dig(-1, :type) == "combine" }
139
- conditionally_combine_next_steps(edges, advancing)
156
+ conditionally_combine_next_steps(edges, advancing_ids)
140
157
  else
141
158
  edges.each do |step_klass, step_edges|
142
159
  edge = step_edges[-1]
@@ -145,27 +162,25 @@ module Ductwork
145
162
  step_type = edge[:type] == "chain" ? "default" : edge[:type]
146
163
 
147
164
  if step_type == "collapse"
148
- conditionally_collapse_next_steps(step_klass, edge, advancing)
165
+ conditionally_collapse_next_steps(step_klass, edge, advancing_ids)
149
166
  else
150
- advance_non_merging_steps(step_klass, edges, advancing)
167
+ advance_non_merging_steps(step_klass, edge, advancing_ids)
151
168
  end
152
169
  end
153
170
  end
154
- advancing.update!(status: :completed, completed_at: Time.current)
155
171
  log_pipeline_advanced(edges)
156
172
  end
157
173
 
158
- def advance_non_merging_steps(step_klass, edges, advancing)
159
- advancing.where(klass: step_klass).find_each do |step|
160
- edge = edges.dig(step.klass, -1)
161
- # NOTE: "chain" is used by ActiveRecord so we have to call
162
- # this enum value "default" :sad:
163
- step_type = edge[:type] == "chain" ? "default" : edge[:type]
174
+ def advance_non_merging_steps(step_klass, edge, advancing_ids)
175
+ # NOTE: "chain" is used by ActiveRecord so we have to call
176
+ # this enum value "default" :sad:
177
+ step_type = edge[:type] == "chain" ? "default" : edge[:type]
164
178
 
179
+ steps.where(id: advancing_ids, klass: step_klass).find_each do |step|
165
180
  if step_type.in?(%w[default divide])
166
- advance_to_next_steps(step_type, advancing, edge)
181
+ advance_to_next_steps(step_type, step.id, edge)
167
182
  elsif step_type == "expand"
168
- expand_to_next_steps(step_type, advancing, edge)
183
+ expand_to_next_steps(step_type, step.id, edge)
169
184
  else
170
185
  Ductwork.logger.error(
171
186
  msg: "Invalid step type",
@@ -177,7 +192,7 @@ module Ductwork
177
192
  end
178
193
  end
179
194
 
180
- def advance_to_next_steps(step_type, advancing, edge)
195
+ def advance_to_next_steps(step_type, step_id, edge)
181
196
  too_many = edge[:to].tally.any? do |to_klass, count|
182
197
  depth = Ductwork
183
198
  .configuration
@@ -196,14 +211,15 @@ module Ductwork
196
211
  step_type: step_type,
197
212
  started_at: Time.current
198
213
  )
199
- Ductwork::Job.enqueue(next_step, advancing.take.job.return_value)
214
+ return_value = Ductwork::Job.find_by(step_id:).return_value
215
+ Ductwork::Job.enqueue(next_step, return_value)
200
216
  end
201
217
  end
202
218
  end
203
219
 
204
- def conditionally_combine_next_steps(edges, advancing)
220
+ def conditionally_combine_next_steps(edges, advancing_ids)
205
221
  if steps.where(status: %w[pending in_progress], klass: edges.keys).none?
206
- combine_next_steps(edges, advancing)
222
+ combine_next_steps(edges, advancing_ids)
207
223
  else
208
224
  Ductwork.logger.debug(
209
225
  msg: "Not all divided steps have completed; not combining",
@@ -213,14 +229,15 @@ module Ductwork
213
229
  end
214
230
  end
215
231
 
216
- def combine_next_steps(edges, advancing)
232
+ def combine_next_steps(edges, advancing_ids)
217
233
  klass = edges.values.sample.dig(-1, :to).sole
218
234
  step_type = "combine"
219
- groups = advancing
235
+ groups = steps
236
+ .where(id: advancing_ids)
220
237
  .group(:klass)
221
238
  .count
222
239
  .keys
223
- .map { |k| advancing.where(klass: k) }
240
+ .map { |k| steps.where(id: advancing_ids).where(klass: k) }
224
241
 
225
242
  groups.first.zip(*groups[1..]).each do |group|
226
243
  input_arg = Ductwork::Job
@@ -230,9 +247,11 @@ module Ductwork
230
247
  end
231
248
  end
232
249
 
233
- def expand_to_next_steps(step_type, advancing, edge)
250
+ def expand_to_next_steps(step_type, step_id, edge)
234
251
  next_klass = edge[:to].sole
235
- return_value = advancing.take.job.return_value
252
+ return_value = Ductwork::Job
253
+ .find_by(step_id:)
254
+ .return_value
236
255
  max_depth = Ductwork.configuration.steps_max_depth(pipeline: klass, step: next_klass)
237
256
 
238
257
  if max_depth != -1 && return_value.count > max_depth
@@ -248,9 +267,9 @@ module Ductwork
248
267
  end
249
268
  end
250
269
 
251
- def conditionally_collapse_next_steps(step_klass, edge, advancing)
270
+ def conditionally_collapse_next_steps(step_klass, edge, advancing_ids)
252
271
  if steps.where(status: %w[pending in_progress], klass: step_klass).none?
253
- collapse_next_steps(edge[:to].sole, advancing)
272
+ collapse_next_steps(edge[:to].sole, advancing_ids)
254
273
  else
255
274
  Ductwork.logger.debug(
256
275
  msg: "Not all expanded steps have completed; not collapsing",
@@ -260,14 +279,11 @@ module Ductwork
260
279
  end
261
280
  end
262
281
 
263
- def collapse_next_steps(klass, advancing)
282
+ def collapse_next_steps(klass, advancing_ids)
264
283
  step_type = "collapse"
265
284
  input_arg = []
266
285
 
267
- # NOTE: because of expanding based on return values, there
268
- # could be A LOT of jobs so we want to use batch methods
269
- # to avoid creating too many in-memory objects
270
- Ductwork::Job.where(step_id: advancing.ids).find_each do |job|
286
+ Ductwork::Job.where(step_id: advancing_ids).find_each do |job|
271
287
  input_arg << job.return_value
272
288
  end
273
289
 
@@ -12,6 +12,7 @@ module Ductwork
12
12
  enum :status,
13
13
  pending: "pending",
14
14
  in_progress: "in_progress",
15
+ waiting: "waiting",
15
16
  advancing: "advancing",
16
17
  failed: "failed",
17
18
  completed: "completed"
@@ -15,6 +15,7 @@ module Ductwork
15
15
  Ductwork::Pipeline
16
16
  .in_progress
17
17
  .where(klass: klass, claimed_for_advancing_at: nil)
18
+ .where(steps: Ductwork::Step.where(status: :advancing))
18
19
  .where.not(steps: Ductwork::Step.where.not(status: %w[advancing completed]))
19
20
  .order(:last_advanced_at)
20
21
  .limit(1)
@@ -30,6 +31,7 @@ module Ductwork
30
31
  if rows_updated == 1
31
32
  Ductwork.logger.debug(
32
33
  msg: "Pipeline claimed",
34
+ pipeline_id: id,
33
35
  pipeline: klass,
34
36
  role: :pipeline_advancer
35
37
  )
@@ -39,28 +41,29 @@ module Ductwork
39
41
 
40
42
  Ductwork.logger.debug(
41
43
  msg: "Pipeline advanced",
44
+ pipeline_id: id,
42
45
  pipeline: klass,
43
46
  role: :pipeline_advancer
44
47
  )
48
+
49
+ # release the pipeline and set last advanced at so it doesnt block.
50
+ # we're not using a queue so we have to use a db timestamp
51
+ pipeline.update!(
52
+ claimed_for_advancing_at: nil,
53
+ last_advanced_at: Time.current
54
+ )
45
55
  else
46
56
  Ductwork.logger.debug(
47
57
  msg: "Did not claim pipeline, avoided race condition",
58
+ pipeline_id: id,
48
59
  pipeline: klass,
49
60
  role: :pipeline_advancer
50
61
  )
51
62
  end
52
-
53
- # release the pipeline and set last advanced at so it doesnt block.
54
- # we're not using a queue so we have to use a db timestamp
55
- Ductwork::Pipeline.find(id).update!(
56
- claimed_for_advancing_at: nil,
57
- last_advanced_at: Time.current
58
- )
59
63
  else
60
64
  Ductwork.logger.debug(
61
65
  msg: "No pipeline needs advancing",
62
66
  pipeline: klass,
63
- id: id,
64
67
  role: :pipeline_advancer
65
68
  )
66
69
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Ductwork
4
- VERSION = "0.4.0"
4
+ VERSION = "0.6.0"
5
5
  end
data/lib/ductwork.rb CHANGED
@@ -9,19 +9,15 @@ require "zeitwerk"
9
9
 
10
10
  require "ductwork/engine"
11
11
 
12
- loader = Zeitwerk::Loader.for_gem
13
- loader.inflector.inflect("cli" => "CLI")
14
- loader.inflector.inflect("dsl" => "DSL")
15
- loader.ignore("#{__dir__}/generators")
16
- loader.ignore("#{__dir__}/ductwork/testing")
17
- loader.ignore("#{__dir__}/ductwork/testing.rb")
18
- loader.setup
19
-
20
12
  module Ductwork
21
13
  class << self
22
- attr_accessor :app_executor, :configuration, :logger
14
+ attr_accessor :app_executor, :configuration, :loader, :logger
23
15
  attr_writer :defined_pipelines, :hooks
24
16
 
17
+ def eager_load
18
+ loader.eager_load
19
+ end
20
+
25
21
  def wrap_with_app_executor(&block)
26
22
  if app_executor.present?
27
23
  app_executor.wrap(&block)
@@ -75,3 +71,14 @@ module Ductwork
75
71
  end
76
72
  end
77
73
  end
74
+
75
+ loader = Zeitwerk::Loader.for_gem
76
+ loader.inflector.inflect("cli" => "CLI")
77
+ loader.inflector.inflect("dsl" => "DSL")
78
+ loader.collapse("#{__dir__}/ductwork/models")
79
+ loader.ignore("#{__dir__}/generators")
80
+ loader.ignore("#{__dir__}/ductwork/testing")
81
+ loader.ignore("#{__dir__}/ductwork/testing.rb")
82
+ loader.setup
83
+
84
+ Ductwork.loader = loader
@@ -12,5 +12,8 @@ class CreateDuctworkAvailabilities < ActiveRecord::Migration[<%= Rails::VERSION:
12
12
 
13
13
  add_index :ductwork_availabilities, :execution_id, unique: true
14
14
  add_index :ductwork_availabilities, %i[id process_id]
15
+ add_index :ductwork_availabilities,
16
+ %i[completed_at started_at created_at],
17
+ name: "index_ductwork_availabilities_on_claim_latest"
15
18
  end
16
19
  end
@@ -10,5 +10,7 @@ class CreateDuctworkExecutions < ActiveRecord::Migration[<%= Rails::VERSION::MAJ
10
10
  table.integer :process_id
11
11
  table.timestamps null: false
12
12
  end
13
+
14
+ add_index :ductwork_executions, %i[job_id created_at]
13
15
  end
14
16
  end
@@ -7,8 +7,8 @@ class CreateDuctworkJobs < ActiveRecord::Migration[<%= Rails::VERSION::MAJOR %>.
7
7
  table.string :klass, null: false
8
8
  table.timestamp :started_at, null: false
9
9
  table.timestamp :completed_at
10
- table.string :input_args, null: false
11
- table.string :output_payload
10
+ table.text :input_args, null: false
11
+ table.text :output_payload
12
12
  table.timestamps null: false
13
13
  end
14
14
 
@@ -4,9 +4,10 @@ class CreateDuctworkPipelines < ActiveRecord::Migration[<%= Rails::VERSION::MAJO
4
4
  def change
5
5
  create_table :ductwork_pipelines do |table|
6
6
  table.string :klass, null: false
7
- table.string :definition, null: false
7
+ table.text :definition, null: false
8
8
  table.string :definition_sha1, null: false
9
9
  table.timestamp :triggered_at, null: false
10
+ table.timestamp :started_at, null: false
10
11
  table.timestamp :completed_at
11
12
  table.timestamp :claimed_for_advancing_at
12
13
  table.timestamp :last_advanced_at, null: false
@@ -10,5 +10,7 @@ class CreateDuctworkResults < ActiveRecord::Migration[<%= Rails::VERSION::MAJOR
10
10
  table.text :error_backtrace
11
11
  table.timestamps null: false
12
12
  end
13
+
14
+ add_index :ductwork_results, :execution_id, unique: true
13
15
  end
14
16
  end
@@ -8,5 +8,7 @@ class CreateDuctworkRuns < ActiveRecord::Migration[<%= Rails::VERSION::MAJOR %>.
8
8
  table.timestamp :completed_at
9
9
  table.timestamps null: false
10
10
  end
11
+
12
+ add_index :ductwork_runs, :execution_id, unique: true
11
13
  end
12
14
  end
@@ -12,6 +12,9 @@ class CreateDuctworkSteps < ActiveRecord::Migration[<%= Rails::VERSION::MAJOR %>
12
12
  table.timestamps null: false
13
13
  end
14
14
 
15
+ add_index :ductwork_steps, %i[pipeline_id status klass]
16
+ add_index :ductwork_steps, %i[pipeline_id klass status]
17
+ add_index :ductwork_steps, %i[status klass]
15
18
  add_index :ductwork_steps, %i[pipeline_id status]
16
19
  end
17
20
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ductwork
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tyler Ewing
@@ -95,15 +95,6 @@ files:
95
95
  - LICENSE.txt
96
96
  - README.md
97
97
  - Rakefile
98
- - app/models/ductwork/availability.rb
99
- - app/models/ductwork/execution.rb
100
- - app/models/ductwork/job.rb
101
- - app/models/ductwork/pipeline.rb
102
- - app/models/ductwork/process.rb
103
- - app/models/ductwork/record.rb
104
- - app/models/ductwork/result.rb
105
- - app/models/ductwork/run.rb
106
- - app/models/ductwork/step.rb
107
98
  - lib/ductwork.rb
108
99
  - lib/ductwork/cli.rb
109
100
  - lib/ductwork/configuration.rb
@@ -111,6 +102,15 @@ files:
111
102
  - lib/ductwork/dsl/definition_builder.rb
112
103
  - lib/ductwork/engine.rb
113
104
  - lib/ductwork/machine_identifier.rb
105
+ - lib/ductwork/models/availability.rb
106
+ - lib/ductwork/models/execution.rb
107
+ - lib/ductwork/models/job.rb
108
+ - lib/ductwork/models/pipeline.rb
109
+ - lib/ductwork/models/process.rb
110
+ - lib/ductwork/models/record.rb
111
+ - lib/ductwork/models/result.rb
112
+ - lib/ductwork/models/run.rb
113
+ - lib/ductwork/models/step.rb
114
114
  - lib/ductwork/processes/job_worker.rb
115
115
  - lib/ductwork/processes/job_worker_runner.rb
116
116
  - lib/ductwork/processes/pipeline_advancer.rb