dynflow 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +1 -1
  3. data/.travis.yml +3 -4
  4. data/Dockerfile +9 -0
  5. data/Gemfile +6 -0
  6. data/Rakefile +1 -0
  7. data/doc/pages/Gemfile +1 -0
  8. data/doc/pages/Rakefile +1 -0
  9. data/doc/pages/plugins/alert_block.rb +1 -0
  10. data/doc/pages/plugins/div_tag.rb +1 -0
  11. data/doc/pages/plugins/graphviz.rb +1 -0
  12. data/doc/pages/plugins/plantuml.rb +1 -0
  13. data/doc/pages/plugins/play.rb +1 -0
  14. data/doc/pages/plugins/tags.rb +1 -0
  15. data/doc/pages/plugins/toc.rb +1 -0
  16. data/docker-compose.yml +41 -0
  17. data/dynflow.gemspec +1 -0
  18. data/examples/clock_benchmark.rb +1 -0
  19. data/examples/example_helper.rb +19 -2
  20. data/examples/future_execution.rb +2 -1
  21. data/examples/memory_limit_watcher.rb +1 -0
  22. data/examples/orchestrate.rb +4 -5
  23. data/examples/orchestrate_evented.rb +3 -2
  24. data/examples/remote_executor.rb +68 -0
  25. data/examples/singletons.rb +4 -3
  26. data/examples/sub_plan_concurrency_control.rb +2 -1
  27. data/examples/sub_plans.rb +3 -2
  28. data/examples/termination.rb +1 -0
  29. data/lib/dynflow.rb +20 -0
  30. data/lib/dynflow/action.rb +28 -3
  31. data/lib/dynflow/action/cancellable.rb +1 -0
  32. data/lib/dynflow/action/format.rb +1 -0
  33. data/lib/dynflow/action/missing.rb +1 -0
  34. data/lib/dynflow/action/polling.rb +3 -1
  35. data/lib/dynflow/action/progress.rb +1 -0
  36. data/lib/dynflow/action/rescue.rb +1 -0
  37. data/lib/dynflow/action/singleton.rb +1 -0
  38. data/lib/dynflow/action/suspended.rb +9 -2
  39. data/lib/dynflow/action/timeouts.rb +2 -1
  40. data/lib/dynflow/action/with_bulk_sub_plans.rb +2 -1
  41. data/lib/dynflow/action/with_polling_sub_plans.rb +7 -5
  42. data/lib/dynflow/action/with_sub_plans.rb +1 -0
  43. data/lib/dynflow/active_job/queue_adapter.rb +1 -0
  44. data/lib/dynflow/actor.rb +13 -5
  45. data/lib/dynflow/actors.rb +1 -0
  46. data/lib/dynflow/actors/execution_plan_cleaner.rb +1 -0
  47. data/lib/dynflow/clock.rb +27 -47
  48. data/lib/dynflow/config.rb +11 -2
  49. data/lib/dynflow/connectors.rb +1 -0
  50. data/lib/dynflow/connectors/abstract.rb +1 -0
  51. data/lib/dynflow/connectors/database.rb +1 -0
  52. data/lib/dynflow/connectors/direct.rb +1 -0
  53. data/lib/dynflow/coordinator.rb +1 -0
  54. data/lib/dynflow/coordinator_adapters.rb +1 -0
  55. data/lib/dynflow/coordinator_adapters/abstract.rb +1 -0
  56. data/lib/dynflow/coordinator_adapters/sequel.rb +1 -0
  57. data/lib/dynflow/dead_letter_silencer.rb +2 -0
  58. data/lib/dynflow/debug/telemetry/persistence.rb +1 -0
  59. data/lib/dynflow/delayed_executors.rb +1 -0
  60. data/lib/dynflow/delayed_executors/abstract.rb +1 -0
  61. data/lib/dynflow/delayed_executors/abstract_core.rb +1 -0
  62. data/lib/dynflow/delayed_executors/polling.rb +1 -0
  63. data/lib/dynflow/delayed_plan.rb +1 -0
  64. data/lib/dynflow/director.rb +80 -15
  65. data/lib/dynflow/director/execution_plan_manager.rb +17 -3
  66. data/lib/dynflow/director/flow_manager.rb +1 -0
  67. data/lib/dynflow/director/{work_queue.rb → queue_hash.rb} +9 -8
  68. data/lib/dynflow/director/running_steps_manager.rb +55 -18
  69. data/lib/dynflow/director/sequence_cursor.rb +1 -0
  70. data/lib/dynflow/director/sequential_manager.rb +12 -2
  71. data/lib/dynflow/dispatcher.rb +4 -2
  72. data/lib/dynflow/dispatcher/abstract.rb +1 -0
  73. data/lib/dynflow/dispatcher/client_dispatcher.rb +6 -4
  74. data/lib/dynflow/dispatcher/executor_dispatcher.rb +13 -1
  75. data/lib/dynflow/errors.rb +1 -0
  76. data/lib/dynflow/execution_history.rb +1 -0
  77. data/lib/dynflow/execution_plan.rb +3 -2
  78. data/lib/dynflow/execution_plan/dependency_graph.rb +1 -0
  79. data/lib/dynflow/execution_plan/hooks.rb +1 -0
  80. data/lib/dynflow/execution_plan/output_reference.rb +2 -1
  81. data/lib/dynflow/execution_plan/steps.rb +1 -0
  82. data/lib/dynflow/execution_plan/steps/abstract.rb +10 -5
  83. data/lib/dynflow/execution_plan/steps/abstract_flow_step.rb +2 -0
  84. data/lib/dynflow/execution_plan/steps/error.rb +1 -0
  85. data/lib/dynflow/execution_plan/steps/finalize_step.rb +1 -0
  86. data/lib/dynflow/execution_plan/steps/plan_step.rb +1 -0
  87. data/lib/dynflow/execution_plan/steps/run_step.rb +1 -0
  88. data/lib/dynflow/executors.rb +1 -1
  89. data/lib/dynflow/executors/abstract/core.rb +132 -0
  90. data/lib/dynflow/executors/parallel.rb +24 -11
  91. data/lib/dynflow/executors/parallel/core.rb +10 -91
  92. data/lib/dynflow/executors/parallel/pool.rb +4 -2
  93. data/lib/dynflow/executors/parallel/worker.rb +2 -1
  94. data/lib/dynflow/executors/sidekiq/core.rb +121 -0
  95. data/lib/dynflow/executors/sidekiq/internal_job_base.rb +24 -0
  96. data/lib/dynflow/executors/sidekiq/orchestrator_jobs.rb +60 -0
  97. data/lib/dynflow/executors/sidekiq/redis_locking.rb +69 -0
  98. data/lib/dynflow/executors/sidekiq/serialization.rb +33 -0
  99. data/lib/dynflow/executors/sidekiq/worker_jobs.rb +42 -0
  100. data/lib/dynflow/flows.rb +1 -0
  101. data/lib/dynflow/flows/abstract.rb +1 -0
  102. data/lib/dynflow/flows/abstract_composed.rb +1 -0
  103. data/lib/dynflow/flows/atom.rb +1 -0
  104. data/lib/dynflow/flows/concurrence.rb +1 -0
  105. data/lib/dynflow/flows/sequence.rb +1 -0
  106. data/lib/dynflow/logger_adapters.rb +1 -0
  107. data/lib/dynflow/logger_adapters/abstract.rb +1 -0
  108. data/lib/dynflow/logger_adapters/delegator.rb +1 -0
  109. data/lib/dynflow/logger_adapters/formatters.rb +1 -0
  110. data/lib/dynflow/logger_adapters/formatters/abstract.rb +1 -0
  111. data/lib/dynflow/logger_adapters/formatters/exception.rb +1 -0
  112. data/lib/dynflow/logger_adapters/simple.rb +1 -0
  113. data/lib/dynflow/middleware.rb +1 -0
  114. data/lib/dynflow/middleware/common/singleton.rb +1 -0
  115. data/lib/dynflow/middleware/common/transaction.rb +1 -0
  116. data/lib/dynflow/middleware/register.rb +1 -0
  117. data/lib/dynflow/middleware/resolver.rb +1 -0
  118. data/lib/dynflow/middleware/stack.rb +1 -0
  119. data/lib/dynflow/middleware/world.rb +1 -0
  120. data/lib/dynflow/persistence.rb +3 -2
  121. data/lib/dynflow/persistence_adapters.rb +1 -0
  122. data/lib/dynflow/persistence_adapters/abstract.rb +1 -0
  123. data/lib/dynflow/persistence_adapters/sequel.rb +10 -7
  124. data/lib/dynflow/persistence_adapters/sequel_migrations/001_initial.rb +1 -0
  125. data/lib/dynflow/persistence_adapters/sequel_migrations/002_incremental_progress.rb +1 -0
  126. data/lib/dynflow/persistence_adapters/sequel_migrations/003_parent_action.rb +1 -0
  127. data/lib/dynflow/persistence_adapters/sequel_migrations/004_coordinator_records.rb +1 -0
  128. data/lib/dynflow/persistence_adapters/sequel_migrations/005_envelopes.rb +1 -0
  129. data/lib/dynflow/persistence_adapters/sequel_migrations/006_fix_data_length.rb +1 -0
  130. data/lib/dynflow/persistence_adapters/sequel_migrations/007_future_execution.rb +1 -0
  131. data/lib/dynflow/persistence_adapters/sequel_migrations/008_rename_scheduled_plans_to_delayed_plans.rb +1 -0
  132. data/lib/dynflow/persistence_adapters/sequel_migrations/009_fix_mysql_data_length.rb +1 -0
  133. data/lib/dynflow/persistence_adapters/sequel_migrations/010_add_execution_plans_label.rb +1 -0
  134. data/lib/dynflow/persistence_adapters/sequel_migrations/011_placeholder.rb +1 -0
  135. data/lib/dynflow/persistence_adapters/sequel_migrations/012_add_delayed_plans_serialized_args.rb +1 -0
  136. data/lib/dynflow/persistence_adapters/sequel_migrations/013_add_action_columns.rb +1 -0
  137. data/lib/dynflow/persistence_adapters/sequel_migrations/014_add_step_columns.rb +1 -0
  138. data/lib/dynflow/persistence_adapters/sequel_migrations/015_add_execution_plan_columns.rb +1 -0
  139. data/lib/dynflow/persistence_adapters/sequel_migrations/016_add_step_queue.rb +1 -0
  140. data/lib/dynflow/persistence_adapters/sequel_migrations/017_add_delayed_plan_frozen.rb +1 -0
  141. data/lib/dynflow/persistence_adapters/sequel_migrations/018_add_uuid_column.rb +1 -0
  142. data/lib/dynflow/persistence_adapters/sequel_migrations/019_update_mysql_time_precision.rb +48 -0
  143. data/lib/dynflow/rails.rb +1 -0
  144. data/lib/dynflow/rails/configuration.rb +6 -3
  145. data/lib/dynflow/rails/daemon.rb +1 -0
  146. data/lib/dynflow/round_robin.rb +1 -0
  147. data/lib/dynflow/semaphores.rb +1 -0
  148. data/lib/dynflow/semaphores/abstract.rb +1 -0
  149. data/lib/dynflow/semaphores/aggregating.rb +1 -0
  150. data/lib/dynflow/semaphores/dummy.rb +1 -0
  151. data/lib/dynflow/semaphores/stateful.rb +1 -0
  152. data/lib/dynflow/serializable.rb +13 -4
  153. data/lib/dynflow/serializer.rb +24 -0
  154. data/lib/dynflow/serializers.rb +1 -0
  155. data/lib/dynflow/serializers/abstract.rb +1 -0
  156. data/lib/dynflow/serializers/noop.rb +1 -0
  157. data/lib/dynflow/stateful.rb +1 -0
  158. data/lib/dynflow/telemetry.rb +1 -0
  159. data/lib/dynflow/telemetry_adapters/abstract.rb +1 -0
  160. data/lib/dynflow/telemetry_adapters/dummy.rb +1 -0
  161. data/lib/dynflow/telemetry_adapters/statsd.rb +1 -0
  162. data/lib/dynflow/testing.rb +1 -0
  163. data/lib/dynflow/testing/assertions.rb +6 -5
  164. data/lib/dynflow/testing/dummy_execution_plan.rb +1 -0
  165. data/lib/dynflow/testing/dummy_executor.rb +19 -2
  166. data/lib/dynflow/testing/dummy_planned_action.rb +1 -0
  167. data/lib/dynflow/testing/dummy_step.rb +3 -1
  168. data/lib/dynflow/testing/dummy_world.rb +9 -0
  169. data/lib/dynflow/testing/factories.rb +6 -1
  170. data/lib/dynflow/testing/in_thread_executor.rb +22 -3
  171. data/lib/dynflow/testing/in_thread_world.rb +9 -0
  172. data/lib/dynflow/testing/managed_clock.rb +1 -0
  173. data/lib/dynflow/testing/mimic.rb +1 -0
  174. data/lib/dynflow/throttle_limiter.rb +1 -0
  175. data/lib/dynflow/transaction_adapters.rb +1 -0
  176. data/lib/dynflow/transaction_adapters/abstract.rb +1 -0
  177. data/lib/dynflow/transaction_adapters/active_record.rb +1 -0
  178. data/lib/dynflow/transaction_adapters/none.rb +1 -0
  179. data/lib/dynflow/utils.rb +1 -0
  180. data/lib/dynflow/utils/indifferent_hash.rb +1 -0
  181. data/lib/dynflow/utils/priority_queue.rb +1 -0
  182. data/lib/dynflow/version.rb +2 -1
  183. data/lib/dynflow/watchers/memory_consumption_watcher.rb +1 -0
  184. data/lib/dynflow/web.rb +1 -0
  185. data/lib/dynflow/web/console.rb +1 -0
  186. data/lib/dynflow/web/console_helpers.rb +1 -0
  187. data/lib/dynflow/web/filtering_helpers.rb +1 -0
  188. data/lib/dynflow/web/world_helpers.rb +1 -0
  189. data/lib/dynflow/web_console.rb +1 -0
  190. data/lib/dynflow/world.rb +11 -1
  191. data/lib/dynflow/world/invalidation.rb +7 -1
  192. data/test/abnormal_states_recovery_test.rb +41 -40
  193. data/test/action_test.rb +160 -110
  194. data/test/activejob_adapter_test.rb +1 -0
  195. data/test/batch_sub_tasks_test.rb +12 -11
  196. data/test/clock_test.rb +2 -1
  197. data/test/concurrency_control_test.rb +20 -19
  198. data/test/coordinator_test.rb +20 -21
  199. data/test/daemon_test.rb +2 -1
  200. data/test/dead_letter_silencer_test.rb +9 -7
  201. data/test/dispatcher_test.rb +2 -1
  202. data/test/execution_plan_cleaner_test.rb +13 -12
  203. data/test/execution_plan_hooks_test.rb +3 -2
  204. data/test/execution_plan_test.rb +33 -32
  205. data/test/executor_test.rb +533 -489
  206. data/test/future_execution_test.rb +45 -44
  207. data/test/memory_cosumption_watcher_test.rb +5 -4
  208. data/test/middleware_test.rb +55 -54
  209. data/test/persistence_test.rb +56 -53
  210. data/test/rescue_test.rb +36 -35
  211. data/test/round_robin_test.rb +13 -12
  212. data/test/semaphores_test.rb +31 -30
  213. data/test/support/code_workflow_example.rb +1 -0
  214. data/test/support/dummy_example.rb +14 -1
  215. data/test/support/middleware_example.rb +2 -1
  216. data/test/support/rails/config/environment.rb +1 -0
  217. data/test/support/rescue_example.rb +1 -0
  218. data/test/support/test_execution_log.rb +1 -0
  219. data/test/test_helper.rb +18 -17
  220. data/test/testing_test.rb +45 -44
  221. data/test/utils_test.rb +18 -17
  222. data/test/web_console_test.rb +1 -0
  223. data/test/world_test.rb +7 -6
  224. metadata +13 -4
  225. data/lib/dynflow/executors/abstract.rb +0 -40
@@ -1,20 +1,17 @@
1
+ # frozen_string_literal: true
2
+ require 'dynflow/executors/parallel/pool'
3
+ require 'dynflow/executors/parallel/worker'
4
+
1
5
  module Dynflow
2
6
  module Executors
3
- class Parallel < Abstract
4
- class Core < Actor
7
+ class Parallel
8
+ class Core < Abstract::Core
5
9
  attr_reader :logger
6
10
 
7
11
  def initialize(world, heartbeat_interval, queues_options)
8
- @logger = world.logger
9
- @world = Type! world, World
10
- @queues_options = queues_options
11
- @pools = {}
12
- @terminated = nil
13
- @director = Director.new(@world)
14
- @heartbeat_interval = heartbeat_interval
15
-
12
+ super
13
+ @pools = {}
16
14
  initialize_queues
17
- schedule_heartbeat
18
15
  end
19
16
 
20
17
  def initialize_queues
@@ -27,41 +24,8 @@ module Dynflow
27
24
  end
28
25
  end
29
26
 
30
- def handle_execution(execution_plan_id, finished)
31
- if terminating?
32
- raise Dynflow::Error,
33
- "cannot accept execution_plan_id:#{execution_plan_id} core is terminating"
34
- end
35
-
36
- feed_pool(@director.start_execution(execution_plan_id, finished))
37
- end
38
-
39
- def handle_event(event)
40
- Type! event, Director::Event
41
- if terminating?
42
- raise Dynflow::Error,
43
- "cannot accept event: #{event} core is terminating"
44
- end
45
- feed_pool(@director.handle_event(event))
46
- end
47
-
48
- def work_finished(work)
49
- feed_pool(@director.work_finished(work))
50
- end
51
-
52
- def handle_persistence_error(error, work = nil)
53
- logger.error "PersistenceError in executor"
54
- logger.error error
55
- @director.work_failed(work) if work
56
- if error.is_a? Errors::FatalPersistenceError
57
- logger.fatal "Terminating"
58
- @world.terminate
59
- end
60
- end
61
-
62
27
  def start_termination(*args)
63
28
  super
64
- logger.info 'shutting down Core ...'
65
29
  @pools.values.each { |pool| pool.tell([:start_termination, Concurrent::Promises.resolvable_future]) }
66
30
  end
67
31
 
@@ -69,66 +33,21 @@ module Dynflow
69
33
  @pools.delete(pool_name)
70
34
  # we expect this message from all worker pools
71
35
  return unless @pools.empty?
72
- @director.terminate
73
- logger.info '... Dynflow core terminated.'
74
36
  super()
75
37
  end
76
38
 
77
- def dead_letter_routing
78
- @world.dead_letter_handler
79
- end
80
-
81
39
  def execution_status(execution_plan_id = nil)
82
40
  @pools.each_with_object({}) do |(pool_name, pool), hash|
83
41
  hash[pool_name] = pool.ask!([:execution_status, execution_plan_id])
84
42
  end
85
43
  end
86
44
 
87
- def heartbeat
88
- @logger.debug('Executor heartbeat')
89
- record = @world.coordinator.find_records(:id => @world.id,
90
- :class => ['Dynflow::Coordinator::ExecutorWorld', 'Dynflow::Coordinator::ClientWorld']).first
91
- unless record
92
- logger.error(%{Executor's world record for #{@world.id} missing: terminating})
93
- @world.terminate
94
- return
95
- end
96
-
97
- record.data[:meta].update(:last_seen => Dynflow::Dispatcher::ClientDispatcher::PingCache.format_time)
98
- @world.coordinator.update_record(record)
99
- schedule_heartbeat
100
- end
101
-
102
- private
103
-
104
- def schedule_heartbeat
105
- @world.clock.ping(self, @heartbeat_interval, :heartbeat)
106
- end
107
-
108
- def on_message(message)
109
- super
110
- rescue Errors::PersistenceError => e
111
- self.tell([:handle_persistence_error, e])
112
- end
113
-
114
45
  def feed_pool(work_items)
115
- return if terminating?
116
- return if work_items.nil?
117
- work_items = [work_items] if work_items.is_a? Director::WorkItem
118
- work_items.all? { |i| Type! i, Director::WorkItem }
119
46
  work_items.each do |new_work|
120
- pool = @pools[new_work.queue]
121
- unless pool
122
- logger.error("Pool is not available for queue #{new_work.queue}, falling back to #{fallback_queue}")
123
- pool = @pools[fallback_queue]
124
- end
125
- pool.tell([:schedule_work, new_work])
47
+ new_work.world = @world
48
+ @pools.fetch(suggest_queue(new_work)).tell([:schedule_work, new_work])
126
49
  end
127
50
  end
128
-
129
- def fallback_queue
130
- :default
131
- end
132
51
  end
133
52
  end
134
53
  end
@@ -1,6 +1,7 @@
1
+ # frozen_string_literal: true
1
2
  module Dynflow
2
3
  module Executors
3
- class Parallel < Abstract
4
+ class Parallel
4
5
  class Pool < Actor
5
6
  class JobStorage
6
7
  def initialize
@@ -53,7 +54,8 @@ module Dynflow
53
54
  end
54
55
 
55
56
  def worker_done(worker, work)
56
- @executor_core.tell([:work_finished, work])
57
+ step = work.step if work.is_a?(Director::StepWorkItem)
58
+ @executor_core.tell([:work_finished, work, step && step.delayed_events])
57
59
  @free_workers << worker
58
60
  Dynflow::Telemetry.with_instance { |t| t.set_gauge(:dynflow_active_workers, -1, telemetry_options) }
59
61
  distribute_jobs
@@ -1,6 +1,7 @@
1
+ # frozen_string_literal: true
1
2
  module Dynflow
2
3
  module Executors
3
- class Parallel < Abstract
4
+ class Parallel
4
5
  class Worker < Actor
5
6
  def initialize(pool, transaction_adapter, telemetry_options = {})
6
7
  @pool = Type! pool, Concurrent::Actor::Reference
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+ require 'dynflow/executors/sidekiq/serialization'
3
+ require 'dynflow/executors/sidekiq/internal_job_base'
4
+ require 'dynflow/executors/sidekiq/orchestrator_jobs'
5
+ require 'dynflow/executors/sidekiq/worker_jobs'
6
+ require 'dynflow/executors/sidekiq/redis_locking'
7
+
8
+ require 'sidekiq-reliable-fetch'
9
+ Sidekiq.configure_server do |config|
10
+ # Use semi-reliable fetch
11
+ # for details see https://gitlab.com/gitlab-org/sidekiq-reliable-fetch/blob/master/README.md
12
+ config.options[:semi_reliable_fetch] = true
13
+ Sidekiq::ReliableFetch.setup_reliable_fetch!(config)
14
+ end
15
+
16
+ module Dynflow
17
+ module Executors
18
+ module Sidekiq
19
+ class Core < Abstract::Core
20
+ include RedisLocking
21
+
22
+ TELEMETRY_UPDATE_INTERVAL = 30 # update telemetry every 30s
23
+
24
+ attr_reader :logger
25
+
26
+ def initialize(world, *_args)
27
+ @world = world
28
+ @logger = world.logger
29
+ wait_for_orchestrator_lock
30
+ super
31
+ schedule_update_telemetry
32
+ begin_startup!
33
+ end
34
+
35
+ def heartbeat
36
+ super
37
+ reacquire_orchestrator_lock
38
+ end
39
+
40
+ def start_termination(*args)
41
+ super
42
+ release_orchestrator_lock
43
+ finish_termination
44
+ end
45
+
46
+ # TODO: needs thought on how to implement it
47
+ def execution_status(execution_plan_id = nil)
48
+ {}
49
+ end
50
+
51
+ def feed_pool(work_items)
52
+ work_items.each do |new_work|
53
+ WorkerJobs::PerformWork.set(queue: suggest_queue(new_work)).perform_async(new_work)
54
+ end
55
+ end
56
+
57
+ def update_telemetry
58
+ sidekiq_queues = ::Sidekiq::Stats.new.queues
59
+ @queues_options.keys.each do |queue|
60
+ queue_size = sidekiq_queues[queue.to_s]
61
+ if queue_size
62
+ Dynflow::Telemetry.with_instance { |t| t.set_gauge(:dynflow_queue_size, queue_size, telemetry_options(queue)) }
63
+ end
64
+ end
65
+ schedule_update_telemetry
66
+ end
67
+
68
+ def work_finished(work, delayed_events = nil)
69
+ # If the work item is sent in reply to a request from the current orchestrator, proceed
70
+ if work.sender_orchestrator_id == @world.id
71
+ super
72
+ else
73
+ # If we're in recovery, we can drop the work as the execution plan will be resumed during validity checks performed when leaving recovery
74
+ # If we're not in recovery and receive an event from another orchestrator, it means it survived the queue draining.
75
+ handle_unknown_work_item(work) unless @recovery
76
+ end
77
+ end
78
+
79
+ def begin_startup!
80
+ WorkerJobs::DrainMarker.perform_async(@world.id)
81
+ @recovery = true
82
+ end
83
+
84
+ def startup_complete
85
+ logger.info('Performing validity checks')
86
+ @world.perform_validity_checks
87
+ logger.info('Finished performing validity checks')
88
+ @recovery = false
89
+ end
90
+
91
+ private
92
+
93
+ def fallback_queue
94
+ :default
95
+ end
96
+
97
+ def schedule_update_telemetry
98
+ @world.clock.ping(reference, TELEMETRY_UPDATE_INTERVAL, [:update_telemetry])
99
+ end
100
+
101
+ def telemetry_options(queue)
102
+ { queue: queue.to_s, world: @world.id }
103
+ end
104
+
105
+ # We check whether an execution lock is already held by an orchestrator (it should be the current one). If no lock is held,
106
+ # we try to resume the execution plan if possible
107
+ def handle_unknown_work_item(work)
108
+ # We are past recovery now; if we receive an event here, the execution plan will most likely be paused.
109
+ # We can either try to rescue it or turn it over to stopped
110
+ execution_lock = @world.coordinator.find_locks(class: Coordinator::ExecutionLock.name,
111
+ id: "execution-plan:#{work.execution_plan_id}").first
112
+ if execution_lock.nil?
113
+ plan = @world.persistence.load_execution_plan(work.execution_plan_id)
114
+ should_resume = !plan.error? || plan.prepare_for_rescue == :running
115
+ @world.execute(plan.id) if should_resume
116
+ end
117
+ end
118
+ end
119
+ end
120
+ end
121
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+ module Dynflow
3
+ module Executors
4
+ module Sidekiq
5
+ class InternalJobBase
6
+ include ::Sidekiq::Worker
7
+ extend ::Dynflow::Executors::Sidekiq::Serialization::WorkerExtension::ClassMethods
8
+ sidekiq_options retry: false, backtrace: true
9
+
10
+ def self.inherited(klass)
11
+ klass.prepend(::Dynflow::Executors::Sidekiq::Serialization::WorkerExtension)
12
+ end
13
+
14
+ def worker_id
15
+ ::Sidekiq::Logging.tid
16
+ end
17
+
18
+ def telemetry_options(work_item)
19
+ { queue: work_item.queue.to_s, world: Dynflow.process_world.id, worker: worker_id }
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+ module Dynflow
3
+ module Executors
4
+ module Sidekiq
5
+ module OrchestratorJobs
6
+ # handles responses about finished work from the workers
7
+ # or some event to handle on orchestrator side
8
+ class WorkerDone < InternalJobBase
9
+ sidekiq_options queue: :dynflow_orchestrator
10
+
11
+ # @param work_item - the finished work item, passed on to the executor core as :work_finished
12
+ # @param delayed_events - optional, passed on to the executor core together with the work item
13
+ def perform(work_item, delayed_events = nil)
14
+ # Usually the step is saved on the worker's side. However if sidekiq is shut down,
15
+ # then the step may not have been saved so we save it just to be sure
16
+ if work_item.is_a?(Director::StepWorkItem) && work_item.step&.error&.exception.is_a?(::Sidekiq::Shutdown)
17
+ work_item.step.save
18
+ end
19
+ Dynflow.process_world.executor.core.tell([:work_finished, work_item, delayed_events])
20
+ end
21
+ end
22
+
23
+ # handles setting up an event on orchestrator
24
+ class PlanEvent < InternalJobBase
25
+ sidekiq_options queue: :dynflow_orchestrator
26
+
27
+ # @param execution_plan_id, step_id, event, time - passed unchanged to
28
+ # Dynflow.process_world.plan_event
29
+ def perform(execution_plan_id, step_id, event, time)
30
+ Dynflow.process_world.plan_event(execution_plan_id, step_id, event, time)
31
+ end
32
+ end
33
+
34
+ class HandlePersistenceError < InternalJobBase
35
+ sidekiq_options queue: :dynflow_orchestrator
36
+
37
+ # @param error - the error passed on to the executor core as :handle_persistence_error
38
+ # @param work_item - the work item passed on together with the error
39
+ def perform(error, work_item)
40
+ Dynflow.process_world.executor.core.tell([:handle_persistence_error, error, work_item])
41
+ end
42
+ end
43
+
44
+ class StartupComplete < InternalJobBase
45
+ sidekiq_options queue: :dynflow_orchestrator
46
+
47
+ # @param world_id - id of the world the startup-complete message is meant for;
48
+ # the message is discarded (with a warning) when it differs from the current process world's id
49
+ def perform(world_id)
50
+ if Dynflow.process_world.id == world_id
51
+ Dynflow.process_world.executor.core.tell([:startup_complete])
52
+ else
53
+ logger.warn("Received startup complete for a different world #{world_id}, discarding.")
54
+ end
55
+ end
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+ module Dynflow
3
+ module Executors
4
+ module Sidekiq
5
+ module RedisLocking
6
+ REDIS_LOCK_KEY = 'dynflow_orchestrator_uuid'
7
+ REDIS_LOCK_TTL = 60
8
+ REDIS_LOCK_POLL_INTERVAL = 15
9
+
10
+ ACQUIRE_OK = 0
11
+ ACQUIRE_MISSING = 1
12
+ ACQUIRE_TAKEN = 2
13
+
14
+ RELEASE_SCRIPT = <<~LUA
15
+ if redis.call("get", KEYS[1]) == ARGV[1] then
16
+ redis.call("del", KEYS[1])
17
+ end
18
+ return #{ACQUIRE_OK}
19
+ LUA
20
+
21
+ REACQUIRE_SCRIPT = <<~LUA
22
+ if redis.call("exists", KEYS[1]) == 1 then
23
+ local owner = redis.call("get", KEYS[1])
24
+ if owner == ARGV[1] then
25
+ redis.call("set", KEYS[1], ARGV[1], "XX", "EX", #{REDIS_LOCK_TTL})
26
+ return #{ACQUIRE_OK}
27
+ else
28
+ return #{ACQUIRE_TAKEN}
29
+ end
30
+ else
31
+ redis.call("set", KEYS[1], ARGV[1], "NX", "EX", #{REDIS_LOCK_TTL})
32
+ return #{ACQUIRE_MISSING}
33
+ end
34
+ LUA
35
+
36
+ def release_orchestrator_lock
37
+ ::Sidekiq.redis { |conn| conn.eval RELEASE_SCRIPT, [REDIS_LOCK_KEY], [@world.id] }
38
+ end
39
+
40
+ def wait_for_orchestrator_lock
41
+ mode = nil
42
+ loop do
43
+ active = ::Sidekiq.redis do |conn|
44
+ conn.set(REDIS_LOCK_KEY, @world.id, :ex => REDIS_LOCK_TTL, :nx => true)
45
+ end
46
+ break if active
47
+ if mode.nil?
48
+ mode = :passive
49
+ @logger.info('Orchestrator lock already taken, entering passive mode.')
50
+ end
51
+ sleep REDIS_LOCK_POLL_INTERVAL
52
+ end
53
+ @logger.info('Acquired orchestrator lock, entering active mode.')
54
+ end
55
+
56
+ def reacquire_orchestrator_lock
57
+ case ::Sidekiq.redis { |conn| conn.eval REACQUIRE_SCRIPT, [REDIS_LOCK_KEY], [@world.id] }
58
+ when ACQUIRE_MISSING
59
+ @logger.error('The orchestrator lock was lost, reacquired')
60
+ when ACQUIRE_TAKEN
61
+ owner = ::Sidekiq.redis { |conn| conn.get REDIS_LOCK_KEY }
62
+ @logger.fatal("The orchestrator lock was stolen by #{owner}, aborting.")
63
+ Process.kill('INT', Process.pid)
64
+ end
65
+ end
66
+ end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+ module Dynflow
3
+ module Executors
4
+ module Sidekiq
5
+ # Module to prepend to the Sidekiq job to handle the serialization
6
+ module Serialization
7
+ def self.serialize(value)
8
+ Dynflow.serializer.dump(value)
9
+ end
10
+
11
+ def self.deserialize(value)
12
+ value = Utils::IndifferentHash.new(value) if value.is_a? Hash
13
+ Dynflow.serializer.load(value)
14
+ end
15
+
16
+ module WorkerExtension
17
+ # Overriding the Sidekiq entry method to perform additional serialization preparation
18
+ module ClassMethods
19
+ def client_push(opts)
20
+ opts['args'] = opts['args'].map { |a| Serialization.serialize(a) }
21
+ super(opts)
22
+ end
23
+ end
24
+
25
+ def perform(*args)
26
+ args = args.map { |a| Serialization.deserialize(a) }
27
+ super(*args)
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end