dynflow 1.3.0 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (225) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +1 -1
  3. data/.travis.yml +3 -4
  4. data/Dockerfile +9 -0
  5. data/Gemfile +6 -0
  6. data/Rakefile +1 -0
  7. data/doc/pages/Gemfile +1 -0
  8. data/doc/pages/Rakefile +1 -0
  9. data/doc/pages/plugins/alert_block.rb +1 -0
  10. data/doc/pages/plugins/div_tag.rb +1 -0
  11. data/doc/pages/plugins/graphviz.rb +1 -0
  12. data/doc/pages/plugins/plantuml.rb +1 -0
  13. data/doc/pages/plugins/play.rb +1 -0
  14. data/doc/pages/plugins/tags.rb +1 -0
  15. data/doc/pages/plugins/toc.rb +1 -0
  16. data/docker-compose.yml +41 -0
  17. data/dynflow.gemspec +1 -0
  18. data/examples/clock_benchmark.rb +1 -0
  19. data/examples/example_helper.rb +19 -2
  20. data/examples/future_execution.rb +2 -1
  21. data/examples/memory_limit_watcher.rb +1 -0
  22. data/examples/orchestrate.rb +4 -5
  23. data/examples/orchestrate_evented.rb +3 -2
  24. data/examples/remote_executor.rb +68 -0
  25. data/examples/singletons.rb +4 -3
  26. data/examples/sub_plan_concurrency_control.rb +2 -1
  27. data/examples/sub_plans.rb +3 -2
  28. data/examples/termination.rb +1 -0
  29. data/lib/dynflow.rb +20 -0
  30. data/lib/dynflow/action.rb +28 -3
  31. data/lib/dynflow/action/cancellable.rb +1 -0
  32. data/lib/dynflow/action/format.rb +1 -0
  33. data/lib/dynflow/action/missing.rb +1 -0
  34. data/lib/dynflow/action/polling.rb +3 -1
  35. data/lib/dynflow/action/progress.rb +1 -0
  36. data/lib/dynflow/action/rescue.rb +1 -0
  37. data/lib/dynflow/action/singleton.rb +1 -0
  38. data/lib/dynflow/action/suspended.rb +9 -2
  39. data/lib/dynflow/action/timeouts.rb +2 -1
  40. data/lib/dynflow/action/with_bulk_sub_plans.rb +2 -1
  41. data/lib/dynflow/action/with_polling_sub_plans.rb +7 -5
  42. data/lib/dynflow/action/with_sub_plans.rb +1 -0
  43. data/lib/dynflow/active_job/queue_adapter.rb +1 -0
  44. data/lib/dynflow/actor.rb +13 -5
  45. data/lib/dynflow/actors.rb +1 -0
  46. data/lib/dynflow/actors/execution_plan_cleaner.rb +1 -0
  47. data/lib/dynflow/clock.rb +27 -47
  48. data/lib/dynflow/config.rb +11 -2
  49. data/lib/dynflow/connectors.rb +1 -0
  50. data/lib/dynflow/connectors/abstract.rb +1 -0
  51. data/lib/dynflow/connectors/database.rb +1 -0
  52. data/lib/dynflow/connectors/direct.rb +1 -0
  53. data/lib/dynflow/coordinator.rb +1 -0
  54. data/lib/dynflow/coordinator_adapters.rb +1 -0
  55. data/lib/dynflow/coordinator_adapters/abstract.rb +1 -0
  56. data/lib/dynflow/coordinator_adapters/sequel.rb +1 -0
  57. data/lib/dynflow/dead_letter_silencer.rb +2 -0
  58. data/lib/dynflow/debug/telemetry/persistence.rb +1 -0
  59. data/lib/dynflow/delayed_executors.rb +1 -0
  60. data/lib/dynflow/delayed_executors/abstract.rb +1 -0
  61. data/lib/dynflow/delayed_executors/abstract_core.rb +1 -0
  62. data/lib/dynflow/delayed_executors/polling.rb +1 -0
  63. data/lib/dynflow/delayed_plan.rb +1 -0
  64. data/lib/dynflow/director.rb +80 -15
  65. data/lib/dynflow/director/execution_plan_manager.rb +17 -3
  66. data/lib/dynflow/director/flow_manager.rb +1 -0
  67. data/lib/dynflow/director/{work_queue.rb → queue_hash.rb} +9 -8
  68. data/lib/dynflow/director/running_steps_manager.rb +55 -18
  69. data/lib/dynflow/director/sequence_cursor.rb +1 -0
  70. data/lib/dynflow/director/sequential_manager.rb +12 -2
  71. data/lib/dynflow/dispatcher.rb +4 -2
  72. data/lib/dynflow/dispatcher/abstract.rb +1 -0
  73. data/lib/dynflow/dispatcher/client_dispatcher.rb +6 -4
  74. data/lib/dynflow/dispatcher/executor_dispatcher.rb +13 -1
  75. data/lib/dynflow/errors.rb +1 -0
  76. data/lib/dynflow/execution_history.rb +1 -0
  77. data/lib/dynflow/execution_plan.rb +3 -2
  78. data/lib/dynflow/execution_plan/dependency_graph.rb +1 -0
  79. data/lib/dynflow/execution_plan/hooks.rb +1 -0
  80. data/lib/dynflow/execution_plan/output_reference.rb +2 -1
  81. data/lib/dynflow/execution_plan/steps.rb +1 -0
  82. data/lib/dynflow/execution_plan/steps/abstract.rb +10 -5
  83. data/lib/dynflow/execution_plan/steps/abstract_flow_step.rb +2 -0
  84. data/lib/dynflow/execution_plan/steps/error.rb +1 -0
  85. data/lib/dynflow/execution_plan/steps/finalize_step.rb +1 -0
  86. data/lib/dynflow/execution_plan/steps/plan_step.rb +1 -0
  87. data/lib/dynflow/execution_plan/steps/run_step.rb +1 -0
  88. data/lib/dynflow/executors.rb +1 -1
  89. data/lib/dynflow/executors/abstract/core.rb +132 -0
  90. data/lib/dynflow/executors/parallel.rb +24 -11
  91. data/lib/dynflow/executors/parallel/core.rb +10 -91
  92. data/lib/dynflow/executors/parallel/pool.rb +4 -2
  93. data/lib/dynflow/executors/parallel/worker.rb +2 -1
  94. data/lib/dynflow/executors/sidekiq/core.rb +121 -0
  95. data/lib/dynflow/executors/sidekiq/internal_job_base.rb +24 -0
  96. data/lib/dynflow/executors/sidekiq/orchestrator_jobs.rb +60 -0
  97. data/lib/dynflow/executors/sidekiq/redis_locking.rb +69 -0
  98. data/lib/dynflow/executors/sidekiq/serialization.rb +33 -0
  99. data/lib/dynflow/executors/sidekiq/worker_jobs.rb +42 -0
  100. data/lib/dynflow/flows.rb +1 -0
  101. data/lib/dynflow/flows/abstract.rb +1 -0
  102. data/lib/dynflow/flows/abstract_composed.rb +1 -0
  103. data/lib/dynflow/flows/atom.rb +1 -0
  104. data/lib/dynflow/flows/concurrence.rb +1 -0
  105. data/lib/dynflow/flows/sequence.rb +1 -0
  106. data/lib/dynflow/logger_adapters.rb +1 -0
  107. data/lib/dynflow/logger_adapters/abstract.rb +1 -0
  108. data/lib/dynflow/logger_adapters/delegator.rb +1 -0
  109. data/lib/dynflow/logger_adapters/formatters.rb +1 -0
  110. data/lib/dynflow/logger_adapters/formatters/abstract.rb +1 -0
  111. data/lib/dynflow/logger_adapters/formatters/exception.rb +1 -0
  112. data/lib/dynflow/logger_adapters/simple.rb +1 -0
  113. data/lib/dynflow/middleware.rb +1 -0
  114. data/lib/dynflow/middleware/common/singleton.rb +1 -0
  115. data/lib/dynflow/middleware/common/transaction.rb +1 -0
  116. data/lib/dynflow/middleware/register.rb +1 -0
  117. data/lib/dynflow/middleware/resolver.rb +1 -0
  118. data/lib/dynflow/middleware/stack.rb +1 -0
  119. data/lib/dynflow/middleware/world.rb +1 -0
  120. data/lib/dynflow/persistence.rb +3 -2
  121. data/lib/dynflow/persistence_adapters.rb +1 -0
  122. data/lib/dynflow/persistence_adapters/abstract.rb +1 -0
  123. data/lib/dynflow/persistence_adapters/sequel.rb +10 -7
  124. data/lib/dynflow/persistence_adapters/sequel_migrations/001_initial.rb +1 -0
  125. data/lib/dynflow/persistence_adapters/sequel_migrations/002_incremental_progress.rb +1 -0
  126. data/lib/dynflow/persistence_adapters/sequel_migrations/003_parent_action.rb +1 -0
  127. data/lib/dynflow/persistence_adapters/sequel_migrations/004_coordinator_records.rb +1 -0
  128. data/lib/dynflow/persistence_adapters/sequel_migrations/005_envelopes.rb +1 -0
  129. data/lib/dynflow/persistence_adapters/sequel_migrations/006_fix_data_length.rb +1 -0
  130. data/lib/dynflow/persistence_adapters/sequel_migrations/007_future_execution.rb +1 -0
  131. data/lib/dynflow/persistence_adapters/sequel_migrations/008_rename_scheduled_plans_to_delayed_plans.rb +1 -0
  132. data/lib/dynflow/persistence_adapters/sequel_migrations/009_fix_mysql_data_length.rb +1 -0
  133. data/lib/dynflow/persistence_adapters/sequel_migrations/010_add_execution_plans_label.rb +1 -0
  134. data/lib/dynflow/persistence_adapters/sequel_migrations/011_placeholder.rb +1 -0
  135. data/lib/dynflow/persistence_adapters/sequel_migrations/012_add_delayed_plans_serialized_args.rb +1 -0
  136. data/lib/dynflow/persistence_adapters/sequel_migrations/013_add_action_columns.rb +1 -0
  137. data/lib/dynflow/persistence_adapters/sequel_migrations/014_add_step_columns.rb +1 -0
  138. data/lib/dynflow/persistence_adapters/sequel_migrations/015_add_execution_plan_columns.rb +1 -0
  139. data/lib/dynflow/persistence_adapters/sequel_migrations/016_add_step_queue.rb +1 -0
  140. data/lib/dynflow/persistence_adapters/sequel_migrations/017_add_delayed_plan_frozen.rb +1 -0
  141. data/lib/dynflow/persistence_adapters/sequel_migrations/018_add_uuid_column.rb +1 -0
  142. data/lib/dynflow/persistence_adapters/sequel_migrations/019_update_mysql_time_precision.rb +48 -0
  143. data/lib/dynflow/rails.rb +1 -0
  144. data/lib/dynflow/rails/configuration.rb +6 -3
  145. data/lib/dynflow/rails/daemon.rb +1 -0
  146. data/lib/dynflow/round_robin.rb +1 -0
  147. data/lib/dynflow/semaphores.rb +1 -0
  148. data/lib/dynflow/semaphores/abstract.rb +1 -0
  149. data/lib/dynflow/semaphores/aggregating.rb +1 -0
  150. data/lib/dynflow/semaphores/dummy.rb +1 -0
  151. data/lib/dynflow/semaphores/stateful.rb +1 -0
  152. data/lib/dynflow/serializable.rb +13 -4
  153. data/lib/dynflow/serializer.rb +24 -0
  154. data/lib/dynflow/serializers.rb +1 -0
  155. data/lib/dynflow/serializers/abstract.rb +1 -0
  156. data/lib/dynflow/serializers/noop.rb +1 -0
  157. data/lib/dynflow/stateful.rb +1 -0
  158. data/lib/dynflow/telemetry.rb +1 -0
  159. data/lib/dynflow/telemetry_adapters/abstract.rb +1 -0
  160. data/lib/dynflow/telemetry_adapters/dummy.rb +1 -0
  161. data/lib/dynflow/telemetry_adapters/statsd.rb +1 -0
  162. data/lib/dynflow/testing.rb +1 -0
  163. data/lib/dynflow/testing/assertions.rb +6 -5
  164. data/lib/dynflow/testing/dummy_execution_plan.rb +1 -0
  165. data/lib/dynflow/testing/dummy_executor.rb +19 -2
  166. data/lib/dynflow/testing/dummy_planned_action.rb +1 -0
  167. data/lib/dynflow/testing/dummy_step.rb +3 -1
  168. data/lib/dynflow/testing/dummy_world.rb +9 -0
  169. data/lib/dynflow/testing/factories.rb +6 -1
  170. data/lib/dynflow/testing/in_thread_executor.rb +22 -3
  171. data/lib/dynflow/testing/in_thread_world.rb +9 -0
  172. data/lib/dynflow/testing/managed_clock.rb +1 -0
  173. data/lib/dynflow/testing/mimic.rb +1 -0
  174. data/lib/dynflow/throttle_limiter.rb +1 -0
  175. data/lib/dynflow/transaction_adapters.rb +1 -0
  176. data/lib/dynflow/transaction_adapters/abstract.rb +1 -0
  177. data/lib/dynflow/transaction_adapters/active_record.rb +1 -0
  178. data/lib/dynflow/transaction_adapters/none.rb +1 -0
  179. data/lib/dynflow/utils.rb +1 -0
  180. data/lib/dynflow/utils/indifferent_hash.rb +1 -0
  181. data/lib/dynflow/utils/priority_queue.rb +1 -0
  182. data/lib/dynflow/version.rb +2 -1
  183. data/lib/dynflow/watchers/memory_consumption_watcher.rb +1 -0
  184. data/lib/dynflow/web.rb +1 -0
  185. data/lib/dynflow/web/console.rb +1 -0
  186. data/lib/dynflow/web/console_helpers.rb +1 -0
  187. data/lib/dynflow/web/filtering_helpers.rb +1 -0
  188. data/lib/dynflow/web/world_helpers.rb +1 -0
  189. data/lib/dynflow/web_console.rb +1 -0
  190. data/lib/dynflow/world.rb +11 -1
  191. data/lib/dynflow/world/invalidation.rb +7 -1
  192. data/test/abnormal_states_recovery_test.rb +41 -40
  193. data/test/action_test.rb +160 -110
  194. data/test/activejob_adapter_test.rb +1 -0
  195. data/test/batch_sub_tasks_test.rb +12 -11
  196. data/test/clock_test.rb +2 -1
  197. data/test/concurrency_control_test.rb +20 -19
  198. data/test/coordinator_test.rb +20 -21
  199. data/test/daemon_test.rb +2 -1
  200. data/test/dead_letter_silencer_test.rb +9 -7
  201. data/test/dispatcher_test.rb +2 -1
  202. data/test/execution_plan_cleaner_test.rb +13 -12
  203. data/test/execution_plan_hooks_test.rb +3 -2
  204. data/test/execution_plan_test.rb +33 -32
  205. data/test/executor_test.rb +533 -489
  206. data/test/future_execution_test.rb +45 -44
  207. data/test/memory_cosumption_watcher_test.rb +5 -4
  208. data/test/middleware_test.rb +55 -54
  209. data/test/persistence_test.rb +56 -53
  210. data/test/rescue_test.rb +36 -35
  211. data/test/round_robin_test.rb +13 -12
  212. data/test/semaphores_test.rb +31 -30
  213. data/test/support/code_workflow_example.rb +1 -0
  214. data/test/support/dummy_example.rb +14 -1
  215. data/test/support/middleware_example.rb +2 -1
  216. data/test/support/rails/config/environment.rb +1 -0
  217. data/test/support/rescue_example.rb +1 -0
  218. data/test/support/test_execution_log.rb +1 -0
  219. data/test/test_helper.rb +18 -17
  220. data/test/testing_test.rb +45 -44
  221. data/test/utils_test.rb +18 -17
  222. data/test/web_console_test.rb +1 -0
  223. data/test/world_test.rb +7 -6
  224. metadata +13 -4
  225. data/lib/dynflow/executors/abstract.rb +0 -40
@@ -1,20 +1,17 @@
1
+ # frozen_string_literal: true
2
+ require 'dynflow/executors/parallel/pool'
3
+ require 'dynflow/executors/parallel/worker'
4
+
1
5
  module Dynflow
2
6
  module Executors
3
- class Parallel < Abstract
4
- class Core < Actor
7
+ class Parallel
8
+ class Core < Abstract::Core
5
9
  attr_reader :logger
6
10
 
7
11
  def initialize(world, heartbeat_interval, queues_options)
8
- @logger = world.logger
9
- @world = Type! world, World
10
- @queues_options = queues_options
11
- @pools = {}
12
- @terminated = nil
13
- @director = Director.new(@world)
14
- @heartbeat_interval = heartbeat_interval
15
-
12
+ super
13
+ @pools = {}
16
14
  initialize_queues
17
- schedule_heartbeat
18
15
  end
19
16
 
20
17
  def initialize_queues
@@ -27,41 +24,8 @@ module Dynflow
27
24
  end
28
25
  end
29
26
 
30
- def handle_execution(execution_plan_id, finished)
31
- if terminating?
32
- raise Dynflow::Error,
33
- "cannot accept execution_plan_id:#{execution_plan_id} core is terminating"
34
- end
35
-
36
- feed_pool(@director.start_execution(execution_plan_id, finished))
37
- end
38
-
39
- def handle_event(event)
40
- Type! event, Director::Event
41
- if terminating?
42
- raise Dynflow::Error,
43
- "cannot accept event: #{event} core is terminating"
44
- end
45
- feed_pool(@director.handle_event(event))
46
- end
47
-
48
- def work_finished(work)
49
- feed_pool(@director.work_finished(work))
50
- end
51
-
52
- def handle_persistence_error(error, work = nil)
53
- logger.error "PersistenceError in executor"
54
- logger.error error
55
- @director.work_failed(work) if work
56
- if error.is_a? Errors::FatalPersistenceError
57
- logger.fatal "Terminating"
58
- @world.terminate
59
- end
60
- end
61
-
62
27
  def start_termination(*args)
63
28
  super
64
- logger.info 'shutting down Core ...'
65
29
  @pools.values.each { |pool| pool.tell([:start_termination, Concurrent::Promises.resolvable_future]) }
66
30
  end
67
31
 
@@ -69,66 +33,21 @@ module Dynflow
69
33
  @pools.delete(pool_name)
70
34
  # we expect this message from all worker pools
71
35
  return unless @pools.empty?
72
- @director.terminate
73
- logger.info '... Dynflow core terminated.'
74
36
  super()
75
37
  end
76
38
 
77
- def dead_letter_routing
78
- @world.dead_letter_handler
79
- end
80
-
81
39
  def execution_status(execution_plan_id = nil)
82
40
  @pools.each_with_object({}) do |(pool_name, pool), hash|
83
41
  hash[pool_name] = pool.ask!([:execution_status, execution_plan_id])
84
42
  end
85
43
  end
86
44
 
87
- def heartbeat
88
- @logger.debug('Executor heartbeat')
89
- record = @world.coordinator.find_records(:id => @world.id,
90
- :class => ['Dynflow::Coordinator::ExecutorWorld', 'Dynflow::Coordinator::ClientWorld']).first
91
- unless record
92
- logger.error(%{Executor's world record for #{@world.id} missing: terminating})
93
- @world.terminate
94
- return
95
- end
96
-
97
- record.data[:meta].update(:last_seen => Dynflow::Dispatcher::ClientDispatcher::PingCache.format_time)
98
- @world.coordinator.update_record(record)
99
- schedule_heartbeat
100
- end
101
-
102
- private
103
-
104
- def schedule_heartbeat
105
- @world.clock.ping(self, @heartbeat_interval, :heartbeat)
106
- end
107
-
108
- def on_message(message)
109
- super
110
- rescue Errors::PersistenceError => e
111
- self.tell([:handle_persistence_error, e])
112
- end
113
-
114
45
  def feed_pool(work_items)
115
- return if terminating?
116
- return if work_items.nil?
117
- work_items = [work_items] if work_items.is_a? Director::WorkItem
118
- work_items.all? { |i| Type! i, Director::WorkItem }
119
46
  work_items.each do |new_work|
120
- pool = @pools[new_work.queue]
121
- unless pool
122
- logger.error("Pool is not available for queue #{new_work.queue}, falling back to #{fallback_queue}")
123
- pool = @pools[fallback_queue]
124
- end
125
- pool.tell([:schedule_work, new_work])
47
+ new_work.world = @world
48
+ @pools.fetch(suggest_queue(new_work)).tell([:schedule_work, new_work])
126
49
  end
127
50
  end
128
-
129
- def fallback_queue
130
- :default
131
- end
132
51
  end
133
52
  end
134
53
  end
@@ -1,6 +1,7 @@
1
+ # frozen_string_literal: true
1
2
  module Dynflow
2
3
  module Executors
3
- class Parallel < Abstract
4
+ class Parallel
4
5
  class Pool < Actor
5
6
  class JobStorage
6
7
  def initialize
@@ -53,7 +54,8 @@ module Dynflow
53
54
  end
54
55
 
55
56
  def worker_done(worker, work)
56
- @executor_core.tell([:work_finished, work])
57
+ step = work.step if work.is_a?(Director::StepWorkItem)
58
+ @executor_core.tell([:work_finished, work, step && step.delayed_events])
57
59
  @free_workers << worker
58
60
  Dynflow::Telemetry.with_instance { |t| t.set_gauge(:dynflow_active_workers, -1, telemetry_options) }
59
61
  distribute_jobs
@@ -1,6 +1,7 @@
1
+ # frozen_string_literal: true
1
2
  module Dynflow
2
3
  module Executors
3
- class Parallel < Abstract
4
+ class Parallel
4
5
  class Worker < Actor
5
6
  def initialize(pool, transaction_adapter, telemetry_options = {})
6
7
  @pool = Type! pool, Concurrent::Actor::Reference
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+ require 'dynflow/executors/sidekiq/serialization'
3
+ require 'dynflow/executors/sidekiq/internal_job_base'
4
+ require 'dynflow/executors/sidekiq/orchestrator_jobs'
5
+ require 'dynflow/executors/sidekiq/worker_jobs'
6
+ require 'dynflow/executors/sidekiq/redis_locking'
7
+
8
+ require 'sidekiq-reliable-fetch'
9
+ Sidekiq.configure_server do |config|
10
+ # Use semi-reliable fetch
11
+ # for details see https://gitlab.com/gitlab-org/sidekiq-reliable-fetch/blob/master/README.md
12
+ config.options[:semi_reliable_fetch] = true
13
+ Sidekiq::ReliableFetch.setup_reliable_fetch!(config)
14
+ end
15
+
16
+ module Dynflow
17
+ module Executors
18
+ module Sidekiq
19
+ class Core < Abstract::Core
20
+ include RedisLocking
21
+
22
+ TELEMETRY_UPDATE_INTERVAL = 30 # update telemetry every 30s
23
+
24
+ attr_reader :logger
25
+
26
+ def initialize(world, *_args)
27
+ @world = world
28
+ @logger = world.logger
29
+ wait_for_orchestrator_lock
30
+ super
31
+ schedule_update_telemetry
32
+ begin_startup!
33
+ end
34
+
35
+ def heartbeat
36
+ super
37
+ reacquire_orchestrator_lock
38
+ end
39
+
40
+ def start_termination(*args)
41
+ super
42
+ release_orchestrator_lock
43
+ finish_termination
44
+ end
45
+
46
+ # TODO: needs thought on how to implement it
47
+ def execution_status(execution_plan_id = nil)
48
+ {}
49
+ end
50
+
51
+ def feed_pool(work_items)
52
+ work_items.each do |new_work|
53
+ WorkerJobs::PerformWork.set(queue: suggest_queue(new_work)).perform_async(new_work)
54
+ end
55
+ end
56
+
57
+ def update_telemetry
58
+ sidekiq_queues = ::Sidekiq::Stats.new.queues
59
+ @queues_options.keys.each do |queue|
60
+ queue_size = sidekiq_queues[queue.to_s]
61
+ if queue_size
62
+ Dynflow::Telemetry.with_instance { |t| t.set_gauge(:dynflow_queue_size, queue_size, telemetry_options(queue)) }
63
+ end
64
+ end
65
+ schedule_update_telemetry
66
+ end
67
+
68
+ def work_finished(work, delayed_events = nil)
69
+ # If the work item is sent in reply to a request from the current orchestrator, proceed
70
+ if work.sender_orchestrator_id == @world.id
71
+ super
72
+ else
73
+ # If we're in recovery, we can drop the work as the execution plan will be resumed during validity checks performed when leaving recovery
74
+ # If we're not in recovery and receive an event from another orchestrator, it means it survived the queue draining.
75
+ handle_unknown_work_item(work) unless @recovery
76
+ end
77
+ end
78
+
79
+ def begin_startup!
80
+ WorkerJobs::DrainMarker.perform_async(@world.id)
81
+ @recovery = true
82
+ end
83
+
84
+ def startup_complete
85
+ logger.info('Performing validity checks')
86
+ @world.perform_validity_checks
87
+ logger.info('Finished performing validity checks')
88
+ @recovery = false
89
+ end
90
+
91
+ private
92
+
93
+ def fallback_queue
94
+ :default
95
+ end
96
+
97
+ def schedule_update_telemetry
98
+ @world.clock.ping(reference, TELEMETRY_UPDATE_INTERVAL, [:update_telemetry])
99
+ end
100
+
101
+ def telemetry_options(queue)
102
+ { queue: queue.to_s, world: @world.id }
103
+ end
104
+
105
+ # We take a look if an execution lock is already being held by an orchestrator (it should be the current one). If no lock is held
106
+ # we try to resume the execution plan if possible
107
+ def handle_unknown_work_item(work)
108
+ # We are past recovery now, if we receive an event here, the execution plan will be most likely paused
109
+ # We can either try to rescue it or turn it over to stopped
110
+ execution_lock = @world.coordinator.find_locks(class: Coordinator::ExecutionLock.name,
111
+ id: "execution-plan:#{work.execution_plan_id}").first
112
+ if execution_lock.nil?
113
+ plan = @world.persistence.load_execution_plan(work.execution_plan_id)
114
+ should_resume = !plan.error? || plan.prepare_for_rescue == :running
115
+ @world.execute(plan.id) if should_resume
116
+ end
117
+ end
118
+ end
119
+ end
120
+ end
121
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+ module Dynflow
3
+ module Executors
4
+ module Sidekiq
5
+ class InternalJobBase
6
+ include ::Sidekiq::Worker
7
+ extend ::Dynflow::Executors::Sidekiq::Serialization::WorkerExtension::ClassMethods
8
+ sidekiq_options retry: false, backtrace: true
9
+
10
+ def self.inherited(klass)
11
+ klass.prepend(::Dynflow::Executors::Sidekiq::Serialization::WorkerExtension)
12
+ end
13
+
14
+ def worker_id
15
+ ::Sidekiq::Logging.tid
16
+ end
17
+
18
+ def telemetry_options(work_item)
19
+ { queue: work_item.queue.to_s, world: Dynflow.process_world.id, worker: worker_id }
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+ module Dynflow
3
+ module Executors
4
+ module Sidekiq
5
+ module OrchestratorJobs
6
+ # handles responses about finished work from the workers
7
+ # or some event to handle on orchestrator side
8
+ class WorkerDone < InternalJobBase
9
+ sidekiq_options queue: :dynflow_orchestrator
10
+
11
+ # @param work_item - finished work item reported back to the orchestrator
12
+ # @param delayed_events - optional; passed to the executor core together with the work item
13
+ def perform(work_item, delayed_events = nil)
14
+ # Usually the step is saved on the worker's side. However if sidekiq is shut down,
15
+ # then the step may not have been saved so we save it just to be sure
16
+ if work_item.is_a?(Director::StepWorkItem) && work_item.step&.error&.exception.is_a?(::Sidekiq::Shutdown)
17
+ work_item.step.save
18
+ end
19
+ Dynflow.process_world.executor.core.tell([:work_finished, work_item, delayed_events])
20
+ end
21
+ end
22
+
23
+ # handles setting up an event on orchestrator
24
+ class PlanEvent < InternalJobBase
25
+ sidekiq_options queue: :dynflow_orchestrator
26
+
27
+ # Forwards execution_plan_id, step_id, event and time unchanged to
28
+ # Dynflow.process_world.plan_event
29
+ def perform(execution_plan_id, step_id, event, time)
30
+ Dynflow.process_world.plan_event(execution_plan_id, step_id, event, time)
31
+ end
32
+ end
33
+
34
+ class HandlePersistenceError < InternalJobBase
35
+ sidekiq_options queue: :dynflow_orchestrator
36
+
37
+ # @param error - persistence error to hand over to the executor core
38
+ # @param work_item - work item sent to the core along with the error
39
+ def perform(error, work_item)
40
+ Dynflow.process_world.executor.core.tell([:handle_persistence_error, error, work_item])
41
+ end
42
+ end
43
+
44
+ class StartupComplete < InternalJobBase
45
+ sidekiq_options queue: :dynflow_orchestrator
46
+
47
+ # @param world_id - id of the world the startup notification is meant for;
48
+ # the notification is discarded (with a warning) unless it matches the current process world
49
+ def perform(world_id)
50
+ if Dynflow.process_world.id == world_id
51
+ Dynflow.process_world.executor.core.tell([:startup_complete])
52
+ else
53
+ logger.warn("Received startup complete for a different world #{world_id}, discarding.")
54
+ end
55
+ end
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+ module Dynflow
3
+ module Executors
4
+ module Sidekiq
5
+ module RedisLocking
6
+ REDIS_LOCK_KEY = 'dynflow_orchestrator_uuid'
7
+ REDIS_LOCK_TTL = 60
8
+ REDIS_LOCK_POLL_INTERVAL = 15
9
+
10
+ ACQUIRE_OK = 0
11
+ ACQUIRE_MISSING = 1
12
+ ACQUIRE_TAKEN = 2
13
+
14
+ RELEASE_SCRIPT = <<~LUA
15
+ if redis.call("get", KEYS[1]) == ARGV[1] then
16
+ redis.call("del", KEYS[1])
17
+ end
18
+ return #{ACQUIRE_OK}
19
+ LUA
20
+
21
+ REACQUIRE_SCRIPT = <<~LUA
22
+ if redis.call("exists", KEYS[1]) == 1 then
23
+ local owner = redis.call("get", KEYS[1])
24
+ if owner == ARGV[1] then
25
+ redis.call("set", KEYS[1], ARGV[1], "XX", "EX", #{REDIS_LOCK_TTL})
26
+ return #{ACQUIRE_OK}
27
+ else
28
+ return #{ACQUIRE_TAKEN}
29
+ end
30
+ else
31
+ redis.call("set", KEYS[1], ARGV[1], "NX", "EX", #{REDIS_LOCK_TTL})
32
+ return #{ACQUIRE_MISSING}
33
+ end
34
+ LUA
35
+
36
+ def release_orchestrator_lock
37
+ ::Sidekiq.redis { |conn| conn.eval RELEASE_SCRIPT, [REDIS_LOCK_KEY], [@world.id] }
38
+ end
39
+
40
+ def wait_for_orchestrator_lock
41
+ mode = nil
42
+ loop do
43
+ active = ::Sidekiq.redis do |conn|
44
+ conn.set(REDIS_LOCK_KEY, @world.id, :ex => REDIS_LOCK_TTL, :nx => true)
45
+ end
46
+ break if active
47
+ if mode.nil?
48
+ mode = :passive
49
+ @logger.info('Orchestrator lock already taken, entering passive mode.')
50
+ end
51
+ sleep REDIS_LOCK_POLL_INTERVAL
52
+ end
53
+ @logger.info('Acquired orchestrator lock, entering active mode.')
54
+ end
55
+
56
+ def reacquire_orchestrator_lock
57
+ case ::Sidekiq.redis { |conn| conn.eval REACQUIRE_SCRIPT, [REDIS_LOCK_KEY], [@world.id] }
58
+ when ACQUIRE_MISSING
59
+ @logger.error('The orchestrator lock was lost, reacquired')
60
+ when ACQUIRE_TAKEN
61
+ owner = ::Sidekiq.redis { |conn| conn.get REDIS_LOCK_KEY }
62
+ @logger.fatal("The orchestrator lock was stolen by #{owner}, aborting.")
63
+ Process.kill('INT', Process.pid)
64
+ end
65
+ end
66
+ end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+ module Dynflow
3
+ module Executors
4
+ module Sidekiq
5
+ # Module to prepend the Sidekiq job to handle the serialization
6
+ module Serialization
7
+ def self.serialize(value)
8
+ Dynflow.serializer.dump(value)
9
+ end
10
+
11
+ def self.deserialize(value)
12
+ value = Utils::IndifferentHash.new(value) if value.is_a? Hash
13
+ Dynflow.serializer.load(value)
14
+ end
15
+
16
+ module WorkerExtension
17
+ # Overriding the Sidekiq entry method to perform additional serialization preparation
18
+ module ClassMethods
19
+ def client_push(opts)
20
+ opts['args'] = opts['args'].map { |a| Serialization.serialize(a) }
21
+ super(opts)
22
+ end
23
+ end
24
+
25
+ def perform(*args)
26
+ args = args.map { |a| Serialization.deserialize(a) }
27
+ super(*args)
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end