dynflow 1.3.0 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/.travis.yml +3 -4
- data/Dockerfile +9 -0
- data/Gemfile +6 -0
- data/Rakefile +1 -0
- data/doc/pages/Gemfile +1 -0
- data/doc/pages/Rakefile +1 -0
- data/doc/pages/plugins/alert_block.rb +1 -0
- data/doc/pages/plugins/div_tag.rb +1 -0
- data/doc/pages/plugins/graphviz.rb +1 -0
- data/doc/pages/plugins/plantuml.rb +1 -0
- data/doc/pages/plugins/play.rb +1 -0
- data/doc/pages/plugins/tags.rb +1 -0
- data/doc/pages/plugins/toc.rb +1 -0
- data/docker-compose.yml +41 -0
- data/dynflow.gemspec +1 -0
- data/examples/clock_benchmark.rb +1 -0
- data/examples/example_helper.rb +19 -2
- data/examples/future_execution.rb +2 -1
- data/examples/memory_limit_watcher.rb +1 -0
- data/examples/orchestrate.rb +4 -5
- data/examples/orchestrate_evented.rb +3 -2
- data/examples/remote_executor.rb +68 -0
- data/examples/singletons.rb +4 -3
- data/examples/sub_plan_concurrency_control.rb +2 -1
- data/examples/sub_plans.rb +3 -2
- data/examples/termination.rb +1 -0
- data/lib/dynflow.rb +20 -0
- data/lib/dynflow/action.rb +28 -3
- data/lib/dynflow/action/cancellable.rb +1 -0
- data/lib/dynflow/action/format.rb +1 -0
- data/lib/dynflow/action/missing.rb +1 -0
- data/lib/dynflow/action/polling.rb +3 -1
- data/lib/dynflow/action/progress.rb +1 -0
- data/lib/dynflow/action/rescue.rb +1 -0
- data/lib/dynflow/action/singleton.rb +1 -0
- data/lib/dynflow/action/suspended.rb +9 -2
- data/lib/dynflow/action/timeouts.rb +2 -1
- data/lib/dynflow/action/with_bulk_sub_plans.rb +2 -1
- data/lib/dynflow/action/with_polling_sub_plans.rb +7 -5
- data/lib/dynflow/action/with_sub_plans.rb +1 -0
- data/lib/dynflow/active_job/queue_adapter.rb +1 -0
- data/lib/dynflow/actor.rb +13 -5
- data/lib/dynflow/actors.rb +1 -0
- data/lib/dynflow/actors/execution_plan_cleaner.rb +1 -0
- data/lib/dynflow/clock.rb +27 -47
- data/lib/dynflow/config.rb +11 -2
- data/lib/dynflow/connectors.rb +1 -0
- data/lib/dynflow/connectors/abstract.rb +1 -0
- data/lib/dynflow/connectors/database.rb +1 -0
- data/lib/dynflow/connectors/direct.rb +1 -0
- data/lib/dynflow/coordinator.rb +1 -0
- data/lib/dynflow/coordinator_adapters.rb +1 -0
- data/lib/dynflow/coordinator_adapters/abstract.rb +1 -0
- data/lib/dynflow/coordinator_adapters/sequel.rb +1 -0
- data/lib/dynflow/dead_letter_silencer.rb +2 -0
- data/lib/dynflow/debug/telemetry/persistence.rb +1 -0
- data/lib/dynflow/delayed_executors.rb +1 -0
- data/lib/dynflow/delayed_executors/abstract.rb +1 -0
- data/lib/dynflow/delayed_executors/abstract_core.rb +1 -0
- data/lib/dynflow/delayed_executors/polling.rb +1 -0
- data/lib/dynflow/delayed_plan.rb +1 -0
- data/lib/dynflow/director.rb +80 -15
- data/lib/dynflow/director/execution_plan_manager.rb +17 -3
- data/lib/dynflow/director/flow_manager.rb +1 -0
- data/lib/dynflow/director/{work_queue.rb → queue_hash.rb} +9 -8
- data/lib/dynflow/director/running_steps_manager.rb +55 -18
- data/lib/dynflow/director/sequence_cursor.rb +1 -0
- data/lib/dynflow/director/sequential_manager.rb +12 -2
- data/lib/dynflow/dispatcher.rb +4 -2
- data/lib/dynflow/dispatcher/abstract.rb +1 -0
- data/lib/dynflow/dispatcher/client_dispatcher.rb +6 -4
- data/lib/dynflow/dispatcher/executor_dispatcher.rb +13 -1
- data/lib/dynflow/errors.rb +1 -0
- data/lib/dynflow/execution_history.rb +1 -0
- data/lib/dynflow/execution_plan.rb +3 -2
- data/lib/dynflow/execution_plan/dependency_graph.rb +1 -0
- data/lib/dynflow/execution_plan/hooks.rb +1 -0
- data/lib/dynflow/execution_plan/output_reference.rb +2 -1
- data/lib/dynflow/execution_plan/steps.rb +1 -0
- data/lib/dynflow/execution_plan/steps/abstract.rb +10 -5
- data/lib/dynflow/execution_plan/steps/abstract_flow_step.rb +2 -0
- data/lib/dynflow/execution_plan/steps/error.rb +1 -0
- data/lib/dynflow/execution_plan/steps/finalize_step.rb +1 -0
- data/lib/dynflow/execution_plan/steps/plan_step.rb +1 -0
- data/lib/dynflow/execution_plan/steps/run_step.rb +1 -0
- data/lib/dynflow/executors.rb +1 -1
- data/lib/dynflow/executors/abstract/core.rb +132 -0
- data/lib/dynflow/executors/parallel.rb +24 -11
- data/lib/dynflow/executors/parallel/core.rb +10 -91
- data/lib/dynflow/executors/parallel/pool.rb +4 -2
- data/lib/dynflow/executors/parallel/worker.rb +2 -1
- data/lib/dynflow/executors/sidekiq/core.rb +121 -0
- data/lib/dynflow/executors/sidekiq/internal_job_base.rb +24 -0
- data/lib/dynflow/executors/sidekiq/orchestrator_jobs.rb +60 -0
- data/lib/dynflow/executors/sidekiq/redis_locking.rb +69 -0
- data/lib/dynflow/executors/sidekiq/serialization.rb +33 -0
- data/lib/dynflow/executors/sidekiq/worker_jobs.rb +42 -0
- data/lib/dynflow/flows.rb +1 -0
- data/lib/dynflow/flows/abstract.rb +1 -0
- data/lib/dynflow/flows/abstract_composed.rb +1 -0
- data/lib/dynflow/flows/atom.rb +1 -0
- data/lib/dynflow/flows/concurrence.rb +1 -0
- data/lib/dynflow/flows/sequence.rb +1 -0
- data/lib/dynflow/logger_adapters.rb +1 -0
- data/lib/dynflow/logger_adapters/abstract.rb +1 -0
- data/lib/dynflow/logger_adapters/delegator.rb +1 -0
- data/lib/dynflow/logger_adapters/formatters.rb +1 -0
- data/lib/dynflow/logger_adapters/formatters/abstract.rb +1 -0
- data/lib/dynflow/logger_adapters/formatters/exception.rb +1 -0
- data/lib/dynflow/logger_adapters/simple.rb +1 -0
- data/lib/dynflow/middleware.rb +1 -0
- data/lib/dynflow/middleware/common/singleton.rb +1 -0
- data/lib/dynflow/middleware/common/transaction.rb +1 -0
- data/lib/dynflow/middleware/register.rb +1 -0
- data/lib/dynflow/middleware/resolver.rb +1 -0
- data/lib/dynflow/middleware/stack.rb +1 -0
- data/lib/dynflow/middleware/world.rb +1 -0
- data/lib/dynflow/persistence.rb +3 -2
- data/lib/dynflow/persistence_adapters.rb +1 -0
- data/lib/dynflow/persistence_adapters/abstract.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel.rb +10 -7
- data/lib/dynflow/persistence_adapters/sequel_migrations/001_initial.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/002_incremental_progress.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/003_parent_action.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/004_coordinator_records.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/005_envelopes.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/006_fix_data_length.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/007_future_execution.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/008_rename_scheduled_plans_to_delayed_plans.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/009_fix_mysql_data_length.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/010_add_execution_plans_label.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/011_placeholder.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/012_add_delayed_plans_serialized_args.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/013_add_action_columns.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/014_add_step_columns.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/015_add_execution_plan_columns.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/016_add_step_queue.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/017_add_delayed_plan_frozen.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/018_add_uuid_column.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/019_update_mysql_time_precision.rb +48 -0
- data/lib/dynflow/rails.rb +1 -0
- data/lib/dynflow/rails/configuration.rb +6 -3
- data/lib/dynflow/rails/daemon.rb +1 -0
- data/lib/dynflow/round_robin.rb +1 -0
- data/lib/dynflow/semaphores.rb +1 -0
- data/lib/dynflow/semaphores/abstract.rb +1 -0
- data/lib/dynflow/semaphores/aggregating.rb +1 -0
- data/lib/dynflow/semaphores/dummy.rb +1 -0
- data/lib/dynflow/semaphores/stateful.rb +1 -0
- data/lib/dynflow/serializable.rb +13 -4
- data/lib/dynflow/serializer.rb +24 -0
- data/lib/dynflow/serializers.rb +1 -0
- data/lib/dynflow/serializers/abstract.rb +1 -0
- data/lib/dynflow/serializers/noop.rb +1 -0
- data/lib/dynflow/stateful.rb +1 -0
- data/lib/dynflow/telemetry.rb +1 -0
- data/lib/dynflow/telemetry_adapters/abstract.rb +1 -0
- data/lib/dynflow/telemetry_adapters/dummy.rb +1 -0
- data/lib/dynflow/telemetry_adapters/statsd.rb +1 -0
- data/lib/dynflow/testing.rb +1 -0
- data/lib/dynflow/testing/assertions.rb +6 -5
- data/lib/dynflow/testing/dummy_execution_plan.rb +1 -0
- data/lib/dynflow/testing/dummy_executor.rb +19 -2
- data/lib/dynflow/testing/dummy_planned_action.rb +1 -0
- data/lib/dynflow/testing/dummy_step.rb +3 -1
- data/lib/dynflow/testing/dummy_world.rb +9 -0
- data/lib/dynflow/testing/factories.rb +6 -1
- data/lib/dynflow/testing/in_thread_executor.rb +22 -3
- data/lib/dynflow/testing/in_thread_world.rb +9 -0
- data/lib/dynflow/testing/managed_clock.rb +1 -0
- data/lib/dynflow/testing/mimic.rb +1 -0
- data/lib/dynflow/throttle_limiter.rb +1 -0
- data/lib/dynflow/transaction_adapters.rb +1 -0
- data/lib/dynflow/transaction_adapters/abstract.rb +1 -0
- data/lib/dynflow/transaction_adapters/active_record.rb +1 -0
- data/lib/dynflow/transaction_adapters/none.rb +1 -0
- data/lib/dynflow/utils.rb +1 -0
- data/lib/dynflow/utils/indifferent_hash.rb +1 -0
- data/lib/dynflow/utils/priority_queue.rb +1 -0
- data/lib/dynflow/version.rb +2 -1
- data/lib/dynflow/watchers/memory_consumption_watcher.rb +1 -0
- data/lib/dynflow/web.rb +1 -0
- data/lib/dynflow/web/console.rb +1 -0
- data/lib/dynflow/web/console_helpers.rb +1 -0
- data/lib/dynflow/web/filtering_helpers.rb +1 -0
- data/lib/dynflow/web/world_helpers.rb +1 -0
- data/lib/dynflow/web_console.rb +1 -0
- data/lib/dynflow/world.rb +11 -1
- data/lib/dynflow/world/invalidation.rb +7 -1
- data/test/abnormal_states_recovery_test.rb +41 -40
- data/test/action_test.rb +160 -110
- data/test/activejob_adapter_test.rb +1 -0
- data/test/batch_sub_tasks_test.rb +12 -11
- data/test/clock_test.rb +2 -1
- data/test/concurrency_control_test.rb +20 -19
- data/test/coordinator_test.rb +20 -21
- data/test/daemon_test.rb +2 -1
- data/test/dead_letter_silencer_test.rb +9 -7
- data/test/dispatcher_test.rb +2 -1
- data/test/execution_plan_cleaner_test.rb +13 -12
- data/test/execution_plan_hooks_test.rb +3 -2
- data/test/execution_plan_test.rb +33 -32
- data/test/executor_test.rb +533 -489
- data/test/future_execution_test.rb +45 -44
- data/test/memory_cosumption_watcher_test.rb +5 -4
- data/test/middleware_test.rb +55 -54
- data/test/persistence_test.rb +56 -53
- data/test/rescue_test.rb +36 -35
- data/test/round_robin_test.rb +13 -12
- data/test/semaphores_test.rb +31 -30
- data/test/support/code_workflow_example.rb +1 -0
- data/test/support/dummy_example.rb +14 -1
- data/test/support/middleware_example.rb +2 -1
- data/test/support/rails/config/environment.rb +1 -0
- data/test/support/rescue_example.rb +1 -0
- data/test/support/test_execution_log.rb +1 -0
- data/test/test_helper.rb +18 -17
- data/test/testing_test.rb +45 -44
- data/test/utils_test.rb +18 -17
- data/test/web_console_test.rb +1 -0
- data/test/world_test.rb +7 -6
- metadata +13 -4
- data/lib/dynflow/executors/abstract.rb +0 -40
@@ -1,20 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'dynflow/executors/parallel/pool'
|
3
|
+
require 'dynflow/executors/parallel/worker'
|
4
|
+
|
1
5
|
module Dynflow
|
2
6
|
module Executors
|
3
|
-
class Parallel
|
4
|
-
class Core <
|
7
|
+
class Parallel
|
8
|
+
class Core < Abstract::Core
|
5
9
|
attr_reader :logger
|
6
10
|
|
7
11
|
def initialize(world, heartbeat_interval, queues_options)
|
8
|
-
|
9
|
-
@
|
10
|
-
@queues_options = queues_options
|
11
|
-
@pools = {}
|
12
|
-
@terminated = nil
|
13
|
-
@director = Director.new(@world)
|
14
|
-
@heartbeat_interval = heartbeat_interval
|
15
|
-
|
12
|
+
super
|
13
|
+
@pools = {}
|
16
14
|
initialize_queues
|
17
|
-
schedule_heartbeat
|
18
15
|
end
|
19
16
|
|
20
17
|
def initialize_queues
|
@@ -27,41 +24,8 @@ module Dynflow
|
|
27
24
|
end
|
28
25
|
end
|
29
26
|
|
30
|
-
def handle_execution(execution_plan_id, finished)
|
31
|
-
if terminating?
|
32
|
-
raise Dynflow::Error,
|
33
|
-
"cannot accept execution_plan_id:#{execution_plan_id} core is terminating"
|
34
|
-
end
|
35
|
-
|
36
|
-
feed_pool(@director.start_execution(execution_plan_id, finished))
|
37
|
-
end
|
38
|
-
|
39
|
-
def handle_event(event)
|
40
|
-
Type! event, Director::Event
|
41
|
-
if terminating?
|
42
|
-
raise Dynflow::Error,
|
43
|
-
"cannot accept event: #{event} core is terminating"
|
44
|
-
end
|
45
|
-
feed_pool(@director.handle_event(event))
|
46
|
-
end
|
47
|
-
|
48
|
-
def work_finished(work)
|
49
|
-
feed_pool(@director.work_finished(work))
|
50
|
-
end
|
51
|
-
|
52
|
-
def handle_persistence_error(error, work = nil)
|
53
|
-
logger.error "PersistenceError in executor"
|
54
|
-
logger.error error
|
55
|
-
@director.work_failed(work) if work
|
56
|
-
if error.is_a? Errors::FatalPersistenceError
|
57
|
-
logger.fatal "Terminating"
|
58
|
-
@world.terminate
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
27
|
def start_termination(*args)
|
63
28
|
super
|
64
|
-
logger.info 'shutting down Core ...'
|
65
29
|
@pools.values.each { |pool| pool.tell([:start_termination, Concurrent::Promises.resolvable_future]) }
|
66
30
|
end
|
67
31
|
|
@@ -69,66 +33,21 @@ module Dynflow
|
|
69
33
|
@pools.delete(pool_name)
|
70
34
|
# we expect this message from all worker pools
|
71
35
|
return unless @pools.empty?
|
72
|
-
@director.terminate
|
73
|
-
logger.info '... Dynflow core terminated.'
|
74
36
|
super()
|
75
37
|
end
|
76
38
|
|
77
|
-
def dead_letter_routing
|
78
|
-
@world.dead_letter_handler
|
79
|
-
end
|
80
|
-
|
81
39
|
def execution_status(execution_plan_id = nil)
|
82
40
|
@pools.each_with_object({}) do |(pool_name, pool), hash|
|
83
41
|
hash[pool_name] = pool.ask!([:execution_status, execution_plan_id])
|
84
42
|
end
|
85
43
|
end
|
86
44
|
|
87
|
-
def heartbeat
|
88
|
-
@logger.debug('Executor heartbeat')
|
89
|
-
record = @world.coordinator.find_records(:id => @world.id,
|
90
|
-
:class => ['Dynflow::Coordinator::ExecutorWorld', 'Dynflow::Coordinator::ClientWorld']).first
|
91
|
-
unless record
|
92
|
-
logger.error(%{Executor's world record for #{@world.id} missing: terminating})
|
93
|
-
@world.terminate
|
94
|
-
return
|
95
|
-
end
|
96
|
-
|
97
|
-
record.data[:meta].update(:last_seen => Dynflow::Dispatcher::ClientDispatcher::PingCache.format_time)
|
98
|
-
@world.coordinator.update_record(record)
|
99
|
-
schedule_heartbeat
|
100
|
-
end
|
101
|
-
|
102
|
-
private
|
103
|
-
|
104
|
-
def schedule_heartbeat
|
105
|
-
@world.clock.ping(self, @heartbeat_interval, :heartbeat)
|
106
|
-
end
|
107
|
-
|
108
|
-
def on_message(message)
|
109
|
-
super
|
110
|
-
rescue Errors::PersistenceError => e
|
111
|
-
self.tell([:handle_persistence_error, e])
|
112
|
-
end
|
113
|
-
|
114
45
|
def feed_pool(work_items)
|
115
|
-
return if terminating?
|
116
|
-
return if work_items.nil?
|
117
|
-
work_items = [work_items] if work_items.is_a? Director::WorkItem
|
118
|
-
work_items.all? { |i| Type! i, Director::WorkItem }
|
119
46
|
work_items.each do |new_work|
|
120
|
-
|
121
|
-
|
122
|
-
logger.error("Pool is not available for queue #{new_work.queue}, falling back to #{fallback_queue}")
|
123
|
-
pool = @pools[fallback_queue]
|
124
|
-
end
|
125
|
-
pool.tell([:schedule_work, new_work])
|
47
|
+
new_work.world = @world
|
48
|
+
@pools.fetch(suggest_queue(new_work)).tell([:schedule_work, new_work])
|
126
49
|
end
|
127
50
|
end
|
128
|
-
|
129
|
-
def fallback_queue
|
130
|
-
:default
|
131
|
-
end
|
132
51
|
end
|
133
52
|
end
|
134
53
|
end
|
@@ -1,6 +1,7 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Dynflow
|
2
3
|
module Executors
|
3
|
-
class Parallel
|
4
|
+
class Parallel
|
4
5
|
class Pool < Actor
|
5
6
|
class JobStorage
|
6
7
|
def initialize
|
@@ -53,7 +54,8 @@ module Dynflow
|
|
53
54
|
end
|
54
55
|
|
55
56
|
def worker_done(worker, work)
|
56
|
-
|
57
|
+
step = work.step if work.is_a?(Director::StepWorkItem)
|
58
|
+
@executor_core.tell([:work_finished, work, step && step.delayed_events])
|
57
59
|
@free_workers << worker
|
58
60
|
Dynflow::Telemetry.with_instance { |t| t.set_gauge(:dynflow_active_workers, -1, telemetry_options) }
|
59
61
|
distribute_jobs
|
@@ -0,0 +1,121 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'dynflow/executors/sidekiq/serialization'
|
3
|
+
require 'dynflow/executors/sidekiq/internal_job_base'
|
4
|
+
require 'dynflow/executors/sidekiq/orchestrator_jobs'
|
5
|
+
require 'dynflow/executors/sidekiq/worker_jobs'
|
6
|
+
require 'dynflow/executors/sidekiq/redis_locking'
|
7
|
+
|
8
|
+
require 'sidekiq-reliable-fetch'
|
9
|
+
Sidekiq.configure_server do |config|
|
10
|
+
# Use semi-reliable fetch
|
11
|
+
# for details see https://gitlab.com/gitlab-org/sidekiq-reliable-fetch/blob/master/README.md
|
12
|
+
config.options[:semi_reliable_fetch] = true
|
13
|
+
Sidekiq::ReliableFetch.setup_reliable_fetch!(config)
|
14
|
+
end
|
15
|
+
|
16
|
+
module Dynflow
|
17
|
+
module Executors
|
18
|
+
module Sidekiq
|
19
|
+
class Core < Abstract::Core
|
20
|
+
include RedisLocking
|
21
|
+
|
22
|
+
TELEMETRY_UPDATE_INTERVAL = 30 # update telemetry every 30s
|
23
|
+
|
24
|
+
attr_reader :logger
|
25
|
+
|
26
|
+
def initialize(world, *_args)
|
27
|
+
@world = world
|
28
|
+
@logger = world.logger
|
29
|
+
wait_for_orchestrator_lock
|
30
|
+
super
|
31
|
+
schedule_update_telemetry
|
32
|
+
begin_startup!
|
33
|
+
end
|
34
|
+
|
35
|
+
def heartbeat
|
36
|
+
super
|
37
|
+
reacquire_orchestrator_lock
|
38
|
+
end
|
39
|
+
|
40
|
+
def start_termination(*args)
|
41
|
+
super
|
42
|
+
release_orchestrator_lock
|
43
|
+
finish_termination
|
44
|
+
end
|
45
|
+
|
46
|
+
# TODO: needs thought on how to implement it
|
47
|
+
def execution_status(execution_plan_id = nil)
|
48
|
+
{}
|
49
|
+
end
|
50
|
+
|
51
|
+
def feed_pool(work_items)
|
52
|
+
work_items.each do |new_work|
|
53
|
+
WorkerJobs::PerformWork.set(queue: suggest_queue(new_work)).perform_async(new_work)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def update_telemetry
|
58
|
+
sidekiq_queues = ::Sidekiq::Stats.new.queues
|
59
|
+
@queues_options.keys.each do |queue|
|
60
|
+
queue_size = sidekiq_queues[queue.to_s]
|
61
|
+
if queue_size
|
62
|
+
Dynflow::Telemetry.with_instance { |t| t.set_gauge(:dynflow_queue_size, queue_size, telemetry_options(queue)) }
|
63
|
+
end
|
64
|
+
end
|
65
|
+
schedule_update_telemetry
|
66
|
+
end
|
67
|
+
|
68
|
+
def work_finished(work, delayed_events = nil)
|
69
|
+
# If the work item is sent in reply to a request from the current orchestrator, proceed
|
70
|
+
if work.sender_orchestrator_id == @world.id
|
71
|
+
super
|
72
|
+
else
|
73
|
+
# If we're in recovery, we can drop the work as the execution plan will be resumed during validity checks performed when leaving recovery
|
74
|
+
# If we're not in recovery and receive an event from another orchestrator, it means it survived the queue draining.
|
75
|
+
handle_unknown_work_item(work) unless @recovery
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
def begin_startup!
|
80
|
+
WorkerJobs::DrainMarker.perform_async(@world.id)
|
81
|
+
@recovery = true
|
82
|
+
end
|
83
|
+
|
84
|
+
def startup_complete
|
85
|
+
logger.info('Performing validity checks')
|
86
|
+
@world.perform_validity_checks
|
87
|
+
logger.info('Finished performing validity checks')
|
88
|
+
@recovery = false
|
89
|
+
end
|
90
|
+
|
91
|
+
private
|
92
|
+
|
93
|
+
def fallback_queue
|
94
|
+
:default
|
95
|
+
end
|
96
|
+
|
97
|
+
def schedule_update_telemetry
|
98
|
+
@world.clock.ping(reference, TELEMETRY_UPDATE_INTERVAL, [:update_telemetry])
|
99
|
+
end
|
100
|
+
|
101
|
+
def telemetry_options(queue)
|
102
|
+
{ queue: queue.to_s, world: @world.id }
|
103
|
+
end
|
104
|
+
|
105
|
+
# We check whether an execution lock is already held by an orchestrator (it should be the current one). If no lock is held,
|
106
|
+
# we try to resume the execution plan if possible.
|
107
|
+
def handle_unknown_work_item(work)
|
108
|
+
# We are past recovery now, if we receive an event here, the execution plan will be most likely paused
|
109
|
+
# We can either try to rescue it or turn it over to stopped
|
110
|
+
execution_lock = @world.coordinator.find_locks(class: Coordinator::ExecutionLock.name,
|
111
|
+
id: "execution-plan:#{work.execution_plan_id}").first
|
112
|
+
if execution_lock.nil?
|
113
|
+
plan = @world.persistence.load_execution_plan(work.execution_plan_id)
|
114
|
+
should_resume = !plan.error? || plan.prepare_for_rescue == :running
|
115
|
+
@world.execute(plan.id) if should_resume
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Dynflow
|
3
|
+
module Executors
|
4
|
+
module Sidekiq
|
5
|
+
class InternalJobBase
|
6
|
+
include ::Sidekiq::Worker
|
7
|
+
extend ::Dynflow::Executors::Sidekiq::Serialization::WorkerExtension::ClassMethods
|
8
|
+
sidekiq_options retry: false, backtrace: true
|
9
|
+
|
10
|
+
def self.inherited(klass)
|
11
|
+
klass.prepend(::Dynflow::Executors::Sidekiq::Serialization::WorkerExtension)
|
12
|
+
end
|
13
|
+
|
14
|
+
def worker_id
|
15
|
+
::Sidekiq::Logging.tid
|
16
|
+
end
|
17
|
+
|
18
|
+
def telemetry_options(work_item)
|
19
|
+
{ queue: work_item.queue.to_s, world: Dynflow.process_world.id, worker: worker_id }
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Dynflow
|
3
|
+
module Executors
|
4
|
+
module Sidekiq
|
5
|
+
module OrchestratorJobs
|
6
|
+
# handles responses about finished work from the workers
|
7
|
+
# or some event to handle on orchestrator side
|
8
|
+
class WorkerDone < InternalJobBase
|
9
|
+
sidekiq_options queue: :dynflow_orchestrator
|
10
|
+
|
11
|
+
# @param work_item - the finished work item reported by a worker; passed on to the executor core
|
12
|
+
# @param delayed_events - optional (defaults to nil); passed on to the executor core together with the work item
|
13
|
+
def perform(work_item, delayed_events = nil)
|
14
|
+
# Usually the step is saved on the worker's side. However if sidekiq is shut down,
|
15
|
+
# then the step may not have been saved so we save it just to be sure
|
16
|
+
if work_item.is_a?(Director::StepWorkItem) && work_item.step&.error&.exception.is_a?(::Sidekiq::Shutdown)
|
17
|
+
work_item.step.save
|
18
|
+
end
|
19
|
+
Dynflow.process_world.executor.core.tell([:work_finished, work_item, delayed_events])
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# handles setting up an event on orchestrator
|
24
|
+
class PlanEvent < InternalJobBase
|
25
|
+
sidekiq_options queue: :dynflow_orchestrator
|
26
|
+
|
27
|
+
# @param execution_plan_id, step_id, event, time - identify the event to plan;
|
28
|
+
# all four are forwarded unchanged to Dynflow.process_world.plan_event
|
29
|
+
def perform(execution_plan_id, step_id, event, time)
|
30
|
+
Dynflow.process_world.plan_event(execution_plan_id, step_id, event, time)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
class HandlePersistenceError < InternalJobBase
|
35
|
+
sidekiq_options queue: :dynflow_orchestrator
|
36
|
+
|
37
|
+
# @param error - the persistence error to hand over to the orchestrator's executor core
|
38
|
+
# @param work_item - the work item passed along with the error
|
39
|
+
def perform(error, work_item)
|
40
|
+
Dynflow.process_world.executor.core.tell([:handle_persistence_error, error, work_item])
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
class StartupComplete < InternalJobBase
|
45
|
+
sidekiq_options queue: :dynflow_orchestrator
|
46
|
+
|
47
|
+
# @param world_id - id of the world this startup-complete notification is addressed to;
|
48
|
+
# it is discarded with a warning unless it matches the current process world's id
|
49
|
+
def perform(world_id)
|
50
|
+
if Dynflow.process_world.id == world_id
|
51
|
+
Dynflow.process_world.executor.core.tell([:startup_complete])
|
52
|
+
else
|
53
|
+
logger.warn("Received startup complete for a different world #{world_id}, discarding.")
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Dynflow
|
3
|
+
module Executors
|
4
|
+
module Sidekiq
|
5
|
+
module RedisLocking
|
6
|
+
REDIS_LOCK_KEY = 'dynflow_orchestrator_uuid'
|
7
|
+
REDIS_LOCK_TTL = 60
|
8
|
+
REDIS_LOCK_POLL_INTERVAL = 15
|
9
|
+
|
10
|
+
ACQUIRE_OK = 0
|
11
|
+
ACQUIRE_MISSING = 1
|
12
|
+
ACQUIRE_TAKEN = 2
|
13
|
+
|
14
|
+
RELEASE_SCRIPT = <<~LUA
|
15
|
+
if redis.call("get", KEYS[1]) == ARGV[1] then
|
16
|
+
redis.call("del", KEYS[1])
|
17
|
+
end
|
18
|
+
return #{ACQUIRE_OK}
|
19
|
+
LUA
|
20
|
+
|
21
|
+
REACQUIRE_SCRIPT = <<~LUA
|
22
|
+
if redis.call("exists", KEYS[1]) == 1 then
|
23
|
+
local owner = redis.call("get", KEYS[1])
|
24
|
+
if owner == ARGV[1] then
|
25
|
+
redis.call("set", KEYS[1], ARGV[1], "XX", "EX", #{REDIS_LOCK_TTL})
|
26
|
+
return #{ACQUIRE_OK}
|
27
|
+
else
|
28
|
+
return #{ACQUIRE_TAKEN}
|
29
|
+
end
|
30
|
+
else
|
31
|
+
redis.call("set", KEYS[1], ARGV[1], "NX", "EX", #{REDIS_LOCK_TTL})
|
32
|
+
return #{ACQUIRE_MISSING}
|
33
|
+
end
|
34
|
+
LUA
|
35
|
+
|
36
|
+
def release_orchestrator_lock
|
37
|
+
::Sidekiq.redis { |conn| conn.eval RELEASE_SCRIPT, [REDIS_LOCK_KEY], [@world.id] }
|
38
|
+
end
|
39
|
+
|
40
|
+
def wait_for_orchestrator_lock
|
41
|
+
mode = nil
|
42
|
+
loop do
|
43
|
+
active = ::Sidekiq.redis do |conn|
|
44
|
+
conn.set(REDIS_LOCK_KEY, @world.id, :ex => REDIS_LOCK_TTL, :nx => true)
|
45
|
+
end
|
46
|
+
break if active
|
47
|
+
if mode.nil?
|
48
|
+
mode = :passive
|
49
|
+
@logger.info('Orchestrator lock already taken, entering passive mode.')
|
50
|
+
end
|
51
|
+
sleep REDIS_LOCK_POLL_INTERVAL
|
52
|
+
end
|
53
|
+
@logger.info('Acquired orchestrator lock, entering active mode.')
|
54
|
+
end
|
55
|
+
|
56
|
+
def reacquire_orchestrator_lock
|
57
|
+
case ::Sidekiq.redis { |conn| conn.eval REACQUIRE_SCRIPT, [REDIS_LOCK_KEY], [@world.id] }
|
58
|
+
when ACQUIRE_MISSING
|
59
|
+
@logger.error('The orchestrator lock was lost, reacquired')
|
60
|
+
when ACQUIRE_TAKEN
|
61
|
+
owner = ::Sidekiq.redis { |conn| conn.get REDIS_LOCK_KEY }
|
62
|
+
@logger.fatal("The orchestrator lock was stolen by #{owner}, aborting.")
|
63
|
+
Process.kill('INT', Process.pid)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Dynflow
|
3
|
+
module Executors
|
4
|
+
module Sidekiq
|
5
|
+
# Module to prepend the Sidekiq job to handle the serialization
|
6
|
+
module Serialization
|
7
|
+
def self.serialize(value)
|
8
|
+
Dynflow.serializer.dump(value)
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.deserialize(value)
|
12
|
+
value = Utils::IndifferentHash.new(value) if value.is_a? Hash
|
13
|
+
Dynflow.serializer.load(value)
|
14
|
+
end
|
15
|
+
|
16
|
+
module WorkerExtension
|
17
|
+
# Overriding the Sidekiq entry method to perform additional serialization preparation
|
18
|
+
module ClassMethods
|
19
|
+
def client_push(opts)
|
20
|
+
opts['args'] = opts['args'].map { |a| Serialization.serialize(a) }
|
21
|
+
super(opts)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def perform(*args)
|
26
|
+
args = args.map { |a| Serialization.deserialize(a) }
|
27
|
+
super(*args)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|