dynflow 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/.travis.yml +3 -4
- data/Dockerfile +9 -0
- data/Gemfile +6 -0
- data/Rakefile +1 -0
- data/doc/pages/Gemfile +1 -0
- data/doc/pages/Rakefile +1 -0
- data/doc/pages/plugins/alert_block.rb +1 -0
- data/doc/pages/plugins/div_tag.rb +1 -0
- data/doc/pages/plugins/graphviz.rb +1 -0
- data/doc/pages/plugins/plantuml.rb +1 -0
- data/doc/pages/plugins/play.rb +1 -0
- data/doc/pages/plugins/tags.rb +1 -0
- data/doc/pages/plugins/toc.rb +1 -0
- data/docker-compose.yml +41 -0
- data/dynflow.gemspec +1 -0
- data/examples/clock_benchmark.rb +1 -0
- data/examples/example_helper.rb +19 -2
- data/examples/future_execution.rb +2 -1
- data/examples/memory_limit_watcher.rb +1 -0
- data/examples/orchestrate.rb +4 -5
- data/examples/orchestrate_evented.rb +3 -2
- data/examples/remote_executor.rb +68 -0
- data/examples/singletons.rb +4 -3
- data/examples/sub_plan_concurrency_control.rb +2 -1
- data/examples/sub_plans.rb +3 -2
- data/examples/termination.rb +1 -0
- data/lib/dynflow.rb +20 -0
- data/lib/dynflow/action.rb +28 -3
- data/lib/dynflow/action/cancellable.rb +1 -0
- data/lib/dynflow/action/format.rb +1 -0
- data/lib/dynflow/action/missing.rb +1 -0
- data/lib/dynflow/action/polling.rb +3 -1
- data/lib/dynflow/action/progress.rb +1 -0
- data/lib/dynflow/action/rescue.rb +1 -0
- data/lib/dynflow/action/singleton.rb +1 -0
- data/lib/dynflow/action/suspended.rb +9 -2
- data/lib/dynflow/action/timeouts.rb +2 -1
- data/lib/dynflow/action/with_bulk_sub_plans.rb +2 -1
- data/lib/dynflow/action/with_polling_sub_plans.rb +7 -5
- data/lib/dynflow/action/with_sub_plans.rb +1 -0
- data/lib/dynflow/active_job/queue_adapter.rb +1 -0
- data/lib/dynflow/actor.rb +13 -5
- data/lib/dynflow/actors.rb +1 -0
- data/lib/dynflow/actors/execution_plan_cleaner.rb +1 -0
- data/lib/dynflow/clock.rb +27 -47
- data/lib/dynflow/config.rb +11 -2
- data/lib/dynflow/connectors.rb +1 -0
- data/lib/dynflow/connectors/abstract.rb +1 -0
- data/lib/dynflow/connectors/database.rb +1 -0
- data/lib/dynflow/connectors/direct.rb +1 -0
- data/lib/dynflow/coordinator.rb +1 -0
- data/lib/dynflow/coordinator_adapters.rb +1 -0
- data/lib/dynflow/coordinator_adapters/abstract.rb +1 -0
- data/lib/dynflow/coordinator_adapters/sequel.rb +1 -0
- data/lib/dynflow/dead_letter_silencer.rb +2 -0
- data/lib/dynflow/debug/telemetry/persistence.rb +1 -0
- data/lib/dynflow/delayed_executors.rb +1 -0
- data/lib/dynflow/delayed_executors/abstract.rb +1 -0
- data/lib/dynflow/delayed_executors/abstract_core.rb +1 -0
- data/lib/dynflow/delayed_executors/polling.rb +1 -0
- data/lib/dynflow/delayed_plan.rb +1 -0
- data/lib/dynflow/director.rb +80 -15
- data/lib/dynflow/director/execution_plan_manager.rb +17 -3
- data/lib/dynflow/director/flow_manager.rb +1 -0
- data/lib/dynflow/director/{work_queue.rb → queue_hash.rb} +9 -8
- data/lib/dynflow/director/running_steps_manager.rb +55 -18
- data/lib/dynflow/director/sequence_cursor.rb +1 -0
- data/lib/dynflow/director/sequential_manager.rb +12 -2
- data/lib/dynflow/dispatcher.rb +4 -2
- data/lib/dynflow/dispatcher/abstract.rb +1 -0
- data/lib/dynflow/dispatcher/client_dispatcher.rb +6 -4
- data/lib/dynflow/dispatcher/executor_dispatcher.rb +13 -1
- data/lib/dynflow/errors.rb +1 -0
- data/lib/dynflow/execution_history.rb +1 -0
- data/lib/dynflow/execution_plan.rb +3 -2
- data/lib/dynflow/execution_plan/dependency_graph.rb +1 -0
- data/lib/dynflow/execution_plan/hooks.rb +1 -0
- data/lib/dynflow/execution_plan/output_reference.rb +2 -1
- data/lib/dynflow/execution_plan/steps.rb +1 -0
- data/lib/dynflow/execution_plan/steps/abstract.rb +10 -5
- data/lib/dynflow/execution_plan/steps/abstract_flow_step.rb +2 -0
- data/lib/dynflow/execution_plan/steps/error.rb +1 -0
- data/lib/dynflow/execution_plan/steps/finalize_step.rb +1 -0
- data/lib/dynflow/execution_plan/steps/plan_step.rb +1 -0
- data/lib/dynflow/execution_plan/steps/run_step.rb +1 -0
- data/lib/dynflow/executors.rb +1 -1
- data/lib/dynflow/executors/abstract/core.rb +132 -0
- data/lib/dynflow/executors/parallel.rb +24 -11
- data/lib/dynflow/executors/parallel/core.rb +10 -91
- data/lib/dynflow/executors/parallel/pool.rb +4 -2
- data/lib/dynflow/executors/parallel/worker.rb +2 -1
- data/lib/dynflow/executors/sidekiq/core.rb +121 -0
- data/lib/dynflow/executors/sidekiq/internal_job_base.rb +24 -0
- data/lib/dynflow/executors/sidekiq/orchestrator_jobs.rb +60 -0
- data/lib/dynflow/executors/sidekiq/redis_locking.rb +69 -0
- data/lib/dynflow/executors/sidekiq/serialization.rb +33 -0
- data/lib/dynflow/executors/sidekiq/worker_jobs.rb +42 -0
- data/lib/dynflow/flows.rb +1 -0
- data/lib/dynflow/flows/abstract.rb +1 -0
- data/lib/dynflow/flows/abstract_composed.rb +1 -0
- data/lib/dynflow/flows/atom.rb +1 -0
- data/lib/dynflow/flows/concurrence.rb +1 -0
- data/lib/dynflow/flows/sequence.rb +1 -0
- data/lib/dynflow/logger_adapters.rb +1 -0
- data/lib/dynflow/logger_adapters/abstract.rb +1 -0
- data/lib/dynflow/logger_adapters/delegator.rb +1 -0
- data/lib/dynflow/logger_adapters/formatters.rb +1 -0
- data/lib/dynflow/logger_adapters/formatters/abstract.rb +1 -0
- data/lib/dynflow/logger_adapters/formatters/exception.rb +1 -0
- data/lib/dynflow/logger_adapters/simple.rb +1 -0
- data/lib/dynflow/middleware.rb +1 -0
- data/lib/dynflow/middleware/common/singleton.rb +1 -0
- data/lib/dynflow/middleware/common/transaction.rb +1 -0
- data/lib/dynflow/middleware/register.rb +1 -0
- data/lib/dynflow/middleware/resolver.rb +1 -0
- data/lib/dynflow/middleware/stack.rb +1 -0
- data/lib/dynflow/middleware/world.rb +1 -0
- data/lib/dynflow/persistence.rb +3 -2
- data/lib/dynflow/persistence_adapters.rb +1 -0
- data/lib/dynflow/persistence_adapters/abstract.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel.rb +10 -7
- data/lib/dynflow/persistence_adapters/sequel_migrations/001_initial.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/002_incremental_progress.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/003_parent_action.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/004_coordinator_records.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/005_envelopes.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/006_fix_data_length.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/007_future_execution.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/008_rename_scheduled_plans_to_delayed_plans.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/009_fix_mysql_data_length.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/010_add_execution_plans_label.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/011_placeholder.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/012_add_delayed_plans_serialized_args.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/013_add_action_columns.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/014_add_step_columns.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/015_add_execution_plan_columns.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/016_add_step_queue.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/017_add_delayed_plan_frozen.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/018_add_uuid_column.rb +1 -0
- data/lib/dynflow/persistence_adapters/sequel_migrations/019_update_mysql_time_precision.rb +48 -0
- data/lib/dynflow/rails.rb +1 -0
- data/lib/dynflow/rails/configuration.rb +6 -3
- data/lib/dynflow/rails/daemon.rb +1 -0
- data/lib/dynflow/round_robin.rb +1 -0
- data/lib/dynflow/semaphores.rb +1 -0
- data/lib/dynflow/semaphores/abstract.rb +1 -0
- data/lib/dynflow/semaphores/aggregating.rb +1 -0
- data/lib/dynflow/semaphores/dummy.rb +1 -0
- data/lib/dynflow/semaphores/stateful.rb +1 -0
- data/lib/dynflow/serializable.rb +13 -4
- data/lib/dynflow/serializer.rb +24 -0
- data/lib/dynflow/serializers.rb +1 -0
- data/lib/dynflow/serializers/abstract.rb +1 -0
- data/lib/dynflow/serializers/noop.rb +1 -0
- data/lib/dynflow/stateful.rb +1 -0
- data/lib/dynflow/telemetry.rb +1 -0
- data/lib/dynflow/telemetry_adapters/abstract.rb +1 -0
- data/lib/dynflow/telemetry_adapters/dummy.rb +1 -0
- data/lib/dynflow/telemetry_adapters/statsd.rb +1 -0
- data/lib/dynflow/testing.rb +1 -0
- data/lib/dynflow/testing/assertions.rb +6 -5
- data/lib/dynflow/testing/dummy_execution_plan.rb +1 -0
- data/lib/dynflow/testing/dummy_executor.rb +19 -2
- data/lib/dynflow/testing/dummy_planned_action.rb +1 -0
- data/lib/dynflow/testing/dummy_step.rb +3 -1
- data/lib/dynflow/testing/dummy_world.rb +9 -0
- data/lib/dynflow/testing/factories.rb +6 -1
- data/lib/dynflow/testing/in_thread_executor.rb +22 -3
- data/lib/dynflow/testing/in_thread_world.rb +9 -0
- data/lib/dynflow/testing/managed_clock.rb +1 -0
- data/lib/dynflow/testing/mimic.rb +1 -0
- data/lib/dynflow/throttle_limiter.rb +1 -0
- data/lib/dynflow/transaction_adapters.rb +1 -0
- data/lib/dynflow/transaction_adapters/abstract.rb +1 -0
- data/lib/dynflow/transaction_adapters/active_record.rb +1 -0
- data/lib/dynflow/transaction_adapters/none.rb +1 -0
- data/lib/dynflow/utils.rb +1 -0
- data/lib/dynflow/utils/indifferent_hash.rb +1 -0
- data/lib/dynflow/utils/priority_queue.rb +1 -0
- data/lib/dynflow/version.rb +2 -1
- data/lib/dynflow/watchers/memory_consumption_watcher.rb +1 -0
- data/lib/dynflow/web.rb +1 -0
- data/lib/dynflow/web/console.rb +1 -0
- data/lib/dynflow/web/console_helpers.rb +1 -0
- data/lib/dynflow/web/filtering_helpers.rb +1 -0
- data/lib/dynflow/web/world_helpers.rb +1 -0
- data/lib/dynflow/web_console.rb +1 -0
- data/lib/dynflow/world.rb +11 -1
- data/lib/dynflow/world/invalidation.rb +7 -1
- data/test/abnormal_states_recovery_test.rb +41 -40
- data/test/action_test.rb +160 -110
- data/test/activejob_adapter_test.rb +1 -0
- data/test/batch_sub_tasks_test.rb +12 -11
- data/test/clock_test.rb +2 -1
- data/test/concurrency_control_test.rb +20 -19
- data/test/coordinator_test.rb +20 -21
- data/test/daemon_test.rb +2 -1
- data/test/dead_letter_silencer_test.rb +9 -7
- data/test/dispatcher_test.rb +2 -1
- data/test/execution_plan_cleaner_test.rb +13 -12
- data/test/execution_plan_hooks_test.rb +3 -2
- data/test/execution_plan_test.rb +33 -32
- data/test/executor_test.rb +533 -489
- data/test/future_execution_test.rb +45 -44
- data/test/memory_cosumption_watcher_test.rb +5 -4
- data/test/middleware_test.rb +55 -54
- data/test/persistence_test.rb +56 -53
- data/test/rescue_test.rb +36 -35
- data/test/round_robin_test.rb +13 -12
- data/test/semaphores_test.rb +31 -30
- data/test/support/code_workflow_example.rb +1 -0
- data/test/support/dummy_example.rb +14 -1
- data/test/support/middleware_example.rb +2 -1
- data/test/support/rails/config/environment.rb +1 -0
- data/test/support/rescue_example.rb +1 -0
- data/test/support/test_execution_log.rb +1 -0
- data/test/test_helper.rb +18 -17
- data/test/testing_test.rb +45 -44
- data/test/utils_test.rb +18 -17
- data/test/web_console_test.rb +1 -0
- data/test/world_test.rb +7 -6
- metadata +13 -4
- data/lib/dynflow/executors/abstract.rb +0 -40
@@ -1,20 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'dynflow/executors/parallel/pool'
|
3
|
+
require 'dynflow/executors/parallel/worker'
|
4
|
+
|
1
5
|
module Dynflow
|
2
6
|
module Executors
|
3
|
-
class Parallel
|
4
|
-
class Core <
|
7
|
+
class Parallel
|
8
|
+
class Core < Abstract::Core
|
5
9
|
attr_reader :logger
|
6
10
|
|
7
11
|
def initialize(world, heartbeat_interval, queues_options)
|
8
|
-
|
9
|
-
@
|
10
|
-
@queues_options = queues_options
|
11
|
-
@pools = {}
|
12
|
-
@terminated = nil
|
13
|
-
@director = Director.new(@world)
|
14
|
-
@heartbeat_interval = heartbeat_interval
|
15
|
-
|
12
|
+
super
|
13
|
+
@pools = {}
|
16
14
|
initialize_queues
|
17
|
-
schedule_heartbeat
|
18
15
|
end
|
19
16
|
|
20
17
|
def initialize_queues
|
@@ -27,41 +24,8 @@ module Dynflow
|
|
27
24
|
end
|
28
25
|
end
|
29
26
|
|
30
|
-
def handle_execution(execution_plan_id, finished)
|
31
|
-
if terminating?
|
32
|
-
raise Dynflow::Error,
|
33
|
-
"cannot accept execution_plan_id:#{execution_plan_id} core is terminating"
|
34
|
-
end
|
35
|
-
|
36
|
-
feed_pool(@director.start_execution(execution_plan_id, finished))
|
37
|
-
end
|
38
|
-
|
39
|
-
def handle_event(event)
|
40
|
-
Type! event, Director::Event
|
41
|
-
if terminating?
|
42
|
-
raise Dynflow::Error,
|
43
|
-
"cannot accept event: #{event} core is terminating"
|
44
|
-
end
|
45
|
-
feed_pool(@director.handle_event(event))
|
46
|
-
end
|
47
|
-
|
48
|
-
def work_finished(work)
|
49
|
-
feed_pool(@director.work_finished(work))
|
50
|
-
end
|
51
|
-
|
52
|
-
def handle_persistence_error(error, work = nil)
|
53
|
-
logger.error "PersistenceError in executor"
|
54
|
-
logger.error error
|
55
|
-
@director.work_failed(work) if work
|
56
|
-
if error.is_a? Errors::FatalPersistenceError
|
57
|
-
logger.fatal "Terminating"
|
58
|
-
@world.terminate
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
27
|
def start_termination(*args)
|
63
28
|
super
|
64
|
-
logger.info 'shutting down Core ...'
|
65
29
|
@pools.values.each { |pool| pool.tell([:start_termination, Concurrent::Promises.resolvable_future]) }
|
66
30
|
end
|
67
31
|
|
@@ -69,66 +33,21 @@ module Dynflow
|
|
69
33
|
@pools.delete(pool_name)
|
70
34
|
# we expect this message from all worker pools
|
71
35
|
return unless @pools.empty?
|
72
|
-
@director.terminate
|
73
|
-
logger.info '... Dynflow core terminated.'
|
74
36
|
super()
|
75
37
|
end
|
76
38
|
|
77
|
-
def dead_letter_routing
|
78
|
-
@world.dead_letter_handler
|
79
|
-
end
|
80
|
-
|
81
39
|
def execution_status(execution_plan_id = nil)
|
82
40
|
@pools.each_with_object({}) do |(pool_name, pool), hash|
|
83
41
|
hash[pool_name] = pool.ask!([:execution_status, execution_plan_id])
|
84
42
|
end
|
85
43
|
end
|
86
44
|
|
87
|
-
def heartbeat
|
88
|
-
@logger.debug('Executor heartbeat')
|
89
|
-
record = @world.coordinator.find_records(:id => @world.id,
|
90
|
-
:class => ['Dynflow::Coordinator::ExecutorWorld', 'Dynflow::Coordinator::ClientWorld']).first
|
91
|
-
unless record
|
92
|
-
logger.error(%{Executor's world record for #{@world.id} missing: terminating})
|
93
|
-
@world.terminate
|
94
|
-
return
|
95
|
-
end
|
96
|
-
|
97
|
-
record.data[:meta].update(:last_seen => Dynflow::Dispatcher::ClientDispatcher::PingCache.format_time)
|
98
|
-
@world.coordinator.update_record(record)
|
99
|
-
schedule_heartbeat
|
100
|
-
end
|
101
|
-
|
102
|
-
private
|
103
|
-
|
104
|
-
def schedule_heartbeat
|
105
|
-
@world.clock.ping(self, @heartbeat_interval, :heartbeat)
|
106
|
-
end
|
107
|
-
|
108
|
-
def on_message(message)
|
109
|
-
super
|
110
|
-
rescue Errors::PersistenceError => e
|
111
|
-
self.tell([:handle_persistence_error, e])
|
112
|
-
end
|
113
|
-
|
114
45
|
def feed_pool(work_items)
|
115
|
-
return if terminating?
|
116
|
-
return if work_items.nil?
|
117
|
-
work_items = [work_items] if work_items.is_a? Director::WorkItem
|
118
|
-
work_items.all? { |i| Type! i, Director::WorkItem }
|
119
46
|
work_items.each do |new_work|
|
120
|
-
|
121
|
-
|
122
|
-
logger.error("Pool is not available for queue #{new_work.queue}, falling back to #{fallback_queue}")
|
123
|
-
pool = @pools[fallback_queue]
|
124
|
-
end
|
125
|
-
pool.tell([:schedule_work, new_work])
|
47
|
+
new_work.world = @world
|
48
|
+
@pools.fetch(suggest_queue(new_work)).tell([:schedule_work, new_work])
|
126
49
|
end
|
127
50
|
end
|
128
|
-
|
129
|
-
def fallback_queue
|
130
|
-
:default
|
131
|
-
end
|
132
51
|
end
|
133
52
|
end
|
134
53
|
end
|
@@ -1,6 +1,7 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Dynflow
|
2
3
|
module Executors
|
3
|
-
class Parallel
|
4
|
+
class Parallel
|
4
5
|
class Pool < Actor
|
5
6
|
class JobStorage
|
6
7
|
def initialize
|
@@ -53,7 +54,8 @@ module Dynflow
|
|
53
54
|
end
|
54
55
|
|
55
56
|
def worker_done(worker, work)
|
56
|
-
|
57
|
+
step = work.step if work.is_a?(Director::StepWorkItem)
|
58
|
+
@executor_core.tell([:work_finished, work, step && step.delayed_events])
|
57
59
|
@free_workers << worker
|
58
60
|
Dynflow::Telemetry.with_instance { |t| t.set_gauge(:dynflow_active_workers, -1, telemetry_options) }
|
59
61
|
distribute_jobs
|
@@ -0,0 +1,121 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'dynflow/executors/sidekiq/serialization'
|
3
|
+
require 'dynflow/executors/sidekiq/internal_job_base'
|
4
|
+
require 'dynflow/executors/sidekiq/orchestrator_jobs'
|
5
|
+
require 'dynflow/executors/sidekiq/worker_jobs'
|
6
|
+
require 'dynflow/executors/sidekiq/redis_locking'
|
7
|
+
|
8
|
+
require 'sidekiq-reliable-fetch'
|
9
|
+
Sidekiq.configure_server do |config|
|
10
|
+
# Use semi-reliable fetch
|
11
|
+
# for details see https://gitlab.com/gitlab-org/sidekiq-reliable-fetch/blob/master/README.md
|
12
|
+
config.options[:semi_reliable_fetch] = true
|
13
|
+
Sidekiq::ReliableFetch.setup_reliable_fetch!(config)
|
14
|
+
end
|
15
|
+
|
16
|
+
module Dynflow
|
17
|
+
module Executors
|
18
|
+
module Sidekiq
|
19
|
+
class Core < Abstract::Core
|
20
|
+
include RedisLocking
|
21
|
+
|
22
|
+
TELEMETRY_UPDATE_INTERVAL = 30 # update telemetry every 30s
|
23
|
+
|
24
|
+
attr_reader :logger
|
25
|
+
|
26
|
+
def initialize(world, *_args)
|
27
|
+
@world = world
|
28
|
+
@logger = world.logger
|
29
|
+
wait_for_orchestrator_lock
|
30
|
+
super
|
31
|
+
schedule_update_telemetry
|
32
|
+
begin_startup!
|
33
|
+
end
|
34
|
+
|
35
|
+
def heartbeat
|
36
|
+
super
|
37
|
+
reacquire_orchestrator_lock
|
38
|
+
end
|
39
|
+
|
40
|
+
def start_termination(*args)
|
41
|
+
super
|
42
|
+
release_orchestrator_lock
|
43
|
+
finish_termination
|
44
|
+
end
|
45
|
+
|
46
|
+
# TODO: needs thoughts on how to implement it
|
47
|
+
def execution_status(execution_plan_id = nil)
|
48
|
+
{}
|
49
|
+
end
|
50
|
+
|
51
|
+
def feed_pool(work_items)
|
52
|
+
work_items.each do |new_work|
|
53
|
+
WorkerJobs::PerformWork.set(queue: suggest_queue(new_work)).perform_async(new_work)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def update_telemetry
|
58
|
+
sidekiq_queues = ::Sidekiq::Stats.new.queues
|
59
|
+
@queues_options.keys.each do |queue|
|
60
|
+
queue_size = sidekiq_queues[queue.to_s]
|
61
|
+
if queue_size
|
62
|
+
Dynflow::Telemetry.with_instance { |t| t.set_gauge(:dynflow_queue_size, queue_size, telemetry_options(queue)) }
|
63
|
+
end
|
64
|
+
end
|
65
|
+
schedule_update_telemetry
|
66
|
+
end
|
67
|
+
|
68
|
+
def work_finished(work, delayed_events = nil)
|
69
|
+
# If the work item is sent in reply to a request from the current orchestrator, proceed
|
70
|
+
if work.sender_orchestrator_id == @world.id
|
71
|
+
super
|
72
|
+
else
|
73
|
+
# If we're in recovery, we can drop the work as the execution plan will be resumed during validity checks performed when leaving recovery
|
74
|
+
# If we're not in recovery and receive an event from another orchestrator, it means it survived the queue draining.
|
75
|
+
handle_unknown_work_item(work) unless @recovery
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
def begin_startup!
|
80
|
+
WorkerJobs::DrainMarker.perform_async(@world.id)
|
81
|
+
@recovery = true
|
82
|
+
end
|
83
|
+
|
84
|
+
def startup_complete
|
85
|
+
logger.info('Performing validity checks')
|
86
|
+
@world.perform_validity_checks
|
87
|
+
logger.info('Finished performing validity checks')
|
88
|
+
@recovery = false
|
89
|
+
end
|
90
|
+
|
91
|
+
private
|
92
|
+
|
93
|
+
def fallback_queue
|
94
|
+
:default
|
95
|
+
end
|
96
|
+
|
97
|
+
def schedule_update_telemetry
|
98
|
+
@world.clock.ping(reference, TELEMETRY_UPDATE_INTERVAL, [:update_telemetry])
|
99
|
+
end
|
100
|
+
|
101
|
+
def telemetry_options(queue)
|
102
|
+
{ queue: queue.to_s, world: @world.id }
|
103
|
+
end
|
104
|
+
|
105
|
+
# We take a look if an execution lock is already being held by an orchestrator (it should be the current one). If no lock is held
|
106
|
+
# we try to resume the execution plan if possible
|
107
|
+
def handle_unknown_work_item(work)
|
108
|
+
# We are past recovery now, if we receive an event here, the execution plan will be most likely paused
|
109
|
+
# We can either try to rescue it or turn it over to stopped
|
110
|
+
execution_lock = @world.coordinator.find_locks(class: Coordinator::ExecutionLock.name,
|
111
|
+
id: "execution-plan:#{work.execution_plan_id}").first
|
112
|
+
if execution_lock.nil?
|
113
|
+
plan = @world.persistence.load_execution_plan(work.execution_plan_id)
|
114
|
+
should_resume = !plan.error? || plan.prepare_for_rescue == :running
|
115
|
+
@world.execute(plan.id) if should_resume
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Dynflow
|
3
|
+
module Executors
|
4
|
+
module Sidekiq
|
5
|
+
class InternalJobBase
|
6
|
+
include ::Sidekiq::Worker
|
7
|
+
extend ::Dynflow::Executors::Sidekiq::Serialization::WorkerExtension::ClassMethods
|
8
|
+
sidekiq_options retry: false, backtrace: true
|
9
|
+
|
10
|
+
def self.inherited(klass)
|
11
|
+
klass.prepend(::Dynflow::Executors::Sidekiq::Serialization::WorkerExtension)
|
12
|
+
end
|
13
|
+
|
14
|
+
def worker_id
|
15
|
+
::Sidekiq::Logging.tid
|
16
|
+
end
|
17
|
+
|
18
|
+
def telemetry_options(work_item)
|
19
|
+
{ queue: work_item.queue.to_s, world: Dynflow.process_world.id, worker: worker_id }
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Dynflow
|
3
|
+
module Executors
|
4
|
+
module Sidekiq
|
5
|
+
module OrchestratorJobs
|
6
|
+
# handles responses about finished work from the workers
|
7
|
+
# or some event to handle on orchestrator side
|
8
|
+
class WorkerDone < InternalJobBase
|
9
|
+
sidekiq_options queue: :dynflow_orchestrator
|
10
|
+
|
11
|
+
# @param work_item [Director::WorkItem] - finished work item reported back by a worker
|
12
|
+
# @param delayed_events - delayed events passed along with the work item (defaults to nil)
|
13
|
+
def perform(work_item, delayed_events = nil)
|
14
|
+
# Usually the step is saved on the worker's side. However if sidekiq is shut down,
|
15
|
+
# then the step may not have been saved so we save it just to be sure
|
16
|
+
if work_item.is_a?(Director::StepWorkItem) && work_item.step&.error&.exception.is_a?(::Sidekiq::Shutdown)
|
17
|
+
work_item.step.save
|
18
|
+
end
|
19
|
+
Dynflow.process_world.executor.core.tell([:work_finished, work_item, delayed_events])
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# handles setting up an event on orchestrator
|
24
|
+
class PlanEvent < InternalJobBase
|
25
|
+
sidekiq_options queue: :dynflow_orchestrator
|
26
|
+
|
27
|
+
# @param execution_plan_id, step_id, event, time - arguments forwarded unchanged
|
28
|
+
# to Dynflow.process_world.plan_event on the orchestrator side
|
29
|
+
def perform(execution_plan_id, step_id, event, time)
|
30
|
+
Dynflow.process_world.plan_event(execution_plan_id, step_id, event, time)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
class HandlePersistenceError < InternalJobBase
|
35
|
+
sidekiq_options queue: :dynflow_orchestrator
|
36
|
+
|
37
|
+
# @param error - the persistence error to hand over to the executor core
|
38
|
+
# @param work_item - the work item passed along with the error
|
39
|
+
def perform(error, work_item)
|
40
|
+
Dynflow.process_world.executor.core.tell([:handle_persistence_error, error, work_item])
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
class StartupComplete < InternalJobBase
|
45
|
+
sidekiq_options queue: :dynflow_orchestrator
|
46
|
+
|
47
|
+
# @param world_id - id of the world the startup-complete notification is meant for;
|
48
|
+
# it is discarded with a warning unless it matches the current process world
|
49
|
+
def perform(world_id)
|
50
|
+
if Dynflow.process_world.id == world_id
|
51
|
+
Dynflow.process_world.executor.core.tell([:startup_complete])
|
52
|
+
else
|
53
|
+
logger.warn("Received startup complete for a different world #{world_id}, discarding.")
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Dynflow
|
3
|
+
module Executors
|
4
|
+
module Sidekiq
|
5
|
+
module RedisLocking
|
6
|
+
REDIS_LOCK_KEY = 'dynflow_orchestrator_uuid'
|
7
|
+
REDIS_LOCK_TTL = 60
|
8
|
+
REDIS_LOCK_POLL_INTERVAL = 15
|
9
|
+
|
10
|
+
ACQUIRE_OK = 0
|
11
|
+
ACQUIRE_MISSING = 1
|
12
|
+
ACQUIRE_TAKEN = 2
|
13
|
+
|
14
|
+
RELEASE_SCRIPT = <<~LUA
|
15
|
+
if redis.call("get", KEYS[1]) == ARGV[1] then
|
16
|
+
redis.call("del", KEYS[1])
|
17
|
+
end
|
18
|
+
return #{ACQUIRE_OK}
|
19
|
+
LUA
|
20
|
+
|
21
|
+
REACQUIRE_SCRIPT = <<~LUA
|
22
|
+
if redis.call("exists", KEYS[1]) == 1 then
|
23
|
+
local owner = redis.call("get", KEYS[1])
|
24
|
+
if owner == ARGV[1] then
|
25
|
+
redis.call("set", KEYS[1], ARGV[1], "XX", "EX", #{REDIS_LOCK_TTL})
|
26
|
+
return #{ACQUIRE_OK}
|
27
|
+
else
|
28
|
+
return #{ACQUIRE_TAKEN}
|
29
|
+
end
|
30
|
+
else
|
31
|
+
redis.call("set", KEYS[1], ARGV[1], "NX", "EX", #{REDIS_LOCK_TTL})
|
32
|
+
return #{ACQUIRE_MISSING}
|
33
|
+
end
|
34
|
+
LUA
|
35
|
+
|
36
|
+
def release_orchestrator_lock
|
37
|
+
::Sidekiq.redis { |conn| conn.eval RELEASE_SCRIPT, [REDIS_LOCK_KEY], [@world.id] }
|
38
|
+
end
|
39
|
+
|
40
|
+
def wait_for_orchestrator_lock
|
41
|
+
mode = nil
|
42
|
+
loop do
|
43
|
+
active = ::Sidekiq.redis do |conn|
|
44
|
+
conn.set(REDIS_LOCK_KEY, @world.id, :ex => REDIS_LOCK_TTL, :nx => true)
|
45
|
+
end
|
46
|
+
break if active
|
47
|
+
if mode.nil?
|
48
|
+
mode = :passive
|
49
|
+
@logger.info('Orchestrator lock already taken, entering passive mode.')
|
50
|
+
end
|
51
|
+
sleep REDIS_LOCK_POLL_INTERVAL
|
52
|
+
end
|
53
|
+
@logger.info('Acquired orchestrator lock, entering active mode.')
|
54
|
+
end
|
55
|
+
|
56
|
+
def reacquire_orchestrator_lock
|
57
|
+
case ::Sidekiq.redis { |conn| conn.eval REACQUIRE_SCRIPT, [REDIS_LOCK_KEY], [@world.id] }
|
58
|
+
when ACQUIRE_MISSING
|
59
|
+
@logger.error('The orchestrator lock was lost, reacquired')
|
60
|
+
when ACQUIRE_TAKEN
|
61
|
+
owner = ::Sidekiq.redis { |conn| conn.get REDIS_LOCK_KEY }
|
62
|
+
@logger.fatal("The orchestrator lock was stolen by #{owner}, aborting.")
|
63
|
+
Process.kill('INT', Process.pid)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Dynflow
|
3
|
+
module Executors
|
4
|
+
module Sidekiq
|
5
|
+
# Module to prepend the Sidekiq job to handle the serialization
|
6
|
+
module Serialization
|
7
|
+
def self.serialize(value)
|
8
|
+
Dynflow.serializer.dump(value)
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.deserialize(value)
|
12
|
+
value = Utils::IndifferentHash.new(value) if value.is_a? Hash
|
13
|
+
Dynflow.serializer.load(value)
|
14
|
+
end
|
15
|
+
|
16
|
+
module WorkerExtension
|
17
|
+
# Overriding the Sidekiq entry method to perform additional serialization preparation
|
18
|
+
module ClassMethods
|
19
|
+
def client_push(opts)
|
20
|
+
opts['args'] = opts['args'].map { |a| Serialization.serialize(a) }
|
21
|
+
super(opts)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def perform(*args)
|
26
|
+
args = args.map { |a| Serialization.deserialize(a) }
|
27
|
+
super(*args)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|