dynflow 1.0.5 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e36392c5ebe5f3a20b879eca68d46ff0ee715985d7107af64f69b4ef256bf19e
4
- data.tar.gz: 3177acf84d594013c47f5a6413fc56f47dbb121727c5549948ef2f30ec2099de
3
+ metadata.gz: 6ee35eec200e14b25add8941b4b4637e994012053a271898c4abc4a70234942e
4
+ data.tar.gz: 720fc9161e5aadff8f165c12f6bb278cfa65435ef7a1d96d93df8614d21a1de8
5
5
  SHA512:
6
- metadata.gz: 41e469d615fb041a1323b6d512f31b57aafe55de76b35212b1d615e249822214aeda51f5710221932d4d7e78aae9515f500f80278b7d056f42403c24a28cdd31
7
- data.tar.gz: b7a202cf192a9d7af21e556971876867f180eb2a6319cd624075af854bb2a64ed0cd12435076261dc71aad4439ff01ebc3c2216799f85d0dbff30e0d495658d7
6
+ metadata.gz: 1753d21be5307643a16704a27b0e343fe55a7e74330469d1eece503d58cfa9fd04be1da57918e6e09e73f7e2822f2be2bd517e9eea58b89432bee7226d51ab93
7
+ data.tar.gz: ac12ce027be3e289227db4a2b5311328342de364c0f5d51f5badc27e7ba484a6f137488ce681d319e050b1a04ef55ac4821d0d87a06d6d70019f3b2c6e889fca
@@ -98,13 +98,21 @@ module Dynflow
98
98
  end
99
99
 
100
100
  config_attr :executor, Executors::Abstract, FalseClass do |world, config|
101
- Executors::Parallel.new(world, config.queues)
101
+ Executors::Parallel.new(world, config.executor_heartbeat_interval, config.queues)
102
102
  end
103
103
 
104
104
  config_attr :executor_semaphore, Semaphores::Abstract, FalseClass do |world, config|
105
105
  Semaphores::Dummy.new
106
106
  end
107
107
 
108
+ config_attr :executor_heartbeat_interval, Integer do
109
+ 15
110
+ end
111
+
112
+ config_attr :ping_cache_age, Integer do
113
+ 60
114
+ end
115
+
108
116
  config_attr :connector, Connectors::Abstract do |world|
109
117
  Connectors::Direct.new(world)
110
118
  end
@@ -118,7 +126,7 @@ module Dynflow
118
126
  end
119
127
 
120
128
  config_attr :validity_check_timeout, Numeric do
121
- 5
129
+ 30
122
130
  end
123
131
 
124
132
  config_attr :exit_on_terminate, Algebrick::Types::Boolean do
@@ -7,11 +7,18 @@ module Dynflow
7
7
  def initialize(world, options = {})
8
8
  @world = world
9
9
  @options = options
10
+ @started = false
10
11
  spawn
11
12
  end
12
13
 
14
+ def started?
15
+ @started
16
+ end
17
+
13
18
  def start
14
- @core.ask(:start)
19
+ @core.ask(:start).tap do
20
+ @started = true
21
+ end
15
22
  end
16
23
 
17
24
  def terminate
@@ -46,6 +46,7 @@ module Dynflow
46
46
  def process(delayed_plans, check_time)
47
47
  processed_plan_uuids = []
48
48
  delayed_plans.each do |plan|
49
+ next if plan.frozen
49
50
  fix_plan_state(plan)
50
51
  with_error_handling do
51
52
  if plan.execution_plan.state != :scheduled
@@ -3,14 +3,16 @@ module Dynflow
3
3
 
4
4
  include Algebrick::TypeCheck
5
5
 
6
- attr_reader :execution_plan_uuid, :start_at, :start_before
6
+ attr_reader :execution_plan_uuid, :start_before
7
+ attr_accessor :frozen, :start_at
7
8
 
8
- def initialize(world, execution_plan_uuid, start_at, start_before, args_serializer)
9
+ def initialize(world, execution_plan_uuid, start_at, start_before, args_serializer, frozen)
9
10
  @world = Type! world, World
10
11
  @execution_plan_uuid = Type! execution_plan_uuid, String
11
12
  @start_at = Type! start_at, Time, NilClass
12
13
  @start_before = Type! start_before, Time, NilClass
13
14
  @args_serializer = Type! args_serializer, Serializers::Abstract
15
+ @frozen = Type! frozen, Algebrick::Types::Boolean
14
16
  end
15
17
 
16
18
  def execution_plan
@@ -55,7 +57,8 @@ module Dynflow
55
57
  :start_at => @start_at,
56
58
  :start_before => @start_before,
57
59
  :serialized_args => @args_serializer.serialized_args,
58
- :args_serializer => @args_serializer.class.name
60
+ :args_serializer => @args_serializer.class.name,
61
+ :frozen => @frozen
59
62
  end
60
63
 
61
64
  # Retrieves arguments from the serializer
@@ -73,7 +76,8 @@ module Dynflow
73
76
  hash[:execution_plan_uuid],
74
77
  string_to_time(hash[:start_at]),
75
78
  string_to_time(hash[:start_before]),
76
- serializer)
79
+ serializer,
80
+ hash[:frozen] || false)
77
81
  rescue NameError => e
78
82
  error(e.message)
79
83
  end
@@ -12,7 +12,8 @@ module Dynflow
12
12
  end
13
13
 
14
14
  Ping = type do
15
- fields! receiver_id: String
15
+ fields! receiver_id: String,
16
+ use_cache: type { variants TrueClass, FalseClass }
16
17
  end
17
18
 
18
19
  Status = type do
@@ -26,16 +26,96 @@ module Dynflow
26
26
  end
27
27
  end
28
28
 
29
- def initialize(world)
29
+ # Class used for reducing the number of sent Pings among worlds.
30
+ # World's coordinator record includes the time when the world was
31
+ # seen for the last time. This class can be used to query this
32
+ # information and determine whether the record is "fresh enough"
33
+ # or whether the Ping really needs to be sent.
34
+ class PingCache
35
+ # Format string used for formatting and parsing times
36
+ TIME_FORMAT = '%Y-%m-%d %H:%M:%S.%L'.freeze
37
+ DEFAULT_MAX_AGE = 60
38
+
39
+ # Formats time into a string
40
+ #
41
+ # @param time [Time] the time to format
42
+ # @return [String] the formatted time
43
+ def self.format_time(time = Time.now)
44
+ time.strftime(TIME_FORMAT)
45
+ end
46
+
47
+ # Parses time from a string
48
+ #
49
+ # @param time [String] the time string to parse
50
+ # @return [Time] the parsed time
51
+ def self.load_time(time)
52
+ Time.strptime(time, TIME_FORMAT)
53
+ end
54
+
55
+ # @param world [World] the world to which the PingCache belongs
56
+ def initialize(world, max_age = DEFAULT_MAX_AGE)
57
+ @world = world
58
+ @max_age = max_age
59
+ @executor = {}
60
+ end
61
+
62
+ # Records in the world's coordinator record when the world was last seen
63
+ #
64
+ # @param id [String] Id of the world to be added to the cache
65
+ # @param time [Time] Time when the world was last seen
66
+ def add_record(id, time = Time.now)
67
+ record = find_world id
68
+ @executor[id] ||= record.data[:class] == 'Dynflow::Coordinator::ExecutorWorld'
69
+ record.data[:meta].update(:last_seen => self.class.format_time(time))
70
+ @world.coordinator.update_record(record)
71
+ end
72
+
73
+ # Looks into the cache whether the world has an executor
74
+ #
75
+ # @param id [String] Id of the world
76
+ # @return [TrueClass] if the world has an executor
77
+ # @return [FalseClass] if the world is a client world
78
+ # @return [NilClass] if unknown
79
+ def executor?(id)
80
+ @executor[id]
81
+ end
82
+
83
+ # Loads the coordinator record from the database and checks whether the world
84
+ # was last seen within the time limit
85
+ #
86
+ # @param id [String] Id of the world to be checked
87
+ # @return [TrueClass] if the world was last seen within the limit
88
+ # @return [FalseClass] if the record is missing or the world was last seen longer ago than the limit
89
+ def fresh_record?(id)
90
+ record = find_world(id)
91
+ return false if record.nil?
92
+ @executor[id] = record.data[:class] == 'Dynflow::Coordinator::ExecutorWorld'
93
+ time = self.class.load_time(record.data[:meta][:last_seen])
94
+ time >= Time.now - @max_age
95
+ end
96
+
97
+ private
98
+
99
+ def find_world(id)
100
+ @world.coordinator.find_records(:id => id,
101
+ :class => ['Dynflow::Coordinator::ExecutorWorld', 'Dynflow::Coordinator::ClientWorld']).first
102
+ end
103
+ end
104
+
105
+ attr_reader :ping_cache
106
+ def initialize(world, ping_cache_age)
30
107
  @world = Type! world, World
31
108
  @last_id = 0
32
109
  @tracked_requests = {}
33
110
  @terminated = nil
111
+ @ping_cache = PingCache.new world, ping_cache_age
34
112
  end
35
113
 
36
114
  def publish_request(future, request, timeout)
37
- track_request(future, request, timeout) do |tracked_request|
38
- dispatch_request(request, @world.id, tracked_request.id)
115
+ with_ping_request_caching(request, future) do
116
+ track_request(future, request, timeout) do |tracked_request|
117
+ dispatch_request(request, @world.id, tracked_request.id)
118
+ end
39
119
  end
40
120
  end
41
121
 
@@ -58,7 +138,7 @@ module Dynflow
58
138
  (on ~Event do |event|
59
139
  find_executor(event.execution_plan_id)
60
140
  end),
61
- (on Ping.(~any) | Status.(~any, ~any) do |receiver_id, _|
141
+ (on Ping.(~any, ~any) | Status.(~any, ~any) do |receiver_id, _|
62
142
  receiver_id
63
143
  end)
64
144
  envelope = Envelope[request_id, client_world_id, executor_id, request]
@@ -80,7 +160,11 @@ module Dynflow
80
160
  (on ~Failed do |msg|
81
161
  resolve_tracked_request(envelope.request_id, Dynflow::Error.new(msg.error))
82
162
  end),
83
- (on Done | Pong do
163
+ (on Done do
164
+ resolve_tracked_request(envelope.request_id)
165
+ end),
166
+ (on Pong do
167
+ add_ping_cache_record(envelope.sender_id)
84
168
  resolve_tracked_request(envelope.request_id)
85
169
  end),
86
170
  (on ExecutionStatus.(~any) do |steps|
@@ -88,6 +172,15 @@ module Dynflow
88
172
  end)
89
173
  end
90
174
 
175
+ # Records when the world with the provided id was last seen, using a PingCache
176
+ #
177
+ # @param id [String] Id of the world
178
+ # @see PingCache#add_record
179
+ def add_ping_cache_record(id)
180
+ log Logger::DEBUG, "adding ping cache record for #{id}"
181
+ @ping_cache.add_record id
182
+ end
183
+
91
184
  private
92
185
 
93
186
  def find_executor(execution_plan_id)
@@ -141,6 +234,27 @@ module Dynflow
141
234
  end
142
235
  end
143
236
 
237
+ # Tries to reduce the number of sent Ping requests by first looking into a cache. If the
238
+ # destination world is an executor world, the result is resolved solely from the cache.
239
+ # For client worlds the Ping might be sent if the cache record is stale.
240
+ #
241
+ # @param request [Dynflow::Dispatcher::Request] the request to send
242
+ # @param future [Concurrent::Future] the future to fulfill if the world was seen recently
243
+ # @return [Concurrent::Future] the future tracking the request
244
+ def with_ping_request_caching(request, future)
245
+ return yield unless request.is_a?(Dynflow::Dispatcher::Ping)
246
+ return yield unless request.use_cache
247
+
248
+ if @ping_cache.fresh_record?(request.receiver_id)
249
+ future.success(true)
250
+ else
251
+ if @ping_cache.executor?(request.receiver_id)
252
+ future.fail
253
+ else
254
+ yield
255
+ end
256
+ end
257
+ end
144
258
  end
145
259
  end
146
260
  end
@@ -252,7 +252,8 @@ module Dynflow
252
252
  id,
253
253
  delay_options[:start_at],
254
254
  delay_options.fetch(:start_before, nil),
255
- serializer)
255
+ serializer,
256
+ delay_options[:frozen] || false)
256
257
  persistence.save_delayed_plan(delayed_plan)
257
258
  ensure
258
259
  update_state(error? ? :stopped : :scheduled)
@@ -5,10 +5,10 @@ module Dynflow
5
5
  require 'dynflow/executors/parallel/pool'
6
6
  require 'dynflow/executors/parallel/worker'
7
7
 
8
- def initialize(world, queues_options = { :default => { :pool_size => 5 }})
8
+ def initialize(world, heartbeat_interval, queues_options = { :default => { :pool_size => 5 }})
9
9
  super(world)
10
10
  @core = Core.spawn name: 'parallel-executor-core',
11
- args: [world, queues_options],
11
+ args: [world, heartbeat_interval, queues_options],
12
12
  initialized: @core_initialized = Concurrent.future
13
13
  end
14
14
 
@@ -4,15 +4,17 @@ module Dynflow
4
4
  class Core < Actor
5
5
  attr_reader :logger
6
6
 
7
- def initialize(world, queues_options)
7
+ def initialize(world, heartbeat_interval, queues_options)
8
8
  @logger = world.logger
9
9
  @world = Type! world, World
10
10
  @queues_options = queues_options
11
11
  @pools = {}
12
12
  @terminated = nil
13
13
  @director = Director.new(@world)
14
+ @heartbeat_interval = heartbeat_interval
14
15
 
15
16
  initialize_queues
17
+ schedule_heartbeat
16
18
  end
17
19
 
18
20
  def initialize_queues
@@ -78,8 +80,27 @@ module Dynflow
78
80
  end
79
81
  end
80
82
 
83
+ def heartbeat
84
+ @logger.debug('Executor heartbeat')
85
+ record = @world.coordinator.find_records(:id => @world.id,
86
+ :class => ['Dynflow::Coordinator::ExecutorWorld', 'Dynflow::Coordinator::ClientWorld']).first
87
+ unless record
88
+ logger.error(%{Executor's world record for #{@world.id} missing: terminating})
89
+ @world.terminate
90
+ return
91
+ end
92
+
93
+ record.data[:meta].update(:last_seen => Dynflow::Dispatcher::ClientDispatcher::PingCache.format_time)
94
+ @world.coordinator.update_record(record)
95
+ schedule_heartbeat
96
+ end
97
+
81
98
  private
82
99
 
100
+ def schedule_heartbeat
101
+ @world.clock.ping(self, @heartbeat_interval, :heartbeat)
102
+ end
103
+
83
104
  def on_message(message)
84
105
  super
85
106
  rescue Errors::PersistenceError => e
@@ -93,6 +93,13 @@ module Dynflow
93
93
  adapter.save_delayed_plan(delayed_plan.execution_plan_uuid, delayed_plan.to_hash)
94
94
  end
95
95
 
96
+ def set_delayed_plan_frozen(execution_plan_id, frozen = true, new_start_at = nil)
97
+ plan = load_delayed_plan(execution_plan_id)
98
+ plan.frozen = frozen
99
+ plan.start_at = new_start_at if new_start_at
100
+ save_delayed_plan(plan)
101
+ end
102
+
96
103
  def load_delayed_plan(execution_plan_id)
97
104
  hash = adapter.load_delayed_plan(execution_plan_id)
98
105
  return nil unless hash
@@ -36,7 +36,7 @@ module Dynflow
36
36
  class action_class execution_plan_uuid queue),
37
37
  envelope: %w(receiver_id),
38
38
  coordinator_record: %w(id owner_id class),
39
- delayed: %w(execution_plan_uuid start_at start_before args_serializer)}
39
+ delayed: %w(execution_plan_uuid start_at start_before args_serializer frozen)}
40
40
 
41
41
  SERIALIZABLE_COLUMNS = { action: %w(input output),
42
42
  delayed: %w(serialized_args),
@@ -125,6 +125,7 @@ module Dynflow
125
125
  table_name = :delayed
126
126
  table(table_name)
127
127
  .where(::Sequel.lit('start_at <= ? OR (start_before IS NOT NULL AND start_before <= ?)', time, time))
128
+ .where(:frozen => false)
128
129
  .order_by(:start_at)
129
130
  .all
130
131
  .map { |plan| load_data(plan, table_name) }
@@ -0,0 +1,8 @@
1
+ Sequel.migration do
2
+ change do
3
+ alter_table(:dynflow_delayed_plans) do
4
+ add_column :frozen, :boolean
5
+ end
6
+ self[:dynflow_delayed_plans].update(:frozen => false)
7
+ end
8
+ end
@@ -42,8 +42,9 @@ module Dynflow
42
42
  config.run_on_init_hooks(world)
43
43
  # leave this just for long-running executors
44
44
  unless config.rake_task_with_executor?
45
- world.perform_validity_checks
45
+ invalidated_worlds = world.perform_validity_checks
46
46
  world.auto_execute
47
+ world.post_initialization if invalidated_worlds > 0
47
48
  end
48
49
  end
49
50
  end
@@ -155,7 +155,9 @@ module Dynflow
155
155
  if remote?
156
156
  false
157
157
  else
158
- ::Dynflow::Executors::Parallel.new(world, world.config.queues)
158
+ ::Dynflow::Executors::Parallel.new(world,
159
+ world.config.executor_heartbeat_interval,
160
+ world.config.queues)
159
161
  end
160
162
  end
161
163
 
File without changes
@@ -1,3 +1,3 @@
1
1
  module Dynflow
2
- VERSION = '1.0.5'.freeze
2
+ VERSION = '1.1.0'.freeze
3
3
  end
@@ -50,14 +50,14 @@ module Dynflow
50
50
  end
51
51
 
52
52
  post('/worlds/check') do
53
- load_worlds
54
53
  @validation_results = world.worlds_validity_check(params[:invalidate])
54
+ load_worlds
55
55
  erb :worlds
56
56
  end
57
57
 
58
58
  post('/worlds/:id/check') do |id|
59
- load_worlds
60
59
  @validation_results = world.worlds_validity_check(params[:invalidate], id: params[:id])
60
+ load_worlds
61
61
  erb :worlds
62
62
  end
63
63
 
@@ -1,8 +1,11 @@
1
1
  # -*- coding: utf-8 -*-
2
+ require 'dynflow/world/invalidation'
3
+
2
4
  module Dynflow
3
5
  class World
4
6
  include Algebrick::TypeCheck
5
7
  include Algebrick::Matching
8
+ include Invalidation
6
9
 
7
10
  attr_reader :id, :config, :client_dispatcher, :executor_dispatcher, :executor, :connector,
8
11
  :transaction_adapter, :logger_adapter, :coordinator,
@@ -28,7 +31,7 @@ module Dynflow
28
31
  @connector = @config.connector
29
32
  @middleware = Middleware::World.new
30
33
  @middleware.use Middleware::Common::Transaction if @transaction_adapter
31
- @client_dispatcher = spawn_and_wait(Dispatcher::ClientDispatcher, "client-dispatcher", self)
34
+ @client_dispatcher = spawn_and_wait(Dispatcher::ClientDispatcher, "client-dispatcher", self, @config.ping_cache_age)
32
35
  @dead_letter_handler = spawn_and_wait(DeadLetterSilencer, 'default_dead_letter_handler', @config.silent_dead_letter_matchers)
33
36
  @auto_validity_check = @config.auto_validity_check
34
37
  @validity_check_timeout = @config.validity_check_timeout
@@ -41,15 +44,9 @@ module Dynflow
41
44
  @executor_dispatcher = spawn_and_wait(Dispatcher::ExecutorDispatcher, "executor-dispatcher", self, @config.executor_semaphore)
42
45
  executor.initialized.wait
43
46
  end
47
+ update_register
44
48
  perform_validity_checks if auto_validity_check
45
49
 
46
- @delayed_executor = try_spawn(:delayed_executor, Coordinator::DelayedExecutorLock)
47
- @execution_plan_cleaner = try_spawn(:execution_plan_cleaner, Coordinator::ExecutionPlanCleanerLock)
48
- @meta = @config.meta
49
- @meta['queues'] = @config.queues if @executor
50
- @meta['delayed_executor'] = true if @delayed_executor
51
- @meta['execution_plan_cleaner'] = true if @execution_plan_cleaner
52
- coordinator.register_world(registered_world)
53
50
  @termination_barrier = Mutex.new
54
51
  @before_termination_hooks = Queue.new
55
52
 
@@ -59,14 +56,38 @@ module Dynflow
59
56
  self.terminate.wait
60
57
  end
61
58
  end
59
+ post_initialization
60
+ end
61
+
62
+ # Performs steps once the executor is ready and invalidation of previous worlds is finished.
63
+ # Needs to be idempotent, as it can be called several times (especially when auto_validity_check
64
+ # is false, as it should be called after `perform_validity_checks` method)
65
+ def post_initialization
66
+ @delayed_executor ||= try_spawn(:delayed_executor, Coordinator::DelayedExecutorLock)
67
+ @execution_plan_cleaner ||= try_spawn(:execution_plan_cleaner, Coordinator::ExecutionPlanCleanerLock)
68
+ update_register
69
+ @delayed_executor.start if @delayed_executor && !@delayed_executor.started?
62
70
  self.auto_execute if @config.auto_execute
63
- @delayed_executor.start if @delayed_executor
64
71
  end
65
72
 
66
73
  def before_termination(&block)
67
74
  @before_termination_hooks << block
68
75
  end
69
76
 
77
+ def update_register
78
+ @meta ||= @config.meta
79
+ @meta['queues'] = @config.queues if @executor
80
+ @meta['delayed_executor'] = true if @delayed_executor
81
+ @meta['execution_plan_cleaner'] = true if @execution_plan_cleaner
82
+ @meta['last_seen'] = Dynflow::Dispatcher::ClientDispatcher::PingCache.format_time
83
+ if @already_registered
84
+ coordinator.update_record(registered_world)
85
+ else
86
+ coordinator.register_world(registered_world)
87
+ @already_registered = true
88
+ end
89
+ end
90
+
70
91
  def registered_world
71
92
  if executor
72
93
  Coordinator::ExecutorWorld.new(self)
@@ -193,7 +214,11 @@ module Dynflow
193
214
  end
194
215
 
195
216
  def ping(world_id, timeout, done = Concurrent.future)
196
- publish_request(Dispatcher::Ping[world_id], done, false, timeout)
217
+ publish_request(Dispatcher::Ping[world_id, true], done, false, timeout)
218
+ end
219
+
220
+ def ping_without_cache(world_id, timeout, done = Concurrent.future)
221
+ publish_request(Dispatcher::Ping[world_id, false], done, false, timeout)
197
222
  end
198
223
 
199
224
  def get_execution_status(world_id, execution_plan_id, timeout, done = Concurrent.future)
@@ -270,121 +295,7 @@ module Dynflow
270
295
  defined?(@terminating)
271
296
  end
272
297
 
273
- # Invalidate another world, that left some data in the runtime,
274
- # but it's not really running
275
- def invalidate(world)
276
- Type! world, Coordinator::ClientWorld, Coordinator::ExecutorWorld
277
- coordinator.acquire(Coordinator::WorldInvalidationLock.new(self, world)) do
278
- if world.is_a? Coordinator::ExecutorWorld
279
- old_execution_locks = coordinator.find_locks(class: Coordinator::ExecutionLock.name,
280
- owner_id: "world:#{world.id}")
281
-
282
- coordinator.deactivate_world(world)
283
-
284
- old_execution_locks.each do |execution_lock|
285
- invalidate_execution_lock(execution_lock)
286
- end
287
- end
288
-
289
- coordinator.delete_world(world)
290
- end
291
- end
292
-
293
- def invalidate_execution_lock(execution_lock)
294
- begin
295
- plan = persistence.load_execution_plan(execution_lock.execution_plan_id)
296
- rescue => e
297
- if e.is_a?(KeyError)
298
- logger.error "invalidated execution plan #{execution_lock.execution_plan_id} missing, skipping"
299
- else
300
- logger.error e
301
- logger.error "unexpected error when invalidating execution plan #{execution_lock.execution_plan_id}, skipping"
302
- end
303
- coordinator.release(execution_lock)
304
- coordinator.release_by_owner(execution_lock.execution_plan_id)
305
- return
306
- end
307
- unless plan.valid?
308
- logger.error "invalid plan #{plan.id}, skipping"
309
- coordinator.release(execution_lock)
310
- coordinator.release_by_owner(execution_lock.execution_plan_id)
311
- return
312
- end
313
- plan.execution_history.add('terminate execution', execution_lock.world_id)
314
-
315
- plan.steps.values.each do |step|
316
- if step.state == :running
317
- step.error = ExecutionPlan::Steps::Error.new("Abnormal termination (previous state: #{step.state})")
318
- step.state = :error
319
- step.save
320
- end
321
- end
322
-
323
- plan.update_state(:paused) if plan.state == :running
324
- plan.save
325
- coordinator.release(execution_lock)
326
-
327
- available_executors = coordinator.find_worlds(true)
328
- if available_executors.any? && !plan.error?
329
- client_dispatcher.tell([:dispatch_request,
330
- Dispatcher::Execution[execution_lock.execution_plan_id],
331
- execution_lock.client_world_id,
332
- execution_lock.request_id])
333
- end
334
- rescue Errors::PersistenceError
335
- logger.error "failed to write data while invalidating execution lock #{execution_lock}"
336
- end
337
-
338
- def perform_validity_checks
339
- worlds_validity_check
340
- locks_validity_check
341
- end
342
-
343
- def worlds_validity_check(auto_invalidate = true, worlds_filter = {})
344
- worlds = coordinator.find_worlds(false, worlds_filter)
345
-
346
- world_checks = worlds.reduce({}) do |hash, world|
347
- hash.update(world => ping(world.id, self.validity_check_timeout))
348
- end
349
- world_checks.values.each(&:wait)
350
-
351
- results = {}
352
- world_checks.each do |world, check|
353
- if check.success?
354
- result = :valid
355
- else
356
- if auto_invalidate
357
- begin
358
- invalidate(world)
359
- result = :invalidated
360
- rescue => e
361
- logger.error e
362
- result = e.message
363
- end
364
- else
365
- result = :invalid
366
- end
367
- end
368
- results[world.id] = result
369
- end
370
-
371
- unless results.values.all? { |result| result == :valid }
372
- logger.error "invalid worlds found #{results.inspect}"
373
- end
374
-
375
- return results
376
- end
377
-
378
- def locks_validity_check
379
- orphaned_locks = coordinator.clean_orphaned_locks
380
-
381
- unless orphaned_locks.empty?
382
- logger.error "invalid coordinator locks found and invalidated: #{orphaned_locks.inspect}"
383
- end
384
-
385
- return orphaned_locks
386
- end
387
-
298
+ # 24119 - ensure delayed executor is preserved after invalidation
388
299
  # executes plans that are planned/paused and haven't reported any error yet (usually when no executor
389
300
  # was available by the time of planning or terminating)
390
301
  def auto_execute
@@ -0,0 +1,160 @@
1
+ module Dynflow
2
+ class World
3
+ module Invalidation
4
+ # Invalidate another world, that left some data in the runtime,
5
+ # but it's not really running
6
+ #
7
+ # @param world [Coordinator::ClientWorld, Coordinator::ExecutorWorld] coordinator record
8
+ # left behind by the world we're trying to invalidate
9
+ # @return [void]
10
+ def invalidate(world)
11
+ Type! world, Coordinator::ClientWorld, Coordinator::ExecutorWorld
12
+ coordinator.acquire(Coordinator::WorldInvalidationLock.new(self, world)) do
13
+ if world.is_a? Coordinator::ExecutorWorld
14
+ old_execution_locks = coordinator.find_locks(class: Coordinator::ExecutionLock.name,
15
+ owner_id: "world:#{world.id}")
16
+
17
+ coordinator.deactivate_world(world)
18
+
19
+ old_execution_locks.each do |execution_lock|
20
+ invalidate_execution_lock(execution_lock)
21
+ end
22
+ end
23
+
24
+ coordinator.delete_world(world)
25
+ end
26
+ end
27
+
28
+ # Invalidate an execution lock, left behind by an executor that
29
+ # was executing an execution plan when it was terminated.
30
+ #
31
+ # @param execution_lock [Coordinator::ExecutionLock] the lock to invalidate
32
+ # @return [void]
33
+ def invalidate_execution_lock(execution_lock)
34
+ with_valid_execution_plan_for_lock(execution_lock) do |plan|
35
+ plan.execution_history.add('terminate execution', execution_lock.world_id)
36
+
37
+ plan.steps.values.each do |step|
38
+ if step.state == :running
39
+ step.error = ExecutionPlan::Steps::Error.new("Abnormal termination (previous state: #{step.state})")
40
+ step.state = :error
41
+ step.save
42
+ end
43
+ end
44
+
45
+ plan.update_state(:paused) if plan.state == :running
46
+ plan.save
47
+ coordinator.release(execution_lock)
48
+
49
+ if plan.error?
50
+ rescue_id = plan.rescue_plan_id
51
+ execute(rescue_id) if rescue_id
52
+ else
53
+ if coordinator.find_worlds(true).any? # Check if there are any executors
54
+ client_dispatcher.tell([:dispatch_request,
55
+ Dispatcher::Execution[execution_lock.execution_plan_id],
56
+ execution_lock.client_world_id,
57
+ execution_lock.request_id])
58
+ end
59
+ end
60
+ end
61
+ rescue Errors::PersistenceError
62
+ logger.error "failed to write data while invalidating execution lock #{execution_lock}"
63
+ end
64
+
65
+ # Tries to load an execution plan using id stored in the
66
+ # lock. If the execution plan cannot be loaded or is invalid,
67
+ # the lock is released. If the plan gets loaded successfully, it
68
+ # is yielded to a given block.
69
+ #
70
+ # @param execution_lock [Coordinator::ExecutionLock] the lock for which we're trying
71
+ # to load the execution plan
72
+ # @yieldparam [ExecutionPlan] execution_plan the successfully loaded execution plan
73
+ # @return [void]
74
+ def with_valid_execution_plan_for_lock(execution_lock)
75
+ begin
76
+ plan = persistence.load_execution_plan(execution_lock.execution_plan_id)
77
+ rescue => e
78
+ if e.is_a?(KeyError)
79
+ logger.error "invalidated execution plan #{execution_lock.execution_plan_id} missing, skipping"
80
+ else
81
+ logger.error e
82
+ logger.error "unexpected error when invalidating execution plan #{execution_lock.execution_plan_id}, skipping"
83
+ end
84
+ coordinator.release(execution_lock)
85
+ coordinator.release_by_owner(execution_lock.execution_plan_id)
86
+ return
87
+ end
88
+ unless plan.valid?
89
+ logger.error "invalid plan #{plan.id}, skipping"
90
+ coordinator.release(execution_lock)
91
+ coordinator.release_by_owner(execution_lock.execution_plan_id)
92
+ return
93
+ end
94
+ yield plan
95
+ end
96
+
97
+ # Performs world validity checks
98
+ #
99
+ # @return [Integer] number of invalidated worlds
100
+ def perform_validity_checks
101
+ world_invalidation_result = worlds_validity_check
102
+ locks_validity_check
103
+ world_invalidation_result.values.select { |result| result == :invalidated }.size
104
+ end
105
+
106
+ # Checks if all worlds are valid and optionally invalidates them
107
+ #
108
+ # @param auto_invalidate [Boolean] whether automatic invalidation should be performed
109
+ # @param worlds_filter [Hash] hash of filters to select only matching worlds
110
+ # @return [Hash{String=>Symbol}] hash containing validation results, mapping world id to a result
111
+ def worlds_validity_check(auto_invalidate = true, worlds_filter = {})
112
+ worlds = coordinator.find_worlds(false, worlds_filter)
113
+
114
+ world_checks = worlds.reduce({}) do |hash, world|
115
+ hash.update(world => ping_without_cache(world.id, self.validity_check_timeout))
116
+ end
117
+ world_checks.values.each(&:wait)
118
+
119
+ results = {}
120
+ world_checks.each do |world, check|
121
+ if check.success?
122
+ result = :valid
123
+ else
124
+ if auto_invalidate
125
+ begin
126
+ invalidate(world)
127
+ result = :invalidated
128
+ rescue => e
129
+ logger.error e
130
+ result = e.message
131
+ end
132
+ else
133
+ result = :invalid
134
+ end
135
+ end
136
+ results[world.id] = result
137
+ end
138
+
139
+ unless results.values.all? { |result| result == :valid }
140
+ logger.error "invalid worlds found #{results.inspect}"
141
+ end
142
+
143
+ return results
144
+ end
145
+
146
+ # Cleans up locks which don't have a resource
147
+ #
148
+ # @return [Array<Coordinator::Lock>] the removed locks
149
+ def locks_validity_check
150
+ orphaned_locks = coordinator.clean_orphaned_locks
151
+
152
+ unless orphaned_locks.empty?
153
+ logger.error "invalid coordinator locks found and invalidated: #{orphaned_locks.inspect}"
154
+ end
155
+
156
+ return orphaned_locks
157
+ end
158
+ end
159
+ end
160
+ end
@@ -72,6 +72,33 @@ module Dynflow
72
72
  end
73
73
  end
74
74
 
75
+ it "honors rescue strategy when invalidating execution locks" do
76
+ coordinator = executor_world_2.coordinator
77
+ # Plan and action
78
+ plan = client_world.plan(Support::DummyExample::SkippableDummy)
79
+ plan.update_state :running
80
+ plan.save
81
+
82
+ # Simulate leaving behind an execution lock for it
83
+ lock = Coordinator::ExecutionLock.new(executor_world, plan.id, client_world.id, 0)
84
+ coordinator.acquire(lock)
85
+
86
+ # Simulate abnormal termination
87
+ step = plan.steps.values.last
88
+ step.state = :error
89
+ step.save
90
+
91
+ # Invalidate the world's lock
92
+ world_lock = coordinator.find_worlds(false, :id => executor_world.id).first
93
+ executor_world_2.invalidate(world_lock)
94
+
95
+ wait_for do
96
+ plan = executor_world_2.persistence.load_execution_plan(plan.id)
97
+ step = plan.steps.values.last
98
+ plan.state == :stopped && step.state == :skipped
99
+ end
100
+ end
101
+
75
102
  it "prevents from running the invalidation twice on the same world" do
76
103
  client_world.invalidate(executor_world.registered_world)
77
104
  expected_locks = ["lock world-invalidation:#{executor_world.id}",
@@ -194,7 +221,7 @@ module Dynflow
194
221
  client_world_config = Config::ForWorld.new(Config.new.tap { |c| c.executor = false }, create_world )
195
222
  client_world_config.auto_validity_check.must_equal false
196
223
 
197
- executor_world_config = Config::ForWorld.new(Config.new.tap { |c| c.executor = lambda { |w, _| Executors::Parallel.new(w) } }, create_world )
224
+ executor_world_config = Config::ForWorld.new(Config.new.tap { |c| c.executor = lambda { |w, _| Executors::Parallel.new(w, 15) } }, create_world )
198
225
  executor_world_config.auto_validity_check.must_equal true
199
226
  end
200
227
 
@@ -17,7 +17,7 @@ class DaemonTest < ActiveSupport::TestCase
17
17
  @dummy_world = ::Dynflow::Testing::DummyWorld.new
18
18
  @dummy_world.stubs(:id => '123')
19
19
  @dummy_world.stubs(:auto_execute)
20
- @dummy_world.stubs(:perform_validity_checks)
20
+ @dummy_world.stubs(:perform_validity_checks => 0)
21
21
  @event = Concurrent.event
22
22
  @dummy_world.stubs(:terminated).returns(@event)
23
23
  @world_class.stubs(:new).returns(@dummy_world)
@@ -42,7 +42,14 @@ class DaemonTest < ActiveSupport::TestCase
42
42
  @event.wait
43
43
  end
44
44
 
45
- test 'run command works withou memory_limit option specified' do
45
+ test 'run command works without memory_limit option specified' do
46
+ @daemon.run(@current_folder)
47
+ @dynflow.initialize!
48
+ end
49
+
50
+ test 'runs post_initialization when there are invalid worlds detected' do
51
+ @dummy_world.stubs(:perform_validity_checks => 1)
52
+ @dummy_world.expects(:post_initialization)
46
53
  @daemon.run(@current_folder)
47
54
  @dynflow.initialize!
48
55
  end
@@ -76,12 +76,41 @@ module Dynflow
76
76
  assert ping_response.success?
77
77
  end
78
78
 
79
+ it 'succeeds when the world is available without cache' do
80
+ ping_response = client_world.ping_without_cache(executor_world.id, 0.5)
81
+ ping_response.wait
82
+ assert ping_response.success?
83
+ end
84
+
79
85
  it 'time-outs when the world is not responding' do
80
86
  executor_world.terminate.wait
81
87
  ping_response = client_world.ping(executor_world.id, 0.5)
82
88
  ping_response.wait
83
89
  assert ping_response.failed?
84
90
  end
91
+
92
+ it 'time-outs when the world is not responding without cache' do
93
+ executor_world.terminate.wait
94
+ ping_response = client_world.ping_without_cache(executor_world.id, 0.5)
95
+ ping_response.wait
96
+ assert ping_response.failed?
97
+ end
98
+
99
+ it 'caches the pings and pongs' do
100
+ # Spawn the worlds
101
+ client_world
102
+ executor_world
103
+
104
+ ping_cache = Dynflow::Dispatcher::ClientDispatcher::PingCache.new(executor_world)
105
+
106
+ # Records are fresh because of the heartbeat
107
+ assert ping_cache.fresh_record?(client_world.id)
108
+ assert ping_cache.fresh_record?(executor_world.id)
109
+
110
+ # Expire the record
111
+ ping_cache.add_record(executor_world.id, Time.now - 1000)
112
+ refute ping_cache.fresh_record?(executor_world.id)
113
+ end
85
114
  end
86
115
  end
87
116
 
@@ -177,7 +177,7 @@ module Dynflow
177
177
  let(:args) { %w(arg1 arg2) }
178
178
  let(:serializer) { Dynflow::Serializers::Noop.new(nil, args) }
179
179
  let(:delayed_plan) do
180
- Dynflow::DelayedPlan.new(Dynflow::World.allocate, 'an uuid', nil, nil, serializer)
180
+ Dynflow::DelayedPlan.new(Dynflow::World.allocate, 'an uuid', nil, nil, serializer, false)
181
181
  end
182
182
 
183
183
  it "allows access to serializer's args" do
@@ -302,16 +302,30 @@ module Dynflow
302
302
  it 'finds plans with start_before in past' do
303
303
  start_time = Time.now.utc
304
304
  prepare_and_save_plans
305
- adapter.save_delayed_plan('plan1', :execution_plan_uuid => 'plan1', :start_at => format_time(start_time + 60),
305
+ adapter.save_delayed_plan('plan1', :execution_plan_uuid => 'plan1', :frozen => false, :start_at => format_time(start_time + 60),
306
306
  :start_before => format_time(start_time - 60))
307
- adapter.save_delayed_plan('plan2', :execution_plan_uuid => 'plan2', :start_at => format_time(start_time - 60))
308
- adapter.save_delayed_plan('plan3', :execution_plan_uuid => 'plan3', :start_at => format_time(start_time + 60))
309
- adapter.save_delayed_plan('plan4', :execution_plan_uuid => 'plan4', :start_at => format_time(start_time - 60),
307
+ adapter.save_delayed_plan('plan2', :execution_plan_uuid => 'plan2', :frozen => false, :start_at => format_time(start_time - 60))
308
+ adapter.save_delayed_plan('plan3', :execution_plan_uuid => 'plan3', :frozen => false, :start_at => format_time(start_time + 60))
309
+ adapter.save_delayed_plan('plan4', :execution_plan_uuid => 'plan4', :frozen => false, :start_at => format_time(start_time - 60),
310
310
  :start_before => format_time(start_time - 60))
311
311
  plans = adapter.find_past_delayed_plans(start_time)
312
312
  plans.length.must_equal 3
313
313
  plans.map { |plan| plan[:execution_plan_uuid] }.must_equal %w(plan2 plan4 plan1)
314
314
  end
315
+
316
+ it 'does not find plans that are frozen' do
317
+ start_time = Time.now.utc
318
+ prepare_and_save_plans
319
+
320
+ adapter.save_delayed_plan('plan1', :execution_plan_uuid => 'plan1', :frozen => false, :start_at => format_time(start_time + 60),
321
+ :start_before => format_time(start_time - 60))
322
+ adapter.save_delayed_plan('plan2', :execution_plan_uuid => 'plan2', :frozen => true, :start_at => format_time(start_time + 60),
323
+ :start_before => format_time(start_time - 60))
324
+
325
+ plans = adapter.find_past_delayed_plans(start_time)
326
+ plans.length.must_equal 1
327
+ plans.first[:execution_plan_uuid].must_equal 'plan1'
328
+ end
315
329
  end
316
330
  end
317
331
 
@@ -6,6 +6,12 @@ module Support
6
6
  def run; end
7
7
  end
8
8
 
9
+ class SkippableDummy < Dummy
10
+ def rescue_strategy_for_self
11
+ Dynflow::Action::Rescue::Skip
12
+ end
13
+ end
14
+
9
15
  class MySerializer < Dynflow::Serializers::Noop
10
16
  def serialize(arg)
11
17
  raise 'Enforced serializer failure' if arg == :fail
@@ -10,6 +10,7 @@ module Dynflow
10
10
  describe '#meta' do
11
11
  it 'by default informs about the hostname and the pid running the world' do
12
12
  registered_world = world.coordinator.find_worlds(false, id: world.id).first
13
+ registered_world.meta.delete('last_seen')
13
14
  registered_world.meta.must_equal('hostname' => Socket.gethostname, 'pid' => Process.pid,
14
15
  'queues' => { 'default' => { 'pool_size' => 5 },
15
16
  'slow' => { 'pool_size' => 1 }})
@@ -9,7 +9,8 @@
9
9
  <h3>Executors</h3>
10
10
  <% @executors.each do |world| %>
11
11
  <%= value_field('Id', world.id) %>
12
- <%= value_field('Metadata', world.meta) %>
12
+ <%= value_field('Metadata', world.meta.reject { |key, _| key == 'last_seen' }) %>
13
+ <%= value_field('Last seen', world.meta['last_seen']) %>
13
14
  <p>
14
15
  <b>Status:</b>
15
16
  <%= erb :world_validation_result, locals: { world: world } %>
@@ -39,13 +40,15 @@
39
40
  <tr>
40
41
  <th>Id</th>
41
42
  <th>Meta</th>
43
+ <th>Last seen</th>
42
44
  <th></th>
43
45
  </tr>
44
46
  </thead>
45
47
  <% @clients.each do |world| %>
46
48
  <tr>
47
49
  <td><%= h(world.id) %></td>
48
- <td><%= h(world.meta) %></td>
50
+ <td><%= h(world.meta.reject { |key, _| key == 'last_seen' }) %></td>
51
+ <td><%= h(world.meta[:last_seen]) %></td>
49
52
  <td>
50
53
  <%= erb :world_validation_result, locals: { world: world } %>
51
54
  </td>
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dynflow
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ivan Necas
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-06-13 00:00:00.000000000 Z
12
+ date: 2018-07-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: multi_json
@@ -483,6 +483,7 @@ files:
483
483
  - lib/dynflow/persistence_adapters/sequel_migrations/014_add_step_columns.rb
484
484
  - lib/dynflow/persistence_adapters/sequel_migrations/015_add_execution_plan_columns.rb
485
485
  - lib/dynflow/persistence_adapters/sequel_migrations/016_add_step_queue.rb
486
+ - lib/dynflow/persistence_adapters/sequel_migrations/017_add_delayed_plan_frozen.rb
486
487
  - lib/dynflow/rails.rb
487
488
  - lib/dynflow/rails/configuration.rb
488
489
  - lib/dynflow/rails/daemon.rb
@@ -498,6 +499,7 @@ files:
498
499
  - lib/dynflow/serializers/abstract.rb
499
500
  - lib/dynflow/serializers/noop.rb
500
501
  - lib/dynflow/stateful.rb
502
+ - lib/dynflow/telemetry.rb
501
503
  - lib/dynflow/testing.rb
502
504
  - lib/dynflow/testing/assertions.rb
503
505
  - lib/dynflow/testing/dummy_execution_plan.rb
@@ -525,6 +527,7 @@ files:
525
527
  - lib/dynflow/web/world_helpers.rb
526
528
  - lib/dynflow/web_console.rb
527
529
  - lib/dynflow/world.rb
530
+ - lib/dynflow/world/invalidation.rb
528
531
  - test/abnormal_states_recovery_test.rb
529
532
  - test/action_test.rb
530
533
  - test/activejob_adapter_test.rb