dynflow 1.0.5 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e36392c5ebe5f3a20b879eca68d46ff0ee715985d7107af64f69b4ef256bf19e
4
- data.tar.gz: 3177acf84d594013c47f5a6413fc56f47dbb121727c5549948ef2f30ec2099de
3
+ metadata.gz: 6ee35eec200e14b25add8941b4b4637e994012053a271898c4abc4a70234942e
4
+ data.tar.gz: 720fc9161e5aadff8f165c12f6bb278cfa65435ef7a1d96d93df8614d21a1de8
5
5
  SHA512:
6
- metadata.gz: 41e469d615fb041a1323b6d512f31b57aafe55de76b35212b1d615e249822214aeda51f5710221932d4d7e78aae9515f500f80278b7d056f42403c24a28cdd31
7
- data.tar.gz: b7a202cf192a9d7af21e556971876867f180eb2a6319cd624075af854bb2a64ed0cd12435076261dc71aad4439ff01ebc3c2216799f85d0dbff30e0d495658d7
6
+ metadata.gz: 1753d21be5307643a16704a27b0e343fe55a7e74330469d1eece503d58cfa9fd04be1da57918e6e09e73f7e2822f2be2bd517e9eea58b89432bee7226d51ab93
7
+ data.tar.gz: ac12ce027be3e289227db4a2b5311328342de364c0f5d51f5badc27e7ba484a6f137488ce681d319e050b1a04ef55ac4821d0d87a06d6d70019f3b2c6e889fca
@@ -98,13 +98,21 @@ module Dynflow
98
98
  end
99
99
 
100
100
  config_attr :executor, Executors::Abstract, FalseClass do |world, config|
101
- Executors::Parallel.new(world, config.queues)
101
+ Executors::Parallel.new(world, config.executor_heartbeat_interval, config.queues)
102
102
  end
103
103
 
104
104
  config_attr :executor_semaphore, Semaphores::Abstract, FalseClass do |world, config|
105
105
  Semaphores::Dummy.new
106
106
  end
107
107
 
108
+ config_attr :executor_heartbeat_interval, Integer do
109
+ 15
110
+ end
111
+
112
+ config_attr :ping_cache_age, Integer do
113
+ 60
114
+ end
115
+
108
116
  config_attr :connector, Connectors::Abstract do |world|
109
117
  Connectors::Direct.new(world)
110
118
  end
@@ -118,7 +126,7 @@ module Dynflow
118
126
  end
119
127
 
120
128
  config_attr :validity_check_timeout, Numeric do
121
- 5
129
+ 30
122
130
  end
123
131
 
124
132
  config_attr :exit_on_terminate, Algebrick::Types::Boolean do
@@ -7,11 +7,18 @@ module Dynflow
7
7
  def initialize(world, options = {})
8
8
  @world = world
9
9
  @options = options
10
+ @started = false
10
11
  spawn
11
12
  end
12
13
 
14
+ def started?
15
+ @started
16
+ end
17
+
13
18
  def start
14
- @core.ask(:start)
19
+ @core.ask(:start).tap do
20
+ @started = true
21
+ end
15
22
  end
16
23
 
17
24
  def terminate
@@ -46,6 +46,7 @@ module Dynflow
46
46
  def process(delayed_plans, check_time)
47
47
  processed_plan_uuids = []
48
48
  delayed_plans.each do |plan|
49
+ next if plan.frozen
49
50
  fix_plan_state(plan)
50
51
  with_error_handling do
51
52
  if plan.execution_plan.state != :scheduled
@@ -3,14 +3,16 @@ module Dynflow
3
3
 
4
4
  include Algebrick::TypeCheck
5
5
 
6
- attr_reader :execution_plan_uuid, :start_at, :start_before
6
+ attr_reader :execution_plan_uuid, :start_before
7
+ attr_accessor :frozen, :start_at
7
8
 
8
- def initialize(world, execution_plan_uuid, start_at, start_before, args_serializer)
9
+ def initialize(world, execution_plan_uuid, start_at, start_before, args_serializer, frozen)
9
10
  @world = Type! world, World
10
11
  @execution_plan_uuid = Type! execution_plan_uuid, String
11
12
  @start_at = Type! start_at, Time, NilClass
12
13
  @start_before = Type! start_before, Time, NilClass
13
14
  @args_serializer = Type! args_serializer, Serializers::Abstract
15
+ @frozen = Type! frozen, Algebrick::Types::Boolean
14
16
  end
15
17
 
16
18
  def execution_plan
@@ -55,7 +57,8 @@ module Dynflow
55
57
  :start_at => @start_at,
56
58
  :start_before => @start_before,
57
59
  :serialized_args => @args_serializer.serialized_args,
58
- :args_serializer => @args_serializer.class.name
60
+ :args_serializer => @args_serializer.class.name,
61
+ :frozen => @frozen
59
62
  end
60
63
 
61
64
  # Retrieves arguments from the serializer
@@ -73,7 +76,8 @@ module Dynflow
73
76
  hash[:execution_plan_uuid],
74
77
  string_to_time(hash[:start_at]),
75
78
  string_to_time(hash[:start_before]),
76
- serializer)
79
+ serializer,
80
+ hash[:frozen] || false)
77
81
  rescue NameError => e
78
82
  error(e.message)
79
83
  end
@@ -12,7 +12,8 @@ module Dynflow
12
12
  end
13
13
 
14
14
  Ping = type do
15
- fields! receiver_id: String
15
+ fields! receiver_id: String,
16
+ use_cache: type { variants TrueClass, FalseClass }
16
17
  end
17
18
 
18
19
  Status = type do
@@ -26,16 +26,96 @@ module Dynflow
26
26
  end
27
27
  end
28
28
 
29
- def initialize(world)
29
+ # Class used for reducing the number of sent Pings among worlds.
30
+ # A world's coordinator record includes the time when the world was
31
+ # seen for the last time. This class can be used to query this
32
+ # information and determine whether the record is "fresh enough"
33
+ # or whether the Ping really needs to be sent.
34
+ class PingCache
35
+ # Format string used for formatting and parsing times
36
+ TIME_FORMAT = '%Y-%m-%d %H:%M:%S.%L'.freeze
37
+ DEFAULT_MAX_AGE = 60
38
+
39
+ # Formats time into a string
40
+ #
41
+ # @param time [Time] the time to format
42
+ # @return [String] the formatted time
43
+ def self.format_time(time = Time.now)
44
+ time.strftime(TIME_FORMAT)
45
+ end
46
+
47
+ # Parses time from a string
48
+ #
49
+ # @param time [String] the time string to parse
50
+ # @return [Time] the parsed time
51
+ def self.load_time(time)
52
+ Time.strptime(time, TIME_FORMAT)
53
+ end
54
+
55
+ # @param world [World] the world to which the PingCache belongs
56
+ def initialize(world, max_age = DEFAULT_MAX_AGE)
57
+ @world = world
58
+ @max_age = max_age
59
+ @executor = {}
60
+ end
61
+
62
+ # Records when the world was last seen in the world's coordinator record
63
+ #
64
+ # @param id [String] Id of the world to be added to the cache
65
+ # @param time [Time] Time when the world was last seen
66
+ def add_record(id, time = Time.now)
67
+ record = find_world id
68
+ @executor[id] ||= record.data[:class] == 'Dynflow::Coordinator::ExecutorWorld'
69
+ record.data[:meta].update(:last_seen => self.class.format_time(time))
70
+ @world.coordinator.update_record(record)
71
+ end
72
+
73
+ # Looks into the cache whether the world has an executor
74
+ #
75
+ # @param id [String] Id of the world
76
+ # @return [TrueClass] if the world has an executor
77
+ # @return [FalseClass] if the world is a client world
78
+ # @return [NilClass] if unknown
79
+ def executor?(id)
80
+ @executor[id]
81
+ end
82
+
83
+ # Loads the coordinator record from the database and checks whether the world
84
+ # was last seen within the time limit
85
+ #
86
+ # @param id [String] Id of the world to be checked
87
+ # @return [TrueClass] if the world was last seen within the limit
88
+ # @return [FalseClass] if the record is missing or the world was last seen longer ago than the limit
89
+ def fresh_record?(id)
90
+ record = find_world(id)
91
+ return false if record.nil?
92
+ @executor[id] = record.data[:class] == 'Dynflow::Coordinator::ExecutorWorld'
93
+ time = self.class.load_time(record.data[:meta][:last_seen])
94
+ time >= Time.now - @max_age
95
+ end
96
+
97
+ private
98
+
99
+ def find_world(id)
100
+ @world.coordinator.find_records(:id => id,
101
+ :class => ['Dynflow::Coordinator::ExecutorWorld', 'Dynflow::Coordinator::ClientWorld']).first
102
+ end
103
+ end
104
+
105
+ attr_reader :ping_cache
106
+ def initialize(world, ping_cache_age)
30
107
  @world = Type! world, World
31
108
  @last_id = 0
32
109
  @tracked_requests = {}
33
110
  @terminated = nil
111
+ @ping_cache = PingCache.new world, ping_cache_age
34
112
  end
35
113
 
36
114
  def publish_request(future, request, timeout)
37
- track_request(future, request, timeout) do |tracked_request|
38
- dispatch_request(request, @world.id, tracked_request.id)
115
+ with_ping_request_caching(request, future) do
116
+ track_request(future, request, timeout) do |tracked_request|
117
+ dispatch_request(request, @world.id, tracked_request.id)
118
+ end
39
119
  end
40
120
  end
41
121
 
@@ -58,7 +138,7 @@ module Dynflow
58
138
  (on ~Event do |event|
59
139
  find_executor(event.execution_plan_id)
60
140
  end),
61
- (on Ping.(~any) | Status.(~any, ~any) do |receiver_id, _|
141
+ (on Ping.(~any, ~any) | Status.(~any, ~any) do |receiver_id, _|
62
142
  receiver_id
63
143
  end)
64
144
  envelope = Envelope[request_id, client_world_id, executor_id, request]
@@ -80,7 +160,11 @@ module Dynflow
80
160
  (on ~Failed do |msg|
81
161
  resolve_tracked_request(envelope.request_id, Dynflow::Error.new(msg.error))
82
162
  end),
83
- (on Done | Pong do
163
+ (on Done do
164
+ resolve_tracked_request(envelope.request_id)
165
+ end),
166
+ (on Pong do
167
+ add_ping_cache_record(envelope.sender_id)
84
168
  resolve_tracked_request(envelope.request_id)
85
169
  end),
86
170
  (on ExecutionStatus.(~any) do |steps|
@@ -88,6 +172,15 @@ module Dynflow
88
172
  end)
89
173
  end
90
174
 
175
+ # Records, using the PingCache, when the world with the provided id was last seen
176
+ #
177
+ # @param id [String] Id of the world
178
+ # @see PingCache#add_record
179
+ def add_ping_cache_record(id)
180
+ log Logger::DEBUG, "adding ping cache record for #{id}"
181
+ @ping_cache.add_record id
182
+ end
183
+
91
184
  private
92
185
 
93
186
  def find_executor(execution_plan_id)
@@ -141,6 +234,27 @@ module Dynflow
141
234
  end
142
235
  end
143
236
 
237
+ # Tries to reduce the number of sent Ping requests by first looking into a cache. If the
238
+ # destination world is an executor world, the result is resolved solely from the cache.
239
+ # For client worlds the Ping might be sent if the cache record is stale.
240
+ #
241
+ # @param request [Dynflow::Dispatcher::Request] the request to send
242
+ # @param future [Concurrent::Future] the future to fulfill if the world was seen recently
243
+ # @return [Concurrent::Future] the future tracking the request
244
+ def with_ping_request_caching(request, future)
245
+ return yield unless request.is_a?(Dynflow::Dispatcher::Ping)
246
+ return yield unless request.use_cache
247
+
248
+ if @ping_cache.fresh_record?(request.receiver_id)
249
+ future.success(true)
250
+ else
251
+ if @ping_cache.executor?(request.receiver_id)
252
+ future.fail
253
+ else
254
+ yield
255
+ end
256
+ end
257
+ end
144
258
  end
145
259
  end
146
260
  end
@@ -252,7 +252,8 @@ module Dynflow
252
252
  id,
253
253
  delay_options[:start_at],
254
254
  delay_options.fetch(:start_before, nil),
255
- serializer)
255
+ serializer,
256
+ delay_options[:frozen] || false)
256
257
  persistence.save_delayed_plan(delayed_plan)
257
258
  ensure
258
259
  update_state(error? ? :stopped : :scheduled)
@@ -5,10 +5,10 @@ module Dynflow
5
5
  require 'dynflow/executors/parallel/pool'
6
6
  require 'dynflow/executors/parallel/worker'
7
7
 
8
- def initialize(world, queues_options = { :default => { :pool_size => 5 }})
8
+ def initialize(world, heartbeat_interval, queues_options = { :default => { :pool_size => 5 }})
9
9
  super(world)
10
10
  @core = Core.spawn name: 'parallel-executor-core',
11
- args: [world, queues_options],
11
+ args: [world, heartbeat_interval, queues_options],
12
12
  initialized: @core_initialized = Concurrent.future
13
13
  end
14
14
 
@@ -4,15 +4,17 @@ module Dynflow
4
4
  class Core < Actor
5
5
  attr_reader :logger
6
6
 
7
- def initialize(world, queues_options)
7
+ def initialize(world, heartbeat_interval, queues_options)
8
8
  @logger = world.logger
9
9
  @world = Type! world, World
10
10
  @queues_options = queues_options
11
11
  @pools = {}
12
12
  @terminated = nil
13
13
  @director = Director.new(@world)
14
+ @heartbeat_interval = heartbeat_interval
14
15
 
15
16
  initialize_queues
17
+ schedule_heartbeat
16
18
  end
17
19
 
18
20
  def initialize_queues
@@ -78,8 +80,27 @@ module Dynflow
78
80
  end
79
81
  end
80
82
 
83
+ def heartbeat
84
+ @logger.debug('Executor heartbeat')
85
+ record = @world.coordinator.find_records(:id => @world.id,
86
+ :class => ['Dynflow::Coordinator::ExecutorWorld', 'Dynflow::Coordinator::ClientWorld']).first
87
+ unless record
88
+ logger.error(%{Executor's world record for #{@world.id} missing: terminating})
89
+ @world.terminate
90
+ return
91
+ end
92
+
93
+ record.data[:meta].update(:last_seen => Dynflow::Dispatcher::ClientDispatcher::PingCache.format_time)
94
+ @world.coordinator.update_record(record)
95
+ schedule_heartbeat
96
+ end
97
+
81
98
  private
82
99
 
100
+ def schedule_heartbeat
101
+ @world.clock.ping(self, @heartbeat_interval, :heartbeat)
102
+ end
103
+
83
104
  def on_message(message)
84
105
  super
85
106
  rescue Errors::PersistenceError => e
@@ -93,6 +93,13 @@ module Dynflow
93
93
  adapter.save_delayed_plan(delayed_plan.execution_plan_uuid, delayed_plan.to_hash)
94
94
  end
95
95
 
96
+ def set_delayed_plan_frozen(execution_plan_id, frozen = true, new_start_at = nil)
97
+ plan = load_delayed_plan(execution_plan_id)
98
+ plan.frozen = frozen
99
+ plan.start_at = new_start_at if new_start_at
100
+ save_delayed_plan(plan)
101
+ end
102
+
96
103
  def load_delayed_plan(execution_plan_id)
97
104
  hash = adapter.load_delayed_plan(execution_plan_id)
98
105
  return nil unless hash
@@ -36,7 +36,7 @@ module Dynflow
36
36
  class action_class execution_plan_uuid queue),
37
37
  envelope: %w(receiver_id),
38
38
  coordinator_record: %w(id owner_id class),
39
- delayed: %w(execution_plan_uuid start_at start_before args_serializer)}
39
+ delayed: %w(execution_plan_uuid start_at start_before args_serializer frozen)}
40
40
 
41
41
  SERIALIZABLE_COLUMNS = { action: %w(input output),
42
42
  delayed: %w(serialized_args),
@@ -125,6 +125,7 @@ module Dynflow
125
125
  table_name = :delayed
126
126
  table(table_name)
127
127
  .where(::Sequel.lit('start_at <= ? OR (start_before IS NOT NULL AND start_before <= ?)', time, time))
128
+ .where(:frozen => false)
128
129
  .order_by(:start_at)
129
130
  .all
130
131
  .map { |plan| load_data(plan, table_name) }
@@ -0,0 +1,8 @@
1
+ Sequel.migration do
2
+ change do
3
+ alter_table(:dynflow_delayed_plans) do
4
+ add_column :frozen, :boolean
5
+ end
6
+ self[:dynflow_delayed_plans].update(:frozen => false)
7
+ end
8
+ end
@@ -42,8 +42,9 @@ module Dynflow
42
42
  config.run_on_init_hooks(world)
43
43
  # leave this just for long-running executors
44
44
  unless config.rake_task_with_executor?
45
- world.perform_validity_checks
45
+ invalidated_worlds = world.perform_validity_checks
46
46
  world.auto_execute
47
+ world.post_initialization if invalidated_worlds > 0
47
48
  end
48
49
  end
49
50
  end
@@ -155,7 +155,9 @@ module Dynflow
155
155
  if remote?
156
156
  false
157
157
  else
158
- ::Dynflow::Executors::Parallel.new(world, world.config.queues)
158
+ ::Dynflow::Executors::Parallel.new(world,
159
+ world.config.executor_heartbeat_interval,
160
+ world.config.queues)
159
161
  end
160
162
  end
161
163
 
File without changes
@@ -1,3 +1,3 @@
1
1
  module Dynflow
2
- VERSION = '1.0.5'.freeze
2
+ VERSION = '1.1.0'.freeze
3
3
  end
@@ -50,14 +50,14 @@ module Dynflow
50
50
  end
51
51
 
52
52
  post('/worlds/check') do
53
- load_worlds
54
53
  @validation_results = world.worlds_validity_check(params[:invalidate])
54
+ load_worlds
55
55
  erb :worlds
56
56
  end
57
57
 
58
58
  post('/worlds/:id/check') do |id|
59
- load_worlds
60
59
  @validation_results = world.worlds_validity_check(params[:invalidate], id: params[:id])
60
+ load_worlds
61
61
  erb :worlds
62
62
  end
63
63
 
@@ -1,8 +1,11 @@
1
1
  # -*- coding: utf-8 -*-
2
+ require 'dynflow/world/invalidation'
3
+
2
4
  module Dynflow
3
5
  class World
4
6
  include Algebrick::TypeCheck
5
7
  include Algebrick::Matching
8
+ include Invalidation
6
9
 
7
10
  attr_reader :id, :config, :client_dispatcher, :executor_dispatcher, :executor, :connector,
8
11
  :transaction_adapter, :logger_adapter, :coordinator,
@@ -28,7 +31,7 @@ module Dynflow
28
31
  @connector = @config.connector
29
32
  @middleware = Middleware::World.new
30
33
  @middleware.use Middleware::Common::Transaction if @transaction_adapter
31
- @client_dispatcher = spawn_and_wait(Dispatcher::ClientDispatcher, "client-dispatcher", self)
34
+ @client_dispatcher = spawn_and_wait(Dispatcher::ClientDispatcher, "client-dispatcher", self, @config.ping_cache_age)
32
35
  @dead_letter_handler = spawn_and_wait(DeadLetterSilencer, 'default_dead_letter_handler', @config.silent_dead_letter_matchers)
33
36
  @auto_validity_check = @config.auto_validity_check
34
37
  @validity_check_timeout = @config.validity_check_timeout
@@ -41,15 +44,9 @@ module Dynflow
41
44
  @executor_dispatcher = spawn_and_wait(Dispatcher::ExecutorDispatcher, "executor-dispatcher", self, @config.executor_semaphore)
42
45
  executor.initialized.wait
43
46
  end
47
+ update_register
44
48
  perform_validity_checks if auto_validity_check
45
49
 
46
- @delayed_executor = try_spawn(:delayed_executor, Coordinator::DelayedExecutorLock)
47
- @execution_plan_cleaner = try_spawn(:execution_plan_cleaner, Coordinator::ExecutionPlanCleanerLock)
48
- @meta = @config.meta
49
- @meta['queues'] = @config.queues if @executor
50
- @meta['delayed_executor'] = true if @delayed_executor
51
- @meta['execution_plan_cleaner'] = true if @execution_plan_cleaner
52
- coordinator.register_world(registered_world)
53
50
  @termination_barrier = Mutex.new
54
51
  @before_termination_hooks = Queue.new
55
52
 
@@ -59,14 +56,38 @@ module Dynflow
59
56
  self.terminate.wait
60
57
  end
61
58
  end
59
+ post_initialization
60
+ end
61
+
62
+ # Performs steps once the executor is ready and invalidation of previous worlds is finished.
63
+ # Needs to be idempotent, as it can be called several times (especially when auto_validity_check
64
+ # is false, as it should be called after the `perform_validity_checks` method)
65
+ def post_initialization
66
+ @delayed_executor ||= try_spawn(:delayed_executor, Coordinator::DelayedExecutorLock)
67
+ @execution_plan_cleaner ||= try_spawn(:execution_plan_cleaner, Coordinator::ExecutionPlanCleanerLock)
68
+ update_register
69
+ @delayed_executor.start if @delayed_executor && !@delayed_executor.started?
62
70
  self.auto_execute if @config.auto_execute
63
- @delayed_executor.start if @delayed_executor
64
71
  end
65
72
 
66
73
  def before_termination(&block)
67
74
  @before_termination_hooks << block
68
75
  end
69
76
 
77
+ def update_register
78
+ @meta ||= @config.meta
79
+ @meta['queues'] = @config.queues if @executor
80
+ @meta['delayed_executor'] = true if @delayed_executor
81
+ @meta['execution_plan_cleaner'] = true if @execution_plan_cleaner
82
+ @meta['last_seen'] = Dynflow::Dispatcher::ClientDispatcher::PingCache.format_time
83
+ if @already_registered
84
+ coordinator.update_record(registered_world)
85
+ else
86
+ coordinator.register_world(registered_world)
87
+ @already_registered = true
88
+ end
89
+ end
90
+
70
91
  def registered_world
71
92
  if executor
72
93
  Coordinator::ExecutorWorld.new(self)
@@ -193,7 +214,11 @@ module Dynflow
193
214
  end
194
215
 
195
216
  def ping(world_id, timeout, done = Concurrent.future)
196
- publish_request(Dispatcher::Ping[world_id], done, false, timeout)
217
+ publish_request(Dispatcher::Ping[world_id, true], done, false, timeout)
218
+ end
219
+
220
+ def ping_without_cache(world_id, timeout, done = Concurrent.future)
221
+ publish_request(Dispatcher::Ping[world_id, false], done, false, timeout)
197
222
  end
198
223
 
199
224
  def get_execution_status(world_id, execution_plan_id, timeout, done = Concurrent.future)
@@ -270,121 +295,7 @@ module Dynflow
270
295
  defined?(@terminating)
271
296
  end
272
297
 
273
- # Invalidate another world, that left some data in the runtime,
274
- # but it's not really running
275
- def invalidate(world)
276
- Type! world, Coordinator::ClientWorld, Coordinator::ExecutorWorld
277
- coordinator.acquire(Coordinator::WorldInvalidationLock.new(self, world)) do
278
- if world.is_a? Coordinator::ExecutorWorld
279
- old_execution_locks = coordinator.find_locks(class: Coordinator::ExecutionLock.name,
280
- owner_id: "world:#{world.id}")
281
-
282
- coordinator.deactivate_world(world)
283
-
284
- old_execution_locks.each do |execution_lock|
285
- invalidate_execution_lock(execution_lock)
286
- end
287
- end
288
-
289
- coordinator.delete_world(world)
290
- end
291
- end
292
-
293
- def invalidate_execution_lock(execution_lock)
294
- begin
295
- plan = persistence.load_execution_plan(execution_lock.execution_plan_id)
296
- rescue => e
297
- if e.is_a?(KeyError)
298
- logger.error "invalidated execution plan #{execution_lock.execution_plan_id} missing, skipping"
299
- else
300
- logger.error e
301
- logger.error "unexpected error when invalidating execution plan #{execution_lock.execution_plan_id}, skipping"
302
- end
303
- coordinator.release(execution_lock)
304
- coordinator.release_by_owner(execution_lock.execution_plan_id)
305
- return
306
- end
307
- unless plan.valid?
308
- logger.error "invalid plan #{plan.id}, skipping"
309
- coordinator.release(execution_lock)
310
- coordinator.release_by_owner(execution_lock.execution_plan_id)
311
- return
312
- end
313
- plan.execution_history.add('terminate execution', execution_lock.world_id)
314
-
315
- plan.steps.values.each do |step|
316
- if step.state == :running
317
- step.error = ExecutionPlan::Steps::Error.new("Abnormal termination (previous state: #{step.state})")
318
- step.state = :error
319
- step.save
320
- end
321
- end
322
-
323
- plan.update_state(:paused) if plan.state == :running
324
- plan.save
325
- coordinator.release(execution_lock)
326
-
327
- available_executors = coordinator.find_worlds(true)
328
- if available_executors.any? && !plan.error?
329
- client_dispatcher.tell([:dispatch_request,
330
- Dispatcher::Execution[execution_lock.execution_plan_id],
331
- execution_lock.client_world_id,
332
- execution_lock.request_id])
333
- end
334
- rescue Errors::PersistenceError
335
- logger.error "failed to write data while invalidating execution lock #{execution_lock}"
336
- end
337
-
338
- def perform_validity_checks
339
- worlds_validity_check
340
- locks_validity_check
341
- end
342
-
343
- def worlds_validity_check(auto_invalidate = true, worlds_filter = {})
344
- worlds = coordinator.find_worlds(false, worlds_filter)
345
-
346
- world_checks = worlds.reduce({}) do |hash, world|
347
- hash.update(world => ping(world.id, self.validity_check_timeout))
348
- end
349
- world_checks.values.each(&:wait)
350
-
351
- results = {}
352
- world_checks.each do |world, check|
353
- if check.success?
354
- result = :valid
355
- else
356
- if auto_invalidate
357
- begin
358
- invalidate(world)
359
- result = :invalidated
360
- rescue => e
361
- logger.error e
362
- result = e.message
363
- end
364
- else
365
- result = :invalid
366
- end
367
- end
368
- results[world.id] = result
369
- end
370
-
371
- unless results.values.all? { |result| result == :valid }
372
- logger.error "invalid worlds found #{results.inspect}"
373
- end
374
-
375
- return results
376
- end
377
-
378
- def locks_validity_check
379
- orphaned_locks = coordinator.clean_orphaned_locks
380
-
381
- unless orphaned_locks.empty?
382
- logger.error "invalid coordinator locks found and invalidated: #{orphaned_locks.inspect}"
383
- end
384
-
385
- return orphaned_locks
386
- end
387
-
298
+ # 24119 - ensure delayed executor is preserved after invalidation
388
299
  # executes plans that are planned/paused and haven't reported any error yet (usually when no executor
389
300
  # was available by the time of planning or terminating)
390
301
  def auto_execute
@@ -0,0 +1,160 @@
1
+ module Dynflow
2
+ class World
3
+ module Invalidation
4
+ # Invalidate another world, that left some data in the runtime,
5
+ # but it's not really running
6
+ #
7
+ # @param world [Coordinator::ClientWorld, Coordinator::ExecutorWorld] coordinator record
8
+ # left behind by the world we're trying to invalidate
9
+ # @return [void]
10
+ def invalidate(world)
11
+ Type! world, Coordinator::ClientWorld, Coordinator::ExecutorWorld
12
+ coordinator.acquire(Coordinator::WorldInvalidationLock.new(self, world)) do
13
+ if world.is_a? Coordinator::ExecutorWorld
14
+ old_execution_locks = coordinator.find_locks(class: Coordinator::ExecutionLock.name,
15
+ owner_id: "world:#{world.id}")
16
+
17
+ coordinator.deactivate_world(world)
18
+
19
+ old_execution_locks.each do |execution_lock|
20
+ invalidate_execution_lock(execution_lock)
21
+ end
22
+ end
23
+
24
+ coordinator.delete_world(world)
25
+ end
26
+ end
27
+
28
+ # Invalidate an execution lock, left behind by an executor that
29
+ # was executing an execution plan when it was terminated.
30
+ #
31
+ # @param execution_lock [Coordinator::ExecutionLock] the lock to invalidate
32
+ # @return [void]
33
+ def invalidate_execution_lock(execution_lock)
34
+ with_valid_execution_plan_for_lock(execution_lock) do |plan|
35
+ plan.execution_history.add('terminate execution', execution_lock.world_id)
36
+
37
+ plan.steps.values.each do |step|
38
+ if step.state == :running
39
+ step.error = ExecutionPlan::Steps::Error.new("Abnormal termination (previous state: #{step.state})")
40
+ step.state = :error
41
+ step.save
42
+ end
43
+ end
44
+
45
+ plan.update_state(:paused) if plan.state == :running
46
+ plan.save
47
+ coordinator.release(execution_lock)
48
+
49
+ if plan.error?
50
+ rescue_id = plan.rescue_plan_id
51
+ execute(rescue_id) if rescue_id
52
+ else
53
+ if coordinator.find_worlds(true).any? # Check if there are any executors
54
+ client_dispatcher.tell([:dispatch_request,
55
+ Dispatcher::Execution[execution_lock.execution_plan_id],
56
+ execution_lock.client_world_id,
57
+ execution_lock.request_id])
58
+ end
59
+ end
60
+ end
61
+ rescue Errors::PersistenceError
62
+ logger.error "failed to write data while invalidating execution lock #{execution_lock}"
63
+ end
64
+
65
+ # Tries to load an execution plan using id stored in the
66
+ # lock. If the execution plan cannot be loaded or is invalid,
67
+ # the lock is released. If the plan gets loaded successfully, it
68
+ # is yielded to a given block.
69
+ #
70
+ # @param execution_lock [Coordinator::ExecutionLock] the lock for which we're trying
71
+ # to load the execution plan
72
+ # @yieldparam [ExecutionPlan] execution_plan the successfully loaded execution plan
73
+ # @return [void]
74
+ def with_valid_execution_plan_for_lock(execution_lock)
75
+ begin
76
+ plan = persistence.load_execution_plan(execution_lock.execution_plan_id)
77
+ rescue => e
78
+ if e.is_a?(KeyError)
79
+ logger.error "invalidated execution plan #{execution_lock.execution_plan_id} missing, skipping"
80
+ else
81
+ logger.error e
82
+ logger.error "unexpected error when invalidating execution plan #{execution_lock.execution_plan_id}, skipping"
83
+ end
84
+ coordinator.release(execution_lock)
85
+ coordinator.release_by_owner(execution_lock.execution_plan_id)
86
+ return
87
+ end
88
+ unless plan.valid?
89
+ logger.error "invalid plan #{plan.id}, skipping"
90
+ coordinator.release(execution_lock)
91
+ coordinator.release_by_owner(execution_lock.execution_plan_id)
92
+ return
93
+ end
94
+ yield plan
95
+ end
96
+
97
+ # Performs world validity checks
98
+ #
99
+ # @return [Integer] number of invalidated worlds
100
+ def perform_validity_checks
101
+ world_invalidation_result = worlds_validity_check
102
+ locks_validity_check
103
+ world_invalidation_result.values.select { |result| result == :invalidated }.size
104
+ end
105
+
106
+ # Checks if all worlds are valid and optionally invalidates them
107
+ #
108
+ # @param auto_invalidate [Boolean] whether automatic invalidation should be performed
109
+ # @param worlds_filter [Hash] hash of filters to select only matching worlds
110
+ # @return [Hash{String=>Symbol}] hash containing validation results, mapping world id to a result
111
+ def worlds_validity_check(auto_invalidate = true, worlds_filter = {})
112
+ worlds = coordinator.find_worlds(false, worlds_filter)
113
+
114
+ world_checks = worlds.reduce({}) do |hash, world|
115
+ hash.update(world => ping_without_cache(world.id, self.validity_check_timeout))
116
+ end
117
+ world_checks.values.each(&:wait)
118
+
119
+ results = {}
120
+ world_checks.each do |world, check|
121
+ if check.success?
122
+ result = :valid
123
+ else
124
+ if auto_invalidate
125
+ begin
126
+ invalidate(world)
127
+ result = :invalidated
128
+ rescue => e
129
+ logger.error e
130
+ result = e.message
131
+ end
132
+ else
133
+ result = :invalid
134
+ end
135
+ end
136
+ results[world.id] = result
137
+ end
138
+
139
+ unless results.values.all? { |result| result == :valid }
140
+ logger.error "invalid worlds found #{results.inspect}"
141
+ end
142
+
143
+ return results
144
+ end
145
+
146
+ # Cleans up locks which don't have a resource
147
+ #
148
+ # @return [Array<Coordinator::Lock>] the removed locks
149
+ def locks_validity_check
150
+ orphaned_locks = coordinator.clean_orphaned_locks
151
+
152
+ unless orphaned_locks.empty?
153
+ logger.error "invalid coordinator locks found and invalidated: #{orphaned_locks.inspect}"
154
+ end
155
+
156
+ return orphaned_locks
157
+ end
158
+ end
159
+ end
160
+ end
@@ -72,6 +72,33 @@ module Dynflow
72
72
  end
73
73
  end
74
74
 
75
+ it "honors rescue strategy when invalidating execution locks" do
76
+ coordinator = executor_world_2.coordinator
77
+ # Plan and action
78
+ plan = client_world.plan(Support::DummyExample::SkippableDummy)
79
+ plan.update_state :running
80
+ plan.save
81
+
82
+ # Simulate leaving behind an execution lock for it
83
+ lock = Coordinator::ExecutionLock.new(executor_world, plan.id, client_world.id, 0)
84
+ coordinator.acquire(lock)
85
+
86
+ # Simulate abnormal termination
87
+ step = plan.steps.values.last
88
+ step.state = :error
89
+ step.save
90
+
91
+ # Invalidate the world's lock
92
+ world_lock = coordinator.find_worlds(false, :id => executor_world.id).first
93
+ executor_world_2.invalidate(world_lock)
94
+
95
+ wait_for do
96
+ plan = executor_world_2.persistence.load_execution_plan(plan.id)
97
+ step = plan.steps.values.last
98
+ plan.state == :stopped && step.state == :skipped
99
+ end
100
+ end
101
+
75
102
  it "prevents from running the invalidation twice on the same world" do
76
103
  client_world.invalidate(executor_world.registered_world)
77
104
  expected_locks = ["lock world-invalidation:#{executor_world.id}",
@@ -194,7 +221,7 @@ module Dynflow
194
221
  client_world_config = Config::ForWorld.new(Config.new.tap { |c| c.executor = false }, create_world )
195
222
  client_world_config.auto_validity_check.must_equal false
196
223
 
197
- executor_world_config = Config::ForWorld.new(Config.new.tap { |c| c.executor = lambda { |w, _| Executors::Parallel.new(w) } }, create_world )
224
+ executor_world_config = Config::ForWorld.new(Config.new.tap { |c| c.executor = lambda { |w, _| Executors::Parallel.new(w, 15) } }, create_world )
198
225
  executor_world_config.auto_validity_check.must_equal true
199
226
  end
200
227
 
@@ -17,7 +17,7 @@ class DaemonTest < ActiveSupport::TestCase
17
17
  @dummy_world = ::Dynflow::Testing::DummyWorld.new
18
18
  @dummy_world.stubs(:id => '123')
19
19
  @dummy_world.stubs(:auto_execute)
20
- @dummy_world.stubs(:perform_validity_checks)
20
+ @dummy_world.stubs(:perform_validity_checks => 0)
21
21
  @event = Concurrent.event
22
22
  @dummy_world.stubs(:terminated).returns(@event)
23
23
  @world_class.stubs(:new).returns(@dummy_world)
@@ -42,7 +42,14 @@ class DaemonTest < ActiveSupport::TestCase
42
42
  @event.wait
43
43
  end
44
44
 
45
- test 'run command works withou memory_limit option specified' do
45
+ test 'run command works without memory_limit option specified' do
46
+ @daemon.run(@current_folder)
47
+ @dynflow.initialize!
48
+ end
49
+
50
+ test 'runs post_initialization when there are invalid worlds detected' do
51
+ @dummy_world.stubs(:perform_validity_checks => 1)
52
+ @dummy_world.expects(:post_initialization)
46
53
  @daemon.run(@current_folder)
47
54
  @dynflow.initialize!
48
55
  end
@@ -76,12 +76,41 @@ module Dynflow
76
76
  assert ping_response.success?
77
77
  end
78
78
 
79
+ it 'succeeds when the world is available without cache' do
80
+ ping_response = client_world.ping_without_cache(executor_world.id, 0.5)
81
+ ping_response.wait
82
+ assert ping_response.success?
83
+ end
84
+
79
85
  it 'time-outs when the world is not responding' do
80
86
  executor_world.terminate.wait
81
87
  ping_response = client_world.ping(executor_world.id, 0.5)
82
88
  ping_response.wait
83
89
  assert ping_response.failed?
84
90
  end
91
+
92
+ it 'time-outs when the world is not responding without cache' do
93
+ executor_world.terminate.wait
94
+ ping_response = client_world.ping_without_cache(executor_world.id, 0.5)
95
+ ping_response.wait
96
+ assert ping_response.failed?
97
+ end
98
+
99
+ it 'caches the pings and pongs' do
100
+ # Spawn the worlds
101
+ client_world
102
+ executor_world
103
+
104
+ ping_cache = Dynflow::Dispatcher::ClientDispatcher::PingCache.new(executor_world)
105
+
106
+ # Records are fresh because of the heartbeat
107
+ assert ping_cache.fresh_record?(client_world.id)
108
+ assert ping_cache.fresh_record?(executor_world.id)
109
+
110
+ # Expire the record
111
+ ping_cache.add_record(executor_world.id, Time.now - 1000)
112
+ refute ping_cache.fresh_record?(executor_world.id)
113
+ end
85
114
  end
86
115
  end
87
116
 
@@ -177,7 +177,7 @@ module Dynflow
177
177
  let(:args) { %w(arg1 arg2) }
178
178
  let(:serializer) { Dynflow::Serializers::Noop.new(nil, args) }
179
179
  let(:delayed_plan) do
180
- Dynflow::DelayedPlan.new(Dynflow::World.allocate, 'an uuid', nil, nil, serializer)
180
+ Dynflow::DelayedPlan.new(Dynflow::World.allocate, 'an uuid', nil, nil, serializer, false)
181
181
  end
182
182
 
183
183
  it "allows access to serializer's args" do
@@ -302,16 +302,30 @@ module Dynflow
302
302
  it 'finds plans with start_before in past' do
303
303
  start_time = Time.now.utc
304
304
  prepare_and_save_plans
305
- adapter.save_delayed_plan('plan1', :execution_plan_uuid => 'plan1', :start_at => format_time(start_time + 60),
305
+ adapter.save_delayed_plan('plan1', :execution_plan_uuid => 'plan1', :frozen => false, :start_at => format_time(start_time + 60),
306
306
  :start_before => format_time(start_time - 60))
307
- adapter.save_delayed_plan('plan2', :execution_plan_uuid => 'plan2', :start_at => format_time(start_time - 60))
308
- adapter.save_delayed_plan('plan3', :execution_plan_uuid => 'plan3', :start_at => format_time(start_time + 60))
309
- adapter.save_delayed_plan('plan4', :execution_plan_uuid => 'plan4', :start_at => format_time(start_time - 60),
307
+ adapter.save_delayed_plan('plan2', :execution_plan_uuid => 'plan2', :frozen => false, :start_at => format_time(start_time - 60))
308
+ adapter.save_delayed_plan('plan3', :execution_plan_uuid => 'plan3', :frozen => false, :start_at => format_time(start_time + 60))
309
+ adapter.save_delayed_plan('plan4', :execution_plan_uuid => 'plan4', :frozen => false, :start_at => format_time(start_time - 60),
310
310
  :start_before => format_time(start_time - 60))
311
311
  plans = adapter.find_past_delayed_plans(start_time)
312
312
  plans.length.must_equal 3
313
313
  plans.map { |plan| plan[:execution_plan_uuid] }.must_equal %w(plan2 plan4 plan1)
314
314
  end
315
+
316
+ it 'does not find plans that are frozen' do
317
+ start_time = Time.now.utc
318
+ prepare_and_save_plans
319
+
320
+ adapter.save_delayed_plan('plan1', :execution_plan_uuid => 'plan1', :frozen => false, :start_at => format_time(start_time + 60),
321
+ :start_before => format_time(start_time - 60))
322
+ adapter.save_delayed_plan('plan2', :execution_plan_uuid => 'plan2', :frozen => true, :start_at => format_time(start_time + 60),
323
+ :start_before => format_time(start_time - 60))
324
+
325
+ plans = adapter.find_past_delayed_plans(start_time)
326
+ plans.length.must_equal 1
327
+ plans.first[:execution_plan_uuid].must_equal 'plan1'
328
+ end
315
329
  end
316
330
  end
317
331
 
@@ -6,6 +6,12 @@ module Support
6
6
  def run; end
7
7
  end
8
8
 
9
+ class SkippableDummy < Dummy
10
+ def rescue_strategy_for_self
11
+ Dynflow::Action::Rescue::Skip
12
+ end
13
+ end
14
+
9
15
  class MySerializer < Dynflow::Serializers::Noop
10
16
  def serialize(arg)
11
17
  raise 'Enforced serializer failure' if arg == :fail
@@ -10,6 +10,7 @@ module Dynflow
10
10
  describe '#meta' do
11
11
  it 'by default informs about the hostname and the pid running the world' do
12
12
  registered_world = world.coordinator.find_worlds(false, id: world.id).first
13
+ registered_world.meta.delete('last_seen')
13
14
  registered_world.meta.must_equal('hostname' => Socket.gethostname, 'pid' => Process.pid,
14
15
  'queues' => { 'default' => { 'pool_size' => 5 },
15
16
  'slow' => { 'pool_size' => 1 }})
@@ -9,7 +9,8 @@
9
9
  <h3>Executors</h3>
10
10
  <% @executors.each do |world| %>
11
11
  <%= value_field('Id', world.id) %>
12
- <%= value_field('Metadata', world.meta) %>
12
+ <%= value_field('Metadata', world.meta.reject { |key, _| key == 'last_seen' }) %>
13
+ <%= value_field('Last seen', world.meta['last_seen']) %>
13
14
  <p>
14
15
  <b>Status:</b>
15
16
  <%= erb :world_validation_result, locals: { world: world } %>
@@ -39,13 +40,15 @@
39
40
  <tr>
40
41
  <th>Id</th>
41
42
  <th>Meta</th>
43
+ <th>Last seen</th>
42
44
  <th></th>
43
45
  </tr>
44
46
  </thead>
45
47
  <% @clients.each do |world| %>
46
48
  <tr>
47
49
  <td><%= h(world.id) %></td>
48
- <td><%= h(world.meta) %></td>
50
+ <td><%= h(world.meta.reject { |key, _| key == 'last_seen' }) %></td>
51
+ <td><%= h(world.meta[:last_seen]) %></td>
49
52
  <td>
50
53
  <%= erb :world_validation_result, locals: { world: world } %>
51
54
  </td>
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dynflow
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ivan Necas
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-06-13 00:00:00.000000000 Z
12
+ date: 2018-07-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: multi_json
@@ -483,6 +483,7 @@ files:
483
483
  - lib/dynflow/persistence_adapters/sequel_migrations/014_add_step_columns.rb
484
484
  - lib/dynflow/persistence_adapters/sequel_migrations/015_add_execution_plan_columns.rb
485
485
  - lib/dynflow/persistence_adapters/sequel_migrations/016_add_step_queue.rb
486
+ - lib/dynflow/persistence_adapters/sequel_migrations/017_add_delayed_plan_frozen.rb
486
487
  - lib/dynflow/rails.rb
487
488
  - lib/dynflow/rails/configuration.rb
488
489
  - lib/dynflow/rails/daemon.rb
@@ -498,6 +499,7 @@ files:
498
499
  - lib/dynflow/serializers/abstract.rb
499
500
  - lib/dynflow/serializers/noop.rb
500
501
  - lib/dynflow/stateful.rb
502
+ - lib/dynflow/telemetry.rb
501
503
  - lib/dynflow/testing.rb
502
504
  - lib/dynflow/testing/assertions.rb
503
505
  - lib/dynflow/testing/dummy_execution_plan.rb
@@ -525,6 +527,7 @@ files:
525
527
  - lib/dynflow/web/world_helpers.rb
526
528
  - lib/dynflow/web_console.rb
527
529
  - lib/dynflow/world.rb
530
+ - lib/dynflow/world/invalidation.rb
528
531
  - test/abnormal_states_recovery_test.rb
529
532
  - test/action_test.rb
530
533
  - test/activejob_adapter_test.rb