aws-flow 2.4.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +8 -8
  2. data/aws-flow.gemspec +1 -0
  3. data/lib/aws/decider.rb +0 -1
  4. data/lib/aws/decider/starter.rb +6 -8
  5. data/lib/aws/decider/utilities.rb +6 -0
  6. data/lib/aws/decider/version.rb +1 -1
  7. data/lib/aws/decider/worker.rb +6 -0
  8. data/lib/aws/flow/future.rb +86 -6
  9. data/lib/aws/flow/implementation.rb +84 -13
  10. data/lib/aws/runner.rb +1 -1
  11. data/lib/aws/templates.rb +2 -0
  12. data/lib/aws/templates/activity.rb +41 -0
  13. data/lib/aws/templates/default.rb +11 -8
  14. data/lib/aws/templates/result.rb +183 -0
  15. data/lib/aws/templates/starter.rb +152 -226
  16. data/lib/aws/templates/utilities.rb +59 -0
  17. data/spec/aws/decider/integration/activity_spec.rb +1 -0
  18. data/spec/aws/decider/integration/options_spec.rb +16 -9
  19. data/spec/aws/decider/integration/starter_spec.rb +6 -7
  20. data/spec/aws/decider/unit/starter_spec.rb +2 -2
  21. data/spec/aws/decider/unit/worker_spec.rb +42 -0
  22. data/spec/aws/flow/{async_backtrace_spec.rb → unit/async_backtrace_spec.rb} +0 -0
  23. data/spec/aws/flow/{async_scope_spec.rb → unit/async_scope_spec.rb} +0 -0
  24. data/spec/aws/flow/{begin_rescue_ensure_spec.rb → unit/begin_rescue_ensure_spec.rb} +0 -0
  25. data/spec/aws/flow/unit/external_condition_variable_spec.rb +59 -0
  26. data/spec/aws/flow/{external_task_spec.rb → unit/external_task_spec.rb} +0 -0
  27. data/spec/aws/flow/{factories.rb → unit/factories.rb} +0 -0
  28. data/spec/aws/flow/{fiber_condition_variable_spec.rb → unit/fiber_condition_variable_spec.rb} +0 -0
  29. data/spec/aws/flow/{fiber_spec.rb → unit/fiber_spec.rb} +0 -0
  30. data/spec/aws/flow/{flow_spec.rb → unit/flow_spec.rb} +0 -0
  31. data/spec/aws/flow/{future_spec.rb → unit/future_spec.rb} +188 -0
  32. data/spec/aws/flow/{simple_dfa_spec.rb → unit/simple_dfa_spec.rb} +0 -0
  33. data/spec/aws/runner/integration/runner_integration_spec.rb +1 -0
  34. data/spec/aws/runner/unit/runner_unit_spec.rb +3 -3
  35. data/spec/aws/templates/unit/activity_spec.rb +9 -10
  36. data/spec/aws/templates/unit/base_spec.rb +10 -11
  37. data/spec/aws/templates/unit/default_spec.rb +23 -6
  38. data/spec/aws/templates/unit/result_spec.rb +130 -0
  39. data/spec/aws/templates/unit/starter_spec.rb +32 -105
  40. data/spec/aws/templates/unit/utilities_spec.rb +80 -0
  41. metadata +19 -13
@@ -0,0 +1,183 @@
1
+ module AWS
2
+ module Flow
3
+ module Templates
4
+
5
+ # ResultWorker is responsible for processing the results of the background
6
+ # jobs. It starts an ActivityWorker to process the ActivityTasks for
7
+ # FlowDefaultResultActivityRuby.run activity. It either returns futures or
8
+ # actual results themselves back to the user
9
+ class ResultWorker
10
+
11
+ # Wrapper around a ruby {Hash} to provide synchronization around making
12
+ # changes to the encapsulated hash.
13
+ class SynchronizedHash
14
+ attr_reader :hash
15
+
16
+ def initialize
17
+ @semaphore = Mutex.new
18
+ @hash = {}
19
+ end
20
+
21
+ def method_missing(method, *args)
22
+ # Not very efficient but ruby structures are not thread
23
+ # safe in MRI.
24
+ @semaphore.synchronize{ return @hash.send(method, *args) }
25
+ end
26
+ end
27
+
28
+ class << self
29
+ attr_reader :results
30
+ end
31
+
32
+ # Controls synchronization around creation of the ActivityWorker to
33
+ # ensure singleton
34
+ @semaphore = Mutex.new
35
+
36
+ # Starts ResultWorker and ensures that a single ActivityWorker is
37
+ # started for this process. Initializes all class instance variables.
38
+ def self.start(domain)
39
+
40
+ # If already initiated, return
41
+ return @task_list if @task_list
42
+
43
+ # Acquire the lock to ensure only 1 copy of the worker is created
44
+ @semaphore.synchronize do
45
+ # If multiple threads were waiting on the lock, then we should
46
+ # return if the worker was created by the previous thread
47
+ return @task_list if @task_list
48
+ # Initiate all class instance variables. @semaphore around this
49
+ # block ensures a singleton
50
+ self.init
51
+ end
52
+
53
+ # Create pipes for IPC
54
+ reader, writer = IO.pipe
55
+
56
+ # Start the ForkingExecutor with the ActivityWorker
57
+ self.start_executor(reader, writer, domain)
58
+
59
+ # Close the write end of the pipe in this process
60
+ writer.close
61
+
62
+ # Start the listener thread
63
+ self.start_listener(reader)
64
+
65
+ # Register signal handlers for this process
66
+ self.handle_signals
67
+
68
+ return @task_list
69
+
70
+ end
71
+
72
+ private
73
+
74
+ # Initiates the class instance variables
75
+ # @api private
76
+ def self.init
77
+ # We want the result to be sent to a specific tasklist so that no other
78
+ # worker gets the result of this workflow.
79
+ @task_list ||= "#{Socket.gethostname}:#{Process.pid}:#{SecureRandom.uuid}"
80
+ # Results will be stored in this hash
81
+ @results ||= SynchronizedHash.new
82
+ # Create a new forking executor
83
+ @executor ||= ForkingExecutor.new
84
+ end
85
+
86
+ # Start the ActivityWorker using the ForkingExecutor
87
+ # @api private
88
+ def self.start_executor(reader, writer, domain)
89
+ # Create a child process and start an ActivityWorker
90
+ @executor.execute do
91
+ $0 = 'result-worker'
92
+ # Close the read end of the pipe in the child process
93
+ reader.close
94
+
95
+ # Create a new instance of the FlowDefaultResultActivityRuby
96
+ # class and add it to the ActivityWorker. We instantiate the
97
+ # activity with the writer pipe so that the activity instance
98
+ # can report results back to the parent process.
99
+ activity = AWS::Flow::Templates.result_activity.new(writer)
100
+
101
+ # Start the activity worker. In case of UnknownResourceFault,
102
+ # register the types and start it again.
103
+ AWS::Flow::Templates::Utils.register_on_failure(domain) do |x|
104
+ swf = AWS::SimpleWorkflow.new
105
+ x = swf.domains[x]
106
+ AWS::Flow::ActivityWorker.new(x.client, x, @task_list, activity).start(false)
107
+ end
108
+ end
109
+ end
110
+
111
+ # Starts a listener thread that reads data from a reader pipe and
112
+ # updates the result hash
113
+ # @api private
114
+ def self.start_listener(reader)
115
+ @listener_t = Thread.new do
116
+ Thread.current[:name] = "listener_t"
117
+ while true
118
+ data = reader.gets
119
+ result = Marshal.load(data)
120
+ # Only update the result if an unset Future is present at the
121
+ # given location in the hash.
122
+ future = @results[result[:key]]
123
+ if future && !future.set?
124
+ future.set(result[:result])
125
+ end
126
+ end
127
+ end
128
+ end
129
+
130
+ # Resets all the class instance variables for ResultWorker
131
+ # @api private
132
+ def self.reset
133
+ @listener_t = nil
134
+ @results = nil
135
+ @task_list = nil
136
+ @executor = nil
137
+ end
138
+
139
+ # Stops the ResultWorker, i.e., terminates the listener thread and
140
+ # shuts down the executor.
141
+ # @api private
142
+ def self.stop
143
+ @listener_t.terminate if @listener_t
144
+ @executor.shutdown(0) if @executor
145
+ self.reset
146
+ end
147
+
148
+ # Registers the signal handlers
149
+ # @api private
150
+ def self.handle_signals
151
+ at_exit {
152
+ self.stop
153
+ }
154
+ %w{ TERM INT }.each do |s|
155
+ Signal.trap(s) do
156
+ self.stop
157
+ Kernel.exit
158
+ end
159
+ end
160
+ end
161
+
162
+ # Gets the result of the background job. The job is identified by the
163
+ # unique key which was assigned to it during scheduling.
164
+ # The method returns a future which the users can wait on to get the
165
+ # result.
166
+ # @api private
167
+ def self.get_result_future(key)
168
+
169
+ # Get the future from the results hash
170
+ future = self.results[key]
171
+
172
+ # Self delete the future from the results hash when it is set
173
+ future.on_set { |x| self.results.delete(key) }
174
+
175
+ return future
176
+ end
177
+
178
+ end
179
+
180
+ end
181
+
182
+ end
183
+ end
@@ -4,250 +4,176 @@ module AWS
4
4
  # @api private
5
5
  module Templates
6
6
 
7
- # Starts an Activity or a Workflow Template execution using the default
8
- # workflow class FlowDefaultWorkflowRuby
9
- #
10
- # @param [String or AWS::Flow::Templates::TemplateBase] name_or_klass
11
- # The Activity or the Workflow Template that needs to be scheduled via
12
- # the default workflow. This argument can either be a string that
13
- # represents a fully qualified activity name - <ActivityClass>.<method_name>
14
- # or it can be an instance of AWS::Flow::Templates::TemplateBase
15
- #
16
- # @param [Hash] input
17
- # Input hash for the workflow execution
18
- #
19
- # @param [Hash] opts
20
- # Additional options to configure the workflow or activity execution.
21
- #
22
- # @option opts [true, false] :wait
23
- # *Optional* This boolean flag can be set to true if the result of the
24
- # task is required. Default value is false.
25
- #
26
- # @option opts [Integer] :wait_timeout
27
- # *Optional* This sets the timeout value for :wait. Default value is
28
- # nil.
29
- #
30
- # @option opts [Hash] :exponential_retry
31
- # A hash of {AWS::Flow::ExponentialRetryOptions}. Default value is -
32
- # { maximum_attempts: 3 }
33
- #
34
- # @option opts [String] *Optional* :domain
35
- # Default value is FlowDefault
36
- #
37
- # @option opts [Integer] *Optional* :execution_start_to_close_timeout
38
- # Default value is 3600 seconds (1 hour)
39
- #
40
- # @option opts [Integer] *Optional* :retention_in_days
41
- # Default value is 7 days
42
- #
43
- # @option opts [String] *Optional* :workflow_id
44
- #
45
- # @option opts [Integer] *Optional* :task_priority
46
- # Default value is 0
47
- #
48
- # @option opts [String] *Optional* :tag_list
49
- # By default, the name of the activity task gets added to the workflow's
50
- # tag_list
51
- #
52
- # @option opts *Optional* :data_converter
53
- # Default value is {AWS::Flow::YAMLDataConverter}. To use the
54
- # {AWS::Flow::S3DataConverter}, set the AWS_SWF_BUCKET_NAME environment
55
- # variable name with a valid AWS S3 bucket name.
56
- #
57
- # @option opts *Optional* A hash of {AWS::Flow::ActivityOptions}
58
- #
59
- # Usage -
60
- #
61
- # AWS::Flow::start("<ActivityClassName>.<method_name>", <input_hash>,
62
- # <options_hash> )
63
- #
64
- # Example -
65
- #
66
- # 1) Start an activity execution -
67
- # AWS::Flow::start("HelloWorldActivity.say_hello", { name: "World" })
68
- #
69
- # 2) Start an activity execution with overriden options -
70
- # AWS::Flow::start("HelloWorldActivity.say_hello", { name: "World" }, {
71
- # exponential_retry: { maximum_attempts: 10 } }
72
- # )
73
- #
74
- def self.start(name_or_klass, input, opts = {})
7
+ class Starter
8
+
9
+ # Starts an Activity or a Workflow Template execution using the default
10
+ # workflow class FlowDefaultWorkflowRuby
11
+ #
12
+ # @param [String or AWS::Flow::Templates::TemplateBase] name_or_klass
13
+ # The Activity or the Workflow Template that needs to be scheduled via
14
+ # the default workflow. This argument can either be a string that
15
+ # represents a fully qualified activity name - <ActivityClass>.<method_name>
16
+ # or it can be an instance of AWS::Flow::Templates::TemplateBase
17
+ #
18
+ # @param [Hash] input
19
+ # Input hash for the workflow execution
20
+ #
21
+ # @param [Hash] opts
22
+ # Additional options to configure the workflow or activity execution.
23
+ #
24
+ # @option opts [true, false] :get_result
25
+ # *Optional* This boolean flag can be set to true if the result future
26
+ # is required. The future can be waited on by using the
27
+ # AWS::Flow::wait_for_all, AWS::Flow::wait_for_any methods or by
28
+ # calling the ExternalFuture#get method. Default value is false.
29
+ #
30
+ # @option opts [Hash] :exponential_retry
31
+ # A hash of {AWS::Flow::ExponentialRetryOptions}. Default value is -
32
+ # { maximum_attempts: 3 }
33
+ #
34
+ # @option opts [String] *Optional* :domain
35
+ # Default value is FlowDefault
36
+ #
37
+ # @option opts [Integer] *Optional* :execution_start_to_close_timeout
38
+ # Default value is 3600 seconds (1 hour)
39
+ #
40
+ # @option opts [Integer] *Optional* :retention_in_days
41
+ # Default value is 7 days
42
+ #
43
+ # @option opts [String] *Optional* :workflow_id
44
+ #
45
+ # @option opts [Integer] *Optional* :task_priority
46
+ # Default value is 0
47
+ #
48
+ # @option opts [String] *Optional* :tag_list
49
+ # By default, the name of the activity task gets added to the workflow's
50
+ # tag_list
51
+ #
52
+ # @option opts *Optional* :data_converter
53
+ # Default value is {AWS::Flow::YAMLDataConverter}. To use the
54
+ # {AWS::Flow::S3DataConverter}, set the AWS_SWF_BUCKET_NAME environment
55
+ # variable name with a valid AWS S3 bucket name.
56
+ #
57
+ # @option opts *Optional* A hash of {AWS::Flow::ActivityOptions}
58
+ #
59
+ # Usage -
60
+ #
61
+ # AWS::Flow::start("<ActivityClassName>.<method_name>", <input_hash>,
62
+ # <options_hash> )
63
+ #
64
+ # Example -
65
+ #
66
+ # 1) Start an activity execution -
67
+ # AWS::Flow::start("HelloWorldActivity.say_hello", { name: "World" })
68
+ #
69
+ # 2) Start an activity execution with overridden options -
70
+ # AWS::Flow::start("HelloWorldActivity.say_hello", { name: "World" }, {
71
+ # exponential_retry: { maximum_attempts: 10 } }
72
+ # )
73
+ #
74
+ def self.start(name_or_klass, input, opts = {})
75
+
76
+ options = opts.dup
77
+
78
+ if name_or_klass.is_a?(String)
79
+ # Add activity name as a tag to the workflow execution
80
+ (options[:tag_list] ||= []) << name_or_klass
81
+
82
+ # If name_or_klass passed in is a string, we are assuming the user is
83
+ # trying to start a single activity task. Wrap the activity information
84
+ # in the activity template
85
+ name_or_klass = AWS::Flow::Templates.activity(name_or_klass, options)
86
+
87
+ # Keep only the required options in the hash
88
+ keys = [
89
+ :domain,
90
+ :retention_in_days,
91
+ :execution_start_to_close_timeout,
92
+ :task_priority,
93
+ :get_result,
94
+ :workflow_id,
95
+ :data_converter,
96
+ :tag_list
97
+ ]
98
+ options.select! { |x| keys.include?(x) }
75
99
 
76
- options = opts.dup
77
-
78
- if name_or_klass.is_a?(String)
79
- # Add activity name as a tag to the workflow execution
80
- (options[:tag_list] ||= []) << name_or_klass
100
+ end
81
101
 
82
- # If name_or_klass passed in is a string, we are assuming the user is
83
- # trying to start a single activity task. Wrap the activity information
84
- # in the activity template
85
- name_or_klass = AWS::Flow::Templates.activity(name_or_klass, options)
102
+ # Wrap the template in a root template
103
+ root = AWS::Flow::Templates.root(name_or_klass)
104
+
105
+ # Get the default options and merge them with the options passed in. The
106
+ # order of the two hashes 'defaults' and 'options' is important here.
107
+ defaults = FlowConstants.defaults.select do |key|
108
+ [
109
+ :domain,
110
+ :prefix_name,
111
+ :execution_method,
112
+ :version,
113
+ :execution_start_to_close_timeout,
114
+ :data_converter,
115
+ :task_list
116
+ ].include?(key)
117
+ end
118
+ options = defaults.merge(options)
86
119
 
87
- # Keep only the required options in the hash
88
- keys = [
89
- :domain,
90
- :retention_in_days,
91
- :execution_start_to_close_timeout,
92
- :task_priority,
93
- :wait,
94
- :wait_timeout,
95
- :workflow_id,
96
- :data_converter,
97
- :tag_list
98
- ]
99
- options.select! { |x| keys.include?(x) }
120
+ raise "input needs to be a Hash" unless input.is_a?(Hash)
100
121
 
101
- end
122
+ # Set the input for the default workflow
123
+ workflow_input = {
124
+ definition: root,
125
+ args: input
126
+ }
102
127
 
103
- # Wrap the template in a root template
104
- root = AWS::Flow::Templates.root(name_or_klass)
128
+ # get_result specifies if we should return back a result future
129
+ # for this task
130
+ get_result = options.delete(:get_result)
105
131
 
106
- # Get the default options and merge them with the options passed in. The
107
- # order of the two hashes 'defaults' and 'options' is important here.
108
- defaults = FlowConstants.defaults.select do |key|
109
- [
110
- :domain,
111
- :prefix_name,
112
- :execution_method,
113
- :version,
114
- :execution_start_to_close_timeout,
115
- :data_converter,
116
- :task_list
117
- ].include?(key)
118
- end
119
- options = defaults.merge(options)
132
+ if get_result
133
+ # Start the default result activity worker
134
+ task_list = ResultWorker.start(options[:domain])
120
135
 
121
- raise "input needs to be a Hash" unless input.is_a?(Hash)
136
+ # Set the result_step for the root template. We need to pass in the
137
+ # task_list to ensure the result activity task is sent to the right
138
+ # task list. This method will return back a unique key that will
139
+ # help us locate the result of this task in ResultWorker.results hash
140
+ key = set_result_activity(task_list, root)
141
+ end
122
142
 
123
- # Set the input for the default workflow
124
- workflow_input = {
125
- definition: root,
126
- args: input,
127
- }
143
+ # Call #start_workflow with the correct options to start the workflow
144
+ # execution. If it fails with UnknownResourceFault, then register the
145
+ # default types and retry.
146
+ AWS::Flow::Templates::Utils.register_on_failure(options[:domain]) do
147
+ AWS::Flow::start_workflow(workflow_input, options)
148
+ end
128
149
 
129
- # Set the result_step for the root template if wait flag is
130
- # set.
131
- wait = options.delete(:wait)
132
- wait_timeout = options.delete(:wait_timeout)
133
- result_tasklist = set_result_activity(root) if wait
150
+ # Get the result identified by this key
151
+ ResultWorker.get_result_future(key) if get_result
134
152
 
135
- # Call #start_workflow with the correct options to start the workflow
136
- # execution
137
- begin
138
- AWS::Flow::start_workflow(workflow_input, options)
139
- rescue AWS::SimpleWorkflow::Errors::UnknownResourceFault => e
140
- register_defaults(options[:domain])
141
- AWS::Flow::start_workflow(workflow_input, options)
142
153
  end
143
154
 
144
- # Wait for result
145
- get_result(result_tasklist, options[:domain], wait_timeout) if wait
146
-
147
- end
148
-
149
- # Sets the result activity with a unique tasklist name for the root template.
150
- # @api private
151
- def self.set_result_activity(root)
152
- # We want the result to be sent to a specific tasklist so that no other
153
- # worker gets the result of this workflow.
154
- result_tasklist = "result_tasklist: #{SecureRandom.uuid}"
155
-
156
- name = "#{FlowConstants.defaults[:result_activity_prefix]}."\
157
- "#{FlowConstants.defaults[:result_activity_method]}"
158
-
159
- # Set the result_step of the root template to the result activity and
160
- # override the tasklist and timeouts.
161
- root.result_step = activity(name, {
162
- task_list: result_tasklist,
155
+ # Sets the result activity with a unique key. The key is used to match
156
+ # the task with the result of the task. It is provided as an input to
157
+ # the default result activity and is used to create a new ExternalFuture
158
+ # in the ResultWorker.results hash.
159
+ # @api private
160
+ def self.set_result_activity(task_list, root)
161
+
162
+ key = "result_key: #{SecureRandom.uuid}"
163
+ # Set the result_step of the root template to the result activity and
164
+ # override the tasklist and timeouts.
165
+ root.result_step = AWS::Flow::Templates.result(key, {
166
+ task_list: task_list,
163
167
  schedule_to_start_timeout: FlowConstants.defaults[:schedule_to_start_timeout],
164
168
  start_to_close_timeout: FlowConstants.defaults[:start_to_close_timeout]
165
- }
166
- )
167
- result_tasklist
168
- end
169
+ })
169
170
 
170
- # Gets the result of the workflow execution by starting an ActivityWorker
171
- # on the FlowDefaultResultActivityRuby class. The result activity will set
172
- # the instance variable future :result with the result of the template.
173
- # It will block till either the result future is set or till the timeout
174
- # expires - whichever comes first.
175
- # @api private
176
- def self.get_result(tasklist, domain, timeout=nil)
171
+ # Create a new ExternalFuture in the ResultWorker.results hash.
172
+ ResultWorker.results[key] = ExternalFuture.new
177
173
 
178
- swf = AWS::SimpleWorkflow.new
179
- domain = swf.domains[domain]
180
-
181
- # Create a new instance of the FlowDefaultResultActivityRuby class and
182
- # add it to the ActivityWorker. We pass in the instance instead of the
183
- # class itself, so that we can locally access the instance variable set
184
- # by the activity method.
185
- activity = FlowDefaultResultActivityRuby.new
186
-
187
- # Create the activity worker to poll on the result tasklist
188
- worker = AWS::Flow::ActivityWorker.new(domain.client, domain, tasklist, activity) {{ use_forking: false }}
189
-
190
- # Keep polling till we get the result or timeout. A 0 or nil timeout
191
- # will let the loop run to completion.
192
- begin
193
- Timeout::timeout(timeout) do
194
- until activity.result.set?
195
- worker.run_once(false)
196
- end
197
- end
198
- rescue Timeout::Error => e
199
- activity.result.set
200
- return
174
+ key
201
175
  end
202
176
 
203
- # Get the result from the future
204
- result = activity.result.get
205
- if result.is_a?(Hash) && result[:failure] && result[:failure].is_a?(Exception)
206
- raise result[:failure]
207
- end
208
-
209
- result
210
- end
211
-
212
- # Registers the relevant defaults with the Simple Workflow Service
213
- # @api private
214
- def self.register_defaults(name=nil)
215
- domain = name.nil? ? register_default_domain : AWS::SimpleWorkflow.new.domains[name]
216
-
217
- register_default_workflow(domain)
218
- register_default_result_activity(domain)
219
- end
220
-
221
- # Registers the default domain FlowDefault with the Simple Workflow
222
- # Service
223
- # @api private
224
- def self.register_default_domain
225
- AWS::Flow::Utilities.register_domain(FlowConstants.defaults[:domain])
226
- end
227
-
228
- # Registers the default workflow type FlowDefaultWorkflowRuby with the
229
- # Simple Workflow Service
230
- # @api private
231
- def self.register_default_workflow(domain)
232
- AWS::Flow::WorkflowWorker.new(
233
- domain.client,
234
- domain,
235
- nil,
236
- AWS::Flow::Templates.default_workflow
237
- ).register
238
- end
239
-
240
- # Registers the default result activity type FlowDefaultResultActivityRuby
241
- # with the Simple Workflow Service
242
- # @api private
243
- def self.register_default_result_activity(domain)
244
- worker = AWS::Flow::ActivityWorker.new(
245
- domain.client,
246
- domain,
247
- nil,
248
- AWS::Flow::Templates.result_activity
249
- ) {{ use_forking: false }}
250
- worker.register
251
177
  end
252
178
 
253
179
  end