libis-workflow 2.0.25 → 2.0.28

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/.coveralls.yml +1 -1
  3. data/.gitignore +36 -36
  4. data/.travis.yml +32 -32
  5. data/Gemfile +4 -4
  6. data/LICENSE +20 -20
  7. data/README.md +380 -380
  8. data/Rakefile +6 -6
  9. data/lib/libis/exceptions.rb +6 -6
  10. data/lib/libis/workflow.rb +41 -41
  11. data/lib/libis/workflow/action.rb +24 -24
  12. data/lib/libis/workflow/base/dir_item.rb +13 -13
  13. data/lib/libis/workflow/base/file_item.rb +80 -80
  14. data/lib/libis/workflow/base/job.rb +83 -83
  15. data/lib/libis/workflow/base/logging.rb +66 -66
  16. data/lib/libis/workflow/base/run.rb +97 -95
  17. data/lib/libis/workflow/base/work_item.rb +173 -173
  18. data/lib/libis/workflow/base/workflow.rb +149 -149
  19. data/lib/libis/workflow/config.rb +22 -22
  20. data/lib/libis/workflow/dir_item.rb +10 -10
  21. data/lib/libis/workflow/file_item.rb +15 -15
  22. data/lib/libis/workflow/job.rb +28 -28
  23. data/lib/libis/workflow/message_registry.rb +30 -30
  24. data/lib/libis/workflow/run.rb +34 -34
  25. data/lib/libis/workflow/status.rb +133 -133
  26. data/lib/libis/workflow/task.rb +318 -316
  27. data/lib/libis/workflow/task_group.rb +72 -71
  28. data/lib/libis/workflow/task_runner.rb +34 -34
  29. data/lib/libis/workflow/version.rb +5 -5
  30. data/lib/libis/workflow/work_item.rb +37 -37
  31. data/lib/libis/workflow/worker.rb +42 -42
  32. data/lib/libis/workflow/workflow.rb +20 -20
  33. data/libis-workflow.gemspec +38 -38
  34. data/spec/items.rb +2 -2
  35. data/spec/items/test_dir_item.rb +13 -13
  36. data/spec/items/test_file_item.rb +16 -16
  37. data/spec/items/test_run.rb +8 -8
  38. data/spec/spec_helper.rb +8 -8
  39. data/spec/task_spec.rb +15 -15
  40. data/spec/tasks/camelize_name.rb +12 -12
  41. data/spec/tasks/checksum_tester.rb +32 -32
  42. data/spec/tasks/collect_files.rb +47 -47
  43. data/spec/workflow_spec.rb +154 -154
  44. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b3de031988a8f96ef1f3fee1f55108d76da4bd75
4
- data.tar.gz: 623ca157c5b70718bca975c746b9971a246fc2b7
3
+ metadata.gz: 9ea9a53ae48e3988b4bdd736ab68334309466c4d
4
+ data.tar.gz: 342ab04cba426925e1d5170aa9f9221c25eab308
5
5
  SHA512:
6
- metadata.gz: 56eee1c03848b2e55285298e60102cc31ad056fe60d448446d28d275b143b0d3c8aee2f9cc51d09eaafb3af53f13d542c8bf70b71b6e10cf5228ecae09b84e60
7
- data.tar.gz: b7ea256e14f8e2220ddbc551f0b641ca72f03121c04e04dee446560f537185717860dc1255571c9f1e2b078325fe3df59078a571a6a382d73fcb9400feb62d33
6
+ metadata.gz: a6ab39cccd0598f5954452c7f6ef88f261bfa15904ba021c5d0a5abd5cc632f5634337ce6c250356e702d79e555734cb40b80ac5f5a1a77aafd2572251b50243
7
+ data.tar.gz: 201d95fe13fe3469a4266d4cd3b9164ed695bfc3dacc16c51cea0c33597c5f426975bb9d42abc3eb03e8cdf9ef85d57eb00ddb8fce7749abb0b72e1c9f2c6daa
data/.coveralls.yml CHANGED
@@ -1,2 +1,2 @@
1
- service_name: travis-ci
1
+ service_name: travis-ci
2
2
  repo_token: TMosCEIw4eu2hK05NxyY2UYIRJYQPzemt
data/.gitignore CHANGED
@@ -1,37 +1,37 @@
1
- *.gem
2
- *.rbc
3
- /.config
4
- /coverage/
5
- /InstalledFiles
6
- /pkg/
7
- /spec/reports/
8
- /test/tmp/
9
- /test/version_tmp/
10
- /tmp/
11
-
12
- ## Specific to RubyMotion:
13
- .dat*
14
- .repl_history
15
- build/
16
-
17
- ## Documentation cache and generated files:
18
- /.yardoc/
19
- /_yardoc/
20
- /doc/
21
- /rdoc/
22
-
23
- ## Environment normalisation:
24
- /.bundle/
25
- /lib/bundler/man/
26
-
27
- # for a library or gem, you might want to ignore these files since the code is
28
- # intended to run in multiple environments; otherwise, check them in:
29
- Gemfile.lock
30
- .ruby-version
31
- .ruby-gemset
32
-
33
- # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
- .rvmrc
35
-
36
- .idea/
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ Gemfile.lock
30
+ .ruby-version
31
+ .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
35
+
36
+ .idea/
37
37
  /spec/coverage
data/.travis.yml CHANGED
@@ -1,32 +1,32 @@
1
- language: ruby
2
- sudo: false
3
- bundler_args: --without development
4
- cache: bundler
5
- rvm:
6
- - 2.1.0
7
- - 2.2.0
8
- - ruby-head
9
- - jruby-9.0.1.0
10
- jdk:
11
- - openjdk7
12
- - oraclejdk7
13
- - oraclejdk8
14
- matrix:
15
- exclude:
16
- - rvm: 2.1.0
17
- jdk: oraclejdk7
18
- - rvm: 2.1.0
19
- jdk: oraclejdk8
20
- - rvm: 2.2.0
21
- jdk: oraclejdk7
22
- - rvm: 2.2.0
23
- jdk: oraclejdk8
24
- - rvm: ruby-head
25
- jdk: oraclejdk7
26
- - rvm: ruby-head
27
- jdk: oraclejdk8
28
- allow_failures:
29
- - rvm: ruby-head
30
- branches:
31
- only:
32
- - master
1
+ language: ruby
2
+ sudo: false
3
+ bundler_args: --without development
4
+ cache: bundler
5
+ rvm:
6
+ - 2.1.0
7
+ - 2.2.0
8
+ - ruby-head
9
+ - jruby-9.0.1.0
10
+ jdk:
11
+ - openjdk7
12
+ - oraclejdk7
13
+ - oraclejdk8
14
+ matrix:
15
+ exclude:
16
+ - rvm: 2.1.0
17
+ jdk: oraclejdk7
18
+ - rvm: 2.1.0
19
+ jdk: oraclejdk8
20
+ - rvm: 2.2.0
21
+ jdk: oraclejdk7
22
+ - rvm: 2.2.0
23
+ jdk: oraclejdk8
24
+ - rvm: ruby-head
25
+ jdk: oraclejdk7
26
+ - rvm: ruby-head
27
+ jdk: oraclejdk8
28
+ allow_failures:
29
+ - rvm: ruby-head
30
+ branches:
31
+ only:
32
+ - master
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
1
- source 'https://rubygems.org'
2
-
3
- gemspec name: 'libis-workflow', development_group: :test
4
-
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec name: 'libis-workflow', development_group: :test
4
+
data/LICENSE CHANGED
@@ -1,21 +1,21 @@
1
- The MIT License (MIT)
2
-
3
- Copyright (c) 2014 LIBIS
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 LIBIS
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
21
  SOFTWARE.
data/README.md CHANGED
@@ -1,380 +1,380 @@
1
- [![Gem Version](https://badge.fury.io/rb/libis-workflow.svg)](http://badge.fury.io/rb/libis-workflow)
2
- [![Build Status](https://travis-ci.org/Kris-LIBIS/workflow.svg?branch=master)](https://travis-ci.org/Kris-LIBIS/workflow)
3
- [![Coverage Status](https://img.shields.io/coveralls/Kris-LIBIS/workflow.svg)](https://coveralls.io/r/Kris-LIBIS/workflow)
4
- [![Dependency Status](https://gemnasium.com/Kris-LIBIS/workflow.svg)](https://gemnasium.com/Kris-LIBIS/workflow)
5
-
6
- # LIBIS Workflow
7
-
8
- LIBIS Workflow framework
9
-
10
- ## Installation
11
-
12
- Add this line to your application's Gemfile:
13
-
14
- ```ruby
15
- gem 'libis-workflow'
16
- ```
17
-
18
-
19
- And then execute:
20
-
21
- $ bundle
22
-
23
- Or install it yourself as:
24
-
25
- $ gem install 'libis-workflow'
26
-
27
- ## Architecture
28
-
29
- This gem is essentially a simple, custom workflow system. The core of the workflow are the tasks. You can - and should -
30
- create your own tasks by creating new classes inherited from ::Libis::Workflow::Task. The ::Libis::Workflow::Task class
31
- and the included ::Libis::Workflow::Base::Logger module provide the necessary attributes and methods to make them work
32
- in the workflow. See the detailed documentation for the class and module for more information.
33
-
34
- The objects that the tasks will be working on should include the ::Libis::Workflow::Base::WorkItem module.
35
- When working with file objects the module ::Libis::Workflow::Base::FileItem and/or ::Libis::Workflow::Base::DirItem
36
- modules should be included for additional file-specific functionality.
37
- Work items can be organized in different types and a hierarchical structure. A simple implementation of work items with
38
- in-memory storage is provided as classes ::Libis::Workflow::WorkItem, ::Libis::Workflow::FileItem and
39
- ::Libis::Workflow::DirItem.
40
-
41
- All the tasks will be organized into a workflow object for which a base module ::Libis::Workflow::Base::Workflow is
42
- provided. It contains all the basic logic required for proper configuration and operation. Again an in-memory
43
- implementation is provided in the class ::Libis::Workflow::Workflow for your convenience to be used as-is or to derive
44
- your own from.
45
-
46
- The Job class is responsible for instantiating a run-time workflow execution object - a Run - that captures the
47
- configuration and workitems generated while executing the tasks. Essential logic is provided in the module
48
- ::Libis::Workflow::Base::Run with a simple in-memory implementation in ::Libis::Workflow::Run. The run object's class
49
- name has to be provided to the job configuration so that the job can instantiate the correct object. The run object
50
- will be able to execute the tasks in proper order on all the WorkItems supplied/collected. Each task can be implemented
51
- with code to run or simply contain a list of child tasks.
52
-
53
- The whole ingester workflow is configured by a Singleton object ::Libis::Workflow::Config which contains settings for
54
- logging and paths where tasks and workitems can be found.
55
-
56
- ## Usage
57
-
58
- You should start by including the following line in your source code:
59
-
60
- ```ruby
61
- require 'libis-workflow'
62
- ```
63
-
64
- This will load all of the Libis Workflow framework into your environment, but including only the required parts is OK as
65
- well. This is shown in the examples below.
66
-
67
- ### Workflows and Jobs
68
-
69
- An implementation of ::Libis::Workflow::Base::Workflow contains the definition of a workflow. Once instantiated, it can
70
- be run by calling the 'execute' method on a job object created for that workflow. This will create an instance of an
71
- implementation of ::Libis::Workflow::Base::Run, configure it and call the 'run' method on it. The Workflow constructor
72
- takes no arguments, but it should be configured by calling the 'configure' method with the workflow configuration as an
73
- argument. The job's 'execute' method takes an option Hash as argument with extra/overriding configuration values.
74
-
75
- ### Job configuration
76
- A job configuration is a Hash with:
77
- * name: String to identify the workflow
78
- * description: String with detailed textual information
79
- * workflow: Object reference to a Workflow that contains the task configuration
80
- * run_object: String with class name of the ::Libis::Workflow::Base::Run implementation to be created. An instance of
81
- this class will be created for each run and serves as the root work item for that particular run.
82
- * input: Hash with input parameter values for the workflow
83
-
84
- #### Workflow configuration
85
-
86
- A workflow configuration is a Hash with:
87
- * name: String to identify the workflow
88
- * description: String with detailed textual information
89
- * tasks: Array of task descriptions
90
- * input: Hash with input variable definitions
91
-
92
- ##### Task description
93
-
94
- is a Hash with:
95
- * class: String with class name of the task
96
- * name: String with the name of the task
97
- * tasks: Array with task definitions of sub-tasks
98
- * any task parameter values. Each task can define parameters that configure the task. It is using the
99
- ::Libis::Tools::Parameter class for this.
100
-
101
- The ::Libis::Workflow::Task base class already defines the following parameters:
102
- * recursive: Run the task on all subitems recursively. Default: false
103
- * abort_recursion_on_failure: Stop processing items recursively if one item fails. Default: false
104
- * retry_count: Number of times to retry the task. Default: 0
105
- * retry_interval: Number of seconds to wait between retries. Default: 10
106
-
107
- If 'class' is not present, the default '::Libis::Workflow::TaskGroup' with the given name will be instantiated, which
108
- performs each sub-task on the item.
109
-
110
- If the task is configured to be recursive, it will iterate over the child items and perform each sub-task on each of
111
- the child items. If a 'class' value is given, an instance of that class will be created and the task will be handed
112
- the work item to process on. See the chapter on 'Tasks' below for more information on tasks.
113
-
114
- Note that a task with custom processing will not execute sub-tasks. If you configured a processing task with subtasks
115
- an exception will be thrown when trying to execute the job.
116
-
117
- ##### Input variable definition
118
-
119
- The input variables define parameters for the workflow. When a job is executed, it can provide values for any of these
120
- input variables and the workflow run will use the new values instead of the defaults.
121
-
122
- The key of the input Hash is the unique name of the variable. The value is another Hash with the parameter definition.
123
- See ::Libis::Tools::Parameter for the content of this Hash.
124
-
125
- An additional property of the parameters is the 'propagate_to' property. It defines how the workflow run should push
126
- the values set for the input parameters to the parameters on the tasks. These task parameters can be addressed by a
127
- '<Task class or Task name>[#<parameter name>]' string. If necessary the task class or name may be specified as a full
128
- path with '/' separators. The parameter name part is optional and considered to be the same as the input parameter name
129
- if absent.
130
-
131
- #### Run-time configuration
132
-
133
- The job's 'execute' method takes an optional Hash as argument which will complement and override the options Hash
134
- described in the previous chapter.
135
-
136
- Once the workflow is configured and the root work item instantiated, the method will run each top-level task on the root
137
- work item in sequence until all tasks have completed successfully or a task has failed.
138
-
139
- ### Work items
140
-
141
- Creating your own work items is highly recommended and is fairly easy:
142
-
143
- ```ruby
144
-
145
- require 'libis/workflow'
146
-
147
- class MyWorkItem < ::Libis::Workflow::WorkItem
148
- attr_accessor :name
149
-
150
- def initialize
151
- @name = 'My work item'
152
- super # Note: this is important as the base class requires some initialization
153
- end
154
- end
155
- ```
156
-
157
- or if a custom storage implementation is desired, a number of data items and methods require implementation:
158
-
159
- ```ruby
160
-
161
- require 'libis/workflow'
162
-
163
- class MyWorkItem < MyStorageItem
164
- include ::Libis::Workflow::Base::WorkItem
165
-
166
- stored_attribute :parent
167
- stored_attribute :items
168
- stored_attribute :options
169
- stored_attribute :properties
170
- stored_attribute :status_log
171
- stored_attribute :summary
172
-
173
- def initialize
174
- self.parent = nil
175
- self.items = []
176
- self.options = {}
177
- self.properties = {}
178
- self.status_log = []
179
- self.summary = {}
180
- end
181
-
182
- protected
183
-
184
- def add_status_log(info)
185
- self.status_log << info
186
- end
187
-
188
- end
189
- ```
190
-
191
- Work items that are file-based can derive from the ::Libis::Workflow::FileItem class:
192
-
193
- ```ruby
194
-
195
- require 'libis/workflow'
196
-
197
- class MyFileItem < ::Libis::Workflow::FileItem
198
-
199
- def initialize(file)
200
- filename = file
201
- super
202
- end
203
-
204
- def filesize
205
- properties[:size]
206
- end
207
-
208
- def fixity_check(checksum)
209
- properties[:checksum] == checksum
210
- end
211
-
212
- end
213
- ```
214
-
215
- or include the ::Libis::Workflow::Base::FileItem module:
216
-
217
- ```ruby
218
-
219
- require 'libis/workflow'
220
-
221
- class MyFileItem < MyWorkItem
222
- include ::Libis::Workflow::FileItem
223
-
224
- def initialize(file)
225
- filename = file
226
- super
227
- end
228
-
229
- def filesize
230
- properties[:size]
231
- end
232
-
233
- def fixity_check(checksum)
234
- properties[:checksum] == checksum
235
- end
236
-
237
- end
238
- ```
239
-
240
-
241
-
242
- ## Tasks
243
-
244
- Tasks should inherit from ::Libis::Workflow::Task and specify the actions it wants to
245
- perform on each work item:
246
-
247
- ```ruby
248
-
249
- class MyTask < ::Libis::Workflow::Task
250
-
251
- def process_item(item)
252
- if do_something(item)
253
- info "Did something"
254
- else
255
- raise ::Libis::WorkflowError, "Something went wrong"
256
- end
257
- rescue Exception => e
258
- error "Fatal problem, aborting"
259
- raise ::Libis::WorkflowAbort, "Fatal problem"
260
- ensure
261
- item
262
- end
263
-
264
- end
265
- ```
266
-
267
- As seen above, the task should define a method called process_item that takes one argument. The argument will be a
268
- reference to the work item that it needs to perform an action on. The task has several options to progress after
269
- performing its actions:
270
- * return. This is considered a normal and successful operation result. After a successful return the item's status will
271
- be set to 'done' for the given task.
272
- * raise a ::Libis::WorkflowError. Indicates that something went wrong during the processing of the item. The item's
273
- status will be set to failed for the given task and the exception message will be printed in the error log. Processing
274
- will continue with the next item. This action is recommended for temporary or recoverable errors. The parent item will
275
- be flagged as 'failed' if any of the child items failed.
276
- * raise a ::Libis::WorkflowAbort. A severe and fatal error has occurred. Processing will abort immediately and the
277
- failure status will be escalated to all items up the item hierarchy. Due to the escalating behaviour, no message is
278
- printed in the error log automatically, so it is up to the task to log the error appropriately itself.
279
- * raise any other Exception. Should be avoided, but if it happens nevertheless, it will cause the item to fail for the
280
- given task and the exception message to be logged in the error log. It will not attempt to process the other items.
281
-
282
- ### Controlling behavior with parameters
283
-
284
- You have some options to control how the task will behave in special cases. These are controlled using parameters on
285
- the task, which can be set (and fixed with the 'frozen' option) on the task, but can be configured at run-time with the
286
- help of workflow input parameters and run options.
287
-
288
- #### Performing an action on the work item and all child items recursively
289
-
290
- With the 'recursive' parameter set to true, your task's process_item method will be called for the work item and then
291
- once for each child and each child's children recursively.
292
-
293
- Note: you should not make both parent and child tasks recursive as this will cause the subitems to be processed
294
- multiple times. If you make the parent task recursive, all tasks and sub-tasks will be performed on each item in the
295
- tree. Making the child tasks recursive makes the parent task only perform on the top item and then performs each
296
- sub-task one-by-one for the whole item tree. The last option is the most efficient.
297
-
298
- Attention should be paid for the
299
-
300
- #### Retrying if task failed
301
-
302
- The parameters 'retry_count' and 'retry_interval' control the task's behaviour if a task has to wait for a result for an
303
- asynchronous job. A task could be waiting for a result from the other job which will be indicated by an 'ASYNC_WAIT'
304
- status. Alternatively the task may know that the job is halted and waiting for user interaction, indicated with the
305
- 'ASYNC_HALT' status. Only when the status is 'ASYNC_WAIT', the task will retry its process. By default the 'retry_count'
306
- is 0, which causes the task not to retry. Before retrying the task will pause for the number of seconds given in the
307
- parameter 'retry_interval', which is 30 by default.
308
-
309
- ### Pre- and postprocessing
310
-
311
- The default implementation of 'process' is to call 'pre_process' and then call 'process_item' on each child item,
312
- followed by calling 'post_process'. The methods 'pre_process' and 'post_process' are no-operation methods by default,
313
- but can be overwritten if needed.
314
-
315
- The 'pre_process' is intended to re-initialize the task before processing a new item. It can also be used to force the
316
- task to skip processing the items altogether by calling the 'skip_processing_item' method or to prevent a recursive
317
- task from traveling further down the item tree by calling the 'stop_processing_subitems' method. The temporary locks
318
- behave as reset-on-read switches and are only active for the processing of the current item.
319
-
320
- The 'post_process' method can be used to update any object after the item processing.
321
-
322
- ### Convenience functions
323
-
324
- #### get_root_item()
325
-
326
- Returns the work item that the workflow started with (and is the root/grand parent of all work items in the ingest run).
327
-
328
- #### get_work_dir()
329
-
330
- Returns the work directory as configured for the current ingest run. The work directory can be used as scrap directory
331
- for creating derived files that can be added as work items to the current flow or for downloading files that will be
332
- processed later. The work directory is not automatically cleaned up, which is considered a task for the workflow implementation.
333
-
334
- #### capture_cmd(cmd, *args)
335
-
336
- Allows the task to run an external command-line program and capture its stdout and stderr output at the same time. The
337
- first argument is mandatory and should be the command-line program that has to be executed. An arbitrary number of
338
- command-line arguments may follow.
339
-
340
- The return value is an array with three elements: the status code returned by the command, the stdout string and the
341
- stderr string.
342
-
343
- #### names()
344
-
345
- An array of strings with the hierarchical path of tasks leading to the current task. Can be useful for log messages.
346
- The method 'namepath' returns a '/' separated path of tasks.
347
-
348
- #### (debug/info/warn/error/fatal)(message, *args)
349
-
350
- Convenience function for creating log entries. The logger set in ::Libis::Workflow::Config is used to dump log messages.
351
-
352
- The first argument is mandatory and can be:
353
- * an integer. The integer is used to look up the message text in ::Libis::Workflow::MessageRegistry.
354
- * a static string. The message text is used as-is.
355
- * a string with placement holders as used in String#%. Args can either be an array or a hash. See also Kernel#sprintf.
356
-
357
- The log message is logged to the general logging and attached to the current work item (workitem) unless another
358
- work item is passed as first argument after the message.
359
-
360
- #### check_item_type(klass, item = nil)
361
-
362
- Checks if the work item is of the given class. 'workitem' is checked if the item argument is not present. If the check
363
- fails a Runtime exception is thrown which will cause the task to abort if not caught.
364
-
365
- #### item_type?(klass, item = nil)
366
-
367
- A less severe variant of check_item_type which returns a boolean (false if failed).
368
-
369
- #### to_status(status)
370
-
371
- Simply prepends the status text with the current task name. The output of this function is typically what the work item
372
- status field should be set at.
373
-
374
- ## Contributing
375
-
376
- 1. Fork it ( https://github.com/libis/workflow/fork )
377
- 2. Create your feature branch (`git checkout -b my-new-feature`)
378
- 3. Commit your changes (`git commit -am 'Add some feature'`)
379
- 4. Push to the branch (`git push origin my-new-feature`)
380
- 5. Create new Pull Request
1
+ [![Gem Version](https://badge.fury.io/rb/libis-workflow.svg)](http://badge.fury.io/rb/libis-workflow)
2
+ [![Build Status](https://travis-ci.org/Kris-LIBIS/workflow.svg?branch=master)](https://travis-ci.org/Kris-LIBIS/workflow)
3
+ [![Coverage Status](https://img.shields.io/coveralls/Kris-LIBIS/workflow.svg)](https://coveralls.io/r/Kris-LIBIS/workflow)
4
+ [![Dependency Status](https://gemnasium.com/Kris-LIBIS/workflow.svg)](https://gemnasium.com/Kris-LIBIS/workflow)
5
+
6
+ # LIBIS Workflow
7
+
8
+ LIBIS Workflow framework
9
+
10
+ ## Installation
11
+
12
+ Add this line to your application's Gemfile:
13
+
14
+ ```ruby
15
+ gem 'libis-workflow'
16
+ ```
17
+
18
+
19
+ And then execute:
20
+
21
+ $ bundle
22
+
23
+ Or install it yourself as:
24
+
25
+ $ gem install 'libis-workflow'
26
+
27
+ ## Architecture
28
+
29
+ This gem is essentially a simple, custom workflow system. The core of the workflow are the tasks. You can - and should -
30
+ create your own tasks by creating new classes inherited from ::Libis::Workflow::Task. The ::Libis::Workflow::Task class
31
+ and the included ::Libis::Workflow::Base::Logger module provide the necessary attributes and methods to make them work
32
+ in the workflow. See the detailed documentation for the class and module for more information.
33
+
34
+ The objects that the tasks will be working on should include the ::Libis::Workflow::Base::WorkItem module.
35
+ When working with file objects the module ::Libis::Workflow::Base::FileItem and/or ::Libis::Workflow::Base::DirItem
36
+ modules should be included for additional file-specific functionality.
37
+ Work items can be organized in different types and a hierarchical structure. A simple implementation of work items with
38
+ in-memory storage is provided as classes ::Libis::Workflow::WorkItem, ::Libis::Workflow::FileItem and
39
+ ::Libis::Workflow::DirItem.
40
+
41
+ All the tasks will be organized into a workflow object for which a base module ::Libis::Workflow::Base::Workflow is
42
+ provided. It contains all the basic logic required for proper configuration and operation. Again an in-memory
43
+ implementation is provided in the class ::Libis::Workflow::Workflow for your convenience to be used as-is or to derive
44
+ your own from.
45
+
46
+ The Job class is responsible for instantiating a run-time workflow execution object - a Run - that captures the
47
+ configuration and workitems generated while executing the tasks. Essential logic is provided in the module
48
+ ::Libis::Workflow::Base::Run with a simple in-memory implementation in ::Libis::Workflow::Run. The run object's class
49
+ name has to be provided to the job configuration so that the job can instantiate the correct object. The run object
50
+ will be able to execute the tasks in proper order on all the WorkItems supplied/collected. Each task can be implemented
51
+ with code to run or simply contain a list of child tasks.
52
+
53
+ The whole ingester workflow is configured by a Singleton object ::Libis::Workflow::Config which contains settings for
54
+ logging and paths where tasks and workitems can be found.
55
+
56
+ ## Usage
57
+
58
+ You should start by including the following line in your source code:
59
+
60
+ ```ruby
61
+ require 'libis-workflow'
62
+ ```
63
+
64
+ This will load all of the Libis Workflow framework into your environment, but including only the required parts is OK as
65
+ well. This is shown in the examples below.
66
+
67
+ ### Workflows and Jobs
68
+
69
+ An implementation of ::Libis::Workflow::Base::Workflow contains the definition of a workflow. Once instantiated, it can
70
+ be run by calling the 'execute' method on a job object created for that workflow. This will create an instance of an
71
+ implementation of ::Libis::Workflow::Base::Run, configure it and call the 'run' method on it. The Workflow constructor
72
+ takes no arguments, but it should be configured by calling the 'configure' method with the workflow configuration as an
73
+ argument. The job's 'execute' method takes an option Hash as argument with extra/overriding configuration values.
74
+
75
+ ### Job configuration
76
+ A job configuration is a Hash with:
77
+ * name: String to identify the workflow
78
+ * description: String with detailed textual information
79
+ * workflow: Object reference to a Workflow that contains the task configuration
80
+ * run_object: String with class name of the ::Libis::Workflow::Base::Run implementation to be created. An instance of
81
+ this class will be created for each run and serves as the root work item for that particular run.
82
+ * input: Hash with input parameter values for the workflow
83
+
84
+ #### Workflow configuration
85
+
86
+ A workflow configuration is a Hash with:
87
+ * name: String to identify the workflow
88
+ * description: String with detailed textual information
89
+ * tasks: Array of task descriptions
90
+ * input: Hash with input variable definitions
91
+
92
+ ##### Task description
93
+
94
+ is a Hash with:
95
+ * class: String with class name of the task
96
+ * name: String with the name of the task
97
+ * tasks: Array with task definitions of sub-tasks
98
+ * any task parameter values. Each task can define parameters that configure the task. It is using the
99
+ ::Libis::Tools::Parameter class for this.
100
+
101
+ The ::Libis::Workflow::Task base class already defines the following parameters:
102
+ * recursive: Run the task on all subitems recursively. Default: false
103
+ * abort_recursion_on_failure: Stop processing items recursively if one item fails. Default: false
104
+ * retry_count: Number of times to retry the task. Default: 0
105
+ * retry_interval: Number of seconds to wait between retries. Default: 10
106
+
107
+ If 'class' is not present, the default '::Libis::Workflow::TaskGroup' with the given name will be instantiated, which
108
+ performs each sub-task on the item.
109
+
110
+ If the task is configured to be recursive, it will iterate over the child items and perform each sub-task on each of
111
+ the child items. If a 'class' value is given, an instance of that class will be created and the task will be handed
112
+ the work item to process on. See the chapter on 'Tasks' below for more information on tasks.
113
+
114
+ Note that a task with custom processing will not execute sub-tasks. If you configured a processing task with subtasks
115
+ an exception will be thrown when trying to execute the job.
116
+
117
+ ##### Input variable definition
118
+
119
+ The input variables define parameters for the workflow. When a job is executed, it can provide values for any of these
120
+ input variables and the workflow run will use the new values instead of the defaults.
121
+
122
+ The key of the input Hash is the unique name of the variable. The value is another Hash with the parameter definition.
123
+ See ::Libis::Tools::Parameter for the content of this Hash.
124
+
125
+ An additional property of the parameters is the 'propagate_to' property. It defines how the workflow run should push
126
+ the values set for the input parameters to the parameters on the tasks. These task parameters can be addressed by a
127
+ '<Task class or Task name>[#<parameter name>]' string. If necessary the task class or name may be specified as a full
128
+ path with '/' separators. The parameter name part is optional and considered to be the same as the input parameter name
129
+ if absent.
130
+
131
+ #### Run-time configuration
132
+
133
+ The job's 'execute' method takes an optional Hash as argument which will complement and override the options Hash
134
+ described in the previous chapter.
135
+
136
+ Once the workflow is configured and the root work item instantiated, the method will run each top-level task on the root
137
+ work item in sequence until all tasks have completed successfully or a task has failed.
138
+
139
+ ### Work items
140
+
141
+ Creating your own work items is highly recommended and is fairly easy:
142
+
143
+ ```ruby
144
+
145
+ require 'libis/workflow'
146
+
147
+ class MyWorkItem < ::Libis::Workflow::WorkItem
148
+ attr_accessor :name
149
+
150
+ def initialize
151
+ @name = 'My work item'
152
+ super # Note: this is important as the base class requires some initialization
153
+ end
154
+ end
155
+ ```
156
+
157
+ or if a custom storage implementation is desired, a number of data items and methods require implementation:
158
+
159
+ ```ruby
160
+
161
+ require 'libis/workflow'
162
+
163
+ class MyWorkItem < MyStorageItem
164
+ include ::Libis::Workflow::Base::WorkItem
165
+
166
+ stored_attribute :parent
167
+ stored_attribute :items
168
+ stored_attribute :options
169
+ stored_attribute :properties
170
+ stored_attribute :status_log
171
+ stored_attribute :summary
172
+
173
+ def initialize
174
+ self.parent = nil
175
+ self.items = []
176
+ self.options = {}
177
+ self.properties = {}
178
+ self.status_log = []
179
+ self.summary = {}
180
+ end
181
+
182
+ protected
183
+
184
+ def add_status_log(info)
185
+ self.status_log << info
186
+ end
187
+
188
+ end
189
+ ```
190
+
191
+ Work items that are file-based can derive from the ::Libis::Workflow::FileItem class:
192
+
193
+ ```ruby
194
+
195
+ require 'libis/workflow'
196
+
197
+ class MyFileItem < ::Libis::Workflow::FileItem
198
+
199
+ def initialize(file)
200
+ filename = file
201
+ super
202
+ end
203
+
204
+ def filesize
205
+ properties[:size]
206
+ end
207
+
208
+ def fixity_check(checksum)
209
+ properties[:checksum] == checksum
210
+ end
211
+
212
+ end
213
+ ```
214
+
215
+ or include the ::Libis::Workflow::Base::FileItem module:
216
+
217
+ ```ruby
218
+
219
+ require 'libis/workflow'
220
+
221
+ class MyFileItem < MyWorkItem
222
+ include ::Libis::Workflow::Base::FileItem
223
+
224
+ def initialize(file)
225
+ filename = file
226
+ super
227
+ end
228
+
229
+ def filesize
230
+ properties[:size]
231
+ end
232
+
233
+ def fixity_check(checksum)
234
+ properties[:checksum] == checksum
235
+ end
236
+
237
+ end
238
+ ```
239
+
240
+
241
+
242
+ ## Tasks
243
+
244
+ Tasks should inherit from ::Libis::Workflow::Task and specify the actions it wants to
245
+ perform on each work item:
246
+
247
+ ```ruby
248
+
249
+ class MyTask < ::Libis::Workflow::Task
250
+
251
+ def process_item(item)
252
+ if do_something(item)
253
+ info "Did something"
254
+ else
255
+ raise ::Libis::WorkflowError, "Something went wrong"
256
+ end
257
+ rescue Exception => e
258
+ error "Fatal problem, aborting"
259
+ raise ::Libis::WorkflowAbort, "Fatal problem"
260
+ ensure
261
+ item
262
+ end
263
+
264
+ end
265
+ ```
266
+
267
+ As seen above, the task should define a method called process_item that takes one argument. The argument will be a
268
+ reference to the work item that it needs to perform an action on. The task has several options to progress after
269
+ performing its actions:
270
+ * return. This is considered a normal and successful operation result. After a successful return the item's status will
271
+ be set to 'done' for the given task.
272
+ * raise a ::Libis::WorkflowError. Indicates that something went wrong during the processing of the item. The item's
273
+ status will be set to failed for the given task and the exception message will be printed in the error log. Processing
274
+ will continue with the next item. This action is recommended for temporary or recoverable errors. The parent item will
275
+ be flagged as 'failed' if any of the child items failed.
276
+ * raise a ::Libis::WorkflowAbort. A severe and fatal error has occurred. Processing will abort immediately and the
277
+ failure status will be escalated to all items up the item hierarchy. Due to the escalating behaviour, no message is
278
+ printed in the error log automatically, so it is up to the task to log an appropriate error message itself.
279
+ * raise any other Exception. Should be avoided, but if it happens nevertheless, it will cause the item to fail for the
280
+ given task and the exception message to be logged in the error log. It will not attempt to process the other items.
281
+
282
+ ### Controlling behavior with parameters
283
+
284
+ You have some options to control how the task will behave in special cases. These are controlled using parameters on
285
+ the task, which can be set (and fixed with the 'frozen' option) on the task, but can be configured at run-time with the
286
+ help of workflow input parameters and run options.
287
+
288
+ #### Performing an action on the work item and all child items recursively
289
+
290
+ With the 'recursive' parameter set to true, your task's process_item method will be called for the work item and then
291
+ once for each child and each child's children recursively.
292
+
293
+ Note: you should not make both parent and child tasks recursive as this will cause the subitems to be processed
294
+ multiple times. If you make the parent task recursive, all tasks and sub-tasks will be performed on each item in the
295
+ tree. Making the child tasks recursive makes the parent task only perform on the top item and then performs each
296
+ sub-task one-by-one for the whole item tree. The last option is the most efficient.
297
+
298
+ Attention should be paid for the
299
+
300
+ #### Retrying if task failed
301
+
302
+ The parameters 'retry_count' and 'retry_interval' control the task's behaviour if a task has to wait for a result for an
303
+ asynchronous job. A task could be waiting for a result from the other job which will be indicated by an 'ASYNC_WAIT'
304
+ status. Alternatively the task may know that the job is halted and waiting for user interaction, indicated with the
305
+ 'ASYNC_HALT' status. Only when the status is 'ASYNC_WAIT', the task will retry its process. By default the 'retry_count'
306
+ is 0, which causes the task not to retry. Before retrying the task will pause for the number of seconds given in the
307
+ parameter 'retry_interval', which is 10 by default.
308
+
309
+ ### Pre- and postprocessing
310
+
311
+ The default implementation of 'process' is to call 'pre_process' and then call 'process_item' on each child item,
312
+ followed by calling 'post_process'. The methods 'pre_process' and 'post_process' are no-operation methods by default,
313
+ but can be overwritten if needed.
314
+
315
+ The 'pre_process' is intended to re-initialize the task before processing a new item. It can also be used to force the
316
+ task to skip processing the items altogether by calling the 'skip_processing_item' method or to prevent a recursive
317
+ task from traveling further down the item tree by calling the 'stop_processing_subitems' method. The temporary locks
318
+ behave as reset-on-read switches and are only active for the processing of the current item.
319
+
320
+ The 'post_process' method can be used to update any object after the item processing.
321
+
322
+ ### Convenience functions
323
+
324
+ #### get_root_item()
325
+
326
+ Returns the work item that the workflow started with (and is the root/grand parent of all work items in the ingest run).
327
+
328
+ #### get_work_dir()
329
+
330
+ Returns the work directory as configured for the current ingest run. The work directory can be used as scrap directory
331
+ for creating derived files that can be added as work items to the current flow or for downloading files that will be
332
+ processed later. The work directory is not automatically cleaned up, which is considered a task for the workflow implementation.
333
+
334
+ #### capture_cmd(cmd, *args)
335
+
336
+ Allows the task to run an external command-line program and capture its stdout and stderr output at the same time. The
337
+ first argument is mandatory and should be the command-line program that has to be executed. An arbitrary number of
338
+ command-line arguments may follow.
339
+
340
+ The return value is an array with three elements: the status code returned by the command, the stdout string and the
341
+ stderr string.
342
+
343
+ #### names()
344
+
345
+ An array of strings with the hierarchical path of tasks leading to the current task. Can be useful for log messages.
346
+ The method 'namepath' returns a '/' separated path of tasks.
347
+
348
+ #### (debug/info/warn/error/fatal)(message, *args)
349
+
350
+ Convenience function for creating log entries. The logger set in ::Libis::Workflow::Config is used to dump log messages.
351
+
352
+ The first argument is mandatory and can be:
353
+ * an integer. The integer is used to look up the message text in ::Libis::Workflow::MessageRegistry.
354
+ * a static string. The message text is used as-is.
355
+ * a string with placeholders as used in String#%. Args can either be an array or a hash. See also Kernel#sprintf.
356
+
357
+ The log message is logged to the general logging and attached to the current work item (workitem) unless another
358
+ work item is passed as first argument after the message.
359
+
360
+ #### check_item_type(klass, item = nil)
361
+
362
+ Checks if the work item is of the given class. 'workitem' is checked if the item argument is not present. If the check
363
+ fails a Runtime exception is thrown which will cause the task to abort if not caught.
364
+
365
+ #### item_type?(klass, item = nil)
366
+
367
+ A less severe variant of check_item_type which returns a boolean (false if failed).
368
+
369
+ #### to_status(status)
370
+
371
+ Simply prepends the status text with the current task name. The output of this function is typically what the work item
372
+ status field should be set to.
373
+
374
+ ## Contributing
375
+
376
+ 1. Fork it ( https://github.com/libis/workflow/fork )
377
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
378
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
379
+ 4. Push to the branch (`git push origin my-new-feature`)
380
+ 5. Create new Pull Request