libis-workflow 2.0.25 → 2.0.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/.coveralls.yml +1 -1
  3. data/.gitignore +36 -36
  4. data/.travis.yml +32 -32
  5. data/Gemfile +4 -4
  6. data/LICENSE +20 -20
  7. data/README.md +380 -380
  8. data/Rakefile +6 -6
  9. data/lib/libis/exceptions.rb +6 -6
  10. data/lib/libis/workflow.rb +41 -41
  11. data/lib/libis/workflow/action.rb +24 -24
  12. data/lib/libis/workflow/base/dir_item.rb +13 -13
  13. data/lib/libis/workflow/base/file_item.rb +80 -80
  14. data/lib/libis/workflow/base/job.rb +83 -83
  15. data/lib/libis/workflow/base/logging.rb +66 -66
  16. data/lib/libis/workflow/base/run.rb +97 -95
  17. data/lib/libis/workflow/base/work_item.rb +173 -173
  18. data/lib/libis/workflow/base/workflow.rb +149 -149
  19. data/lib/libis/workflow/config.rb +22 -22
  20. data/lib/libis/workflow/dir_item.rb +10 -10
  21. data/lib/libis/workflow/file_item.rb +15 -15
  22. data/lib/libis/workflow/job.rb +28 -28
  23. data/lib/libis/workflow/message_registry.rb +30 -30
  24. data/lib/libis/workflow/run.rb +34 -34
  25. data/lib/libis/workflow/status.rb +133 -133
  26. data/lib/libis/workflow/task.rb +318 -316
  27. data/lib/libis/workflow/task_group.rb +72 -71
  28. data/lib/libis/workflow/task_runner.rb +34 -34
  29. data/lib/libis/workflow/version.rb +5 -5
  30. data/lib/libis/workflow/work_item.rb +37 -37
  31. data/lib/libis/workflow/worker.rb +42 -42
  32. data/lib/libis/workflow/workflow.rb +20 -20
  33. data/libis-workflow.gemspec +38 -38
  34. data/spec/items.rb +2 -2
  35. data/spec/items/test_dir_item.rb +13 -13
  36. data/spec/items/test_file_item.rb +16 -16
  37. data/spec/items/test_run.rb +8 -8
  38. data/spec/spec_helper.rb +8 -8
  39. data/spec/task_spec.rb +15 -15
  40. data/spec/tasks/camelize_name.rb +12 -12
  41. data/spec/tasks/checksum_tester.rb +32 -32
  42. data/spec/tasks/collect_files.rb +47 -47
  43. data/spec/workflow_spec.rb +154 -154
  44. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b3de031988a8f96ef1f3fee1f55108d76da4bd75
4
- data.tar.gz: 623ca157c5b70718bca975c746b9971a246fc2b7
3
+ metadata.gz: 9ea9a53ae48e3988b4bdd736ab68334309466c4d
4
+ data.tar.gz: 342ab04cba426925e1d5170aa9f9221c25eab308
5
5
  SHA512:
6
- metadata.gz: 56eee1c03848b2e55285298e60102cc31ad056fe60d448446d28d275b143b0d3c8aee2f9cc51d09eaafb3af53f13d542c8bf70b71b6e10cf5228ecae09b84e60
7
- data.tar.gz: b7ea256e14f8e2220ddbc551f0b641ca72f03121c04e04dee446560f537185717860dc1255571c9f1e2b078325fe3df59078a571a6a382d73fcb9400feb62d33
6
+ metadata.gz: a6ab39cccd0598f5954452c7f6ef88f261bfa15904ba021c5d0a5abd5cc632f5634337ce6c250356e702d79e555734cb40b80ac5f5a1a77aafd2572251b50243
7
+ data.tar.gz: 201d95fe13fe3469a4266d4cd3b9164ed695bfc3dacc16c51cea0c33597c5f426975bb9d42abc3eb03e8cdf9ef85d57eb00ddb8fce7749abb0b72e1c9f2c6daa
data/.coveralls.yml CHANGED
@@ -1,2 +1,2 @@
1
- service_name: travis-ci
1
+ service_name: travis-ci
2
2
  repo_token: TMosCEIw4eu2hK05NxyY2UYIRJYQPzemt
data/.gitignore CHANGED
@@ -1,37 +1,37 @@
1
- *.gem
2
- *.rbc
3
- /.config
4
- /coverage/
5
- /InstalledFiles
6
- /pkg/
7
- /spec/reports/
8
- /test/tmp/
9
- /test/version_tmp/
10
- /tmp/
11
-
12
- ## Specific to RubyMotion:
13
- .dat*
14
- .repl_history
15
- build/
16
-
17
- ## Documentation cache and generated files:
18
- /.yardoc/
19
- /_yardoc/
20
- /doc/
21
- /rdoc/
22
-
23
- ## Environment normalisation:
24
- /.bundle/
25
- /lib/bundler/man/
26
-
27
- # for a library or gem, you might want to ignore these files since the code is
28
- # intended to run in multiple environments; otherwise, check them in:
29
- Gemfile.lock
30
- .ruby-version
31
- .ruby-gemset
32
-
33
- # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
- .rvmrc
35
-
36
- .idea/
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ Gemfile.lock
30
+ .ruby-version
31
+ .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
35
+
36
+ .idea/
37
37
  /spec/coverage
data/.travis.yml CHANGED
@@ -1,32 +1,32 @@
1
- language: ruby
2
- sudo: false
3
- bundler_args: --without development
4
- cache: bundler
5
- rvm:
6
- - 2.1.0
7
- - 2.2.0
8
- - ruby-head
9
- - jruby-9.0.1.0
10
- jdk:
11
- - openjdk7
12
- - oraclejdk7
13
- - oraclejdk8
14
- matrix:
15
- exclude:
16
- - rvm: 2.1.0
17
- jdk: oraclejdk7
18
- - rvm: 2.1.0
19
- jdk: oraclejdk8
20
- - rvm: 2.2.0
21
- jdk: oraclejdk7
22
- - rvm: 2.2.0
23
- jdk: oraclejdk8
24
- - rvm: ruby-head
25
- jdk: oraclejdk7
26
- - rvm: ruby-head
27
- jdk: oraclejdk8
28
- allow_failures:
29
- - rvm: ruby-head
30
- branches:
31
- only:
32
- - master
1
+ language: ruby
2
+ sudo: false
3
+ bundler_args: --without development
4
+ cache: bundler
5
+ rvm:
6
+ - 2.1.0
7
+ - 2.2.0
8
+ - ruby-head
9
+ - jruby-9.0.1.0
10
+ jdk:
11
+ - openjdk7
12
+ - oraclejdk7
13
+ - oraclejdk8
14
+ matrix:
15
+ exclude:
16
+ - rvm: 2.1.0
17
+ jdk: oraclejdk7
18
+ - rvm: 2.1.0
19
+ jdk: oraclejdk8
20
+ - rvm: 2.2.0
21
+ jdk: oraclejdk7
22
+ - rvm: 2.2.0
23
+ jdk: oraclejdk8
24
+ - rvm: ruby-head
25
+ jdk: oraclejdk7
26
+ - rvm: ruby-head
27
+ jdk: oraclejdk8
28
+ allow_failures:
29
+ - rvm: ruby-head
30
+ branches:
31
+ only:
32
+ - master
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
1
- source 'https://rubygems.org'
2
-
3
- gemspec name: 'libis-workflow', development_group: :test
4
-
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec name: 'libis-workflow', development_group: :test
4
+
data/LICENSE CHANGED
@@ -1,21 +1,21 @@
1
- The MIT License (MIT)
2
-
3
- Copyright (c) 2014 LIBIS
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 LIBIS
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
21
  SOFTWARE.
data/README.md CHANGED
@@ -1,380 +1,380 @@
1
- [![Gem Version](https://badge.fury.io/rb/libis-workflow.svg)](http://badge.fury.io/rb/libis-workflow)
2
- [![Build Status](https://travis-ci.org/Kris-LIBIS/workflow.svg?branch=master)](https://travis-ci.org/Kris-LIBIS/workflow)
3
- [![Coverage Status](https://img.shields.io/coveralls/Kris-LIBIS/workflow.svg)](https://coveralls.io/r/Kris-LIBIS/workflow)
4
- [![Dependency Status](https://gemnasium.com/Kris-LIBIS/workflow.svg)](https://gemnasium.com/Kris-LIBIS/workflow)
5
-
6
- # LIBIS Workflow
7
-
8
- LIBIS Workflow framework
9
-
10
- ## Installation
11
-
12
- Add this line to your application's Gemfile:
13
-
14
- ```ruby
15
- gem 'libis-workflow'
16
- ```
17
-
18
-
19
- And then execute:
20
-
21
- $ bundle
22
-
23
- Or install it yourself as:
24
-
25
- $ gem install 'libis-workflow'
26
-
27
- ## Architecture
28
-
29
- This gem is essentially a simple, custom workflow system. The core of the workflow are the tasks. You can - and should -
30
- create your own tasks by creating new classes inherited from ::Libis::Workflow::Task. The ::Libis::Workflow::Task class
31
- and the included ::Libis::Workflow::Base::Logger module provide the necessary attributes and methods to make them work
32
- in the workflow. See the detailed documentation for the class and module for more information.
33
-
34
- The objects that the tasks will be working on should include the ::Libis::Workflow::Base::WorkItem module.
35
- When working with file objects the module ::Libis::Workflow::Base::FileItem and/or ::Libis::Workflow::Base::DirItem
36
- modules should be included for additional file-specific functionality.
37
- Work items can be organized in different types and a hierarchical structure. A simple implementation of work items with
38
- in-memory storage is provided as classes ::Libis::Workflow::WorkItem, ::Libis::Workflow::FileItem and
39
- ::Libis::Workflow::DirItem.
40
-
41
- All the tasks will be organized into a workflow object for which a base module ::Libis::Workflow::Base::Workflow is
42
- provided. It contains all the basic logic required for proper configuration and operation. Again an in-memory
43
- implementation is provided in the class ::Libis::Workflow::Workflow for your convenience to be used as-is or to derive
44
- your own from.
45
-
46
- The Job class is responsible for instantiating a run-time workflow execution object - a Run - that captures the
47
- configuration and workitems generated while executing the tasks. Essential logic is provided in the module
48
- ::Libis::Workflow::Base::Run with a simple in-memory implementation in ::Libis::Workflow::Run. The run object's class
49
- name has to be provided to the job configuration so that the job can instantiate the correct object. The run object
50
- will be able to execute the tasks in proper order on all the WorkItems supplied/collected. Each task can be implemented
51
- with code to run or simply contain a list of child tasks.
52
-
53
- The whole ingester workflow is configured by a Singleton object ::Libis::Workflow::Config which contains settings for
54
- logging and paths where tasks and workitems can be found.
55
-
56
- ## Usage
57
-
58
- You should start by including the following line in your source code:
59
-
60
- ```ruby
61
- require 'libis-workflow'
62
- ```
63
-
64
- This will load all of the Libis Workflow framework into your environment, but including only the required parts is OK as
65
- well. This is shown in the examples below.
66
-
67
- ### Workflows and Jobs
68
-
69
- An implementation of ::Libis::Workflow::Base::Workflow contains the definition of a workflow. Once instantiated, it can
70
- be run by calling the 'execute' method on a job object created for that workflow. This will create an instance of an
71
- implementation of ::Libis::Workflow::Base::Run, configure it and call the 'run' method on it. The Workflow constructor
72
- takes no arguments, but it should be configured by calling the 'configure' method with the workflow configuration as an
73
- argument. The job's 'execute' method takes an option Hash as argument with extra/overriding configuration values.
74
-
75
- ### Job configuration
76
- A job configuration is a Hash with:
77
- * name: String to identify the workflow
78
- * description: String with detailed textual information
79
- * workflow: Object reference to a Workflow that contains the task configuration
80
- * run_object: String with class name of the ::Libis::Workflow::Base::Run implementation to be created. An instance of
81
- this class will be created for each run and serves as the root work item for that particular run.
82
- * input: Hash with input parameter values for the workflow
83
-
84
- #### Workflow configuration
85
-
86
- A workflow configuration is a Hash with:
87
- * name: String to identify the workflow
88
- * description: String with detailed textual information
89
- * tasks: Array of task descriptions
90
- * input: Hash with input variable definitions
91
-
92
- ##### Task description
93
-
94
- is a Hash with:
95
- * class: String with class name of the task
96
- * name: String with the name of the task
97
- * tasks: Array with task definitions of sub-tasks
98
- * any task parameter values. Each task can define parameters that configure the task. It is using the
99
- ::Libis::Tools::Parameter class for this.
100
-
101
- The ::Libis::Workflow::Task base class already defines the following parameters:
102
- * recursive: Run the task on all subitems recursively. Default: false
103
- * abort_recursion_on_failure: Stop processing items recursively if one item fails. Default: false
104
- * retry_count: Number of times to retry the task. Default: 0
105
- * retry_interval: Number of seconds to wait between retries. Default: 10
106
-
107
- If 'class' is not present, the default '::Libis::Workflow::TaskGroup' with the given name will be instantiated, which
108
- performs each sub-task on the item.
109
-
110
- If the task is configured to be recursive, it will iterate over the child items and perform each sub-task on each of
111
- the child items. If a 'class' value is given, an instance of that class will be created and the task will be handed
112
- the work item to process on. See the chapter on 'Tasks' below for more information on tasks.
113
-
114
- Note that a task with custom processing will not execute sub-tasks. If you configured a processing task with subtasks
115
- an exception will be thrown when trying to execute the job.
116
-
117
- ##### Input variable definition
118
-
119
- The input variables define parameters for the workflow. When a job is executed, it can provide values for any of these
120
- input variables and the workflow run will use the new values instead of the defaults.
121
-
122
- The key of the input Hash is the unique name of the variable. The value is another Hash with the parameter definition.
123
- See ::Libis::Tools::Parameter for the content of this Hash.
124
-
125
- An additional property of the parameters is the 'propagate_to' property. It defines how the workflow run should push
126
- the values set for the input parameters to the parameters on the tasks. These task parameters can be addressed by a
127
- '<Task class or Task name>[#<parameter name>]' string. If necessary the task class or name may be specified as a full
128
- path with '/' separators. The parameter name part is optional and considered to be the same as the input parameter name
129
- if absent.
130
-
131
- #### Run-time configuration
132
-
133
- The job's 'execute' method takes an optional Hash as argument which will complement and override the options Hash
134
- described in the previous chapter.
135
-
136
- Once the workflow is configured and the root work item instantiated, the method will run each top-level task on the root
137
- work item in sequence until all tasks have completed successfully or a task has failed.
138
-
139
- ### Work items
140
-
141
- Creating your own work items is highly recommended and is fairly easy:
142
-
143
- ```ruby
144
-
145
- require 'libis/workflow'
146
-
147
- class MyWorkItem < ::Libis::Workflow::WorkItem
148
- attr_accessor :name
149
-
150
- def initialize
151
- @name = 'My work item'
152
- super # Note: this is important as the base class requires some initialization
153
- end
154
- end
155
- ```
156
-
157
- or if a custom storage implementation is desired, a number of data items and methods require implementation:
158
-
159
- ```ruby
160
-
161
- require 'libis/workflow'
162
-
163
- class MyWorkItem < MyStorageItem
164
- include ::Libis::Workflow::Base::WorkItem
165
-
166
- stored_attribute :parent
167
- stored_attribute :items
168
- stored_attribute :options
169
- stored_attribute :properties
170
- stored_attribute :status_log
171
- stored_attribute :summary
172
-
173
- def initialize
174
- self.parent = nil
175
- self.items = []
176
- self.options = {}
177
- self.properties = {}
178
- self.status_log = []
179
- self.summary = {}
180
- end
181
-
182
- protected
183
-
184
- def add_status_log(info)
185
- self.status_log << info
186
- end
187
-
188
- end
189
- ```
190
-
191
- Work items that are file-based can derive from the ::Libis::Workflow::FileItem class:
192
-
193
- ```ruby
194
-
195
- require 'libis/workflow'
196
-
197
- class MyFileItem < ::Libis::Workflow::FileItem
198
-
199
- def initialize(file)
200
- filename = file
201
- super
202
- end
203
-
204
- def filesize
205
- properties[:size]
206
- end
207
-
208
- def fixity_check(checksum)
209
- properties[:checksum] == checksum
210
- end
211
-
212
- end
213
- ```
214
-
215
- or include the ::Libis::Workflow::Base::FileItem module:
216
-
217
- ```ruby
218
-
219
- require 'libis/workflow'
220
-
221
- class MyFileItem < MyWorkItem
222
- include ::Libis::Workflow::Base::FileItem
223
-
224
- def initialize(file)
225
- filename = file
226
- super
227
- end
228
-
229
- def filesize
230
- properties[:size]
231
- end
232
-
233
- def fixity_check(checksum)
234
- properties[:checksum] == checksum
235
- end
236
-
237
- end
238
- ```
239
-
240
-
241
-
242
- ## Tasks
243
-
244
- Tasks should inherit from ::Libis::Workflow::Task and specify the actions it wants to
245
- perform on each work item:
246
-
247
- ```ruby
248
-
249
- class MyTask < ::Libis::Workflow::Task
250
-
251
- def process_item(item)
252
- if do_something(item)
253
- info "Did something"
254
- else
255
- raise ::Libis::WorkflowError, "Something went wrong"
256
- end
257
- rescue Exception => e
258
- error "Fatal problem, aborting"
259
- raise ::Libis::WorkflowAbort, "Fatal problem"
260
- ensure
261
- item
262
- end
263
-
264
- end
265
- ```
266
-
267
- As seen above, the task should define a method called process_item that takes one argument. The argument will be a
268
- reference to the work item that it needs to perform an action on. The task has several options to progress after
269
- performing its actions:
270
- * return. This is considered a normal and successful operation result. After a successful return the item's status will
271
- be set to 'done' for the given task.
272
- * raise a ::Libis::WorkflowError. Indicates that something went wrong during the processing of the item. The item's
273
- status will be set to failed for the given task and the exception message will be printed in the error log. Processing
274
- will continue with the next item. This action is recommended for temporary or recoverable errors. The parent item will
275
- be flagged as 'failed' if any of the child items failed.
276
- * raise a ::Libis::WorkflowAbort. A severe and fatal error has occurred. Processing will abort immediately and the
277
- failure status will be escalated to all items up the item hierarchy. Due to the escalating behaviour, no message is
278
- printed in the error log automatically, so it is up to the task to log the error appropriately itself.
279
- * raise any other Exception. Should be avoided, but if it happens nevertheless, it will cause the item to fail for the
280
- given task and the exception message to be logged in the error log. It will not attempt to process the other items.
281
-
282
- ### Controlling behavior with parameters
283
-
284
- You have some options to control how the task will behave in special cases. These are controlled using parameters on
285
- the task, which can be set (and fixed with the 'frozen' option) on the task, but can be configured at run-time with the
286
- help of workflow input parameters and run options.
287
-
288
- #### Performing an action on the work item and all child items recursively
289
-
290
- With the 'recursive' parameter set to true, your task's process_item method will be called for the work item and then
291
- once for each child and each child's children recursively.
292
-
293
- Note: you should not make both parent and child tasks recursive as this will cause the subitems to be processed
294
- multiple times. If you make the parent task recursive, all tasks and sub-tasks will be performed on each item in the
295
- tree. Making the child tasks recursive makes the parent task only perform on the top item and then performs each
296
- sub-task one-by-one for the whole item tree. The last option is the most efficient.
297
-
298
- Attention should be paid for the
299
-
300
- #### Retrying if task failed
301
-
302
- The parameters 'retry_count' and 'retry_interval' control the task's behaviour if a task has to wait for a result for an
303
- asynchronous job. A task could be waiting for a result from the other job which will be indicated by a 'ASYNC_WAIT'
304
- status. Alternatively the task may know that the job is halted and waiting for user interaction, indicated with the
305
- 'ASYNC_HALT' status. Only when the status is 'ASYNC_WAIT', the task will retry its process. By default the 'retry_count'
306
- is 0, which causes the task not to retry. Before retrying the task will pause for the number of seconds given in the
307
- parameter 'retry_interval', which is 10 by default.
308
-
309
- ### Pre- and postprocessing
310
-
311
- The default implementation of 'process' is to call 'pre_process' and then call 'process_item' on each child item,
312
- followed by calling 'post_process'. The methods 'pre_process' and 'post_process' are no-operation methods by default,
313
- but can be overwritten if needed.
314
-
315
- The 'pre_process' is intended to re-initialize the task before processing a new item. It can also be used to force the
316
- task to skip processing the items altogether by calling the 'skip_processing_item' method or to prevent a recursive
317
- task from traveling further down the item tree by calling the 'stop_processing_subitems' method. The temporary locks
318
- behave as reset-on-read switches and are only active for the processing of the current item.
319
-
320
- The 'post_process' method can be used to update any object after the item processing.
321
-
322
- ### Convenience functions
323
-
324
- #### get_root_item()
325
-
326
- Returns the work item that the workflow started with (and is the root/grand parent of all work items in the ingest run).
327
-
328
- #### get_work_dir()
329
-
330
- Returns the work directory as configured for the current ingest run. The work directory can be used as scrap directory
331
- for creating derived files that can be added as work items to the current flow or for downloading files that will be
332
- processed later. The work directory is not automatically cleaned up, which is considered a task for the workflow implementation.
333
-
334
- #### capture_cmd(cmd, *args)
335
-
336
- Allows the task to run an external command-line program and capture its stdout and stderr output at the same time. The
337
- first argument is mandatory and should be the command-line program that has to be executed. An arbitrary number of
338
- command-line arguments may follow.
339
-
340
- The return value is an array with three elements: the status code returned by the command, the stdout string and the
341
- stderr string.
342
-
343
- #### names()
344
-
345
- An array of strings with the hierarchical path of tasks leading to the current task. Can be useful for log messages.
346
- The method 'namepath' returns a '/' separated path of tasks.
347
-
348
- #### (debug/info/warn/error/fatal)(message, *args)
349
-
350
- Convenience function for creating log entries. The logger set in ::Libis::Workflow::Config is used to dump log messages.
351
-
352
- The first argument is mandatory and can be:
353
- * an integer. The integer is used to look up the message text in ::Libis::Workflow::MessageRegistry.
354
- * a static string. The message text is used as-is.
355
- * a string with placement holders as used in String#%. Args can either be an array or a hash. See also Kernel#sprintf.
356
-
357
- The log message is logged to the general logging and attached to the current work item (workitem) unless another
358
- work item is passed as first argument after the message.
359
-
360
- #### check_item_type(klass, item = nil)
361
-
362
- Checks if the work item is of the given class. 'workitem' is checked if the item argument is not present. If the check
363
- fails a Runtime exception is thrown which will cause the task to abort if not caught.
364
-
365
- #### item_type?(klass, item = nil)
366
-
367
- A less severe variant of check_item_type which returns a boolean (false if failed).
368
-
369
- #### to_status(status)
370
-
371
- Simply prepends the status text with the current task name. The output of this function is typically what the work item
372
- status field should be set at.
373
-
374
- ## Contributing
375
-
376
- 1. Fork it ( https://github.com/libis/workflow/fork )
377
- 2. Create your feature branch (`git checkout -b my-new-feature`)
378
- 3. Commit your changes (`git commit -am 'Add some feature'`)
379
- 4. Push to the branch (`git push origin my-new-feature`)
380
- 5. Create new Pull Request
1
+ [![Gem Version](https://badge.fury.io/rb/libis-workflow.svg)](http://badge.fury.io/rb/libis-workflow)
2
+ [![Build Status](https://travis-ci.org/Kris-LIBIS/workflow.svg?branch=master)](https://travis-ci.org/Kris-LIBIS/workflow)
3
+ [![Coverage Status](https://img.shields.io/coveralls/Kris-LIBIS/workflow.svg)](https://coveralls.io/r/Kris-LIBIS/workflow)
4
+ [![Dependency Status](https://gemnasium.com/Kris-LIBIS/workflow.svg)](https://gemnasium.com/Kris-LIBIS/workflow)
5
+
6
+ # LIBIS Workflow
7
+
8
+ LIBIS Workflow framework
9
+
10
+ ## Installation
11
+
12
+ Add this line to your application's Gemfile:
13
+
14
+ ```ruby
15
+ gem 'libis-workflow'
16
+ ```
17
+
18
+
19
+ And then execute:
20
+
21
+ $ bundle
22
+
23
+ Or install it yourself as:
24
+
25
+ $ gem install 'libis-workflow'
26
+
27
+ ## Architecture
28
+
29
+ This gem is essentially a simple, custom workflow system. The core of the workflow are the tasks. You can - and should -
30
+ create your own tasks by creating new classes inherited from ::Libis::Workflow::Task. The ::Libis::Workflow::Task class
31
+ and the included ::Libis::Workflow::Base::Logger module provide the necessary attributes and methods to make them work
32
+ in the workflow. See the detailed documentation for the class and module for more information.
33
+
34
+ The objects that the tasks will be working on should include the ::Libis::Workflow::Base::WorkItem module.
35
+ When working with file objects the module ::Libis::Workflow::Base::FileItem and/or ::Libis::Workflow::Base::DirItem
36
+ modules should be included for additional file-specific functionality.
37
+ Work items can be organized in different types and a hierarchical structure. A simple implementation of work items with
38
+ in-memory storage is provided as classes ::Libis::Workflow::WorkItem, ::Libis::Workflow::FileItem and
39
+ ::Libis::Workflow::DirItem.
40
+
41
+ All the tasks will be organized into a workflow object for which a base module ::Libis::Workflow::Base::Workflow is
42
+ provided. It contains all the basic logic required for proper configuration and operation. Again an in-memory
43
+ implementation is provided in the class ::Libis::Workflow::Workflow for your convenience to be used as-is or to derive
44
+ your own from.
45
+
46
+ The Job class is responsible for instantiating a run-time workflow execution object - a Run - that captures the
47
+ configuration and workitems generated while executing the tasks. Essential logic is provided in the module
48
+ ::Libis::Workflow::Base::Run with a simple in-memory implementation in ::Libis::Workflow::Run. The run object's class
49
+ name has to be provided to the job configuration so that the job can instantiate the correct object. The run object
50
+ will be able to execute the tasks in proper order on all the WorkItems supplied/collected. Each task can be implemented
51
+ with code to run or simply contain a list of child tasks.
52
+
53
+ The whole ingester workflow is configured by a Singleton object ::Libis::Workflow::Config which contains settings for
54
+ logging and paths where tasks and workitems can be found.
55
+
56
+ ## Usage
57
+
58
+ You should start by including the following line in your source code:
59
+
60
+ ```ruby
61
+ require 'libis-workflow'
62
+ ```
63
+
64
+ This will load all of the Libis Workflow framework into your environment, but including only the required parts is OK as
65
+ well. This is shown in the examples below.
66
+
67
+ ### Workflows and Jobs
68
+
69
+ An implementation of ::Libis::Workflow::Base::Workflow contains the definition of a workflow. Once instantiated, it can
70
+ be run by calling the 'execute' method on a job object created for that workflow. This will create an instance of an
71
+ implementation of ::Libis::Workflow::Base::Run, configure it and call the 'run' method on it. The Workflow constructor
72
+ takes no arguments, but it should be configured by calling the 'configure' method with the workflow configuration as an
73
+ argument. The job's 'execute' method takes an option Hash as argument with extra/overriding configuration values.
74
+
75
+ ### Job configuration
76
+ A job configuration is a Hash with:
77
+ * name: String to identify the workflow
78
+ * description: String with detailed textual information
79
+ * workflow: Object reference to a Workflow that contains the task configuration
80
+ * run_object: String with class name of the ::Libis::Workflow::Base::Run implementation to be created. An instance of
81
+ this class will be created for each run and serves as the root work item for that particular run.
82
+ * input: Hash with input parameter values for the workflow
83
+
84
+ #### Workflow configuration
85
+
86
+ A workflow configuration is a Hash with:
87
+ * name: String to identify the workflow
88
+ * description: String with detailed textual information
89
+ * tasks: Array of task descriptions
90
+ * input: Hash with input variable definitions
91
+
92
+ ##### Task description
93
+
94
+ is a Hash with:
95
+ * class: String with class name of the task
96
+ * name: String with the name of the task
97
+ * tasks: Array with task definitions of sub-tasks
98
+ * any task parameter values. Each task can define parameters that configure the task. It is using the
99
+ ::Libis::Tools::Parameter class for this.
100
+
101
+ The ::Libis::Workflow::Task base class already defines the following parameters:
102
+ * recursive: Run the task on all subitems recursively. Default: false
103
+ * abort_recursion_on_failure: Stop processing items recursively if one item fails. Default: false
104
+ * retry_count: Number of times to retry the task. Default: 0
105
+ * retry_interval: Number of seconds to wait between retries. Default: 10
106
+
107
+ If 'class' is not present, the default '::Libis::Workflow::TaskGroup' with the given name will be instantiated, which
108
+ performs each sub-task on the item.
109
+
110
+ If the task is configured to be recursive, it will iterate over the child items and perform each sub-task on each of
111
+ the child items. If a 'class' value is given, an instance of that class will be created and the task will be handed
112
+ the work item to process on. See the chapter on 'Tasks' below for more information on tasks.
113
+
114
+ Note that a task with custom processing will not execute sub-tasks. If you configured a processing task with subtasks
115
+ an exception will be thrown when trying to execute the job.
116
+
117
+ ##### Input variable definition
118
+
119
+ The input variables define parameters for the workflow. When a job is executed, it can provide values for any of these
120
+ input variables and the workflow run will use the new values instead of the defaults.
121
+
122
+ The key of the input Hash is the unique name of the variable. The value is another Hash with the parameter definition.
123
+ See ::Libis::Tools::Parameter for the content of this Hash.
124
+
125
+ An additional property of the parameters is the 'propagate_to' property. It defines how the workflow run should push
126
+ the values set for the input parameters to the parameters on the tasks. These task parameters can be addressed by a
127
+ '<Task class or Task name>[#<parameter name>]' string. If necessary the task class or name may be specified as a full
128
+ path with '/' separators. The parameter name part is optional and considered to be the same as the input parameter name
129
+ if absent.
130
+
131
+ #### Run-time configuration
132
+
133
+ The job's 'execute' method takes an optional Hash as argument which will complement and override the options Hash
134
+ described in the previous chapter.
135
+
136
+ Once the workflow is configured and the root work item instantiated, the method will run each top-level task on the root
137
+ work item in sequence until all tasks have completed successfully or a task has failed.
138
+
139
+ ### Work items
140
+
141
+ Creating your own work items is highly recommended and is fairly easy:
142
+
143
+ ```ruby
144
+
145
+ require 'libis/workflow'
146
+
147
+ class MyWorkItem < ::Libis::Workflow::WorkItem
148
+ attr_accessor :name
149
+
150
+ def initialize
151
+ @name = 'My work item'
152
+ super # Note: this is important as the base class requires some initialization
153
+ end
154
+ end
155
+ ```
156
+
157
+ or if a custom storage implementation is desired, a number of data items and methods require implementation:
158
+
159
+ ```ruby
160
+
161
+ require 'libis/workflow'
162
+
163
+ class MyWorkItem < MyStorageItem
164
+ include ::Libis::Workflow::Base::WorkItem
165
+
166
+ stored_attribute :parent
167
+ stored_attribute :items
168
+ stored_attribute :options
169
+ stored_attribute :properties
170
+ stored_attribute :status_log
171
+ stored_attribute :summary
172
+
173
+ def initialize
174
+ self.parent = nil
175
+ self.items = []
176
+ self.options = {}
177
+ self.properties = {}
178
+ self.status_log = []
179
+ self.summary = {}
180
+ end
181
+
182
+ protected
183
+
184
+ def add_status_log(info)
185
+ self.status_log << info
186
+ end
187
+
188
+ end
189
+ ```
190
+
191
+ Work items that are file-based can derive from the ::Libis::Workflow::FileItem class:
192
+
193
+ ```ruby
194
+
195
+ require 'libis/workflow'
196
+
197
+ class MyFileItem < ::Libis::Workflow::FileItem
198
+
199
+ def initialize(file)
200
+ filename = file
201
+ super
202
+ end
203
+
204
+ def filesize
205
+ properties[:size]
206
+ end
207
+
208
+ def fixity_check(checksum)
209
+ properties[:checksum] == checksum
210
+ end
211
+
212
+ end
213
+ ```
214
+
215
+ or include the ::Libis::Workflow::Base::FileItem module:
216
+
217
+ ```ruby
218
+
219
+ require 'libis/workflow'
220
+
221
+ class MyFileItem < MyWorkItem
222
+ include ::Libis::Workflow::Base::FileItem
223
+
224
+ def initialize(file)
225
+ filename = file
226
+ super
227
+ end
228
+
229
+ def filesize
230
+ properties[:size]
231
+ end
232
+
233
+ def fixity_check(checksum)
234
+ properties[:checksum] == checksum
235
+ end
236
+
237
+ end
238
+ ```
239
+
240
+
241
+
242
+ ## Tasks
243
+
244
+ Tasks should inherit from ::Libis::Workflow::Task and specify the actions they want to
245
+ perform on each work item:
246
+
247
+ ```ruby
248
+
249
+ class MyTask < ::Libis::Workflow::Task
250
+
251
+ def process_item(item)
252
+ if do_something(item)
253
+ info "Did something"
254
+ else
255
+ raise ::Libis::WorkflowError, "Something went wrong"
256
+ end
257
+ rescue Exception => e
258
+ error "Fatal problem, aborting"
259
+ raise ::Libis::WorkflowAbort, "Fatal problem"
260
+ ensure
261
+ item
262
+ end
263
+
264
+ end
265
+ ```
266
+
267
+ As seen above, the task should define a method called process_item that takes one argument. The argument will be a
268
+ reference to the work item that it needs to perform an action on. The task has several options to progress after
269
+ performing its actions:
270
+ * return. This is considered a normal and successful operation result. After a successful return the item's status will
271
+ be set to 'done' for the given task.
272
+ * raise a ::Libis::WorkflowError. Indicates that something went wrong during the processing of the item. The item's
273
+ status will be set to failed for the given task and the exception message will be printed in the error log. Processing
274
+ will continue with the next item. This action is recommended for temporary or recoverable errors. The parent item will
275
+ be flagged as 'failed' if any of the child items failed.
276
+ * raise a ::Libis::WorkflowAbort. A severe and fatal error has occurred. Processing will abort immediately and the
277
+ failure status will be escalated to all items up the item hierarchy. Due to the escalating behaviour, no message is
278
+ printed in the error log automatically, so it is up to the task to log an appropriate error itself.
279
+ * raise any other Exception. Should be avoided, but if it happens nevertheless, it will cause the item to fail for the
280
+ given task and the exception message to be logged in the error log. It will not attempt to process the other items.
281
+
282
+ ### Controlling behavior with parameters
283
+
284
+ You have some options to control how the task will behave in special cases. These are controlled using parameters on
285
+ the task, which can be set (and fixed with the 'frozen' option) on the task, but can be configured at run-time with the
286
+ help of workflow input parameters and run options.
287
+
288
+ #### Performing an action on the work item and all child items recursively
289
+
290
+ With the 'recursive' parameter set to true, your task's process_item method will be called for the work item and then
291
+ once for each child and each child's children recursively.
292
+
293
+ Note: you should not make both parent and child tasks recursive as this will cause the subitems to be processed
294
+ multiple times. If you make the parent task recursive, all tasks and sub-tasks will be performed on each item in the
295
+ tree. Making the child tasks recursive makes the parent task only perform on the top item and then performs each
296
+ sub-task one-by-one for the whole item tree. The last option is the most efficient.
297
+
298
+ Attention should be paid for the
299
+
300
+ #### Retrying if task failed
301
+
302
+ The parameters 'retry_count' and 'retry_interval' control the task's behaviour if a task has to wait for a result for an
303
+ asynchronous job. A task could be waiting for a result from the other job which will be indicated by an 'ASYNC_WAIT'
304
+ status. Alternatively the task may know that the job is halted and waiting for user interaction, indicated with the
305
+ 'ASYNC_HALT' status. Only when the status is 'ASYNC_WAIT', the task will retry its process. By default the 'retry_count'
306
+ is 0, which causes the task not to retry. Before retrying the task will pause for the number of seconds given in the
307
+ parameter 'retry_interval', which is 10 by default.
308
+
309
+ ### Pre- and postprocessing
310
+
311
+ The default implementation of 'process' is to call 'pre_process' and then call 'process_item' on each child item,
312
+ followed by calling 'post_process'. The methods 'pre_process' and 'post_process' are no-operation methods by default,
313
+ but can be overwritten if needed.
314
+
315
+ The 'pre_process' is intended to re-initialize the task before processing a new item. It can also be used to force the
316
+ task to skip processing the items altogether by calling the 'skip_processing_item' method or to prevent a recursive
317
+ task from traveling further down the item tree by calling the 'stop_processing_subitems' method. The temporary locks
318
+ behave as reset-on-read switches and are only active for the processing of the current item.
319
+
320
+ The 'post_process' method can be used to update any object after the item processing.
321
+
322
+ ### Convenience functions
323
+
324
+ #### get_root_item()
325
+
326
+ Returns the work item that the workflow started with (and is the root/grand parent of all work items in the ingest run).
327
+
328
+ #### get_work_dir()
329
+
330
+ Returns the work directory as configured for the current ingest run. The work directory can be used as scrap directory
331
+ for creating derived files that can be added as work items to the current flow or for downloading files that will be
332
+ processed later. The work directory is not automatically cleaned up, which is considered a task for the workflow implementation.
333
+
334
+ #### capture_cmd(cmd, *args)
335
+
336
+ Allows the task to run an external command-line program and capture its stdout and stderr output at the same time. The
337
+ first argument is mandatory and should be the command-line program that has to be executed. An arbitrary number of
338
+ command-line arguments may follow.
339
+
340
+ The return value is an array with three elements: the status code returned by the command, the stdout string and the
341
+ stderr string.
342
+
343
+ #### names()
344
+
345
+ An array of strings with the hierarchical path of tasks leading to the current task. Can be useful for log messages.
346
+ The method 'namepath' returns a '/' separated path of tasks.
347
+
348
+ #### (debug/info/warn/error/fatal)(message, *args)
349
+
350
+ Convenience function for creating log entries. The logger set in ::Libis::Workflow::Config is used to dump log messages.
351
+
352
+ The first argument is mandatory and can be:
353
+ * an integer. The integer is used to look up the message text in ::Libis::Workflow::MessageRegistry.
354
+ * a static string. The message text is used as-is.
355
+ * a string with placeholders as used in String#%. Args can either be an array or a hash. See also Kernel#sprintf.
356
+
357
+ The log message is logged to the general logging and attached to the current work item (workitem) unless another
358
+ work item is passed as first argument after the message.
359
+
360
+ #### check_item_type(klass, item = nil)
361
+
362
+ Checks if the work item is of the given class. 'workitem' is checked if the item argument is not present. If the check
363
+ fails a RuntimeError exception is thrown which will cause the task to abort if not caught.
364
+
365
+ #### item_type?(klass, item = nil)
366
+
367
+ A less severe variant of check_item_type which returns a boolean (false if failed).
368
+
369
+ #### to_status(status)
370
+
371
+ Simply prepends the status text with the current task name. The output of this function is typically what the work item
372
+ status field should be set to.
373
+
374
+ ## Contributing
375
+
376
+ 1. Fork it ( https://github.com/libis/workflow/fork )
377
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
378
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
379
+ 4. Push to the branch (`git push origin my-new-feature`)
380
+ 5. Create new Pull Request