libis-workflow 2.0.beta.19-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +36 -0
  4. data/.travis.yml +32 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE +21 -0
  7. data/README.md +397 -0
  8. data/Rakefile +7 -0
  9. data/lib/libis/exceptions.rb +8 -0
  10. data/lib/libis/workflow/action.rb +24 -0
  11. data/lib/libis/workflow/base/dir_item.rb +15 -0
  12. data/lib/libis/workflow/base/file_item.rb +82 -0
  13. data/lib/libis/workflow/base/job.rb +85 -0
  14. data/lib/libis/workflow/base/logger.rb +30 -0
  15. data/lib/libis/workflow/base/logging.rb +76 -0
  16. data/lib/libis/workflow/base/run.rb +86 -0
  17. data/lib/libis/workflow/base/work_item.rb +176 -0
  18. data/lib/libis/workflow/base/workflow.rb +153 -0
  19. data/lib/libis/workflow/base.rb +7 -0
  20. data/lib/libis/workflow/config.rb +24 -0
  21. data/lib/libis/workflow/dir_item.rb +12 -0
  22. data/lib/libis/workflow/file_item.rb +17 -0
  23. data/lib/libis/workflow/job.rb +26 -0
  24. data/lib/libis/workflow/message_registry.rb +32 -0
  25. data/lib/libis/workflow/run.rb +26 -0
  26. data/lib/libis/workflow/status.rb +83 -0
  27. data/lib/libis/workflow/task.rb +287 -0
  28. data/lib/libis/workflow/task_group.rb +62 -0
  29. data/lib/libis/workflow/tasks/analyzer.rb +49 -0
  30. data/lib/libis/workflow/version.rb +7 -0
  31. data/lib/libis/workflow/work_item.rb +43 -0
  32. data/lib/libis/workflow/worker.rb +42 -0
  33. data/lib/libis/workflow/workflow.rb +22 -0
  34. data/lib/libis/workflow.rb +40 -0
  35. data/lib/libis-workflow.rb +2 -0
  36. data/libis-workflow.gemspec +38 -0
  37. data/spec/items/test_dir_item.rb +15 -0
  38. data/spec/items/test_file_item.rb +18 -0
  39. data/spec/items/test_run.rb +10 -0
  40. data/spec/items.rb +3 -0
  41. data/spec/spec_helper.rb +8 -0
  42. data/spec/task_spec.rb +16 -0
  43. data/spec/tasks/camelize_name.rb +13 -0
  44. data/spec/tasks/checksum_tester.rb +33 -0
  45. data/spec/tasks/collect_files.rb +48 -0
  46. data/spec/workflow_spec.rb +188 -0
  47. metadata +196 -0
@@ -0,0 +1,26 @@
1
+ # encoding: utf-8
2
+
3
+ require 'libis/workflow/base/job'
4
+ require 'libis/workflow/workflow'
5
+ require 'libis/workflow/run'
6
+
7
+ module Libis
8
+ module Workflow
9
+
10
# Plain-Ruby job class; its behaviour is mixed in from
# ::Libis::Workflow::Base::Job.
class Job
  include ::Libis::Workflow::Base::Job

  attr_accessor :name, :description
  attr_accessor :workflow, :run_object
  attr_accessor :input

  # Creates a job with an empty name and description, an empty input hash,
  # a new ::Libis::Workflow::Workflow and a new ::Libis::Workflow::Run.
  def initialize
    @name = ''
    @description = ''
    @input = {}
    @workflow = ::Libis::Workflow::Workflow.new
    @run_object = ::Libis::Workflow::Run.new
  end
end
24
+
25
+ end
26
+ end
@@ -0,0 +1,32 @@
1
+ # encoding: utf-8
2
+
3
+ require 'singleton'
4
+
5
+ module Libis
6
+ module Workflow
7
# Singleton lookup table that stores a message under an id.
#
# The class-level helpers forward to the singleton instance, so callers can
# use either MessageRegistry.get_message(id) or
# MessageRegistry.instance.get_message(id).
class MessageRegistry
  include Singleton

  class << self
    # Stores +message+ under +id+ in the singleton instance.
    # @return the stored message
    def register_message(id, message)
      instance.register_message(id, message)
    end

    # @return the message stored under +id+ in the singleton instance, or nil
    def get_message(id)
      instance.get_message(id)
    end
  end

  # Starts with an empty table.
  def initialize
    @message_db = {}
  end

  # Stores +message+ under +id+, replacing any previous entry.
  # @return the stored message
  def register_message(id, message)
    @message_db.store(id, message)
  end

  # @return the message stored under +id+, or nil when there is none
  def get_message(id)
    @message_db[id]
  end
end
31
+ end
32
+ end
@@ -0,0 +1,26 @@
1
+ # encoding: utf-8
2
+
3
+ require 'libis/workflow/config'
4
+ require 'libis/workflow/workflow'
5
+
6
+ require 'libis/workflow/base/run'
7
+ require 'libis/workflow/work_item'
8
+
9
+ module Libis
10
+ module Workflow
11
+
12
# Work item subclass that also mixes in ::Libis::Workflow::Base::Run and
# carries a start date and a job reference.
class Run < ::Libis::Workflow::WorkItem
  include ::Libis::Workflow::Base::Run

  attr_accessor :start_date
  attr_accessor :job

  # Stamps the run with the current time, leaves the job unset and then
  # lets the WorkItem initializer set up the remaining state.
  def initialize
    @start_date = Time.now
    @job = nil
    super()
  end
end
24
+
25
+ end
26
+ end
@@ -0,0 +1,83 @@
1
+ module Libis
2
+ module Workflow
3
# Mixin that records and queries the processing status of an object per task.
#
# The including class must provide +status_log+ (an ordered collection of
# entries readable with [:task] and [:status]), +add_status_log(info)+ and
# +save+; all three are called here but defined elsewhere.
module Status

  # Known status codes. The numeric value is the rank used by #compare_status.
  # Frozen so the shared table cannot be modified by accident.
  STATUS = {
    NOT_STARTED: 0,
    STARTED: 1,
    DONE: 2,
    ASYNC_WAIT: 3,
    ASYNC_HALT: 4,
    FAILED: 5
  }.freeze

  # Changes the status of the object. The status change is appended to the
  # status log via #add_status_log and the object is saved.
  #
  # @param [Array] x two-element Array: [status, task]
  def status=(x)
    s, task = x
    add_status_log(task: task, status: s)
    save
  end

  # Get last known status symbol for a given task.
  #
  # @param [String] task task name to check item status for; when blank the
  #   last entry of the whole status log is used
  # @return [Symbol, nil] the status code; :NOT_STARTED when there is no log
  #   entry, nil when the logged value is not a known status
  def status(task = nil)
    entry = status_entry(task)
    return :NOT_STARTED if entry.nil?
    status_symbol(entry[:status])
  end

  # Gets the last known status label of the object.
  #
  # @param [String] task name of task to get the status for
  # @return [String] status label ( = task name + capitalized status );
  #   an empty string when there is no log entry
  def status_label(task = nil)
    entry = status_entry(task)
    return '' if entry.nil?

    state = entry[:status]
    # Integer or nil status values have no #capitalize; they add nothing.
    suffix = state.respond_to?(:capitalize) ? state.capitalize : nil
    "#{entry[:task]}#{suffix}"
  end

  # Check status of the object.
  #
  # @param [Symbol] state status to look for
  # @param [String] task name of task whose status to check
  # @return [Boolean] true if the object status matches
  def check_status(state, task = nil)
    status(task) == state
  end

  # Compare the current status of the object with a given status.
  #
  # @param [Symbol] state status to compare against
  # @param [String] task name of task whose status to compare
  # @return [Integer, nil] 1, 0 or -1 depending on whether the current status
  #   ranks higher than, equal to or lower than +state+; nil when either
  #   status is not a key of STATUS
  def compare_status(state, task = nil)
    STATUS[status(task)] <=> STATUS[state]
  end

  protected

  # Get last known status entry for a given task.
  #
  # @param [String] task task name to check item status for
  # @return [Hash, nil] the status entry, nil when none was logged
  def status_entry(task = nil)
    return status_log.last if task.blank?
    status_log.select { |entry| entry[:task] == task }.last
  end

  # Convert String, Symbol or Integer to the correct symbol for the status.
  # Any value that does not map to a known status (including nil) yields nil.
  #
  # @param [String|Symbol|Integer] x string, symbol or integer for status code.
  # @return [Symbol, nil] the corresponding STATUS symbol
  def status_symbol(x)
    return STATUS.key(x) if x.is_a?(Integer)
    return x if STATUS.has_key?(x)
    candidate = x.to_s.upcase.to_sym
    STATUS.has_key?(candidate) ? candidate : nil
  end

end
82
+ end
83
+ end
@@ -0,0 +1,287 @@
1
+ # encoding: utf-8
2
+ require 'backports/rails/hash'
3
+ require 'backports/rails/string'
4
+
5
+ require 'libis/tools/parameter'
6
+ require 'libis/tools/extend/hash'
7
+
8
+ require 'libis/workflow'
9
+
10
+ module Libis
11
+ module Workflow
12
+
13
+ # noinspection RubyTooManyMethodsInspection
14
# Base class for workflow tasks.
#
# A task is run against a work item (see #run). Subclasses are expected to
# supply +process(item)+, which #run_item calls but this class does not
# define. Logging helpers (debug/warn/error/fatal) and the +parameter+ DSL
# come from the two included modules.
class Task
  include ::Libis::Workflow::Base::Logger
  include ::Libis::Tools::ParameterContainer

  attr_accessor :parent, :name, :workitem

  parameter quiet: false, description: 'Prevent generating log output.'
  parameter recursive: false, description: 'Run the task on all subitems recursively.'
  parameter retry_count: 0, description: 'Number of times to retry the task.'
  parameter retry_interval: 10, description: 'Number of seconds to wait between retries.'

  # @return [Array<Class>] all currently loaded classes that inherit from the receiver
  def self.task_classes
    ObjectSpace.each_object(::Class).select { |klass| klass < self }
  end

  # @param [Object] parent owner of this task (used to build #names)
  # @param [Hash] cfg task configuration, see #configure
  def initialize(parent, cfg = {})
    @subitems_stopper = false
    @subtasks_stopper = false
    self.parent = parent
    configure cfg
  end

  # A plain task cannot hold subtasks.
  #
  # @raise [Libis::WorkflowError] always
  def <<(task)
    raise Libis::WorkflowError, "Processing task '#{namepath}' is not allowed to have subtasks."
  end

  # Runs the task on the item, retrying up to +retry_count+ extra times while
  # the item reports :ASYNC_WAIT, and finally saves the item.
  #
  # Nothing is done when the run's action is :failed, or when it is :retry
  # and the item is already :DONE for this task.
  #
  # @param [Libis::Workflow::Base::WorkItem] item
  def run(item)
    check_item_type ::Libis::Workflow::Base::WorkItem, item
    self.workitem = item

    case action
    when :retry
      return if item.check_status(:DONE, namepath)
    when :failed
      return
    end

    attempts = parameter(:retry_count) + 1
    attempts.times do |attempt|

      run_item(item)

      case item.status(namepath)
      when :DONE
        self.action = :run
        return
      when :ASYNC_WAIT
        self.action = :retry
      when :ASYNC_HALT, :FAILED
        break
      else
        return
      end

      # Only wait when another attempt follows; sleeping after the last
      # attempt would just delay marking the run as failed.
      sleep(parameter(:retry_interval)) if attempt + 1 < attempts

    end

    # Reached when the item halted, failed or is still waiting after all attempts.
    item.get_run.action = :failed

  rescue WorkflowError => e
    error e.message, item
    update_status item, :FAILED

  rescue WorkflowAbort => e
    update_status item, :FAILED
    raise e if parent

  # NOTE(review): ::Exception also traps SignalException and SystemExit;
  # presumably deliberate (this gem ships a JRuby build) - confirm before narrowing.
  rescue ::Exception => e
    update_status item, :FAILED
    fatal "Exception occured: #{e.message}", item
    debug e.backtrace.join("\n")

  ensure
    item.save

  end

  # @return [Array] names of the parent chain followed by this task's name,
  #   without nil entries. A parent without #names contributes nothing.
  def names
    (self.parent.names rescue Array.new).push(name).compact
  end

  # @return [String] #names joined with '/'
  def namepath
    names.join('/')
  end

  # Applies run-time option values to the parameters of this task.
  #
  # Options are looked up under the class name first and then under the task
  # name (or, failing that, the name path); the latter override the former.
  # Only keys that match a declared parameter and have a non-nil value are
  # applied, after being parsed by the parameter definition.
  #
  # @param [Hash] opts option hashes keyed by class name, task name or name path
  def apply_options(opts)
    o = {}
    o.merge!(opts[self.class.to_s] || {})
    o.merge!(opts[name] || opts[namepath] || {})
    o.key_strings_to_symbols! recursive: true

    default_values.each_key do |key|
      next unless o.key?(key)
      next if o[key].nil?
      paramdef = get_parameter_definition key
      value = paramdef.parse(o[key])
      parameter(key, value)
    end
  end

  protected

  # Sets the task name and parameter values from the configuration.
  #
  # The name is cfg[:name] or else the last segment of cfg[:class] (or of
  # this class). Every key other than :options, :name and :class, merged
  # over cfg[:options], is stored as a parameter value.
  def configure(cfg)
    self.name = cfg[:name] || (cfg[:class] || self.class).to_s.split('::').last
    (cfg[:options] || {}).merge(
      cfg.reject { |k, _| [:options, :name, :class].include? k.to_sym }
    ).symbolize_keys.each do |k, v|
      parameter(k, v)
    end
  end

  # Runs one full pass on the item: pre_process, mark :STARTED, process
  # (unless #skip_processing_item was called during pre_process), subitems
  # when the task is recursive, mark :DONE, post_process.
  def run_item(item)
    @item_skipper = false

    pre_process(item)

    set_status item, :STARTED

    process item unless @item_skipper

    run_subitems(item) if parameter(:recursive)

    update_status item, :DONE

    post_process item
  end

  # Hook called before the item is processed; optional implementation.
  def pre_process(_)
    true
  end

  # Hook called after the item is processed; optional implementation.
  def post_process(_)
  end

  # Runs #run_item on every subitem of the parent item and folds the
  # resulting statuses back into the parent's status.
  def run_subitems(parent_item)
    return unless check_processing_subitems
    return unless parent_item.count > 0

    tally = Hash.new(0)
    subitems(parent_item).each_with_index do |item, i|
      debug 'Processing subitem (%d/%d): %s', parent_item, i + 1, parent_item.count, item.to_s
      run_item item
      tally[item.status(namepath)] += 1
    end

    debug '%d of %d subitems passed', parent_item, tally[:DONE], parent_item.count
    substatus_check(tally, parent_item, 'item')
  end

  # Raises the item's status according to the statuses of its subtasks or
  # subitems and logs how many of them failed, halted or are waiting.
  #
  # @param [Hash] status number of subtasks/subitems per status symbol
  # @param [Object] item the item whose status is updated
  # @param [String] task_or_item 'task' or 'item', only used in log messages
  def substatus_check(status, item, task_or_item)
    # The counts are read first and compared afterwards, so the number
    # (not the Boolean result of the comparison) is what gets logged.
    failed = status[:FAILED].to_i
    if failed > 0
      warn "%d sub#{task_or_item}(s) failed", item, failed
      update_status(item, :FAILED)
    end

    halted = status[:ASYNC_HALT].to_i
    if halted > 0
      warn "%d sub#{task_or_item}(s) halted in async process", item, halted
      update_status(item, :ASYNC_HALT)
    end

    waiting = status[:ASYNC_WAIT].to_i
    if waiting > 0
      warn "waiting for %d sub#{task_or_item}(s) in async process", item, waiting
      update_status(item, :ASYNC_WAIT)
    end

    update_status(item, :DONE)
  end

  # Runs an external command and captures what it writes.
  #
  # Open3 is used because output of a child process never passes through
  # Ruby's $stdout/$stderr objects, so swapping those cannot capture it.
  #
  # @param [String] cmd command to run
  # @param [Array] opts extra arguments passed along with the command
  # @return [Array] [status, stdout text, stderr text]; status is true for a
  #   zero exit status, false otherwise and nil when the command could not
  #   be started (the same convention as Kernel#system)
  def capture_cmd(cmd, *opts)
    require 'open3' # loaded on demand; only tasks that shell out need it
    out, err, process_status = Open3.capture3(cmd, *opts)
    [process_status.success? ? true : false, out, err]
  rescue SystemCallError
    [nil, '', '']
  end

  # Stores the action on the run the current work item belongs to.
  def action=(action)
    workitem.get_run.action = action
  end

  # @return the action of the run the current work item belongs to
  def action
    workitem.get_run.action
  end

  # @return the root of the given item, or of the current work item
  def get_root_item(item = nil)
    (item || workitem).get_root
  end

  # @return the work_dir of the root item
  def get_work_dir(item = nil)
    get_root_item(item).work_dir
  end

  # Makes the next #run_subitems call a no-op (recursive tasks only).
  def stop_processing_subitems
    @subitems_stopper = true if parameter(:recursive)
  end

  # @return [Boolean] false exactly once after #stop_processing_subitems
  #   was called (the flag is reset), true otherwise
  def check_processing_subitems
    if @subitems_stopper
      @subitems_stopper = false
      return false
    end
    true
  end

  # Makes #run_item skip the +process+ call for the current item.
  def skip_processing_item
    @item_skipper = true
  end

  # Sets the status only when it ranks higher than the item's current status
  # for this task.
  #
  # @return the state when it was set, nil otherwise
  def update_status(item, state)
    return nil unless item.compare_status(state, namepath) < 0
    set_status(item, state)
  end

  # Unconditionally records the state for this task on the item.
  #
  # @return the state
  def set_status(item, state)
    item.status = to_status(state)
    state
  end

  # @return [Array] [state, name path of this task], the form item.status= is given
  def to_status(state)
    [state, namepath]
  end

  # @raise [WorkflowError] when the item (default: current work item) is not
  #   an instance of the given class
  def check_item_type(klass, item = nil)
    item ||= workitem
    unless item.is_a? klass.to_s.constantize
      raise WorkflowError, "Workitem is of wrong type : #{item.class} - expected #{klass.to_s}"
    end
  end

  # @return [Boolean] whether the item (default: current work item) is an
  #   instance of the given class
  def item_type?(klass, item = nil)
    item ||= workitem
    item.is_a? klass.to_s.constantize
  end

  private

  # @return the +tasks+ collection; only available on classes that define it
  def subtasks
    tasks
  end

  # @return the child items of the given item, or of the current work item
  def subitems(item = nil)
    (item || workitem).get_items
  end

  # @return [Hash] default value per declared parameter name
  def default_values
    self.class.default_values
  end

  # `private` does not apply to singleton methods, so this stays callable
  # from the instance method above.
  #
  # @return [Hash] default value per declared parameter name
  def self.default_values
    parameter_defs.each_with_object({}) do |parameter, defaults|
      defaults[parameter.first] = parameter.last[:default]
    end
  end

end
285
+
286
+ end
287
+ end
@@ -0,0 +1,62 @@
1
+ # encoding: utf-8
2
+ require_relative 'task'
3
+
4
+ module Libis
5
+ module Workflow
6
+
7
+ # noinspection RubyTooManyMethodsInspection
8
# Task that owns an ordered list of subtasks and runs each of them on the
# item it is given.
class TaskGroup < Libis::Workflow::Task

  attr_accessor :tasks

  # Starts with an empty subtask list before the Task initializer runs.
  def initialize(parent, cfg = {})
    self.tasks = []
    super(parent, cfg)
  end

  # Appends a subtask.
  #
  # @return the subtask list
  def <<(task)
    tasks << task
  end

  # Applies the options to this group and then to every subtask.
  def apply_options(opts)
    super(opts)
    tasks.each { |task| task.apply_options(opts) }
  end

  protected

  # Runs every subtask on the item, counts the status the item ends up with
  # per subtask and hands the counts to #substatus_check.
  def process(item)
    return unless check_processing_subtasks

    group = subtasks
    return unless group.count > 0

    tally = Hash.new(0)
    group.each_with_index do |task, index|
      debug 'Running subtask (%d/%d): %s', item, index + 1, group.count, task.name
      task.run item
      tally[item.status(task.namepath)] += 1
    end

    substatus_check(tally, item, 'task')
  end

  # Makes the next #process call return without running any subtask.
  def stop_processing_subtasks
    @subtasks_stopper = true
  end

  # @return [Boolean] false exactly once after #stop_processing_subtasks
  #   was called (the flag is reset), true otherwise
  def check_processing_subtasks
    return true unless @subtasks_stopper

    @subtasks_stopper = false
    false
  end

end
60
+
61
+ end
62
+ end
@@ -0,0 +1,49 @@
1
+ # encoding: utf-8
2
+
3
+ require 'libis/workflow/task'
4
+
5
+ module Libis
6
+ module Workflow
7
+ module Tasks
8
+
9
# Task that records whether an item failed and counts its log entries per
# severity, including the counts of all nested items.
class Analyzer < Task

  parameter quiet: true, frozen: true
  parameter recursive: false, frozen: true

  # Stores the :ingest_failed property and rebuilds +summary+ for the item
  # and, through recursion, for every item below it. The item is always
  # saved, also when the analysis raised a RuntimeError.
  #
  # @param [Libis::Workflow::Base::WorkItem] item
  def run(item)

    item.properties[:ingest_failed] = item.check_status(:FAILED)

    # count the item's own log entries per severity
    item.summary = {}
    item.log_history.each do |entry|
      severity = entry[:severity]
      item.summary[severity] = (item.summary[severity] || 0) + 1
    end

    # analyze each child and add its counts to this item's counts
    item.each do |child|
      run child
      child.summary.each do |severity, count|
        item.summary[severity] = (item.summary[severity] || 0) + (count || 0)
      end
    end

  rescue RuntimeError => ex

    puts 'Failed to analyze item: %s - %s' % [item.class, item.name]
    puts 'Exception: %s' % ex.message

  ensure

    item.save

  end

end
46
+
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,7 @@
1
+ # encoding: utf-8
2
+
3
module Libis
  module Workflow
    # Guarded against a redefinition warning that happens on Travis.
    unless const_defined?(:VERSION)
      VERSION = '2.0.beta.19'
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # encoding: utf-8
2
+ require 'libis/tools/extend/hash'
3
+ require 'libis/workflow/base/work_item'
4
+
5
+ module Libis
6
+ module Workflow
7
+
8
+ # In-memory implementation of ::Libis::Workflow::Base::WorkItem
9
# Keeps all of its state (parent, child items, options, properties, logs and
# summary) in plain Ruby objects.
class WorkItem
  include ::Libis::Workflow::Base::WorkItem

  attr_accessor :parent, :items,
                :options, :properties,
                :log_history, :status_log,
                :summary

  # Starts without a parent and with empty collections.
  def initialize
    self.parent = nil
    self.items = []
    self.options = {}
    self.properties = {}
    self.log_history = []
    self.status_log = []
    self.summary = {}
  end

  protected

  # Appends the message hash to the log history, stamped with the current
  # time under :c_at.
  def add_log_entry(msg)
    # noinspection RubyResolve
    log_history << msg.merge(c_at: ::Time.now)
  end

  # Appends the status info to the status log, stamped with the current time
  # under :timestamp and passed through Hash#cleanup (a project extension).
  def add_status_log(info)
    # noinspection RubyResolve
    status_log << info.merge(timestamp: ::Time.now).cleanup
  end

end
41
+
42
+ end
43
+ end
@@ -0,0 +1,42 @@
1
+ # encoding: utf-8
2
+ require 'sidekiq'
3
+
4
+ require 'libis/workflow/config'
5
+ require 'libis/workflow/workflow'
6
+
7
+ module Libis
8
+ module Workflow
9
+
10
# Sidekiq worker that builds a job from its configuration and executes it.
#
# Sidekiq serialises job arguments as JSON, so hashes that were enqueued with
# Symbol keys arrive here with String keys. The logging options and the job
# name are therefore looked up under both key forms.
class Worker
  include Sidekiq::Worker

  # @param [Hash] job_config configuration passed to the job's #configure
  # @param [Hash] options logging options (consumed here) plus options for
  #   the job's #execute; :interactive is always forced to false
  def perform(job_config, options = {})
    job = configure(job_config, options)
    options[:interactive] = false
    job.execute options
  end

  # Sets up a log file when a log path is given and returns the job.
  #
  # Recognised options (removed from +options+ when read): log_path,
  # log_shift_age (default 'daily'), log_shift_size (default 1024 ** 2) and
  # log_level (default ::Logger::DEBUG). The last three are only read when
  # log_path is present.
  #
  # @return [::Libis::Workflow::Job] the configured job
  def configure(job_config, options = {})
    log_path = take_option(options, :log_path)
    if log_path
      job_name = job_config[:name] || job_config['name']
      Config.logger = ::Logger.new(
        File.join(log_path, "#{job_name}.log"),
        (take_option(options, :log_shift_age) || 'daily'),
        (take_option(options, :log_shift_size) || 1024 ** 2)
      )
      Config.logger.formatter = ::Logger::Formatter.new
      Config.logger.level = (take_option(options, :log_level) || ::Logger::DEBUG)
    end
    get_job(job_config)
  end

  # @return [::Libis::Workflow::Job] a new job configured with +job_config+
  def get_job(job_config)
    job = ::Libis::Workflow::Job.new
    job.configure job_config
    job
  end

  private

  # Removes the option from the hash under both its Symbol and its String
  # key and returns its value; the Symbol entry wins when both are present.
  def take_option(options, key)
    by_symbol = options.delete(key)
    by_string = options.delete(key.to_s)
    by_symbol.nil? ? by_string : by_symbol
  end

end
40
+
41
+ end
42
+ end
@@ -0,0 +1,22 @@
1
+ # encoding: utf-8
2
+
3
+ require 'libis/workflow/base/workflow'
4
+
5
+ module Libis
6
+ module Workflow
7
+
8
# Plain-Ruby workflow class; its behaviour is mixed in from
# ::Libis::Workflow::Base::Workflow.
class Workflow
  include ::Libis::Workflow::Base::Workflow

  attr_accessor :name
  attr_accessor :description
  attr_accessor :config

  # Creates a workflow with an empty name, description and configuration.
  def initialize
    @name = ''
    @description = ''
    @config = {}
  end

end
20
+
21
+ end
22
+ end