libis-workflow 2.0.beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.coveralls.yml +2 -0
- data/.gitignore +36 -0
- data/.travis.yml +33 -0
- data/Gemfile +4 -0
- data/LICENSE +21 -0
- data/README.md +296 -0
- data/Rakefile +7 -0
- data/lib/libis/exceptions.rb +8 -0
- data/lib/libis/workflow/base/logger.rb +30 -0
- data/lib/libis/workflow/base/run.rb +68 -0
- data/lib/libis/workflow/base/workflow.rb +123 -0
- data/lib/libis/workflow/config.rb +92 -0
- data/lib/libis/workflow/message_registry.rb +32 -0
- data/lib/libis/workflow/run.rb +27 -0
- data/lib/libis/workflow/task.rb +259 -0
- data/lib/libis/workflow/tasks/analyzer.rb +41 -0
- data/lib/libis/workflow/version.rb +7 -0
- data/lib/libis/workflow/worker.rb +42 -0
- data/lib/libis/workflow/workflow.rb +29 -0
- data/lib/libis/workflow/workitems/dir_item.rb +12 -0
- data/lib/libis/workflow/workitems/file_item.rb +78 -0
- data/lib/libis/workflow/workitems/work_item.rb +231 -0
- data/lib/libis/workflow/workitems.rb +5 -0
- data/lib/libis/workflow.rb +28 -0
- data/lib/libis-workflow.rb +2 -0
- data/libis-workflow.gemspec +36 -0
- data/spec/items/test_dir_item.rb +16 -0
- data/spec/items/test_file_item.rb +19 -0
- data/spec/items/test_run.rb +10 -0
- data/spec/items.rb +3 -0
- data/spec/spec_helper.rb +8 -0
- data/spec/task_spec.rb +17 -0
- data/spec/tasks/camelize_name.rb +13 -0
- data/spec/tasks/checksum_tester.rb +33 -0
- data/spec/tasks/collect_files.rb +48 -0
- data/spec/workflow_spec.rb +231 -0
- metadata +187 -0
@@ -0,0 +1,231 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'backports/rails/hash'
|
4
|
+
|
5
|
+
require 'libis/workflow/config'
|
6
|
+
|
7
|
+
module Libis
|
8
|
+
module Workflow
|
9
|
+
|
10
|
+
# Base module for all work items.
|
11
|
+
#
|
12
|
+
# This module contains some basic attributes required for making the workflow and tasks behave properly:
|
13
|
+
#
|
14
|
+
# - status: [Symbol] the status field. Each task sets the status of the items it works on. Before starting processing
|
15
|
+
# the status is set to "#{task_name}Started". After successful processing it is set to "#{task_name}Done" and if
|
16
|
+
# the task failed, it is set to "#{task_name}Failed". The status field can be used to perform real-time
|
17
|
+
# monitoring, reporting and error-recovery or restart of the ingest.
|
18
|
+
# Until a status has been logged, this attribute reads as :NOT_STARTED.
|
19
|
+
# - parent: [Object|nil] a link to a parent work item. Work items can be organized in any hierarchy you think is
|
20
|
+
# relevant for your workflow (e.g. directory[/directory...]/file/line or library/section/book/page). Of course
|
21
|
+
# hierarchies are not mandatory.
|
22
|
+
# - items: [Array] a list of child work items. see above.
|
23
|
+
# - options: [Hash] a set of options for the task chain on how to deal with this work item. This attribute can be
|
24
|
+
# used to fine-tune the behaviour of tasks for a particular work item.
|
25
|
+
# - properties: [Hash] a set of properties, typically collected during the workflow processing and used to store
|
26
|
+
# final or intermediate results of tasks. The ::Libis::Workflow::FileItem module uses this attribute to store the
|
27
|
+
# properties (e.g. size, checksum, ...) of the file it represents.
|
28
|
+
# - log_history: [Array] a list of all logging messages collected for this work item. Whenever a task logs a message
|
29
|
+
# it will automatically be registered for the work item that it is processing or for the work item that was
|
30
|
+
# supplied as the first argument.
|
31
|
+
# - status_log: [Array] a list of all status changes the work item went through.
|
32
|
+
# - summary: [Hash] collected statistics about the ingest for the work item and its children. This structure will
|
33
|
+
# be filled in by the included task ::Lias::Ingester::Tasks::Analyzer which is appended to the workflow by default.
|
34
|
+
#
|
35
|
+
# The module is created so that it is possible to implement an ActiveRecord/Datamapper/... implementation easily.
|
36
|
+
|
37
|
+
module WorkItem
|
38
|
+
include Enumerable
|
39
|
+
|
40
|
+
attr_accessor :parent
|
41
|
+
attr_accessor :items
|
42
|
+
attr_accessor :options, :properties
|
43
|
+
attr_accessor :log_history, :status_log
|
44
|
+
attr_accessor :summary
|
45
|
+
|
46
|
+
# The initializer takes care of properly setting the correct default values for the attributes. A derived class
|
47
|
+
# that wishes to define its own initializer should take care to call 'super' or make sure it overwrites the
|
48
|
+
# attribute definitions itself. (e.g. in an ActiveRecord implementation)
|
49
|
+
def initialize
  # Start detached from any hierarchy.
  self.parent = nil
  # Fresh, unshared containers for children and for both histories.
  self.items, self.log_history, self.status_log = [], [], []
  # Fresh, unshared hashes for configuration, collected data and statistics.
  self.options, self.properties, self.summary = {}, {}, {}
end
|
58
|
+
|
59
|
+
# String representation of the identity of the work item.
|
60
|
+
#
|
61
|
+
# You may want to overwrite this method as it tries the :name property or whatever #inspect returns if that
|
62
|
+
# fails. Typically this should return the key value, file name or id number. If that's what your :name property
|
63
|
+
# contains, you're fine.
|
64
|
+
#
|
65
|
+
# @return [String] string identification for this work item.
|
66
|
+
def name
  # Prefer the explicit :name property; fall back to #inspect when it is unset.
  label = properties[:name]
  label || inspect
end
|
69
|
+
|
70
|
+
# The string form of a work item is simply its name.
def to_s
  name
end
|
71
|
+
|
72
|
+
# Lists the names from the topmost ancestor down to this item.
#
# Nil names are dropped, so an unnamed level does not leave a gap.
#
# @return [Array] the names along the parent chain, ending with this item's name
def names
  # Ask the parent explicitly instead of rescuing whatever goes wrong: a missing
  # parent (or one without #names) contributes nothing, while real errors raised
  # further up the chain are no longer silently swallowed.
  ancestors = parent.respond_to?(:names) ? parent.names : []
  ancestors.push(name).compact
end
|
75
|
+
|
76
|
+
# The item's names joined into a single '/'-separated path string.
def namepath
  names.join('/')
end
|
77
|
+
|
78
|
+
# File-name-safe version of the to_s output. The output should be safe to use as a file name to store work item
|
79
|
+
# data. Typical use is when extra file items are created by a task and need to be stored on disk. The default
|
80
|
+
# implementation URL-encodes (%xx) all characters except alphanumerics, '_', '.' and '-'.
|
81
|
+
#
|
82
|
+
# @return [String] file name
|
83
|
+
def to_filename
  # Escape per byte rather than per code point: a multi-byte character then
  # becomes a sequence of two-digit %xx escapes (e.g. "%e2%82%ac") instead of a
  # single over-long escape such as "%20ac" that cannot be decoded again.
  to_s.gsub(/[^\w.-]/) do |char|
    char.bytes.map { |byte| format('%%%02x', byte) }.join
  end
end
|
86
|
+
|
87
|
+
# Gets the current status of the object.
|
88
|
+
#
|
89
|
+
# @return [Symbol] status code
|
90
|
+
def status
  entry = status_log.last
  # An empty status log is the normal "nothing happened yet" case; handle it
  # explicitly instead of raising and rescuing a NoMethodError on nil.
  return status_label(nil, nil) unless entry

  status_label(entry[:text], entry[:tasklist])
end
|
94
|
+
|
95
|
+
# Changes the status of the object. As a side effect the status is also logged in the status_log with the current
|
96
|
+
# timestamp.
|
97
|
+
#
|
98
|
+
# @param [Symbol, Array] s the new status, or a [status, tasklist] pair
|
99
|
+
def status=(s, tasklist = nil)
  # An assignment can only carry one value, so a [status, tasklist] pair is unpacked here.
  s, tasklist = s if s.is_a?(Array)
  s = s.to_sym
  # Nothing to record when the resulting label equals the current status.
  return if status_label(s, tasklist) == status

  status_log << { timestamp: ::Time.now, tasklist: tasklist, text: s }
  save
end
|
111
|
+
|
112
|
+
# Check ingest status of the object. The status is checked to see if it ends in 'Failed'.
|
113
|
+
#
|
114
|
+
# @return [Boolean] true if the object failed, false otherwise
|
115
|
+
def failed?
  # A literal suffix test anchors at the true end of the string (the old
  # /Failed$/ pattern anchored at end-of-line) and already yields a Boolean.
  status.to_s.end_with?('Failed')
end
|
118
|
+
|
119
|
+
# Helper function for the Tasks to add a log entry to the log_history.
|
120
|
+
#
|
121
|
+
# The supplied message structure is expected to contain the following fields:
|
122
|
+
# - :severity : ::Logger::Severity value
|
123
|
+
# - :id : optional message id
|
124
|
+
# - :text : message text
|
125
|
+
# - :task : list of tasks names (task hierarchy) that submits the message
|
126
|
+
#
|
127
|
+
# @param [Hash] message
|
128
|
+
def add_log(message = {})
  # Normalise the message into the log-entry structure, record it, then persist.
  log_history << message_struct(message)
  save
end
|
133
|
+
|
134
|
+
# Operator shorthand for #add_log: `item <= { text: '...' }`.
def <=(message = {})
  add_log(message)
end
|
135
|
+
|
136
|
+
# Iterates over the child work items and yields each of them to the given block.
|
137
|
+
def each
  # Follow the Enumerable convention: without a block hand back an Enumerator
  # instead of failing with LocalJumpError on the first child.
  return to_enum(:each) unless block_given?

  items.each { |item| yield item }
end
|
140
|
+
|
141
|
+
# Add a child work item
|
142
|
+
#
|
143
|
+
# @param [WorkItem] item to be added to the child list :items
|
144
|
+
def add_item(item)
  # Anything that is not a WorkItem (nil included) is ignored.
  if item.is_a?(WorkItem)
    items << item
    item.parent = self
    # Persist both ends of the new parent/child link.
    save!
    item.save!
  end
  # Always return the receiver.
  self
end
|
152
|
+
|
153
|
+
alias :<< :add_item
|
154
|
+
|
155
|
+
# Dummy method. It is a placeholder for DB backed implementations. Wherever appropriate WorkItem#save will be
|
156
|
+
# called to save the current item's state. If state needs to be persisted, you should override this method or make
|
157
|
+
# sure your persistence layer implements it in your class.
|
158
|
+
# No-op by default; a class that needs persistence overrides this method.
def save
|
159
|
+
end
|
160
|
+
|
161
|
+
# Dummy method. It is a placeholder for DB backed implementations. Wherever appropriate WorkItem#save! will be
|
162
|
+
# called to save the current item's state. If state needs to be persisted, you should override this method or make
|
163
|
+
# sure your persistence layer implements it in your class.
|
164
|
+
# No-op by default; a class that needs persistence overrides this method.
def save!
|
165
|
+
end
|
166
|
+
|
167
|
+
# Add a structured message to the log history. The message text can be submitted as an integer or text. If an
|
168
|
+
# integer is submitted, it will be used to look up the text in the MessageRegistry. The message text will be
|
169
|
+
# passed to the % operator with the args parameter. If that fails (e.g. because the format string is not correct)
|
170
|
+
# the args value is appended to the message.
|
171
|
+
#
|
172
|
+
# @param [Integer] severity a ::Logger::Severity value (used as an index into SEV_LABEL)
|
173
|
+
# @param [Hash] msg should contain message text as :id or :text and the hierarchical name of the task as :task
|
174
|
+
# @param [Array] args string format values
|
175
|
+
def log_message(severity, msg, *args)
  # Resolve the message id and raw text: a registered id wins over literal text.
  message_id = 0
  message_text = ''
  if msg[:id]
    message_id = msg[:id]
    message_text = MessageRegistry.instance.get_message(msg[:id])
  elsif msg[:text]
    message_text = msg[:text]
  end
  task = msg[:task] || '*UNKNOWN*'

  # Apply the format arguments; if formatting fails, append them verbatim instead.
  message_text = begin
    message_text % args
  rescue StandardError
    "#{message_text} - #{args}"
  end

  add_log severity: severity, id: message_id.to_i, text: message_text, task: task

  # Best-effort item label: each step overwrites the previous one, so the last
  # call that succeeds before an error decides what is shown.
  name = ''
  begin
    name = to_s
    name = self.name
    name = namepath
  rescue StandardError
    # keep whatever label was obtained so far
  end

  Config.logger.add(severity, message_text, format('%s - %s ', task, name))
end
|
198
|
+
|
199
|
+
protected
|
200
|
+
|
201
|
+
SEV_LABEL = %w(DEBUG INFO WARN ERROR FATAL ANY) unless const_defined? :SEV_LABEL
|
202
|
+
|
203
|
+
# go up the hierarchy and return the topmost work item
|
204
|
+
#
|
205
|
+
# @return [WorkItem] the root work item
|
206
|
+
def root
  # Climb while the parent is itself a work item; stop at the first level whose
  # parent is missing or is some other kind of object.
  node = self
  node = node.parent while node.parent.is_a?(WorkItem)
  node
end
|
211
|
+
|
212
|
+
# create and return a proper message structure
|
213
|
+
# @param [Hash] opts
|
214
|
+
def message_struct(opts = {})
  # Merge into a new hash: the caller's hash is left untouched (and may be
  # frozen), and no Hash extension beyond core Ruby is required.
  opts = { severity: ::Logger::INFO, id: 0, text: '' }.merge(opts)
  {
    timestamp: ::Time.now,
    # The numeric ::Logger severity is stored as its textual label.
    severity: SEV_LABEL[opts[:severity]],
    task: opts[:task],
    id: opts[:id],
    message: opts[:text]
  }
end
|
224
|
+
|
225
|
+
# Builds the status symbol from the last task name and the status text.
#
# @param [Object] text status text; nil counts as empty
# @param [Array, nil] tasklist task name hierarchy; only its last entry is used
# @return [Symbol] the combined label, or :NOT_STARTED when both parts are empty
def status_label(text, tasklist)
  # Check for a usable task list explicitly rather than rescuing the failure.
  prefix = tasklist.respond_to?(:last) ? tasklist.last : ''
  label = "#{prefix}#{text}"
  label.empty? ? :NOT_STARTED : label.to_sym
end
|
229
|
+
end
|
230
|
+
end
|
231
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'libis/exceptions'
|
3
|
+
|
4
|
+
module Libis
  module Workflow

    # Constants that are loaded on first reference, with the file defining each.
    # NOTE(review): the package file listing shows no 'libis/workflow/parameter';
    # confirm that the Parameter entry can actually be resolved.
    {
      MessageRegistry: 'libis/workflow/message_registry',
      Config: 'libis/workflow/config',
      WorkItem: 'libis/workflow/workitems/work_item',
      FileItem: 'libis/workflow/workitems/file_item',
      DirItem: 'libis/workflow/workitems/dir_item',
      Workflow: 'libis/workflow/workflow',
      Run: 'libis/workflow/run',
      Task: 'libis/workflow/task',
      Parameter: 'libis/workflow/parameter',
      Worker: 'libis/workflow/worker'
    }.each { |const_name, path| autoload const_name, path }

    # Yields the Config singleton to the given block.
    def self.configure
      yield Config.instance
    end

  end
end
|
28
|
+
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
lib = File.expand_path('../lib', __FILE__)
|
4
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
|
+
|
6
|
+
require 'libis/workflow/version'
|
7
|
+
|
8
|
+
Gem::Specification.new do |gem|
  gem.name = 'libis-workflow'
  gem.version = ::Libis::Workflow::VERSION
  # gem.date is intentionally not assigned: RubyGems stamps the build date when
  # the gem is packaged, and the previous `Date.today` relied on the Date class
  # being loaded although this file never requires 'date'.

  gem.summary = %q{LIBIS Workflow framework.}
  gem.description = %q{A simple framework to build custom task/workflow solutions.}

  gem.author = 'Kris Dekeyser'
  gem.email = 'kris.dekeyser@libis.be'
  gem.homepage = 'https://github.com/Kris-LIBIS/workflow'
  gem.license = 'MIT'

  # Package exactly the files tracked by git.
  gem.files = `git ls-files -z`.split("\x0")
  gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})

  gem.require_paths = ['lib']

  gem.add_development_dependency 'bundler', '~> 1.6'
  gem.add_development_dependency 'rake', '~> 10.3'
  gem.add_development_dependency 'rspec', '~> 3.1'
  gem.add_development_dependency 'simplecov', '~> 0.9'
  gem.add_development_dependency 'coveralls', '~> 0.7'

  gem.add_runtime_dependency 'libis-tools', '~> 0.9'
  gem.add_runtime_dependency 'sidekiq', '~> 3.3'
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'libis/workflow/workitems'
|
3
|
+
|
4
|
+
# Directory work item used by the specs.
class TestDirItem
  include ::Libis::Workflow::DirItem

  # Accepts only paths that exist as a directory, then defers to the included module.
  def name=(dir)
    unless File.directory?(dir)
      raise RuntimeError, "'#{dir}' is not a directory"
    end
    super(dir)
  end

  # An explicit :name property takes precedence over the inherited name.
  def name
    properties[:name] || super
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'libis/tools/checksum'
|
3
|
+
|
4
|
+
require 'libis/workflow/workitems'
|
5
|
+
|
6
|
+
# File work item used by the specs.
class TestFileItem
  include ::Libis::Workflow::FileItem

  # Accepts only paths that exist as a regular file, records the file's SHA256
  # checksum and then defers to the included module.
  def filename=(file)
    unless File.file?(file)
      raise RuntimeError, "'#{file}' is not a file"
    end
    digest = ::Libis::Tools::Checksum.hexdigest(file, :SHA256)
    set_checksum :SHA256, digest
    super(file)
  end

  # An explicit :name property takes precedence over the inherited name.
  def name
    properties[:name] || super
  end
end
|
data/spec/items.rb
ADDED
data/spec/spec_helper.rb
ADDED
data/spec/task_spec.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require_relative 'spec_helper'
|
2
|
+
|
3
|
+
require 'libis/workflow/task'
|
4
|
+
|
5
|
+
describe 'Task' do
  # A task constructed with a nil parent.
  let(:task) { ::Libis::Workflow::Task.new nil }

  it 'should create a default task' do
    expect(task.parent).to be_nil
    expect(task.name).to eq 'Task'
    expect(task.options[:abort_on_error]).to be false
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'backports/rails/string'
|
3
|
+
|
4
|
+
require 'libis/workflow/workitems'
|
5
|
+
|
6
|
+
# Spec task that replaces the :name property of test file and directory items
# with its camelized form.
class CamelizeName < ::Libis::Workflow::Task

  def process(item)
    # Items of any other type are left untouched.
    case item
    when TestFileItem, TestDirItem
      item.properties[:name] = item.name.camelize
    end
  end

end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'libis/tools/checksum'
|
3
|
+
|
4
|
+
require 'libis/exceptions'
|
5
|
+
require 'libis/workflow/workitems'
|
6
|
+
|
7
|
+
# Spec task that recomputes file checksums and compares them with the values
# stored on the item.
class ChecksumTester < ::Libis::Workflow::Task

  parameter checksum_type: nil,
            description: 'Checksum type to use.',
            constraint: ::Libis::Tools::Checksum::CHECKSUM_TYPES.map(&:to_s)

  # Verifies the configured checksum type or, when none is configured, every
  # known type for which the item has a stored checksum.
  def process(item)
    return unless item.is_a? TestFileItem

    requested = options[:checksum_type]
    return test_checksum(item, requested) unless requested.nil?

    ::Libis::Tools::Checksum::CHECKSUM_TYPES.each do |type|
      test_checksum(item, type) if item.checksum(type)
    end
  end

  # Raises ::Libis::WorkflowError when the recomputed checksum differs from the
  # stored one.
  def test_checksum(item, checksum_type)
    actual = ::Libis::Tools::Checksum.hexdigest(item.fullpath, checksum_type.to_sym)
    matches = item.checksum(checksum_type) == actual
    raise ::Libis::WorkflowError, "Checksum test #{checksum_type} failed for #{item.filepath}" unless matches
  end

end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'libis/exceptions'
|
3
|
+
|
4
|
+
require_relative '../items'
|
5
|
+
|
6
|
+
# Spec task that builds the work item tree from the file system: the run gets
# the configured location as child, and every directory item gets its entries.
class CollectFiles < ::Libis::Workflow::Task

  parameter location: '.',
            description: 'Dir location to start scanning for files.'
  parameter subdirs: false,
            description: 'Look for files in subdirs too.'
  parameter selection: nil,
            description: 'Only select files that match the given regular expression. Ignored if empty.'

  # Dispatches on the item type; other item types are ignored.
  def process(item)
    if item.is_a? TestRun
      add_item(item, options[:location])
    elsif item.is_a? TestDirItem
      collect_files(item, item.fullpath)
    end
  end

  # Adds the entries found under dir (recursively when :subdirs is set) to item,
  # in sorted order, keeping only paths that match :selection when it is given.
  def collect_files(item, dir)
    glob_string = dir
    glob_string = File.join(glob_string, '**') if options[:subdirs]
    glob_string = File.join(glob_string, '*')

    # Compile the selection pattern once instead of once per globbed path.
    selection = options[:selection]
    filter = selection && !selection.empty? ? Regexp.new(selection) : nil

    # The glob yields paths prefixed with dir, so a bare '.' or '..' cannot occur
    # and needs no special handling.
    Dir.glob(glob_string).select { |path| filter.nil? || path =~ filter }.sort.each do |path|
      add_item(item, path)
    end
  end

  # Wraps file in the matching test item type and appends it to item.
  #
  # @raise [::Libis::WorkflowError] if file is neither a regular file nor a directory
  def add_item(item, file)
    child = if File.file?(file)
              TestFileItem.new
            elsif File.directory?(file)
              TestDirItem.new
            else
              # The previous fallback referenced an undefined constant and died
              # with a NameError; report the real problem instead.
              raise ::Libis::WorkflowError, "'#{file}' is neither a file nor a directory"
            end
    child.filename = file
    item << child
  end

end
|