libis-workflow 2.0.beta.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.coveralls.yml +2 -0
- data/.gitignore +36 -0
- data/.travis.yml +33 -0
- data/Gemfile +4 -0
- data/LICENSE +21 -0
- data/README.md +296 -0
- data/Rakefile +7 -0
- data/lib/libis/exceptions.rb +8 -0
- data/lib/libis/workflow/base/logger.rb +30 -0
- data/lib/libis/workflow/base/run.rb +68 -0
- data/lib/libis/workflow/base/workflow.rb +123 -0
- data/lib/libis/workflow/config.rb +92 -0
- data/lib/libis/workflow/message_registry.rb +32 -0
- data/lib/libis/workflow/run.rb +27 -0
- data/lib/libis/workflow/task.rb +259 -0
- data/lib/libis/workflow/tasks/analyzer.rb +41 -0
- data/lib/libis/workflow/version.rb +7 -0
- data/lib/libis/workflow/worker.rb +42 -0
- data/lib/libis/workflow/workflow.rb +29 -0
- data/lib/libis/workflow/workitems/dir_item.rb +12 -0
- data/lib/libis/workflow/workitems/file_item.rb +78 -0
- data/lib/libis/workflow/workitems/work_item.rb +231 -0
- data/lib/libis/workflow/workitems.rb +5 -0
- data/lib/libis/workflow.rb +28 -0
- data/lib/libis-workflow.rb +2 -0
- data/libis-workflow.gemspec +36 -0
- data/spec/items/test_dir_item.rb +16 -0
- data/spec/items/test_file_item.rb +19 -0
- data/spec/items/test_run.rb +10 -0
- data/spec/items.rb +3 -0
- data/spec/spec_helper.rb +8 -0
- data/spec/task_spec.rb +17 -0
- data/spec/tasks/camelize_name.rb +13 -0
- data/spec/tasks/checksum_tester.rb +33 -0
- data/spec/tasks/collect_files.rb +48 -0
- data/spec/workflow_spec.rb +231 -0
- metadata +187 -0
@@ -0,0 +1,231 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'backports/rails/hash'
|
4
|
+
|
5
|
+
require 'libis/workflow/config'
|
6
|
+
|
7
|
+
module Libis
  module Workflow

    # Base module for all work items.
    #
    # This module contains some basic attributes required for making the workflow and tasks behave properly:
    #
    # - status: [Symbol] the status field. Each task sets the status of the items it works on. Before starting
    #     processing the status is set to "#{task_name}Started". After successful processing it is set to
    #     "#{task_name}Done" and if the task failed, it is set to "#{task_name}Failed". The status field can be used
    #     to perform real-time monitoring, reporting and error-recovery or restart of the ingest.
    #     As long as no status has been logged, :NOT_STARTED is reported.
    # - parent: [Object|nil] a link to a parent work item. Work items can be organized in any hierarchy you think is
    #     relevant for your workflow (e.g. directory[/directory...]/file/line or library/section/book/page). Of
    #     course hierarchies are not mandatory.
    # - items: [Array] a list of child work items. see above.
    # - options: [Hash] a set of options for the task chain on how to deal with this work item. This attribute can be
    #     used to fine-tune the behaviour of tasks for a particular work item.
    # - properties: [Hash] a set of properties, typically collected during the workflow processing and used to store
    #     final or intermediate results of tasks. The ::Libis::Workflow::FileItem module uses this attribute to store
    #     the properties (e.g. size, checksum, ...) of the file it represents.
    # - log_history: [Array] a list of all logging messages collected for this work item. Whenever a task logs a
    #     message it will automatically be registered for the work item that it is processing or for the work item
    #     that was supplied as the first argument.
    # - status_log: [Array] a list of all status changes the work item went through.
    # - summary: [Hash] collected statistics about the ingest for the work item and its children. This structure
    #     will be filled in by the included task ::Libis::Workflow::Tasks::Analyzer which is appended to the workflow
    #     by default.
    #
    # The module is created so that it is possible to implement an ActiveRecord/Datamapper/... implementation easily.
    module WorkItem
      include Enumerable

      attr_accessor :parent
      attr_accessor :items
      attr_accessor :options, :properties
      attr_accessor :log_history, :status_log
      attr_accessor :summary

      # The initializer takes care of properly setting the correct default values for the attributes. A derived class
      # that wishes to define its own initializer should take care to call 'super' or make sure it overwrites the
      # attribute definitions itself. (e.g. in an ActiveRecord implementation)
      def initialize
        self.parent = nil
        self.items = []
        self.options = {}
        self.properties = {}
        self.log_history = []
        self.status_log = []
        self.summary = {}
      end

      # String representation of the identity of the work item.
      #
      # You may want to overwrite this method as it tries the :name property or whatever #inspect returns if that
      # fails. Typically this should return the key value, file name or id number. If that's what your :name property
      # contains, you're fine.
      #
      # @return [String] string identification for this work item.
      def name
        self.properties[:name] || self.inspect
      end

      def to_s; self.name; end

      # Names of all ancestors (topmost first) followed by the name of this item.
      #
      # A parent that does not expose #names (nil or a foreign object) contributes nothing.
      #
      # @return [Array<String>] list of names from the top of the hierarchy down to this item
      def names
        inherited = self.parent.respond_to?(:names) ? self.parent.names : []
        (inherited + [name]).compact
      end

      # @return [String] the #names joined with '/'
      def namepath; self.names.join('/'); end

      # File name safe version of the to_s output. The output should be safe to use as a file name to store work item
      # data. Typical use is when extra file items are created by a task and need to be stored on disk. The default
      # implementation URL-encodes (%xx) all characters except ASCII alphanumerics, '_', '.' and '-'.
      #
      # Characters are encoded byte by byte. Formatting the whole code point instead would make a multi-byte
      # character such as the euro sign ("%20ac") indistinguishable from an encoded space followed by "ac".
      #
      # @return [String] file name
      def to_filename
        self.to_s.gsub(/[^\w.-]/) { |char| char.bytes.map { |byte| '%%%02x' % byte }.join }
      end

      # Gets the current status of the object.
      #
      # @return [Symbol] status code; :NOT_STARTED when the status log is still empty
      def status
        entry = self.status_log.last || {}
        status_label(entry[:text], entry[:tasklist])
      end

      # Changes the status of the object. As a side effect the status is also logged in the status_log with the current
      # timestamp. Nothing is logged when the resulting status label equals the current one.
      #
      # @param [Symbol|Array] s the new status, or a two element array holding the status and the task list
      # @param [Array] tasklist optional task name hierarchy; its last element prefixes the status label
      def status=(s, tasklist = nil)
        # A regular assignment can only pass one value, so status and tasklist may arrive packed in an array.
        s, tasklist = s if s.is_a? Array
        s = s.to_sym
        if status_label(s, tasklist) != self.status
          self.status_log << {
            timestamp: ::Time.now,
            tasklist: tasklist,
            text: s
          }
          self.save
        end
      end

      # Check ingest status of the object. The status is checked to see if it ends in 'Failed'.
      #
      # @return [Boolean] true if the object failed, false otherwise
      def failed?
        self.status.to_s.end_with?('Failed')
      end

      # Helper function for the Tasks to add a log entry to the log_history.
      #
      # The supplied message structure is expected to contain the following fields:
      # - :severity : ::Logger::Severity value
      # - :id : optional message id
      # - :text : message text
      # - :task : list of tasks names (task hierarchy) that submits the message
      #
      # The supplied hash is not modified.
      #
      # @param [Hash] message
      def add_log(message = {})
        msg = message_struct(message)
        self.log_history << msg
        self.save
      end

      def <=(message = {}); self.add_log(message); end

      # Iterates over the child work items and invokes code on each of them.
      #
      # @return [Enumerator] when called without a block
      def each(&block)
        return to_enum(:each) unless block
        self.items.each(&block)
      end

      # Add a child work item
      #
      # @param [WorkItem] item to be added to the child list :items
      # @return [WorkItem] self; anything that is not a WorkItem is silently ignored
      def add_item(item)
        return self unless item.is_a?(WorkItem)
        self.items << item
        item.parent = self
        self.save!
        item.save!
        self
      end

      alias :<< :add_item

      # Dummy method. It is a placeholder for DB backed implementations. Wherever appropriate WorkItem#save will be
      # called to save the current item's state. If state needs to be persisted, you should override this method or
      # make sure your persistence layer implements it in your class.
      def save
      end

      # Dummy method. It is a placeholder for DB backed implementations. Wherever appropriate WorkItem#save! will be
      # called to save the current item's state. If state needs to be persisted, you should override this method or
      # make sure your persistence layer implements it in your class.
      def save!
      end

      # Add a structured message to the log history. The message text can be submitted as an integer or text. If an
      # integer is submitted, it will be used to look up the text in the MessageRegistry. The message text will be
      # passed to the % operator with the args parameter. If that fails (e.g. because the format string is not correct)
      # the args value is appended to the message.
      #
      # @param [Integer] severity a ::Logger::Severity value
      # @param [Hash] msg should contain message text as :id or :text and the hierarchical name of the task as :task
      # @param [Array] args string format values
      def log_message(severity, msg, *args)
        # Prepare info from msg struct for use with string substitution
        message_id, message_text = if msg[:id]
                                     [msg[:id], MessageRegistry.instance.get_message(msg[:id])]
                                   elsif msg[:text]
                                     [0, msg[:text]]
                                   else
                                     [0, '']
                                   end
        task = msg[:task] || '*UNKNOWN*'
        message_text = begin
                         message_text % args
                       rescue StandardError
                         "#{message_text} - #{args}"
                       end

        self.add_log severity: severity, id: message_id.to_i, text: message_text, task: task

        # Best effort naming: every assignment that succeeds replaces the previous, less specific value; the first
        # one that raises keeps whatever was obtained so far.
        name = ''
        begin
          name = self.to_s
          name = self.name
          name = self.namepath
        rescue StandardError
          # keep the last name that could be determined
        end
        Config.logger.add(severity, message_text, ('%s - %s ' % [task, name]))
      end

      protected

      SEV_LABEL = %w(DEBUG INFO WARN ERROR FATAL ANY) unless const_defined? :SEV_LABEL

      # go up the hierarchy and return the topmost work item
      #
      # @return [WorkItem] the root work item
      def root
        root = self
        root = root.parent while root.parent && root.parent.is_a?(WorkItem)
        root
      end

      # create and return a proper message structure
      #
      # Missing fields default to severity INFO, id 0 and an empty text. The supplied hash is left untouched.
      #
      # @param [Hash] opts
      # @return [Hash] structure with :timestamp, :severity (label), :task, :id and :message
      def message_struct(opts = {})
        opts = { severity: ::Logger::INFO, id: 0, text: '' }.merge(opts)
        {
          timestamp: ::Time.now,
          # severities beyond the known labels are reported as 'ANY'
          severity: SEV_LABEL[opts[:severity]] || 'ANY',
          task: opts[:task],
          id: opts[:id],
          message: opts[:text]
        }
      end

      # Build the status symbol from the last task name in the task list and the status text.
      #
      # @param [Symbol|String|nil] text status text
      # @param [Array|nil] tasklist task name hierarchy; ignored when it does not respond to #last
      # @return [Symbol] the combined label or :NOT_STARTED when both parts are empty
      def status_label(text, tasklist)
        prefix = tasklist.respond_to?(:last) ? tasklist.last : ''
        label = "#{prefix}#{text}"
        label.empty? ? :NOT_STARTED : label.to_sym
      end

    end

  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'libis/exceptions'
|
3
|
+
|
4
|
+
module Libis
  # Top-level namespace of the workflow framework. All framework constants are registered for autoloading, so
  # their files are only loaded when a constant is first referenced.
  module Workflow

    # NOTE(review): the gem's file listing shows no libis/workflow/parameter.rb; confirm the :Parameter target
    # exists before relying on that constant.
    {
      MessageRegistry: 'libis/workflow/message_registry',
      Config: 'libis/workflow/config',
      WorkItem: 'libis/workflow/workitems/work_item',
      FileItem: 'libis/workflow/workitems/file_item',
      DirItem: 'libis/workflow/workitems/dir_item',
      Workflow: 'libis/workflow/workflow',
      Run: 'libis/workflow/run',
      Task: 'libis/workflow/task',
      Parameter: 'libis/workflow/parameter',
      Worker: 'libis/workflow/worker'
    }.each { |const_name, path| autoload const_name, path }

    class << self
      # Yields the Config instance to the given block so the caller can change settings.
      def configure
        yield Config.instance
      end
    end

  end
end
|
28
|
+
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Put the gem's own lib directory on the load path so the version file can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

# Date is used for gem.date; require it explicitly instead of relying on another library having loaded it.
require 'date'
require 'libis/workflow/version'

Gem::Specification.new do |gem|
  gem.name = 'libis-workflow'
  gem.version = ::Libis::Workflow::VERSION
  gem.date = Date.today.to_s

  gem.summary = %q{LIBIS Workflow framework.}
  gem.description = %q{A simple framework to build custom task/workflow solutions.}

  gem.author = 'Kris Dekeyser'
  gem.email = 'kris.dekeyser@libis.be'
  gem.homepage = 'https://github.com/Kris-LIBIS/workflow'
  gem.license = 'MIT'

  # Package everything tracked by git; executables and test files are derived from that list.
  gem.files = `git ls-files -z`.split("\x0")
  gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})

  gem.require_paths = ['lib']

  gem.add_development_dependency 'bundler', '~> 1.6'
  gem.add_development_dependency 'rake', '~> 10.3'
  gem.add_development_dependency 'rspec', '~> 3.1'
  gem.add_development_dependency 'simplecov', '~> 0.9'
  gem.add_development_dependency 'coveralls', '~> 0.7'

  # NOTE(review): work_item.rb requires 'backports/rails/hash' but backports is not declared here; presumably it
  # is pulled in through libis-tools - confirm.
  gem.add_runtime_dependency 'libis-tools', '~> 0.9'
  gem.add_runtime_dependency 'sidekiq', '~> 3.3'

end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'libis/workflow/workitems'
|
3
|
+
|
4
|
+
# Spec fixture: a directory work item built on ::Libis::Workflow::DirItem.
class TestDirItem
  include ::Libis::Workflow::DirItem

  # Validates that the given path is an existing directory and then hands it to the inherited name= method.
  #
  # @param [String] dir path of the directory
  # @raise [RuntimeError] when dir is not a directory
  def name=(dir)
    unless File.directory?(dir)
      raise "'#{dir}' is not a directory"
    end
    super(dir)
  end

  # @return the :name property when it is set, otherwise whatever the inherited name returns
  def name
    stored = self.properties[:name]
    return stored if stored
    super
  end

end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'libis/tools/checksum'
|
3
|
+
|
4
|
+
require 'libis/workflow/workitems'
|
5
|
+
|
6
|
+
# Spec fixture: a file work item built on ::Libis::Workflow::FileItem.
class TestFileItem
  include ::Libis::Workflow::FileItem

  # Validates that the given path is an existing regular file, registers its SHA256 digest through set_checksum
  # and then hands the path to the inherited filename= method.
  #
  # @param [String] file path of the file
  # @raise [RuntimeError] when file is not a regular file
  def filename=(file)
    unless File.file?(file)
      raise "'#{file}' is not a file"
    end
    digest = ::Libis::Tools::Checksum.hexdigest(file, :SHA256)
    set_checksum(:SHA256, digest)
    super(file)
  end

  # @return the :name property when it is set, otherwise whatever the inherited name returns
  def name
    stored = self.properties[:name]
    return stored if stored
    super
  end

end
|
data/spec/items.rb
ADDED
data/spec/spec_helper.rb
ADDED
data/spec/task_spec.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require_relative 'spec_helper'
|
2
|
+
|
3
|
+
require 'libis/workflow/task'
|
4
|
+
|
5
|
+
# Verifies the defaults of a Task that is created without a parent.
describe 'Task' do

  it 'should create a default task' do
    default_task = ::Libis::Workflow::Task.new(nil)

    expect(default_task.parent).to eq(nil)
    expect(default_task.name).to eq('Task')
    expect(default_task.options[:abort_on_error]).to eq(false)
  end

end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'backports/rails/string'
|
3
|
+
|
4
|
+
require 'libis/workflow/workitems'
|
5
|
+
|
6
|
+
# Spec task that replaces the :name property of test file and directory items with its camelized form.
class CamelizeName < ::Libis::Workflow::Task

  # @param [Object] item work item to process; anything but a TestFileItem or TestDirItem is left untouched
  def process(item)
    return unless [TestFileItem, TestDirItem].any? { |klass| item.is_a?(klass) }
    item.properties[:name] = item.name.camelize
  end

end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'libis/tools/checksum'
|
3
|
+
|
4
|
+
require 'libis/exceptions'
|
5
|
+
require 'libis/workflow/workitems'
|
6
|
+
|
7
|
+
# Spec task that recomputes checksums of test file items and compares them with the values stored on the item.
class ChecksumTester < ::Libis::Workflow::Task

  parameter checksum_type: nil,
            description: 'Checksum type to use.',
            constraint: ::Libis::Tools::Checksum::CHECKSUM_TYPES.map(&:to_s)

  # Verifies the configured checksum type, or - when none is configured - every known checksum type for which the
  # item reports a value.
  #
  # @param [Object] item work item to process; only TestFileItem instances are checked
  def process(item)
    return unless item.is_a? TestFileItem

    requested = options[:checksum_type]
    return test_checksum(item, requested) unless requested.nil?

    ::Libis::Tools::Checksum::CHECKSUM_TYPES.each do |type|
      test_checksum(item, type) if item.checksum(type)
    end
  end

  # Recomputes one checksum from the file at item.fullpath and compares it with the value stored on the item.
  #
  # @param [TestFileItem] item the file item to verify
  # @param [String|Symbol] checksum_type checksum algorithm name
  # @raise [::Libis::WorkflowError] when the stored and computed checksums differ
  def test_checksum(item, checksum_type)
    computed = ::Libis::Tools::Checksum.hexdigest(item.fullpath, checksum_type.to_sym)
    unless item.checksum(checksum_type) == computed
      raise ::Libis::WorkflowError, "Checksum test #{checksum_type} failed for #{item.filepath}"
    end
  end

end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'libis/exceptions'
|
3
|
+
|
4
|
+
require_relative '../items'
|
5
|
+
|
6
|
+
# Spec task that builds a tree of test work items from the file system: a TestRun receives the configured start
# location as child, a TestDirItem receives the entries found in its directory.
class CollectFiles < ::Libis::Workflow::Task

  parameter location: '.',
            description: 'Dir location to start scanning for files.'
  parameter subdirs: false,
            description: 'Look for files in subdirs too.'
  parameter selection: nil,
            description: 'Only select files that match the given regular expression. Ignored if empty.'

  # @param [Object] item work item to process; items other than TestRun and TestDirItem are ignored
  def process(item)
    if item.is_a? TestRun
      add_item(item, options[:location])
    elsif item.is_a? TestDirItem
      collect_files(item, item.fullpath)
    end
  end

  # Adds every entry below dir that matches the :selection option as a child of item, in sorted order.
  #
  # @param [Object] item work item that receives the children
  # @param [String] dir directory to scan; scanned recursively when the :subdirs option is set
  def collect_files(item, dir)
    glob_string = dir
    glob_string = File.join(glob_string, '**') if options[:subdirs]
    glob_string = File.join(glob_string, '*')

    # Compile the selection pattern once instead of once per globbed path.
    selection = options[:selection]
    filter = selection && !selection.empty? ? Regexp.new(selection) : nil

    Dir.glob(glob_string).select do |path|
      filter.nil? || path =~ filter
    end.sort.each do |file|
      # Glob results carry the directory prefix, so the '.' and '..' entries are recognised by their basename.
      next if %w(. ..).include?(File.basename(file))
      add_item(item, file)
    end
  end

  # Wraps a path in the matching test item class and appends it to item.
  #
  # @param [Object] item work item that receives the new child
  # @param [String] file path of a regular file or a directory
  # @raise [::Libis::WorkflowError] when the path is neither a regular file nor a directory (missing path,
  #   broken link, special file, ...)
  def add_item(item, file)
    child = if File.file?(file)
              TestFileItem.new
            elsif File.directory?(file)
              TestDirItem.new
            else
              raise ::Libis::WorkflowError, "'#{file}' is neither a file nor a directory"
            end
    child.filename = file
    item << child
  end

end
|