libis-workflow 2.0.beta.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,231 @@
1
+ # encoding: utf-8
2
+
3
+ require 'backports/rails/hash'
4
+
5
+ require 'libis/workflow/config'
6
+
7
+ module Libis
8
+ module Workflow
9
+
10
+ # Base module for all work items.
11
+ #
12
+ # This module contains some basic attributes required for making the workflow and tasks behave properly:
13
+ #
14
+ # - status: [Symbol] the status field. Each task sets the status of the items it works on. Before starting processing
15
+ # the status is set to "#{task_name}Started". After successful processing it is set to "#{task_name}Done" and if
16
+ # the task failed, it is set to "#{task_name}Failed". The status field can be used to perform real-time
17
+ # monitoring, reporting and error-recovery or restart of the ingest.
18
+ # The initial value for this attribute is :NOT_STARTED.
19
+ # - parent: [Object|nil] a link to a parent work item. Work items can be organized in any hierarchy you think is
20
+ # relevant for your workflow (e.g. directory[/directory...]/file/line or library/section/book/page). Of course
21
+ # hierarchies are not mandatory.
22
+ # - items: [Array] a list of child work items. see above.
23
+ # - options: [Hash] a set of options for the task chain on how to deal with this work item. This attribute can be
24
+ # used to fine-tune the behaviour of tasks for a particular work item.
25
+ # - properties: [Hash] a set of properties, typically collected during the workflow processing and used to store
26
+ # final or intermediate results of tasks. The ::Lias::Ingester::FileItem module uses this attribute to store the
27
+ # properties (e.g. size, checksum, ...) of the file it represents.
28
+ # - log_history: [Array] a list of all logging messages collected for this work item. Whenever a task logs a message
29
+ # it will automatically be registered for the work item that it is processing or for the work item that was
30
+ # supplied as the first argument.
31
+ # - status_log: [Array] a list of all status changes the work item went through.
32
+ # - summary: [Hash] collected statistics about the ingest for the work item and its children. This structure will
33
+ # be filled in by the included task ::Lias::Ingester::Tasks::Analyzer which is appended to the workflow by default.
34
+ #
35
+ # The module is created so that it is possible to implement an ActiveRecord/Datamapper/... implementation easily.
36
+
37
module WorkItem
  include Enumerable

  # Text labels for the ::Logger::Severity values; the severity number is used as index.
  SEV_LABEL = %w[DEBUG INFO WARN ERROR FATAL ANY].freeze unless const_defined?(:SEV_LABEL)

  attr_accessor :parent
  attr_accessor :items
  attr_accessor :options, :properties
  attr_accessor :log_history, :status_log
  attr_accessor :summary

  # The initializer takes care of properly setting the correct default values for the attributes. A derived class
  # that wishes to define its own initializer should take care to call 'super' or make sure it overwrites the
  # attribute definitions itself. (e.g. in an ActiveRecord implementation)
  def initialize
    self.parent = nil
    self.items = []
    self.options = {}
    self.properties = {}
    self.log_history = []
    self.status_log = []
    self.summary = {}
  end

  # String representation of the identity of the work item.
  #
  # You may want to overwrite this method as it tries the :name property or whatever #inspect returns if that
  # fails. Typically this should return the key value, file name or id number. If that's what your :name property
  # contains, you're fine.
  #
  # @return [String] string identification for this work item.
  def name
    properties[:name] || inspect
  end

  # @return [String] same as #name
  def to_s
    name
  end

  # Names of all ancestors (topmost first) followed by the name of this item. A parent that does not respond to
  # #names contributes nothing.
  #
  # @return [Array<String>] list of names without nil entries
  def names
    ancestors = parent.respond_to?(:names) ? Array(parent.names) : []
    (ancestors + [name]).compact
  end

  # @return [String] the entries of #names joined with '/'
  def namepath
    names.join('/')
  end

  # File name safe version of the to_s output. The output should be safe to use as a file name to store work item
  # data. Typical use is when extra file items are created by a task and need to be stored on disk. The default
  # implementation percent-encodes (%xx) every character except ASCII letters, digits, '_', '.' and '-'.
  # Characters are encoded byte by byte, so a multi-byte character yields one %xx group per byte instead of a
  # single ambiguous group with more than two hex digits.
  #
  # @return [String] file name
  def to_filename
    to_s.gsub(/[^\w.-]/) { |char| char.bytes.map { |byte| '%%%02x' % byte }.join }
  end

  # Gets the current status of the object, derived from the last entry of the status_log.
  #
  # @return [Symbol] status code; :NOT_STARTED when no status was logged yet
  def status
    entry = status_log.last || {}
    status_label(entry[:text], entry[:tasklist])
  end

  # Changes the status of the object. As a side effect the status is also logged in the status_log with the current
  # timestamp and the item is saved. Nothing happens when the resulting status label equals the current status.
  #
  # @param [Symbol, Array] s the status text, or a [status, tasklist] pair (the only way to supply a task list
  #   when using the assignment syntax)
  # @param [Array] tasklist optional list of task names; its last entry prefixes the status label
  def status=(s, tasklist = nil)
    s, tasklist = s if s.is_a? Array
    s = s.to_sym
    return if status_label(s, tasklist) == status

    status_log << {
      timestamp: ::Time.now,
      tasklist: tasklist,
      text: s
    }
    save
  end

  # Check ingest status of the object. The status is checked to see if it ends in 'Failed'.
  #
  # @return [Boolean] true if the object failed, false otherwise
  def failed?
    status.to_s.end_with?('Failed')
  end

  # Helper function for the Tasks to add a log entry to the log_history.
  #
  # The supplied message structure is expected to contain the following fields:
  # - :severity : ::Logger::Severity value
  # - :id : optional message id
  # - :text : message text
  # - :task : list of tasks names (task hierarchy) that submits the message
  #
  # @param [Hash] message
  def add_log(message = {})
    log_history << message_struct(message)
    save
  end

  # Operator form of #add_log.
  def <=(message = {}); add_log(message); end

  # Iterates over the child work items and invokes the block on each of them.
  #
  # @return [Enumerator] when no block is given
  def each(&block)
    return to_enum(:each) unless block

    items.each(&block)
  end

  # Add a child work item. Anything that is not a WorkItem (including nil) is ignored.
  #
  # @param [WorkItem] item to be added to the child list :items
  # @return [WorkItem] self, so calls can be chained
  def add_item(item)
    return self unless item.is_a?(WorkItem)

    items << item
    item.parent = self
    save!
    item.save!
    self
  end

  alias :<< :add_item

  # Dummy method. It is a placeholder for DB backed implementations. Wherever appropriate WorkItem#save will be
  # called to save the current item's state. If state needs to be persisted, you should override this method or
  # make sure your persistence layer implements it in your class.
  def save
  end

  # Dummy method. It is a placeholder for DB backed implementations. WorkItem#add_item calls it on both the parent
  # and the added child. If state needs to be persisted, you should override this method or make sure your
  # persistence layer implements it in your class.
  def save!
  end

  # Add a structured message to the log history and send it to the configured logger. The message can be
  # submitted as an :id or as :text. If an :id is submitted, it will be used to look up the text in the
  # MessageRegistry. The message text will be passed to the % operator with the args parameter. If that fails
  # (e.g. because the format string is not correct) the args value is appended to the message, unless there are
  # no args, in which case the text is used unchanged.
  #
  # @param [Integer] severity a ::Logger::Severity value
  # @param [Hash] msg should contain message text as :id or :text and the hierarchical name of the task as :task
  # @param [Array] args string format values
  def log_message(severity, msg, *args)
    message_id, template =
      if msg[:id]
        [msg[:id], MessageRegistry.instance.get_message(msg[:id])]
      elsif msg[:text]
        [0, msg[:text]]
      else
        [0, '']
      end
    task = msg[:task] || '*UNKNOWN*'

    message_text =
      begin
        template % args
      rescue StandardError
        args.empty? ? template.to_s : "#{template} - #{args}"
      end

    add_log severity: severity, id: message_id.to_i, text: message_text, task: task

    # Use the most specific identification that can be obtained without raising.
    label = ''
    begin
      label = to_s
      label = name
      label = namepath
    rescue StandardError
      # keep whatever was obtained before the failure
    end
    Config.logger.add(severity, message_text, ('%s - %s ' % [task, label]))
  end

  protected

  # go up the hierarchy and return the topmost work item
  #
  # @return [WorkItem] the root work item
  def root
    top = self
    top = top.parent while top.parent.is_a?(WorkItem)
    top
  end

  # create and return a proper message structure; the supplied hash is left untouched
  # @param [Hash] opts may contain :severity, :id, :text and :task
  # @return [Hash] entry with :timestamp, :severity (label), :task, :id and :message
  def message_struct(opts = {})
    data = { severity: ::Logger::INFO, id: 0, text: '' }.merge(opts)
    {
      timestamp: ::Time.now,
      severity: SEV_LABEL[data[:severity]],
      task: data[:task],
      id: data[:id],
      message: data[:text]
    }
  end

  # Builds the status symbol from the last task name (if any) and the status text.
  #
  # @param [Object] text status text
  # @param [Array, nil] tasklist task names; only the last one is used
  # @return [Symbol] the combined label or :NOT_STARTED when it would be empty
  def status_label(text, tasklist)
    prefix = tasklist.respond_to?(:last) ? tasklist.last : ''
    label = "#{prefix}#{text}"
    label.empty? ? :NOT_STARTED : label.to_sym
  end
end
230
+ end
231
+ end
@@ -0,0 +1,5 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'workitems/work_item'
4
+ require_relative 'workitems/file_item'
5
+ require_relative 'workitems/dir_item'
@@ -0,0 +1,28 @@
1
+ # encoding: utf-8
2
+ require 'libis/exceptions'
3
+
4
module Libis
  module Workflow

    # Constant name => file that defines it. Each file is only loaded when its constant is first referenced.
    {
      MessageRegistry: 'libis/workflow/message_registry',
      Config: 'libis/workflow/config',

      WorkItem: 'libis/workflow/workitems/work_item',
      FileItem: 'libis/workflow/workitems/file_item',
      DirItem: 'libis/workflow/workitems/dir_item',

      Workflow: 'libis/workflow/workflow',
      Run: 'libis/workflow/run',
      Task: 'libis/workflow/task',

      Parameter: 'libis/workflow/parameter',

      Worker: 'libis/workflow/worker'
    }.each { |const_name, path| autoload const_name, path }

    class << self
      # Yields the Config singleton to the supplied block so settings can be changed.
      def configure
        yield Config.instance
      end
    end

  end
end
28
+
@@ -0,0 +1,2 @@
1
+ # encoding: utf-8
2
+ require 'libis/workflow'
@@ -0,0 +1,36 @@
1
+ # encoding: utf-8
2
+
3
# Make the gem's own lib directory loadable so the version file can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

require 'libis/workflow/version'

Gem::Specification.new do |gem|
  gem.name = 'libis-workflow'
  gem.version = ::Libis::Workflow::VERSION
  # Build date as YYYY-MM-DD. Time is always available, whereas Date is only defined when the 'date' library
  # happens to be loaded already.
  gem.date = Time.now.strftime('%Y-%m-%d')

  gem.summary = %q{LIBIS Workflow framework.}
  gem.description = %q{A simple framework to build custom task/workflow solutions.}

  gem.author = 'Kris Dekeyser'
  gem.email = 'kris.dekeyser@libis.be'
  gem.homepage = 'https://github.com/Kris-LIBIS/workflow'
  gem.license = 'MIT'

  # Package every file tracked by git; executables and test files are derived from that list.
  gem.files = `git ls-files -z`.split("\x0")
  gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})

  gem.require_paths = ['lib']

  gem.add_development_dependency 'bundler', '~> 1.6'
  gem.add_development_dependency 'rake', '~> 10.3'
  gem.add_development_dependency 'rspec', '~> 3.1'
  gem.add_development_dependency 'simplecov', '~> 0.9'
  gem.add_development_dependency 'coveralls', '~> 0.7'

  gem.add_runtime_dependency 'libis-tools', '~> 0.9'
  gem.add_runtime_dependency 'sidekiq', '~> 3.3'

end
@@ -0,0 +1,16 @@
1
+ # encoding: utf-8
2
+ require 'libis/workflow/workitems'
3
+
4
# Directory work item used by the specs.
class TestDirItem
  include ::Libis::Workflow::DirItem

  # Sets the name after verifying that it refers to an existing directory.
  #
  # @param [String] dir path of the directory
  # @raise [RuntimeError] when dir is not a directory
  def name=(dir)
    unless File.directory?(dir)
      raise RuntimeError, "'#{dir}' is not a directory"
    end
    super(dir)
  end

  # @return the :name property when it is set, otherwise the name supplied by the included module
  def name
    custom = properties[:name]
    custom || super
  end

end
@@ -0,0 +1,19 @@
1
+ # encoding: utf-8
2
+ require 'libis/tools/checksum'
3
+
4
+ require 'libis/workflow/workitems'
5
+
6
# File work item used by the specs.
class TestFileItem
  include ::Libis::Workflow::FileItem

  # Sets the file name after verifying that it refers to an existing file; the SHA256 checksum of the file is
  # registered before the name is handed on.
  #
  # @param [String] file path of the file
  # @raise [RuntimeError] when file is not a regular file
  def filename=(file)
    unless File.file?(file)
      raise RuntimeError, "'#{file}' is not a file"
    end
    digest = ::Libis::Tools::Checksum.hexdigest(file, :SHA256)
    set_checksum(:SHA256, digest)
    super(file)
  end

  # @return the :name property when it is set, otherwise the name supplied by the included module
  def name
    custom = properties[:name]
    custom || super
  end

end
@@ -0,0 +1,10 @@
1
+ # encoding: utf-8
2
+ require 'libis/workflow/workitems'
3
+
4
+ require_relative 'test_dir_item'
5
+
6
# Run subclass used by the specs; it always reports a fixed name.
class TestRun < ::Libis::Workflow::Run

  # @return [String] the constant name 'TestRun'
  def name
    'TestRun'
  end

end
data/spec/items.rb ADDED
@@ -0,0 +1,3 @@
1
+ require_relative 'items/test_dir_item'
2
+ require_relative 'items/test_file_item'
3
+ require_relative 'items/test_run'
@@ -0,0 +1,8 @@
1
+ require 'coveralls'
2
+ Coveralls.wear!
3
+
4
+ require 'bundler/setup'
5
+ Bundler.setup
6
+
7
+ require 'rspec'
8
+ require 'libis-workflow'
data/spec/task_spec.rb ADDED
@@ -0,0 +1,17 @@
1
+ require_relative 'spec_helper'
2
+
3
+ require 'libis/workflow/task'
4
+
5
describe 'Task' do

  # A task created without a parent.
  let(:task) { ::Libis::Workflow::Task.new(nil) }

  it 'should create a default task' do
    expect(task.parent).to eq nil
    expect(task.name).to eq 'Task'
    expect(task.options[:abort_on_error]).to eq false
  end

end
@@ -0,0 +1,13 @@
1
+ # encoding: utf-8
2
+ require 'backports/rails/string'
3
+
4
+ require 'libis/workflow/workitems'
5
+
6
# Task that stores the camelized item name in the :name property of test file and directory items.
class CamelizeName < ::Libis::Workflow::Task

  # Items that are neither TestFileItem nor TestDirItem are left untouched.
  #
  # @param [Object] item the work item to process
  def process(item)
    case item
    when TestFileItem, TestDirItem
      item.properties[:name] = item.name.camelize
    end
  end

end
@@ -0,0 +1,33 @@
1
+ # encoding: utf-8
2
+ require 'libis/tools/checksum'
3
+
4
+ require 'libis/exceptions'
5
+ require 'libis/workflow/workitems'
6
+
7
# Task that recomputes file checksums and compares them with the values stored on the item.
class ChecksumTester < ::Libis::Workflow::Task

  parameter checksum_type: nil,
            description: 'Checksum type to use.',
            constraint: ::Libis::Tools::Checksum::CHECKSUM_TYPES.map(&:to_s)

  # Verifies the checksum named by the :checksum_type option or, when that option is nil, every known checksum
  # type for which the item has a value. Only TestFileItem instances are checked.
  #
  # @param [Object] item the work item to process
  def process(item)
    return unless item.is_a? TestFileItem

    requested = options[:checksum_type]
    return test_checksum(item, requested) unless requested.nil?

    ::Libis::Tools::Checksum::CHECKSUM_TYPES.each do |type|
      test_checksum(item, type) if item.checksum(type)
    end
  end

  # Compares a freshly computed digest of the item's file with the checksum stored on the item.
  #
  # @param [TestFileItem] item the item to verify
  # @param [String, Symbol] checksum_type name of the checksum algorithm
  # @raise [::Libis::WorkflowError] when the two values differ
  def test_checksum(item, checksum_type)
    actual = ::Libis::Tools::Checksum.hexdigest(item.fullpath, checksum_type.to_sym)
    return if item.checksum(checksum_type) == actual

    raise ::Libis::WorkflowError, "Checksum test #{checksum_type} failed for #{item.filepath}"
  end

end
@@ -0,0 +1,48 @@
1
+ # encoding: utf-8
2
+ require 'libis/exceptions'
3
+
4
+ require_relative '../items'
5
+
6
# Task that builds the item tree: it adds the configured location to a TestRun and the directory entries to
# every TestDirItem.
class CollectFiles < ::Libis::Workflow::Task

  parameter location: '.',
            description: 'Dir location to start scanning for files.'
  parameter subdirs: false,
            description: 'Look for files in subdirs too.'
  parameter selection: nil,
            description: 'Only select files that match the given regular expression. Ignored if empty.'

  # A TestRun receives an item for the :location option; a TestDirItem receives items for the entries found in
  # its directory. Other items are ignored.
  #
  # @param [Object] item the work item to process
  def process(item)
    if item.is_a? TestRun
      add_item(item, options[:location])
    elsif item.is_a? TestDirItem
      collect_files(item, item.fullpath)
    end
  end

  # Adds a child item for every entry of dir (recursively when the :subdirs option is set) whose path matches
  # the :selection regular expression. Entries are added in sorted order.
  #
  # @param [Object] item the parent item that receives the children
  # @param [String] dir the directory to scan
  def collect_files(item, dir)
    pattern = options[:subdirs] ? File.join(dir, '**', '*') : File.join(dir, '*')

    # Compile the selection once instead of once per file.
    selection = options[:selection]
    filter = Regexp.new(selection) if selection && !selection.empty?

    found = Dir.glob(pattern)
    found = found.grep(filter) if filter
    found.sort.each do |file|
      # glob returns full paths, so the dot entries have to be recognised by their base name
      next if %w[. ..].include?(File.basename(file))
      add_item(item, file)
    end
  end

  # Creates a TestFileItem or TestDirItem for the path and appends it to item.
  #
  # @param [Object] item the parent item
  # @param [String] file path of the file or directory
  # @raise [::Libis::WorkflowError] when the path is neither a file nor a directory
  def add_item(item, file)
    child =
      if File.file?(file)
        TestFileItem.new
      elsif File.directory?(file)
        TestDirItem.new
      else
        raise ::Libis::WorkflowError, "'#{file}' is neither a file nor a directory"
      end
    child.filename = file
    item << child
  end

end