libis-workflow 2.0.beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,231 @@
1
+ # encoding: utf-8
2
+
3
+ require 'backports/rails/hash'
4
+
5
+ require 'libis/workflow/config'
6
+
7
+ module Libis
8
+ module Workflow
9
+
10
+ # Base module for all work items.
11
+ #
12
+ # This module contains some basic attributes required for making the workflow and tasks behave properly:
13
+ #
14
+ # - status: [Symbol] the status field. Each task sets the status of the items it works on. Before starting processing
15
+ # the status is set to "#{task_name}Started". After successful processing it is set to "#{task_name}Done" and if
16
+ # the task failed, it is set to "#{task_name}Failed". The status field can be used to perform real-time
17
+ # monitoring, reporting and error-recovery or restart of the ingest.
18
+ # As long as no status has been logged, the reported value for this attribute is :NOT_STARTED.
19
+ # - parent: [Object|nil] a link to a parent work item. Work items can be organized in any hierarchy you think is
20
+ # relevant for your workflow (e.g. directory[/directory...]/file/line or library/section/book/page). Of course
21
+ # hierarchies are not mandatory.
22
+ # - items: [Array] a list of child work items. see above.
23
+ # - options: [Hash] a set of options for the task chain on how to deal with this work item. This attribute can be
24
+ # used to fine-tune the behaviour of tasks for a particular work item.
25
+ # - properties: [Hash] a set of properties, typically collected during the workflow processing and used to store
26
+ # final or intermediate results of tasks. The ::Lias::Ingester::FileItem module uses this attribute to store the
27
+ # properties (e.g. size, checksum, ...) of the file it represents.
28
+ # - log_history: [Array] a list of all logging messages collected for this work item. Whenever a task logs a message
29
+ # it will automatically be registered for the work item that it is processing or for the work item that was
30
+ # supplied as the first argument.
31
+ # - status_log: [Array] a list of all status changes the work item went through.
32
+ # - summary: [Hash] collected statistics about the ingest for the work item and its children. This structure will
33
+ # be filled in by the included task ::Lias::Ingester::Tasks::Analyzer which is appended to the workflow by default.
34
+ #
35
+ # The module is created so that it is possible to implement an ActiveRecord/Datamapper/... implementation easily.
36
+
37
module WorkItem
  include Enumerable

  # Text labels for the numeric ::Logger severity values; the severity number is the array index.
  SEV_LABEL = %w(DEBUG INFO WARN ERROR FATAL ANY).freeze unless const_defined? :SEV_LABEL

  attr_accessor :parent
  attr_accessor :items
  attr_accessor :options, :properties
  attr_accessor :log_history, :status_log
  attr_accessor :summary

  # Sets every attribute to its empty default value. A derived class that defines its own initializer should
  # call 'super' or make sure it assigns the attributes itself (e.g. in an ActiveRecord implementation).
  def initialize
    self.parent = nil
    self.items = []
    self.options = {}
    self.properties = {}
    self.log_history = []
    self.status_log = []
    self.summary = {}
  end

  # String representation of the identity of the work item.
  #
  # Returns the :name property, or whatever #inspect returns when that property is not set. Override this
  # method if the identity of your work item (key value, file name, id number, ...) is stored elsewhere.
  #
  # @return [String] string identification for this work item.
  def name
    properties[:name] || inspect
  end

  # @return [String] same value as #name
  def to_s
    name
  end

  # Names of all work items on the path from the topmost ancestor down to this item.
  #
  # A parent that does not respond to #names (including a nil parent) contributes nothing.
  #
  # @return [Array<String>] list of names, nil entries removed
  def names
    path = parent.respond_to?(:names) ? parent.names : []
    path.push(name).compact
  end

  # @return [String] the entries of #names joined with '/'
  def namepath
    names.join('/')
  end

  # File name safe version of the #to_s output, for use when work item data has to be stored on disk.
  #
  # Every character that is not an ASCII letter, digit, '_', '.' or '-' is percent-encoded. The encoding is
  # done per byte, so a multi-byte character becomes a sequence of '%xx' groups instead of a single group
  # with more than two hex digits.
  #
  # @return [String] file name
  def to_filename
    to_s.gsub(/[^\w.-]/) { |char| char.bytes.map { |byte| '%%%02x' % byte }.join }
  end

  # Gets the current status of the object, derived from the last entry in the status_log.
  #
  # @return [Symbol] status code; :NOT_STARTED when no status has been logged yet
  def status
    last_entry = status_log.last
    return status_label(nil, nil) unless last_entry
    status_label(last_entry[:text], last_entry[:tasklist])
  end

  # Changes the status of the object. If the resulting status label differs from the current one, a new entry
  # with the current timestamp is appended to the status_log and #save is called.
  #
  # @param [Symbol, Array] s the status text, or a two-element array [text, tasklist]
  # @param [Array, nil] tasklist task name hierarchy; its last element prefixes the status label
  def status=(s, tasklist = nil)
    # The assignment syntax (item.status = x) can only pass one value, so a pair is unpacked here.
    s, tasklist = s if s.is_a? Array
    s = s.to_sym
    return if status_label(s, tasklist) == status
    status_log << {
      timestamp: ::Time.now,
      tasklist: tasklist,
      text: s
    }
    save
  end

  # Check ingest status of the object. The status is checked to see if it ends in 'Failed'.
  #
  # @return [Boolean] true if the object failed, false otherwise
  def failed?
    status.to_s.end_with?('Failed')
  end

  # Helper function for the Tasks to add a log entry to the log_history. Calls #save afterwards.
  #
  # The supplied message structure is expected to contain the following fields:
  # - :severity : ::Logger::Severity value
  # - :id : optional message id
  # - :text : message text
  # - :task : list of tasks names (task hierarchy) that submits the message
  #
  # @param [Hash] message (left unmodified)
  def add_log(message = {})
    log_history << message_struct(message)
    save
  end

  # Operator form of #add_log.
  def <=(message = {})
    add_log(message)
  end

  # Iterates over the child work items and invokes the block on each of them.
  #
  # @return [Enumerator] when no block is given
  def each(&block)
    return enum_for(:each) unless block
    items.each(&block)
  end

  # Add a child work item. Anything that is not a WorkItem is silently ignored.
  #
  # @param [WorkItem] item to be added to the child list :items
  # @return [WorkItem] self, so calls can be chained
  def add_item(item)
    return self unless item.is_a?(WorkItem)
    items << item
    item.parent = self
    save!
    item.save!
    self
  end

  alias :<< :add_item

  # Dummy method. It is a placeholder for DB backed implementations. Wherever appropriate WorkItem#save will be
  # called to save the current item's state. If state needs to be persisted, you should override this method or
  # make sure your persistence layer implements it in your class.
  def save
  end

  # Dummy method. Same placeholder role as #save; it is called by #add_item on both the parent and the child.
  def save!
  end

  # Add a structured message to the log history and send it to the configured logger.
  #
  # The message text is taken from the MessageRegistry when msg contains an :id, otherwise from msg[:text].
  # The text is passed to the % operator with the args parameter. If that fails (e.g. because the format
  # string is not correct) the args value is appended to the message instead.
  #
  # @param [Integer] severity ::Logger::Severity value (it is used as an index into SEV_LABEL)
  # @param [Hash] msg should contain message text as :id or :text and the hierarchical name of the task as :task
  # @param [Array] args string format values
  def log_message(severity, msg, *args)
    # Prepare info from msg struct for use with string substitution
    message_id, message_text =
      if msg[:id]
        [msg[:id], MessageRegistry.instance.get_message(msg[:id])]
      elsif msg[:text]
        [0, msg[:text]]
      else
        [0, '']
      end
    task = msg[:task] || '*UNKNOWN*'
    message_text =
      begin
        message_text % args
      rescue StandardError
        # best effort: keep the raw text and show the arguments next to it
        "#{message_text} - #{args}"
      end

    add_log severity: severity, id: message_id.to_i, text: message_text, task: task

    # Each assignment is a better identification than the previous one; when one of them raises, the last
    # value that could be obtained is used.
    name = ''
    begin
      name = to_s
      name = self.name
      name = namepath
    rescue StandardError
      # do nothing
    end
    Config.logger.add(severity, message_text, ('%s - %s ' % [task, name]))
  end

  protected

  # Go up the hierarchy and return the topmost work item.
  #
  # @return [WorkItem] the root work item
  def root
    top = self
    top = top.parent while top.parent.is_a?(WorkItem)
    top
  end

  # Create and return a proper message structure. Missing fields get a default value; the supplied hash is
  # not modified.
  #
  # @param [Hash] opts fields :severity, :id, :text and :task
  # @return [Hash] structure with :timestamp, :severity (label), :task, :id and :message
  def message_struct(opts = {})
    fields = { severity: ::Logger::INFO, id: 0, text: '' }.merge(opts)
    {
      timestamp: ::Time.now,
      severity: SEV_LABEL[fields[:severity]],
      task: fields[:task],
      id: fields[:id],
      message: fields[:text]
    }
  end

  # Build the status label: the last element of the task list followed by the status text.
  #
  # @param [Object] text status text
  # @param [Object] tasklist task hierarchy; ignored when it does not respond to #last
  # @return [Symbol] the label, or :NOT_STARTED when the label would be empty
  def status_label(text, tasklist)
    prefix = tasklist.respond_to?(:last) ? tasklist.last : ''
    label = "#{prefix}#{text}"
    label.empty? ? :NOT_STARTED : label.to_sym
  end
end
230
+ end
231
+ end
@@ -0,0 +1,5 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'workitems/work_item'
4
+ require_relative 'workitems/file_item'
5
+ require_relative 'workitems/dir_item'
@@ -0,0 +1,28 @@
1
+ # encoding: utf-8
2
+ require 'libis/exceptions'
3
+
4
module Libis
  # Namespace of the workflow framework. The constants below are registered for autoloading, so each
  # file is only required when its constant is first referenced.
  module Workflow

    {
      MessageRegistry: 'libis/workflow/message_registry',
      Config: 'libis/workflow/config',

      WorkItem: 'libis/workflow/workitems/work_item',
      FileItem: 'libis/workflow/workitems/file_item',
      DirItem: 'libis/workflow/workitems/dir_item',

      Workflow: 'libis/workflow/workflow',
      Run: 'libis/workflow/run',
      Task: 'libis/workflow/task',

      Parameter: 'libis/workflow/parameter',

      Worker: 'libis/workflow/worker'
    }.each { |const_name, path| autoload const_name, path }

    class << self
      # Yields the Config instance to the given block so the caller can adjust the configuration.
      def configure
        yield Config.instance
      end
    end

  end
end
28
+
@@ -0,0 +1,2 @@
1
+ # encoding: utf-8
2
+ require 'libis/workflow'
@@ -0,0 +1,36 @@
1
+ # encoding: utf-8
2
+
3
# Date is used for gem.date below; require it explicitly instead of relying on RubyGems having loaded it.
require 'date'

# Make the gem's own lib directory loadable so the version file can be required.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

require 'libis/workflow/version'

Gem::Specification.new do |gem|
  gem.name = 'libis-workflow'
  gem.version = ::Libis::Workflow::VERSION
  gem.date = Date.today.to_s

  gem.summary = %q{LIBIS Workflow framework.}
  gem.description = %q{A simple framework to build custom task/workflow solutions.}

  gem.author = 'Kris Dekeyser'
  gem.email = 'kris.dekeyser@libis.be'
  gem.homepage = 'https://github.com/Kris-LIBIS/workflow'
  gem.license = 'MIT'

  # The packaged files are whatever git tracks; executables and test files are derived from that list.
  gem.files = `git ls-files -z`.split("\x0")
  gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})

  gem.require_paths = ['lib']

  gem.add_development_dependency 'bundler', '~> 1.6'
  gem.add_development_dependency 'rake', '~> 10.3'
  gem.add_development_dependency 'rspec', '~> 3.1'
  gem.add_development_dependency 'simplecov', '~> 0.9'
  gem.add_development_dependency 'coveralls', '~> 0.7'

  gem.add_runtime_dependency 'libis-tools', '~> 0.9'
  gem.add_runtime_dependency 'sidekiq', '~> 3.3'

end
@@ -0,0 +1,16 @@
1
+ # encoding: utf-8
2
+ require 'libis/workflow/workitems'
3
+
4
# Directory work item used by the specs.
class TestDirItem
  include ::Libis::Workflow::DirItem

  # Assigns the name after checking that it refers to an existing directory.
  #
  # @param [String] dir path of the directory
  # @raise [RuntimeError] when dir is not a directory
  def name=(dir)
    unless File.directory?(dir)
      raise RuntimeError, "'#{dir}' is not a directory"
    end
    super(dir)
  end

  # @return the :name property when it is set, otherwise the value of the inherited #name
  def name
    custom_name = properties[:name]
    custom_name || super
  end

end
@@ -0,0 +1,19 @@
1
+ # encoding: utf-8
2
+ require 'libis/tools/checksum'
3
+
4
+ require 'libis/workflow/workitems'
5
+
6
# File work item used by the specs.
class TestFileItem
  include ::Libis::Workflow::FileItem

  # Assigns the file name after checking that it refers to an existing file. The SHA256 checksum of the
  # file is registered before the name is handed to the inherited setter.
  #
  # @param [String] file path of the file
  # @raise [RuntimeError] when file is not a regular file
  def filename=(file)
    unless File.file?(file)
      raise RuntimeError, "'#{file}' is not a file"
    end
    digest = ::Libis::Tools::Checksum.hexdigest(file, :SHA256)
    set_checksum(:SHA256, digest)
    super(file)
  end

  # @return the :name property when it is set, otherwise the value of the inherited #name
  def name
    custom_name = properties[:name]
    custom_name || super
  end

end
@@ -0,0 +1,10 @@
1
+ # encoding: utf-8
2
+ require 'libis/workflow/workitems'
3
+
4
+ require_relative 'test_dir_item'
5
+
6
# Run subclass used by the specs; it always reports the same fixed name.
class TestRun < ::Libis::Workflow::Run

  # @return [String] the fixed name 'TestRun'
  def name
    'TestRun'
  end

end
data/spec/items.rb ADDED
@@ -0,0 +1,3 @@
1
+ require_relative 'items/test_dir_item'
2
+ require_relative 'items/test_file_item'
3
+ require_relative 'items/test_run'
@@ -0,0 +1,8 @@
1
+ require 'coveralls'
2
+ Coveralls.wear!
3
+
4
+ require 'bundler/setup'
5
+ Bundler.setup
6
+
7
+ require 'rspec'
8
+ require 'libis-workflow'
data/spec/task_spec.rb ADDED
@@ -0,0 +1,17 @@
1
+ require_relative 'spec_helper'
2
+
3
+ require 'libis/workflow/task'
4
+
5
describe 'Task' do

  # A task created with a nil argument has no parent, is named 'Task' and does not abort on error.
  it 'should create a default task' do
    default_task = ::Libis::Workflow::Task.new(nil)

    expect(default_task.parent).to eq(nil)
    expect(default_task.name).to eq('Task')
    expect(default_task.options[:abort_on_error]).to eq(false)
  end

end
@@ -0,0 +1,13 @@
1
+ # encoding: utf-8
2
+ require 'backports/rails/string'
3
+
4
+ require 'libis/workflow/workitems'
5
+
6
# Test task that replaces the :name property of file and directory items with its camelized form.
class CamelizeName < ::Libis::Workflow::Task

  # @param [Object] item work item to process; anything but a TestFileItem or TestDirItem is left untouched
  def process(item)
    supported = item.is_a?(TestFileItem) || item.is_a?(TestDirItem)
    item.properties[:name] = item.name.camelize if supported
  end

end
@@ -0,0 +1,33 @@
1
+ # encoding: utf-8
2
+ require 'libis/tools/checksum'
3
+
4
+ require 'libis/exceptions'
5
+ require 'libis/workflow/workitems'
6
+
7
# Test task that recomputes checksums of file items and compares them with the values stored on the item.
class ChecksumTester < ::Libis::Workflow::Task

  parameter checksum_type: nil,
            description: 'Checksum type to use.',
            constraint: ::Libis::Tools::Checksum::CHECKSUM_TYPES.map(&:to_s)

  # Verifies the configured checksum type, or, when none is configured, every known checksum type for
  # which the item holds a value. Items that are not a TestFileItem are ignored.
  #
  # @param [Object] item work item to verify
  def process(item)
    return unless item.is_a? TestFileItem

    requested_type = options[:checksum_type]
    return test_checksum(item, requested_type) unless requested_type.nil?

    ::Libis::Tools::Checksum::CHECKSUM_TYPES.each do |type|
      test_checksum(item, type) if item.checksum(type)
    end
  end

  # Recomputes one checksum from the file on disk and compares it with the stored value.
  #
  # @param [Object] item file item to verify
  # @param [String, Symbol] checksum_type checksum algorithm name
  # @raise [::Libis::WorkflowError] when the computed and stored checksums differ
  def test_checksum(item, checksum_type)
    computed = ::Libis::Tools::Checksum.hexdigest(item.fullpath, checksum_type.to_sym)
    unless item.checksum(checksum_type) == computed
      raise ::Libis::WorkflowError, "Checksum test #{checksum_type} failed for #{item.filepath}"
    end
  end

end
@@ -0,0 +1,48 @@
1
+ # encoding: utf-8
2
+ require 'libis/exceptions'
3
+
4
+ require_relative '../items'
5
+
6
# Test task that builds the work item tree from the file system: the start location is attached to the
# run and the entries of each directory item are attached to that directory item.
class CollectFiles < ::Libis::Workflow::Task

  parameter location: '.',
            description: 'Dir location to start scanning for files.'
  parameter subdirs: false,
            description: 'Look for files in subdirs too.'
  parameter selection: nil,
            description: 'Only select files that match the given regular expression. Ignored if empty.'

  # @param [Object] item a TestRun gets the configured location as child; a TestDirItem gets the entries
  #   found in its directory as children; any other item is ignored
  def process(item)
    if item.is_a? TestRun
      add_item(item, options[:location])
    elsif item.is_a? TestDirItem
      collect_files(item, item.fullpath)
    end
  end

  # Globs the directory (recursively when the :subdirs option is set), keeps the paths matching the
  # :selection expression and adds them to the item in sorted order.
  #
  # @param [Object] item work item that receives the children
  # @param [String] dir directory to scan
  def collect_files(item, dir)
    glob_string = options[:subdirs] ? File.join(dir, '**', '*') : File.join(dir, '*')

    # Compile the selection expression once instead of once per path.
    selection = options[:selection]
    filter = Regexp.new(selection) if selection && !selection.empty?

    # The original check against '.' and '..' compared whole glob paths and could never match, so it is gone.
    Dir.glob(glob_string).select do |path|
      filter.nil? || path =~ filter
    end.sort.each do |path|
      add_item(item, path)
    end
  end

  # Creates a file or directory item for the given path and appends it to the parent item.
  #
  # @param [Object] item parent work item
  # @param [String] file path of the entry to add
  # @raise [::Libis::WorkflowError] when the path is neither a regular file nor a directory
  def add_item(item, file)
    child = if File.file?(file)
              TestFileItem.new
            elsif File.directory?(file)
              TestDirItem.new
            else
              # The original fell through to an undefined constant here and failed with a NameError.
              raise ::Libis::WorkflowError, "'#{file}' is neither a file nor a directory"
            end
    child.filename = file
    item << child
  end

end