RubyGems - libis-workflow - Versions diffs - 2.0.beta.3 - Mend

libis-workflow 2.0.beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

checksums.yaml +7 -0
data/.coveralls.yml +2 -0
data/.gitignore +36 -0
data/.travis.yml +33 -0
data/Gemfile +4 -0
data/LICENSE +21 -0
data/README.md +296 -0
data/Rakefile +7 -0
data/lib/libis/exceptions.rb +8 -0
data/lib/libis/workflow/base/logger.rb +30 -0
data/lib/libis/workflow/base/run.rb +68 -0
data/lib/libis/workflow/base/workflow.rb +123 -0
data/lib/libis/workflow/config.rb +92 -0
data/lib/libis/workflow/message_registry.rb +32 -0
data/lib/libis/workflow/run.rb +27 -0
data/lib/libis/workflow/task.rb +259 -0
data/lib/libis/workflow/tasks/analyzer.rb +41 -0
data/lib/libis/workflow/version.rb +7 -0
data/lib/libis/workflow/worker.rb +42 -0
data/lib/libis/workflow/workflow.rb +29 -0
data/lib/libis/workflow/workitems/dir_item.rb +12 -0
data/lib/libis/workflow/workitems/file_item.rb +78 -0
data/lib/libis/workflow/workitems/work_item.rb +231 -0
data/lib/libis/workflow/workitems.rb +5 -0
data/lib/libis/workflow.rb +28 -0
data/lib/libis-workflow.rb +2 -0
data/libis-workflow.gemspec +36 -0
data/spec/items/test_dir_item.rb +16 -0
data/spec/items/test_file_item.rb +19 -0
data/spec/items/test_run.rb +10 -0
data/spec/items.rb +3 -0
data/spec/spec_helper.rb +8 -0
data/spec/task_spec.rb +17 -0
data/spec/tasks/camelize_name.rb +13 -0
data/spec/tasks/checksum_tester.rb +33 -0
data/spec/tasks/collect_files.rb +48 -0
data/spec/workflow_spec.rb +231 -0
metadata +187 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 9790f9c81c7096dd8871ffd8d586a233543e68d9
+  data.tar.gz: 605daffb9699e2deb3da97b921be71182ef5d65a
+SHA512:
+  metadata.gz: 4ea5a60fb96c162c8a9c138613bf057cfac85ecde4d4dc42a5ea28833b98480ed1113ecf121fb28c3c5fe63f47813e4e13185dee7c406c1fb08246a307cc5cb7
+  data.tar.gz: 2cfe51aaa9e05dde6d6b5cba4aee591e614121d04f1121710f369787baffae0c326903c6b308a57c2b46058a8ebd780546499cba4b5c5bcf93aecf457b537ea8

data/.coveralls.yml ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ service_name: travis-ci
2	+ repo_token: TMosCEIw4eu2hK05NxyY2UYIRJYQPzemt

data/.gitignore ADDED Viewed

@@ -0,0 +1,36 @@
+*.gem
+*.rbc
+/.config
+/coverage/
+/InstalledFiles
+/pkg/
+/spec/reports/
+/test/tmp/
+/test/version_tmp/
+/tmp/
+## Specific to RubyMotion:
+.dat*
+.repl_history
+build/
+## Documentation cache and generated files:
+/.yardoc/
+/_yardoc/
+/doc/
+/rdoc/
+## Environment normalisation:
+/.bundle/
+/lib/bundler/man/
+# for a library or gem, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+Gemfile.lock
+.ruby-version
+.ruby-gemset
+# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
+.rvmrc
+.idea/

data/.travis.yml ADDED Viewed

@@ -0,0 +1,33 @@
+language: ruby
+cache: bundler
+rvm:
+  - 1.9.3
+  - 2.1.0
+  - 2.2.0
+  - ruby-head
+  - jruby-19mode
+jdk:
+  - openjdk7
+  - oraclejdk7
+  - oraclejdk8
+matrix:
+  exclude:
+    - rvm: 1.9.3
+      jdk: oraclejdk7
+    - rvm: 1.9.3
+      jdk: oraclejdk8
+    - rvm: 2.1.0
+      jdk: oraclejdk7
+    - rvm: 2.1.0
+      jdk: oraclejdk8
+    - rvm: 2.2.0
+      jdk: oraclejdk7
+    - rvm: 2.2.0
+      jdk: oraclejdk8
+    - rvm: ruby-head
+      jdk: oraclejdk7
+    - rvm: ruby-head
+      jdk: oraclejdk8
+branches:
+  only:
+      - master

data/Gemfile ADDED Viewed

@@ -0,0 +1,4 @@
+source 'https://rubygems.org'
+gemspec name: 'libis-workflow', development_group: :test

data/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+The MIT License (MIT)
+Copyright (c) 2014 LIBIS
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,296 @@
+[![Build Status](https://travis-ci.org/Kris-LIBIS/workflow.svg?branch=master)](https://travis-ci.org/Kris-LIBIS/workflow)
+[![Coverage Status](https://img.shields.io/coveralls/Kris-LIBIS/workflow.svg)](https://coveralls.io/r/Kris-LIBIS/workflow)
+# LIBIS Workflow
+LIBIS Workflow framework
+## Installation
+Add this line to your application's Gemfile:
+```ruby
+    gem 'libis-workflow'
+```
+And then execute:
+    $ bundle
+Or install it yourself as:
+    $ gem install 'libis-workflow'
+## Architecture
+This gem is essentially a simple, custom workflow system. The core of the workflow are the tasks. You can - and should -
+create your own tasks by creating new classes and include ::Libis::Workflow::Task. The ::Libis::Workflow::Task module
+and the included ::Libis::Workflow::Base::Logger module provide the necessary attributes and methods to make them work
+in the workflow. See the detailed documentation for the modules for more information.
+The objects that the tasks will be working on should include the ::Libis::Workflow::WorkItem module.
+When working with file objects the module ::Libis::Workflow::FileItem and/or ::Libis::Workflow::DirItem modules should
+be included for additional file-specific functionality.
+Work items can be organized in different types and a hierarchical structure.
+All the tasks will be organized into a ::Libis::Workflow::WorkflowDefinition which will be able to execute the tasks in
+proper order on all the WorkItems supplied/collected. Each task can be implemented with code to run or simply contain a
+list of child tasks.
+Two tasks are predefined:
+::Libis::Workflow::Tasks::VirusChecker - runs a virus check on each WorkItem that is also a FileItem.
+::Libis::Workflow::Tasks::Analyzer - analyzes the workflow run and summarizes the results. It is always included as the
+last task by the workflow unless you supply a closing task called 'Analyzer' yourself.
+The whole ingester workflow is configured by a Singleton object ::Libis::Workflow::Config which contains settings for
+logging, paths where tasks and workitems can be found and the path to the virus scanner program.
+## Usage
+You should start by including the following line in your source code:
+```ruby
+    require 'libis-workflow'
+```
+This will load all of the Libis Workflow framework into your environment, but including only the required parts is OK as
+well. This is shown in the examples below.
+### Workflows
+A ::Libis::Workflow::WorkflowDefinition instance contains the definition of a workflow. Once instantiated, it can be run
+by calling the 'run' method. This will create a ::Libis::Workflow::WorkflowRun instance, configure it and call the 'run'
+method on it. The Workflow constructor takes no arguments, but it should be configured by calling the 'set_config'
+method with the workflow configuration as an argument. The 'run' method takes an option Hash as argument.
+#### Workflow configuration
+A workflow configuration is a Hash with:
+* tasks: Array of task descriptions
+* start_object: String with class name of the starting object to be created. An instance of this class will be created
+  for each run and serves as the root work item for that particular run.
+* input: Hash with input variable definitions
+##### Task description
+is a Hash with:
+* class: String with class name of the task
+* name: String with the name of the task
+* tasks: Array with task definitions of sub-tasks
+* options: Hash with additional task configuration options (see 'Tasks - Configuration' for more info)
+If 'class' is not present, the default '::Libis::Workflow::Task' with the given name will be instantiated, which simply
+iterates over the child items of the given work item and performs each sub-task on each of the child items. If a 'class'
+value is given, an instance of that class will be created and the task will be handed the work item to process on. See
+the chapter on 'Tasks' below for more information on tasks.
+##### Input variable definition
+The key of the input Hash is the unique id of the variable. The value is a Hash with:
+* name: String with the name of the input variable
+  This value is used for display only
+* description: String with descriptive text explaining the use/meaning of the variable
+* type: String with the type of the variable
+  Currently only 'String', 'Time' and 'Boolean' are supported. If the value is not present, 'String' is assumed.
+* default: String with the default value
+  If the default value contains the string %s, it will be replaced with the current time in the format yymmddHHMMSS when
+  the workflow is started.  For boolean values, 'true', 'yes', 't', 'y' and 1 are all interpreted as boolean true.
+All of these Hash keys are optional. Each input variable key and value will be added to the root work item's option Hash.
+#### Options
+The option Hash contains special run-time configuration parameters for the workflow:
+* action: String with the action that should be taken. Currently only 'start' is supported. In the future support for
+  'restart' and 'continue' will be added.
+* interactive: Boolean that indicates if the user should be queried to input values for variables that have no value set.
+  This will pause the workflow run and is therefore not compatible with scheduling the workflow. For unattended runs the
+  option should be set to false, causing the run to throw an exception if an input variable is missing a value.
+Remaining values are considered to be (default) values for the input variables.
+#### Run-time configuration
+The 'run' method takes an optional Hash as argument which will complement and override the options Hash described in the
+previous chapter.
+Once the workflow is configured and the root work item instantiated, the method will run each top-level task on the root
+work item in sequence until all tasks have completed successfully or a task has failed.
+### Work items
+Creating your own work items is highly recommended and is fairly easy:
+```ruby
+    require 'libis/workflow/workitems'
+    class MyWorkItem < ::Libis::Workflow::WorkItem
+      attr_accessor :name
+      def initialize
+        @name = 'My work item'
+        super # Note: this is important as the base class requires some initialization
+      end
+    end
+```
+Work items that are file-based should also include the ::Libis::Workflow::FileItem module:
+```ruby
+    require 'libis/workflow/workitems'
+    class MyFileItem < ::Libis::Workflow::WorkItem
+      include ::Libis::Workflow::FileItem
+      def initialize(file)
+        filename = file
+        super
+      end
+      def filesize
+        properties[:size]
+      end
+      def fixity_check(checksum)
+        properties[:checksum] == checksum
+      end
+    end
+```
+## Tasks
+Tasks should inherit from ::Libis::Workflow::Task and specify the actions they want to
+perform on each work item:
+```ruby
+    class MyTask < ::Libis::Workflow::Task
+      def process_item(item)
+        item.perform_my_action
+      rescue Exception => e
+        item.set_status(to_status(:failed))
+      end
+    end
+```
+You have some options to specify the actions:
+### Performing an action on each child item of the provided work item
+In that case the task should provide a 'process_item' method as above. Each child item will be passed as the argument
+to the method and perform whatever needs to be done on the item.
+If the action fails the method is expected to set the item status field to failed. This is also shown in the previous
+example. If the error is so severe that no other child items should be processed, the action can decide to throw an
+exception, preferably a ::Libis::Workflow::Exception or a child exception thereof.
+### Performing an action on the provided work item
+If the task wants to perform an action on the work item directly, it should define a 'process' method. The work item is
+available to the method as class instance variable 'workitem'. Again the method is responsible to communicate errors
+with a failed status or by throwing an exception.
+### Combining both
+It is possible to perform some action on the parent work item first and then process each child item. Processing the
+child items should be done in process_item as usual, but processing the parent item can be done either by defining a
+pre_process method or a process method that ends with a 'super' call. Using this should be an exception as it is
+recommended to create a separate task to process the child work items.
+### Default behaviour
+The default implementation of 'process' is to call 'pre_process' and then call 'process_item' on each child item.
+The default implementation for 'process_item' is to run each child task for each given child item. This will raise an
+exception unless the workflow has defined some sub-tasks for this task. This means that in the workflow definition tree
+each leaf task should either implement its own 'process_item' method or override the 'process' method. Only non-leaf
+nodes in the workflow definition tree are allowed to use the default implementation (by defining only 'name' and 'tasks'
+value). See above on 'Workflow configuration' for more info.
+### Configuration
+The task takes some options that determine how the task will be handling special cases. The options should be passed to
+the Task constructor as part of the initialization. The workflow configuration will take care of that.
+* quiet: Boolean - default: false
+* always_run: Boolean - default: false
+* items_first: Boolean - default: false
+The quiet option suppresses all logging for this task.
+When the option always_run is set, the task will run even when a previous task failed to run on the item before. Note
+that successfully running such a task will unmark the item as failed. The status history of the item will show which
+tasks failed. Only use this option if you are sure the task will fully recover if the previous tasks failed or did not
+run due to a previous failure.
+The items_first option determines the processing order. If a task has multiple subtasks and the given workitem has
+multiple subitems, setting the items_first option will cause it to take the first subitem, run the first subtask on it,
+then the second subtask and so on. Next it will run the first, second, ... subtask on the second subitem and so on. If
+the option is not set or set to false, the first subtask will run on each subitem, then the second subtask on each
+subitem, and so on.
+### Convenience functions
+#### get_root_item()
+Returns the work item that the workflow started with (and is the root/grand parent of all work items in the ingest run).
+#### get_work_dir()
+Returns the work directory as configured for the current ingest run. The work directory can be used as scrap directory
+for creating derived files that can be added as work items to the current flow or for downloading files that will be
+processed later. The work directory is not automatically cleaned up, which is considered a task for the workflow implementation.
+#### capture_cmd(cmd, *args)
+Allows the task to run an external command-line program and capture its stdout and stderr output at the same time. The
+first argument is mandatory and should be the command-line program that has to be executed. An arbitrary number of
+command-line arguments may follow.
+The return value is an array with three elements: the status code returned by the command, the stdout string and the
+stderr string.
+#### names()
+An array of strings with the hierarchical path of tasks leading to the current task. Can be useful for log messages.
+#### (debug/info/warn/error/fatal)(message, *args)
+Convenience function for creating log entries. The logger set in ::Libis::Workflow::Config is used to dump log messages.
+The first argument is mandatory and can be:
+* an integer. The integer is used to look up the message text in ::Libis::Workflow::MessageRegistry.
+* a static string. The message text is used as-is.
+* a string with placeholders as used in String#%. Args can either be an array or a hash. See also Kernel#sprintf.
+The log message is logged to the general logging and attached to the current work item (workitem) unless another
+work item is passed as first argument after the message.
+#### check_item_type(klass, item = nil)
+Checks if the work item is of the given class. 'workitem' is checked if the item argument is not present. If the check
+fails a Runtime exception is thrown which will cause the task to abort if not caught.
+#### item_type?(klass, item = nil)
+A less severe variant of check_item_type which returns a boolean (false if failed).
+#### to_status(status)
+Simply prepends the status text with the current task name. The output of this function is typically what the work item
+status field should be set to.
+## Contributing
+1. Fork it ( https://github.com/libis/workflow/fork )
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Add some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create new Pull Request

data/Rakefile ADDED Viewed

@@ -0,0 +1,7 @@
+require 'bundler/gem_tasks'
+require 'rspec/core/rake_task'
+RSpec::Core::RakeTask.new('spec')
+desc 'run tests'
+task :default => :spec

data/lib/libis/exceptions.rb ADDED Viewed

@@ -0,0 +1,8 @@
+# encoding: utf-8
+module Libis
+  class WorkflowError < ::RuntimeError
+  end
+  class WorkflowAbort < ::RuntimeError
+  end
+end

data/lib/libis/workflow/base/logger.rb ADDED Viewed

@@ -0,0 +1,30 @@
+require 'libis/tools/logger'
+module Libis
+  module Workflow
+    module Base
+      module Logger
+        include ::Libis::Tools::Logger
+        def message(severity, msg, *args)
+          item = self.workitem
+          item = args.shift if args.size > 0 and args[0].is_a?(WorkItem)
+          item.log_message(severity, to_msg(msg), *args) if item
+        end
+        def to_msg(msg)
+          case msg
+            when String
+              {text: msg}
+            when Integer
+              {id: msg}
+            else
+              {text: (msg.to_s rescue '')}
+          end.merge task: self.namepath
+        end
+      end
+    end
+  end
+end

data/lib/libis/workflow/base/run.rb ADDED Viewed

@@ -0,0 +1,68 @@
+# encoding: utf-8
+require 'fileutils'
+require 'libis/workflow/workitems/work_item'
+module Libis
+  module Workflow
+    module Base
+      module Run
+        include ::Libis::Workflow::WorkItem
+        def start_date; raise RuntimeError.new "Method not implemented: #{caller[0]}"; end
+        def start_date=(_); raise RuntimeError.new "Method not implemented: #{caller[0]}"; end
+        def tasks; raise RuntimeError.new "Method not implemented: #{caller[0]}"; end
+        def tasks=(_); raise RuntimeError.new "Method not implemented: #{caller[0]}"; end
+        def workflow; raise RuntimeError.new "Method not implemented: #{caller[0]}"; end
+        def work_dir
+          dir = File.join(Config.workdir, self.name)
+          FileUtils.mkpath dir unless Dir.exist?(dir)
+          dir
+        end
+        def name
+          self.workflow.run_name(self.start_date)
+        end
+        def names
+          Array.new
+        end
+        def namepath
+          self.name
+        end
+        def run(opts = {})
+          self.start_date = Time.now
+          self.options = workflow.prepare_input(self.options.merge(opts))
+          self.tasks = self.workflow.tasks(self)
+          configure_tasks self.options
+          self.status = :STARTED
+          self.tasks.each do |task|
+            next if self.failed? and not task.options[:allways_run]
+            task.run self
+          end
+          self.status = :DONE unless self.failed?
+        end
+        protected
+        def configure_tasks(opts)
+          self.tasks.each { |task| task.apply_options opts }
+        end
+      end
+    end
+  end
+end

data/lib/libis/workflow/base/workflow.rb ADDED Viewed

@@ -0,0 +1,123 @@
+# encoding: utf-8
+require 'libis/tools/parameter'
+module Libis
+  module Workflow
+    module Base
+      module Workflow
+        module ClassMethods
+          def require_all
+            Config.require_all(File.join(File.dirname(__FILE__), '..', 'tasks'))
+            Config.require_all(Config.taskdir)
+            Config.require_all(Config.itemdir)
+          end
+        end
+        def self.included(base)
+          base.extend ClassMethods
+        end
+        def name; raise RuntimeError.new "Method not implemented: #{caller[0]}"; end
+        def name=(_) ; raise RuntimeError.new "Method not implemented: #{caller[0]}"; end
+        def description; raise RuntimeError.new "Method not implemented: #{caller[0]}"; end
+        def description=(_); raise RuntimeError.new "Method not implemented: #{caller[0]}"; end
+        def config; raise RuntimeError.new "Method not implemented: #{caller[0]}"; end
+        def config=(_); raise RuntimeError.new "Method not implemented: #{caller[0]}"; end
+        def configure(cfg)
+          self.config.merge! input: {}, tasks: []
+          self.config.merge! cfg
+          self.name = self.config.delete(:name) || self.class.name
+          self.description = self.config.delete(:description) || ''
+          self.class.require_all
+          unless self.config[:tasks].last[:class] && self.config[:tasks].last[:class].split('::').last == 'Analyzer'
+            self.config[:tasks] << {class: '::Libis::Workflow::Tasks::Analyzer'}
+          end
+          self.config
+        end
+        def input
+          self.config[:input].inject({}) do |hash, input_def|
+            parameter = ::Libis::Tools::Parameter.new input_def.first.to_sym
+            input_def.last.each { |k, v| parameter[k] = v}
+            hash[input_def.first.to_sym] = parameter
+            hash
+          end
+        end
+        def run_name(timestamp = Time.now)
+          "#{self.workflow.name}-#{timestamp.strftime('%Y%m%d%H%M%S')}"
+        end
+        def perform(opts = {})
+          self.run opts
+        end
+        def create_run_object
+          self.config[:run_object].constantize.new
+        end
+        # @param [Hash] opts
+        def run(opts = {})
+          run_object = self.create_run_object
+          raise RuntimeError.new "Could not create instance of run object '#{self.config[:run_object]}'" unless run_object
+          run_object.workflow = self
+          run_object.options = opts
+          run_object.save
+          run_object.run opts
+          run_object
+        end
+        # @param [Hash] opts
+        def prepare_input(opts)
+          options = opts.dup
+          self.input.each do |key, parameter|
+            key
+            # provided in opts
+            options[key] = parameter[:default] unless options.has_key? key
+            options[key] = parameter.parse(options[key])
+            propagate_to = []
+            propagate_to = parameter[:propagate_to] if parameter[:propagate_to].is_a? Array
+            propagate_to = [parameter[:propagate_to]] if parameter[:propagate_to].is_a? String
+            propagate_to.each do |target|
+              task_name, param_name = target.split('#')
+              param_name ||= key
+              options[task_name] ||= {}
+              options[task_name][param_name.to_sym] = options[key]
+            end
+          end
+          options
+        end
+        def tasks(parent = nil)
+          self.config[:tasks].map do |cfg|
+            instantize_task(parent || self, cfg)
+          end
+        end
+        def instantize_task(parent, cfg)
+          task_class = Task
+          task_class = cfg[:class].constantize if cfg[:class]
+          # noinspection RubyArgCount
+          task_instance = task_class.new(parent, cfg)
+          cfg[:tasks].map do |task_cfg|
+            task_instance << instantize_task(task_instance, task_cfg)
+          end rescue nil
+          task_instance
+        end
+      end
+    end
+  end
+end