RubyGems - libis-workflow - Versions diffs - 2.0.24 → 2.0.25 - Mend

libis-workflow 2.0.24 → 2.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

checksums.yaml +4 -4
data/.coveralls.yml +1 -1
data/.gitignore +36 -36
data/.travis.yml +32 -32
data/Gemfile +4 -4
data/LICENSE +20 -20
data/README.md +380 -380
data/Rakefile +6 -6
data/lib/libis/exceptions.rb +6 -6
data/lib/libis/workflow.rb +41 -41
data/lib/libis/workflow/action.rb +24 -24
data/lib/libis/workflow/base/dir_item.rb +13 -13
data/lib/libis/workflow/base/file_item.rb +80 -80
data/lib/libis/workflow/base/job.rb +83 -83
data/lib/libis/workflow/base/logging.rb +66 -66
data/lib/libis/workflow/base/run.rb +95 -95
data/lib/libis/workflow/base/work_item.rb +173 -173
data/lib/libis/workflow/base/workflow.rb +149 -149
data/lib/libis/workflow/config.rb +22 -22
data/lib/libis/workflow/dir_item.rb +10 -10
data/lib/libis/workflow/file_item.rb +15 -15
data/lib/libis/workflow/job.rb +28 -28
data/lib/libis/workflow/message_registry.rb +30 -30
data/lib/libis/workflow/run.rb +34 -34
data/lib/libis/workflow/status.rb +133 -133
data/lib/libis/workflow/task.rb +316 -316
data/lib/libis/workflow/task_group.rb +71 -71
data/lib/libis/workflow/task_runner.rb +34 -34
data/lib/libis/workflow/version.rb +5 -5
data/lib/libis/workflow/work_item.rb +37 -37
data/lib/libis/workflow/worker.rb +42 -42
data/lib/libis/workflow/workflow.rb +20 -20
data/libis-workflow.gemspec +38 -38
data/spec/items.rb +2 -2
data/spec/items/test_dir_item.rb +13 -13
data/spec/items/test_file_item.rb +16 -16
data/spec/items/test_run.rb +8 -8
data/spec/spec_helper.rb +8 -8
data/spec/task_spec.rb +15 -15
data/spec/tasks/camelize_name.rb +12 -12
data/spec/tasks/checksum_tester.rb +32 -32
data/spec/tasks/collect_files.rb +47 -47
data/spec/workflow_spec.rb +154 -154
metadata +3 -3

data/lib/libis/workflow/base/logging.rb CHANGED Viewed

@@ -1,67 +1,67 @@
-module Libis
-  module Workflow
-    module Base
-      module Logging
-        # Add a structured message to the log history. The message text can be submitted as an integer or text. If an
-        # integer is submitted, it will be used to look up the text in the MessageRegistry. The message text will be
-        # passed to the % operator with the args parameter. If that failes (e.g. because the format string is not correct)
-        # the args value is appended to the message.
-        #
-        # @param [Symbol] severity
-        # @param [Hash] msg should contain message text as :id or :text and the hierarchical name of the task as :task
-        # @param [Array] args string format values
-        def log_message(severity, msg, *args)
-          # Prepare info from msg struct for use with string substitution
-          message_id, message_text = if msg[:id]
-                                       [msg[:id], MessageRegistry.instance.get_message(msg[:id])]
-                                     elsif msg[:text]
-                                       [0, msg[:text]]
-                                     else
-                                       [0, '']
-                                     end
-          task = msg[:task] || ''
-          message_text = (message_text % args rescue "#{message_text} - #{args}")
-          run_id = self.get_run.id rescue nil
-          self.add_log severity: severity, id: message_id.to_i, text: message_text, task: task, run_id: run_id
-        end
-        # Helper function for the WorkItems to add a log entry to the log_history.
-        #
-        # The supplied message structure is expected to contain the following fields:
-        # - :severity : ::Logger::Severity value
-        # - :id : optional message id
-        # - :text : message text
-        # - :task : list of tasks names (task hierarchy) that submits the message
-        #
-        # @param [Hash] message
-        def add_log(message = {})
-          msg = message_struct(message)
-          add_log_entry(msg)
-          self.save!
-        end
-        def <=(message = {})
-          self.add_log(message)
-        end
-        protected
-        # create and return a proper message structure
-        # @param [Hash] opts
-        def message_struct(opts = {})
-          opts.reverse_merge!(severity: :info, code: nil, text: '')
-          {
-              severity: ::Logging::levelify(opts[:severity]).upcase,
-              task: opts[:task],
-              code: opts[:code],
-              message: opts[:text]
-          }.cleanup
-        end
-      end
-    end
-  end
+module Libis
+  module Workflow
+    module Base
+      module Logging
+        # Add a structured message to the log history. The message text can be submitted as an integer or text. If an
+        # integer is submitted, it will be used to look up the text in the MessageRegistry. The message text will be
+        # passed to the % operator with the args parameter. If that fails (e.g. because the format string is not correct)
+        # the args value is appended to the message.
+        #
+        # @param [Symbol] severity
+        # @param [Hash] msg should contain message text as :id or :text and the hierarchical name of the task as :task
+        # @param [Array] args string format values
+        def log_message(severity, msg, *args)
+          # Prepare info from msg struct for use with string substitution
+          message_id, message_text = if msg[:id]
+                                       [msg[:id], MessageRegistry.instance.get_message(msg[:id])]
+                                     elsif msg[:text]
+                                       [0, msg[:text]]
+                                     else
+                                       [0, '']
+                                     end
+          task = msg[:task] || ''
+          message_text = (message_text % args rescue "#{message_text} - #{args}")
+          run_id = self.get_run.id rescue nil
+          self.add_log severity: severity, id: message_id.to_i, text: message_text, task: task, run_id: run_id
+        end
+        # Helper function for the WorkItems to add a log entry to the log_history.
+        #
+        # The supplied message structure is expected to contain the following fields:
+        # - :severity : ::Logger::Severity value
+        # - :id : optional message id
+        # - :text : message text
+        # - :task : list of task names (task hierarchy) of the task that submits the message
+        #
+        # @param [Hash] message
+        def add_log(message = {})
+          msg = message_struct(message)
+          add_log_entry(msg)
+          self.save!
+        end
+        def <=(message = {})
+          self.add_log(message)
+        end
+        protected
+        # create and return a proper message structure
+        # @param [Hash] opts
+        def message_struct(opts = {})
+          opts.reverse_merge!(severity: :info, code: nil, text: '')
+          {
+              severity: ::Logging::levelify(opts[:severity]).upcase,
+              task: opts[:task],
+              code: opts[:code],
+              message: opts[:text]
+          }.cleanup
+        end
+      end
+    end
+  end
 end

data/lib/libis/workflow/base/run.rb CHANGED Viewed

@@ -1,95 +1,95 @@
-require 'fileutils'
-require 'libis/workflow/base/work_item'
-require 'libis/workflow/task_runner'
-module Libis
-  module Workflow
-    module Base
-      # Base module for all workflow runs. It is created by an associated workflow when the workflow is executed.
-      #
-      # This module lacks the implementation for the data attributes. It functions as an interface that describes the
-      # common functionality regardless of the storage implementation. These attributes require some implementation:
-      #
-      # - start_date: [Time] the timestamp of the execution of the run
-      # - job: [Object] a reference to the Job this Run belongs to
-      # - id: [String] (Optional) a unique run number
-      #
-      # Note that ::Libis::Workflow::Base::WorkItem is a parent module and therefore requires implementation of the
-      # attributes of that module too.
-      #
-      # A simple in-memory implementation can be found in ::Libis::Workflow::Run
-      module Run
-        include ::Libis::Workflow::Base::WorkItem
-        attr_accessor :tasks, :action
-        def work_dir
-          # noinspection RubyResolve
-          dir = File.join(Libis::Workflow::Config.workdir, self.name)
-          FileUtils.mkpath dir unless Dir.exist?(dir)
-          dir
-        end
-        def name
-          self.job.run_name(self.start_date)
-        end
-        def names
-          Array.new
-        end
-        def namepath
-          self.name
-        end
-        def workflow
-          self.job.workflow
-        end
-        def logger
-          self.properties['logger'] || self.job.logger rescue ::Libis::Workflow::Config.logger
-        end
-        # Execute the workflow.
-        #
-        # The action parameter defines how the execution of the tasks will behave:
-        #  - With the default :run action each task will be executed regardsless how the task performed on the item
-        #    previously.
-        #  - When using the :retry action a task will not perform on an item if it was successful the last time. This
-        #    allows you to retry a run when an temporary error (e.g. asynchronous wait or halt) occured.
-        #
-        # @param [Symbol] action the type of action to take during this run. :run or :retry
-        def run(action = :run)
-          self.action = action
-          self.start_date = Time.now unless action == :retry
-          self.options = workflow.prepare_input(self.options)
-          self.tasks = workflow.tasks
-          configure_tasks self.options
-          self.save!
-          runner = Libis::Workflow::TaskRunner.new nil
-          self.tasks.each do |task|
-            runner << task
-          end
-          runner.run self
-        end
-        protected
-        def configure_tasks(opts)
-          self.tasks.each { |task| task.apply_options opts }
-        end
-      end
-    end
-  end
-end
+require 'fileutils'
+require 'libis/workflow/base/work_item'
+require 'libis/workflow/task_runner'
+module Libis
+  module Workflow
+    module Base
+      # Base module for all workflow runs. It is created by an associated workflow when the workflow is executed.
+      #
+      # This module lacks the implementation for the data attributes. It functions as an interface that describes the
+      # common functionality regardless of the storage implementation. These attributes require some implementation:
+      #
+      # - start_date: [Time] the timestamp of the execution of the run
+      # - job: [Object] a reference to the Job this Run belongs to
+      # - id: [String] (Optional) a unique run number
+      #
+      # Note that ::Libis::Workflow::Base::WorkItem is a parent module and therefore requires implementation of the
+      # attributes of that module too.
+      #
+      # A simple in-memory implementation can be found in ::Libis::Workflow::Run
+      module Run
+        include ::Libis::Workflow::Base::WorkItem
+        attr_accessor :tasks, :action
+        def work_dir
+          # noinspection RubyResolve
+          dir = File.join(Libis::Workflow::Config.workdir, self.name)
+          FileUtils.mkpath dir unless Dir.exist?(dir)
+          dir
+        end
+        def name
+          self.job.run_name(self.start_date)
+        end
+        def names
+          Array.new
+        end
+        def namepath
+          self.name
+        end
+        def workflow
+          self.job.workflow
+        end
+        def logger
+          self.properties['logger'] || self.job.logger rescue ::Libis::Workflow::Config.logger
+        end
+        # Execute the workflow.
+        #
+        # The action parameter defines how the execution of the tasks will behave:
+        #  - With the default :run action each task will be executed regardless of how the task performed on the item
+        #    previously.
+        #  - When using the :retry action a task will not perform on an item if it was successful the last time. This
+        #    allows you to retry a run when a temporary error (e.g. asynchronous wait or halt) occurred.
+        #
+        # @param [Symbol] action the type of action to take during this run. :run or :retry
+        def run(action = :run)
+          self.action = action
+          self.start_date = Time.now unless action == :retry
+          self.options = workflow.prepare_input(self.options)
+          self.tasks = workflow.tasks
+          configure_tasks self.options
+          self.save!
+          runner = Libis::Workflow::TaskRunner.new nil
+          self.tasks.each do |task|
+            runner << task
+          end
+          runner.run self
+        end
+        protected
+        def configure_tasks(opts)
+          self.tasks.each { |task| task.apply_options opts }
+        end
+      end
+    end
+  end
+end

data/lib/libis/workflow/base/work_item.rb CHANGED Viewed

@@ -1,173 +1,173 @@
-require 'backports/rails/hash'
-require 'libis/tools/extend/hash'
-require 'libis/workflow/config'
-require 'libis/workflow/status'
-require_relative 'logging'
-module Libis
-  module Workflow
-    module Base
-      # Base module for all work items.
-      #
-      # This module lacks the implementation for the data attributes. It functions as an interface that describes the
-      # common functionality regardless of the storage implementation. These attributes require some implementation:
-      #
-      # - parent: [Object|nil] a link to a parent work item. Work items can be organized in any hierarchy you think is
-      #     relevant for your workflow (e.g. directory[/directory...]/file/line or library/section/book/page). Of course
-      #     hierarchies are not mandatory.
-      # - items: [Array] a list of child work items. see above.
-      # - options: [Hash] a set of options for the task chain on how to deal with this work item. This attribute can be
-      #     used to fine-tune the behaviour of tasks for a particular work item.
-      # - properties: [Hash] a set of properties, typically collected during the workflow processing and used to store
-      #     final or intermediate resulst of tasks. The ::Lias::Ingester::FileItem module uses this attribute to store the
-      #     properties (e.g. size, checksum, ...) of the file it represents.
-      # - status_log: [Array] a list of all status changes the work item went through.
-      # - summary: [Hash] collected statistics about the ingest for the work item and its children. This structure will
-      #     be filled in by the included task ::Lias::Ingester::Tasks::Analyzer wich is appended to the workflow by default.
-      #
-      # The module is created so that it is possible to implement an ActiveRecord/Datamapper/... implementation easily.
-      # A simple in-memory implementation would require:
-      #
-      # attr_accessor :parent
-      # attr_accessor :items
-      # attr_accessor :options, :properties
-      # attr_accessor :status_log
-      # attr_accessor :summary
-      #
-      # def initialize
-      #   self.parent = nil
-      #   self.items = []
-      #   self.options = {}
-      #   self.properties = {}
-      #   self.status_log = []
-      #   self.summary = {}
-      # end
-      #
-      # protected
-      #
-      # ## Method below should be adapted to match the implementation of the status array
-      #
-      # def add_status_log(info)
-      #   self.status_log << info
-      # end
-      #
-      #
-      module WorkItem
-        include Enumerable
-        include Libis::Workflow::Status
-        include Libis::Workflow::Base::Logging
-        # String representation of the identity of the work item.
-        #
-        # You may want to overwrite this method as it tries the :name property or whatever #inspect returns if that
-        # failes. Typically this should return the key value, file name or id number. If that's what your :name property
-        # contains, you're fine.
-        #
-        # @return [String] string identification for this work item.
-        def name
-          # noinspection RubyResolve
-          self.properties['name'] || self.inspect
-        end
-        def to_s;
-          self.name;
-        end
-        def names
-          (self.parent.names rescue Array.new).push(name).compact
-        end
-        def namepath;
-          self.names.join('/');
-        end
-        # File name safe version of the to_s output.
-        #
-        # The output should be safe to use as a file name to store work item
-        # data. Typical use is when extra file items are created by a task and need to be stored on disk. The default
-        # implementation URL-encodes (%xx) all characters except alphanumeric, '.' and '-'.
-        #
-        # @return [String] file name
-        def to_filename
-          self.to_s.gsub(/[^\w.-]/) { |s| '%%%02x' % s.ord }
-        end
-        # Iterates over the work item clients and invokes code on each of them.
-        def each(&block)
-          self.items.each(&block)
-        end
-        def size
-          self.items.size
-        end
-        alias_method :count, :size
-        # Add a child work item
-        #
-        # @param [WorkItem] item to be added to the child list :items
-        def add_item(item)
-          return self unless item and item.is_a?(Libis::Workflow::Base::WorkItem)
-          self.items << item
-          item.parent = self
-          self.save!
-          item.save!
-          self
-        end
-        alias_method :<<, :add_item
-        # Get list of items.
-        #
-        # This method should return a list of items that can be accessed during long processing times.
-        def get_items
-          self.items
-        end
-        # Get list of items.
-        #
-        # This method should return a list of items that is safe to iterate over while it is being altered.
-        def get_item_list
-          self.items.dup
-        end
-        # Return item's parent
-        # @return [Libis::Workflow::Base::WorkItem]
-        def get_parent
-          self.parent
-        end
-        # go up the hierarchy and return the topmost work item
-        #
-        # @return [Libis::Workflow::Base::WorkItem]
-        def get_root
-          self.get_parent && self.get_parent.is_a?(Libis::Workflow::Base::WorkItem) && self.get_parent.get_root || self
-        end
-        # Get the top
-        #
-        # @return [Libis::Workflow::Base::Run]
-        def get_run
-          return self if self.is_a?(Libis::Workflow::Base::Run)
-          self.get_parent && self.get_parent.get_run || nil
-        end
-        # Dummy method. It is a placeholder for DB backed implementations. Wherever appropriate WorkItem#save will be
-        # called to save the current item's state. If state needs to persisted, you should override this method or make
-        # sure your persistence layer implements it in your class.
-        def save
-        end
-        # Dummy method. It is a placeholder for DB backed implementations. Wherever appropriate WorkItem#save will be
-        # called to save the current item's state. If state needs to persisted, you should override this method or make
-        # sure your persistence layer implements it in your class.
-        def save!
-        end
-      end
-    end
-  end
-end
+require 'backports/rails/hash'
+require 'libis/tools/extend/hash'
+require 'libis/workflow/config'
+require 'libis/workflow/status'
+require_relative 'logging'
+module Libis
+  module Workflow
+    module Base
+      # Base module for all work items.
+      #
+      # This module lacks the implementation for the data attributes. It functions as an interface that describes the
+      # common functionality regardless of the storage implementation. These attributes require some implementation:
+      #
+      # - parent: [Object|nil] a link to a parent work item. Work items can be organized in any hierarchy you think is
+      #     relevant for your workflow (e.g. directory[/directory...]/file/line or library/section/book/page). Of course
+      #     hierarchies are not mandatory.
+      # - items: [Array] a list of child work items. see above.
+      # - options: [Hash] a set of options for the task chain on how to deal with this work item. This attribute can be
+      #     used to fine-tune the behaviour of tasks for a particular work item.
+      # - properties: [Hash] a set of properties, typically collected during the workflow processing and used to store
+      #     final or intermediate results of tasks. The ::Lias::Ingester::FileItem module uses this attribute to store the
+      #     properties (e.g. size, checksum, ...) of the file it represents.
+      # - status_log: [Array] a list of all status changes the work item went through.
+      # - summary: [Hash] collected statistics about the ingest for the work item and its children. This structure will
+      #     be filled in by the included task ::Lias::Ingester::Tasks::Analyzer which is appended to the workflow by default.
+      #
+      # The module is created so that it is possible to implement an ActiveRecord/Datamapper/... implementation easily.
+      # A simple in-memory implementation would require:
+      #
+      # attr_accessor :parent
+      # attr_accessor :items
+      # attr_accessor :options, :properties
+      # attr_accessor :status_log
+      # attr_accessor :summary
+      #
+      # def initialize
+      #   self.parent = nil
+      #   self.items = []
+      #   self.options = {}
+      #   self.properties = {}
+      #   self.status_log = []
+      #   self.summary = {}
+      # end
+      #
+      # protected
+      #
+      # ## Method below should be adapted to match the implementation of the status array
+      #
+      # def add_status_log(info)
+      #   self.status_log << info
+      # end
+      #
+      #
+      module WorkItem
+        include Enumerable
+        include Libis::Workflow::Status
+        include Libis::Workflow::Base::Logging
+        # String representation of the identity of the work item.
+        #
+        # You may want to override this method: it returns the 'name' property, or whatever #inspect returns if that
+        # property is not set. Typically this should return the key value, file name or id number. If that's what your
+        # 'name' property contains, you're fine.
+        #
+        # @return [String] string identification for this work item.
+        def name
+          # noinspection RubyResolve
+          self.properties['name'] || self.inspect
+        end
+        def to_s;
+          self.name;
+        end
+        def names
+          (self.parent.names rescue Array.new).push(name).compact
+        end
+        def namepath;
+          self.names.join('/');
+        end
+        # File name safe version of the to_s output.
+        #
+        # The output should be safe to use as a file name to store work item
+        # data. Typical use is when extra file items are created by a task and need to be stored on disk. The default
+        # implementation URL-encodes (%xx) all characters except alphanumeric, '.' and '-'.
+        #
+        # @return [String] file name
+        def to_filename
+          self.to_s.gsub(/[^\w.-]/) { |s| '%%%02x' % s.ord }
+        end
+        # Iterates over the child work items and invokes the given block on each of them.
+        def each(&block)
+          self.items.each(&block)
+        end
+        def size
+          self.items.size
+        end
+        alias_method :count, :size
+        # Add a child work item
+        #
+        # @param [WorkItem] item to be added to the child list :items
+        def add_item(item)
+          return self unless item and item.is_a?(Libis::Workflow::Base::WorkItem)
+          self.items << item
+          item.parent = self
+          self.save!
+          item.save!
+          self
+        end
+        alias_method :<<, :add_item
+        # Get list of items.
+        #
+        # This method should return a list of items that can be accessed during long processing times.
+        def get_items
+          self.items
+        end
+        # Get list of items.
+        #
+        # This method should return a list of items that is safe to iterate over while it is being altered.
+        def get_item_list
+          self.items.dup
+        end
+        # Return item's parent
+        # @return [Libis::Workflow::Base::WorkItem]
+        def get_parent
+          self.parent
+        end
+        # go up the hierarchy and return the topmost work item
+        #
+        # @return [Libis::Workflow::Base::WorkItem]
+        def get_root
+          self.get_parent && self.get_parent.is_a?(Libis::Workflow::Base::WorkItem) && self.get_parent.get_root || self
+        end
+        # Go up the hierarchy and return the Run this work item belongs to, or nil if no ancestor is a Run.
+        #
+        # @return [Libis::Workflow::Base::Run]
+        def get_run
+          return self if self.is_a?(Libis::Workflow::Base::Run)
+          self.get_parent && self.get_parent.get_run || nil
+        end
+        # Dummy method. It is a placeholder for DB backed implementations. Wherever appropriate WorkItem#save will be
+        # called to save the current item's state. If state needs to be persisted, you should override this method or
+        # make sure your persistence layer implements it in your class.
+        def save
+        end
+        # Dummy method. It is a placeholder for DB backed implementations. Wherever appropriate WorkItem#save! will be
+        # called to save the current item's state. If state needs to be persisted, you should override this method or
+        # make sure your persistence layer implements it in your class.
+        def save!
+        end
+      end
+    end
+  end
+end