RubyGems - jongleur - Versions diffs - 1.0.1 - Mend

jongleur 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

checksums.yaml +7 -0
data/.gitignore +111 -0
data/.gitlab-ci.yml +25 -0
data/.rspec +3 -0
data/.rubocop.yml +45 -0
data/CHANGELOG.md +6 -0
data/Gemfile +8 -0
data/LICENSE.txt +21 -0
data/README.md +269 -0
data/Rakefile +8 -0
data/bin/console +11 -0
data/bin/img/DAG_graph_1.png +0 -0
data/bin/img/ETL_DAG.png +0 -0
data/bin/img/jongleur_m-2015.jpg +0 -0
data/bin/img/transactional_DAG.png +0 -0
data/bin/setup +8 -0
data/jongleur.gemspec +34 -0
data/lib/jongleur.rb +41 -0
data/lib/jongleur/api.rb +217 -0
data/lib/jongleur/helpers.rb +9 -0
data/lib/jongleur/implementation.rb +216 -0
data/lib/jongleur/version.rb +5 -0
data/lib/jongleur/worker_task.rb +20 -0
metadata +193 -0

checksums.yaml ADDED

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 749c9b43f797c10a6bbba43ab6fc74fbf9d5b430
+  data.tar.gz: 41077a42d773e8bd23d8446963beec566e118199
+SHA512:
+  metadata.gz: 6abc365ad553864cfaf5d8113a24a981abd5676e58789268fe2e95ee3a246038c083c893fa9977b87a0c4b08c04f2ba0fd16ff5c3d8aaa2842baef156cdc51de
+  data.tar.gz: 12dba9ec3a9f5d888b04c80234bf7a1becff58145fff7d2544ba027118b02803ea15672cef44594146c8842968c765e03d8563cecae5de93dd23f1fe90730ad3

data/.gitignore ADDED

@@ -0,0 +1,111 @@
+# Created by https://www.gitignore.io/api/ruby,linux,macos,sublimetext
+### Linux ###
+*~
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+# KDE directory preferences
+.directory
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+### macOS ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+# Icon must end with two \r
+Icon
+# Thumbnails
+._*
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+### Ruby ###
+*.gem
+*.rbc
+/.config
+/coverage/
+/InstalledFiles
+/pkg/
+/spec/reports/
+/spec/examples.txt
+/test/tmp/
+/test/version_tmp/
+/tmp/
+# Used by dotenv library to load environment variables.
+# .env
+## Documentation cache and generated files:
+/.yardoc/
+/_yardoc/
+/doc/
+/rdoc/
+## Environment normalization:
+/.bundle/
+/vendor/bundle
+/lib/bundler/man/
+Gemfile.lock
+.ruby-version
+.ruby-gemset
+# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
+.rvmrc
+### SublimeText ###
+# Cache files for Sublime Text
+*.tmlanguage.cache
+*.tmPreferences.cache
+*.stTheme.cache
+# Workspace files are user-specific
+*.sublime-workspace
+# SFTP configuration file
+sftp-config.json
+# Package control specific files
+Package Control.last-run
+Package Control.ca-list
+Package Control.ca-bundle
+Package Control.system-ca-bundle
+Package Control.cache/
+Package Control.ca-certs/
+Package Control.merged-ca-bundle
+Package Control.user-ca-bundle
+oscrypto-ca-bundle.crt
+bh_unicode_properties.cache
+# End of https://www.gitignore.io/api/ruby,linux,macos,sublimetext

data/.gitlab-ci.yml ADDED

@@ -0,0 +1,25 @@
+# Commands run before every job: report the Ruby in use, then install dependencies.
+before_script:
+  - ruby -v
+  - which ruby
+  # --no-document replaces the deprecated --no-ri/--no-rdoc pair.
+  - gem install bundler --no-document
+  # NOTE(review): FLAGS is not defined anywhere in this file; presumably supplied by the runner environment - confirm.
+  - bundle install --jobs $(nproc)  "${FLAGS[@]}"
+# One job per supported Ruby version; each one runs the RSpec suite.
+test:2.4.3:
+  image: ruby:2.4.3
+  script:
+    - bundle exec rspec
+test:2.4.4:
+  image: ruby:2.4.4
+  script:
+    - bundle exec rspec
+test:2.5.0:
+  image: ruby:2.5.0
+  script:
+    - bundle exec rspec
+test:2.5.1:
+  image: ruby:2.5.1
+  script:
+    - bundle exec rspec

data/.rspec ADDED

@@ -0,0 +1,3 @@
+--format documentation
+--color
+--require spec_helper

data/.rubocop.yml ADDED

@@ -0,0 +1,45 @@
+# RuboCop overrides: the cops listed below are switched off for this project.
+Style/Semicolon:
+  Enabled: false
+Style/ClassVars:
+  Enabled: false
+Metrics/BlockLength:
+  Enabled: false
+Style/CommentedKeyword:
+  Enabled: false
+Style/FormatStringToken:
+  Enabled: false
+Metrics/CyclomaticComplexity:
+  Enabled: false
+Metrics/MethodLength:
+  Enabled: false
+Metrics/PerceivedComplexity:
+  Enabled: false
+Metrics/ModuleLength:
+  Enabled: false
+Metrics/LineLength:
+  Enabled: false
+Metrics/AbcSize:
+  Enabled: false
+Style/FormatString:
+  Enabled: false
+Layout/AlignArray:
+  Enabled: false
+Style/StringLiterals:
+  Enabled: false
+# Files RuboCop should not inspect at all.
+AllCops:
+  Exclude:
+    - test.rb

data/CHANGELOG.md ADDED

@@ -0,0 +1,6 @@
+# Change Log
+All notable changes to this project will be documented in this file.
+## [1.0.0] - 27-Aug-2018
+### Initial Release.

data/Gemfile ADDED

@@ -0,0 +1,8 @@
+# frozen_string_literal: true
+source 'https://rubygems.org'
+git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
+# Specify your gem's dependencies in jongleur.gemspec
+gemspec

data/LICENSE.txt ADDED

@@ -0,0 +1,21 @@
+The MIT License (MIT)
+Copyright (c) 2018 Fred Heath
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

data/README.md ADDED

@@ -0,0 +1,269 @@
+# Jongleur
+<img src="./bin/img/jongleur_m-2015.jpg"  width="150" height="150">
+Jongleur is a process scheduler and manager. It allows its users to declare a number of executable tasks as Ruby classes, define precedence between those tasks and run each task as a separate process.
+Jongleur is particularly useful for implementing workflows modeled as a [DAG](https://en.wikipedia.org/wiki/Directed_acyclic_graph)
+(Directed Acyclic Graph), but can be also used to run multiple tasks in parallel or even sequential workflows where each task needs to run as a separate OS process.
+## Environment
+This gem has been built using the [POSIX/UNIX process model](https://support.sas.com/documentation/onlinedoc/sasc/doc750/html/lr2/zid-6574.htm).
+It will work on Linux and Mac OS but not on Windows.
+Jongleur has been tested with MRI Ruby 2.4.3, 2.4.4, 2.5.0 and 2.5.1. I would also expect it to work with other Ruby implementations, such as JRuby or Rubinius, though it hasn't yet been tested on those.
+## Installation
+Add this line to your application's Gemfile:
+```ruby
+gem 'jongleur'
+```
+And then execute:
+    $ bundle
+Or install it yourself as:
+    $ gem install jongleur
+## What does it do?
+In a nutshell, Jongleur keeps track of a number of tasks and executes them as separate OS processes according to their precedence criteria. For instance, if there are 3 tasks A, B and C, and task C depends on A and B, Jongleur will start executing A and B in separate processes (i.e. in parallel) and will wait until they are both finished before it executes C in a separate process.
+Jongleur is ideal for running workflows represented as DAGs, but is also useful for simply running tasks in parallel or for whenever you need some multi-processing capability.
+## Concepts
+### Task Graph
+To run Jongleur, you will need to define the tasks to run and their precedence. A _Task Graph_ is a
+representation of the tasks to be run by Jongleur and it usually (but not exclusively) represents a DAG, as in the examples below:
+![DAG examples](https://upload.wikimedia.org/wikipedia/commons/f/fa/Dag_graf.JPG)
+A _Task Graph_ is defined as a Hash in the following format:
+`{task-name => list[names-of-dependent-tasks]}`
+So the first graph would be defined as:
+```
+my_graph = {
+  s: [:q, :r, :t],
+  q: [:r],
+  r: [],
+  t: []
+}
+```
+where the Hash key is the class name of a Task and the Hash value is an Array of other Tasks that can be
+run only after this Task is finished. So in the above example:
+* Tasks Q, R and T can only start after task S has finished.
+* Task R can only start after Q has finished.
+* Tasks R and T have no dependents. No other task need wait for them.
+__N.B:__ Since the _Task Graph_ is a Hash, any duplicate key entries will be overridden. For instance, if this Task Graph
+```
+my_task_graph = { A: [:B, :C], B: [:D] }
+```
+is re-defined as
+```
+my_task_graph = { A: [:B], A: [:C], B: [:D] }
+```
+The 2nd assignment of `A` will override the first one so your graph will be:
+`{:A=>[:C], :B=>[:D]}`
+Always assign all dependent tasks together in a single list.
+### Task Matrix
+It's a tabular real-time representation of the state of task execution. It can be invoked at any time with
+```
+Jongleur::API.task_matrix
+```
+ After defining your Task Graph and before running Jongleur, your _Task Matrix_ should look like this:
+```
+#<Jongleur::Task name=:A, pid=-1, running=false, exit_status=nil, success_status=nil>,
+#<Jongleur::Task name=:B, pid=-1, running=false, exit_status=nil, success_status=nil>,
+#<Jongleur::Task name=:C, pid=-1, running=false, exit_status=nil, success_status=nil>,
+#<Jongleur::Task name=:D, pid=-1, running=false, exit_status=nil, success_status=nil>,
+#<Jongleur::Task name=:E, pid=-1, running=false, exit_status=nil, success_status=nil>
+```
+After Jongleur finishes, your _Task Matrix_ will look something like this:
+```
+#<Jongleur::Task name=:A, pid=95117, running=false, exit_status=0, success_status=true>
+#<Jongleur::Task name=:B, pid=95118, running=false, exit_status=0, success_status=true>
+#<Jongleur::Task name=:C, pid=95120, running=false, exit_status=0, success_status=true>
+#<Jongleur::Task name=:D, pid=95122, running=false, exit_status=0, success_status=true>
+#<Jongleur::Task name=:E, pid=95123, running=false, exit_status=0, success_status=true>
+```
+The `Jongleur::Task` attribute values are as follows
+* name : the Task name
+* pid : the Task process id (`-1` if the task hasn't yet run)
+* running : `true` if task is currently running
+*  exit_status : usually 0 if process finished without errors, <>0 or `nil` otherwise
+*  success_status :  `true` if process finished successfully, `false` if it didn't or `nil` if process didn't exit at all
+### WorkerTask
+This is the implementation template for a Task. For each Task in your Task Graph you must provide a class that derives from `WorkerTask` and implements the `execute` method. This method is what will be called by Jongleur when the Task is ready to run.
+## Usage
+Using Jongleur is easy:
+1. (Optional) You may want to head your code with `include Jongleur` so that you won't have to namespace every api call.
+2. Define your Task Graph
+	```
+	test_graph = {
+      A: [:B, :C],
+      B: [:D],
+      C: [:D],
+      D: [:E],
+      E: []
+    }
+	```
+3. Add your Task Graph to Jongleur
+	```
+	API.add_task_graph test_graph
+	=> [#<struct Jongleur::Task name=:A, pid=-1, running=false, exit_status=nil, success_status=nil>,
+ 		#<struct Jongleur::Task name=:B, pid=-1, running=false, exit_status=nil, success_status=nil>,
+ 		#<struct Jongleur::Task name=:C, pid=-1, running=false, exit_status=nil, success_status=nil>,
+ 		#<struct Jongleur::Task name=:D, pid=-1, running=false, exit_status=nil, success_status=nil>,
+ 		#<struct Jongleur::Task name=:E, pid=-1, running=false, exit_status=nil, success_status=nil>]
+	```
+	Jongleur will show you the Task Matrix for your Task Graph with all attributes set at their initial values, obviously, since the Tasks haven't run yet.
+4. (Optional) You may want to see a graphical representation of your Task Graph
+    ```
+    API.print_graph('/tmp')
+    => "/tmp/jongleur_graph_08252018_194828.pdf"
+    ```
+	Opening the PDF file will display this:
+	<img src="./bin/img/DAG_graph_1.png"  width="225" height="450" alt="ETL DAG">
+5. Implement your tasks. To do that you have to (i) create a new class, based on `WorkerTask` and (ii) define an `#execute` method in your class. This is the method that Jongleur will call to run the Task. For instance task A from your Task Graph may look something like this:
+   ```
+   class A < Jongleur::WorkerTask
+  	  @desc = 'this is task A'
+  	  def execute
+       sleep 1
+       'A is running... '
+  	  end
+	end
+```
+   You'll have to do the same for Tasks B, C, D and E, as these are the tasks declared in the Task Graph.
+6. Run the tasks. Ok, pay attention now because this is the complex bit. Nah, only joking - it's simply:
+   ```
+   API.run
+    => Starting workflow...
+	=> starting task A
+ => finished task: A, process: 2501, exit_status: 0, success: true
+ => starting task B
+ => starting task C
+ => finished task: C, process: 2503, exit_status: 0, success: true
+ => finished task: B, process: 2502, exit_status: 0, success: true
+ => starting task D
+ => finished task: D, process: 2505, exit_status: 0, success: true
+ => starting task E
+ => finished task: E, process: 2506, exit_status: 0, success: true
+ => Workflow finished
+   ```
+A __simple example__ of a client app for Jongleur can be found [on GitLab](https://gitlab.com/RedFred7/jongleur-client)
+## Use-Cases
+### Extract-Transform-Load
+The ETL workflow is ideally suited to Jongleur.  You can define many Extraction tasks -maybe separate Tasks for different data sources- and have them run in parallel to each other. At the same time Transformation and Loading Tasks wait in turn for the previous task to finish before they start, as in this DAG illustration:
+<img src="./bin/img/ETL_DAG.png"  width="450" height="450" alt="ETL DAG">
+### Transactions
+Transactional workflows can be greatly sped up by Jongleur by parallelising parts of the transaction that are usually performed sequentially, i.e:
+<img src="./bin/img/transactional_DAG.png"  width="550" height="450" alt="Transaction DAG">
+## Development
+After checking out the repo, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
+To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
+## F.A.Q
+### Does Jongleur allow me to pass messages between Tasks?
+No it doesn't. Each task is run completely independently from the other Tasks. There is no Inter-Process Communication, no common data contexts, no shared memory.
+### How can I share data created by a predecessor Task?
+This is something that I would like to build into Jongleur. For now, you can save a Task's data in a database or KV Store, using the Task's process id as part of the key.  Subsequent Tasks can retrieve their predecessors' process ids with
+```
+API.get_predecessor_pids
+```
+and therefore retrieve the data created by those Tasks.
+### What's the difference between Jongleur::Task's _success\_status_ and _exit\_status_ attributes?
+According to [the official docs](https://ruby-doc.org/core-2.4.3/Process/Status.html) `exit_status` returns the least significant eight bits of the return code of the `stat` call while `success_status` returns true if `stat` is successful.
+### What happens when Jongleur finishes running?
+When Jongleur finishes running all tasks in its Task Graph -and regardless of whether the Tasks themselves have failed or not- it will exit the parent process with an exit code of 0.
+### What happens if a Task fails?
+If a Task fails to run or to finish its run, Jongleur will simply go on running any other tasks it can. It will not run any Tasks which depend on the failed Task. The status of the failed Task will be indicated via an appropriate output message and also on the Task Matrix.
+### How can I examine the Task Matrix after Jongleur has finished?
+Jongleur serializes each run's Task Matrix as a JSON file in the `/tmp` directory. You can either view this in an editor or load it and manipulate it in Ruby with
+```
+JSON.parse( File.read('/tmp/jongleur_task_matrix_08272018_103406.json') )
+```
+## Roadmap
+These are the things I'd like Jongleur to support in future releases:
+* Task storage mechanism, i.e. the ability for each Task to save data in a uniquely identifiable and safe way so that data can be shared between
+  sequential tasks in a transparent and easy manner.
+* Rails integration. Pretty self-explanatory really.
+## Contributing
+Any suggestions for new features or improvements are very welcome. Please raise bug reports and pull requests on [GitLab](https://gitlab.com/RedFred7/Jongleur).
+## License
+The gem is available as open source under the terms of the [MIT License](./LICENSE.txt)

data/Rakefile ADDED

@@ -0,0 +1,8 @@
+# frozen_string_literal: true
+require 'bundler/gem_tasks'
+require 'rspec/core/rake_task'
+# Registers the `spec` Rake task, which runs the RSpec suite.
+RSpec::Core::RakeTask.new(:spec)
+# Plain `rake` falls through to the specs.
+task(default: :spec)

data/bin/console ADDED

@@ -0,0 +1,11 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+require 'bundler/setup'
+require 'jongleur'
+# You can add fixtures and/or initialization code here to make experimenting
+# with your gem easier. You can also use a different console, if you like.
+# NOTE(review): the gemspec lists pry-byebug, not pry itself; presumably that pulls pry in - confirm.
+require 'pry'
+# Opens the interactive prompt with bundler/setup and jongleur already loaded.
+Pry.start

data/bin/img/DAG_graph_1.png ADDED

Binary file

data/bin/img/ETL_DAG.png ADDED

Binary file

data/bin/img/jongleur_m-2015.jpg ADDED

Binary file

data/bin/img/transactional_DAG.png ADDED

Binary file

data/bin/setup ADDED

@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+set -euo pipefail
+IFS=$'\n\t'
+set -vx
+bundle install
+# Do any other automated setup that you need to do here

data/jongleur.gemspec ADDED

@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+lib = File.expand_path('lib', __dir__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'jongleur/version'
+Gem::Specification.new do |spec|
+  spec.name          = 'jongleur'
+  spec.version       = Jongleur::VERSION
+  spec.authors       = ['Fred Heath']
+  spec.email         = ['fred@bootstrap.me.uk']
+  spec.summary       = 'A task scheduler manager for DAG-style task groups.'
+  spec.description   = 'Acceps a number of inter-dependent tasks and runs them as separate processes, parallelising where possible.'
+  spec.homepage      = 'http://www.bootstrap.me.uk'
+  spec.license       = 'MIT'
+  spec.files         = `git ls-files -z`.split("\x0").reject do |f|
+    f.match(%r{^(test|spec|features)/})
+  end
+  spec.bindir        = 'exe'
+  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
+  spec.require_paths = ['lib']
+  spec.add_dependency 'graphviz', '~> 1.1'
+  spec.add_dependency 'os', '~> 1.0'
+  spec.add_development_dependency 'bundler', '~> 1.16'
+  spec.add_development_dependency 'pry-byebug', '~> 3.4'
+  spec.add_development_dependency 'rake', '~> 10.0'
+  spec.add_development_dependency 'rspec', '~> 3.0'
+  spec.add_development_dependency 'rubocop', '~> 0.58'
+  spec.add_development_dependency 'simplecov', '~> 0.9'
+  spec.add_development_dependency 'yard', '~> 0.9'
+end

data/lib/jongleur.rb ADDED

@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+require 'graphviz'
+require 'json'
+require_relative 'jongleur/version'
+require_relative 'jongleur/helpers'
+require_relative 'jongleur/worker_task'
+require_relative 'jongleur/implementation'
+require_relative 'jongleur/api'
+require 'logger'
+# this is the gem's main module
+module Jongleur
+  # a Task is a representation of the status of an executable Jongleur class,
+  # i.e. a class derived from WorkerTask and the process that's executing that class
+  # @see https://ruby-doc.org/core-2.4.3/Process/Status.html
+  #
+  # @!attribute name
+  #   @return [Symbol] the class (WorkerTask) name that's executing this process
+  # @!attribute pid
+  #   @return [Integer] the process id according to the OS, or
+  #       StatusCodes::PROCESS_NOT_YET_RAN while the process hasn't been started
+  # @!attribute running
+  #   @return [Boolean] true if the process is running
+  # @!attribute exit_status
+  #   @return [Integer, nil] the process's return code when the process is exited
+  #   Usually 0 for success, 1 for error or nil otherwise
+  # @!attribute success_status
+  #   @return [Boolean, nil] true if process finished successfully, false if it didn't
+  #       or nil if process didn't exit properly.
+  Task = Struct.new(:name, :pid, :running, :exit_status, :success_status)
+  # Flush everything written to standard output immediately instead of buffering it.
+  $stdout.sync = true
+  # Sentinel values used in place of real process ids / statuses.
+  module StatusCodes
+    # The pid a Task carries until its process has been started.
+    PROCESS_NOT_YET_RAN = -1
+    # NOTE(review): the three codes below are not referenced in the code
+    # visible here; presumably returned by lookup helpers - confirm.
+    TASK_NOT_IN_TASK_MATRIX = -8
+    TASK_NOT_IN_TASK_GRAPH = -9
+    SUCCESS_STATUS_UNDETERMINED = -2
+  end
+end # module

data/lib/jongleur/api.rb ADDED

@@ -0,0 +1,217 @@
+# frozen_string_literal: true
+# rubocop:disable Lint/HandleExceptions
+require_relative './implementation'
+module Jongleur
+  # Here be methods to be accessed by the gem's client, i.e. the public API
+  module API
+    # @!scope class
+    # Accepts a task_graph and does some initialisation, namely the assigning
+    # of class variables and creation of the initial task matrix
+    #
+    # @param [Hash<Symbol, Array>] task_graph_hash
+    # @raise [ArgumentError] if the task_graph_hash argument is not structured correctly
+    # @return [Array<Jongleur::Task>] the freshly built task matrix
+    def self.add_task_graph(task_graph_hash)
+      # The matrix is emptied before validation, so it is already reset when
+      # one of the checks below raises.
+      @@task_matrix = Array.new
+      raise ArgumentError, 'Value should be Hash {task_name, [descendants]}' unless task_graph_hash.is_a?(Hash)
+      # this task_graph will raise the error below , { A: [:B],  B: :C,  C: []}
+      task_graph_hash.each_value do |val|
+        raise ArgumentError, 'Dependent Tasks should be wrapped in an Array {task_name, [dependents]}' unless val.is_a?(Array)
+      end
+      # this task_graph will raise the error below , { A: [:B],  B: [:C, :D],  C: []}
+      # The names are compared rather than counted: a count-only check lets
+      # that very graph through, because it has as many keys as dependents.
+      undeclared = task_graph_hash.values.flatten.uniq - task_graph_hash.keys
+      unless undeclared.empty?
+        raise ArgumentError, 'Each dependent Task should also be defined with a separate key entry'
+      end
+      @@task_graph = task_graph_hash
+      # The matrix built here is also the method's return value.
+      @@task_matrix = Implementation.build_task_matrix(task_graph_hash)
+    end
+    # Prints the TaskGraph to a PDF file
+    #
+    # @param [String] the directory name to print the file to
+    # @return [String] the PDF file name
+    def self.print_graph(dir="")
+      graph = Graphviz::Graph.new
+      dir = Dir.pwd if (!dir || dir.empty?)
+      file_name = File.expand_path("jongleur_graph_#{Time.now.strftime('%m%d%Y_%H%M%S')}.pdf", dir)
+      task_graph.each do |parent_node, child_nodes|
+        new_node = unless graph.node_exists?(parent_node)
+          graph.add_node( parent_node )
+        else
+          graph.get_node( parent_node ).first
+        end
+        child_nodes.each { |child_node| new_node.add_node(child_node) }
+      end
+      Graphviz::output(graph, path: file_name)
+      file_name
+    end
+    # @!attribute task_matrix
+    #   @return [Array<Jongleur::Task>] a list of Tasks and their current state
+    #   @see Jongleur::Task
+    def self.task_matrix
+      @@task_matrix
+    end
+    # @!attribute task_graph
+    #   @return [Hash<Symbol, Array<Symbol>>] where the Hash key is the Task
+    #       name and the value is an array of dependent Tasks
+    #   @example
+    #     a_task_graph = {:A=>[:B, :C], :B=>[:D], :C=>[:D], :D=>[:E], :E=>[]}
+    def self.task_graph
+      # Defaults to (and stores) an empty Hash when no graph has been set yet.
+      @@task_graph ||= {}
+    end
+    # Analyses the Task Matrix for all Tasks that ran successfully
+    #
+    # @param [Array<Jongleur::Task>] the task matrix to analyse
+    # @return [Array<Jongleur::Task>] the successful Tasks
+    def self.successful_tasks(my_task_matrix)
+      my_task_matrix.select { |x| x.success_status == true &&
+        x.exit_status == 0
+      }
+    end
+    # Analyses the Task Matrix for all Tasks that failed to finish successfully
+    #
+    # @param [Array<Jongleur::Task>] the task matrix to analyse
+    # @return [Array<Jongleur::Task>] the failed Tasks
+    def self.failed_tasks(my_task_matrix)
+      my_task_matrix.select { |x| x.success_status == false }
+    end
+    # Analyses the Task Matrix for all Tasks that haven't been ran
+    #
+    # @param [Array<Jongleur::Task>] the task matrix to analyse
+    # @return [Array<Jongleur::Task>] the Tasks that haven't been ran
+    def self.not_ran_tasks(my_task_matrix)
+      my_task_matrix.select { |x| x.success_status == nil &&
+        x.exit_status == nil &&
+        x.pid == StatusCodes::PROCESS_NOT_YET_RAN
+      }
+    end
+    # Analyses the Task Matrix for all Tasks that started but failed to finish
+    #
+    # @param [Array<Jongleur::Task>] the task matrix to analyse
+    # @return [Array<Jongleur::Task>] the Tasks that started but failed to finish
+    def self.hung_tasks(my_task_matrix)
+      my_task_matrix.select { |x| x.success_status == nil &&
+        x.pid != StatusCodes::PROCESS_NOT_YET_RAN
+      }
+    end
+    def self.get_predecessor_pids(a_task)
+      pids = Array.new
+      Implementation.get_predecessors(a_task).each do |task|
+        pids << Implementation.get_process_id(task)
+      end
+      pids
+    end
+    # The main method. It starts the tasks as separate processes, according to
+    # their precedence, traps and handles signals, processes messages. On exit
+    # it will also print the Task Matrix in the /tmp directory in JSON format
+    #
+    # @note This method launches processes without precedence constraints,
+    # traps child process signals and starts new processes when their
+    # antecedents have finished. The method will exit its own process when
+    # all children processes have finished.
+    # @raise [RuntimeError] if there are no implementations for Tasks in the Task Graph
+    # @return [void]
+    def self.run
+      unless Implementation.valid_tasks?(task_graph.keys)
+        raise RuntimeError, 'Not all the tasks in the Task Graph are implemented as WorkerTask classes'
+      end
+      Implementation.process_message 'Starting workflow...'
+      trap_quit_signals
+      start_processes
+      trap(:CHLD) do
+        begin
+          # with WNOHANG flag we make sure Process.wait is not blocking
+          while (res = Process.wait2(-1, Process::WNOHANG))
+            dead_pid = res[0]
+            status = res[1]
+            dead_task_name = ''
+            Implementation.find_task_by(:pid, dead_pid) do |t|
+              t.running = false
+              t.exit_status = status.exitstatus
+              t.success_status = status.success?
+              dead_task_name = t.name
+            end
+            msg = "finished task: %s, process: %i, exit_status: %i, success: %s"
+            Implementation.process_message msg % [dead_task_name,
+                                                  dead_pid,
+                                                  status.exitstatus,
+                                                  status.success?]
+            if status.success?
+              Implementation.run_descendants(dead_task_name)
+            else
+              msg = "Task #{dead_task_name} with process id #{dead_pid} was not succesfully completed."
+              Implementation.process_message(msg)
+            end
+          end
+          # it's possible for the last CHLD signal to arrive after our trap
+          # handler has already called Process.wait twice and reaped the
+          # available status. In such a case we must handle (and ignore)
+          # the oncoming exception so we don't get a crash.
+        rescue Errno::ECHILD
+        end
+      end
+      loop do
+        # We exit once all the child processes and their descendants are
+        # accounted for
+        if Implementation.running_tasks.empty?
+          Implementation.process_message 'Workflow finished'
+          file_name = File.expand_path("jongleur_task_matrix_#{Time.now.strftime('%m%d%Y_%H%M%S')}.json", '/tmp')
+          File.open(file_name, 'w') {|f| f.write(task_matrix.to_json) }
+          exit 0
+        end
+        sleep 1
+      end
+    end #method
+    # Starts all tasks without dependencies as separate processes
+    #
+    # @return [void]
+    def self.start_processes
+      Implementation.tasks_without_predecessors.each do |t|
+        t.running = true
+        Implementation.process_message "starting task #{t.name}"
+        t.pid = fork do
+          Jongleur.const_get(t.name).new(predecessors: Implementation.get_predecessors(t.name)).execute
+        end
+      end
+    end
+    # Forwards any quit signals to all working processes so that quitting the
+    # gem (Ctrl+C) kills all processes
+    #
+    # @return [void]
+    def self.trap_quit_signals
+      %i[INT QUIT].each do |signal|
+        Signal.trap(signal) do
+          Implementation.process_message " #{signal} sent to master process!"
+          Implementation.running_tasks.each do |t|
+            Implementation.process_message "....killing #{t.pid}"
+            Process.kill(:KILL, t.pid)
+          end
+        end
+      end
+    end
+  end #module
+end #module
+# rubocop:enable Lint/HandleExceptions

data/lib/jongleur/helpers.rb ADDED

@@ -0,0 +1,9 @@
+# frozen_string_literal: true
+# this module contains generic helper methods that are used
+# for implementation purposes
+module Helper
+  def contains_array?(an_array)
+    (self & an_array).size == an_array.size
+  end
+end

data/lib/jongleur/implementation.rb ADDED

@@ -0,0 +1,216 @@
+# frozen_string_literal: true
+# rubocop:disable Lint/AssignmentInCondition
+module Jongleur
+  # this module encapsulates methods that are not meant to be accessed by the gem's client callers
+  # and are used by the API module to implement functionality
+  # @see API
+  #
+  # @api private
+  module Implementation
+    # Creates a list of tasks and their current state
+    #
+    # @param [Hash] task_graph
+    # @see API.task_graph
+    # @return [Array] task_matrix a list of Tasks
+    def self.build_task_matrix(task_graph)
+      return [] if task_graph.empty?
+      # create it as a Set so we can easily ensure unique entries
+      task_matrix = Set.new
+      task_graph.keys.each { |t| task_matrix << Task.new(t, StatusCodes::PROCESS_NOT_YET_RAN, false) }
+      task_graph.values.each do |val|
+        val.each { |t| task_matrix << Task.new(t, StatusCodes::PROCESS_NOT_YET_RAN, false) }
+      end
+      task_matrix.to_a
+    end
+    # Lists a task's predecessors, i.e. the tasks whose entry in the task graph lists the given task
+    #
+    # @param [Symbol] task
+    # @return [Array] a list of the predecessor task names for the given task
+    def self.get_predecessors(task)
+      return [] if API.task_graph.empty?
+      API.task_graph.select { |_k, v| v.include?(task) }.keys
+    end
+    # Ensures a task, or list of tasks, are defined in the task_diagram and are loaded in Ruby.
+    # If #const_get can't find the class it raises NameError. The method catches it and returns false
+    #
+    # @note this method exists for the scenario where the user adds a task X to the Task Diagram but fails
+    # to provide an implementation of the Task's class, i.e. class X < WorkerTask
+    # @param [Array<Symbol>] tasks to be validated
+    # @return [Boolean] true if all tasks are valid, and false if one task or more are invalid
+    def self.valid_tasks?(task_list)
+      task_list.each { |task| API.const_get(task.to_s) }
+      true
+    rescue NameError
+      false
+    end
+    # Gets the process id of a task.
+    #
+    # @param [Symbol] task_name
+    # @return [Integer] the pid of the task, Jongleur::StatusCodes::PROCESS_NOT_YET_RAN if the task
+    # hasn't been run yet, or Jongleur::StatusCodes::TASK_NOT_IN_TASK_GRAPH if the task's class cannot be resolved
+    def self.get_process_id(task_name)
+      if valid_tasks?([].push(task_name))
+        idx = API.task_matrix.index { |t| t.name == task_name }
+        # STDOUT.puts ">>>>>  #{task_name} >>>>>> #{API.task_matrix[idx].pid}", "\n"
+        API.task_matrix[idx].pid
+      else
+        StatusCodes::TASK_NOT_IN_TASK_GRAPH
+      end
+    end
+    # Gets a task's exit status
+    # @see https://ruby-doc.org/core-2.4.3/Process/Status.html
+    #
+    # @param [Symbol] task_name
+    # @return [Integer] the task's exit status or StatusCodes::TASK_NOT_IN_TASK_MATRIX
+    def self.get_exit_status(task_name)
+      idx = API.task_matrix.index { |t| t.name == task_name }
+      idx ? API.task_matrix[idx].exit_status : StatusCodes::TASK_NOT_IN_TASK_MATRIX
+    end
+    def self.are_predecessors_running?(task_name)
+      !get_predecessors(task_name).select(&:running).empty?
+    end
+    def self.all_predecessors_finished_successfully?(task_name)
+      get_predecessors(task_name).reduce(0) { |sum, t| sum + get_exit_status(t) }.zero?
+    end
+    def self.predecessors_which_failed(task_name)
+      get_predecessors(task_name).select { |t| task_failed?(t) }
+    end
+    def self.predecessors_which_havent_finished(task_name)
+      get_predecessors(task_name).reject { |t| task_finished?(t) }
+    end
+    # Lists all tasks without predecessors, i.e. tasks that no other task's entry in the task graph lists
+    #
+    # @return [Array] a list of all tasks without predecessors
+    def self.tasks_without_predecessors
+      list = API.task_graph.keys - API.task_graph.values.flatten
+      API.task_matrix.select { |t| list.include?(t.name) }
+    end
+    # Check if a task has failed status
+    #
+    # @return [Boolean, Integer] true if task has a failed status, false if not,
+    #     StatusCodes::TASK_NOT_IN_TASK_MATRIX if task not found
+    def self.task_failed?(task)
+      idx = API.task_matrix.index { |t| t.name == task }
+      idx ? (API.task_matrix[idx].success_status == false) : StatusCodes::TASK_NOT_IN_TASK_MATRIX
+    end
+    # Check if a task is still running, at the time of checking
+    #
+    # @return [Boolean, Integer] true if task is running, false if not,
+    #     StatusCodes::TASK_NOT_IN_TASK_MATRIX if task not found
+    def self.task_running?(task)
+      idx = API.task_matrix.index { |t| t.name == task }
+      idx ? API.task_matrix[idx].running : StatusCodes::TASK_NOT_IN_TASK_MATRIX
+    end
+    # Check if a task has finished running
+    #
+    # @return [Object, Integer] the task's recorded exit_status, which callers use as a truthy
+    #     "has finished" flag, or StatusCodes::TASK_NOT_IN_TASK_MATRIX if task not found
+    def self.task_finished?(task)
+      idx = API.task_matrix.index { |t| t.name == task }
+      idx ? API.task_matrix[idx].exit_status : StatusCodes::TASK_NOT_IN_TASK_MATRIX
+    end
+    def self.finished_tasks
+      API.task_matrix.map { |t| t.name if t.running == false }.compact.extend(Helper)
+    end
+    def self.running_tasks
+      API.task_matrix.select(&:running)
+    end
+    # Find task based on an attribute's value
+    #
+    # @note the method will find the first matching task. If there is more than one match,
+    # only the first one -in sequence order- will be returned
+    # @param [Symbol] attr_name
+    # @param [Object] attr_value could be a String, Integer, Boolean, etc.
+    # @yield [Jongleur::Task] the first task that matches the arguments
+    # @return [Jongleur::Task, nil] the first task that matches the arguments, nil if no matches are found
+    def self.find_task_by(attr_name, attr_value)
+      idx = API.task_matrix.index { |t| t.send(attr_name.to_s) == attr_value }
+      yield API.task_matrix[idx] if block_given? && idx
+      idx ? API.task_matrix[idx] : nil
+    end
+    def self.each_descendant(task)
+      API.task_graph[task]&.each do |desc_task|
+        # check desc_task isn't already running and that its predecessors are finished
+        yield find_task_by(:name, desc_task) if !task_running?(desc_task) &&
+          finished_tasks.contains_array?(get_predecessors(desc_task))
+      end
+    end
+    # Parses a line of program output
+    #
+    # @param [String] a line of program output
+    # @return [Hash] the output line in a key-value format
+    def self.parse_line(line)
+      res = {}
+      msg_arr = []
+      msg_arr = line.split(',') if line&.match(/^finished task/)
+      msg_arr.each do |x|
+        h = {}
+        s = x.split(':')
+        h[s.at(0).strip] = s.at(1).strip
+        res.merge!(h)
+      end
+      res
+    end
+    # Parses a multi-line string of program output
+    #
+    # @param [StringIO] the standard output as a string
+    # @param [Boolean] print output to stdout
+    # @return [Array<Hash>] a list of hashes representing the std output
+    def self.parse_output(string_io, print_to_stdout = false)
+      parsed = []
+      string_io.each_line do |line|
+        STDOUT.puts ">>> #{line}" if print_to_stdout
+        line_as_hash = parse_line(line)
+        parsed << line_as_hash unless line_as_hash.empty?
+      end
+      parsed
+    end
+    # run all descendant tasks of given task
+    def self.run_descendants(task_name)
+      each_descendant(task_name) do |t|
+        waiting = predecessors_which_havent_finished(t.name)
+        failed = predecessors_which_failed(t.name)
+        if waiting.empty? && failed.empty?
+          t.running = true
+          Implementation.process_message "starting task #{t.name}"
+          t.pid = fork { API.const_get(t.name).new(predecessors: get_predecessors(t.name)).execute }
+        elsif !failed.empty?
+          process_message "cannot start #{t.name} because its predecessor #{failed.first} failed to finish"
+        elsif !waiting.empty?
+          process_message "cannot start #{t.name} because its predecessor #{waiting.first} hasn't finished yet"
+        end
+      end
+    end
+    def self.process_message(a_msg)
+      puts(a_msg)
+    end
+  end # module
+end # module
+# rubocop:enable Lint/AssignmentInCondition

data/lib/jongleur/version.rb ADDED

@@ -0,0 +1,5 @@
+# frozen_string_literal: true
+module Jongleur
+  # the version of the gem
+  VERSION = '1.0.1'
+end

data/lib/jongleur/worker_task.rb ADDED

@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+module Jongleur
+  # This is a Base class for all task classes executed by Jongleur.
+  # Every class declared and used in Jongleur must inherit from <WorkerTask>
+  class WorkerTask
+    def initialize(**other_args)
+      other_args.each do |key, val|
+        var_name = "@#{key}"
+        instance_variable_set(var_name, val)
+        self.class.send(:attr_accessor, key.to_s)
+      end
+    end
+  end
+  # returns the task description
+  class << self
+    attr_reader :desc
+  end
+end # class

metadata ADDED

@@ -0,0 +1,193 @@
+--- !ruby/object:Gem::Specification
+name: jongleur
+version: !ruby/object:Gem::Version
+  version: 1.0.1
+platform: ruby
+authors:
+- Fred Heath
+autorequire:
+bindir: exe
+cert_chain: []
+date: 2018-08-27 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: graphviz
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.1'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.1'
+- !ruby/object:Gem::Dependency
+  name: os
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.0'
+- !ruby/object:Gem::Dependency
+  name: bundler
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.16'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.16'
+- !ruby/object:Gem::Dependency
+  name: pry-byebug
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.4'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.4'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10.0'
+- !ruby/object:Gem::Dependency
+  name: rspec
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.0'
+- !ruby/object:Gem::Dependency
+  name: rubocop
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.58'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.58'
+- !ruby/object:Gem::Dependency
+  name: simplecov
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.9'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.9'
+- !ruby/object:Gem::Dependency
+  name: yard
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.9'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.9'
+description: Accepts a number of inter-dependent tasks and runs them as separate processes,
+  parallelising where possible.
+email:
+- fred@bootstrap.me.uk
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- ".gitignore"
+- ".gitlab-ci.yml"
+- ".rspec"
+- ".rubocop.yml"
+- CHANGELOG.md
+- Gemfile
+- LICENSE.txt
+- README.md
+- Rakefile
+- bin/console
+- bin/img/DAG_graph_1.png
+- bin/img/ETL_DAG.png
+- bin/img/jongleur_m-2015.jpg
+- bin/img/transactional_DAG.png
+- bin/setup
+- jongleur.gemspec
+- lib/jongleur.rb
+- lib/jongleur/api.rb
+- lib/jongleur/helpers.rb
+- lib/jongleur/implementation.rb
+- lib/jongleur/version.rb
+- lib/jongleur/worker_task.rb
+homepage: http://www.bootstrap.me.uk
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.6.14
+signing_key:
+specification_version: 4
+summary: A task scheduler manager for DAG-style task groups.
+test_files: []