RubyGems - burstflow - Versions diffs - 0.1.0 - Mend

burstflow 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

checksums.yaml +7 -0
data/.gitignore +20 -0
data/.rspec +3 -0
data/.rubocop.yml +98 -0
data/.travis.yml +31 -0
data/Gemfile +16 -0
data/README.md +293 -0
data/Rakefile +4 -0
data/burst.gemspec +23 -0
data/config/database.yml +15 -0
data/db/migrate/20180101000001_create_workflow.rb +13 -0
data/db/schema.rb +23 -0
data/db/seeds.rb +1 -0
data/lib/burst/builder.rb +48 -0
data/lib/burst/configuration.rb +27 -0
data/lib/burst/job.rb +187 -0
data/lib/burst/manager.rb +79 -0
data/lib/burst/model.rb +49 -0
data/lib/burst/worker.rb +42 -0
data/lib/burst/workflow.rb +148 -0
data/lib/burst/workflow_helper.rb +86 -0
data/lib/burst.rb +37 -0
data/spec/burst_spec.rb +4 -0
data/spec/cases_spec.rb +180 -0
data/spec/job_spec.rb +80 -0
data/spec/spec_helper.rb +47 -0
data/spec/support/database_clean.rb +16 -0
data/spec/support/runner.rb +12 -0
data/spec/workflow_spec.rb +185 -0
metadata +149 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 57d5cf121b6ebec0ba7a522f4220181dd1839a39
+  data.tar.gz: 4fb11ae296181aca5b8aa117057933423b3dd671
+SHA512:
+  metadata.gz: f4e516846e3272678f735880607b080c0518a85f0307f332e3ef9a1b36ae149da53e68abd610d1a53104257fa804aaea3ae60ccfb7f2abd9a2602c66e8928082
+  data.tar.gz: 0c160a85d57659d232431c8136b6c1837b94f0a788dc01ccec1bb8794af0c0d890743bb19e6a77a8c59c9351b40759efe6129f11af2c781fd26a8404998d0214

data/.gitignore ADDED Viewed

@@ -0,0 +1,20 @@
+*.gem
+*.rbc
+.bundle
+.config
+.yardoc
+Gemfile.lock
+InstalledFiles
+_yardoc
+coverage
+doc/
+lib/bundler/man
+pkg
+rdoc
+spec/reports
+test/tmp
+test/version_tmp
+workflows/
+tmp
+test.rb
+dump.rdb

data/.rspec ADDED Viewed

@@ -0,0 +1,3 @@
+--color
+--tty
+--format documentation

data/.rubocop.yml ADDED Viewed

@@ -0,0 +1,98 @@
+AllCops:
+  TargetRubyVersion: 2.5
+  Exclude:
+    - 'storage/**/*'
+    - 'docker/**/*'
+    - 'lib/docker_toolkit/childprocess/**/*'
+    - 'lib/docker_toolkit/childprocess.rb'
+Security/YAMLLoad:
+  Enabled: false
+Style/AsciiComments:
+  Enabled: false
+Style/RedundantBegin:
+  Enabled: false
+Style/GlobalVars:
+  AllowedVariables: ['$logger', '$root']
+Metrics/BlockLength:
+  Exclude:
+    - 'spec/**/*.rb'
+Metrics/MethodLength:
+  Max: 20
+Style/ClassAndModuleChildren:
+  EnforcedStyle: compact
+  Enabled: false
+Style/Documentation:
+  Enabled: false
+Style/Lambda:
+  Enabled: false
+Style/RaiseArgs:
+  EnforcedStyle: compact
+Style/SpecialGlobalVars:
+  Enabled: false
+Metrics/LineLength:
+  Max: 100
+#Layout/IndentationWidth:
+#  Enabled: true
+#Layout/IndentAssignment:
+#  Enabled: false
+#  IndentationWidth: false
+#Layout/ElseAlignment:
+#  Enabled: false
+#Layout/EndAlignment:
+#  Enabled: false
+#Lint/AssignmentInCondition:
+#  Enabled: false
+Layout/IndentationConsistency:
+  EnforcedStyle: rails
+Layout/EmptyLines:
+  Enabled: false
+Layout/EmptyLinesAroundClassBody:
+  EnforcedStyle: empty_lines
+Layout/EmptyLinesAroundModuleBody:
+  EnforcedStyle: empty_lines
+Layout/SpaceInsideBlockBraces:
+  EnforcedStyle: space
+  SpaceBeforeBlockParameters: false
+Layout/SpaceAroundBlockParameters:
+  EnforcedStyleInsidePipes: no_space
+Layout/SpaceBeforeBlockBraces:
+  Enabled: false
+Style/NumericPredicate:
+  Enabled: false
+Style/FrozenStringLiteralComment:
+  Enabled: false
+Style/DoubleNegation:
+  Enabled: false
+Style/SymbolArray:
+  Enabled: false

data/.travis.yml ADDED Viewed

@@ -0,0 +1,31 @@
+language: ruby
+rvm:
+  - 2.5
+  - 2.6
+services:
+  - postgresql
+stage: test
+before_install:
+  - sudo apt-get install -y libxml2-dev unzip curl
+script:
+  - gem build `ls | grep gemspec`
+  - gem install `ls | grep -e '.gem$'`
+  - bundle exec rake db:create db:migrate
+  - bundle exec rspec
+jobs:
+  include:
+    - stage: gem release
+      rvm: 2.5
+      script: echo "Deploying to rubygems.org ..."
+      deploy:
+        provider: rubygems
+        api_key: $RUBYGEMS_KEY
+        gemspec: burst.gemspec

data/Gemfile ADDED Viewed

@@ -0,0 +1,16 @@
+source 'https://rubygems.org' # every gem below is resolved from rubygems.org
+gemspec # also load the dependencies declared in this directory's gemspec
+platforms :mri, :ruby do # yajl-ruby is restricted to the :mri and :ruby platforms
+  gem 'yajl-ruby'
+end
+gem 'rake'
+group :test do # gems belonging to the :test Bundler group
+  gem 'activesupport'
+  gem 'awesome_print'
+  gem 'database_cleaner'
+  gem 'otr-activerecord'
+  gem 'pg', '~> 0.21.0' # pessimistic pin: stays on the 0.21.x series
+end

data/README.md ADDED Viewed

@@ -0,0 +1,293 @@
+<!-- # Gush [![Build Status](https://travis-ci.org/chaps-io/gush.svg?branch=master)](https://travis-ci.org/chaps-io/gush) -->
+<!-- ## [![](http://i.imgur.com/ya8Wnyl.png)](https://chaps.io) proudly made by [Chaps](https://chaps.io) -->
+Burst is a parallel workflow runner using [ActiveRecord] and [ActiveJob](https://guides.rubyonrails.org/v4.2/active_job_basics.html) for scheduling and executing jobs.
+This gem is highly inspired by [Gush](https://github.com/chaps-io/gush), but Burst is not tied to Redis or Sidekiq.
+The main feature of this runner is the ability to **suspend** a job (and the whole workflow) and **resume** it in the future. For example, your job may make an asynchronous request and receive the response some time later. In this case, the job can send the request and suspend until some external event eventually resumes it.
+Another difference from Gush is **dynamic workflows**. Any job can produce other jobs while executing. These jobs have their parent as their incoming job, and all of the parent's outgoing jobs as their own outgoing jobs.
+## Installation
+### 1. Add `burstflow` to Gemfile
+```ruby
+gem 'burstflow', '~> 0.1.0', require: 'burst'
+```
+### 2. Run migrations
+Under development
+## Example
+The DSL for defining jobs consists of a single `run` method.
+Here is a complete example of a workflow you can create:
+```ruby
+# app/workflows/sample_workflow.rb
+class SampleWorkflow < Burst::Workflow
+  configure do |url_to_fetch_from|
+    run FetchJob1, params: { url: url_to_fetch_from }
+    run FetchJob2, params: { some_flag: true, url: 'http://url.com' }
+    run PersistJob1, after: FetchJob1
+    run PersistJob2, after: FetchJob2
+    run Normalize,
+        after: [PersistJob1, PersistJob2],
+        before: Index
+    run Index
+  end
+end
+```
+and this is what the graph will look like:
+![SampleWorkflow](https://i.imgur.com/DFh6j51.png)
+## Defining workflows
+Let's start with the simplest workflow possible, consisting of a single job:
+```ruby
+class SimpleWorkflow < Burst::Workflow
+  configure do
+    run DownloadJob
+  end
+end
+```
+Of course having a workflow with only a single job does not make sense, so it's time to define dependencies:
+```ruby
+class SimpleWorkflow < Burst::Workflow
+  configure do
+    run DownloadJob
+    run SaveJob, after: DownloadJob
+  end
+end
+```
+We just told Burst to execute `SaveJob` right after `DownloadJob` finishes **successfully**.
+But what if your job must have multiple dependencies? That's easy, just provide an array to the `after` attribute:
+```ruby
+class SimpleWorkflow < Burst::Workflow
+  configure do
+    run FirstDownloadJob
+    run SecondDownloadJob
+    run SaveJob, after: [FirstDownloadJob, SecondDownloadJob]
+  end
+end
+```
+Now `SaveJob` will only execute after both its parents finish without errors.
+With this simple syntax you can build any complex workflows you can imagine!
+#### Alternative way
+`run` method also accepts `before:` attribute to define the opposite association. So we can write the same workflow as above, but like this:
+```ruby
+class SimpleWorkflow < Burst::Workflow
+  configure do
+    run FirstDownloadJob, before: SaveJob
+    run SecondDownloadJob, before: SaveJob
+    run SaveJob
+  end
+end
+```
+You can use whatever way you find more readable or even both at once :)
+### Passing arguments to workflows
+Workflows can accept any primitive arguments in their constructor, which then will be available in your
+`configure` method.
+Let's assume we are writing a book publishing workflow which needs to know where the PDF of the book is and under what ISBN it will be released:
+```ruby
+class PublishBookWorkflow < Burst::Workflow
+  configure do |url, isbn|
+    run FetchBook, params: { url: url }
+    run PublishBook, params: { book_isbn: isbn }, after: FetchBook
+  end
+end
+```
+and then create your workflow with those arguments:
+```ruby
+PublishBookWorkflow.build("http://url.com/book.pdf", "978-0470081204")
+```
+and that's basically it for defining workflows, see below on how to define jobs:
+## Defining jobs
+The simplest job is a class inheriting from `Burst::Job` and responding to the `perform` and `resume` methods, much like any other ActiveJob class.
+```ruby
+class FetchBook < Burst::Job
+  def perform
+    # do some fetching from remote APIs
+  end
+end
+```
+But what about those params we passed in the previous step?
+## Passing parameters into jobs
+To do that, simply provide a `params:` attribute with a hash of parameters you'd like to have available inside the `perform` method of the job.
+So, inside workflow:
+```ruby
+(...)
+run FetchBook, params: {url: "http://url.com/book.pdf"}
+(...)
+```
+and within the job we can access them like this:
+```ruby
+class FetchBook < Burst::Job
+  def perform
+    # you can access `params` method here, for example:
+    params #=> {url: "http://url.com/book.pdf"}
+  end
+end
+```
+## Executing workflows
+Workflows are executed by any backend you choose for ActiveJob.
+### 1. Create the workflow instance
+```ruby
+flow = PublishBookWorkflow.build("http://url.com/book.pdf", "978-0470081204")
+```
+### 2. Start the workflow
+```ruby
+flow.start!
+```
+Now Burst will start processing jobs in the background using ActiveJob and your chosen backend.
+### 3. Monitor its progress:
+```ruby
+flow.reload
+flow.status
+#=> :running|:finished|:failed
+```
+`reload` is needed to see the latest status, since workflows are updated asynchronously.
+## Advanced features
+### Pipelining
+Burst offers a useful tool to pass results of a job to its dependencies, so they can act differently.
+**Example:**
+Let's assume you have two jobs, `DownloadVideo`, `EncodeVideo`.
+The latter needs to know where the first one saved the file to be able to open it.
+```ruby
+class DownloadVideo < Burst::Job
+  def perform
+    downloader = VideoDownloader.fetch("http://youtube.com/?v=someytvideo")
+    output(downloader.file_path)
+  end
+end
+```
+The `output` method is used to output data from the job to all dependent jobs.
+Now, since `DownloadVideo` finished and its dependent job `EncodeVideo` started, we can access that payload inside it:
+```ruby
+class EncodeVideo < Burst::Job
+  def perform
+    video_path = payloads.first[:output]
+  end
+end
+```
+`payloads` is an array containing outputs from all ancestor jobs. So for our `EncodeVideo` job from above, the array will look like:
+```ruby
+[
+  {
+    id: "DownloadVideo-41bfb730-b49f-42ac-a808-156327989294", # unique id of the ancestor job
+    class: "DownloadVideo",
+    output: "https://s3.amazonaws.com/somebucket/downloaded-file.mp4" #the payload returned by DownloadVideo job using `output()` method
+  }
+]
+```
+**Note:** Keep in mind that payloads can only contain data which **can be serialized as JSON**.
+### Dynamic workflows
+There might be a case when you have to construct the workflow dynamically depending on the input.
+As an example, let's write a workflow which accepts an array of users and has to send an email to each one. Additionally after it sends the e-mail to every user, it also has to notify the admin about finishing.
+```ruby
+class ParentJob < Burst::Job
+  def perform
+    configure do
+      params[:user_ids].map do |user_id|
+        run NotificationJob, params: {user_id: user_id}
+      end
+    end
+  end
+end
+class NotifyWorkflow < Burst::Workflow
+  configure do |user_ids|
+    run ParentJob, params: {user_ids: user_ids}
+    run AdminNotificationJob, after: ParentJob
+  end
+end
+```
+## Original Gush Contributors
+https://github.com/chaps-io/gush#contributors
+## Contributing
+1. Fork it ( https://github.com/RnD-Soft/burst/fork )
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Add some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create new Pull Request

data/Rakefile ADDED Viewed

@@ -0,0 +1,4 @@
+require 'bundler/setup' # make the gems from the Gemfile loadable before anything else runs
+load 'tasks/otr-activerecord.rake' # NOTE(review): presumably supplies the db:create / db:migrate tasks invoked in .travis.yml — confirm
+OTR::ActiveRecord.configure_from_file! 'config/database.yml' # database settings are read from config/database.yml

data/burst.gemspec ADDED Viewed

@@ -0,0 +1,23 @@
+lib = File.expand_path('lib', __dir__) # absolute path of the lib/ directory next to this gemspec
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) # prepend lib/ to the load path, but only once
+Gem::Specification.new do |spec|
+  spec.name          = 'burstflow' # published gem name; the gemspec file itself is named burst.gemspec
+  spec.version       = '0.1.0'
+  spec.authors       = ['Samoilenko Yuri']
+  spec.email         = ['kinnalru@gmail.com']
+  spec.summary       = 'Burst is a parallel workflow runner using ActiveRecord and ActiveJob'
+  spec.description   = 'It has dependency, result pipelining and suspend/resume ability'
+  spec.homepage      = 'https://github.com/RnD-Soft/burst'
+  spec.license       = 'MIT'
+  spec.files         = `git ls-files -z`.split("\x0") # package every file listed by git (NUL-separated output)
+  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/}) # packaged files under test/, spec/ or features/
+  spec.require_paths = ['lib']
+  spec.add_dependency 'activejob' # runtime dependency, no version constraint
+  spec.add_dependency 'activerecord' # runtime dependency, no version constraint
+  spec.add_development_dependency 'bundler'
+  spec.add_development_dependency 'rake'
+  spec.add_development_dependency 'rspec'
+end

data/config/database.yml ADDED Viewed

@@ -0,0 +1,15 @@
+development: &base
+  adapter:  postgresql
+  host:     <%= ENV['DATABASE_HOST'] || 'localhost' %>
+  port:     <%= ENV['DATABASE_PORT'] || 5432 %>
+  encoding: unicode
+  database: <%= ENV['DATABASE_NAME'] || 'burst_test' %>
+  pool:     4
+  username: <%= ENV['DATABASE_USER'] || 'postgres' %>
+  password: <%= ENV['DATABASE_PASS'] || '' %>
+test:
+ <<: *base
+production:
+ <<: *base

data/db/migrate/20180101000001_create_workflow.rb ADDED Viewed

@@ -0,0 +1,13 @@
+class CreateWorkflow < ActiveRecord::Migration[5.1] # creates the burst_workflows table
+  def change
+    enable_extension 'pgcrypto' # NOTE(review): presumably required for the UUID primary key default — confirm
+    create_table :burst_workflows, id: :uuid do |t| # primary key is a UUID instead of an integer
+      t.jsonb :flow, null: false, default: {} # required JSONB column, defaults to an empty object
+      t.timestamps
+    end
+  end
+end

data/db/schema.rb ADDED Viewed

@@ -0,0 +1,23 @@
+# This file is auto-generated from the current state of the database. Instead
+# of editing this file, please use the migrations feature of Active Record to
+# incrementally modify your database, and then regenerate this schema definition.
+#
+# Note that this schema.rb definition is the authoritative source for your
+# database schema. If you need to create the application database on another
+# system, you should be using db:schema:load, not running all the migrations
+# from scratch. The latter is a flawed and unsustainable approach (the more migrations
+# you'll amass, the slower it'll run and the greater likelihood for issues).
+#
+# It's strongly recommended that you check this file into your version control system.
+ActiveRecord::Schema.define(version: 20_180_101_000_001) do # version equals the timestamp of the create_workflow migration
+  # These are extensions that must be enabled in order to support this database
+  enable_extension 'pgcrypto'
+  enable_extension 'plpgsql'
+  create_table 'burst_workflows', id: :uuid, default: -> { 'gen_random_uuid()' }, force: :cascade do |t| # UUID primary key generated by gen_random_uuid()
+    t.jsonb 'flow', default: {}, null: false
+    t.datetime 'created_at', null: false
+    t.datetime 'updated_at', null: false
+  end
+end

data/db/seeds.rb ADDED Viewed

@@ -0,0 +1 @@
+# require_relative "../_import.rb"

data/lib/burst/builder.rb ADDED Viewed

@@ -0,0 +1,48 @@
+module Burst::Builder
+  extend ActiveSupport::Concern
+  included do |_klass|
+    attr_accessor :build_deps
+    def initialize_builder
+      @build_deps = []
+    end
+    def run(klass, opts = {})
+      opts = opts.with_indifferent_access
+      before_deps = opts.delete(:before) || []
+      after_deps = opts.delete(:after) || []
+      job = klass.new(self, opts)
+      [*before_deps].each do |dep|
+        build_deps << { from: job.id, to: dep.to_s }
+      end
+      [*after_deps].each do |dep|
+        build_deps << { from: dep.to_s, to: job.id }
+      end
+      job_cache[job.id] = job
+      jobs[job.id] = job.model
+      job.id
+    end
+    def resolve_dependencies
+      build_deps.each do |dependency|
+        from = find_job(dependency[:from])
+        to   = find_job(dependency[:to])
+        to.incoming << from.id
+        from.outgoing << to.id
+        to.incoming.uniq!
+        from.outgoing.uniq!
+      end
+    end
+  end
+end

data/lib/burst/configuration.rb ADDED Viewed

@@ -0,0 +1,27 @@
+module Burst
+  class Configuration
+    attr_accessor :concurrency
+    def self.from_json(json)
+      new(Burst::JSON.decode(json, symbolize_keys: true))
+    end
+    def initialize(hash = {})
+      self.concurrency = hash.fetch(:concurrency, 5)
+    end
+    def to_hash
+      {
+        concurrency: concurrency
+      }
+    end
+    def to_json
+      Burst::JSON.encode(to_hash)
+    end
+  end
+end