burstflow 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 57d5cf121b6ebec0ba7a522f4220181dd1839a39
4
+ data.tar.gz: 4fb11ae296181aca5b8aa117057933423b3dd671
5
+ SHA512:
6
+ metadata.gz: f4e516846e3272678f735880607b080c0518a85f0307f332e3ef9a1b36ae149da53e68abd610d1a53104257fa804aaea3ae60ccfb7f2abd9a2602c66e8928082
7
+ data.tar.gz: 0c160a85d57659d232431c8136b6c1837b94f0a788dc01ccec1bb8794af0c0d890743bb19e6a77a8c59c9351b40759efe6129f11af2c781fd26a8404998d0214
data/.gitignore ADDED
@@ -0,0 +1,20 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ workflows/
18
+ tmp
19
+ test.rb
20
+ dump.rdb
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --tty
3
+ --format documentation
data/.rubocop.yml ADDED
@@ -0,0 +1,98 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.5
3
+ Exclude:
4
+ - 'storage/**/*'
5
+ - 'docker/**/*'
6
+ - 'lib/docker_toolkit/childprocess/**/*'
7
+ - 'lib/docker_toolkit/childprocess,rb'
8
+
9
+
10
+ Security/YAMLLoad:
11
+ Enabled: false
12
+
13
+ Style/AsciiComments:
14
+ Enabled: false
15
+
16
+ Style/RedundantBegin:
17
+ Enabled: false
18
+
19
+ Style/GlobalVars:
20
+ AllowedVariables: ['$logger', '$root']
21
+
22
+ Metrics/BlockLength:
23
+ Exclude:
24
+ - 'spec/**/*.rb'
25
+
26
+ Metrics/MethodLength:
27
+ Max: 20
28
+
29
+ Style/ClassAndModuleChildren:
30
+ EnforcedStyle: compact
31
+ Enabled: false
32
+
33
+ Style/Documentation:
34
+ Enabled: false
35
+
36
+ Style/Lambda:
37
+ Enabled: false
38
+
39
+
40
+ Style/RaiseArgs:
41
+ EnforcedStyle: compact
42
+
43
+ Style/SpecialGlobalVars:
44
+ Enabled: false
45
+
46
+ Metrics/LineLength:
47
+ Max: 100
48
+
49
+ #Layout/IndentationWidth:
50
+ # Enabled: true
51
+
52
+ #Layout/IndentAssignment:
53
+ # Enabled: false
54
+ # IndentationWidth: false
55
+
56
+ #Layout/ElseAlignment:
57
+ # Enabled: false
58
+
59
+ #Layout/EndAlignment:
60
+ # Enabled: false
61
+
62
+ #Lint/AssignmentInCondition:
63
+ # Enabled: false
64
+
65
+ Layout/IndentationConsistency:
66
+ EnforcedStyle: rails
67
+
68
+ Layout/EmptyLines:
69
+ Enabled: false
70
+
71
+ Layout/EmptyLinesAroundClassBody:
72
+ EnforcedStyle: empty_lines
73
+
74
+ Layout/EmptyLinesAroundModuleBody:
75
+ EnforcedStyle: empty_lines
76
+
77
+ Layout/SpaceInsideBlockBraces:
78
+ EnforcedStyle: space
79
+ SpaceBeforeBlockParameters: false
80
+
81
+ Layout/SpaceAroundBlockParameters:
82
+ EnforcedStyleInsidePipes: no_space
83
+
84
+ Layout/SpaceBeforeBlockBraces:
85
+ Enabled: false
86
+
87
+ Style/NumericPredicate:
88
+ Enabled: false
89
+
90
+ Style/FrozenStringLiteralComment:
91
+ Enabled: false
92
+
93
+ Style/DoubleNegation:
94
+ Enabled: false
95
+
96
+ Style/SymbolArray:
97
+ Enabled: false
98
+
data/.travis.yml ADDED
@@ -0,0 +1,31 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.5
4
+ - 2.6
5
+
6
+ services:
7
+ - postgresql
8
+
9
+ stage: test
10
+
11
+ before_install:
12
+ - sudo apt-get install -y libxml2-dev unzip curl
13
+
14
+ script:
15
+ - gem build `ls | grep gemspec`
16
+ - gem install `ls | grep -e '.gem$'`
17
+ - bundle exec rake db:create db:migrate
18
+ - bundle exec rspec
19
+
20
+
21
+ jobs:
22
+ include:
23
+ - stage: gem release
24
+ rvm: 2.5
25
+ script: echo "Deploying to rubygems.org ..."
26
+ deploy:
27
+ provider: rubygems
28
+ api_key: $RUBYGEMS_KEY
29
+ gemspec: burst.gemspec
30
+
31
+
data/Gemfile ADDED
@@ -0,0 +1,16 @@
1
+ source 'https://rubygems.org'
2
+ gemspec # pull the dependencies declared in the gemspec that sits next to this Gemfile
3
+ 
4
+ platforms :mri, :ruby do # yajl-ruby is only bundled on the :mri and :ruby platforms
5
+ gem 'yajl-ruby'
6
+ end
7
+ 
8
+ gem 'rake'
9
+ 
10
+ group :test do # gems that are only needed when running the test suite
11
+ gem 'activesupport'
12
+ gem 'awesome_print'
13
+ gem 'database_cleaner'
14
+ gem 'otr-activerecord' # the Rakefile loads 'tasks/otr-activerecord.rake' and uses OTR::ActiveRecord
15
+ gem 'pg', '~> 0.21.0' # pessimistically pinned to the 0.21.x series
16
+ end
data/README.md ADDED
@@ -0,0 +1,293 @@
1
+ <!-- # Gush [![Build Status](https://travis-ci.org/chaps-io/gush.svg?branch=master)](https://travis-ci.org/chaps-io/gush) -->
2
+
3
+ <!-- ## [![](http://i.imgur.com/ya8Wnyl.png)](https://chaps.io) proudly made by [Chaps](https://chaps.io) -->
4
+
5
+ Burst is a parallel workflow runner using [ActiveRecord](https://guides.rubyonrails.org/active_record_basics.html) and [ActiveJob](https://guides.rubyonrails.org/v4.2/active_job_basics.html) for scheduling and executing jobs.
6
+
7
+ This gem is highly inspired by [Gush](https://github.com/chaps-io/gush), but Burst is not tied to Redis or Sidekiq.
8
+
9
+ The main feature of this runner is the ability to **suspend** a job (and the whole workflow) and **resume** it in the future. For example, your job may make an asynchronous request and receive the response some time later. In this case, the job can send the request and suspend until some external event eventually resumes it.
10
+
11
+ Another difference from Gush is **dynamic workflows**. Any job can produce other jobs while executing. These jobs have their parent as an incoming job, and all of the parent's outgoing jobs as their own outgoing jobs.
12
+
13
+ ## Installation
14
+
15
+ ### 1. Add `burstflow` to Gemfile
16
+ 
17
+ ```ruby
18
+ gem 'burstflow', '~> 0.1.0'
19
+ ```
20
+
21
+ ### 2. Run migrations
22
+
23
+ Under development
24
+
25
+ ## Example
26
+
27
+ The DSL for defining jobs consists of a single `run` method.
28
+ Here is a complete example of a workflow you can create:
29
+
30
+ ```ruby
31
+ # app/workflows/sample_workflow.rb
32
+ class SampleWorkflow < Burst::Workflow
33
+ configure do |url_to_fetch_from|
34
+ run FetchJob1, params: { url: url_to_fetch_from }
35
+ run FetchJob2, params: { some_flag: true, url: 'http://url.com' }
36
+
37
+ run PersistJob1, after: FetchJob1
38
+ run PersistJob2, after: FetchJob2
39
+
40
+ run Normalize,
41
+ after: [PersistJob1, PersistJob2],
42
+ before: Index
43
+
44
+ run Index
45
+ end
46
+ end
47
+ ```
48
+
49
+ and this is what the graph will look like:
50
+
51
+ ![SampleWorkflow](https://i.imgur.com/DFh6j51.png)
52
+
53
+
54
+ ## Defining workflows
55
+
56
+ Let's start with the simplest workflow possible, consisting of a single job:
57
+
58
+ ```ruby
59
+ class SimpleWorkflow < Burst::Workflow
60
+ configure do
61
+ run DownloadJob
62
+ end
63
+ end
64
+ ```
65
+
66
+ Of course having a workflow with only a single job does not make sense, so it's time to define dependencies:
67
+
68
+ ```ruby
69
+ class SimpleWorkflow < Burst::Workflow
70
+ configure do
71
+ run DownloadJob
72
+ run SaveJob, after: DownloadJob
73
+ end
74
+ end
75
+ ```
76
+
77
+ We just told Burst to execute `SaveJob` right after `DownloadJob` finishes **successfully**.
78
+
79
+ But what if your job must have multiple dependencies? That's easy, just provide an array to the `after` attribute:
80
+
81
+ ```ruby
82
+ class SimpleWorkflow < Burst::Workflow
83
+ configure do
84
+ run FirstDownloadJob
85
+ run SecondDownloadJob
86
+
87
+ run SaveJob, after: [FirstDownloadJob, SecondDownloadJob]
88
+ end
89
+ end
90
+ ```
91
+
92
+ Now `SaveJob` will only execute after both its parents finish without errors.
93
+
94
+ With this simple syntax you can build any complex workflows you can imagine!
95
+
96
+ #### Alternative way
97
+
98
+ The `run` method also accepts a `before:` attribute to define the opposite association. So we can write the same workflow as above, but like this:
99
+
100
+ ```ruby
101
+ class SimpleWorkflow < Burst::Workflow
102
+ configure do
103
+ run FirstDownloadJob, before: SaveJob
104
+ run SecondDownloadJob, before: SaveJob
105
+
106
+ run SaveJob
107
+ end
108
+ end
109
+ ```
110
+
111
+ You can use whatever way you find more readable or even both at once :)
112
+
113
+ ### Passing arguments to workflows
114
+
115
+ Workflows can accept any primitive arguments in their constructor, which then will be available in your
116
+ `configure` method.
117
+
118
+ Let's assume we are writing a book publishing workflow which needs to know where the PDF of the book is and under what ISBN it will be released:
119
+
120
+ ```ruby
121
+ class PublishBookWorkflow < Burst::Workflow
122
+ configure do |url, isbn|
123
+ run FetchBook, params: { url: url }
124
+ run PublishBook, params: { book_isbn: isbn }, after: FetchBook
125
+ end
126
+ end
127
+ ```
128
+
129
+ and then create your workflow with those arguments:
130
+
131
+ ```ruby
132
+ PublishBookWorkflow.build("http://url.com/book.pdf", "978-0470081204")
133
+ ```
134
+
135
+ and that's basically it for defining workflows, see below on how to define jobs:
136
+
137
+ ## Defining jobs
138
+
139
+ The simplest job is a class inheriting from `Burst::Job` and responding to the `perform` and `resume` methods, much like any other ActiveJob class.
140
+
141
+ ```ruby
142
+ class FetchBook < Burst::Job
143
+ def perform
144
+ # do some fetching from remote APIs
145
+ end
146
+ end
147
+ ```
148
+
149
+ But what about those params we passed in the previous step?
150
+
151
+ ## Passing parameters into jobs
152
+
153
+ To do that, simply provide a `params:` attribute with a hash of parameters you'd like to have available inside the `perform` method of the job.
154
+
155
+ So, inside workflow:
156
+
157
+ ```ruby
158
+ (...)
159
+ run FetchBook, params: {url: "http://url.com/book.pdf"}
160
+ (...)
161
+ ```
162
+
163
+ and within the job we can access them like this:
164
+
165
+ ```ruby
166
+ class FetchBook < Burst::Job
167
+ def perform
168
+ # you can access `params` method here, for example:
169
+
170
+ params #=> {url: "http://url.com/book.pdf"}
171
+ end
172
+ end
173
+ ```
174
+
175
+ ## Executing workflows
176
+
177
+ Workflows are executed by any backend you choose for ActiveJob.
178
+
179
+
180
+ ### 1. Create the workflow instance
181
+
182
+ ```ruby
183
+ flow = PublishBookWorkflow.build("http://url.com/book.pdf", "978-0470081204")
184
+ ```
185
+
186
+ ### 2. Start the workflow
187
+
188
+ ```ruby
189
+ flow.start!
190
+ ```
191
+
192
+ Now Burst will start processing jobs in the background using ActiveJob and your chosen backend.
193
+
194
+ ### 3. Monitor its progress:
195
+
196
+ ```ruby
197
+ flow.reload
198
+ flow.status
199
+ #=> :running|:finished|:failed
200
+ ```
201
+
202
+ `reload` is needed to see the latest status, since workflows are updated asynchronously.
203
+
204
+ ## Advanced features
205
+
206
+ ### Pipelining
207
+
208
+ Burst offers a useful tool to pass results of a job to its dependencies, so they can act differently.
209
+
210
+ **Example:**
211
+
212
+ Let's assume you have two jobs, `DownloadVideo`, `EncodeVideo`.
213
+ The latter needs to know where the first one saved the file to be able to open it.
214
+
215
+
216
+ ```ruby
217
+ class DownloadVideo < Burst::Job
218
+ def perform
219
+ downloader = VideoDownloader.fetch("http://youtube.com/?v=someytvideo")
220
+
221
+ output(downloader.file_path)
222
+ end
223
+ end
224
+ ```
225
+
226
+ The `output` method is used to output data from the job to all dependent jobs.
227
+
228
+ Now, since `DownloadVideo` finished and its dependent job `EncodeVideo` started, we can access that payload inside it:
229
+
230
+ ```ruby
231
+ class EncodeVideo < Burst::Job
232
+ def perform
233
+ video_path = payloads.first[:output]
234
+ end
235
+ end
236
+ ```
237
+
238
+ `payloads` is an array containing outputs from all ancestor jobs. So for our `EncodeVideo` job from above, the array will look like:
239
+
240
+
241
+ ```ruby
242
+ [
243
+ {
244
+ id: "DownloadVideo-41bfb730-b49f-42ac-a808-156327989294", # unique id of the ancestor job
245
+ class: "DownloadVideo",
246
+ output: "https://s3.amazonaws.com/somebucket/downloaded-file.mp4" #the payload returned by DownloadVideo job using `output()` method
247
+ }
248
+ ]
249
+ ```
250
+
251
+ **Note:** Keep in mind that payloads can only contain data which **can be serialized as JSON**.
252
+
253
+ ### Dynamic workflows
254
+
255
+ There might be a case when you have to construct the workflow dynamically depending on the input.
256
+
257
+ As an example, let's write a workflow which accepts an array of users and has to send an email to each one. Additionally after it sends the e-mail to every user, it also has to notify the admin about finishing.
258
+
259
+
260
+ ```ruby
261
+
262
+ class ParentJob < Burst::Job
263
+ def perform
264
+ configure do
265
+ params[:user_ids].map do |user_id|
266
+ run NotificationJob, params: {user_id: user_id}
267
+ end
268
+ end
269
+ end
270
+ end
271
+
272
+ class NotifyWorkflow < Burst::Workflow
273
+ configure do |user_ids|
274
+ run ParentJob, params: {user_ids: user_ids}
275
+
276
+ run AdminNotificationJob, after: ParentJob
277
+ end
278
+ end
279
+ ```
280
+
281
+
282
+
283
+ ## Original Gush Contributors
284
+
285
+ https://github.com/chaps-io/gush#contributors
286
+
287
+ ## Contributing
288
+
289
+ 1. Fork it ( https://github.com/RnD-Soft/burst/fork )
290
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
291
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
292
+ 4. Push to the branch (`git push origin my-new-feature`)
293
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,4 @@
1
+ require 'bundler/setup' # put the gems from the Gemfile on the load path
2
+ load 'tasks/otr-activerecord.rake' # presumably defines the db:* tasks used in .travis.yml (db:create, db:migrate) - confirm
3
+ 
4
+ OTR::ActiveRecord.configure_from_file! 'config/database.yml' # read connection settings from the YAML file
data/burst.gemspec ADDED
@@ -0,0 +1,23 @@
1
+ lib = File.expand_path('lib', __dir__) # absolute path of this gem's lib/ directory
2
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) # make lib/ requirable, without adding it twice
3
+ 
4
+ Gem::Specification.new do |spec| # gem metadata; note the published name ('burstflow') differs from this file's name (burst.gemspec)
5
+ spec.name = 'burstflow'
6
+ spec.version = '0.1.0'
7
+ spec.authors = ['Samoilenko Yuri']
8
+ spec.email = ['kinnalru@gmail.com']
9
+ spec.summary = 'Burst is a parallel workflow runner using ActiveRecord and ActiveJob'
10
+ spec.description = 'It has dependency, result pipelining and suspend/resume ability'
11
+ spec.homepage = 'https://github.com/RnD-Soft/burst'
12
+ spec.license = 'MIT'
13
+ 
14
+ spec.files = `git ls-files -z`.split("\x0") # package every file tracked by git (NUL-separated list), so building needs a git checkout
15
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) # subset of the packaged files living under test/, spec/ or features/
16
+ spec.require_paths = ['lib']
17
+ 
18
+ spec.add_dependency 'activejob' # runtime dependencies (no version constraints)
19
+ spec.add_dependency 'activerecord'
20
+ spec.add_development_dependency 'bundler' # development-only dependencies
21
+ spec.add_development_dependency 'rake'
22
+ spec.add_development_dependency 'rspec'
23
+ end
@@ -0,0 +1,15 @@
1
+ development: &base
2
+ adapter: postgresql
3
+ host: <%= ENV['DATABASE_HOST'] || 'localhost' %>
4
+ port: <%= ENV['DATABASE_PORT'] || 5432 %>
5
+ encoding: unicode
6
+ database: <%= ENV['DATABASE_NAME'] || 'burst_test' %>
7
+ pool: 4
8
+ username: <%= ENV['DATABASE_USER'] || 'postgres' %>
9
+ password: <%= ENV['DATABASE_PASS'] || '' %>
10
+
11
+ test:
12
+ <<: *base
13
+
14
+ production:
15
+ <<: *base
@@ -0,0 +1,13 @@
1
+ class CreateWorkflow < ActiveRecord::Migration[5.1] # creates the burst_workflows table
2
+ 
3
+ def change # reversible migration body
4
+ enable_extension 'pgcrypto' # NOTE(review): presumably needed for gen_random_uuid(), the id default shown in db/schema.rb - confirm
5
+ 
6
+ create_table :burst_workflows, id: :uuid do |t| # primary key is a UUID instead of an integer
7
+ t.jsonb :flow, null: false, default: {} # JSONB column, never NULL, defaults to an empty object
8
+ 
9
+ t.timestamps # created_at / updated_at
10
+ end
11
+ end
12
+ 
13
+ end
data/db/schema.rb ADDED
@@ -0,0 +1,23 @@
1
+ # This file is auto-generated from the current state of the database. Instead
2
+ # of editing this file, please use the migrations feature of Active Record to
3
+ # incrementally modify your database, and then regenerate this schema definition.
4
+ #
5
+ # Note that this schema.rb definition is the authoritative source for your
6
+ # database schema. If you need to create the application database on another
7
+ # system, you should be using db:schema:load, not running all the migrations
8
+ # from scratch. The latter is a flawed and unsustainable approach (the more migrations
9
+ # you'll amass, the slower it'll run and the greater likelihood for issues).
10
+ #
11
+ # It's strongly recommended that you check this file into your version control system.
12
+
13
+ ActiveRecord::Schema.define(version: 20_180_101_000_001) do
14
+ # These are extensions that must be enabled in order to support this database
15
+ enable_extension 'pgcrypto'
16
+ enable_extension 'plpgsql'
17
+
18
+ create_table 'burst_workflows', id: :uuid, default: -> { 'gen_random_uuid()' }, force: :cascade do |t|
19
+ t.jsonb 'flow', default: {}, null: false
20
+ t.datetime 'created_at', null: false
21
+ t.datetime 'updated_at', null: false
22
+ end
23
+ end
data/db/seeds.rb ADDED
@@ -0,0 +1 @@
1
+ # require_relative "../_import.rb"
@@ -0,0 +1,48 @@
1
+ module Burst::Builder # Mixin with the `run` DSL: registers jobs, records their ordering constraints and later links them together.
2
+ 
3
+ extend ActiveSupport::Concern
4
+ 
5
+ included do |_klass| # evaluated in the including class, so the methods below become its instance methods
6
+ attr_accessor :build_deps # list of { from:, to: } edges collected by `run` and consumed by `resolve_dependencies`
7
+ 
8
+ def initialize_builder # Starts with an empty dependency list.
9
+ @build_deps = []
10
+ end
11
+ 
12
+ def run(klass, opts = {}) # Registers a job of class `klass`; `opts` may carry :before / :after, the rest is passed to the job. Returns the job id.
13
+ opts = opts.with_indifferent_access # lets :before / "before" style keys be used interchangeably
14
+ 
15
+ before_deps = opts.delete(:before) || [] # removed from opts so they are not passed on to the job
16
+ after_deps = opts.delete(:after) || []
17
+ 
18
+ job = klass.new(self, opts) # the job receives this object plus the remaining options
19
+ 
20
+ [*before_deps].each do |dep| # [*x] accepts either a single dependency or an array of them
21
+ build_deps << { from: job.id, to: dep.to_s } # before: edge goes from this job to the dependency
22
+ end
23
+ 
24
+ [*after_deps].each do |dep|
25
+ build_deps << { from: dep.to_s, to: job.id } # after: edge goes from the dependency to this job
26
+ end
27
+ 
28
+ job_cache[job.id] = job # job_cache and jobs are not defined in this module; the including class must provide them
29
+ jobs[job.id] = job.model
30
+ 
31
+ job.id
32
+ end
33
+ 
34
+ def resolve_dependencies # Turns every recorded edge into incoming/outgoing links on the two jobs involved.
35
+ build_deps.each do |dependency|
36
+ from = find_job(dependency[:from]) # find_job is not defined here; presumably it accepts a job id or the dep.to_s string stored by `run` - confirm
37
+ to = find_job(dependency[:to])
38
+ 
39
+ to.incoming << from.id
40
+ from.outgoing << to.id
41
+ 
42
+ to.incoming.uniq! # drop duplicates in place, e.g. when the same edge was declared more than once
43
+ from.outgoing.uniq!
44
+ end
45
+ end
46
+ end
47
+ 
48
+ end
@@ -0,0 +1,27 @@
1
+ module Burst
2
+
3
+ class Configuration
4
+
5
+ attr_accessor :concurrency
6
+
7
+ def self.from_json(json)
8
+ new(Burst::JSON.decode(json, symbolize_keys: true))
9
+ end
10
+
11
+ def initialize(hash = {})
12
+ self.concurrency = hash.fetch(:concurrency, 5)
13
+ end
14
+
15
+ def to_hash
16
+ {
17
+ concurrency: concurrency
18
+ }
19
+ end
20
+
21
+ def to_json
22
+ Burst::JSON.encode(to_hash)
23
+ end
24
+
25
+ end
26
+
27
+ end