plines 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +12 -0
- data/LICENSE +22 -0
- data/README.md +420 -0
- data/Rakefile +61 -0
- data/lib/plines.rb +13 -0
- data/lib/plines/configuration.rb +55 -0
- data/lib/plines/dependency_graph.rb +81 -0
- data/lib/plines/dynamic_struct.rb +34 -0
- data/lib/plines/enqueued_job.rb +120 -0
- data/lib/plines/external_dependency_timeout.rb +30 -0
- data/lib/plines/indifferent_hash.rb +58 -0
- data/lib/plines/job.rb +88 -0
- data/lib/plines/job_batch.rb +363 -0
- data/lib/plines/job_batch_list.rb +57 -0
- data/lib/plines/job_enqueuer.rb +83 -0
- data/lib/plines/pipeline.rb +97 -0
- data/lib/plines/redis_objects.rb +108 -0
- data/lib/plines/step.rb +269 -0
- data/lib/plines/version.rb +3 -0
- metadata +192 -0
data/Rakefile
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
require_relative 'config/setup_load_paths'
|
3
|
+
require 'bundler/gem_helper'
|
4
|
+
Bundler::GemHelper.install_tasks
|
5
|
+
|
6
|
+
require 'qless/tasks'
|
7
|
+
namespace :qless do
|
8
|
+
task :set_redis_url do
|
9
|
+
if File.exist?('./config/redis_connection_url.txt')
|
10
|
+
ENV['REDIS_URL'] = File.read('./config/redis_connection_url.txt')
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
task :setup => :set_redis_url do
|
15
|
+
ENV['VVERBOSE'] = '1'
|
16
|
+
ENV['QUEUE'] = 'plines'
|
17
|
+
ENV['INTERVAL'] = '1.0'
|
18
|
+
end
|
19
|
+
|
20
|
+
desc "Start the Qless Web UI"
|
21
|
+
task :server => :set_redis_url do
|
22
|
+
sh "rackup config/config.ru"
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require 'rspec/core/rake_task'
|
27
|
+
|
28
|
+
RSpec::Core::RakeTask.new(:spec) do |t|
|
29
|
+
t.rspec_opts = %w[--profile --format progress]
|
30
|
+
t.ruby_opts = "-Ispec -r./config/setup_load_paths -rsimplecov_setup"
|
31
|
+
end
|
32
|
+
|
33
|
+
if RUBY_ENGINE == 'ruby'
|
34
|
+
require 'cane/rake_task'
|
35
|
+
|
36
|
+
desc "Run cane to check quality metrics"
|
37
|
+
Cane::RakeTask.new(:quality) do |cane|
|
38
|
+
cane.style_glob = "lib/**/*.rb"
|
39
|
+
cane.abc_max = 16
|
40
|
+
cane.add_threshold 'coverage/coverage_percent.txt', :>=, 100
|
41
|
+
end
|
42
|
+
else
|
43
|
+
task :quality do
|
44
|
+
# no-op; Cane isn't supported on this interpretter
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
task default: [:spec, :quality]
|
49
|
+
|
50
|
+
namespace :ci do
|
51
|
+
desc "Run all tests both integrated and in isolation"
|
52
|
+
task :spec do
|
53
|
+
test_all_script = File.expand_path('../script/test_all', __FILE__)
|
54
|
+
sh test_all_script
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
|
59
|
+
desc "Run CI build"
|
60
|
+
task ci: %w[ ci:spec quality ]
|
61
|
+
|
data/lib/plines.rb
ADDED
@@ -0,0 +1,13 @@
# Loads the full Plines library, in dependency order.
%w[
  configuration
  dependency_graph
  dynamic_struct
  enqueued_job
  external_dependency_timeout
  job
  job_enqueuer
  job_batch
  job_batch_list
  pipeline
  step
  version
].each { |file| require "plines/#{file}" }
@@ -0,0 +1,55 @@
module Plines
  # Stores global Plines configuration.
  class Configuration
    # Raised when there is a configuration error.
    class Error < StandardError; end

    TWO_MONTHS_IN_SECONDS = 2 * 30 * 24 * 60 * 60

    # Seeds every setting with its default. The required settings
    # (qless_client / batch_list_key) default to blocks that raise a
    # helpful Error so misconfiguration fails loudly on first use.
    def initialize
      qless_client { raise Error, "qless_client has not been configured" }
      batch_list_key { raise Error, "batch_list_key has not been configured" }

      qless_job_options { |job| {} }
      self.data_ttl_in_seconds = TWO_MONTHS_IN_SECONDS
      @callbacks = Hash.new { |hash, type| hash[type] = [] }
    end

    # Registers the block used to look up a Qless client for a key.
    def qless_client(&block)
      @qless_client_block = block
    end

    # Invokes the configured qless_client block for the given key.
    def qless_client_for(key)
      @qless_client_block.call(key)
    end

    # Registers the block used to derive a batch list key from batch data.
    def batch_list_key(&block)
      @batch_list_key_block = block
    end

    # Invokes the configured batch_list_key block for the given batch data.
    def batch_list_key_for(batch_data)
      @batch_list_key_block.call(batch_data)
    end

    attr_accessor :data_ttl_in_seconds

    # The configured TTL, expressed as an integral number of milliseconds.
    def data_ttl_in_milliseconds
      (data_ttl_in_seconds * 1000).to_i
    end

    attr_reader :qless_job_options_block

    # Registers the block used to build per-job Qless options.
    def qless_job_options(&block)
      @qless_job_options_block = block
    end

    # Adds a callback to run whenever a job batch is cancelled.
    def after_job_batch_cancellation(&block)
      @callbacks[:after_job_batch_cancellation] << block
    end

    # Fires every callback registered under the given type, in
    # registration order, passing the given arguments through.
    def notify(callback_type, *args)
      @callbacks[callback_type].each { |callback| callback.call(*args) }
    end
  end
end
@@ -0,0 +1,81 @@
require 'set'

module Plines
  # Represents a dependency graph of Plines steps. This graph contains
  # Jobs (i.e. Step classes paired with data). The graph
  # takes care of preventing duplicate step instances.
  class DependencyGraph
    # NOTE: steps is not ordered according to dependencies; use
    # #ordered_steps when a dependency-respecting order is needed.
    attr_reader :steps

    # Raised when a circular dependency is detected.
    class CircularDependencyError < StandardError; end

    # Builds the graph for the given pipeline and batch data, then wires
    # dangling jobs up to the terminal jobs and validates acyclicity.
    def initialize(pipeline, batch_data)
      klasses = pipeline.step_classes

      @steps = Job.accumulate_instances do
        klasses.each do |klass|
          klass.jobs_for(batch_data).each do |job|
            job.add_dependencies_for(batch_data)
          end
        end

        @terminal_jobs = pipeline.terminal_step.jobs_for(batch_data)
      end

      cleanup_and_validate_dependencies!
    end

    # Lazily yields every step in an order that satisfies all dependencies
    # (a dependency is always yielded before its dependents).
    def ordered_steps
      emitted = Set.new

      Enumerator.new do |yielder|
        steps.each { |step| yield_next_ordered_step_for(step, emitted, yielder) }
      end
    end

    private

    # Walks every not-yet-visited step depth-first; the walk both hooks
    # dependent-less steps to the terminal jobs and detects cycles.
    def cleanup_and_validate_dependencies!
      @visited_steps = Set.new

      @steps.each do |step|
        depth_first_search_from(step) unless @visited_steps.include?(step)
      end
    end

    # Recursive DFS; current_stack holds the ancestors of this call so a
    # revisit within the same path signals a cycle.
    def depth_first_search_from(step, current_stack = Set.new)
      @visited_steps << step
      add_terminal_job_dependencies(step)

      if current_stack.include?(step)
        raise CircularDependencyError,
          "Your graph appears to have a circular dependency: " +
          current_stack.inspect
      end

      step.dependencies.each do |dependency|
        depth_first_search_from(dependency, current_stack | [step])
      end
    end

    # A job with no dependents (that is not itself a terminal job) becomes
    # a dependency of every terminal job, so the terminal step always runs last.
    def add_terminal_job_dependencies(job)
      return unless job.dependents.none? && !@terminal_jobs.include?(job)

      @terminal_jobs.each { |terminal_job| terminal_job.add_dependency(job) }
    end

    # Post-order emission: dependencies first, then the step itself.
    def yield_next_ordered_step_for(step, visited, yielder)
      return if visited.include?(step)

      step.dependencies.each do |dependency|
        yield_next_ordered_step_for(dependency, visited, yielder)
      end

      visited << step
      yielder.yield step
    end
  end
end
@@ -0,0 +1,34 @@
module Plines
  # Transforms an arbitrarily deeply nested hash into a dot-syntax
  # object. Useful as an alternative to a hash since it is "strongly typed"
  # in the sense that fat-fingered property names result in a NoMethodError,
  # rather than getting a nil as you would with a hash.
  class DynamicStruct
    attr_reader :attribute_names, :to_hash

    # Defines a reader and a boolean predicate ("#{key}?") for every key
    # in the given hash, recursing into nested hash values.
    def initialize(hash)
      @to_hash = hash
      @attribute_names = hash.keys.map(&:to_sym)

      hash.each do |key, raw_value|
        value = method_value_for(raw_value)
        define_singleton_method(key) { value }
        define_singleton_method("#{key}?") { !!value }
      end
    end

    private

    # Wraps hashes (and arrays made up entirely of hashes) in
    # DynamicStruct instances; every other value passes through untouched.
    def method_value_for(raw_value)
      case raw_value
      when Hash
        self.class.new(raw_value)
      when Array
        if raw_value.all? { |element| element.is_a?(Hash) }
          raw_value.map { |element| self.class.new(element) }
        else
          raw_value
        end
      else
        raw_value
      end
    end
  end
end
@@ -0,0 +1,120 @@
require 'plines/redis_objects'

module Plines
  # Once a Plines::Job has been enqueued as a Qless job into redis,
  # an EnqueuedJob is used to represent and hold the additional state
  # that Plines needs to track about the job.
  class EnqueuedJob < Struct.new(:pipeline, :jid)
    include Plines::RedisObjectsHelpers

    attr_reader :qless, :redis

    def initialize(qless, pipeline, jid, &block)
      @qless = qless
      @redis = qless.redis
      super(pipeline, jid)
      instance_eval(&block) if block
    end

    # Builds an EnqueuedJob, recording each given external dependency
    # as pending.
    def self.create(qless, pipeline, jid, *external_dependencies)
      new(qless, pipeline, jid) do
        external_dependencies.each do |dependency|
          pending_ext_deps << dependency
        end
      end
    end

    # The underlying Qless job for this jid (may be nil if it is gone).
    def qless_job
      qless.jobs[jid]
    end

    def pending_external_dependencies
      pending_ext_deps.members
    end

    def resolved_external_dependencies
      resolved_ext_deps.members
    end

    def timed_out_external_dependencies
      timed_out_ext_deps.members
    end

    # Union of pending, resolved and timed-out dependencies.
    def all_external_dependencies
      pending_ext_deps.union(
        resolved_ext_deps, timed_out_ext_deps
      )
    end

    # Dependencies that have not been resolved (pending or timed out).
    def unresolved_external_dependencies
      pending_ext_deps.union(timed_out_ext_deps)
    end

    # Marks the named dependency as resolved, wherever it currently lives.
    def resolve_external_dependency(name)
      update_external_dependency \
        name, resolved_ext_deps, pending_ext_deps, timed_out_ext_deps
    end

    # Marks the named pending dependency as timed out.
    def timeout_external_dependency(name)
      update_external_dependency \
        name, timed_out_ext_deps, pending_ext_deps
    end

    private

    alias id jid # id is needed by Redis::Objects
    set :pending_ext_deps
    set :resolved_ext_deps
    set :timed_out_ext_deps

    CannotUpdateExternalDependencyError = Class.new(StandardError)

    # Moves the named dependency into destination_set, retrying a bounded
    # number of times when a concurrent writer invalidates the optimistic
    # lock, then failing loudly if it still could not be applied.
    def update_external_dependency(name, destination_set, *source_sets)
      assert_has_external_dependency!(name)

      outcome = nil

      5.times do
        outcome = try_to_update(qless_job, name, destination_set, *source_sets)
        break unless outcome == :needs_retry
      end

      if outcome == :needs_retry
        raise CannotUpdateExternalDependencyError,
          "Failed to update dependency #{name} after many tries"
      end
    end

    # Performs a single optimistic WATCH/MULTI attempt to move the named
    # dependency between sets. When this was the last pending dependency,
    # the qless job is also moved to its processing queue in the same
    # transaction. Returns :needs_retry if the transaction aborted.
    def try_to_update(job, name, destination_set, *source_sets)
      redis.watch(pending_ext_deps.key) do
        deps_before = self.pending_external_dependencies

        txn_result = redis.multi do
          source_sets.each do |source_set|
            source_set.move(name, destination_set)
          end

          if job && deps_before == [name]
            job.move(job.klass.processing_queue)
          end
        end

        return :needs_retry unless txn_result
      end
    end

    # Raises unless the named dependency exists in at least one of the
    # pending/resolved/timed-out sets.
    def assert_has_external_dependency!(name)
      membership = redis.multi do
        pending_ext_deps.include?(name)
        resolved_ext_deps.include?(name)
        timed_out_ext_deps.include?(name)
      end

      if membership.none?
        raise ArgumentError, "EnqueuedJob #{jid} does not have pending " +
          "external dependency #{name.inspect}"
      end
    end
  end
end
@@ -0,0 +1,30 @@
module Plines
  # This is a job that gets enqueued with a delay in order to timeout external
  # dependencies. When it runs, it will timeout the named external dependency
  # for the given jids. If the named dependency is the only remaining pending
  # dependency for any of the jobs identified by the jids, they will get moved
  # into their appropriate processing queue.
  class ExternalDependencyTimeout
    # Qless entry point: resolves the pipeline constant named in the job
    # data, finds the job batch, and times out the recorded dependency.
    def self.perform(job)
      pipeline = job.data.fetch('pipeline').split('::')
                    .reduce(Object) { |namespace, part| namespace.const_get(part) }

      job_batch = JobBatch.find(job.client, pipeline,
                                job.data.fetch("job_batch_id"))

      job_batch.timeout_external_dependency \
        job.data.fetch("dep_name"),
        job.data.fetch("jids")
    end

    # Builds the serializable data hash for a timeout job; #perform reads
    # exactly these keys back out.
    def self.job_data_for(job_batch, dep_name, jids)
      {
        "pipeline" => job_batch.pipeline.name,
        "job_batch_id" => job_batch.id,
        "dep_name" => dep_name,
        "jids" => jids
      }
    end
  end
end
@@ -0,0 +1,58 @@
require 'delegate'

module Plines
  # Provides a hash that can be accessed by symbol or string keys.
  # This is useful because a plines job batch data hash is commonly
  # provided with symbol keys, but after round-tripping through
  # JSON it is converted to strings. We can't safely convert all
  # strings to symbols (as symbols are never GC'd) so instead we
  # use this for the data hash.
  class IndifferentHash < DelegateClass(Hash)
    # Raised when something other than a hash is given to `.from`.
    NotAHashError = Class.new(TypeError)
    # Raised when a hash contains both string and symbol forms of a key.
    ConflictingEntriesError = Class.new(ArgumentError)

    private_class_method :new

    # Builds an IndifferentHash from the given hash, normalizing all keys
    # (recursively) to strings. Raises NotAHashError for non-hash input
    # and ConflictingEntriesError if two keys normalize to the same string.
    def self.from(original)
      unless original.is_a?(Hash) || original.is_a?(IndifferentHash)
        raise NotAHashError, "Expected a hash, got #{original.inspect}"
      end

      # Symbol lookups fall through to the stringified key.
      indif = Hash.new { |hash, key| hash[key.to_s] if Symbol === key }

      original.each do |key, value|
        key = key.to_s

        if indif.has_key?(key)
          raise ConflictingEntriesError,
            "Hash has conflicting entries for #{key}: #{original}"
        end

        indif[key] = indifferent(value)
      end

      new(indif)
    end

    # Recursively converts nested hashes (including hashes inside arrays)
    # into IndifferentHash instances; other values pass through untouched.
    def self.indifferent(object)
      case object
      when Hash then from(object)
      when Array then object.map { |o| indifferent(o) }
      else object
      end
    end

    # Like Hash#fetch, but a symbol key falls back to its string form.
    # Generalized (backward-compatibly) to support Hash#fetch's optional
    # default value and default block, which the one-argument signature
    # previously rejected with an ArgumentError.
    def fetch(key, *args, &block)
      if !has_key?(key) && Symbol === key && has_key?(key.to_s)
        key = key.to_s
      end

      super
    end

    # Merges like Hash#merge but returns an IndifferentHash, normalizing
    # the other hash's keys first. An optional conflict-resolution block
    # is forwarded to Hash#merge by `super`.
    def merge(other)
      IndifferentHash.from super(IndifferentHash.from other)
    end
  end
end