plines 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +12 -0
- data/LICENSE +22 -0
- data/README.md +420 -0
- data/Rakefile +61 -0
- data/lib/plines.rb +13 -0
- data/lib/plines/configuration.rb +55 -0
- data/lib/plines/dependency_graph.rb +81 -0
- data/lib/plines/dynamic_struct.rb +34 -0
- data/lib/plines/enqueued_job.rb +120 -0
- data/lib/plines/external_dependency_timeout.rb +30 -0
- data/lib/plines/indifferent_hash.rb +58 -0
- data/lib/plines/job.rb +88 -0
- data/lib/plines/job_batch.rb +363 -0
- data/lib/plines/job_batch_list.rb +57 -0
- data/lib/plines/job_enqueuer.rb +83 -0
- data/lib/plines/pipeline.rb +97 -0
- data/lib/plines/redis_objects.rb +108 -0
- data/lib/plines/step.rb +269 -0
- data/lib/plines/version.rb +3 -0
- metadata +192 -0
data/Rakefile
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
require_relative 'config/setup_load_paths'
|
3
|
+
require 'bundler/gem_helper'
|
4
|
+
Bundler::GemHelper.install_tasks
|
5
|
+
|
6
|
+
require 'qless/tasks'
|
7
|
+
namespace :qless do
|
8
|
+
task :set_redis_url do
|
9
|
+
if File.exist?('./config/redis_connection_url.txt')
|
10
|
+
ENV['REDIS_URL'] = File.read('./config/redis_connection_url.txt')
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
task :setup => :set_redis_url do
|
15
|
+
ENV['VVERBOSE'] = '1'
|
16
|
+
ENV['QUEUE'] = 'plines'
|
17
|
+
ENV['INTERVAL'] = '1.0'
|
18
|
+
end
|
19
|
+
|
20
|
+
desc "Start the Qless Web UI"
|
21
|
+
task :server => :set_redis_url do
|
22
|
+
sh "rackup config/config.ru"
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require 'rspec/core/rake_task'
|
27
|
+
|
28
|
+
RSpec::Core::RakeTask.new(:spec) do |t|
|
29
|
+
t.rspec_opts = %w[--profile --format progress]
|
30
|
+
t.ruby_opts = "-Ispec -r./config/setup_load_paths -rsimplecov_setup"
|
31
|
+
end
|
32
|
+
|
33
|
+
if RUBY_ENGINE == 'ruby'
|
34
|
+
require 'cane/rake_task'
|
35
|
+
|
36
|
+
desc "Run cane to check quality metrics"
|
37
|
+
Cane::RakeTask.new(:quality) do |cane|
|
38
|
+
cane.style_glob = "lib/**/*.rb"
|
39
|
+
cane.abc_max = 16
|
40
|
+
cane.add_threshold 'coverage/coverage_percent.txt', :>=, 100
|
41
|
+
end
|
42
|
+
else
|
43
|
+
task :quality do
|
44
|
+
# no-op; Cane isn't supported on this interpretter
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
task default: [:spec, :quality]
|
49
|
+
|
50
|
+
namespace :ci do
|
51
|
+
desc "Run all tests both integrated and in isolation"
|
52
|
+
task :spec do
|
53
|
+
test_all_script = File.expand_path('../script/test_all', __FILE__)
|
54
|
+
sh test_all_script
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
|
59
|
+
desc "Run CI build"
|
60
|
+
task ci: %w[ ci:spec quality ]
|
61
|
+
|
data/lib/plines.rb
ADDED
@@ -0,0 +1,13 @@
# Load every Plines component, in dependency-safe order.
%w[
  configuration
  dependency_graph
  dynamic_struct
  enqueued_job
  external_dependency_timeout
  job
  job_enqueuer
  job_batch
  job_batch_list
  pipeline
  step
  version
].each { |component| require "plines/#{component}" }
@@ -0,0 +1,55 @@
module Plines
  # Stores global Plines configuration.
  class Configuration
    # Raised when there is a configuration error.
    class Error < StandardError; end

    TWO_MONTHS_IN_SECONDS = 2 * 30 * 24 * 60 * 60

    # How long job batch data lives in redis, in seconds.
    attr_accessor :data_ttl_in_seconds

    # The most recently registered qless_job_options block.
    attr_reader :qless_job_options_block

    def initialize
      # Until the user configures these, using them raises a clear error.
      qless_client { raise Error, "qless_client has not been configured" }
      batch_list_key { raise Error, "batch_list_key has not been configured" }

      qless_job_options { |_job| {} } # no extra job options by default
      self.data_ttl_in_seconds = TWO_MONTHS_IN_SECONDS

      # Each callback type lazily gets its own handler list.
      @callbacks = Hash.new { |handlers, type| handlers[type] = [] }
    end

    # Registers the block used to look up a qless client for a key.
    def qless_client(&block)
      @qless_client_block = block
    end

    # Invokes the registered qless_client block for the given key.
    def qless_client_for(key)
      @qless_client_block.call(key)
    end

    # Registers the block used to derive a batch list key from batch data.
    def batch_list_key(&block)
      @batch_list_key_block = block
    end

    # Invokes the registered batch_list_key block.
    def batch_list_key_for(batch_data)
      @batch_list_key_block.call(batch_data)
    end

    # The configured TTL converted to whole milliseconds.
    def data_ttl_in_milliseconds
      (data_ttl_in_seconds * 1000).to_i
    end

    # Registers the block used to produce per-job qless options.
    def qless_job_options(&block)
      @qless_job_options_block = block
    end

    # Registers a callback to run after a job batch is cancelled.
    def after_job_batch_cancellation(&block)
      @callbacks[:after_job_batch_cancellation] << block
    end

    # Fires every callback registered for the given type with the args.
    def notify(callback_type, *args)
      @callbacks[callback_type].each { |callback| callback.call(*args) }
    end
  end
end
@@ -0,0 +1,81 @@
require 'set'

module Plines
  # Represents a dependency graph of Plines steps. This graph contains
  # Jobs (i.e. Step classes paired with data). The graph
  # takes care of preventing duplicate step instances.
  class DependencyGraph
    attr_reader :steps # FYI, steps is not ordered according to dependencies

    # Raised when a circular dependency is detected.
    class CircularDependencyError < StandardError; end

    # Builds the graph for the given pipeline and batch data:
    # - asks every step class for its jobs and wires up their dependencies;
    # - captures the pipeline's terminal jobs (created inside the
    #   accumulate_instances block so they are part of @steps —
    #   presumably Job.accumulate_instances collects jobs created within
    #   the block; confirm against Plines::Job);
    # - then validates the resulting graph (cycle check + terminal wiring).
    def initialize(pipeline, batch_data)
      step_classes = pipeline.step_classes
      @steps = Job.accumulate_instances do
        step_classes.each do |step_klass|
          step_klass.jobs_for(batch_data).each do |job|
            job.add_dependencies_for(batch_data)
          end
        end

        @terminal_jobs = pipeline.terminal_step.jobs_for(batch_data)
      end

      cleanup_and_validate_dependencies!
    end

    # Lazily yields the steps in dependency order (each step's
    # dependencies are yielded before the step itself).
    def ordered_steps
      visited = Set.new
      Enumerator.new do |yielder|
        steps.each do |step|
          yield_next_ordered_step_for(step, visited, yielder)
        end
      end
    end

    private

    # Walks the whole graph once, wiring terminal-job dependencies and
    # raising if a cycle is found. @visited_steps only prunes the
    # top-level starting points; the DFS itself may revisit shared nodes.
    def cleanup_and_validate_dependencies!
      @visited_steps = Set.new

      @steps.each do |step|
        next if @visited_steps.include?(step)
        depth_first_search_from(step)
      end
    end

    # DFS from +step+. +current_stack+ holds the ancestors on the current
    # path; seeing +step+ already in it means we looped back — a cycle.
    # Note `current_stack | [step]` builds a new Set per recursion, so
    # sibling branches never see each other's stack entries.
    def depth_first_search_from(step, current_stack=Set.new)
      @visited_steps << step
      add_terminal_job_dependencies(step)

      if current_stack.include?(step)
        raise CircularDependencyError,
          "Your graph appears to have a circular dependency: " +
          current_stack.inspect
      end

      step.dependencies.each do |dep|
        depth_first_search_from(dep, current_stack | [step])
      end
    end

    # Any job with no dependents (other than the terminal jobs themselves)
    # becomes a dependency of every terminal job, so the terminal jobs
    # always run last.
    def add_terminal_job_dependencies(job)
      if job.dependents.none? && !@terminal_jobs.include?(job)
        @terminal_jobs.each { |term_job| term_job.add_dependency(job) }
      end
    end

    # Post-order emit: recurse into dependencies first, then yield the
    # step itself; +visited+ guarantees each step is yielded exactly once.
    def yield_next_ordered_step_for(step, visited, yielder)
      return if visited.include?(step)

      step.dependencies.each do |dependency|
        yield_next_ordered_step_for(dependency, visited, yielder)
      end

      visited << step
      yielder.yield step
    end
  end
end
@@ -0,0 +1,34 @@
module Plines
  # Wraps an arbitrarily deeply nested hash in an object that exposes each
  # key as a reader method. Useful as a "strongly typed" alternative to a
  # hash: a fat-fingered attribute name raises NoMethodError rather than
  # silently returning nil.
  class DynamicStruct
    # attribute_names: the top-level keys as symbols.
    # to_hash: the original hash, unmodified.
    attr_reader :attribute_names, :to_hash

    def initialize(hash)
      @to_hash = hash
      @attribute_names = hash.keys.map(&:to_sym)

      hash.each_pair do |key, raw_value|
        wrapped = method_value_for(raw_value)
        define_singleton_method(key) { wrapped }
        # Predicate form: returns the value's truthiness.
        define_singleton_method("#{key}?") { !!wrapped }
      end
    end

    private

    # Recursively wraps hashes — and arrays made up entirely of hashes —
    # in DynamicStruct instances; everything else passes through as-is.
    def method_value_for(raw)
      case raw
      when Hash
        self.class.new(raw)
      when Array
        if raw.all? { |element| element.is_a?(Hash) }
          raw.map { |element| self.class.new(element) }
        else
          raw
        end
      else
        raw
      end
    end
  end
end
@@ -0,0 +1,120 @@
require 'plines/redis_objects'

module Plines
  # Once a Plines::Job has been enqueued as a Qless job into redis,
  # an EnqueuedJob is used to represent and hold the additional state
  # that Plines needs to track about the job.
  #
  # External dependencies for a jid live in three redis sets —
  # pending, resolved, and timed out — declared below via the `set`
  # macro (from Plines::RedisObjectsHelpers; see plines/redis_objects).
  class EnqueuedJob < Struct.new(:pipeline, :jid)
    include Plines::RedisObjectsHelpers

    attr_reader :qless, :redis

    # The optional block is instance_eval'd, letting .create seed the
    # pending dependency set before the object is handed out.
    def initialize(qless, pipeline, jid, &block)
      @qless = qless
      @redis = qless.redis
      super(pipeline, jid)
      instance_eval(&block) if block
    end

    # Builds an EnqueuedJob whose pending set contains the given
    # external dependency names.
    def self.create(qless, pipeline, jid, *external_dependencies)
      new(qless, pipeline, jid) do
        external_dependencies.each do |dep|
          pending_ext_deps << dep
        end
      end
    end

    # The underlying Qless job, or nil if it no longer exists.
    def qless_job
      qless.jobs[jid]
    end

    def pending_external_dependencies
      pending_ext_deps.members
    end

    def resolved_external_dependencies
      resolved_ext_deps.members
    end

    def timed_out_external_dependencies
      timed_out_ext_deps.members
    end

    # Union of all three dependency sets.
    def all_external_dependencies
      pending_ext_deps.union(
        resolved_ext_deps, timed_out_ext_deps
      )
    end

    # Dependencies that have not been resolved (pending or timed out).
    def unresolved_external_dependencies
      pending_ext_deps.union(timed_out_ext_deps)
    end

    # Moves +name+ into the resolved set, from whichever of the other
    # two sets currently holds it.
    def resolve_external_dependency(name)
      update_external_dependency \
        name, resolved_ext_deps, pending_ext_deps, timed_out_ext_deps
    end

    # Moves +name+ from pending into the timed-out set. Note a resolved
    # dependency is deliberately not a source here: resolution wins.
    def timeout_external_dependency(name)
      update_external_dependency \
        name, timed_out_ext_deps, pending_ext_deps
    end

    private

    alias id jid # id is needed by Redis::Objects

    # Redis-backed sets keyed off this jid (see RedisObjectsHelpers).
    set :pending_ext_deps
    set :resolved_ext_deps
    set :timed_out_ext_deps

    # Raised when the optimistic-locking retry loop below is exhausted.
    CannotUpdateExternalDependencyError = Class.new(StandardError)

    # Moves +name+ into +destination_set+ under optimistic locking,
    # retrying up to 5 times if a concurrent writer invalidates the
    # WATCH. Raises if the update never succeeds.
    def update_external_dependency(name, destination_set, *source_sets)
      assert_has_external_dependency!(name)

      result = nil

      5.times do
        result = try_to_update(qless_job, name, destination_set, *source_sets)
        break unless result == :needs_retry
      end

      if result == :needs_retry
        raise CannotUpdateExternalDependencyError,
          "Failed to update dependency #{name} after many tries"
      end
    end

    # One optimistic attempt: WATCH the pending set, read it, then in a
    # MULTI transaction move +name+ between sets and — when +name+ was
    # the last pending dependency — move the qless job to its
    # processing queue. redis.multi returns nil when the watched key
    # changed underneath us, in which case we signal a retry.
    def try_to_update(job, name, destination_set, *source_sets)
      redis.watch(pending_ext_deps.key) do
        pending_deps = self.pending_external_dependencies

        response = redis.multi do
          source_sets.each do |source_set|
            source_set.move(name, destination_set)
          end

          if job && pending_deps == [name]
            job.move(job.klass.processing_queue)
          end
        end

        return :needs_retry unless response
      end
    end

    # Ensures +name+ exists in at least one of the three sets; the three
    # membership checks are pipelined in a single MULTI round trip.
    def assert_has_external_dependency!(name)
      results = redis.multi do
        pending_ext_deps.include?(name)
        resolved_ext_deps.include?(name)
        timed_out_ext_deps.include?(name)
      end

      if results.none?
        raise ArgumentError, "EnqueuedJob #{jid} does not have pending " +
          "external dependency #{name.inspect}"
      end
    end
  end
end
@@ -0,0 +1,30 @@
module Plines
  # This is a job that gets enqueued with a delay in order to timeout external
  # dependencies. When it runs, it will timeout the named external dependency
  # for the given jids. If the named dependency is the only remaining pending
  # dependency for any of the jobs identified by the jids, they will get moved
  # into their appropriate processing queue.
  class ExternalDependencyTimeout
    # Qless entry point. Resolves the pipeline constant named in the job
    # data, looks up the job batch, and times out the named dependency
    # for the listed jids.
    def self.perform(job)
      data = job.data
      pipeline = data.fetch('pipeline')
                     .split('::')
                     .reduce(Object) { |namespace, const_name| namespace.const_get(const_name) }

      batch = JobBatch.find(job.client, pipeline, data.fetch("job_batch_id"))
      batch.timeout_external_dependency(data.fetch("dep_name"), data.fetch("jids"))
    end

    # Builds the qless job data hash consumed by .perform above.
    def self.job_data_for(job_batch, dep_name, jids)
      {
        "pipeline" => job_batch.pipeline.name,
        "job_batch_id" => job_batch.id,
        "dep_name" => dep_name,
        "jids" => jids
      }
    end
  end
end
@@ -0,0 +1,58 @@
require 'delegate'

module Plines
  # Provides a hash that can be accessed by symbol or string keys.
  # This is useful because a plines job batch data hash is commonly
  # provided with symbol keys, but after round-tripping through
  # JSON it is converted to strings. We can't safely convert all
  # strings to symbols (as symbols are never GC'd) so instead we
  # use this for the data hash.
  class IndifferentHash < DelegateClass(Hash)
    # Raised when .from is given something that isn't hash-like.
    NotAHashError = Class.new(TypeError)
    # Raised when a hash contains both symbol and string forms of a key.
    ConflictingEntriesError = Class.new(ArgumentError)

    private_class_method :new # instances must be built via .from

    # Builds an IndifferentHash from +original+, recursively converting
    # nested hashes (and hashes inside arrays). All keys are stored as
    # strings; symbol lookups fall back to the string form via the
    # default proc. Raises NotAHashError for non-hash input and
    # ConflictingEntriesError when a key exists in both forms.
    def self.from(original)
      unless original.is_a?(Hash) || original.is_a?(IndifferentHash)
        raise NotAHashError, "Expected a hash, got #{original.inspect}"
      end

      # Symbol misses re-dispatch to the stringified key.
      indif = Hash.new { |hash, key| hash[key.to_s] if Symbol === key }

      original.each do |key, value|
        key = key.to_s

        if indif.has_key?(key)
          raise ConflictingEntriesError,
            "Hash has conflicting entries for #{key}: #{original}"
        end

        indif[key] = indifferent(value)
      end

      new(indif)
    end

    # Recursively converts hashes (including hashes inside arrays) to
    # IndifferentHash; other values pass through unchanged.
    def self.indifferent(object)
      case object
      when Hash then from(object)
      when Array then object.map { |o| indifferent(o) }
      else object
      end
    end

    # Like Hash#fetch, but a symbol key falls back to its string form
    # when only the string form is present.
    #
    # Fix: the original `def fetch(key)` dropped Hash#fetch's optional
    # default argument, so `fetch(:missing, default)` raised
    # ArgumentError instead of returning the default. Forwarding
    # `*default` (at most one value, enforced by super) and the block
    # restores the full Hash#fetch contract.
    def fetch(key, *default, &block)
      if !has_key?(key) && Symbol === key && has_key?(key.to_s)
        key = key.to_s
      end

      super(key, *default, &block)
    end

    # Merges like Hash#merge but returns an IndifferentHash; the other
    # hash is made indifferent first so its keys are stringified.
    def merge(other)
      IndifferentHash.from super(IndifferentHash.from other)
    end
  end
end