plines 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env rake
2
+ require_relative 'config/setup_load_paths'
3
+ require 'bundler/gem_helper'
4
+ Bundler::GemHelper.install_tasks
5
+
6
+ require 'qless/tasks'
7
+ namespace :qless do
8
+ task :set_redis_url do
9
+ if File.exist?('./config/redis_connection_url.txt')
10
+ ENV['REDIS_URL'] = File.read('./config/redis_connection_url.txt')
11
+ end
12
+ end
13
+
14
+ task :setup => :set_redis_url do
15
+ ENV['VVERBOSE'] = '1'
16
+ ENV['QUEUE'] = 'plines'
17
+ ENV['INTERVAL'] = '1.0'
18
+ end
19
+
20
+ desc "Start the Qless Web UI"
21
+ task :server => :set_redis_url do
22
+ sh "rackup config/config.ru"
23
+ end
24
+ end
25
+
26
+ require 'rspec/core/rake_task'
27
+
28
+ RSpec::Core::RakeTask.new(:spec) do |t|
29
+ t.rspec_opts = %w[--profile --format progress]
30
+ t.ruby_opts = "-Ispec -r./config/setup_load_paths -rsimplecov_setup"
31
+ end
32
+
33
+ if RUBY_ENGINE == 'ruby'
34
+ require 'cane/rake_task'
35
+
36
+ desc "Run cane to check quality metrics"
37
+ Cane::RakeTask.new(:quality) do |cane|
38
+ cane.style_glob = "lib/**/*.rb"
39
+ cane.abc_max = 16
40
+ cane.add_threshold 'coverage/coverage_percent.txt', :>=, 100
41
+ end
42
+ else
43
+ task :quality do
44
+ # no-op; Cane isn't supported on this interpretter
45
+ end
46
+ end
47
+
48
+ task default: [:spec, :quality]
49
+
50
+ namespace :ci do
51
+ desc "Run all tests both integrated and in isolation"
52
+ task :spec do
53
+ test_all_script = File.expand_path('../script/test_all', __FILE__)
54
+ sh test_all_script
55
+ end
56
+ end
57
+
58
+
59
+ desc "Run CI build"
60
+ task ci: %w[ ci:spec quality ]
61
+
@@ -0,0 +1,13 @@
1
+ require 'plines/configuration'
2
+ require 'plines/dependency_graph'
3
+ require 'plines/dynamic_struct'
4
+ require 'plines/enqueued_job'
5
+ require 'plines/external_dependency_timeout'
6
+ require 'plines/job'
7
+ require 'plines/job_enqueuer'
8
+ require 'plines/job_batch'
9
+ require 'plines/job_batch_list'
10
+ require 'plines/pipeline'
11
+ require 'plines/step'
12
+ require "plines/version"
13
+
@@ -0,0 +1,55 @@
1
module Plines
  # Holds the global, user-supplied Plines configuration: how to obtain a
  # Qless client, how to key job-batch lists, per-job Qless options,
  # data TTL, and lifecycle callbacks.
  class Configuration
    # Raised when a required setting has not been configured.
    class Error < StandardError; end

    TWO_MONTHS_IN_SECONDS = 2 * 30 * 24 * 60 * 60

    # TTL (in seconds) applied to batch data; defaults to two months.
    attr_accessor :data_ttl_in_seconds

    # The most recently registered qless_job_options block.
    attr_reader :qless_job_options_block

    def initialize
      @callbacks = Hash.new { |hash, key| hash[key] = [] }
      self.data_ttl_in_seconds = TWO_MONTHS_IN_SECONDS

      # Required settings default to blocks that raise until configured.
      qless_client { raise Error, "qless_client has not been configured" }
      batch_list_key { raise Error, "batch_list_key has not been configured" }
      qless_job_options { |job| {} }
    end

    # Registers the block used to look up a Qless client for a given key.
    def qless_client(&block)
      @qless_client_block = block
    end

    # Invokes the registered qless_client block with +key+.
    def qless_client_for(key)
      @qless_client_block.call(key)
    end

    # Registers the block that derives a batch-list key from batch data.
    def batch_list_key(&block)
      @batch_list_key_block = block
    end

    # Invokes the registered batch_list_key block with +batch_data+.
    def batch_list_key_for(batch_data)
      @batch_list_key_block.call(batch_data)
    end

    # The data TTL expressed in whole milliseconds.
    def data_ttl_in_milliseconds
      (data_ttl_in_seconds * 1000).to_i
    end

    # Registers the block producing extra Qless options for a job.
    def qless_job_options(&block)
      @qless_job_options_block = block
    end

    # Registers a callback invoked after a job batch is cancelled.
    def after_job_batch_cancellation(&block)
      @callbacks[:after_job_batch_cancellation] << block
    end

    # Fires every callback registered under +callback_type+ with +args+.
    def notify(callback_type, *args)
      @callbacks[callback_type].each { |callback| callback.call(*args) }
    end
  end
end
+
@@ -0,0 +1,81 @@
1
require 'set'

module Plines
  # Represents a dependency graph of Plines steps. This graph contains
  # Jobs (i.e. Step classes paired with data). The graph
  # takes care of preventing duplicate step instances.
  class DependencyGraph
    attr_reader :steps # FYI, steps is not ordered according to dependencies

    # Raised when a circular dependency is detected.
    class CircularDependencyError < StandardError; end

    # Builds the graph for the given pipeline and batch data, then wires up
    # terminal-job dependencies and validates there are no cycles.
    def initialize(pipeline, batch_data)
      step_classes = pipeline.step_classes
      # Job.accumulate_instances collects every Job instance created inside
      # the block (presumably de-duplicating them — confirm in Plines::Job);
      # that is how @steps gets populated even though nothing is appended
      # to it explicitly here.
      @steps = Job.accumulate_instances do
        step_classes.each do |step_klass|
          step_klass.jobs_for(batch_data).each do |job|
            job.add_dependencies_for(batch_data)
          end
        end

        @terminal_jobs = pipeline.terminal_step.jobs_for(batch_data)
      end

      cleanup_and_validate_dependencies!
    end

    # Returns a lazy Enumerator that yields steps in dependency order:
    # each step is yielded only after all of its dependencies have been.
    def ordered_steps
      visited = Set.new
      Enumerator.new do |yielder|
        steps.each do |step|
          yield_next_ordered_step_for(step, visited, yielder)
        end
      end
    end

    private

    # Walks the whole graph once from every unvisited root: adds the
    # terminal-job dependencies and raises CircularDependencyError if any
    # cycle is reachable.
    def cleanup_and_validate_dependencies!
      @visited_steps = Set.new

      @steps.each do |step|
        next if @visited_steps.include?(step)
        depth_first_search_from(step)
      end
    end

    # Depth-first search from +step+. +current_stack+ holds the steps on the
    # current recursion path; encountering one of them again means a cycle.
    def depth_first_search_from(step, current_stack=Set.new)
      @visited_steps << step
      add_terminal_job_dependencies(step)

      if current_stack.include?(step)
        raise CircularDependencyError,
          "Your graph appears to have a circular dependency: " +
          current_stack.inspect
      end

      step.dependencies.each do |dep|
        depth_first_search_from(dep, current_stack | [step])
      end
    end

    # Any job with no dependents (other than a terminal job itself) is made
    # a dependency of every terminal job, so the terminal jobs run last.
    def add_terminal_job_dependencies(job)
      if job.dependents.none? && !@terminal_jobs.include?(job)
        @terminal_jobs.each { |term_job| term_job.add_dependency(job) }
      end
    end

    # Post-order DFS emission: yield dependencies first, then the step
    # itself; +visited+ guarantees each step is yielded at most once.
    def yield_next_ordered_step_for(step, visited, yielder)
      return if visited.include?(step)

      step.dependencies.each do |dependency|
        yield_next_ordered_step_for(dependency, visited, yielder)
      end

      visited << step
      yielder.yield step
    end
  end
end
+
@@ -0,0 +1,34 @@
1
module Plines
  # Wraps an arbitrarily nested hash in an object exposing each key as a
  # reader method. Unlike a plain hash, a mistyped attribute name raises
  # NoMethodError instead of silently returning nil, so it behaves like a
  # "strongly typed" view of the data.
  class DynamicStruct
    attr_reader :attribute_names, :to_hash

    def initialize(hash)
      @to_hash = hash
      @attribute_names = hash.keys.map(&:to_sym)

      hash.each_pair do |name, raw_value|
        wrapped = method_value_for(raw_value)
        # Reader plus a boolean predicate (`foo?`) for every key.
        define_singleton_method(name) { wrapped }
        define_singleton_method(:"#{name}?") { !!wrapped }
      end
    end

    private

    # Recursively converts nested hashes (and arrays consisting solely of
    # hashes) into DynamicStruct instances; anything else passes through.
    def method_value_for(value)
      case value
      when Hash
        self.class.new(value)
      when Array
        if value.all? { |element| element.is_a?(Hash) }
          value.map { |element| self.class.new(element) }
        else
          value
        end
      else
        value
      end
    end
  end
end
+
34
+
@@ -0,0 +1,120 @@
1
require 'plines/redis_objects'

module Plines
  # Once a Plines::Job has been enqueued as a Qless job into redis,
  # an EnqueuedJob is used to represent and hold the additional state
  # that Plines needs to track about the job -- specifically, the three
  # sets of external dependencies (pending / resolved / timed out).
  class EnqueuedJob < Struct.new(:pipeline, :jid)
    include Plines::RedisObjectsHelpers

    attr_reader :qless, :redis

    # +qless+ is a Qless client; its redis connection is reused for the
    # dependency sets. The optional block is instance_eval'd (used by
    # .create to seed pending dependencies).
    def initialize(qless, pipeline, jid, &block)
      @qless = qless
      @redis = qless.redis
      super(pipeline, jid)
      instance_eval(&block) if block
    end

    # Builds an EnqueuedJob whose external dependencies all start pending.
    def self.create(qless, pipeline, jid, *external_dependencies)
      new(qless, pipeline, jid) do
        external_dependencies.each do |dep|
          pending_ext_deps << dep
        end
      end
    end

    # The underlying Qless job; NOTE(review): may be nil if the job no
    # longer exists in Qless -- try_to_update guards against that.
    def qless_job
      qless.jobs[jid]
    end

    def pending_external_dependencies
      pending_ext_deps.members
    end

    def resolved_external_dependencies
      resolved_ext_deps.members
    end

    def timed_out_external_dependencies
      timed_out_ext_deps.members
    end

    # Union of all three dependency sets.
    def all_external_dependencies
      pending_ext_deps.union(
        resolved_ext_deps, timed_out_ext_deps
      )
    end

    # Pending plus timed-out (i.e. everything not yet resolved).
    def unresolved_external_dependencies
      pending_ext_deps.union(timed_out_ext_deps)
    end

    # Moves +name+ out of the pending/timed-out sets into the resolved set.
    def resolve_external_dependency(name)
      update_external_dependency \
        name, resolved_ext_deps, pending_ext_deps, timed_out_ext_deps
    end

    # Moves +name+ out of the pending set into the timed-out set.
    def timeout_external_dependency(name)
      update_external_dependency \
        name, timed_out_ext_deps, pending_ext_deps
    end

    private

    alias id jid # id is needed by Redis::Objects

    # Redis-backed sets (macro provided via Plines::RedisObjectsHelpers).
    set :pending_ext_deps
    set :resolved_ext_deps
    set :timed_out_ext_deps

    CannotUpdateExternalDependencyError = Class.new(StandardError)

    # Retries the optimistic-locking update a bounded number of times to
    # ride out races with concurrent writers; raises if it never succeeds.
    def update_external_dependency(name, destination_set, *source_sets)
      assert_has_external_dependency!(name)

      result = nil

      5.times do
        result = try_to_update(qless_job, name, destination_set, *source_sets)
        break unless result == :needs_retry
      end

      if result == :needs_retry
        raise CannotUpdateExternalDependencyError,
          "Failed to update dependency #{name} after many tries"
      end
    end

    # One WATCH/MULTI attempt: atomically move +name+ between the sets and,
    # if it was the last pending dependency, move the qless job into its
    # processing queue. Returns :needs_retry when the watched key changed
    # under us and redis aborted the MULTI (nil response).
    def try_to_update(job, name, destination_set, *source_sets)
      redis.watch(pending_ext_deps.key) do
        pending_deps = self.pending_external_dependencies

        response = redis.multi do
          source_sets.each do |source_set|
            source_set.move(name, destination_set)
          end

          if job && pending_deps == [name]
            job.move(job.klass.processing_queue)
          end
        end

        return :needs_retry unless response
      end
    end

    # Raises ArgumentError unless +name+ appears in at least one of the
    # three dependency sets. Uses MULTI to check all three in one round trip.
    def assert_has_external_dependency!(name)
      results = redis.multi do
        pending_ext_deps.include?(name)
        resolved_ext_deps.include?(name)
        timed_out_ext_deps.include?(name)
      end

      if results.none?
        raise ArgumentError, "EnqueuedJob #{jid} does not have pending " +
          "external dependency #{name.inspect}"
      end
    end
  end
end
+
@@ -0,0 +1,30 @@
1
module Plines
  # This is a job that gets enqueued with a delay in order to timeout external
  # dependencies. When it runs, it will timeout the named external dependency
  # for the given jids. If the named dependency is the only remaining pending
  # dependency for any of the jobs identified by the jids, they will get moved
  # into their appropriate processing queue.
  class ExternalDependencyTimeout
    # Qless entry point: resolves the pipeline constant from its stored
    # name, finds the job batch, and times out the named dependency.
    def self.perform(job)
      pipeline = job.data.fetch('pipeline')
                    .split('::')
                    .inject(Object) { |namespace, const_name| namespace.const_get(const_name) }

      batch = JobBatch.find(job.client, pipeline,
                            job.data.fetch("job_batch_id"))

      batch.timeout_external_dependency(
        job.data.fetch("dep_name"),
        job.data.fetch("jids")
      )
    end

    # Builds the data payload that +perform+ expects, from a job batch,
    # the dependency name, and the affected jids.
    def self.job_data_for(job_batch, dep_name, jids)
      {
        "pipeline"     => job_batch.pipeline.name,
        "job_batch_id" => job_batch.id,
        "dep_name"     => dep_name,
        "jids"         => jids
      }
    end
  end
end
+
@@ -0,0 +1,58 @@
1
require 'delegate'

module Plines
  # A hash wrapper whose entries are reachable by string OR symbol key.
  # Plines batch data usually starts out with symbol keys but comes back
  # from JSON with string keys; since converting arbitrary strings to
  # symbols is unsafe (symbols are never GC'd), everything is stored
  # under string keys and symbol lookups are translated on read.
  class IndifferentHash < DelegateClass(Hash)
    NotAHashError = Class.new(TypeError)
    ConflictingEntriesError = Class.new(ArgumentError)

    # Instances are built exclusively through .from.
    private_class_method :new

    # Builds an IndifferentHash from +original+, recursively converting
    # nested hashes. Raises NotAHashError for non-hash input and
    # ConflictingEntriesError when two keys collide after stringification
    # (e.g. :a and "a").
    def self.from(original)
      unless original.is_a?(Hash) || original.is_a?(IndifferentHash)
        raise NotAHashError, "Expected a hash, got #{original.inspect}"
      end

      # The default proc retries symbol lookups as strings.
      backing = Hash.new { |hash, key| hash[key.to_s] if Symbol === key }

      original.each do |key, value|
        key = key.to_s

        if backing.key?(key)
          raise ConflictingEntriesError,
            "Hash has conflicting entries for #{key}: #{original}"
        end

        backing[key] = indifferent(value)
      end

      new(backing)
    end

    # Recursively converts hashes (including those inside arrays) to
    # IndifferentHash; other values pass through unchanged.
    def self.indifferent(object)
      if object.is_a?(Hash)
        from(object)
      elsif object.is_a?(Array)
        object.map { |element| indifferent(element) }
      else
        object
      end
    end

    # Like Hash#fetch, but a symbol key falls back to its string form
    # (Hash#fetch ignores the default proc, so we translate explicitly).
    def fetch(key)
      if Symbol === key && !has_key?(key) && has_key?(key.to_s)
        key = key.to_s
      end

      super
    end

    # Merging re-wraps both sides so the result stays indifferent.
    def merge(other)
      IndifferentHash.from(super(IndifferentHash.from(other)))
    end
  end
end
+