plines 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env rake
2
+ require_relative 'config/setup_load_paths'
3
+ require 'bundler/gem_helper'
4
+ Bundler::GemHelper.install_tasks
5
+
6
+ require 'qless/tasks'
7
+ namespace :qless do
8
+ task :set_redis_url do
9
+ if File.exist?('./config/redis_connection_url.txt')
10
+ ENV['REDIS_URL'] = File.read('./config/redis_connection_url.txt')
11
+ end
12
+ end
13
+
14
+ task :setup => :set_redis_url do
15
+ ENV['VVERBOSE'] = '1'
16
+ ENV['QUEUE'] = 'plines'
17
+ ENV['INTERVAL'] = '1.0'
18
+ end
19
+
20
+ desc "Start the Qless Web UI"
21
+ task :server => :set_redis_url do
22
+ sh "rackup config/config.ru"
23
+ end
24
+ end
25
+
26
+ require 'rspec/core/rake_task'
27
+
28
+ RSpec::Core::RakeTask.new(:spec) do |t|
29
+ t.rspec_opts = %w[--profile --format progress]
30
+ t.ruby_opts = "-Ispec -r./config/setup_load_paths -rsimplecov_setup"
31
+ end
32
+
33
+ if RUBY_ENGINE == 'ruby'
34
+ require 'cane/rake_task'
35
+
36
+ desc "Run cane to check quality metrics"
37
+ Cane::RakeTask.new(:quality) do |cane|
38
+ cane.style_glob = "lib/**/*.rb"
39
+ cane.abc_max = 16
40
+ cane.add_threshold 'coverage/coverage_percent.txt', :>=, 100
41
+ end
42
+ else
43
+ task :quality do
44
+ # no-op; Cane isn't supported on this interpretter
45
+ end
46
+ end
47
+
48
+ task default: [:spec, :quality]
49
+
50
+ namespace :ci do
51
+ desc "Run all tests both integrated and in isolation"
52
+ task :spec do
53
+ test_all_script = File.expand_path('../script/test_all', __FILE__)
54
+ sh test_all_script
55
+ end
56
+ end
57
+
58
+
59
+ desc "Run CI build"
60
+ task ci: %w[ ci:spec quality ]
61
+
@@ -0,0 +1,13 @@
1
+ require 'plines/configuration'
2
+ require 'plines/dependency_graph'
3
+ require 'plines/dynamic_struct'
4
+ require 'plines/enqueued_job'
5
+ require 'plines/external_dependency_timeout'
6
+ require 'plines/job'
7
+ require 'plines/job_enqueuer'
8
+ require 'plines/job_batch'
9
+ require 'plines/job_batch_list'
10
+ require 'plines/pipeline'
11
+ require 'plines/step'
12
+ require "plines/version"
13
+
@@ -0,0 +1,55 @@
1
module Plines
  # Stores global Plines configuration.
  #
  # Consumers register blocks (qless_client, batch_list_key,
  # qless_job_options) that Plines invokes later with a key or
  # batch-data argument, plus lifecycle callbacks.
  class Configuration
    # Raised when there is a configuration error.
    class Error < StandardError; end

    TWO_MONTHS_IN_SECONDS = 2 * 30 * 24 * 60 * 60

    # How long batch data is retained in redis, in seconds.
    attr_accessor :data_ttl_in_seconds

    # The registered qless-job-options block; read by the enqueuer.
    attr_reader :qless_job_options_block

    def initialize
      # Defaults raise so a forgotten configuration step surfaces as an
      # explicit error instead of a mysterious nil.
      qless_client   { raise Error, "qless_client has not been configured" }
      batch_list_key { raise Error, "batch_list_key has not been configured" }

      qless_job_options { |_job| {} }
      self.data_ttl_in_seconds = TWO_MONTHS_IN_SECONDS
      @callbacks = Hash.new { |hash, type| hash[type] = [] }
    end

    # Registers the block that yields a Qless client for a given key.
    def qless_client(&block)
      @qless_client_block = block
    end

    def qless_client_for(key)
      @qless_client_block.call(key)
    end

    # Registers the block that derives a batch list key from batch data.
    def batch_list_key(&block)
      @batch_list_key_block = block
    end

    def batch_list_key_for(batch_data)
      @batch_list_key_block.call(batch_data)
    end

    def data_ttl_in_milliseconds
      (data_ttl_in_seconds * 1000).to_i
    end

    # Registers the block that produces per-job qless options.
    def qless_job_options(&block)
      @qless_job_options_block = block
    end

    # Registers a callback run after a job batch is cancelled.
    def after_job_batch_cancellation(&block)
      @callbacks[:after_job_batch_cancellation] << block
    end

    # Invokes every callback registered under callback_type.
    def notify(callback_type, *args)
      @callbacks[callback_type].each { |callback| callback.call(*args) }
    end
  end
end
55
+
@@ -0,0 +1,81 @@
1
require 'set'

module Plines
  # Represents a dependency graph of Plines steps. This graph contains
  # Jobs (i.e. Step classes paired with data). The graph
  # takes care of preventing duplicate step instances.
  class DependencyGraph
    # FYI: steps is not ordered according to dependencies;
    # use #ordered_steps for that.
    attr_reader :steps

    # Raised when a circular dependency is detected.
    class CircularDependencyError < StandardError; end

    def initialize(pipeline, batch_data)
      klasses = pipeline.step_classes

      @steps = Job.accumulate_instances do
        klasses.each do |klass|
          klass.jobs_for(batch_data).each do |job|
            job.add_dependencies_for(batch_data)
          end
        end

        @terminal_jobs = pipeline.terminal_step.jobs_for(batch_data)
      end

      cleanup_and_validate_dependencies!
    end

    # Lazily yields every step such that a step's dependencies are
    # always yielded before the step itself.
    def ordered_steps
      seen = Set.new

      Enumerator.new do |yielder|
        steps.each { |step| yield_next_ordered_step_for(step, seen, yielder) }
      end
    end

    private

    # Walks the whole graph once, wiring terminal-job dependencies and
    # raising on cycles.
    def cleanup_and_validate_dependencies!
      @visited_steps = Set.new

      @steps.each do |step|
        depth_first_search_from(step) unless @visited_steps.include?(step)
      end
    end

    def depth_first_search_from(step, current_stack = Set.new)
      @visited_steps << step
      add_terminal_job_dependencies(step)

      if current_stack.include?(step)
        raise CircularDependencyError,
              "Your graph appears to have a circular dependency: " +
              current_stack.inspect
      end

      step.dependencies.each do |dep|
        depth_first_search_from(dep, current_stack | [step])
      end
    end

    # A step with no dependents (other than the terminal jobs themselves)
    # must feed the terminal jobs so the batch has a single end point.
    def add_terminal_job_dependencies(job)
      return unless job.dependents.none? && !@terminal_jobs.include?(job)

      @terminal_jobs.each { |terminal| terminal.add_dependency(job) }
    end

    # Post-order DFS emit: dependencies first, then the step itself.
    def yield_next_ordered_step_for(step, seen, yielder)
      return if seen.include?(step)

      step.dependencies.each do |dependency|
        yield_next_ordered_step_for(dependency, seen, yielder)
      end

      seen << step
      yielder.yield step
    end
  end
end
81
+
@@ -0,0 +1,34 @@
1
module Plines
  # Transforms an arbitrarily deeply nested hash into a dot-syntax
  # object. Useful as an alternative to a hash since it is "strongly
  # typed" in the sense that a fat-fingered attribute name raises
  # NoMethodError rather than silently returning nil as a hash would.
  class DynamicStruct
    attr_reader :attribute_names, :to_hash

    def initialize(hash)
      @to_hash = hash
      @attribute_names = hash.keys.map(&:to_sym)

      hash.each do |key, raw|
        value = method_value_for(raw)

        # A reader plus a boolean predicate ("key?") for every entry.
        define_singleton_method(key) { value }
        define_singleton_method(:"#{key}?") { !!value }
      end
    end

    private

    # Recursively wraps nested hashes — and arrays made up entirely of
    # hashes — so dot access works at any depth.
    def method_value_for(raw)
      case raw
      when Hash
        self.class.new(raw)
      when Array
        raw.all? { |item| item.is_a?(Hash) } ? raw.map { |item| self.class.new(item) } : raw
      else
        raw
      end
    end
  end
end
33
+
34
+
@@ -0,0 +1,120 @@
1
require 'plines/redis_objects'

module Plines
  # Once a Plines::Job has been enqueued as a Qless job into redis,
  # an EnqueuedJob is used to represent and hold the additional state
  # that Plines needs to track about the job.
  #
  # That state is three redis sets holding the job's external
  # dependency names: pending, resolved and timed out.
  class EnqueuedJob < Struct.new(:pipeline, :jid)
    include Plines::RedisObjectsHelpers

    attr_reader :qless, :redis

    # qless:    a Qless client; its redis connection is reused here.
    # pipeline: the pipeline this job belongs to.
    # jid:      the Qless job id (String).
    # An optional block is instance_eval'd; used by .create to seed deps.
    def initialize(qless, pipeline, jid, &block)
      @qless = qless
      @redis = qless.redis
      super(pipeline, jid)
      instance_eval(&block) if block
    end

    # Builds an EnqueuedJob, seeding the pending set with the given
    # external dependency names.
    def self.create(qless, pipeline, jid, *external_dependencies)
      new(qless, pipeline, jid) do
        external_dependencies.each do |dep|
          pending_ext_deps << dep
        end
      end
    end

    # The underlying Qless job for this jid.
    # NOTE(review): presumably nil when the job no longer exists in
    # qless — try_to_update guards against a nil job; confirm.
    def qless_job
      qless.jobs[jid]
    end

    def pending_external_dependencies
      pending_ext_deps.members
    end

    def resolved_external_dependencies
      resolved_ext_deps.members
    end

    def timed_out_external_dependencies
      timed_out_ext_deps.members
    end

    # Union of all three dependency sets.
    def all_external_dependencies
      pending_ext_deps.union(
        resolved_ext_deps, timed_out_ext_deps
      )
    end

    def unresolved_external_dependencies
      pending_ext_deps.union(timed_out_ext_deps)
    end

    # Marks `name` resolved. Timed-out deps are also a source set, so a
    # dependency that previously timed out can still be resolved.
    def resolve_external_dependency(name)
      update_external_dependency \
        name, resolved_ext_deps, pending_ext_deps, timed_out_ext_deps
    end

    # Marks `name` timed out (only pending deps can time out).
    def timeout_external_dependency(name)
      update_external_dependency \
        name, timed_out_ext_deps, pending_ext_deps
    end

    private

    alias id jid # id is needed by Redis::Objects

    # Redis::Objects set declarations — one redis set per category.
    set :pending_ext_deps
    set :resolved_ext_deps
    set :timed_out_ext_deps

    CannotUpdateExternalDependencyError = Class.new(StandardError)

    # Moves `name` from the source sets into destination_set, retrying
    # the optimistic WATCH/MULTI transaction up to 5 times before
    # raising CannotUpdateExternalDependencyError.
    # Raises ArgumentError when `name` is not a known dependency.
    def update_external_dependency(name, destination_set, *source_sets)
      assert_has_external_dependency!(name)

      result = nil

      5.times do
        result = try_to_update(qless_job, name, destination_set, *source_sets)
        break unless result == :needs_retry
      end

      if result == :needs_retry
        raise CannotUpdateExternalDependencyError,
          "Failed to update dependency #{name} after many tries"
      end
    end

    # One optimistic attempt: WATCH the pending set, then atomically
    # move `name` between sets and — when it was the only remaining
    # pending dependency — move the job into its processing queue.
    # Returns :needs_retry when the WATCHed key changed underneath us
    # (redis.multi returns nil in that case).
    def try_to_update(job, name, destination_set, *source_sets)
      redis.watch(pending_ext_deps.key) do
        # Snapshot pending deps before the MULTI; WATCH guarantees the
        # transaction aborts if they change before EXEC.
        pending_deps = self.pending_external_dependencies

        response = redis.multi do
          source_sets.each do |source_set|
            source_set.move(name, destination_set)
          end

          if job && pending_deps == [name]
            job.move(job.klass.processing_queue)
          end
        end

        return :needs_retry unless response
      end
    end

    # Raises unless `name` appears in at least one of the three sets.
    # NOTE(review): `results.none?` relies on redis-rb converting the
    # SISMEMBER replies inside MULTI to booleans (0 would be truthy in
    # Ruby) — confirm against the pinned redis gem version.
    def assert_has_external_dependency!(name)
      results = redis.multi do
        pending_ext_deps.include?(name)
        resolved_ext_deps.include?(name)
        timed_out_ext_deps.include?(name)
      end

      if results.none?
        raise ArgumentError, "EnqueuedJob #{jid} does not have pending " +
          "external dependency #{name.inspect}"
      end
    end
  end
end
120
+
@@ -0,0 +1,30 @@
1
module Plines
  # This is a job that gets enqueued with a delay in order to timeout
  # external dependencies. When it runs, it will timeout the named
  # external dependency for the given jids. If the named dependency is
  # the only remaining pending dependency for any of the jobs
  # identified by the jids, they will get moved into their appropriate
  # processing queue.
  class ExternalDependencyTimeout
    # Qless entry point: resolves the pipeline constant from its fully
    # qualified name, finds the batch, and times out the dependency.
    def self.perform(job)
      pipeline = job.data.fetch('pipeline').split('::')
                    .inject(Object) { |namespace, const| namespace.const_get(const) }

      batch_id  = job.data.fetch("job_batch_id")
      job_batch = JobBatch.find(job.client, pipeline, batch_id)

      job_batch.timeout_external_dependency(
        job.data.fetch("dep_name"),
        job.data.fetch("jids")
      )
    end

    # Builds the serializable payload stored on the delayed timeout job;
    # .perform reads these same keys back out.
    def self.job_data_for(job_batch, dep_name, jids)
      { "pipeline"     => job_batch.pipeline.name,
        "job_batch_id" => job_batch.id,
        "dep_name"     => dep_name,
        "jids"         => jids }
    end
  end
end
30
+
@@ -0,0 +1,58 @@
1
require 'delegate'

module Plines
  # Provides a hash that can be accessed by symbol or string keys.
  # This is useful because a plines job batch data hash is commonly
  # provided with symbol keys, but after round-tripping through
  # JSON it is converted to strings. We can't safely convert all
  # strings to symbols (as symbols are never GC'd) so instead we
  # normalize keys to strings and answer symbol lookups via a
  # default proc.
  class IndifferentHash < DelegateClass(Hash)
    NotAHashError = Class.new(TypeError)
    ConflictingEntriesError = Class.new(ArgumentError)

    private_class_method :new

    # Builds an IndifferentHash from `original`, recursively converting
    # nested hashes.
    #
    # Raises NotAHashError unless `original` is a Hash/IndifferentHash.
    # Raises ConflictingEntriesError when two keys (e.g. :a and "a")
    # collapse to the same string key.
    def self.from(original)
      unless original.is_a?(Hash) || original.is_a?(IndifferentHash)
        raise NotAHashError, "Expected a hash, got #{original.inspect}"
      end

      # Missing symbol keys fall through to their string form.
      indif = Hash.new { |hash, key| hash[key.to_s] if Symbol === key }

      original.each do |key, value|
        key = key.to_s

        if indif.has_key?(key)
          raise ConflictingEntriesError,
            "Hash has conflicting entries for #{key}: #{original}"
        end

        indif[key] = indifferent(value)
      end

      new(indif)
    end

    # Recursively makes hashes indifferent, including hashes nested
    # inside arrays; other values pass through unchanged.
    def self.indifferent(object)
      case object
      when Hash then from(object)
      when Array then object.map { |o| indifferent(o) }
      else object
      end
    end

    # Like Hash#fetch, but a symbol key falls back to its string form.
    # Supports Hash#fetch's optional default value and block; the
    # previous single-argument signature raised ArgumentError on
    # `fetch(key, default)`.
    def fetch(key, *default, &block)
      if !has_key?(key) && Symbol === key && has_key?(key.to_s)
        key = key.to_s
      end

      super(key, *default, &block)
    end

    # Merges `other` (made indifferent first) and returns a new
    # IndifferentHash. An optional conflict-resolution block is
    # forwarded to Hash#merge (previously blocks were silently ignored).
    def merge(other, &block)
      IndifferentHash.from super(IndifferentHash.from(other), &block)
    end
  end
end
58
+