plines 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,57 @@
1
+ require 'plines/redis_objects'
2
+
3
+ module Plines
4
+ # Represents a list of job batches that are grouped by
5
+ # some common trait (such as a user id). Batch ids have the form "<key>:<number>"; the number comes from the last_batch_num counter.
6
+ class JobBatchList < Struct.new(:pipeline, :key)
7
+ include Plines::RedisObjectsHelpers
8
+
9
+ counter :last_batch_num # number given to the most recently created batch (see create_new_batch)
10
+ attr_reader :qless, :redis
11
+
12
+ def initialize(pipeline, key) # Looks up the qless client for key through the pipeline configuration and keeps that client's redis.
13
+ super(pipeline, key)
14
+ @qless = pipeline.configuration.qless_client_for(key)
15
+ @redis = @qless.redis
16
+ end
17
+
18
+ def most_recent_batch # Finds the batch numbered by the counter's current value; returns nil when that value is zero.
19
+ batch_num = last_batch_num.value
20
+ return nil if batch_num.zero?
21
+ JobBatch.find(qless, pipeline, batch_id_for(batch_num))
22
+ end
23
+
24
+ def create_new_batch(batch_data, options = {}, &blk) # Creates a JobBatch whose number is the value returned by incrementing the counter; batch_data, options and the block are forwarded to JobBatch.create.
25
+ JobBatch.create(qless, pipeline,
26
+ batch_id_for(last_batch_num.increment),
27
+ batch_data, options, &blk)
28
+ end
29
+
30
+ def each(&block) # Yields the batch for each number from 1 up to the counter value; returns an Enumerator when called without a block.
31
+ return enum_for(:each) unless block_given?
32
+
33
+ 1.upto(last_batch_num.value) do |num|
34
+ begin
35
+ yield JobBatch.find(qless, pipeline, batch_id_for(num))
36
+ rescue JobBatch::CannotFindExistingJobBatchError
37
+ # We can't yield a batch we can't find, so that number is skipped.
38
+ end
39
+ end
40
+ end
41
+
42
+ def all_with_external_dependency_timeout(dep_name) # Batches from #each whose timed_out_external_deps include dep_name.
43
+ each.select do |batch|
44
+ batch.timed_out_external_deps.include?(dep_name)
45
+ end
46
+ end
47
+
48
+ private
49
+
50
+ alias id key # RedisObjectsHelpers#key_prefix reads `id`
51
+
52
+ def batch_id_for(number) # Joins id and number with ':'.
53
+ [id, number].join(':')
54
+ end
55
+ end
56
+ end
57
+
@@ -0,0 +1,83 @@
1
+ require 'set'
2
+
3
+ module Plines
4
+ # Responsible for enqueuing Qless jobs based on the given dependency graph.
5
+ class JobEnqueuer
6
+ def initialize(dependency_graph, job_batch, &block) # The block is called with a step and must return that step's qless options hash (see enqueue_job_for).
7
+ @dependency_graph = dependency_graph
8
+ @job_batch = job_batch
9
+ @qless_job_options_block = block
10
+ @timeout_reduction = job_batch.timeout_reduction
11
+ end
12
+
13
+ def enqueue_jobs # For each of the graph's ordered_steps: assigns a new jid, registers it with the batch and enqueues the job; afterwards enqueues the timeout jobs. Returns self.
14
+ @dependency_graph.ordered_steps.each do |step|
15
+ jid = Qless.generate_jid
16
+ jids[step] = jid
17
+ @job_batch.add_job(jid, *step.external_dependencies.map(&:name))
18
+
19
+ enqueue_job_for(step, jid, dependency_jids_for(step))
20
+ setup_external_dep_timeouts_for(step)
21
+ end
22
+
23
+ enqueue_external_dependency_timeouts
24
+
25
+ self
26
+ end
27
+
28
+ private
29
+
30
+ def jids # step => jid for the steps processed so far
31
+ @jids ||= {}
32
+ end
33
+
34
+ def qless # the qless client of the job batch
35
+ @job_batch.qless
36
+ end
37
+
38
+ def enqueue_job_for(step, jid, depends_on) # Enqueues through the step's class, adding '_job_batch_id' to the data and :depends / :jid to the options.
39
+ step.klass.enqueue_qless_job qless,
40
+ step.data.merge('_job_batch_id' => @job_batch.id),
41
+ @qless_job_options_block[step].merge(depends: depends_on, jid: jid)
42
+ end
43
+
44
+ def setup_external_dep_timeouts_for(step) # Files the step under a TimeoutKey for each of its external dependencies that has a :wait_up_to option.
45
+ step.external_dependencies.each do |dependency|
46
+ next unless timeout = dependency.options[:wait_up_to] # assignment is intentional; dependencies without a timeout are skipped
47
+
48
+ timeout_key = TimeoutKey.new(dependency.name,
49
+ timeout - @timeout_reduction)
50
+ external_dep_timeouts[timeout_key] << step
51
+ end
52
+ end
53
+
54
+ def dependency_jids_for(step) # jids recorded for the steps this step depends on
55
+ step.dependencies.map { |d| jids[d] }
56
+ end
57
+
58
+ def external_dep_timeouts # TimeoutKey => Set of steps; an unknown key is given a new empty Set
59
+ @external_dep_timeouts ||= Hash.new do |h, k|
60
+ h[k] = Set.new
61
+ end
62
+ end
63
+
64
+ TIMEOUT_JOB_PRIORITY = 999999 # an arbitrary high priority
65
+
66
+ def enqueue_external_dependency_timeouts # Puts one ExternalDependencyTimeout job per TimeoutKey, on the processing queue of the first step in its set, and records the jid on the batch.
67
+ external_dep_timeouts.each do |tk, jobs|
68
+ job_ids = jobs.map { |k| jids[k] }
69
+ job_data = ExternalDependencyTimeout.job_data_for \
70
+ @job_batch, tk.dep_name, job_ids
71
+
72
+ jid = qless.queues[jobs.first.klass.processing_queue].put \
73
+ ExternalDependencyTimeout, job_data,
74
+ delay: tk.timeout, priority: TIMEOUT_JOB_PRIORITY
75
+
76
+ @job_batch.track_timeout_job(tk.dep_name, jid)
77
+ end
78
+ end
79
+
80
+ TimeoutKey = Struct.new(:dep_name, :timeout) # hash key for external_dep_timeouts: dependency name plus the reduced timeout
81
+ end
82
+ end
83
+
@@ -0,0 +1,97 @@
1
+ require 'qless'
2
+ require 'forwardable'
3
+ require 'plines/indifferent_hash'
4
+
5
+ module Plines
6
+ # This module should be extended onto a class or module in order
7
+ # to make it a pipeline. Steps declared within that module will
8
+ # automatically belong to that pipeline. This enables one application
9
+ # to have multiple pipelines.
10
+ module Pipeline
11
+ extend Forwardable
12
+ def_delegators :configuration # NOTE(review): no method names are listed, so this looks like it delegates nothing - confirm intent
13
+
14
+ DEFAULT_QUEUE = "plines"
15
+ AWAITING_EXTERNAL_DEPENDENCY_QUEUE = "awaiting_ext_dep"
16
+
17
+ def configuration # Builds the pipeline's Configuration on first use and memoizes it.
18
+ @configuration ||= Configuration.new
19
+ end
20
+
21
+ def configure # Yields the configuration to the given block.
22
+ yield configuration
23
+ end
24
+
25
+ def enqueue_jobs_for(batch_data, options = {}) # Builds the dependency graph for batch_data and enqueues its jobs inside a newly created job batch; returns the result of create_new_batch.
26
+ batch_data = IndifferentHash.from(batch_data)
27
+ graph = DependencyGraph.new(self, batch_data)
28
+ job_batch_list = job_batch_list_for(batch_data)
29
+
30
+ job_batch_list.create_new_batch(batch_data, options) do |job_batch|
31
+ job_options_block = configuration.qless_job_options_block
32
+ JobEnqueuer.new(graph, job_batch, &job_options_block).enqueue_jobs
33
+ end
34
+ end
35
+
36
+ def most_recent_job_batch_for(batch_data) # Most recent batch of the list that batch_data maps to.
37
+ job_batch_list_for(batch_data).most_recent_batch
38
+ end
39
+
40
+ def find_job_batch(id) # The list key is the part of id before its final ":<digits>"; it selects the qless client used for the lookup.
41
+ key = id[/\A(.*):\d+\z/, 1] # http://rubular.com/r/fMGv1TaZZA
42
+ qless = configuration.qless_client_for(key)
43
+ Plines::JobBatch.find(qless, self, id)
44
+ end
45
+
46
+ def step_classes # Memoized array of step classes registered with this pipeline.
47
+ @step_classes ||= []
48
+ end
49
+
50
+ # Null Object pattern implementation of a step class; it never produces jobs.
51
+ class NullStep
52
+ def self.jobs_for(*args)
53
+ []
54
+ end
55
+ end
56
+
57
+ # Error raised when two steps are declared as the same boundary step.
58
+ # Having more than one initial or terminal step is not well defined.
59
+ class BoundaryStepAlreadySetError < StandardError; end
60
+
61
+ def self.define_boundary_step(name) # Defines a reader that defaults to NullStep and a writer that raises unless the current value is still NullStep.
62
+ define_method "#{name}=" do |value|
63
+ current_value = public_send(name)
64
+ if current_value == NullStep
65
+ instance_variable_set(:"@#{name}", value)
66
+ else
67
+ raise BoundaryStepAlreadySetError,
68
+ "The #{name} for pipeline #{self} is already set. " +
69
+ "Multiple of these boundary steps are not supported."
70
+ end
71
+ end
72
+
73
+ define_method name do
74
+ current_value = instance_variable_get(:"@#{name}")
75
+ return current_value if current_value
76
+ instance_variable_set(:"@#{name}", NullStep) # first read stores and returns NullStep
77
+ end
78
+ end
79
+ private_class_method :define_boundary_step
80
+
81
+ define_boundary_step :initial_step
82
+ define_boundary_step :terminal_step
83
+
84
+ def job_batch_list_for(batch_data) # JobBatchList for the key the configuration derives from batch_data.
85
+ key = configuration.batch_list_key_for(batch_data)
86
+ JobBatchList.new(self, key)
87
+ end
88
+
89
+ def matching_older_unfinished_job_batches(main_job_batch) # Batches from the same list that are not complete and were created before main_job_batch.
90
+ job_batch_list = job_batch_list_for(main_job_batch.data)
91
+ job_batch_list.each.select do |job_batch|
92
+ !job_batch.complete? && job_batch.created_at < main_job_batch.created_at
93
+ end
94
+ end
95
+ end
96
+ end
97
+
@@ -0,0 +1,108 @@
1
+ require 'redis'
2
+ require 'redis/value'
3
+ require 'redis/lock'
4
+ require 'redis/set'
5
+ require 'redis/list'
6
+ require 'redis/hash_key'
7
+ require 'redis/counter'
8
+
9
+ module Plines
10
+ module RedisObjectsHelpers # Mixin that gives a class macros for declaring memoized Redis-backed attributes; the including class must provide redis, pipeline and id.
11
+ def new_redis_object(klass, key, args) # Instantiates klass under the key "<key_prefix>:<key>" with this object's redis and any extra args.
12
+ klass.new([key_prefix, key].join(':'), redis, *args)
13
+ end
14
+
15
+ def key_prefix # Memoized "<class prefix>:<pipeline name>:<unqualified class name>:<id>".
16
+ @key_prefix ||= [
17
+ self.class.redis_key_prefix,
18
+ pipeline.name,
19
+ self.class.name.split('::').last,
20
+ id
21
+ ].join(':')
22
+ end
23
+
24
+ def declared_redis_object_keys # Keys of every object declared through the class-level macros.
25
+ self.class.declared_redis_object_names.map { |n| send(n).key }
26
+ end
27
+
28
+ def self.included(klass) # Makes the ClassMethods macros available on the including class.
29
+ klass.extend ClassMethods
30
+ end
31
+
32
+ module ClassMethods
33
+ def value(name, *args) # Declares a memoized reader `name` returning a ::Redis::Value; args are interpolated into the generated source.
34
+ declared_redis_object_names << name
35
+
36
+ class_eval <<-EOS, __FILE__, __LINE__ + 1
37
+ def #{name}
38
+ @#{name} ||= new_redis_object(
39
+ ::Redis::Value, #{name.inspect}, #{args})
40
+ end
41
+ EOS
42
+ end
43
+
44
+ def lock(name, *args) # Same as value, but the reader returns a ::Redis::Lock.
45
+ declared_redis_object_names << name
46
+
47
+ class_eval <<-EOS, __FILE__, __LINE__ + 1
48
+ def #{name}
49
+ @#{name} ||= new_redis_object(
50
+ ::Redis::Lock, #{name.inspect}, #{args})
51
+ end
52
+ EOS
53
+ end
54
+
55
+ def set(name, *args) # Same as value, but the reader returns a ::Redis::Set.
56
+ declared_redis_object_names << name
57
+ class_eval <<-EOS, __FILE__, __LINE__ + 1
58
+ def #{name}
59
+ @#{name} ||= new_redis_object(
60
+ ::Redis::Set, #{name.inspect}, #{args})
61
+ end
62
+ EOS
63
+ end
64
+
65
+ def list(name, *args) # Same as value, but the reader returns a ::Redis::List.
66
+ declared_redis_object_names << name
67
+
68
+ class_eval <<-EOS, __FILE__, __LINE__ + 1
69
+ def #{name}
70
+ @#{name} ||= new_redis_object(
71
+ ::Redis::List, #{name.inspect}, #{args})
72
+ end
73
+ EOS
74
+ end
75
+
76
+ def hash_key(name, *args) # Same as value, but the reader returns a ::Redis::HashKey.
77
+ declared_redis_object_names << name
78
+
79
+ class_eval <<-EOS, __FILE__, __LINE__ + 1
80
+ def #{name}
81
+ @#{name} ||= new_redis_object(
82
+ ::Redis::HashKey, #{name.inspect}, #{args})
83
+ end
84
+ EOS
85
+ end
86
+
87
+ def counter(name, *args) # Same as value, but the reader returns a ::Redis::Counter.
88
+ declared_redis_object_names << name
89
+
90
+ class_eval <<-EOS, __FILE__, __LINE__ + 1
91
+ def #{name}
92
+ @#{name} ||= new_redis_object(
93
+ ::Redis::Counter, #{name.inspect}, #{args})
94
+ end
95
+ EOS
96
+ end
97
+
98
+ def declared_redis_object_names # Names passed to the macros above, in declaration order.
99
+ @declared_redis_object_names ||= []
100
+ end
101
+
102
+ def redis_key_prefix(override = nil) # Stores override only while no prefix is stored yet (a later override is ignored); returns the stored prefix or "plines".
103
+ (@redis_key_prefix ||= override) || "plines"
104
+ end
105
+ end
106
+ end
107
+ end
108
+
@@ -0,0 +1,269 @@
1
+ require 'forwardable'
2
+
3
+ module Plines
4
+ ExternalDependency = Struct.new(:name, :options) # one external dependency: its name plus the options it was declared with
5
+
6
+ # Keeps track of a list of external dependencies.
7
+ # These are yielded as the first argument to the block given to
8
+ # `has_external_dependencies`.
9
+ class ExternalDependencyList
10
+ extend Forwardable
11
+ def_delegators :@dependencies, :any?
12
+
13
+ def initialize # Starts with no dependencies.
14
+ @dependencies = []
15
+ end
16
+
17
+ def add(name, options = {}) # Appends an ExternalDependency built from name and options.
18
+ @dependencies << ExternalDependency.new(name, options)
19
+ end
20
+
21
+ def to_a # Returns a copy, so callers cannot alter the internal array.
22
+ @dependencies.dup
23
+ end
24
+ end
25
+
26
+ # This is the module that should be extended onto any class that
27
+ # is intended to be a Plines step (setup runs in the `extended` hook below).
28
+ module Step
29
+ # Error raised when you extend Plines::Step onto a class that is
30
+ # not nested within a pipeline module.
31
+ class NotDeclaredInPipelineError < StandardError; end
32
+
33
+ def self.extended(klass) # Mixes in InstanceMethods, checks the enclosing namespace is a Pipeline, installs the default fan-out and registers the class with its pipeline.
34
+ klass.class_eval do
35
+ include InstanceMethods
36
+
37
+ unless pipeline.is_a?(Plines::Pipeline)
38
+ raise NotDeclaredInPipelineError,
39
+ "#{klass} is not nested in a pipeline module and thus " +
40
+ "cannot be a Plines::Step. All plines steps must be " +
41
+ "declared within pipeline modules."
42
+ end
43
+
44
+ fan_out { |d| [d] } # default to one step instance
45
+ pipeline.step_classes << self
46
+ end
47
+ end
48
+
49
+ DEFAULT_DEPENDENCY_FILTER = Proc.new { true } # accepts every candidate dependency job
50
+
51
+ def depends_on(*klasses, &block) # Declares dependencies on the named steps; the optional block decides which of their jobs count (all of them by default).
52
+ klasses.each do |klass|
53
+ dependency_filters[klass] = (block || DEFAULT_DEPENDENCY_FILTER)
54
+ end
55
+ end
56
+
57
+ def depended_on_by_all_steps # Registers this step as the pipeline's initial step.
58
+ pipeline.initial_step = self
59
+ end
60
+
61
+ def depends_on_all_steps # Registers this step as the pipeline's terminal step.
62
+ pipeline.terminal_step = self
63
+ end
64
+
65
+ def run_jobs_in_serial # Makes each job of this step depend on the job whose data precedes its own in my_data_hashes.
66
+ depends_on step_name do |data|
67
+ prior_data = data.my_data_hashes.each_cons(2) do |(prior, current)|
68
+ break prior if current == data.my_data
69
+ end # without a match, prior_data is each_cons's own return value - NOTE(review): presumably never equal to a job's data; confirm
70
+
71
+ data.their_data == prior_data
72
+ end
73
+ end
74
+
75
+ def fan_out(&block) # Appends a fan-out block; jobs_for applies the blocks in the order they were added.
76
+ @fan_out_blocks ||= []
77
+ @fan_out_blocks << block
78
+ end
79
+
80
+ def has_external_dependencies(*args, &block) # With a block: registers it. Without one: registers a lambda that adds each name in args, using a trailing Hash (if any) as the options.
81
+ block ||= begin
82
+ options = args.last.is_a?(Hash) ? args.pop : {}
83
+ lambda do |deps, _|
84
+ args.each do |name|
85
+ deps.add name, options
86
+ end
87
+ end
88
+ end
89
+
90
+ external_dependency_definitions << block
91
+ end
92
+
93
+ def has_external_dependencies_for?(data) # True when any registered definition adds at least one dependency for data.
94
+ external_dependency_definitions.any? do |block|
95
+ list = ExternalDependencyList.new
96
+ block.call(list, data)
97
+ list.any?
98
+ end
99
+ end
100
+
101
+ def external_dependencies_for(data) # Array of the dependencies that all registered definitions add for data.
102
+ list = ExternalDependencyList.new
103
+
104
+ external_dependency_definitions.each do |block|
105
+ block.call(list, data)
106
+ end
107
+
108
+ list.to_a
109
+ end
110
+
111
+ def dependencies_for(job, batch_data) # Enumerator over the declared dependency jobs of job, or over the initial step's jobs when none were yielded.
112
+ Enumerator.new do |yielder|
113
+ has_dependencies = false
114
+
115
+ each_declared_dependency_job_for(job, batch_data) do |job|
116
+ has_dependencies = true
117
+ yielder.yield job
118
+ end
119
+
120
+ each_initial_step_job_for(job, batch_data) do |job|
121
+ yielder.yield job
122
+ end unless has_dependencies
123
+ end
124
+ end
125
+
126
+ def jobs_for(batch_data) # Passes batch_data through every fan-out block in turn (flattening each block's results) and builds one Job per resulting hash.
127
+ @fan_out_blocks.inject([batch_data]) do |job_data_hashes, fan_out_block|
128
+ job_data_hashes.flat_map { |job_data| fan_out_block.call(job_data) }
129
+ end.map do |job_data|
130
+ Job.build(self, job_data)
131
+ end
132
+ end
133
+
134
+ def perform(qless_job) # Loads the job's batch; while the batch is still being created the job is moved back to its queue with delay: 2, otherwise a step instance is run.
135
+ batch = JobBatch.find(qless_job.client, pipeline,
136
+ qless_job.data.fetch("_job_batch_id"))
137
+
138
+ if batch.creation_in_progress?
139
+ qless_job.move(qless_job.queue_name, delay: 2)
140
+ return
141
+ end
142
+
143
+ job_data = DynamicStruct.new(qless_job.data)
144
+
145
+ qless_job.after_complete do
146
+ batch.mark_job_as_complete(qless_job.jid)
147
+ end
148
+
149
+ new(batch, job_data, qless_job)
150
+ .send(:around_perform) # around_perform is private in InstanceMethods, hence send
151
+ end
152
+
153
+ def external_dependency_definitions # Memoized array of blocks registered by has_external_dependencies.
154
+ @external_dependency_definitions ||= []
155
+ end
156
+
157
+ def qless_options # Memoized QlessJobOptions; yielded for configuration when a block is given, and always returned.
158
+ @qless_options ||= QlessJobOptions.new
159
+ yield @qless_options if block_given?
160
+ @qless_options
161
+ end
162
+
163
+ def enqueue_qless_job(qless, data, options = {}) # Picks the queue, overlays this step's priority/retries/tags onto options (the passed hash is mutated) and puts the job.
164
+ queue_name = if has_external_dependencies_for?(data)
165
+ Pipeline::AWAITING_EXTERNAL_DEPENDENCY_QUEUE
166
+ elsif options[:queue] && processing_queue == :plines # options[:queue] is honoured only while the step's queue is still :plines
167
+ options[:queue]
168
+ else
169
+ processing_queue
170
+ end
171
+
172
+ queue = qless.queues[queue_name]
173
+
174
+ options[:priority] = qless_options.priority if qless_options.priority
175
+ options[:retries] = qless_options.retries if qless_options.retries
176
+ options[:tags] = Array(options[:tags]) | qless_options.tags
177
+
178
+ queue.put(self, data, options)
179
+ end
180
+
181
+ def processing_queue # Queue configured in this step's qless_options.
182
+ qless_options.queue
183
+ end
184
+
185
+ def pipeline # Memoized enclosing namespace of this class, resolved constant by constant from its name.
186
+ @pipeline ||= begin
187
+ namespaces = name.split('::')
188
+ namespaces.pop # ignore the last one
189
+ namespaces.inject(Object) { |ns, mod| ns.const_get(mod) }
190
+ end
191
+ end
192
+
193
+ def step_name # Unqualified class name as a symbol, memoized.
194
+ @step_name ||= name.split('::').last.to_sym
195
+ end
196
+
197
+ private
198
+
199
+ def dependency_filters # step name => filter block, filled in by depends_on
200
+ @dependency_filters ||= {}
201
+ end
202
+
203
+ DependencyData = Struct.new(:my_data, :their_data,
204
+ :my_data_hashes, :their_data_hashes) # argument passed to every dependency filter
205
+
206
+ def each_declared_dependency_job_for(my_job, batch_data) # Yields each job of every declared dependency step that the step's filter accepts for my_job.
207
+ my_data_hashes = jobs_for(batch_data).map(&:data)
208
+
209
+ dependency_filters.each do |klass, filter|
210
+ klass = pipeline.const_get(klass)
211
+ their_jobs = klass.jobs_for(batch_data)
212
+ their_data_hashes = their_jobs.map(&:data)
213
+
214
+ their_jobs.each do |their_job|
215
+ yield their_job if filter.call(DependencyData.new(
216
+ my_job.data, their_job.data,
217
+ my_data_hashes, their_data_hashes
218
+ ))
219
+ end
220
+ end
221
+ end
222
+
223
+ def each_initial_step_job_for(job, batch_data) # Yields every job of the pipeline's initial step, unless this step is itself the initial step.
224
+ return if pipeline.initial_step == self
225
+
226
+ pipeline.initial_step.jobs_for(batch_data).each do |dependency|
227
+ yield dependency
228
+ end
229
+ end
230
+
231
+ module InstanceMethods # Instance-side behaviour that the `extended` hook includes into every step class.
232
+ extend Forwardable
233
+ attr_reader :job_data, :job_batch, :qless_job
234
+ def_delegator "self.class", :enqueue_qless_job
235
+
236
+ def initialize(job_batch, job_data, qless_job) # Stores the arguments and builds an EnqueuedJob for the qless job's jid.
237
+ @job_batch = job_batch
238
+ @job_data = job_data
239
+ @qless_job = qless_job
240
+ @enqueued_job = EnqueuedJob.new(qless_job.client,
241
+ self.class.pipeline, qless_job.jid)
242
+ end
243
+
244
+ private
245
+
246
+ def around_perform # Calls perform, which is not defined in this module - presumably supplied by the step class; verify.
247
+ perform
248
+ end
249
+
250
+ def unresolved_external_dependencies # Memoized value read from the EnqueuedJob.
251
+ @unresolved_external_dependencies ||=
252
+ @enqueued_job.unresolved_external_dependencies
253
+ end
254
+ end
255
+
256
+ QlessJobOptions = Struct.new(:tags, :priority, :queue, :retries) do
257
+ def initialize # Defaults queue to :plines and tags to an empty array.
258
+ super
259
+ self.queue ||= :plines
260
+ self.tags ||= []
261
+ end
262
+
263
+ def tag=(value) # Replaces tags with Array(value).
264
+ self.tags = Array(value)
265
+ end
266
+ end
267
+ end
268
+ end
269
+