plines 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,57 @@
1
+ require 'plines/redis_objects'
2
+
3
module Plines
  # Represents a list of job batches that are grouped by
  # some common trait (such as a user id).
  class JobBatchList < Struct.new(:pipeline, :key)
    include Plines::RedisObjectsHelpers

    # Monotonically increasing counter used to mint sequential batch ids.
    counter :last_batch_num
    attr_reader :qless, :redis

    def initialize(pipeline, key)
      super(pipeline, key)
      @qless = pipeline.configuration.qless_client_for(key)
      @redis = @qless.redis
    end

    # Returns the most recently created batch, or nil when none exist yet.
    def most_recent_batch
      current_num = last_batch_num.value
      current_num.zero? ? nil : JobBatch.find(qless, pipeline, batch_id_for(current_num))
    end

    # Creates a batch with the next sequential id; forwards data,
    # options and the block to JobBatch.create.
    def create_new_batch(batch_data, options = {}, &blk)
      new_batch_id = batch_id_for(last_batch_num.increment)
      JobBatch.create(qless, pipeline, new_batch_id, batch_data, options, &blk)
    end

    # Yields each batch in creation order; returns an enumerator when no
    # block is given. Batches that can no longer be found are skipped.
    def each(&block)
      return enum_for(:each) unless block_given?

      1.upto(last_batch_num.value) do |batch_number|
        batch = begin
          JobBatch.find(qless, pipeline, batch_id_for(batch_number))
        rescue JobBatch::CannotFindExistingJobBatchError
          nil # we can't yield a batch we can't find
        end
        yield batch if batch
      end
    end

    # Returns every batch whose external dependency `dep_name` timed out.
    def all_with_external_dependency_timeout(dep_name)
      matching = []
      each do |batch|
        matching << batch if batch.timed_out_external_deps.include?(dep_name)
      end
      matching
    end

    private

    alias id key

    # Batch ids are "<list key>:<sequence number>".
    def batch_id_for(number)
      "#{id}:#{number}"
    end
  end
end
57
+
@@ -0,0 +1,83 @@
1
+ require 'set'
2
+
3
module Plines
  # Responsible for enqueing Qless jobs based on the given dependency graph.
  class JobEnqueuer
    # dependency_graph - supplies topologically ordered steps.
    # job_batch        - the batch the enqueued jobs belong to.
    # block            - maps a step to extra qless job options.
    def initialize(dependency_graph, job_batch, &block)
      @dependency_graph = dependency_graph
      @job_batch = job_batch
      @qless_job_options_block = block
      @timeout_reduction = job_batch.timeout_reduction
    end

    # Enqueues one qless job per step (wiring up inter-job dependencies),
    # then enqueues the external-dependency timeout jobs. Returns self.
    def enqueue_jobs
      @dependency_graph.ordered_steps.each do |step|
        step_jid = Qless.generate_jid
        jids[step] = step_jid
        @job_batch.add_job(step_jid, *step.external_dependencies.map(&:name))

        enqueue_job_for(step, step_jid, dependency_jids_for(step))
        setup_external_dep_timeouts_for(step)
      end

      enqueue_external_dependency_timeouts

      self
    end

    private

    # Memoized map from step to its assigned jid.
    def jids
      @jids ||= {}
    end

    def qless
      @job_batch.qless
    end

    def enqueue_job_for(step, jid, depends_on)
      job_options = @qless_job_options_block[step].merge(depends: depends_on, jid: jid)
      step.klass.enqueue_qless_job qless,
        step.data.merge('_job_batch_id' => @job_batch.id),
        job_options
    end

    # Records which steps share a (dependency name, effective timeout) pair.
    def setup_external_dep_timeouts_for(step)
      step.external_dependencies.each do |dependency|
        wait_time = dependency.options[:wait_up_to]
        next unless wait_time

        timeout_key = TimeoutKey.new(dependency.name,
                                     wait_time - @timeout_reduction)
        external_dep_timeouts[timeout_key] << step
      end
    end

    def dependency_jids_for(step)
      step.dependencies.map { |dep| jids[dep] }
    end

    def external_dep_timeouts
      @external_dep_timeouts ||= Hash.new { |hash, key| hash[key] = Set.new }
    end

    TIMEOUT_JOB_PRIORITY = 999999 # an arbitrary high priority

    # Enqueues one ExternalDependencyTimeout job per timeout key, delayed
    # by the (already reduced) timeout, and tracks its jid on the batch.
    def enqueue_external_dependency_timeouts
      external_dep_timeouts.each do |timeout_key, steps|
        timed_out_jids = steps.map { |s| jids[s] }
        job_data = ExternalDependencyTimeout.job_data_for(
          @job_batch, timeout_key.dep_name, timed_out_jids)

        queue = qless.queues[steps.first.klass.processing_queue]
        timeout_jid = queue.put(ExternalDependencyTimeout, job_data,
                                delay: timeout_key.timeout,
                                priority: TIMEOUT_JOB_PRIORITY)

        @job_batch.track_timeout_job(timeout_key.dep_name, timeout_jid)
      end
    end

    TimeoutKey = Struct.new(:dep_name, :timeout)
  end
end
83
+
@@ -0,0 +1,97 @@
1
+ require 'qless'
2
+ require 'forwardable'
3
+ require 'plines/indifferent_hash'
4
+
5
module Plines
  # This module should be extended onto a class or module in order
  # to make it a pipeline. Steps declared within that module will
  # automatically belong to that pipeline. This enables one application
  # to have multiple pipelines.
  module Pipeline
    extend Forwardable
    # NOTE(review): a bare `def_delegators :configuration` (with no method
    # names listed) was removed here -- it delegated nothing and was a no-op.

    # Default queue for jobs with no special queue assignment.
    DEFAULT_QUEUE = "plines"
    # Queue holding jobs that wait on unresolved external dependencies.
    AWAITING_EXTERNAL_DEPENDENCY_QUEUE = "awaiting_ext_dep"

    # Lazily created, per-pipeline configuration object.
    def configuration
      @configuration ||= Configuration.new
    end

    def configure
      yield configuration
    end

    # Builds the dependency graph for `batch_data` and enqueues a qless
    # job for every step, inside a newly created job batch.
    def enqueue_jobs_for(batch_data, options = {})
      batch_data = IndifferentHash.from(batch_data)
      graph = DependencyGraph.new(self, batch_data)
      job_batch_list = job_batch_list_for(batch_data)

      job_batch_list.create_new_batch(batch_data, options) do |job_batch|
        job_options_block = configuration.qless_job_options_block
        JobEnqueuer.new(graph, job_batch, &job_options_block).enqueue_jobs
      end
    end

    def most_recent_job_batch_for(batch_data)
      job_batch_list_for(batch_data).most_recent_batch
    end

    # Looks up a batch by its full id ("<batch list key>:<number>"); the
    # key portion selects which qless client/shard to query.
    def find_job_batch(id)
      key = id[/\A(.*):\d+\z/, 1] # http://rubular.com/r/fMGv1TaZZA
      qless = configuration.qless_client_for(key)
      Plines::JobBatch.find(qless, self, id)
    end

    def step_classes
      @step_classes ||= []
    end

    # Null Object pattern implementation of a step class
    class NullStep
      def self.jobs_for(*args)
        []
      end
    end

    # Error raised when two steps are declared as the same boundary step.
    # Having more than one initial or terminal step is not well defined.
    class BoundaryStepAlreadySetError < StandardError; end

    # Defines a reader/writer pair for a boundary step. The reader
    # defaults to NullStep; the writer may only be assigned once.
    def self.define_boundary_step(name)
      define_method "#{name}=" do |value|
        current_value = public_send(name)
        if current_value == NullStep
          instance_variable_set(:"@#{name}", value)
        else
          raise BoundaryStepAlreadySetError,
            "The #{name} for pipeline #{self} is already set. " +
            "Multiple of these boundary steps are not supported."
        end
      end

      define_method name do
        current_value = instance_variable_get(:"@#{name}")
        return current_value if current_value
        instance_variable_set(:"@#{name}", NullStep)
      end
    end
    private_class_method :define_boundary_step

    define_boundary_step :initial_step
    define_boundary_step :terminal_step

    def job_batch_list_for(batch_data)
      key = configuration.batch_list_key_for(batch_data)
      JobBatchList.new(self, key)
    end

    # Returns unfinished batches with the same batch data that were
    # created before `main_job_batch`.
    def matching_older_unfinished_job_batches(main_job_batch)
      job_batch_list = job_batch_list_for(main_job_batch.data)
      job_batch_list.each.select do |job_batch|
        !job_batch.complete? && job_batch.created_at < main_job_batch.created_at
      end
    end
  end
end
97
+
@@ -0,0 +1,108 @@
1
+ require 'redis'
2
+ require 'redis/value'
3
+ require 'redis/lock'
4
+ require 'redis/set'
5
+ require 'redis/list'
6
+ require 'redis/hash_key'
7
+ require 'redis/counter'
8
+
9
module Plines
  # Mixin that provides a small class-level DSL (`value`, `lock`, `set`,
  # `list`, `hash_key`, `counter`) for declaring lazily instantiated
  # redis objects whose keys are namespaced per pipeline/class/id.
  module RedisObjectsHelpers
    # Instantiates a redis-objects instance with a namespaced key.
    def new_redis_object(klass, key, args)
      klass.new([key_prefix, key].join(':'), redis, *args)
    end

    # "<prefix>:<pipeline name>:<class basename>:<id>"
    def key_prefix
      @key_prefix ||= [
        self.class.redis_key_prefix,
        pipeline.name,
        self.class.name.split('::').last,
        id
      ].join(':')
    end

    # The full redis keys of every declared object on this instance.
    def declared_redis_object_keys
      self.class.declared_redis_object_names.map { |n| send(n).key }
    end

    def self.included(klass)
      klass.extend ClassMethods
    end

    module ClassMethods
      # Maps each DSL macro to the redis-objects class it instantiates.
      # The class names are kept as strings so they are only resolved
      # when the generated reader method is first called.
      REDIS_OBJECT_CLASSES = {
        value:    '::Redis::Value',
        lock:     '::Redis::Lock',
        set:      '::Redis::Set',
        list:     '::Redis::List',
        hash_key: '::Redis::HashKey',
        counter:  '::Redis::Counter'
      }.freeze

      # The six macros were previously six copy-pasted method definitions
      # differing only in the redis class; generate them from the table.
      # Each macro records the name and defines a memoized reader.
      REDIS_OBJECT_CLASSES.each do |macro, redis_class|
        define_method(macro) do |name, *args|
          declared_redis_object_names << name

          class_eval <<-EOS, __FILE__, __LINE__ + 1
            def #{name}
              @#{name} ||= new_redis_object(
                #{redis_class}, #{name.inspect}, #{args})
            end
          EOS
        end
      end

      def declared_redis_object_names
        @declared_redis_object_names ||= []
      end

      # First truthy override wins; falls back to "plines".
      def redis_key_prefix(override = nil)
        (@redis_key_prefix ||= override) || "plines"
      end
    end
  end
end
108
+
@@ -0,0 +1,269 @@
1
+ require 'forwardable'
2
+
3
+ module Plines
4
+ ExternalDependency = Struct.new(:name, :options)
5
+
6
+ # Keeps track of a list of external dependencies.
7
+ # These are yielded as the first argument to
8
+ # `has_external_dependencies`.
9
+ class ExternalDependencyList
10
+ extend Forwardable
11
+ def_delegators :@dependencies, :any?
12
+
13
+ def initialize
14
+ @dependencies = []
15
+ end
16
+
17
+ def add(name, options = {})
18
+ @dependencies << ExternalDependency.new(name, options)
19
+ end
20
+
21
+ def to_a
22
+ @dependencies.dup
23
+ end
24
+ end
25
+
26
+ # This is the module that should be included in any class that
27
+ # is intended to be a Plines step.
28
+ module Step
29
+ # Error raised when you include Plines::Step in a module that is
30
+ # not nested within a pipeline module.
31
+ class NotDeclaredInPipelineError < StandardError; end
32
+
33
+ def self.extended(klass)
34
+ klass.class_eval do
35
+ include InstanceMethods
36
+
37
+ unless pipeline.is_a?(Plines::Pipeline)
38
+ raise NotDeclaredInPipelineError,
39
+ "#{klass} is not nested in a pipeline module and thus " +
40
+ "cannot be a Plines::Step. All plines steps must be " +
41
+ "declared within pipeline modules."
42
+ end
43
+
44
+ fan_out { |d| [d] } # default to one step instance
45
+ pipeline.step_classes << self
46
+ end
47
+ end
48
+
49
+ DEFAULT_DEPENDENCY_FILTER = Proc.new { true }
50
+
51
+ def depends_on(*klasses, &block)
52
+ klasses.each do |klass|
53
+ dependency_filters[klass] = (block || DEFAULT_DEPENDENCY_FILTER)
54
+ end
55
+ end
56
+
57
+ def depended_on_by_all_steps
58
+ pipeline.initial_step = self
59
+ end
60
+
61
+ def depends_on_all_steps
62
+ pipeline.terminal_step = self
63
+ end
64
+
65
+ def run_jobs_in_serial
66
+ depends_on step_name do |data|
67
+ prior_data = data.my_data_hashes.each_cons(2) do |(prior, current)|
68
+ break prior if current == data.my_data
69
+ end
70
+
71
+ data.their_data == prior_data
72
+ end
73
+ end
74
+
75
+ def fan_out(&block)
76
+ @fan_out_blocks ||= []
77
+ @fan_out_blocks << block
78
+ end
79
+
80
+ def has_external_dependencies(*args, &block)
81
+ block ||= begin
82
+ options = args.last.is_a?(Hash) ? args.pop : {}
83
+ lambda do |deps, _|
84
+ args.each do |name|
85
+ deps.add name, options
86
+ end
87
+ end
88
+ end
89
+
90
+ external_dependency_definitions << block
91
+ end
92
+
93
+ def has_external_dependencies_for?(data)
94
+ external_dependency_definitions.any? do |block|
95
+ list = ExternalDependencyList.new
96
+ block.call(list, data)
97
+ list.any?
98
+ end
99
+ end
100
+
101
+ def external_dependencies_for(data)
102
+ list = ExternalDependencyList.new
103
+
104
+ external_dependency_definitions.each do |block|
105
+ block.call(list, data)
106
+ end
107
+
108
+ list.to_a
109
+ end
110
+
111
+ def dependencies_for(job, batch_data)
112
+ Enumerator.new do |yielder|
113
+ has_dependencies = false
114
+
115
+ each_declared_dependency_job_for(job, batch_data) do |job|
116
+ has_dependencies = true
117
+ yielder.yield job
118
+ end
119
+
120
+ each_initial_step_job_for(job, batch_data) do |job|
121
+ yielder.yield job
122
+ end unless has_dependencies
123
+ end
124
+ end
125
+
126
+ def jobs_for(batch_data)
127
+ @fan_out_blocks.inject([batch_data]) do |job_data_hashes, fan_out_block|
128
+ job_data_hashes.flat_map { |job_data| fan_out_block.call(job_data) }
129
+ end.map do |job_data|
130
+ Job.build(self, job_data)
131
+ end
132
+ end
133
+
134
+ def perform(qless_job)
135
+ batch = JobBatch.find(qless_job.client, pipeline,
136
+ qless_job.data.fetch("_job_batch_id"))
137
+
138
+ if batch.creation_in_progress?
139
+ qless_job.move(qless_job.queue_name, delay: 2)
140
+ return
141
+ end
142
+
143
+ job_data = DynamicStruct.new(qless_job.data)
144
+
145
+ qless_job.after_complete do
146
+ batch.mark_job_as_complete(qless_job.jid)
147
+ end
148
+
149
+ new(batch, job_data, qless_job)
150
+ .send(:around_perform)
151
+ end
152
+
153
+ def external_dependency_definitions
154
+ @external_dependency_definitions ||= []
155
+ end
156
+
157
+ def qless_options
158
+ @qless_options ||= QlessJobOptions.new
159
+ yield @qless_options if block_given?
160
+ @qless_options
161
+ end
162
+
163
+ def enqueue_qless_job(qless, data, options = {})
164
+ queue_name = if has_external_dependencies_for?(data)
165
+ Pipeline::AWAITING_EXTERNAL_DEPENDENCY_QUEUE
166
+ elsif options[:queue] && processing_queue == :plines
167
+ options[:queue]
168
+ else
169
+ processing_queue
170
+ end
171
+
172
+ queue = qless.queues[queue_name]
173
+
174
+ options[:priority] = qless_options.priority if qless_options.priority
175
+ options[:retries] = qless_options.retries if qless_options.retries
176
+ options[:tags] = Array(options[:tags]) | qless_options.tags
177
+
178
+ queue.put(self, data, options)
179
+ end
180
+
181
+ def processing_queue
182
+ qless_options.queue
183
+ end
184
+
185
+ def pipeline
186
+ @pipeline ||= begin
187
+ namespaces = name.split('::')
188
+ namespaces.pop # ignore the last one
189
+ namespaces.inject(Object) { |ns, mod| ns.const_get(mod) }
190
+ end
191
+ end
192
+
193
+ def step_name
194
+ @step_name ||= name.split('::').last.to_sym
195
+ end
196
+
197
+ private
198
+
199
+ def dependency_filters
200
+ @dependency_filters ||= {}
201
+ end
202
+
203
+ DependencyData = Struct.new(:my_data, :their_data,
204
+ :my_data_hashes, :their_data_hashes)
205
+
206
+ def each_declared_dependency_job_for(my_job, batch_data)
207
+ my_data_hashes = jobs_for(batch_data).map(&:data)
208
+
209
+ dependency_filters.each do |klass, filter|
210
+ klass = pipeline.const_get(klass)
211
+ their_jobs = klass.jobs_for(batch_data)
212
+ their_data_hashes = their_jobs.map(&:data)
213
+
214
+ their_jobs.each do |their_job|
215
+ yield their_job if filter.call(DependencyData.new(
216
+ my_job.data, their_job.data,
217
+ my_data_hashes, their_data_hashes
218
+ ))
219
+ end
220
+ end
221
+ end
222
+
223
+ def each_initial_step_job_for(job, batch_data)
224
+ return if pipeline.initial_step == self
225
+
226
+ pipeline.initial_step.jobs_for(batch_data).each do |dependency|
227
+ yield dependency
228
+ end
229
+ end
230
+
231
+ module InstanceMethods
232
+ extend Forwardable
233
+ attr_reader :job_data, :job_batch, :qless_job
234
+ def_delegator "self.class", :enqueue_qless_job
235
+
236
+ def initialize(job_batch, job_data, qless_job)
237
+ @job_batch = job_batch
238
+ @job_data = job_data
239
+ @qless_job = qless_job
240
+ @enqueued_job = EnqueuedJob.new(qless_job.client,
241
+ self.class.pipeline, qless_job.jid)
242
+ end
243
+
244
+ private
245
+
246
+ def around_perform
247
+ perform
248
+ end
249
+
250
+ def unresolved_external_dependencies
251
+ @unresolved_external_dependencies ||=
252
+ @enqueued_job.unresolved_external_dependencies
253
+ end
254
+ end
255
+
256
+ QlessJobOptions = Struct.new(:tags, :priority, :queue, :retries) do
257
+ def initialize
258
+ super
259
+ self.queue ||= :plines
260
+ self.tags ||= []
261
+ end
262
+
263
+ def tag=(value)
264
+ self.tags = Array(value)
265
+ end
266
+ end
267
+ end
268
+ end
269
+