cloudtasker 0.7.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.rubocop.yml +5 -0
  4. data/.travis.yml +3 -3
  5. data/CHANGELOG.md +41 -0
  6. data/README.md +145 -25
  7. data/_config.yml +1 -0
  8. data/app/controllers/cloudtasker/worker_controller.rb +21 -5
  9. data/cloudtasker.gemspec +2 -2
  10. data/docs/BATCH_JOBS.md +28 -3
  11. data/docs/CRON_JOBS.md +3 -1
  12. data/exe/cloudtasker +13 -1
  13. data/gemfiles/google_cloud_tasks_1.0.gemfile.lock +26 -9
  14. data/gemfiles/google_cloud_tasks_1.1.gemfile.lock +26 -9
  15. data/gemfiles/google_cloud_tasks_1.2.gemfile.lock +27 -10
  16. data/gemfiles/google_cloud_tasks_1.3.gemfile.lock +26 -9
  17. data/gemfiles/rails_5.2.gemfile.lock +28 -11
  18. data/gemfiles/rails_6.0.gemfile.lock +29 -12
  19. data/lib/cloudtasker.rb +1 -1
  20. data/lib/cloudtasker/backend/google_cloud_task.rb +65 -12
  21. data/lib/cloudtasker/backend/memory_task.rb +5 -3
  22. data/lib/cloudtasker/backend/redis_task.rb +24 -13
  23. data/lib/cloudtasker/batch/batch_progress.rb +11 -2
  24. data/lib/cloudtasker/batch/job.rb +18 -4
  25. data/lib/cloudtasker/cli.rb +6 -5
  26. data/lib/cloudtasker/cloud_task.rb +6 -2
  27. data/lib/cloudtasker/config.rb +33 -9
  28. data/lib/cloudtasker/cron/job.rb +2 -2
  29. data/lib/cloudtasker/cron/schedule.rb +26 -14
  30. data/lib/cloudtasker/local_server.rb +44 -22
  31. data/lib/cloudtasker/max_task_size_exceeded_error.rb +14 -0
  32. data/lib/cloudtasker/redis_client.rb +10 -7
  33. data/lib/cloudtasker/unique_job/job.rb +2 -2
  34. data/lib/cloudtasker/version.rb +1 -1
  35. data/lib/cloudtasker/worker.rb +45 -10
  36. data/lib/cloudtasker/worker_handler.rb +7 -5
  37. data/lib/cloudtasker/worker_logger.rb +1 -1
  38. data/lib/cloudtasker/worker_wrapper.rb +52 -0
  39. data/lib/tasks/setup_queue.rake +12 -2
  40. metadata +7 -6
  41. data/Gemfile.lock +0 -280
  42. data/lib/cloudtasker/railtie.rb +0 -10
@@ -77,7 +77,16 @@ module Cloudtasker
77
77
  # @return [Integer] The number of jobs pending.
78
78
  #
79
79
  def pending
80
- total - completed - dead
80
+ total - done
81
+ end
82
+
83
+ #
84
+ # Return the number of jobs completed or dead.
85
+ #
86
+ # @return [Integer] The number of jobs done.
87
+ #
88
+ def done
89
+ completed + dead
81
90
  end
82
91
 
83
92
  #
@@ -88,7 +97,7 @@ module Cloudtasker
88
97
  def percent
89
98
  return 0 if total.zero?
90
99
 
91
- pending.to_f / total
100
+ (done.to_f / total) * 100
92
101
  end
93
102
 
94
103
  #
@@ -16,10 +16,10 @@ module Cloudtasker
16
16
  #
17
17
  # Return the cloudtasker redis client
18
18
  #
19
- # @return [Class] The redis client.
19
+ # @return [Cloudtasker::RedisClient] The cloudtasker redis client.
20
20
  #
21
21
  def self.redis
22
- RedisClient
22
+ @redis ||= RedisClient.new
23
23
  end
24
24
 
25
25
  #
@@ -87,7 +87,7 @@ module Cloudtasker
87
87
  #
88
88
  # Return the cloudtasker redis client
89
89
  #
90
- # @return [Class] The redis client.
90
+ # @return [Cloudtasker::RedisClient] The cloudtasker redis client.
91
91
  #
92
92
  def redis
93
93
  self.class.redis
@@ -180,9 +180,23 @@ module Cloudtasker
180
180
  # @return [Array<Cloudtasker::Worker>] The updated list of jobs.
181
181
  #
182
182
  def add(worker_klass, *args)
183
+ add_to_queue(worker.job_queue, worker_klass, *args)
184
+ end
185
+
186
+ #
187
+ # Add a worker to the batch using a specific queue.
188
+ #
189
+ # @param [String, Symbol] queue The name of the queue
190
+ # @param [Class] worker_klass The worker class.
191
+ # @param [Array<any>] *args The worker arguments.
192
+ #
193
+ # @return [Array<Cloudtasker::Worker>] The updated list of jobs.
194
+ #
195
+ def add_to_queue(queue, worker_klass, *args)
183
196
  jobs << worker_klass.new(
184
197
  job_args: args,
185
- job_meta: { key(:parent_id) => batch_id }
198
+ job_meta: { key(:parent_id) => batch_id },
199
+ job_queue: queue
186
200
  )
187
201
  end
188
202
 
@@ -70,7 +70,7 @@ module Cloudtasker
70
70
  #
71
71
  # Run the cloudtasker development server.
72
72
  #
73
- def run
73
+ def run(opts = {})
74
74
  boot_system
75
75
 
76
76
  # Print banner
@@ -90,16 +90,17 @@ module Cloudtasker
90
90
  logger.info "[Cloudtasker/Server] Running in #{RUBY_DESCRIPTION}"
91
91
 
92
92
  # Wait for signals
93
- wait_for_signal(self_read)
93
+ run_server(self_read, opts)
94
94
  end
95
95
 
96
96
  #
97
- # Wait for signals and handle them.
97
+ # Run server and wait for signals.
98
98
  #
99
99
  # @param [IO] read_pipe Where to read signals.
100
+ # @param [Hash] opts Server options.
100
101
  #
101
- def wait_for_signal(read_pipe)
102
- local_server.start
102
+ def run_server(read_pipe, opts = {})
103
+ local_server.start(opts)
103
104
 
104
105
  while (readable_io = IO.select([read_pipe]))
105
106
  signal = readable_io.first[0].gets.strip
@@ -3,7 +3,7 @@
3
3
  module Cloudtasker
4
4
  # An interface class to manage tasks on the backend (Cloud Task or Redis)
5
5
  class CloudTask
6
- attr_accessor :id, :http_request, :schedule_time, :retries
6
+ attr_accessor :id, :http_request, :schedule_time, :retries, :queue
7
7
 
8
8
  #
9
9
  # The backend to use for cloud tasks.
@@ -48,6 +48,8 @@ module Cloudtasker
48
48
  # @return [Cloudtasker::CloudTask] The created task.
49
49
  #
50
50
  def self.create(payload)
51
+ raise MaxTaskSizeExceededError if payload.to_json.bytesize > Config::MAX_TASK_SIZE
52
+
51
53
  resp = backend.create(payload)&.to_h
52
54
  resp ? new(resp) : nil
53
55
  end
@@ -69,12 +71,14 @@ module Cloudtasker
69
71
  # @param [Hash] http_request The content of the http request.
70
72
  # @param [Integer] schedule_time When to run the job (Unix timestamp)
71
73
  # @param [Integer] retries The number of times the job failed.
74
+ # @param [String] queue The queue the task is in.
72
75
  #
73
- def initialize(id:, http_request:, schedule_time: nil, retries: 0)
76
+ def initialize(id:, http_request:, schedule_time: nil, retries: 0, queue: nil)
74
77
  @id = id
75
78
  @http_request = http_request
76
79
  @schedule_time = schedule_time
77
80
  @retries = retries || 0
81
+ @queue = queue
78
82
  end
79
83
 
80
84
  #
@@ -7,15 +7,32 @@ module Cloudtasker
7
7
  class Config
8
8
  attr_accessor :redis
9
9
  attr_writer :secret, :gcp_location_id, :gcp_project_id,
10
- :gcp_queue_id, :processor_path, :logger, :mode, :max_retries
10
+ :gcp_queue_prefix, :processor_path, :logger, :mode, :max_retries
11
+
12
+ # Max Cloud Task size in bytes
13
+ MAX_TASK_SIZE = 100 * 1024 # 100 KB
11
14
 
12
15
  # Retry header in Cloud Task responses
13
16
  RETRY_HEADER = 'X-CloudTasks-TaskExecutionCount'
14
17
 
18
+ # Content-Transfer-Encoding header in Cloud Task responses
19
+ ENCODING_HEADER = 'Content-Transfer-Encoding'
20
+
21
+ # Content Type
22
+ CONTENT_TYPE_HEADER = 'Content-Type'
23
+
24
+ # Authorization header
25
+ AUTHORIZATION_HEADER = 'Authorization'
26
+
15
27
  # Default values
16
28
  DEFAULT_LOCATION_ID = 'us-east1'
17
29
  DEFAULT_PROCESSOR_PATH = '/cloudtasker/run'
18
30
 
31
+ # Default queue values
32
+ DEFAULT_JOB_QUEUE = 'default'
33
+ DEFAULT_QUEUE_CONCURRENCY = 10
34
+ DEFAULT_QUEUE_RETRIES = -1 # unlimited
35
+
19
36
  # The number of times jobs will be attempted before declaring them dead
20
37
  DEFAULT_MAX_RETRY_ATTEMPTS = 25
21
38
 
@@ -23,9 +40,10 @@ module Cloudtasker
23
40
  Missing host for processing.
24
41
  Please specify a processor hostname in form of `https://some-public-dns.example.com`'
25
42
  DOC
26
- QUEUE_ID_MISSING_ERROR = <<~DOC
27
- Missing GCP queue ID.
28
- Please specify a queue ID in the form of `my-queue-id`. You can create a queue using the Google SDK via `gcloud tasks queues create my-queue-id`
43
+ QUEUE_PREFIX_MISSING_ERROR = <<~DOC
44
+ Missing GCP queue prefix.
45
+ Please specify a queue prefix in the form of `my-app`.
46
+ You can create a default queue using the Google SDK via `gcloud tasks queues create my-app-default`
29
47
  DOC
30
48
  PROJECT_ID_MISSING_ERROR = <<~DOC
31
49
  Missing GCP project ID.
@@ -95,8 +113,14 @@ module Cloudtasker
95
113
  def processor_host=(val)
96
114
  @processor_host = val
97
115
 
116
+ # Check if Rails supports host filtering
117
+ return unless val &&
118
+ defined?(Rails) &&
119
+ Rails.application.config.respond_to?(:hosts) &&
120
+ Rails.application.config.hosts&.any?
121
+
98
122
  # Add processor host to the list of authorized hosts
99
- Rails.application.config.hosts << val.gsub(%r{https?://}, '') if val && defined?(Rails)
123
+ Rails.application.config.hosts << val.gsub(%r{https?://}, '')
100
124
  end
101
125
 
102
126
  #
@@ -121,12 +145,12 @@ module Cloudtasker
121
145
  end
122
146
 
123
147
  #
124
- # Return the ID of GCP queue where tasks will be added.
148
+ # Return the prefix used for queues.
125
149
  #
126
- # @return [String] The ID of the processing queue.
150
+ # @return [String] The prefix of the processing queues.
127
151
  #
128
- def gcp_queue_id
129
- @gcp_queue_id || raise(StandardError, QUEUE_ID_MISSING_ERROR)
152
+ def gcp_queue_prefix
153
+ @gcp_queue_prefix || raise(StandardError, QUEUE_PREFIX_MISSING_ERROR)
130
154
  end
131
155
 
132
156
  #
@@ -105,10 +105,10 @@ module Cloudtasker
105
105
  #
106
106
  # Return the cloudtasker redis client
107
107
  #
108
- # @return [Class] The redis client.
108
+ # @return [Cloudtasker::RedisClient] The cloudtasker redis client.
109
109
  #
110
110
  def redis
111
- RedisClient
111
+ @redis ||= RedisClient.new
112
112
  end
113
113
 
114
114
  #
@@ -1,12 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'fugit'
4
+ require 'cloudtasker/worker_wrapper'
4
5
 
5
6
  module Cloudtasker
6
7
  module Cron
7
8
  # Manage cron schedules
8
9
  class Schedule
9
- attr_accessor :id, :cron, :worker, :task_id, :job_id
10
+ attr_accessor :id, :cron, :worker, :task_id, :job_id, :queue, :args
10
11
 
11
12
  # Key Namespace used for object saved under this class
12
13
  SUB_NAMESPACE = 'schedule'
@@ -14,10 +15,10 @@ module Cloudtasker
14
15
  #
15
16
  # Return the redis client.
16
17
  #
17
- # @return [Class] The redis client
18
+ # @return [Cloudtasker::RedisClient] The cloudtasker redis client.
18
19
  #
19
20
  def self.redis
20
- RedisClient
21
+ @redis ||= RedisClient.new
21
22
  end
22
23
 
23
24
  #
@@ -113,21 +114,25 @@ module Cloudtasker
113
114
  # @param [String] id The schedule id.
114
115
  # @param [String] cron The cron expression.
115
116
  # @param [Class] worker The worker class to run.
117
+ # @param [Array<any>] args The worker arguments.
118
+ # @param [String] queue The queue to use for the cron job.
116
119
  # @param [String] task_id The ID of the actual backend task.
117
120
  # @param [String] job_id The ID of the Cloudtasker worker.
118
121
  #
119
- def initialize(id:, cron:, worker:, task_id: nil, job_id: nil)
122
+ def initialize(id:, cron:, worker:, **opts)
120
123
  @id = id
121
124
  @cron = cron
122
125
  @worker = worker
123
- @task_id = task_id
124
- @job_id = job_id
126
+ @args = opts[:args]
127
+ @queue = opts[:queue]
128
+ @task_id = opts[:task_id]
129
+ @job_id = opts[:job_id]
125
130
  end
126
131
 
127
132
  #
128
133
  # Return the redis client.
129
134
  #
130
- # @return [Class] The redis client
135
+ # @return [Cloudtasker::RedisClient] The cloudtasker redis client.
131
136
  #
132
137
  def redis
133
138
  self.class.redis
@@ -191,7 +196,9 @@ module Cloudtasker
191
196
  {
192
197
  id: id,
193
198
  cron: cron,
194
- worker: worker
199
+ worker: worker,
200
+ args: args,
201
+ queue: queue
195
202
  }
196
203
  end
197
204
 
@@ -201,13 +208,10 @@ module Cloudtasker
201
208
  # @return [Hash] The attributes hash.
202
209
  #
203
210
  def to_h
204
- {
205
- id: id,
206
- cron: cron,
207
- worker: worker,
211
+ to_config.merge(
208
212
  task_id: task_id,
209
213
  job_id: job_id
210
- }
214
+ )
211
215
  end
212
216
 
213
217
  #
@@ -219,6 +223,15 @@ module Cloudtasker
219
223
  @cron_schedule ||= Fugit::Cron.parse(cron)
220
224
  end
221
225
 
226
+ #
227
+ # Return an instance of the underlying worker.
228
+ #
229
+ # @return [Cloudtasker::WorkerWrapper] The worker instance
230
+ #
231
+ def worker_instance
232
+ WorkerWrapper.new(worker_name: worker, job_args: args, job_queue: queue)
233
+ end
234
+
222
235
  #
223
236
  # Return the next time a job should run.
224
237
  #
@@ -279,7 +292,6 @@ module Cloudtasker
279
292
  CloudTask.delete(task_id) if task_id
280
293
 
281
294
  # Schedule worker
282
- worker_instance = Object.const_get(worker).new
283
295
  Job.new(worker_instance).set(schedule_id: id).schedule!
284
296
  end
285
297
  end
@@ -9,6 +9,9 @@ module Cloudtasker
9
9
  # Max number of task requests sent to the processing server
10
10
  CONCURRENCY = (ENV['CLOUDTASKER_CONCURRENCY'] || 5).to_i
11
11
 
12
+ # Default number of threads to allocate to process a specific queue
13
+ QUEUE_CONCURRENCY = 1
14
+
12
15
  #
13
16
  # Stop the local server.
14
17
  #
@@ -16,7 +19,7 @@ module Cloudtasker
16
19
  @done = true
17
20
 
18
21
  # Terminate threads and repush tasks
19
- @threads&.each do |t|
22
+ @threads&.values&.flatten&.each do |t|
20
23
  t.terminate
21
24
  t['task']&.retry_later(0, is_error: false)
22
25
  end
@@ -28,11 +31,21 @@ module Cloudtasker
28
31
  #
29
32
  # Start the local server
30
33
  #
34
+ # @param [Hash] opts Server options.
35
+ #
31
36
  #
32
- def start
37
+ def start(opts = {})
38
+ # Extract queues to process
39
+ queues = opts[:queues].to_a.any? ? opts[:queues] : [[nil, CONCURRENCY]]
40
+
41
+ # Display start banner
42
+ queue_labels = queues.map { |n, c| "#{n || 'all'}=#{c || QUEUE_CONCURRENCY}" }.join(' ')
43
+ Cloudtasker.logger.info("[Cloudtasker/Server] Processing queues: #{queue_labels}")
44
+
45
+ # Start processing queues
33
46
  @start ||= Thread.new do
34
47
  until @done
35
- process_jobs
48
+ queues.each { |(n, c)| process_jobs(n, c) }
36
49
  sleep 1
37
50
  end
38
51
  Cloudtasker.logger.info('[Cloudtasker/Server] Local server exiting...')
@@ -43,31 +56,40 @@ module Cloudtasker
43
56
  # Process enqueued workers.
44
57
  #
45
58
  #
46
- def process_jobs
47
- @threads ||= []
59
+ def process_jobs(queue = nil, concurrency = nil)
60
+ @threads ||= {}
61
+ @threads[queue] ||= []
62
+ max_threads = (concurrency || QUEUE_CONCURRENCY).to_i
48
63
 
49
64
  # Remove any done thread
50
- @threads.select!(&:alive?)
65
+ @threads[queue].select!(&:alive?)
51
66
 
52
67
  # Process tasks
53
- while @threads.count < CONCURRENCY && (task = Cloudtasker::Backend::RedisTask.pop)
54
- @threads << Thread.new do
55
- Thread.current['task'] = task
56
- Thread.current['attempts'] = 0
68
+ while @threads[queue].count < max_threads && (task = Cloudtasker::Backend::RedisTask.pop(queue))
69
+ @threads[queue] << Thread.new { process_task(task) }
70
+ end
71
+ end
57
72
 
58
- # Deliver task
59
- begin
60
- Thread.current['task'].deliver
61
- rescue Errno::ECONNREFUSED => e
62
- raise(e) unless Thread.current['attempts'] < 3
73
+ #
74
+ # Process a given task
75
+ #
76
+ # @param [Cloudtasker::CloudTask] task The task to process
77
+ #
78
+ def process_task(task)
79
+ Thread.current['task'] = task
80
+ Thread.current['attempts'] = 0
63
81
 
64
- # Retry on connection error, in case the web server is not
65
- # started yet.
66
- Thread.current['attempts'] += 1
67
- sleep(3)
68
- retry
69
- end
70
- end
82
+ # Deliver task
83
+ begin
84
+ Thread.current['task'].deliver
85
+ rescue Errno::ECONNREFUSED => e
86
+ raise(e) unless Thread.current['attempts'] < 3
87
+
88
+ # Retry on connection error, in case the web server is not
89
+ # started yet.
90
+ Thread.current['attempts'] += 1
91
+ sleep(3)
92
+ retry
71
93
  end
72
94
  end
73
95
  end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cloudtasker
4
+ # Handle Cloud Task size quota
5
+ # See: https://cloud.google.com/appengine/quotas#Task_Queue
6
+ #
7
+ class MaxTaskSizeExceededError < StandardError
8
+ MSG = 'The size of Cloud Tasks must not exceed 100KB'
9
+
10
+ def initialize(msg = MSG)
11
+ super
12
+ end
13
+ end
14
+ end