cloudtasker-tonix 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. checksums.yaml +7 -0
  2. data/.github/workflows/lint_rubocop.yml +15 -0
  3. data/.github/workflows/test_ruby_3.x.yml +40 -0
  4. data/.gitignore +23 -0
  5. data/.rspec +3 -0
  6. data/.rubocop.yml +96 -0
  7. data/Appraisals +76 -0
  8. data/CHANGELOG.md +248 -0
  9. data/CODE_OF_CONDUCT.md +74 -0
  10. data/Gemfile +18 -0
  11. data/LICENSE.txt +21 -0
  12. data/README.md +1311 -0
  13. data/Rakefile +8 -0
  14. data/_config.yml +1 -0
  15. data/app/controllers/cloudtasker/worker_controller.rb +107 -0
  16. data/bin/console +15 -0
  17. data/bin/setup +8 -0
  18. data/cloudtasker.gemspec +42 -0
  19. data/config/routes.rb +5 -0
  20. data/docs/BATCH_JOBS.md +144 -0
  21. data/docs/CRON_JOBS.md +129 -0
  22. data/docs/STORABLE_JOBS.md +68 -0
  23. data/docs/UNIQUE_JOBS.md +190 -0
  24. data/exe/cloudtasker +30 -0
  25. data/gemfiles/.bundle/config +2 -0
  26. data/gemfiles/google_cloud_tasks_1.0.gemfile +17 -0
  27. data/gemfiles/google_cloud_tasks_1.1.gemfile +17 -0
  28. data/gemfiles/google_cloud_tasks_1.2.gemfile +17 -0
  29. data/gemfiles/google_cloud_tasks_1.3.gemfile +17 -0
  30. data/gemfiles/google_cloud_tasks_1.4.gemfile +17 -0
  31. data/gemfiles/google_cloud_tasks_1.5.gemfile +17 -0
  32. data/gemfiles/google_cloud_tasks_2.0.gemfile +17 -0
  33. data/gemfiles/google_cloud_tasks_2.1.gemfile +17 -0
  34. data/gemfiles/rails_6.1.gemfile +20 -0
  35. data/gemfiles/rails_7.0.gemfile +18 -0
  36. data/gemfiles/rails_7.1.gemfile +18 -0
  37. data/gemfiles/rails_8.0.gemfile +18 -0
  38. data/gemfiles/rails_8.1.gemfile +18 -0
  39. data/gemfiles/semantic_logger_3.4.gemfile +16 -0
  40. data/gemfiles/semantic_logger_4.6.gemfile +16 -0
  41. data/gemfiles/semantic_logger_4.7.0.gemfile +16 -0
  42. data/gemfiles/semantic_logger_4.7.2.gemfile +16 -0
  43. data/lib/active_job/queue_adapters/cloudtasker_adapter.rb +89 -0
  44. data/lib/cloudtasker/authentication_error.rb +6 -0
  45. data/lib/cloudtasker/authenticator.rb +90 -0
  46. data/lib/cloudtasker/backend/google_cloud_task_v1.rb +228 -0
  47. data/lib/cloudtasker/backend/google_cloud_task_v2.rb +231 -0
  48. data/lib/cloudtasker/backend/memory_task.rb +202 -0
  49. data/lib/cloudtasker/backend/redis_task.rb +291 -0
  50. data/lib/cloudtasker/batch/batch_progress.rb +142 -0
  51. data/lib/cloudtasker/batch/extension/worker.rb +13 -0
  52. data/lib/cloudtasker/batch/job.rb +558 -0
  53. data/lib/cloudtasker/batch/middleware/server.rb +14 -0
  54. data/lib/cloudtasker/batch/middleware.rb +25 -0
  55. data/lib/cloudtasker/batch.rb +5 -0
  56. data/lib/cloudtasker/cli.rb +194 -0
  57. data/lib/cloudtasker/cloud_task.rb +130 -0
  58. data/lib/cloudtasker/config.rb +319 -0
  59. data/lib/cloudtasker/cron/job.rb +205 -0
  60. data/lib/cloudtasker/cron/middleware/server.rb +14 -0
  61. data/lib/cloudtasker/cron/middleware.rb +20 -0
  62. data/lib/cloudtasker/cron/schedule.rb +308 -0
  63. data/lib/cloudtasker/cron.rb +5 -0
  64. data/lib/cloudtasker/dead_worker_error.rb +6 -0
  65. data/lib/cloudtasker/engine.rb +24 -0
  66. data/lib/cloudtasker/invalid_worker_error.rb +6 -0
  67. data/lib/cloudtasker/local_server.rb +99 -0
  68. data/lib/cloudtasker/max_task_size_exceeded_error.rb +14 -0
  69. data/lib/cloudtasker/meta_store.rb +86 -0
  70. data/lib/cloudtasker/middleware/chain.rb +250 -0
  71. data/lib/cloudtasker/missing_worker_arguments_error.rb +6 -0
  72. data/lib/cloudtasker/redis_client.rb +166 -0
  73. data/lib/cloudtasker/retry_worker_error.rb +6 -0
  74. data/lib/cloudtasker/storable/worker.rb +78 -0
  75. data/lib/cloudtasker/storable.rb +3 -0
  76. data/lib/cloudtasker/testing.rb +184 -0
  77. data/lib/cloudtasker/unique_job/conflict_strategy/base_strategy.rb +39 -0
  78. data/lib/cloudtasker/unique_job/conflict_strategy/raise.rb +28 -0
  79. data/lib/cloudtasker/unique_job/conflict_strategy/reject.rb +11 -0
  80. data/lib/cloudtasker/unique_job/conflict_strategy/reschedule.rb +30 -0
  81. data/lib/cloudtasker/unique_job/job.rb +168 -0
  82. data/lib/cloudtasker/unique_job/lock/base_lock.rb +70 -0
  83. data/lib/cloudtasker/unique_job/lock/no_op.rb +11 -0
  84. data/lib/cloudtasker/unique_job/lock/until_completed.rb +40 -0
  85. data/lib/cloudtasker/unique_job/lock/until_executed.rb +36 -0
  86. data/lib/cloudtasker/unique_job/lock/until_executing.rb +30 -0
  87. data/lib/cloudtasker/unique_job/lock/while_executing.rb +25 -0
  88. data/lib/cloudtasker/unique_job/lock_error.rb +8 -0
  89. data/lib/cloudtasker/unique_job/middleware/client.rb +15 -0
  90. data/lib/cloudtasker/unique_job/middleware/server.rb +14 -0
  91. data/lib/cloudtasker/unique_job/middleware.rb +36 -0
  92. data/lib/cloudtasker/unique_job.rb +32 -0
  93. data/lib/cloudtasker/version.rb +5 -0
  94. data/lib/cloudtasker/worker.rb +487 -0
  95. data/lib/cloudtasker/worker_handler.rb +250 -0
  96. data/lib/cloudtasker/worker_logger.rb +231 -0
  97. data/lib/cloudtasker/worker_wrapper.rb +52 -0
  98. data/lib/cloudtasker.rb +57 -0
  99. data/lib/tasks/setup_queue.rake +20 -0
  100. metadata +241 -0
data/README.md ADDED
@@ -0,0 +1,1311 @@
1
+ ![Build Status 3.x](https://github.com/keypup-io/cloudtasker/actions/workflows/test_ruby_3.x.yml/badge.svg) [![Gem Version](https://badge.fury.io/rb/cloudtasker.svg)](https://badge.fury.io/rb/cloudtasker)
2
+
3
+ # Cloudtasker
4
+
5
+ Background jobs for Ruby using Google Cloud Tasks.
6
+
7
+ Cloudtasker provides an easy-to-manage interface to Google Cloud Tasks for background job processing. Workers can be defined programmatically using the Cloudtasker DSL and enqueued for processing using a simple-to-use API.
8
+
9
+ Cloudtasker is particularly suited for serverless applications that only respond to HTTP requests and where running a dedicated job processing server is not an option (e.g. deployments via [Cloud Run](https://cloud.google.com/run)). All jobs enqueued in Cloud Tasks via Cloudtasker eventually get processed by your application via HTTP requests.
10
+
11
+ Cloudtasker also provides optional modules for running [cron jobs](docs/CRON_JOBS.md), [batch jobs](docs/BATCH_JOBS.md), [unique jobs](docs/UNIQUE_JOBS.md) and [storable jobs](docs/STORABLE_JOBS.md).
12
+
13
+ A local processing server is also available for development. This local server processes jobs in lieu of Cloud Tasks and allows you to work offline.
14
+
15
+ ## Summary
16
+
17
+ 1. [Installation](#installation)
18
+ 2. [Get started with Rails](#get-started-with-rails)
19
+ 3. [Get started with Rails & ActiveJob](#get-started-with-rails--activejob)
20
+ 4. [Configuring Cloudtasker](#configuring-cloudtasker)
21
+    1. [Cloud Tasks authentication & permissions](#cloud-tasks-authentication--permissions)
22
+    2. [Cloudtasker initializer](#cloudtasker-initializer)
23
+ 5. [Enqueuing jobs](#enqueuing-jobs)
24
+ 6. [Managing worker queues](#managing-worker-queues)
25
+    1. [Creating queues](#creating-queues)
26
+    2. [Assigning queues to workers](#assigning-queues-to-workers)
27
+ 7. [Extensions](#extensions)
28
+ 8. [Working locally](#working-locally)
29
+    1. [Option 1: Cloudtasker local server](#option-1-cloudtasker-local-server)
30
+    2. [Option 2: Using ngrok](#option-2-using-ngrok)
31
+ 9. [Logging](#logging)
32
+    1. [Configuring a logger](#configuring-a-logger)
33
+    2. [Logging context](#logging-context)
34
+    3. [Truncating log arguments](#truncating-log-arguments)
35
+    4. [Searching logs: Job ID vs Task ID](#searching-logs-job-id-vs-task-id)
36
+ 10. [Error Handling](#error-handling)
37
+    1. [HTTP Error codes](#http-error-codes)
38
+    2. [Worker callbacks](#worker-callbacks)
39
+    3. [Global callbacks](#global-callbacks)
40
+    4. [Max retries](#max-retries)
41
+    5. [Conditional reenqueues using retry errors](#conditional-reenqueues-using-retry-errors)
42
+    6. [Dispatch deadline](#dispatch-deadline)
43
+ 11. [Testing](#testing)
44
+    1. [Test helper setup](#test-helper-setup)
45
+    2. [In-memory queues](#in-memory-queues)
46
+    3. [Unit tests](#unit-tests)
47
+ 12. [Best practices building workers](#best-practices-building-workers)
48
+
49
+ ## Installation
50
+
51
+ Add this line to your application's Gemfile:
52
+
53
+ ```ruby
54
+ gem 'cloudtasker'
55
+ ```
56
+
57
+ And then execute:
58
+
59
+ $ bundle
60
+
61
+ Or install it yourself with:
62
+
63
+ $ gem install cloudtasker
64
+
65
+ ## Get started with Rails
66
+
67
+ Cloudtasker is pre-integrated with Rails. Follow the steps below to get started.
68
+
69
+ Install redis on your machine (this is required by the Cloudtasker local processing server)
70
+ ```bash
71
+ # E.g. using brew
72
+ brew install redis
73
+ ```
74
+
75
+ Add the following initializer
76
+ ```ruby
77
+ # config/initializers/cloudtasker.rb
78
+
79
+ Cloudtasker.configure do |config|
80
+ #
81
+ # Adapt the server port to be the one used by your Rails web process
82
+ #
83
+ config.processor_host = 'http://localhost:3000'
84
+
85
+ #
86
+ # If you do not have any Rails secret_key_base defined, uncomment the following
87
+ # This secret is used to authenticate jobs sent to the processing endpoint
88
+ # of your application.
89
+ #
90
+ # config.secret = 'some-long-token'
91
+ end
92
+ ```
93
+
94
+ Define your first worker:
95
+ ```ruby
96
+ # app/workers/dummy_worker.rb
97
+
98
+ class DummyWorker
99
+ include Cloudtasker::Worker
100
+
101
+ def perform(some_arg)
102
+ logger.info("Job run with #{some_arg}. This is working!")
103
+ end
104
+ end
105
+ ```
106
+
107
+ Launch Rails and the local Cloudtasker processing server (or add `cloudtasker` to your foreman config as a `worker` process)
108
+ ```bash
109
+ # In one terminal
110
+ > rails s -p 3000
111
+
112
+ # In another terminal
113
+ > cloudtasker
114
+ ```
115
+
116
+ Open a Rails console and enqueue some jobs
117
+ ```ruby
118
+ # Process job as soon as possible
119
+ DummyWorker.perform_async('foo')
120
+
121
+ # Process job in 60 seconds
122
+ DummyWorker.perform_in(60, 'foo')
123
+
124
+ # Process job immediately, inline
125
+ # Supported since: v0.15.rc2
126
+ DummyWorker.perform_now('foo')
127
+ ```
128
+
129
+ Your Rails logs should display the following:
130
+ ```log
131
+ Started POST "/cloudtasker/run" for ::1 at 2019-11-22 09:20:09 +0100
132
+
133
+ Processing by Cloudtasker::WorkerController#run as */*
134
+ Parameters: {"worker"=>"DummyWorker", "job_id"=>"d76040a1-367e-4e3b-854e-e05a74d5f773", "job_args"=>["foo"], "job_meta"=>{}}
135
+
136
+ I, [2019-11-22T09:20:09.319336 #49257] INFO -- [Cloudtasker][d76040a1-367e-4e3b-854e-e05a74d5f773] Starting job...: {:worker=>"DummyWorker", :job_id=>"d76040a1-367e-4e3b-854e-e05a74d5f773", :job_meta=>{}}
137
+ I, [2019-11-22T09:20:09.319938 #49257] INFO -- [Cloudtasker][d76040a1-367e-4e3b-854e-e05a74d5f773] Job run with foo. This is working!: {:worker=>"DummyWorker", :job_id=>"d76040a1-367e-4e3b-854e-e05a74d5f773", :job_meta=>{}}
138
+ I, [2019-11-22T09:20:09.320966 #49257] INFO -- [Cloudtasker][d76040a1-367e-4e3b-854e-e05a74d5f773] Job done: {:worker=>"DummyWorker", :job_id=>"d76040a1-367e-4e3b-854e-e05a74d5f773", :job_meta=>{}}
139
+ ```
140
+
141
+ That's it! Your job was picked up by the Cloudtasker local server and sent for processing to your Rails web process.
142
+
143
+ Now jump to the next section to configure your app to use Google Cloud Tasks as a backend.
144
+
145
+ ## Get started with Rails & ActiveJob
146
+ **Note**: ActiveJob is supported since `0.11.0`
147
+ **Note**: Cloudtasker extensions (cron, batch, unique jobs and storable) are not available when using cloudtasker via ActiveJob.
148
+
149
+ Cloudtasker is pre-integrated with ActiveJob. Follow the steps below to get started.
150
+
151
+ Install redis on your machine (this is required by the Cloudtasker local processing server)
152
+ ```bash
153
+ # E.g. using brew
154
+ brew install redis
155
+ ```
156
+
157
+ Add the following initializer
158
+ ```ruby
159
+ # config/initializers/cloudtasker.rb
160
+
161
+ Cloudtasker.configure do |config|
162
+ #
163
+ # Adapt the server port to be the one used by your Rails web process
164
+ #
165
+ config.processor_host = 'http://localhost:3000'
166
+
167
+ #
168
+ # If you do not have any Rails secret_key_base defined, uncomment the following
169
+ # This secret is used to authenticate jobs sent to the processing endpoint
170
+ # of your application.
171
+ #
172
+ # config.secret = 'some-long-token'
173
+ end
174
+ ```
175
+
176
+ Configure ActiveJob to use Cloudtasker. You can also configure ActiveJob per environment via the config/environments/:env.rb files
177
+ ```ruby
178
+ # config/application.rb
179
+
180
+ require_relative 'boot'
181
+ require 'rails/all'
182
+
183
+ Bundler.require(*Rails.groups)
184
+
185
+ module Dummy
186
+ class Application < Rails::Application
187
+ # Initialize configuration defaults for originally generated Rails version.
188
+ config.load_defaults 6.0
189
+
190
+ # Settings in config/environments/* take precedence over those specified here.
191
+ # Application configuration can go into files in config/initializers
192
+ # -- all .rb files in that directory are automatically loaded after loading
193
+ # the framework and any gems in your application.
194
+
195
+ # Use cloudtasker as the ActiveJob backend:
196
+ config.active_job.queue_adapter = :cloudtasker
197
+ end
198
+ end
199
+
200
+ ```
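+
+ If you prefer to enable the adapter per environment, as mentioned above, the same setting can live in an environment file instead. A minimal sketch, assuming a standard Rails layout:
+ ```ruby
+ # config/environments/production.rb
+
+ Rails.application.configure do
+   # Use cloudtasker as the ActiveJob backend in production only
+   config.active_job.queue_adapter = :cloudtasker
+ end
+ ```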
201
+
202
+ Define your first job:
203
+ ```ruby
204
+ # app/jobs/example_job.rb
205
+
206
+ class ExampleJob < ApplicationJob
207
+ queue_as :default
208
+
209
+ def perform(some_arg)
210
+ logger.info("Job run with #{some_arg}. This is working!")
211
+ end
212
+ end
213
+ ```
214
+
215
+ Launch Rails and the local Cloudtasker processing server (or add `cloudtasker` to your foreman config as a `worker` process)
216
+ ```bash
217
+ # In one terminal
218
+ > rails s -p 3000
219
+
220
+ # In another terminal
221
+ > cloudtasker
222
+ ```
223
+
224
+ Open a Rails console and enqueue some jobs
225
+ ```ruby
226
+ # Process job as soon as possible
227
+ ExampleJob.perform_later('foo')
228
+
229
+ # Process job in 60 seconds
230
+ ExampleJob.set(wait: 60).perform_later('foo')
231
+ ```
232
+
233
+
234
+ ## Configuring Cloudtasker
235
+
236
+ ### Cloud Tasks authentication & permissions
237
+
238
+ The Google Cloud library authenticates via the Google Cloud SDK by default. If you do not have it set up then we recommend you [install it](https://cloud.google.com/sdk/docs/quickstarts).
239
+
240
+ Other options are available such as using a service account. You can see all authentication options in the [Google Cloud Authentication guide](https://github.com/googleapis/google-cloud-ruby/blob/main/AUTHENTICATION.md).
241
+
242
+ In order to function properly Cloudtasker requires the authenticated account to have the following IAM permissions:
243
+ - `cloudtasks.tasks.get`
244
+ - `cloudtasks.tasks.create`
245
+ - `cloudtasks.tasks.delete`
246
+
247
+ To get started quickly you can add the `roles/cloudtasks.admin` role to your account via the [IAM Console](https://console.cloud.google.com/iam-admin/iam). This is not required if your account is a project admin account.
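+
+ For example, you can grant this role to a dedicated service account via the gcloud SDK (the project ID and service account below are placeholders to adapt to your own setup):
+ ```bash
+ # Grant the Cloud Tasks admin role to the account used by your application
+ gcloud projects add-iam-policy-binding my-gcp-project \
+   --member="serviceAccount:my-app@my-gcp-project.iam.gserviceaccount.com" \
+   --role="roles/cloudtasks.admin"
+ ```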
248
+
249
+ The GCP project ID and region values are not loaded automatically by the Google Cloud library, and must be explicitly defined in the initializer when using Google Cloud Tasks.
250
+
251
+ ### Cloudtasker initializer
252
+
253
+ The gem can be configured through an initializer. See below for all the available configuration options.
254
+
255
+ ```ruby
256
+ # config/initializers/cloudtasker.rb
257
+
258
+ Cloudtasker.configure do |config|
259
+ #
260
+ # If you do not have any Rails secret_key_base defined, uncomment the following.
261
+ # This secret is used to authenticate jobs sent to the processing endpoint
262
+ # of your application.
263
+ #
264
+ # Default with Rails: Rails.application.credentials.secret_key_base
265
+ #
266
+ # config.secret = 'some-long-token'
267
+
268
+ #
269
+ # Specify the details of your Google Cloud Task location.
270
+ #
271
+ # This is required when the mode of operation is set to :production
272
+ #
273
+ config.gcp_location_id = 'us-central1' # defaults to 'us-east1'
274
+ config.gcp_project_id = 'my-gcp-project'
275
+
276
+ #
277
+ # Specify the namespace for your Cloud Task queues.
278
+ #
279
+ # Specifying a namespace is optional but strongly recommended to keep
280
+ # queues organised, especially in a micro-service environment.
281
+ #
282
+ # The gem assumes that at least a default queue named 'my-app-default'
283
+ # exists in Cloud Tasks. You can create this default queue using the
284
+ # gcloud SDK or via the `rake cloudtasker:setup_queue` task if you use Rails.
285
+ #
286
+ # Workers can be scheduled on different queues. The name of the queue
287
+ # in Cloud Tasks is always assumed to be prefixed with the prefix below.
288
+ #
289
+ # E.g.
290
+ # Setting `cloudtasker_options queue: 'critical'` on a worker means that
291
+ # the worker will be pushed to 'my-app-critical' in Cloud Tasks.
292
+ #
293
+ # Specific queues can be created in Cloud Tasks using the gcloud SDK or
294
+ # via the `rake cloudtasker:setup_queue name=<queue_name>` task.
295
+ #
296
+ config.gcp_queue_prefix = 'my-app'
297
+
298
+ #
299
+ # Specify the publicly accessible host for your application
300
+ #
301
+ # > E.g. in development, using the cloudtasker local server
302
+ # config.processor_host = 'http://localhost:3000'
303
+ #
304
+ # > E.g. in development, using `config.mode = :production` and ngrok
305
+ # config.processor_host = 'https://111111.ngrok.io'
306
+ #
307
+ config.processor_host = 'https://app.mydomain.com'
308
+
309
+ #
310
+ # Specify the mode of operation:
311
+ # - :development => jobs will be pushed to Redis and picked up by the Cloudtasker local server
312
+ # - :production => jobs will be pushed to Google Cloud Tasks. Requires a publicly accessible domain.
313
+ #
314
+ # Defaults to :development unless CLOUDTASKER_ENV or RAILS_ENV or RACK_ENV is set to something else.
315
+ #
316
+ # config.mode = Rails.env.production? || Rails.env.my_other_env? ? :production : :development
317
+
318
+ #
319
+ # Specify the logger to use
320
+ #
321
+ # Default with Rails: Rails.logger
322
+ # Default without Rails: Logger.new(STDOUT)
323
+ #
324
+ # config.logger = MyLogger.new(STDOUT)
325
+
326
+ #
327
+ # Specify how many retries are allowed on jobs. This number of retries excludes any
328
+ # connectivity error due to the application being down or unreachable.
329
+ #
330
+ # Default: 25
331
+ #
332
+ # config.max_retries = 10
333
+
334
+ #
335
+ # Specify the redis connection hash.
336
+ #
337
+ # This is ONLY required in development for the Cloudtasker local server and in
338
+ # all environments if you use any cloudtasker extension (unique jobs, cron jobs,
339
+ # batch jobs or storable jobs)
340
+ #
341
+ # See https://github.com/redis/redis-rb for examples of configuration hashes.
342
+ #
343
+ # Default: redis-rb connects to redis://127.0.0.1:6379/0
344
+ #
345
+ # config.redis = { url: 'redis://localhost:6379/5' }
346
+
347
+ #
348
+ # Set to true to store job arguments in Redis instead of sending arguments as part
349
+ # of the job payload to Google Cloud Tasks.
350
+ #
351
+ # This is useful if you expect to process jobs with payloads exceeding 100KB, which
352
+ # is the limit enforced by Google Cloud Tasks.
353
+ #
354
+ # You can set this configuration parameter to a KB value if you want to store job
355
+ # args in redis only if the JSONified arguments payload exceeds that threshold.
356
+ #
357
+ # Supported since: v0.10.0
358
+ #
359
+ # Default: false
360
+ #
361
+ # Store all job payloads in Redis:
362
+ # config.store_payloads_in_redis = true
363
+ #
364
+ # Store all job payloads in Redis exceeding 50 KB:
365
+ # config.store_payloads_in_redis = 50
366
+
367
+ #
368
+ # Specify the dispatch deadline for jobs in Cloud Tasks, in seconds.
369
+ # Jobs taking longer will be retried by Cloud Tasks, even if they eventually
370
+ # complete on the server side.
371
+ #
372
+ # Note that this option is applied when jobs are enqueued. Changing this value
373
+ # will not impact already enqueued jobs.
374
+ #
375
+ # This option can also be configured on a per worker basis via
376
+ # the cloudtasker_options directive.
377
+ #
378
+ # Supported since: v0.12.0
379
+ #
380
+ # Default: 600 seconds (10 minutes)
381
+ # Min: 15 seconds
382
+ # Max: 1800 seconds (30 minutes)
383
+ #
384
+ # config.dispatch_deadline = 600
385
+
386
+ #
387
+ # Specify a proc to be invoked every time a job fails due to a runtime
388
+ # error.
389
+ #
390
+ # This hook is not invoked for DeadWorkerError. See on_dead instead.
391
+ #
392
+ # This is useful when you need to apply general exception handling, such
393
+ # as reporting errors to a third-party service like Rollbar or Bugsnag.
394
+ #
395
+ # Note: the worker argument might be nil, such as when InvalidWorkerError is raised.
396
+ #
397
+ # Supported since: v0.12.0
398
+ #
399
+ # Default: no operation
400
+ #
401
+ # config.on_error = ->(error, worker) { Rollbar.error(error) }
402
+
403
+ #
404
+ # Specify a proc to be invoked every time a job dies due to too many
405
+ # retries.
406
+ #
407
+ # This is useful when you need to apply general exception handling, such
408
+ # as logging specific messages/context when a job dies.
409
+ #
410
+ # Supported since: v0.12.0
411
+ #
412
+ # Default: no operation
413
+ #
414
+ # config.on_dead = ->(error, worker) { Rollbar.error(error) }
415
+
416
+ #
417
+ # Specify the Open ID Connect (OIDC) details to connect to a protected GCP service, such
418
+ # as a private Cloud Run application.
419
+ #
420
+ # The configuration supports the following details:
421
+ # - service_account_email: This is the "act as" user. It can be found under the security details
422
+ # of the Cloud Run service.
423
+ # - audience: The audience is usually the publicly accessible host for the Cloud Run service
424
+ # (which is the same value configured as the processor_host). If no audience is provided
425
+ # it will be set to the processor_host.
426
+ #
427
+ # Note: If the OIDC token is used for a Cloud Run service make sure to include the
428
+ # `iam.serviceAccounts.actAs` permission on the service account.
429
+ #
430
+ # See https://cloud.google.com/tasks/docs/creating-http-target-tasks#sa for more information on
431
+ # setting up service accounts for use with Cloud Tasks.
432
+ #
433
+ # Supported since: v0.14.0
434
+ #
435
+ # Default: nil
436
+ #
437
+ # config.oidc = { service_account_email: 'example@gserviceaccount.com' }
438
+ # config.oidc = { service_account_email: 'example@gserviceaccount.com', audience: 'https://api.example.net' }
439
+
440
+ #
441
+ # Enable/disable the verification of SSL certificates on the local processing server when
442
+ # sending tasks to the processor.
443
+ #
444
+ # Set to false to disable SSL verification (OpenSSL::SSL::VERIFY_NONE).
445
+ #
446
+ # Default: true
447
+ #
448
+ # config.local_server_ssl_verify = true
449
+ end
450
+ ```
451
+
452
+ If the default queue `<gcp_queue_prefix>-default` does not exist in Cloud Tasks you should [create it using the gcloud sdk](https://cloud.google.com/tasks/docs/creating-queues).
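+
+ For example, assuming `gcp_queue_prefix` is set to `my-app` (adjust the queue name, and pass your location if your gcloud configuration does not define a default one):
+ ```bash
+ gcloud tasks queues create my-app-default
+ ```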
453
+
454
+ Alternatively with Rails you can simply run the following rake task if you have queue admin permissions (`cloudtasks.queues.get` and `cloudtasks.queues.create`).
455
+ ```bash
456
+ bundle exec rake cloudtasker:setup_queue
457
+ ```
458
+
459
+ ## Enqueuing jobs
460
+
461
+ Cloudtasker provides multiple ways of enqueuing jobs.
462
+
463
+ ```ruby
464
+ # Worker will be processed as soon as possible
465
+ MyWorker.perform_async(arg1, arg2)
466
+
467
+ # Worker will be processed in 5 minutes
468
+ MyWorker.perform_in(5 * 60, arg1, arg2)
469
+ # or with Rails
470
+ MyWorker.perform_in(5.minutes, arg1, arg2)
471
+
472
+ # Worker will be processed on a specific date
473
+ MyWorker.perform_at(Time.parse('2025-01-01 00:50:00Z'), arg1, arg2)
474
+ # also with Rails
475
+ MyWorker.perform_at(3.days.from_now, arg1, arg2)
476
+
477
+ # With all options, including which queue to run the worker on.
478
+ MyWorker.schedule(args: [arg1, arg2], time_at: Time.parse('2025-01-01 00:50:00Z'), queue: 'critical')
479
+ # or
480
+ MyWorker.schedule(args: [arg1, arg2], time_in: 5 * 60, queue: 'critical')
481
+
482
+ # Perform worker immediately, inline. This will not send the job to
483
+ # the processing queue. Middlewares such as Unique Job, Batch Jobs will still be invoked.
484
+ # Supported since: v0.15.rc2
485
+ MyWorker.perform_now(arg1, arg2)
486
+ ```
487
+
488
+ Cloudtasker also provides a helper for re-enqueuing jobs. Re-enqueued jobs keep the same job id. Some middlewares may rely on this to track the fact that a job didn't actually complete (e.g. Cloudtasker batch). This is optional and you can always fall back to using exception management (raise an error) to retry/re-enqueue jobs.
489
+
490
+ E.g.
491
+ ```ruby
492
+ # app/workers/fetch_resource_worker.rb
493
+
494
+ class FetchResourceWorker
495
+ include Cloudtasker::Worker
496
+
497
+ def perform(id)
498
+ # ...do some logic...
499
+ if some_condition
500
+ # Stop and re-enqueue the job to be run again in 10 seconds.
501
+ # Also see the section on Cloudtasker::RetryWorkerError for a different
502
+ # approach on reenqueuing.
503
+ return reenqueue(10)
504
+ else
505
+ # ...keep going...
506
+ end
507
+ end
508
+ end
509
+ ```
510
+
511
+ ## Managing worker queues
512
+
513
+ Cloudtasker allows you to manage several queues and distribute workers across them based on job priority. By default jobs are pushed to the `default` queue, which is `<gcp_queue_prefix>-default` in Cloud Tasks.
514
+
515
+ ### Creating queues
516
+
517
+ More queues can be created using the gcloud sdk or the `cloudtasker:setup_queue` rake task.
518
+
519
+ E.g. Create a `critical` queue with a concurrency of 5 via the gcloud SDK
520
+ ```bash
521
+ gcloud tasks queues create <gcp_queue_prefix>-critical --max-concurrent-dispatches=5
522
+ ```
523
+
524
+ E.g. Create a `real-time` queue with a concurrency of 15 via the rake task (Rails only)
525
+ ```bash
526
+ rake cloudtasker:setup_queue name=real-time concurrency=15
527
+ ```
528
+
529
+ When running the Cloudtasker local processing server, you can specify the concurrency for each queue using:
530
+ ```bash
531
+ cloudtasker -q critical,5 -q important,4 -q default,3
532
+ ```
533
+
534
+ ### Assigning queues to workers
535
+
536
+ Queues can be assigned to workers via the `cloudtasker_options` directive on the worker class:
537
+
538
+ ```ruby
539
+ # app/workers/critical_worker.rb
540
+
541
+ class CriticalWorker
542
+ include Cloudtasker::Worker
543
+
544
+ cloudtasker_options queue: :critical
545
+
546
+ def perform(some_arg)
547
+ logger.info("This is a critical job run with arg=#{some_arg}.")
548
+ end
549
+ end
550
+ ```
551
+
552
+ Queues can also be assigned at runtime when scheduling a job:
553
+ ```ruby
554
+ CriticalWorker.schedule(args: [1], queue: :important)
555
+ ```
556
+
557
+ ### Propagating the queue in child workers
558
+ **Supported since:** `v0.15.rc2`
559
+
560
+ You can specify `propagate_queue: true` via the `cloudtasker_options` to make workers enqueued inside a job use the runtime queue instead of the default (class-configured or `default`) queue:
561
+
562
+ ```ruby
563
+ # app/workers/child_worker.rb
564
+
565
+ class ChildWorker
566
+ include Cloudtasker::Worker
567
+
568
+ cloudtasker_options queue: :level2
569
+
570
+ def perform(some_arg)
571
+ logger.info("This is a child job, which is set to run on the level2 queue by default.")
572
+ end
573
+ end
574
+ ```
575
+
576
+ ```ruby
577
+ # app/workers/parent_worker.rb
578
+
579
+ class ParentWorker
580
+ include Cloudtasker::Worker
581
+
582
+ cloudtasker_options queue: :level1, propagate_queue: true
583
+
584
+ def perform(some_arg)
585
+ logger.info("This is a parent job, which is set to run on the level1 queue by default.")
586
+
587
+ # This worker will run on queue 'level1' instead of 'level2' because
588
+ # the "propagate_queue: true" has been specified on the parent.
589
+ ChildWorker.perform_async(some_arg)
590
+
591
+ # This worker will run on queue 'level3' because the queue has been explicitly
592
+ # specified on the scheduling options. It overrides the propagate_queue behaviour.
593
+ ChildWorker.schedule(queue: 'level3', args: [some_arg])
594
+
595
+ # This worker will run on queue 'level4' and the first ChildWorker it enqueues
596
+ # will also run on queue 'level4'. The second ChildWorker will, however, run
597
+ # on queue 'level3', as explained above.
598
+ ParentWorker.schedule(queue: 'level4', args: [some_arg])
599
+ end
600
+ end
601
+ ```
602
+
603
+
604
+ ## Extensions
605
+ **Note**: Extensions are not available when using cloudtasker via ActiveJob.
606
+
607
+ Cloudtasker comes with four optional features:
608
+ - Cron Jobs [[docs](docs/CRON_JOBS.md)]: Run jobs at fixed intervals.
609
+ - Batch Jobs [[docs](docs/BATCH_JOBS.md)]: Run jobs within jobs and track completion of the overall batch.
610
+ - Unique Jobs [[docs](docs/UNIQUE_JOBS.md)]: Ensure uniqueness of jobs based on job arguments.
611
+ - Storable Jobs [[docs](docs/STORABLE_JOBS.md)]: Park jobs until they are ready to be enqueued.
612
+
613
+ ## Working locally
614
+
615
+ Cloudtasker pushes jobs to Google Cloud Tasks, which in turn sends them for processing to your application via HTTP POST requests to the `/cloudtasker/run` endpoint on your application's publicly accessible domain.
616
+
617
+ When working locally on your application it is usually not possible to have a public domain. So what are the options?
618
+
619
+ ### Option 1: Cloudtasker local server
620
+ The Cloudtasker local server is a ruby daemon that looks for jobs pushed to Redis and sends them to your application via HTTP POST requests. The server mimics the way Google Cloud Tasks works, but locally!
621
+
622
+ You can configure your application to use the Cloudtasker local server using the following initializer:
623
+ ```ruby
624
+ # config/initializers/cloudtasker.rb
625
+
626
+ Cloudtasker.configure do |config|
627
+ # ... other options
628
+
629
+ # Push jobs to redis and let the Cloudtasker local server collect them
630
+ # This is the default mode unless CLOUDTASKER_ENV or RAILS_ENV or RACK_ENV is set
631
+ # to a non-development environment
632
+ config.mode = :development
633
+ end
634
+ ```
635
+
636
+ The Cloudtasker server can then be started using:
637
+ ```bash
638
+ bundle exec cloudtasker
639
+ ```
640
+
641
+ You can also define a Procfile to manage the cloudtasker process via foreman. Then use `foreman start` to launch both your Rails server and the Cloudtasker local server.
642
+ ```yaml
643
+ # Procfile
644
+ web: bundle exec rails s
645
+ worker: bundle exec cloudtasker
646
+ ```
647
+
648
+ Note that the local development server runs with `5` concurrent threads by default. You can tune the number of threads per queue by running `cloudtasker` with the following options:
649
+ ```bash
650
+ bundle exec cloudtasker -q critical,5 -q important,4 -q default,3
651
+ ```
652
+
653
+ ### Option 2: Using ngrok
654
+
655
+ Want to test your application end to end with Google Cloud Tasks? Then [ngrok](https://ngrok.io) is the way to go.
656
+
657
+ First start your ngrok tunnel:
658
+ ```bash
659
+ ngrok http 3000
660
+ ```
661
+
662
+ Take note of your ngrok domain and configure Cloudtasker to use Google Cloud Tasks in development via ngrok.
663
+ ```ruby
664
+ # config/initializers/cloudtasker.rb
665
+
666
+ Cloudtasker.configure do |config|
667
+ # Specify your Google Cloud Task queue configuration
668
+ config.gcp_location_id = 'us-central1'
669
+ config.gcp_project_id = 'my-gcp-project'
670
+ config.gcp_queue_prefix = 'my-app'
671
+
672
+ # Use your ngrok domain as the processor host
673
+ config.processor_host = 'https://your-tunnel-id.ngrok.io'
674
+
675
+ # Force Cloudtasker to use Google Cloud Tasks in development
676
+ config.mode = :production
677
+ end
678
+ ```
679
+
680
+ Finally start Rails to accept jobs from Google Cloud Tasks
681
+ ```bash
682
+ bundle exec rails s
683
+ ```
684
+
685
+ ## Logging
686
+ There are several options available to configure logging and logging context.
687
+
688
+ ### Configuring a logger
689
+ Cloudtasker uses `Rails.logger` if Rails is available and falls back on a plain ruby logger `Logger.new(STDOUT)` if not.
690
+
691
+ It is also possible to configure your own logger. For example you can setup Cloudtasker with [semantic_logger](http://rocketjob.github.io/semantic_logger) by doing the following in your initializer:
692
+ ```ruby
693
+ # config/initializers/cloudtasker.rb
694
+
695
+ Cloudtasker.configure do |config|
696
+ config.logger = SemanticLogger[Cloudtasker]
697
+ end
698
+ ```
699
+
700
+ ### Logging context
701
+ Cloudtasker provides worker contextual information to the worker `logger` method inside your worker methods.
702
+
703
+ For example:
704
+ ```ruby
705
+ # app/workers/dummy_worker.rb
706
+
707
+ class DummyWorker
708
+ include Cloudtasker::Worker
709
+
710
+ def perform(some_arg)
711
+ logger.info("Job run with #{some_arg}. This is working!")
712
+ end
713
+ end
714
+ ```
715
+
716
+ Will generate the following log with context `{:worker=> ..., :job_id=> ..., :job_meta=> ..., :task_id=> ...}`
717
+ ```log
718
+ [Cloudtasker][d76040a1-367e-4e3b-854e-e05a74d5f773] Job run with foo. This is working!: {:worker=>"DummyWorker", :job_id=>"d76040a1-367e-4e3b-854e-e05a74d5f773", :job_meta=>{}, :task_id => "4e755d3f-6de0-426c-b4ac-51edd445c045"}
719
+ ```
720
+
721
+ The way contextual information is displayed depends on the logger itself. For example with [semantic_logger](http://rocketjob.github.io/semantic_logger) contextual information might not appear in the log message but show up as payload data on the log entry itself (e.g. using the fluentd adapter).
722
+
723
+ Contextual information can be customised globally and locally using a log context_processor. By default the `Cloudtasker::WorkerLogger` is configured the following way:
724
+ ```ruby
725
+ Cloudtasker::WorkerLogger.log_context_processor = ->(worker) { worker.to_h.slice(:worker, :job_id, :job_meta, :job_queue, :task_id) }
726
+ ```
727
+
728
+ You can decide to add a global identifier for your worker logs using the following:
729
+ ```ruby
730
+ # config/initializers/cloudtasker.rb
731
+
732
+ Cloudtasker::WorkerLogger.log_context_processor = lambda { |worker|
733
+ worker.to_h.slice(:worker, :job_id, :job_meta, :job_queue, :task_id).merge(app: 'my-app')
734
+ }
735
+ ```
736
+
737
+ You could also decide to log all available context - including arguments passed to `perform` - for specific workers only:
738
+ ```ruby
739
+ # app/workers/full_context_worker.rb
740
+
741
+ class FullContextWorker
742
+ include Cloudtasker::Worker
743
+
744
+ cloudtasker_options log_context_processor: ->(worker) { worker.to_h }
745
+
746
+ def perform(some_arg)
747
+ logger.info("This log entry will have full context!")
748
+ end
749
+ end
750
+ ```
751
+
752
+ See the [Cloudtasker::Worker class](lib/cloudtasker/worker.rb) for more information on attributes available to be logged in your `log_context_processor` proc.
753
+
754
+ ### Truncating log arguments
755
+ **Supported since**: `v0.14.0`
756
+
757
+ By default Cloudtasker does not log job arguments as arguments can contain sensitive data and generate voluminous logs, which may lead to noticeable costs with your log provider (e.g. GCP Logging). Also some providers (e.g. GCP Logging) will automatically truncate log entries that are too big and reduce their searchability.
758
+
759
+ Job arguments can be logged for all workers by configuring the following log context processor in your Cloudtasker initializer:
760
+ ```ruby
761
+ Cloudtasker::WorkerLogger.log_context_processor = ->(worker) { worker.to_h }
762
+ ```
763
+
764
+ In order to reduce the size of logged job arguments, the following `truncate` utility is provided by Cloudtasker:
765
+ ```ruby
766
+ # string_limit: The maximum size for strings. Default is 64. Set to -1 to disable.
767
+ # array_limit: The maximum length for arrays. Default is 10. Set to -1 to disable.
768
+ # max_depth: The maximum recursive depth. Default is 3. Set to -1 to disable.
769
+ Cloudtasker::WorkerLogger.truncate(payload, string_limit: 64, array_limit: 10, max_depth: 3)
770
+ ```
771
+
772
+ You may use it in the following way:
773
+ ```ruby
774
+ Cloudtasker::WorkerLogger.log_context_processor = lambda do |worker|
775
+ payload = worker.to_h
776
+
777
+ # Using default options
778
+ payload[:job_args] = Cloudtasker::WorkerLogger.truncate(payload[:job_args])
779
+
780
+ # Using custom options
781
+ # payload[:job_args] = Cloudtasker::WorkerLogger.truncate(payload[:job_args], string_limit: 32, array_limit: 5, max_depth: 2)
782
+
783
+ # Return the payload to log
784
+ payload
785
+ end
786
+ ```
787
+
788
+ To further reduce logging costs, you may also log a reasonably complete version of the job arguments when the job starts, then log a watered-down version for the remaining log entries:
789
+ ```ruby
790
+ Cloudtasker::WorkerLogger.log_context_processor = lambda do |worker|
791
+ payload = worker.to_h
792
+
793
+ # Adjust the log payload based on the lifecycle of the job
794
+ payload[:job_args] = if worker.perform_started_at
795
+ # The job start has already been logged. Log the job primitive arguments without depth.
796
+ # Arrays and hashes will be masked.
797
+ Cloudtasker::WorkerLogger.truncate(payload[:job_args], max_depth: 0)
798
+ else
799
+ # This is the job start. Log a more complete version of the job args.
800
+ Cloudtasker::WorkerLogger.truncate(payload[:job_args])
801
+ end
802
+
803
+ # Return the payload to log
804
+ payload
805
+ end
806
+ ```
807
+
808
+ ### Searching logs: Job ID vs Task ID
809
+ **Note**: `task_id` field is available in logs starting with `0.10.0`
810
+
811
+ Job instances are assigned two different IDs for tracking and logging purposes: `job_id` and `task_id`. These IDs are found in each log entry to facilitate search.
812
+
813
+ | Field | Definition |
814
+ |------|-------------|
815
+ | `job_id` | This ID is generated by Cloudtasker. It identifies the job along its entire lifecycle. It is persistent across retries and reschedules. |
816
+ | `task_id` | This ID is generated by Google Cloud Tasks. It identifies a job instance on the Google Cloud Task side. It is persistent across retries but NOT across reschedules. |
817
+
818
+ The Google Cloud Tasks UI (GCP console) lists all pending/retrying tasks and their associated task ID (also called "Task name"). From there you can:
819
+ 1. Use a task ID to lookup the logs of a specific job instance in Stackdriver Logging (or any other logging solution).
820
+ 2. From (1) you can retrieve the `job_id` attribute of the job.
821
+ 3. From (2) you can use the `job_id` to lookup the job logs along its entire lifecycle.
822
+
823
+ ## Error Handling
824
+
825
+ Job failures will return an HTTP error to Cloud Tasks and trigger a retry at a later time. The number of Cloud Tasks retries depends on the configuration of your queue in Cloud Tasks.
826
+
827
+ ### HTTP Error codes
828
+
829
+ Jobs will automatically return the following HTTP codes to Cloud Tasks, based on the processing outcome:
830
+
831
+ | Code | Description |
832
+ |------|-------------|
833
+ | 204 | The job was processed successfully |
834
+ | 205 | The job is dead and has been removed from the queue |
835
+ | 404 | The job has specified an incorrect worker class. |
836
+ | 422 | An error happened during the execution of the worker (`perform` method) |
837
+
838
+ ### Worker callbacks
839
+
840
+ Workers can implement the `on_error(error)` and `on_dead(error)` callbacks to run custom logic when a job fails during its execution:
841
+
842
+ E.g.
843
+ ```ruby
844
+ # app/workers/handle_error_worker.rb
845
+
846
+ class HandleErrorWorker
847
+ include Cloudtasker::Worker
848
+
849
+ def perform
850
+ raise(ArgumentError)
851
+ end
852
+
853
+ # The runtime error is passed as an argument.
854
+ def on_error(error)
855
+ logger.error("The following error happened: #{error}")
856
+ end
857
+
858
+ # The job has been retried too many times and will be removed
859
+ # from the queue.
860
+ def on_dead(error)
861
+ logger.error("The job died with the following error: #{error}")
862
+ end
863
+ end
864
+ ```
865
+
866
+ ### Global callbacks
867
+ **Supported since**: `0.12.0`
868
+
869
+ If you need to apply general exception handling logic to your workers you can specify `on_error` and `on_dead` hooks in the Cloudtasker configuration.
870
+
871
+ This is useful if you need to report errors to third-party services such as Rollbar or Bugsnag.
872
+
873
+ ```ruby
874
+ # config/initializers/cloudtasker.rb
875
+
876
+ Cloudtasker.configure do |config|
877
+ #
878
+ # Report runtime and dead worker errors to Rollbar
879
+ #
880
+ config.on_error = -> (error, _worker) { Rollbar.error(error) }
881
+ config.on_dead = -> (error, _worker) { Rollbar.error(error) }
882
+ end
883
+ ```
884
+
885
+ ### Max retries
886
+
887
+ By default jobs are retried 25 times - using an exponential backoff - before being declared dead. This number of retries can be customized locally on workers and/or globally via the Cloudtasker initializer.
888
+
889
+ Note that the number of retries set on your Cloud Tasks queue should be many times higher than the number of retries configured in Cloudtasker because Cloud Tasks also counts failures to connect to your application. Ideally, set the number of retries to `unlimited` in Cloud Tasks.
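+
+ As an illustration, the queue-level retry limit can be raised or removed with the gcloud SDK (the queue name is a placeholder; check `gcloud tasks queues update --help` for the flags available in your gcloud version):
+ ```bash
+ # A negative value means unlimited attempts on the Cloud Tasks side, leaving
+ # Cloudtasker's max_retries as the effective limit for application failures
+ gcloud tasks queues update my-app-default --max-attempts=-1
+ ```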
890
+
891
+ **Note**: Versions prior to `v0.14.0` use the `X-CloudTasks-TaskRetryCount` header instead of the `X-CloudTasks-TaskExecutionCount` header to detect the number of retries, because of a previous bug on the GCP side which made `X-CloudTasks-TaskExecutionCount` stay at zero instead of increasing on successive executions. Versions prior to `v0.14.0` count any failure as a failure, including failures due to the backend being unavailable (`HTTP 503`). Versions `v0.14.0` and later only count application failures (`HTTP 4xx`) as failures for retry purposes.
892
+
893
+ E.g. Set max number of retries globally via the cloudtasker initializer.
894
+ ```ruby
895
+ # config/initializers/cloudtasker.rb
896
+
897
+ Cloudtasker.configure do |config|
898
+ #
899
+ # Specify how many retries are allowed on jobs. This number of retries excludes any
900
+ # connectivity error that would be due to the application being down or unreachable.
901
+ #
902
+ # Default: 25
903
+ #
904
+ config.max_retries = 10
905
+ end
906
+ ```
907
+
908
+ E.g. Set max number of retries to 3 on a given worker
909
+ ```ruby
910
+ # app/workers/some_error_worker.rb
911
+
912
+ class SomeErrorWorker
913
+ include Cloudtasker::Worker
914
+
915
+ # This will override the global setting
916
+ cloudtasker_options max_retries: 3
917
+
918
+ def perform
919
+ raise(ArgumentError)
920
+ end
921
+ end
922
+ ```
923
+
924
+ E.g. Evaluate the number of max retries at runtime (Supported since: v0.10.1)
925
+ ```ruby
926
+ # app/workers/some_error_worker.rb
927
+
928
+ class SomeErrorWorker
929
+ include Cloudtasker::Worker
930
+
931
+ # Return the number of max retries based on
932
+ # worker arguments.
933
+ #
934
+ # If this method returns nil then max_retries
935
+ # will delegate to the class `max_retries` setting or Cloudtasker
936
+ # `max_retries` configuration option.
937
+ def max_retries(arg1, arg2)
938
+ arg1 == 'foo' ? 13 : nil
939
+ end
940
+
941
+ def perform(arg1, arg2)
942
+ raise(ArgumentError)
943
+ end
944
+ end
945
+ ```
946
+
947
+ ### Conditional reenqueues using retry errors
948
+ **Supported since**: `v0.14.0`
949
+
950
+ If your worker is waiting for some precondition to occur and you want to re-enqueue it until the condition has been met, you can raise a `Cloudtasker::RetryWorkerError`. This special error will fail your job **without logging an error** while still increasing the number of retries.
951
+
952
+ This is a safer approach than using the `reenqueue` helper, which can lead to forever running jobs if not used properly.
953
+
954
+ ```ruby
955
+ # app/workers/my_worker.rb
956
+
957
+ class MyWorker
958
+ include Cloudtasker::Worker
959
+
960
+ def perform(project_id)
961
+ # Abort if project does not exist
962
+ return unless (project = Project.find_by(id: project_id))
963
+
964
+ # Trigger a retry if the project is still in "discovering" status
965
+ # This error will NOT log an error. It only triggers a retry.
966
+ raise Cloudtasker::RetryWorkerError if project.status == 'discovering'
967
+
968
+ # The previous approach was to use `reenqueue`. This works but since it
969
+ # does not increase the number of retries, you may end up with forever running
970
+ # jobs
971
+ # return reenqueue(10) if project.status == 'discovering'
972
+
973
+ # Do stuff when project is no longer discovering
974
+ do_some_stuff
975
+ end
976
+
977
+ # You can then specify what should be done if we've been waiting for too long
978
+ def on_dead(error)
979
+ logger.error("Looks like the project is forever discovering. Time to give up.")
980
+
981
+ # This is of course an imaginary method
982
+ send_slack_notification_to_internal_support_team(worker: self.class, args: job_args)
983
+ end
984
+ end
985
+ ```
986
+
987
+ ### Dispatch deadline
988
+ **Supported since**: `0.12.0`
989
+
990
+ By default Cloud Tasks will automatically timeout your jobs after 10 minutes, independently of your server HTTP timeout configuration.
991
+
992
+ You can modify the dispatch deadline for jobs at a global level or on a per job basis.
993
+
994
+ E.g. Set the default dispatch deadline to 20 minutes.
995
+ ```ruby
996
+ # config/initializers/cloudtasker.rb
997
+
998
+ Cloudtasker.configure do |config|
999
+ #
1000
+ # Specify the dispatch deadline for jobs in Cloud Tasks, in seconds.
1001
+ # Jobs taking longer will be retried by Cloud Tasks, even if they eventually
1002
+ # complete on the server side.
1003
+ #
1004
+ # Note that this option is applied when jobs are enqueued. Changing this value
1005
+ # will not impact already enqueued jobs.
1006
+ #
1007
+ # Default: 600 (10 minutes)
1008
+ #
1009
+ config.dispatch_deadline = 20 * 60 # 20 minutes
1010
+ end
1011
+ ```
1012
+
1013
+ E.g. Set a dispatch deadline of 5 minutes on a specific worker
1014
+ ```ruby
1015
+ # app/workers/some_faster_worker.rb
1016
+
1017
+ class SomeFasterWorker
1018
+ include Cloudtasker::Worker
1019
+
1020
+ # This will override the global setting
1021
+ cloudtasker_options dispatch_deadline: 5 * 60
1022
+
1023
+ def perform
1024
+ # ... do things ...
1025
+ end
1026
+ end
1027
+ ```
1028
+
1029
+ ## Testing
1030
+ Cloudtasker provides several options to test your workers.
1031
+
1032
+ ### Test helper setup
1033
+ Require `cloudtasker/testing` in your `rails_helper.rb` (RSpec Rails), `spec_helper.rb` (RSpec) or test unit helper file, then enable one of the three modes:
1034
+
1035
+ ```ruby
1036
+ require 'cloudtasker/testing'
1037
+
1038
+ # Mode 1 (default): Push jobs to Google Cloud Tasks (env != development) or Redis (env == development)
1039
+ Cloudtasker::Testing.enable!
1040
+
1041
+ # Mode 2: Push jobs to an in-memory queue. Jobs will not be processed until you call
1042
+ # Cloudtasker::Worker.drain_all (process all jobs) or MyWorker.drain (process jobs for specific worker)
1043
+ Cloudtasker::Testing.fake!
1044
+
1045
+ # Mode 3: Push jobs to an in-memory queue. Jobs will be processed immediately.
1046
+ Cloudtasker::Testing.inline!
1047
+ ```
1048
+
1049
+ You can query the current testing mode with:
1050
+ ```ruby
1051
+ Cloudtasker::Testing.enabled?
1052
+ Cloudtasker::Testing.fake?
1053
+ Cloudtasker::Testing.inline?
1054
+ ```
1055
+
1056
+ Each testing mode accepts a block argument to temporarily switch to it:
1057
+ ```ruby
1058
+ # Enable fake mode for all tests
1059
+ Cloudtasker::Testing.fake!
1060
+
1061
+ # Enable inline! mode temporarily for a given test
1062
+ Cloudtasker::Testing.inline! do
1063
+ MyWorker.perform_async(1,2)
1064
+ end
1065
+ ```
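+
+ The block form also makes it easy to opt into a mode from your test configuration. A minimal sketch, assuming RSpec and a hypothetical `inline_jobs` tag:
+ ```ruby
+ # spec/spec_helper.rb
+
+ RSpec.configure do |config|
+   # Process jobs inline for examples tagged with `inline_jobs: true`
+   config.around(:each, inline_jobs: true) do |example|
+     Cloudtasker::Testing.inline! { example.run }
+   end
+ end
+ ```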
1066
+
1067
+ Note that extension middlewares - e.g. unique job, batch job etc. - run in test mode. You can disable middlewares in your tests by adding the following to your test helper:
1068
+ ```ruby
1069
+ # Remove all middlewares
1070
+ Cloudtasker.configure do |c|
1071
+ c.client_middleware.clear
1072
+ c.server_middleware.clear
1073
+ end
1074
+
1075
+ # Remove all unique job middlewares
1076
+ Cloudtasker.configure do |c|
1077
+ c.client_middleware.remove(Cloudtasker::UniqueJob::Middleware::Client)
1078
+ c.server_middleware.remove(Cloudtasker::UniqueJob::Middleware::Server)
1079
+ end
1080
+ ```
1081
+
1082
+ ### In-memory queues
1083
+ The `fake!` or `inline!` modes use in-memory queues, which can be queried and controlled using the following methods:
1084
+
1085
+ ```ruby
1086
+ # Perform all jobs in queue
1087
+ Cloudtasker::Worker.drain_all
1088
+
1089
+ # Remove all jobs in queue
1090
+ Cloudtasker::Worker.clear_all
1091
+
1092
+ # Perform all jobs in queue for a specific worker type
1093
+ MyWorker.drain
1094
+
1095
+ # Return the list of jobs in queue for a specific worker type
1096
+ MyWorker.jobs
1097
+ ```
1098
+
1099
+ ### Unit tests
1100
+ Below are examples of rspec tests. It is assumed that `Cloudtasker::Testing.fake!` has been set in the test helper.
1101
+
1102
+ **Example 1**: Testing that a job is scheduled
1103
+ ```ruby
1104
+ describe 'worker scheduling' do
1105
+ subject(:enqueue_job) { MyWorker.perform_async(1,2) }
1106
+
1107
+ it { expect { enqueue_job }.to change(MyWorker.jobs, :size).by(1) }
1108
+ end
1109
+ ```
1110
+
1111
+ **Example 2**: Testing job execution logic
1112
+ ```ruby
1113
+ describe 'worker calls api' do
1114
+ subject { Cloudtasker::Testing.inline! { MyApiWorker.perform_async(1,2) } }
1115
+
1116
+ before { expect(MyApi).to receive(:fetch).and_return([]) }
1117
+ it { is_expected.to be_truthy }
1118
+ end
1119
+ ```
1120
+
1121
+ ## Best practices building workers
1122
+
1123
+ Below are recommendations and notes about creating workers.
1124
+
1125
+ ### Use primitive arguments
1126
+ Pushing a job via `MyWorker.perform_async(arg1, arg2)` will serialize all arguments as JSON. Cloudtasker does not do any magic marshalling and therefore passing user-defined class instances as arguments is likely to make your jobs fail because of JSON serialization/deserialization.
1127
+
1128
+ When defining your worker `perform` method, use primitive arguments (integers, strings, hashes).
1129
+
1130
+ Don't do this:
1131
+ ```ruby
1132
+ # app/workers/user_email_worker.rb
1133
+
1134
+ class UserEmailWorker
1135
+ include Cloudtasker::Worker
1136
+
1137
+ def perform(user)
1138
+ user.reload.send_email
1139
+ end
1140
+ end
1141
+ ```
1142
+
1143
+ Do this:
1144
+ ```ruby
1145
+ # app/workers/user_email_worker.rb
1146
+
1147
+ class UserEmailWorker
1148
+ include Cloudtasker::Worker
1149
+
1150
+ def perform(user_id)
1151
+ User.find_by(id: user_id)&.send_email
1152
+ end
1153
+ end
1154
+ ```
1155
+
1156
+ ### Assume hash arguments are stringified
1157
+ Because of JSON serialization/deserialization, hashes passed to `perform_*` methods will eventually be passed as stringified hashes to the worker `perform` method.
1158
+
1159
+ ```ruby
1160
+ # Enqueuing a job with:
1161
+ MyWorker.perform_async({ foo: 'bar', 'baz' => { key: 'value' } })
1162
+
1163
+ # will be processed as
1164
+ MyWorker.new.perform({ 'foo' => 'bar', 'baz' => { 'key' => 'value' } })
1165
+ ```
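+
+ If your worker prefers symbol access, one defensive pattern is to normalise the hash at the top of `perform`. A minimal sketch, assuming ActiveSupport is available (as in a Rails app):
+ ```ruby
+ # app/workers/my_worker.rb
+
+ class MyWorker
+   include Cloudtasker::Worker
+
+   def perform(options)
+     # Keys arrive as strings after the JSON round-trip
+     options = options.deep_symbolize_keys
+
+     logger.info("Job run with #{options[:foo]}")
+   end
+ end
+ ```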
1166
+
1167
+ ### Be careful with default arguments
1168
+ Default arguments passed to the `perform` method are not actually considered as job arguments. Default arguments will therefore be ignored in contextual logging and by extensions relying on arguments such as the [unique job](docs/UNIQUE_JOBS.md) extension.
1169
+
1170
+ Consider the following worker:
1171
+ ```ruby
1172
+ # app/workers/user_email_worker.rb
1173
+
1174
+ class UserEmailWorker
1175
+ include Cloudtasker::Worker
1176
+
1177
+ cloudtasker_options lock: :until_executed
1178
+
1179
+ def perform(user_id, time_at = Time.now.iso8601)
1180
+ User.find_by(id: user_id)&.send_email(Time.parse(time_at))
1181
+ end
1182
+ end
1183
+ ```
1184
+
1185
+ If you enqueue this worker by omitting the second argument `MyWorker.perform_async(123)` then:
1186
+ - The `time_at` argument will not be included in contextual logging
1187
+ - The `time_at` argument will be ignored by the `unique-job` extension, meaning that job uniqueness will be based only on the `user_id` argument (see the example below).
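+
+ If the timestamp matters for logging or uniqueness, pass it explicitly so that it becomes a real job argument (illustrative call):
+ ```ruby
+ # `time_at` is now part of the job arguments: it will be logged and taken
+ # into account by the unique-job lock.
+ UserEmailWorker.perform_async(123, Time.now.iso8601)
+ ```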
1188
+
1189
+ ### Handling big job payloads
1190
+ Google Cloud Tasks enforces a limit of 100 KB for job payloads. Taking into account Cloudtasker authentication headers and meta information, this leaves ~85 KB of free space for JSONified job arguments.
1191
+
1192
+ Any excessive job payload (> 100 KB) will raise a `Cloudtasker::MaxTaskSizeExceededError`, both in production and development mode.
1193
+
1194
+ #### Option 1: Use Cloudtasker optional support for payload storage in Redis
1195
+ **Supported since**: `0.10.0`
1196
+
1197
+ Cloudtasker provides optional support for storing argument payloads in Redis instead of sending them to Google Cloud Tasks.
1198
+
1199
+ To enable it simply put the following in your Cloudtasker initializer:
1200
+ ```ruby
1201
+ # config/initializers/cloudtasker.rb
1202
+
1203
+ Cloudtasker.configure do |config|
1204
+ # Enable Redis support. Specify your redis connection
1205
+ config.redis = { url: 'redis://localhost:6379/5' }
1206
+
1207
+ # Store all job payloads in Redis:
1208
+ config.store_payloads_in_redis = true
1209
+
1210
+ # OR: store all job payloads in Redis exceeding 50 KB:
1211
+ # config.store_payloads_in_redis = 50
1212
+ end
1213
+ ```
1214
+
1215
+ #### Option 2: Do it yourself solution
1216
+
1217
+ If you feel that a job payload is going to get big, prefer to store the payload using a datastore (e.g. Redis) and pass a reference to the job to retrieve the payload inside your job `perform` method.
1218
+
1219
+ E.g. Define a job like this
1220
+ ```ruby
1221
+ # app/workers/big_payload_worker.rb
1222
+
1223
+ class BigPayloadWorker
1224
+ include Cloudtasker::Worker
1225
+
1226
+ def perform(payload_id)
1227
+ data = Rails.cache.fetch(payload_id)
1228
+ # ...do some processing...
1229
+ end
1230
+ end
1231
+ ```
1232
+
1233
+ Then enqueue your job like this:
1234
+ ```ruby
1235
+ # Fetch and store the payload
1236
+ data = ApiClient.fetch_thousands_of_records
1237
+ payload_id = SecureRandom.uuid
1238
+ Rails.cache.write(payload_id, data)
1239
+
1240
+ # Enqueue the processing job
1241
+ BigPayloadWorker.perform_async(payload_id)
1242
+ ```
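+
+ Because failed jobs are retried, avoid deleting the cached payload inside the worker itself. A simpler option is to give the cache entry a generous expiry when writing it (the duration below is illustrative):
+ ```ruby
+ # Keep the payload around long enough to cover retries, then let it expire
+ Rails.cache.write(payload_id, data, expires_in: 7.days)
+ ```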
1243
+
1244
+ ### Sizing the concurrency of your queues
1245
+
1246
+ When defining the max concurrency of your queues (`max_concurrent_dispatches` in Cloud Tasks) you must keep in mind the maximum number of threads that your application provides. Otherwise your application threads may eventually get exhausted and your users will experience outages if all your web threads are busy running jobs.
1247
+
1248
+ #### With server based applications
1249
+
1250
+ Let's consider an application deployed in production with 3 instances, each having `RAILS_MAX_THREADS` set to `20`. This gives us a total of `60` threads available.
1251
+
1252
+ Now let's say that we distribute jobs across two queues: `default` and `critical`. We can set the concurrency of each queue depending on the profile of the application:
1253
+
1254
+ E.g. 1: The application serves requests from web users and runs backgrounds jobs in a balanced way
1255
+ ```
1256
+ concurrency for default queue: 20
1257
+ concurrency for critical queue: 10
1258
+
1259
+ Total threads consumed by jobs at most: 30
1260
+ Total threads always available to web users at worst: 30
1261
+ ```
1262
+
1263
+ E.g. 2: The application is a micro-service API heavily focused on running jobs (e.g. data processing)
1264
+ ```
1265
+ concurrency for default queue: 35
1266
+ concurrency for critical queue: 15
1267
+
1268
+ Total threads consumed by jobs at most: 50
1269
+ Total threads always available to API clients at worst: 10
1270
+ ```
1271
+
1272
+ Also always ensure that your total number of threads does not exceed the available number of database connections (if you use any).
1273
+
1274
+ #### With serverless applications
1275
+
1276
+ In a serverless context your application will be scaled up/down based on traffic. When we say 'traffic' this includes requests from Cloud Tasks to run jobs.
1277
+
1278
+ Because your application is auto-scaled - and assuming you haven't set a maximum - your job processing capacity is theoretically unlimited. The main limiting factor in a serverless context becomes external constraints such as the number of database connections available.
1279
+
1280
+ To size the concurrency of your queues you should therefore take the most limiting factor - which is often the database connection pool size of relational databases - and use the calculations of the previous section with this limiting factor as the capping parameter instead of threads.
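+
+ E.g. An auto-scaled application backed by a relational database that accepts at most 100 concurrent connections (all numbers below are illustrative)
+ ```
+ max database connections: 100
+ connections reserved for web/API traffic: 40
+
+ concurrency for default queue: 45
+ concurrency for critical queue: 15
+
+ Total connections consumed by jobs at most: 60
+ Total connections always available to web/API traffic at worst: 40
+ ```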
1281
+
1282
+
1283
+ ## Development
1284
+
1285
+ After checking out the repo, run `bin/setup` to install dependencies.
1286
+
1287
+ Run `rake` to run the tests. Note that Rails is not in context by default, which means Rails-specific tests will not run.
1288
+ For tests including Rails-specific tests, run `bundle exec appraisal rails_7.0 rake`.
1289
+ For all context-specific tests (incl. Rails), run the [appraisal tests](Appraisals) using `bundle exec appraisal rake`.
1290
+
1291
+ You can run `bin/console` for an interactive prompt that will allow you to experiment.
1292
+
1293
+ To install this gem onto your local machine, run `bundle exec rake install`.
1294
+
1295
+ To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
1296
+
1297
+ ## Contributing
1298
+
1299
+ Bug reports and pull requests are welcome on GitHub at https://github.com/keypup-io/cloudtasker. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
1300
+
1301
+ ## License
1302
+
1303
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
1304
+
1305
+ ## Code of Conduct
1306
+
1307
+ Everyone interacting in the Cloudtasker project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/keypup-io/cloudtasker/blob/master/CODE_OF_CONDUCT.md).
1308
+
1309
+ ## Author
1310
+
1311
+ Provided with :heart: by [keypup.io](https://keypup.io/)