cloudtasker 0.7.0 → 0.8.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 92abbab630f4e60c7ae8a6b2e249a5e19d49ad9730788381790463cfda136aea
- data.tar.gz: c71fcf022efd34d3bba1e11341583b18581d1ae93bcd128b769b2e871e5e76f6
+ metadata.gz: e2a110c5354118a009e8620c887eb0c8bb9ff5c7aa63fefcf242ac9a649bd0e1
+ data.tar.gz: 58772d851865727f326bc30dfa3b94f170fe602eff264ce8661f2abd175033e9
  SHA512:
- metadata.gz: 4a0f52436da444c75530ceb49ac16ce4c76392383011774accb31418447b6d0b2677e1289cb5768be39bb51b2665b46f347e0496c98c9552bf8c1a82a324f6e1
- data.tar.gz: 5597abc655cef50010bbb38850cc26a5f440404aa8e3d0b645d49fe6bf34f4222d8402e1d61d3e99cf4a0093f7ccd66f561258a3b02595de7a7774cb21cda643
+ metadata.gz: af8b0e59a08d7e65bcc46f03135c17932b69816c2282bc651b1ae60887bde3ea0e52af051accb5606f1c2d042dd15dc224313a6aa7b8e6222aeef01806147464
+ data.tar.gz: 8b9d7e921aca496913a36357a43e8e3c0f77e059ac31439b3ccbe749afde946117a9faca70de11ee50ed6a9ff97b3439889b90bdb30db534ba4ac7690a0e3bfb
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
  # Changelog
 
+ ## [v0.7.0](https://github.com/keypup-io/cloudtasker/tree/v0.7.0) (2019-11-25)
+
+ [Full Changelog](https://github.com/keypup-io/cloudtasker/compare/v0.6.0...v0.7.0)
+
  ## [v0.6.0](https://github.com/keypup-io/cloudtasker/tree/v0.6.0) (2019-11-25)
 
  [Full Changelog](https://github.com/keypup-io/cloudtasker/compare/v0.5.0...v0.6.0)
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- cloudtasker (0.7.0)
+ cloudtasker (0.8.0)
  activesupport
  fugit
  google-cloud-tasks
@@ -110,7 +110,7 @@ GEM
  googleauth (~> 0.9)
  grpc (~> 1.24)
  rly (~> 0.2.3)
- google-protobuf (3.10.1-universal-darwin)
+ google-protobuf (3.11.0-universal-darwin)
  googleapis-common-protos (1.3.9)
  google-protobuf (~> 3.0)
  googleapis-common-protos-types (~> 1.0)
data/README.md CHANGED
@@ -18,18 +18,21 @@ A local processing server is also available in development. This local server pr
  1. [Cloud Tasks authentication & permissions](#cloud-tasks-authentication--permissions)
  2. [Cloudtasker initializer](#cloudtasker-initializer)
  4. [Enqueuing jobs](#enqueuing-jobs)
- 5. [Extensions](#extensions)
- 6. [Working locally](#working-locally)
+ 5. [Managing worker queues](#managing-worker-queues)
+ 1. [Creating queues](#creating-queues)
+ 2. [Assigning queues to workers](#assigning-queues-to-workers)
+ 6. [Extensions](#extensions)
+ 7. [Working locally](#working-locally)
  1. [Option 1: Cloudtasker local server](#option-1-cloudtasker-local-server)
  2. [Option 2: Using ngrok](#option-2-using-ngrok)
- 7. [Logging](#logging)
+ 8. [Logging](#logging)
  1. [Configuring a logger](#configuring-a-logger)
  2. [Logging context](#logging-context)
- 8. [Error Handling](#error-handling)
+ 9. [Error Handling](#error-handling)
  1. [HTTP Error codes](#http-error-codes)
  2. [Error callbacks](#error-callbacks)
  3. [Max retries](#max-retries)
- 9. [Best practices building workers](#best-practices-building-workers)
+ 10. [Best practices building workers](#best-practices-building-workers)
 
  ## Installation
 
@@ -157,13 +160,31 @@ Cloudtasker.configure do |config|
  # config.secret = 'some-long-token'
 
  #
- # Specify the details of your Google Cloud Task queue.
+ # Specify the details of your Google Cloud Task location.
  #
  # This is not required in development when using the Cloudtasker local server.
  #
  config.gcp_location_id = 'us-central1' # defaults to 'us-east1'
  config.gcp_project_id = 'my-gcp-project'
- config.gcp_queue_id = 'my-queue'
+
+ #
+ # Specify the namespace for your Cloud Task queues.
+ #
+ # The gem assumes that at least a default queue named 'my-app-default'
+ # exists in Cloud Tasks. You can create this default queue using the
+ # gcloud SDK or via the `rake cloudtasker:setup_queue` task if you use Rails.
+ #
+ # Workers can be scheduled on different queues. The name of the queue
+ # in Cloud Tasks is always assumed to be prefixed with the prefix below.
+ #
+ # E.g.
+ # Setting `cloudtasker_options queue: 'critical'` on a worker means that
+ # the worker will be pushed to 'my-app-critical' in Cloud Tasks.
+ #
+ # Specific queues can be created in Cloud Tasks using the gcloud SDK or
+ # via the `rake cloudtasker:setup_queue name=<queue_name>` task.
+ #
+ config.gcp_queue_prefix = 'my-app'
 
  #
  # Specify the publicly accessible host for your application
@@ -215,7 +236,7 @@ Cloudtasker.configure do |config|
  end
  ```
 
- If your queue does not exist in Cloud Tasks you should [create it using the gcloud sdk](https://cloud.google.com/tasks/docs/creating-queues).
+ If the default queue `<gcp_queue_prefix>-default` does not exist in Cloud Tasks you should [create it using the gcloud sdk](https://cloud.google.com/tasks/docs/creating-queues).
 
  Alternatively with Rails you can simply run the following rake task if you have queue admin permissions (`cloudtasks.queues.get` and `cloudtasks.queues.create`).
  ```bash
@@ -235,10 +256,15 @@ MyWorker.perform_in(5 * 60, arg1, arg2)
  # or with Rails
  MyWorker.perform_in(5.minutes, arg1, arg2)
 
- # Worker will be processed on specific date
+ # Worker will be processed on a specific date
  MyWorker.perform_at(Time.parse('2025-01-01 00:50:00Z'), arg1, arg2)
  # also with Rails
  MyWorker.perform_at(3.days.from_now, arg1, arg2)
+
+ # With all options, including which queue to run the worker on.
+ MyWorker.schedule(args: [arg1, arg2], time_at: Time.parse('2025-01-01 00:50:00Z'), queue: 'critical')
+ # or
+ MyWorker.schedule(args: [arg1, arg2], time_in: 5 * 60, queue: 'critical')
  ```
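
For readers skimming this hunk, here is a minimal self-contained sketch tying the scheduling helpers above together. The `ReportWorker` class and its argument are hypothetical illustrations, not part of the gem:

```ruby
# Hypothetical worker used only to illustrate the scheduling helpers shown above
class ReportWorker
  include Cloudtasker::Worker

  def perform(report_id)
    logger.info("Generating report #{report_id}")
  end
end

# Enqueue immediately, with a delay, or on a specific queue at runtime
ReportWorker.perform_async(123)
ReportWorker.perform_in(10 * 60, 123)
ReportWorker.schedule(args: [123], time_in: 10 * 60, queue: 'critical')
```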
 
  Cloudtasker also provides a helper for re-enqueuing jobs. Re-enqueued jobs keep the same worker id. Some middlewares may rely on this to track the fact that a job didn't actually complete (e.g. Cloudtasker batch). This is optional and you can always fall back to using exception management (raise an error) to retry/re-enqueue jobs.
@@ -262,6 +288,52 @@ class FetchResourceWorker
  end
  ```
 
+ ## Managing worker queues
+
+ Cloudtasker allows you to manage several queues and distribute workers across them based on job priority. By default jobs are pushed to the `default` queue, which is `<gcp_queue_prefix>-default` in Cloud Tasks.
+
+ ### Creating queues
+
+ More queues can be created using the gcloud sdk or the `cloudtasker:setup_queue` rake task.
+
+ E.g. Create a `critical` queue with a concurrency of 5 via the gcloud SDK
+ ```bash
+ gcloud tasks queues create <gcp_queue_prefix>-critical --max-concurrent-dispatches=5
+ ```
+
+ E.g. Create a `real-time` queue with a concurrency of 15 via the rake task (Rails only)
+ ```bash
+ rake cloudtasker:setup_queue name=real-time concurrency=15
+ ```
+
+ When running the Cloudtasker local processing server, you can specify the concurrency for each queue using:
+ ```bash
+ cloudtasker -q critical,5 -q important,4 -q default,3
+ ```
+
+ ### Assigning queues to workers
+
+ Queues can be assigned to workers via the `cloudtasker_options` directive on the worker class:
+
+ ```ruby
+ # app/workers/critical_worker.rb
+
+ class CriticalWorker
+ include Cloudtasker::Worker
+
+ cloudtasker_options queue: :critical
+
+ def perform(some_arg)
+ logger.info("This is a critical job run with arg=#{some_arg}.")
+ end
+ end
+ ```
+
+ Queues can also be assigned at runtime when scheduling a job:
+ ```ruby
+ CriticalWorker.schedule(args: [1], queue: :important)
+ ```
+
  ## Extensions
  Cloudtasker comes with three optional features:
  - Cron Jobs [[docs](docs/CRON_JOBS.md)]: Run jobs at fixed intervals.
@@ -303,6 +375,11 @@ web: rails s
  worker: cloudtasker
  ```
 
+ Note that the local development server runs with `5` concurrent threads by default. You can tune the number of threads per queue by running `cloudtasker` with the following options:
+ ```bash
+ cloudtasker -q critical,5 -q important,4 -q default,3
+ ```
+
  ### Option 2: Using ngrok
 
  Want to test your application end to end with Google Cloud Task? Then [ngrok](https://ngrok.io) is the way to go.
@@ -318,9 +395,9 @@ Take note of your ngrok domain and configure Cloudtasker to use Google Cloud Tas
 
  Cloudtasker.configure do |config|
  # Specify your Google Cloud Task queue configuration
- # config.gcp_location_id = 'us-central1'
- # config.gcp_project_id = 'my-gcp-project'
- # config.gcp_queue_id = 'my-queue'
+ config.gcp_location_id = 'us-central1'
+ config.gcp_project_id = 'my-gcp-project'
+ config.gcp_queue_prefix = 'my-app'
 
  # Use your ngrok domain as the processor host
  config.processor_host = 'https://your-tunnel-id.ngrok.io'
@@ -585,6 +662,45 @@ Rails.cache.write(payload_id, data)
  BigPayloadWorker.perform_async(payload_id)
  ```
 
+ ### Sizing the concurrency of your queues
+
+ When defining the max concurrency of your queues (`max_concurrent_dispatches` in Cloud Tasks) you must keep in mind the maximum number of threads that your application provides. Otherwise your application threads may eventually get exhausted and your users will experience outages if all your web threads are busy running jobs.
+
+ #### With server-based applications
+
+ Let's consider an application deployed in production with 3 instances, each having `RAILS_MAX_THREADS` set to `20`. This gives us a total of `60` threads available.
+
+ Now let's say that we distribute jobs across two queues: `default` and `critical`. We can set the concurrency of each queue depending on the profile of the application:
+
+ E.g. 1: The application serves requests from web users and runs background jobs in a balanced way
+ ```
+ concurrency for default queue: 20
+ concurrency for critical queue: 10
+
+ Total threads consumed by jobs at most: 30
+ Total threads always available to web users at worst: 30
+ ```
+
+ E.g. 2: The application is a micro-service API heavily focused on running jobs (e.g. data processing)
+ ```
+ concurrency for default queue: 35
+ concurrency for critical queue: 15
+
+ Total threads consumed by jobs at most: 50
+ Total threads always available to web users at worst: 10
+ ```
+
+ Also always ensure that your total number of threads does not exceed the available number of database connections (if you use any).
+
+ #### With serverless applications
+
+ In a serverless context your application will be scaled up/down based on traffic. When we say 'traffic' this includes requests from Cloud Tasks to run jobs.
+
+ Because your application is auto-scaled - and assuming you haven't set a maximum - your job processing capacity is theoretically unlimited. The main limiting factor in a serverless context becomes external constraints such as the number of database connections available.
+
+ To size the concurrency of your queues you should therefore take the most limiting factor - which is often the database connection pool size for RDBMS databases - and use the calculations of the previous section with this limiting factor as the capping parameter instead of the number of threads.
+
+
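
As a back-of-the-envelope check, the arithmetic from the first example above can be written out directly (all figures are taken from that example):

```ruby
# Thread budget from the server-based example above
instances          = 3
threads_per_server = 20 # RAILS_MAX_THREADS
total_threads      = instances * threads_per_server # => 60

queue_concurrency   = { default: 20, critical: 10 }
job_threads_at_most = queue_concurrency.values.sum # => 30

web_threads_at_worst = total_threads - job_threads_at_most
puts web_threads_at_worst # => 30
```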
  ## Development
 
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -18,7 +18,7 @@ module Cloudtasker
  def run
  # Build payload
  payload = request.params
- .slice(:worker, :job_id, :job_args, :job_meta)
+ .slice(:worker, :job_id, :job_args, :job_meta, :job_queue)
  .merge(job_retries: job_retries)
 
  # Process payload
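
The effect of the widened `slice` above can be illustrated with plain Ruby hashes. The sample params and retry count below are made up for illustration:

```ruby
# Illustrative payload filtering, mirroring the controller change above
params = {
  worker: 'CriticalWorker',
  job_id: 'b91e3c74',
  job_args: [1],
  job_meta: {},
  job_queue: 'critical', # now preserved thanks to the added :job_queue key
  unrelated: 'dropped'
}
job_retries = 0

payload = params
          .slice(:worker, :job_id, :job_args, :job_meta, :job_queue)
          .merge(job_retries: job_retries)

p payload.keys
# => [:worker, :job_id, :job_args, :job_meta, :job_queue, :job_retries]
```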
data/cloudtasker.gemspec CHANGED
@@ -10,8 +10,8 @@ Gem::Specification.new do |spec|
  spec.authors = ['Arnaud Lachaume']
  spec.email = ['arnaud.lachaume@keypup.io']
 
- spec.summary = 'Background jobs for Ruby using Google Cloud Tasks (alpha)'
- spec.description = 'Background jobs for Ruby using Google Cloud Tasks (alpha)'
+ spec.summary = 'Background jobs for Ruby using Google Cloud Tasks (beta)'
+ spec.description = 'Background jobs for Ruby using Google Cloud Tasks (beta)'
  spec.homepage = 'https://github.com/keypup-io/cloudtasker'
  spec.license = 'MIT'
 
data/docs/BATCH_JOBS.md CHANGED
@@ -59,8 +59,33 @@ The following callbacks are available on your workers to track the progress of t
  | `on_child_dead` | `The child job` | Invoked when a child has exhausted all of its retries |
  | `on_batch_complete` | none | Invoked when all children have finished or died |
 
+ ## Queue management
+
+ Jobs added to a batch inherit the queue of the parent. It is possible to specify a different queue when adding a job to a batch using the `add_to_queue` batch method.
+
+ E.g.
+
+ ```ruby
+ def perform
+ batch.add_to_queue(:critical, SubWorker, arg1, arg2, arg3)
+ end
+ ```
+
  ## Batch completion
 
  Batches complete when all children have successfully completed or died (all retries exhausted).
 
- Jobs that fail in a batch will be retried based on the `max_retries` setting configured globally or on the worker itself. The batch will be considered `pending` while workers retry. Therefore it may be a good idea to reduce the number of retries on your workers using `cloudtasker_options max_retries: 5` to ensure your batches don't hang for too long.
+ Jobs that fail in a batch will be retried based on the `max_retries` setting configured globally or on the worker itself. The batch will be considered `pending` while workers retry. Therefore it may be a good idea to reduce the number of retries on your workers using `cloudtasker_options max_retries: 5` to ensure your batches don't hang for too long.
+
+ ## Batch progress tracking
+
+ You can access progress statistics in callbacks using `batch.progress`. See the [BatchProgress](../lib/cloudtasker/batch/batch_progress.rb) class for more details.
+
+ E.g.
+ ```ruby
+ def on_batch_node_complete(_child_job)
+ logger.info("Total: #{batch.progress.total}")
+ logger.info("Completed: #{batch.progress.completed}")
+ logger.info("Progress: #{batch.progress.percent.to_i}%")
+ end
+ ```
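
Putting the queue management and progress tracking additions together, a parent/child batch might look like the sketch below. The worker names are hypothetical and the batch extension is assumed to be enabled as described in this document:

```ruby
# Hypothetical batch workers illustrating add_to_queue and batch.progress
class ImportWorker
  include Cloudtasker::Worker

  def perform(file_ids)
    # Enqueue each child on the 'critical' queue instead of inheriting the parent queue
    file_ids.each { |id| batch.add_to_queue(:critical, ImportFileWorker, id) }
  end

  def on_batch_node_complete(_child_job)
    logger.info("Import progress: #{batch.progress.percent.to_i}%")
  end

  def on_batch_complete
    logger.info('All files imported')
  end
end

class ImportFileWorker
  include Cloudtasker::Worker

  def perform(file_id)
    logger.info("Importing file #{file_id}")
  end
end
```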
data/docs/CRON_JOBS.md CHANGED
@@ -28,7 +28,11 @@ unless Rails.env.test?
  # Run job every hour on the fifteenth minute
  other_cron_schedule: {
  worker: 'OtherCronWorker',
- cron: '15 * * * *'
+ cron: '15 * * * *',
+ queue: 'critical',
+ args: ['foo', 'bar']
  }
  )
  end
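
For reference, a worker matching the schedule entry above would run on the `critical` queue and receive the configured `args`. This is a hedged sketch assuming the args are passed positionally to `perform`:

```ruby
# Hypothetical implementation of the OtherCronWorker referenced above
class OtherCronWorker
  include Cloudtasker::Worker

  def perform(arg1, arg2)
    logger.info("Cron run with #{arg1} and #{arg2}") # 'foo' and 'bar'
  end
end
```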
data/exe/cloudtasker CHANGED
@@ -3,9 +3,21 @@
 
  require 'bundler/setup'
  require 'cloudtasker/cli'
+ require 'optparse'
+
+ options = {}
+ OptionParser.new do |opts|
+ opts.banner = 'Usage: cloudtasker [options]'
+
+ opts.on('-q QUEUE', '--queue=QUEUE', 'Queue to process and number of threads. ' \
+ "Examples: '-q critical' | '-q critical,2' | '-q critical,3 -q defaults,2'") do |o|
+ options[:queues] ||= []
+ options[:queues] << o.split(',')
+ end
+ end.parse!
 
  begin
- Cloudtasker::CLI.run
+ Cloudtasker::CLI.run(options)
  rescue StandardError => e
  raise e if $DEBUG
 
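
The `-q` parsing added above can be exercised in isolation. This standalone sketch reproduces the option handling with a sample argument list:

```ruby
require 'optparse'

options = {}
argv = ['-q', 'critical,5', '-q', 'important,4', '-q', 'default,3']

OptionParser.new do |opts|
  opts.on('-q QUEUE', '--queue=QUEUE') do |o|
    options[:queues] ||= []
    options[:queues] << o.split(',')
  end
end.parse!(argv)

p options[:queues]
# => [["critical", "5"], ["important", "4"], ["default", "3"]]
```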
data/lib/cloudtasker.rb CHANGED
@@ -47,5 +47,4 @@ module Cloudtasker
  end
  end
 
- require 'cloudtasker/railtie' if defined?(Rails)
  require 'cloudtasker/engine' if defined?(::Rails::Engine)
@@ -9,15 +9,28 @@ module Cloudtasker
  #
  # Create the queue configured in Cloudtasker if it does not already exist.
  #
+ # @param [String] queue_name The relative name of the queue.
+ #
  # @return [Google::Cloud::Tasks::V2beta3::Queue] The queue
  #
- def self.setup_queue
- client.get_queue(queue_path)
+ def self.setup_queue(**opts)
+ # Build full queue path
+ queue_name = opts[:name] || Cloudtasker::Config::DEFAULT_JOB_QUEUE
+ full_queue_name = queue_path(queue_name)
+
+ # Try to get existing queue
+ client.get_queue(full_queue_name)
  rescue Google::Gax::RetryError
+ # Extract options
+ concurrency = (opts[:concurrency] || Cloudtasker::Config::DEFAULT_QUEUE_CONCURRENCY).to_i
+ retries = (opts[:retries] || Cloudtasker::Config::DEFAULT_QUEUE_RETRIES).to_i
+
+ # Create queue on 'not found' error
  client.create_queue(
  client.location_path(config.gcp_project_id, config.gcp_location_id),
- name: queue_path,
- retry_config: { max_attempts: -1 }
+ name: full_queue_name,
+ retry_config: { max_attempts: retries },
+ rate_limits: { max_concurrent_dispatches: concurrency }
  )
  end
 
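
A pure-Ruby sketch of the option defaulting performed by `setup_queue` above. The default constants are inlined with assumed values for illustration; they are not taken from this diff:

```ruby
# Assumed stand-ins for the Cloudtasker::Config constants referenced above
DEFAULT_JOB_QUEUE = 'default'
DEFAULT_QUEUE_CONCURRENCY = 10 # assumed value
DEFAULT_QUEUE_RETRIES = -1     # assumed value

opts = { name: 'real-time', concurrency: 15 }

queue_name  = opts[:name] || DEFAULT_JOB_QUEUE
concurrency = (opts[:concurrency] || DEFAULT_QUEUE_CONCURRENCY).to_i
retries     = (opts[:retries] || DEFAULT_QUEUE_RETRIES).to_i

p [queue_name, concurrency, retries] # => ["real-time", 15, -1]
```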
@@ -42,13 +55,15 @@ module Cloudtasker
  #
  # Return the fully qualified path for the Cloud Task queue.
  #
+ # @param [String] queue_name The relative name of the queue.
+ #
  # @return [String] The queue path.
  #
- def self.queue_path
+ def self.queue_path(queue_name)
  client.queue_path(
  config.gcp_project_id,
  config.gcp_location_id,
- config.gcp_queue_id
+ [config.gcp_queue_prefix, queue_name].join('-')
  )
  end
 
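
The prefix/name join introduced above maps relative queue names to Cloud Tasks queue names as follows (the prefix value is the illustrative one used in the README):

```ruby
prefix = 'my-app' # config.gcp_queue_prefix
%w[default critical real-time].each do |queue|
  puts [prefix, queue].join('-')
end
# Output:
#   my-app-default
#   my-app-critical
#   my-app-real-time
```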
@@ -94,8 +109,11 @@ module Cloudtasker
  schedule_time: format_schedule_time(payload[:schedule_time])
  ).compact
 
+ # Extract relative queue name
+ relative_queue = payload.delete(:queue)
+
  # Create task
- resp = client.create_task(queue_path, payload)
+ resp = client.create_task(queue_path(relative_queue), payload)
  resp ? new(resp) : nil
  rescue Google::Gax::RetryError
  nil
@@ -121,6 +139,20 @@ module Cloudtasker
  @gcp_task = gcp_task
  end
 
+ #
+ # Return the relative queue (queue name minus prefix) the task is in.
+ #
+ # @return [String] The relative queue name
+ #
+ def relative_queue
+ gcp_task
+ .name
+ .match(%r{/queues/([^/]+)})
+ &.captures
+ &.first
+ &.sub("#{self.class.config.gcp_queue_prefix}-", '')
+ end
+
  #
  # Return a hash description of the task.
  #
@@ -131,7 +163,8 @@ module Cloudtasker
  id: gcp_task.name,
  http_request: gcp_task.to_h[:http_request],
  schedule_time: gcp_task.to_h.dig(:schedule_time, :seconds).to_i,
- retries: gcp_task.to_h[:response_count]
+ retries: gcp_task.to_h[:response_count],
+ queue: relative_queue
  }
  end
  end
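
The `relative_queue` extraction above can be checked in isolation with a sample task name. The task name and prefix below are made up:

```ruby
# Hypothetical fully qualified task name and queue prefix
name   = 'projects/my-gcp-project/locations/us-east1/queues/my-app-critical/tasks/1234567890'
prefix = 'my-app'

relative_queue = name
                 .match(%r{/queues/([^/]+)})
                 &.captures
                 &.first
                 &.sub("#{prefix}-", '')

puts relative_queue # => critical
```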
@@ -7,7 +7,7 @@ module Cloudtasker
  # Manage local tasks pushed to memory.
  # Used for testing.
  class MemoryTask
- attr_reader :id, :http_request, :schedule_time
+ attr_reader :id, :http_request, :schedule_time, :queue
 
  #
  # Return the task queue. A worker class name
@@ -116,10 +116,11 @@ module Cloudtasker
  # @param [Hash] http_request The HTTP request content.
  # @param [Integer] schedule_time When to run the task (Unix timestamp)
  #
- def initialize(id:, http_request:, schedule_time: nil)
+ def initialize(id:, http_request:, schedule_time: nil, queue: nil)
  @id = id
  @http_request = http_request
  @schedule_time = Time.at(schedule_time || 0)
+ @queue = queue
  end
 
  #
@@ -149,7 +150,8 @@ module Cloudtasker
  {
  id: id,
  http_request: http_request,
- schedule_time: schedule_time.to_i
+ schedule_time: schedule_time.to_i,
+ queue: queue
  }
  end