RubyGems - toiler - Versions diffs - 0.6.0 → 0.7.1 - Mend

toiler 0.6.0 → 0.7.1

Files changed (26) hide show

checksums.yaml +4 -4
data/.rubocop.yml +29 -0
data/.ruby-version +1 -1
data/Gemfile +2 -0
data/Gemfile.lock +130 -42
data/README.md +13 -13
data/Rakefile +2 -0
data/lib/toiler/actor/fetcher.rb +85 -48
data/lib/toiler/actor/processor.rb +47 -48
data/lib/toiler/actor/supervisor.rb +7 -5
data/lib/toiler/actor/utils/actor_logging.rb +5 -3
data/lib/toiler/aws/message.rb +3 -1
data/lib/toiler/aws/queue.rb +19 -9
data/lib/toiler/cli.rb +49 -32
data/lib/toiler/gcp/message.rb +55 -0
data/lib/toiler/gcp/queue.rb +37 -0
data/lib/toiler/utils/argument_parser.rb +2 -0
data/lib/toiler/utils/environment_loader.rb +16 -18
data/lib/toiler/utils/logging.rb +5 -3
data/lib/toiler/version.rb +3 -1
data/lib/toiler/worker.rb +12 -4
data/lib/toiler.rb +14 -4
data/spec/models/fetcher_spec.rb +31 -5
data/spec/models/supervisor_spec.rb +14 -3
data/toiler.gemspec +8 -3
metadata +44 -19

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c5f0706a7d25d8bce91d1bb08d5ae3a640c06dbf6e874f5176e8c83b266eea9b
-  data.tar.gz: d738548ee19af151a2f5aecf925c3205175f729622e34a39374e75b1e68e8395
+  metadata.gz: fc813573ecab24cb043e85d2743c730863f1315536c522dd6bad3704d64f75db
+  data.tar.gz: 9a7bd308ee94283d6633eb3877fe22c14b06866ddd085c2249b94cc6cbfa8703
 SHA512:
-  metadata.gz: 2682ecec945d595c2497fa77a2d4cbb1a602575ab79f19afe094c747ff4d027ca58964dce1b85b70077b873c9e82904fe082cfdb03aacaec420e6a40e739a3ec
-  data.tar.gz: e11c323b0570c1ed307752e62771fe41ff1c0e166691c6673b8fa4dabb1b3b5cd7715ae91a5c76f681c1e6c0c0cb0f4f13042bcc4017d0f76bc01023b06488d6
+  metadata.gz: 593a212957bc7f962f3559c5f454b581bfdb68ac4026893e83f923f25e38771781e0a3449808401546ccdae4aaedcfb6e9ea64f41bff29c9a60eeb8c4d560621
+  data.tar.gz: a5c66a263d9029afe2237a48437a01d92d60984d8ff600edde54deb15848fc58bf924c86717047624cf7f446e73012eb5049649d1ed950d931cfb2cc3ac1fa3c

data/.rubocop.yml ADDED Viewed

@@ -0,0 +1,29 @@
+# Documentation:
+#   Enabled: false
+#
+# Style/ClassAndModuleChildren:
+#   Enabled: false
+Layout/LineLength:
+  Max: 120
+Metrics/MethodLength:
+  Max: 20
+Metrics/AbcSize:
+  Max: 40
+Metrics/ClassLength:
+  CountComments: false
+  Max: 200
+AllCops:
+  Exclude:
+    - 'vendor/**/*'
+    - 'tmp/**/*'
+    - 'config/**/*'
+    - 'bin/**'
+    - 'db/**/*'
+    - 'spec/**/*'
+  NewCops: enable
+  TargetRubyVersion: 2.6

data/.ruby-version CHANGED Viewed

@@ -1 +1 @@
-2.2.2
+2.6.8

data/Gemfile CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 source 'https://rubygems.org'
 gemspec

data/Gemfile.lock CHANGED Viewed

@@ -1,60 +1,148 @@
 PATH
   remote: .
   specs:
-    toiler (0.5.1.pre7)
+    toiler (0.7.0)
       aws-sdk-sqs (>= 1.0.0, < 2.0.0)
       concurrent-ruby (~> 1.0, >= 1.0.0)
       concurrent-ruby-edge (~> 0.3, >= 0.3)
+      google-cloud-pubsub (~> 2.9, >= 2.9.1)
 GEM
   remote: https://rubygems.org/
   specs:
-    ast (2.4.0)
-    aws-eventstream (1.0.1)
-    aws-partitions (1.105.0)
-    aws-sdk-core (3.31.0)
-      aws-eventstream (~> 1.0)
-      aws-partitions (~> 1.0)
-      aws-sigv4 (~> 1.0)
+    addressable (2.8.0)
+      public_suffix (>= 2.0.2, < 5.0)
+    ast (2.4.2)
+    aws-eventstream (1.2.0)
+    aws-partitions (1.573.0)
+    aws-sdk-core (3.130.0)
+      aws-eventstream (~> 1, >= 1.0.2)
+      aws-partitions (~> 1, >= 1.525.0)
+      aws-sigv4 (~> 1.1)
       jmespath (~> 1.0)
-    aws-sdk-sqs (1.7.0)
-      aws-sdk-core (~> 3, >= 3.26.0)
-      aws-sigv4 (~> 1.0)
-    aws-sigv4 (1.0.3)
-    concurrent-ruby (1.0.5)
-    concurrent-ruby-edge (0.3.1)
-      concurrent-ruby (= 1.0.5)
-    diff-lcs (1.3)
-    jaro_winkler (1.5.1)
-    jmespath (1.4.0)
-    parallel (1.12.1)
-    parser (2.5.1.2)
-      ast (~> 2.4.0)
-    powerpack (0.1.2)
-    rainbow (3.0.0)
-    rspec (3.8.0)
-      rspec-core (~> 3.8.0)
-      rspec-expectations (~> 3.8.0)
-      rspec-mocks (~> 3.8.0)
-    rspec-core (3.8.0)
-      rspec-support (~> 3.8.0)
-    rspec-expectations (3.8.1)
+    aws-sdk-sqs (1.51.0)
+      aws-sdk-core (~> 3, >= 3.127.0)
+      aws-sigv4 (~> 1.1)
+    aws-sigv4 (1.4.0)
+      aws-eventstream (~> 1, >= 1.0.2)
+    concurrent-ruby (1.1.10)
+    concurrent-ruby-edge (0.6.0)
+      concurrent-ruby (~> 1.1.6)
+    diff-lcs (1.5.0)
+    faraday (1.10.0)
+      faraday-em_http (~> 1.0)
+      faraday-em_synchrony (~> 1.0)
+      faraday-excon (~> 1.1)
+      faraday-httpclient (~> 1.0)
+      faraday-multipart (~> 1.0)
+      faraday-net_http (~> 1.0)
+      faraday-net_http_persistent (~> 1.0)
+      faraday-patron (~> 1.0)
+      faraday-rack (~> 1.0)
+      faraday-retry (~> 1.0)
+      ruby2_keywords (>= 0.0.4)
+    faraday-em_http (1.0.0)
+    faraday-em_synchrony (1.0.0)
+    faraday-excon (1.1.0)
+    faraday-httpclient (1.0.1)
+    faraday-multipart (1.0.3)
+      multipart-post (>= 1.2, < 3)
+    faraday-net_http (1.0.1)
+    faraday-net_http_persistent (1.2.0)
+    faraday-patron (1.0.0)
+    faraday-rack (1.0.0)
+    faraday-retry (1.0.3)
+    gapic-common (0.8.0)
+      faraday (~> 1.3)
+      google-protobuf (~> 3.14)
+      googleapis-common-protos (>= 1.3.11, < 2.a)
+      googleapis-common-protos-types (>= 1.0.6, < 2.a)
+      googleauth (>= 0.17.0, < 2.a)
+      grpc (~> 1.36)
+    google-cloud-core (1.6.0)
+      google-cloud-env (~> 1.0)
+      google-cloud-errors (~> 1.0)
+    google-cloud-env (1.6.0)
+      faraday (>= 0.17.3, < 3.0)
+    google-cloud-errors (1.2.0)
+    google-cloud-pubsub (2.9.1)
+      concurrent-ruby (~> 1.1)
+      google-cloud-core (~> 1.5)
+      google-cloud-pubsub-v1 (~> 0.0)
+    google-cloud-pubsub-v1 (0.8.0)
+      gapic-common (>= 0.7, < 2.a)
+      google-cloud-errors (~> 1.0)
+      grpc-google-iam-v1 (>= 0.6.10, < 2.a)
+    google-protobuf (3.20.0)
+    google-protobuf (3.20.0-x64-mingw32)
+    googleapis-common-protos (1.3.12)
+      google-protobuf (~> 3.14)
+      googleapis-common-protos-types (~> 1.2)
+      grpc (~> 1.27)
+    googleapis-common-protos-types (1.3.0)
+      google-protobuf (~> 3.14)
+    googleauth (1.1.2)
+      faraday (>= 0.17.3, < 3.a)
+      jwt (>= 1.4, < 3.0)
+      memoist (~> 0.16)
+      multi_json (~> 1.11)
+      os (>= 0.9, < 2.0)
+      signet (>= 0.16, < 2.a)
+    grpc (1.45.0)
+      google-protobuf (~> 3.19)
+      googleapis-common-protos-types (~> 1.0)
+    grpc (1.45.0-x64-mingw32)
+      google-protobuf (~> 3.19)
+      googleapis-common-protos-types (~> 1.0)
+    grpc-google-iam-v1 (1.0.0)
+      google-protobuf (~> 3.14)
+      googleapis-common-protos (>= 1.3.12, < 2.0)
+      grpc (~> 1.27)
+    jmespath (1.6.1)
+    jwt (2.3.0)
+    memoist (0.16.2)
+    multi_json (1.15.0)
+    multipart-post (2.1.1)
+    os (1.1.4)
+    parallel (1.22.1)
+    parser (3.1.1.0)
+      ast (~> 2.4.1)
+    public_suffix (4.0.6)
+    rainbow (3.1.1)
+    regexp_parser (2.2.1)
+    rexml (3.2.5)
+    rspec (3.11.0)
+      rspec-core (~> 3.11.0)
+      rspec-expectations (~> 3.11.0)
+      rspec-mocks (~> 3.11.0)
+    rspec-core (3.11.0)
+      rspec-support (~> 3.11.0)
+    rspec-expectations (3.11.0)
       diff-lcs (>= 1.2.0, < 2.0)
-      rspec-support (~> 3.8.0)
-    rspec-mocks (3.8.0)
+      rspec-support (~> 3.11.0)
+    rspec-mocks (3.11.1)
       diff-lcs (>= 1.2.0, < 2.0)
-      rspec-support (~> 3.8.0)
-    rspec-support (3.8.0)
-    rubocop (0.58.2)
-      jaro_winkler (~> 1.5.1)
+      rspec-support (~> 3.11.0)
+    rspec-support (3.11.0)
+    rubocop (1.26.1)
       parallel (~> 1.10)
-      parser (>= 2.5, != 2.5.1.1)
-      powerpack (~> 0.1)
+      parser (>= 3.1.0.0)
       rainbow (>= 2.2.2, < 4.0)
+      regexp_parser (>= 1.8, < 3.0)
+      rexml
+      rubocop-ast (>= 1.16.0, < 2.0)
       ruby-progressbar (~> 1.7)
-      unicode-display_width (~> 1.0, >= 1.0.1)
-    ruby-progressbar (1.10.0)
-    unicode-display_width (1.4.0)
+      unicode-display_width (>= 1.4.0, < 3.0)
+    rubocop-ast (1.16.0)
+      parser (>= 3.1.1.0)
+    ruby-progressbar (1.11.0)
+    ruby2_keywords (0.0.5)
+    signet (0.16.1)
+      addressable (~> 2.8)
+      faraday (>= 0.17.5, < 3.0)
+      jwt (>= 1.5, < 3.0)
+      multi_json (~> 1.10)
+    unicode-display_width (2.1.0)
 PLATFORMS
   ruby
@@ -66,4 +154,4 @@ DEPENDENCIES
   toiler!
 BUNDLED WITH
-   1.16.3
+   1.17.2

data/README.md CHANGED Viewed

@@ -14,21 +14,17 @@ Instead of [shoryuken's](https://github.com/phstc/shoryuken) loadbalancing  appr
 ### Long-Polling
 A Fetcher thread is spawned for each queue.
-Fetchers are responsible for polling SQS and retrieving messages.
+Fetchers are responsible for polling SQS/PubSub and retrieving messages.
 They are optimised to not bring more messages than the amount of processors available for such queue.
 By long-polling, fetchers wait for a configurable amount of time for messages to become available on a single request; this prevents unnecessarily requesting messages when there are none.
 ### Message Parsing
-Workers can configure a parser Class or Proc to parse an SQS message body before being processed.
+Workers can configure a parser Class or Proc to parse a message body before being processed.
-### Batches
+### Deadline Extension
-Toiler allows a Worker to be able to receive a batch of messages instead of a single one.
-### Auto Visibility Extension
-Toiler has the ability to automatically extend the visibility timeout of an SQS message to prevent the message from re-entering the queue if processing of such message is taking longer than the queue's visibility timeout.
+Toiler has the ability to automatically extend the ack deadline of messages to prevent them from re-entering the queue if processing of such messages is taking longer than the queue's ack deadline or visibility timeout.
 ## Installation
@@ -59,8 +55,9 @@ class MyWorker
   # toiler_options parser: ->(sqs_msg){ REXML::Document.new(sqs_msg.body) }
   # toiler_options parser: MultiJson
-  # toiler_options auto_visibility_timeout: true
+  # toiler_options deadline_extension: true
   # toiler_options batch: true
+  # toiler_options queue: 'subscription', concurrency: 5, auto_delete: true, provider: :gcp
   #Example connection client that should be shared across all instances of MyWorker
   @@client = ConnectionClient.new
@@ -82,10 +79,13 @@ end
 ```yaml
 aws:
-  access_key_id:      ...       # or <%= ENV['AWS_ACCESS_KEY_ID'] %>
-  secret_access_key:  ...       # or <%= ENV['AWS_SECRET_ACCESS_KEY'] %>
-  region:             us-east-1 # or <%= ENV['AWS_REGION'] %>
-wait: 20                        # The time in seconds to wait for messages during long-polling
+  access_key_id:     ...             # or <%= ENV['AWS_ACCESS_KEY_ID'] %>
+  secret_access_key: ...             # or <%= ENV['AWS_SECRET_ACCESS_KEY'] %>
+  region:            us-east-1       # or <%= ENV['AWS_REGION'] %>
+gcp:
+  project_id:  my-project            # or <%= ENV['GCP_PROJECT'] %>
+  credentials: /path/to/keyfile.json # or <%= ENV['GCP_CREDENTIALS'] %>
+wait: 20                             # The time in seconds to wait for messages during long-polling
 ```
 ### Rails Integration

data/Rakefile CHANGED Viewed

@@ -1 +1,3 @@
+# frozen_string_literal: true
 require 'bundler/gem_tasks'

data/lib/toiler/actor/fetcher.rb CHANGED Viewed

@@ -1,29 +1,32 @@
+# frozen_string_literal: true
 require 'toiler/actor/utils/actor_logging'
 require 'toiler/aws/queue'
+require 'toiler/gcp/queue'
 module Toiler
   module Actor
-    # Actor polling for messages only when processors are ready, otherwise idle
+    # Actor pulling messages only when processors are ready, otherwise idle
     class Fetcher < Concurrent::Actor::RestartingContext
       include Utils::ActorLogging
-      FETCH_LIMIT = 10
+      attr_reader :queue, :wait, :ack_deadline, :free_processors,
+                  :executing, :waiting_messages, :concurrency,
+                  :scheduled_task
-      attr_accessor :queue, :wait, :visibility_timeout, :free_processors,
-                    :executing, :waiting_messages, :concurrency
+      def initialize(queue_name, count, provider)
+        super()
-      def initialize(queue, client, count)
-        debug "Initializing Fetcher for queue #{queue}..."
-        @queue = Toiler::Aws::Queue.new queue, client
+        debug "Initializing Fetcher for queue #{queue_name} and provider #{provider}..."
         @wait = Toiler.options[:wait] || 60
         @free_processors = count
-        @batch = Toiler.worker_class_registry[queue].batch?
-        @visibility_timeout = @queue.visibility_timeout
         @executing = false
         @waiting_messages = 0
         @concurrency = count
-        debug "Finished initializing Fetcher for queue #{queue}"
-        tell :poll_messages
+        @scheduled_task = nil
+        init_queue(queue_name, provider)
+        debug "Finished initializing Fetcher for queue #{queue_name} and provider #{provider}..."
+        tell :pull_messages
       end
       def default_executor
@@ -34,8 +37,9 @@ module Toiler
         @executing = true
         method, *args = msg
         send(method, *args)
-      rescue StandardError => e
-        error "Fetcher #{queue.name} raised exception #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}"
+      rescue StandardError, SystemStackError => e
+        # if we misbehave and cause a stack level too deep exception, we should be able to recover
+        error "Fetcher #{@queue.name} raised exception #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}"
       ensure
         @executing = false
       end
@@ -46,26 +50,26 @@ module Toiler
       private
-      def batch?
-        @batch
+      def init_queue(queue_name, provider)
+        if provider.nil? || provider.to_sym == :aws
+          @queue = Toiler::Aws::Queue.new queue_name, Toiler.aws_client
+        elsif provider.to_sym == :gcp
+          @queue = Toiler::Gcp::Queue.new queue_name, Toiler.gcp_client
+        else
+          raise StandardError, "unknown provider #{provider}"
+        end
+        @ack_deadline = @queue.ack_deadline
       end
       def processor_finished
-        debug "Fetcher #{queue.name} received processor finished signal..."
+        debug "Fetcher #{@queue.name} received processor finished signal..."
         @free_processors += 1
-        tell :poll_messages
-      end
-      def max_messages
-        batch? ? FETCH_LIMIT : [FETCH_LIMIT, free_processors].min
+        tell :pull_messages
       end
-      def poll_future(max_number_of_messages)
+      def pull_future(max_number_of_messages)
         Concurrent::Promises.future do
-          queue.receive_messages attribute_names: %w[All],
-                                 message_attribute_names: %w[All],
-                                 wait_time_seconds: wait,
-                                 max_number_of_messages: max_number_of_messages
+          @queue.receive_messages wait: @wait, max_messages: max_number_of_messages
         end
       end
@@ -73,48 +77,81 @@ module Toiler
         @waiting_messages -= messages
       end
-      def poll_messages
-        return unless should_poll?
+      def max_messages
+        # limit max messages to 10% of concurrency to always ensure we have
+        # 10 concurrent fetches and improved latency
+        [@queue.max_messages, (@concurrency * 0.1).ceil].min
+      end
+      def needed_messages
+        @free_processors - @waiting_messages
+      end
+      def pull_messages
+        if needed_messages < max_messages
+          # a pull is already scheduled and we dont fit a full batch, return
+          return unless @scheduled_task.nil?
+          free_percent = free_processors.to_f / concurrency
+          # wait time linear to the amount of free workers with a maximum of 5 seconds,
+          # when there are more free workers, we can theoretically wait more time, since
+          # we already have workers waiting for messages.
+          wait_time = 0.1 + (5 * free_percent)
+          # schedule a message pull if we cannot fill a batch
+          # this ensures we wait some time for more messages to arrive
+          @scheduled_task = Concurrent::ScheduledTask.execute(wait_time) do
+            tell [:do_pull_messages, true]
+          end
+        end
+        # we can fit a whole batch, if there was already a scheduled task
+        # we just let it run, it will only pull messages if there are more
+        # needed messages
+        do_pull_messages false
+      end
+      def do_pull_messages(clear_scheduled_task)
+        @scheduled_task = nil if clear_scheduled_task
-        max_number_of_messages = max_messages
-        return if waiting_messages > 0 && !full_batch?(max_number_of_messages)
+        return unless should_pull?
-        @waiting_messages += max_number_of_messages
+        current_needed_messages = needed_messages
-        debug "Fetcher #{queue.name} polling messages..."
-        future = poll_future max_number_of_messages
+        current_needed_messages = max_messages if current_needed_messages >= max_messages
+        @waiting_messages += current_needed_messages
+        debug "Fetcher #{@queue.name} pulling messages..."
+        future = pull_future current_needed_messages
         future.on_rejection! do
-          tell [:release_messages, max_number_of_messages]
-          tell :poll_messages
+          tell [:release_messages, current_needed_messages]
+          tell :pull_messages
         end
         future.on_fulfillment! do |msgs|
           tell [:assign_messages, msgs] if !msgs.nil? && !msgs.empty?
-          tell [:release_messages, max_number_of_messages]
-          tell :poll_messages
+          tell [:release_messages, current_needed_messages]
+          tell :pull_messages
         end
-        poll_messages if should_poll?
-      end
-      def should_poll?
-        free_processors / 2 > waiting_messages
+        # defer method execution to avoid recursion
+        tell :pull_messages if should_pull?
       end
-      def full_batch?(max_number_of_messages)
-        max_number_of_messages == FETCH_LIMIT || max_number_of_messages >= concurrency * 0.1
+      def should_pull?
+        needed_messages.positive?
       end
       def processor_pool
-        @processor_pool ||= Toiler.processor_pool queue.name
+        @processor_pool ||= Toiler.processor_pool @queue.name
       end
       def assign_messages(messages)
-        messages = [messages] if batch?
         messages.each do |m|
-          processor_pool.tell [:process, visibility_timeout, m]
+          processor_pool.tell [:process, @ack_deadline, m]
           @free_processors -= 1
         end
-        debug "Fetcher #{queue.name} assigned #{messages.count} messages"
+        debug "Fetcher #{@queue.name} assigned #{messages.count} messages"
       end
     end
   end