toiler 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c5f0706a7d25d8bce91d1bb08d5ae3a640c06dbf6e874f5176e8c83b266eea9b
4
- data.tar.gz: d738548ee19af151a2f5aecf925c3205175f729622e34a39374e75b1e68e8395
3
+ metadata.gz: fc813573ecab24cb043e85d2743c730863f1315536c522dd6bad3704d64f75db
4
+ data.tar.gz: 9a7bd308ee94283d6633eb3877fe22c14b06866ddd085c2249b94cc6cbfa8703
5
5
  SHA512:
6
- metadata.gz: 2682ecec945d595c2497fa77a2d4cbb1a602575ab79f19afe094c747ff4d027ca58964dce1b85b70077b873c9e82904fe082cfdb03aacaec420e6a40e739a3ec
7
- data.tar.gz: e11c323b0570c1ed307752e62771fe41ff1c0e166691c6673b8fa4dabb1b3b5cd7715ae91a5c76f681c1e6c0c0cb0f4f13042bcc4017d0f76bc01023b06488d6
6
+ metadata.gz: 593a212957bc7f962f3559c5f454b581bfdb68ac4026893e83f923f25e38771781e0a3449808401546ccdae4aaedcfb6e9ea64f41bff29c9a60eeb8c4d560621
7
+ data.tar.gz: a5c66a263d9029afe2237a48437a01d92d60984d8ff600edde54deb15848fc58bf924c86717047624cf7f446e73012eb5049649d1ed950d931cfb2cc3ac1fa3c
data/.rubocop.yml ADDED
@@ -0,0 +1,29 @@
1
+ # Documentation:
2
+ # Enabled: false
3
+ #
4
+ # Style/ClassAndModuleChildren:
5
+ # Enabled: false
6
+
7
+ Layout/LineLength:
8
+ Max: 120
9
+
10
+ Metrics/MethodLength:
11
+ Max: 20
12
+
13
+ Metrics/AbcSize:
14
+ Max: 40
15
+
16
+ Metrics/ClassLength:
17
+ CountComments: false
18
+ Max: 200
19
+
20
+ AllCops:
21
+ Exclude:
22
+ - 'vendor/**/*'
23
+ - 'tmp/**/*'
24
+ - 'config/**/*'
25
+ - 'bin/**'
26
+ - 'db/**/*'
27
+ - 'spec/**/*'
28
+ NewCops: enable
29
+ TargetRubyVersion: 2.6
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.2.2
1
+ 2.6.8
data/Gemfile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'https://rubygems.org'
2
4
 
3
5
  gemspec
data/Gemfile.lock CHANGED
@@ -1,60 +1,148 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- toiler (0.5.1.pre7)
4
+ toiler (0.7.0)
5
5
  aws-sdk-sqs (>= 1.0.0, < 2.0.0)
6
6
  concurrent-ruby (~> 1.0, >= 1.0.0)
7
7
  concurrent-ruby-edge (~> 0.3, >= 0.3)
8
+ google-cloud-pubsub (~> 2.9, >= 2.9.1)
8
9
 
9
10
  GEM
10
11
  remote: https://rubygems.org/
11
12
  specs:
12
- ast (2.4.0)
13
- aws-eventstream (1.0.1)
14
- aws-partitions (1.105.0)
15
- aws-sdk-core (3.31.0)
16
- aws-eventstream (~> 1.0)
17
- aws-partitions (~> 1.0)
18
- aws-sigv4 (~> 1.0)
13
+ addressable (2.8.0)
14
+ public_suffix (>= 2.0.2, < 5.0)
15
+ ast (2.4.2)
16
+ aws-eventstream (1.2.0)
17
+ aws-partitions (1.573.0)
18
+ aws-sdk-core (3.130.0)
19
+ aws-eventstream (~> 1, >= 1.0.2)
20
+ aws-partitions (~> 1, >= 1.525.0)
21
+ aws-sigv4 (~> 1.1)
19
22
  jmespath (~> 1.0)
20
- aws-sdk-sqs (1.7.0)
21
- aws-sdk-core (~> 3, >= 3.26.0)
22
- aws-sigv4 (~> 1.0)
23
- aws-sigv4 (1.0.3)
24
- concurrent-ruby (1.0.5)
25
- concurrent-ruby-edge (0.3.1)
26
- concurrent-ruby (= 1.0.5)
27
- diff-lcs (1.3)
28
- jaro_winkler (1.5.1)
29
- jmespath (1.4.0)
30
- parallel (1.12.1)
31
- parser (2.5.1.2)
32
- ast (~> 2.4.0)
33
- powerpack (0.1.2)
34
- rainbow (3.0.0)
35
- rspec (3.8.0)
36
- rspec-core (~> 3.8.0)
37
- rspec-expectations (~> 3.8.0)
38
- rspec-mocks (~> 3.8.0)
39
- rspec-core (3.8.0)
40
- rspec-support (~> 3.8.0)
41
- rspec-expectations (3.8.1)
23
+ aws-sdk-sqs (1.51.0)
24
+ aws-sdk-core (~> 3, >= 3.127.0)
25
+ aws-sigv4 (~> 1.1)
26
+ aws-sigv4 (1.4.0)
27
+ aws-eventstream (~> 1, >= 1.0.2)
28
+ concurrent-ruby (1.1.10)
29
+ concurrent-ruby-edge (0.6.0)
30
+ concurrent-ruby (~> 1.1.6)
31
+ diff-lcs (1.5.0)
32
+ faraday (1.10.0)
33
+ faraday-em_http (~> 1.0)
34
+ faraday-em_synchrony (~> 1.0)
35
+ faraday-excon (~> 1.1)
36
+ faraday-httpclient (~> 1.0)
37
+ faraday-multipart (~> 1.0)
38
+ faraday-net_http (~> 1.0)
39
+ faraday-net_http_persistent (~> 1.0)
40
+ faraday-patron (~> 1.0)
41
+ faraday-rack (~> 1.0)
42
+ faraday-retry (~> 1.0)
43
+ ruby2_keywords (>= 0.0.4)
44
+ faraday-em_http (1.0.0)
45
+ faraday-em_synchrony (1.0.0)
46
+ faraday-excon (1.1.0)
47
+ faraday-httpclient (1.0.1)
48
+ faraday-multipart (1.0.3)
49
+ multipart-post (>= 1.2, < 3)
50
+ faraday-net_http (1.0.1)
51
+ faraday-net_http_persistent (1.2.0)
52
+ faraday-patron (1.0.0)
53
+ faraday-rack (1.0.0)
54
+ faraday-retry (1.0.3)
55
+ gapic-common (0.8.0)
56
+ faraday (~> 1.3)
57
+ google-protobuf (~> 3.14)
58
+ googleapis-common-protos (>= 1.3.11, < 2.a)
59
+ googleapis-common-protos-types (>= 1.0.6, < 2.a)
60
+ googleauth (>= 0.17.0, < 2.a)
61
+ grpc (~> 1.36)
62
+ google-cloud-core (1.6.0)
63
+ google-cloud-env (~> 1.0)
64
+ google-cloud-errors (~> 1.0)
65
+ google-cloud-env (1.6.0)
66
+ faraday (>= 0.17.3, < 3.0)
67
+ google-cloud-errors (1.2.0)
68
+ google-cloud-pubsub (2.9.1)
69
+ concurrent-ruby (~> 1.1)
70
+ google-cloud-core (~> 1.5)
71
+ google-cloud-pubsub-v1 (~> 0.0)
72
+ google-cloud-pubsub-v1 (0.8.0)
73
+ gapic-common (>= 0.7, < 2.a)
74
+ google-cloud-errors (~> 1.0)
75
+ grpc-google-iam-v1 (>= 0.6.10, < 2.a)
76
+ google-protobuf (3.20.0)
77
+ google-protobuf (3.20.0-x64-mingw32)
78
+ googleapis-common-protos (1.3.12)
79
+ google-protobuf (~> 3.14)
80
+ googleapis-common-protos-types (~> 1.2)
81
+ grpc (~> 1.27)
82
+ googleapis-common-protos-types (1.3.0)
83
+ google-protobuf (~> 3.14)
84
+ googleauth (1.1.2)
85
+ faraday (>= 0.17.3, < 3.a)
86
+ jwt (>= 1.4, < 3.0)
87
+ memoist (~> 0.16)
88
+ multi_json (~> 1.11)
89
+ os (>= 0.9, < 2.0)
90
+ signet (>= 0.16, < 2.a)
91
+ grpc (1.45.0)
92
+ google-protobuf (~> 3.19)
93
+ googleapis-common-protos-types (~> 1.0)
94
+ grpc (1.45.0-x64-mingw32)
95
+ google-protobuf (~> 3.19)
96
+ googleapis-common-protos-types (~> 1.0)
97
+ grpc-google-iam-v1 (1.0.0)
98
+ google-protobuf (~> 3.14)
99
+ googleapis-common-protos (>= 1.3.12, < 2.0)
100
+ grpc (~> 1.27)
101
+ jmespath (1.6.1)
102
+ jwt (2.3.0)
103
+ memoist (0.16.2)
104
+ multi_json (1.15.0)
105
+ multipart-post (2.1.1)
106
+ os (1.1.4)
107
+ parallel (1.22.1)
108
+ parser (3.1.1.0)
109
+ ast (~> 2.4.1)
110
+ public_suffix (4.0.6)
111
+ rainbow (3.1.1)
112
+ regexp_parser (2.2.1)
113
+ rexml (3.2.5)
114
+ rspec (3.11.0)
115
+ rspec-core (~> 3.11.0)
116
+ rspec-expectations (~> 3.11.0)
117
+ rspec-mocks (~> 3.11.0)
118
+ rspec-core (3.11.0)
119
+ rspec-support (~> 3.11.0)
120
+ rspec-expectations (3.11.0)
42
121
  diff-lcs (>= 1.2.0, < 2.0)
43
- rspec-support (~> 3.8.0)
44
- rspec-mocks (3.8.0)
122
+ rspec-support (~> 3.11.0)
123
+ rspec-mocks (3.11.1)
45
124
  diff-lcs (>= 1.2.0, < 2.0)
46
- rspec-support (~> 3.8.0)
47
- rspec-support (3.8.0)
48
- rubocop (0.58.2)
49
- jaro_winkler (~> 1.5.1)
125
+ rspec-support (~> 3.11.0)
126
+ rspec-support (3.11.0)
127
+ rubocop (1.26.1)
50
128
  parallel (~> 1.10)
51
- parser (>= 2.5, != 2.5.1.1)
52
- powerpack (~> 0.1)
129
+ parser (>= 3.1.0.0)
53
130
  rainbow (>= 2.2.2, < 4.0)
131
+ regexp_parser (>= 1.8, < 3.0)
132
+ rexml
133
+ rubocop-ast (>= 1.16.0, < 2.0)
54
134
  ruby-progressbar (~> 1.7)
55
- unicode-display_width (~> 1.0, >= 1.0.1)
56
- ruby-progressbar (1.10.0)
57
- unicode-display_width (1.4.0)
135
+ unicode-display_width (>= 1.4.0, < 3.0)
136
+ rubocop-ast (1.16.0)
137
+ parser (>= 3.1.1.0)
138
+ ruby-progressbar (1.11.0)
139
+ ruby2_keywords (0.0.5)
140
+ signet (0.16.1)
141
+ addressable (~> 2.8)
142
+ faraday (>= 0.17.5, < 3.0)
143
+ jwt (>= 1.5, < 3.0)
144
+ multi_json (~> 1.10)
145
+ unicode-display_width (2.1.0)
58
146
 
59
147
  PLATFORMS
60
148
  ruby
@@ -66,4 +154,4 @@ DEPENDENCIES
66
154
  toiler!
67
155
 
68
156
  BUNDLED WITH
69
- 1.16.3
157
+ 1.17.2
data/README.md CHANGED
@@ -14,21 +14,17 @@ Instead of [shoryuken's](https://github.com/phstc/shoryuken) loadbalancing appr
14
14
  ### Long-Polling
15
15
 
16
16
  A Fetcher thread is spawned for each queue.
17
- Fetchers are resposible for polling SQS and retreiving messages.
17
+ Fetchers are responsible for polling SQS/PubSub and retrieving messages.
18
18
  They are optimised to not bring more messages than the amount of processors available for such queue.
19
19
  By long-polling fetchers wait for a configurable amount of time for messages to become available on a single request, this prevents unnecessarily requesting messages when there are none.
20
20
 
21
21
  ### Message Parsing
22
22
 
23
- Workers can configure a parser Class or Proc to parse an SQS message body before being processed.
23
+ Workers can configure a parser Class or Proc to parse a message body before being processed.
24
24
 
25
- ### Batches
25
+ ### Deadline Extension
26
26
 
27
- Toiler allows a Worker to be able to receive a batch of messages instead of a single one.
28
-
29
- ### Auto Visibility Extension
30
-
31
- Toiler has the ability to automatically extend the visibility timeout of and SQS message to prevent the message from re-entering the queue if processing of such message is taking longer than the queue's visibility timeout.
27
+ Toiler has the ability to automatically extend the ack deadline of a message to prevent the message from re-entering the queue if processing of such message is taking longer than the queue's ack deadline or visibility timeout.
32
28
 
33
29
  ## Installation
34
30
 
@@ -59,8 +55,9 @@ class MyWorker
59
55
 
60
56
  # toiler_options parser: ->(sqs_msg){ REXML::Document.new(sqs_msg.body) }
61
57
  # toiler_options parser: MultiJson
62
- # toiler_options auto_visibility_timeout: true
58
+ # toiler_options deadline_extension: true
63
59
  # toiler_options batch: true
60
+ # toiler_options queue: 'subscription', concurrency: 5, auto_delete: true, provider: :gcp
64
61
 
65
62
  #Example connection client that should be shared across all instances of MyWorker
66
63
  @@client = ConnectionClient.new
@@ -82,10 +79,13 @@ end
82
79
 
83
80
  ```yaml
84
81
  aws:
85
- access_key_id: ... # or <%= ENV['AWS_ACCESS_KEY_ID'] %>
86
- secret_access_key: ... # or <%= ENV['AWS_SECRET_ACCESS_KEY'] %>
87
- region: us-east-1 # or <%= ENV['AWS_REGION'] %>
88
- wait: 20 # The time in seconds to wait for messages during long-polling
82
+ access_key_id: ... # or <%= ENV['AWS_ACCESS_KEY_ID'] %>
83
+ secret_access_key: ... # or <%= ENV['AWS_SECRET_ACCESS_KEY'] %>
84
+ region: us-east-1 # or <%= ENV['AWS_REGION'] %>
85
+ gcp:
86
+ project_id: my-project # or <%= ENV['GCP_PROJECT'] %>
87
+ credentials: /path/to/keyfile.json # or <%= ENV['GCP_CREDENTIALS'] %>
88
+ wait: 20 # The time in seconds to wait for messages during long-polling
89
89
  ```
90
90
 
91
91
  ### Rails Integration
data/Rakefile CHANGED
@@ -1 +1,3 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'bundler/gem_tasks'
@@ -1,29 +1,32 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'toiler/actor/utils/actor_logging'
2
4
  require 'toiler/aws/queue'
5
+ require 'toiler/gcp/queue'
3
6
 
4
7
  module Toiler
5
8
  module Actor
6
- # Actor polling for messages only when processors are ready, otherwise idle
9
+ # Actor pulling messages only when processors are ready, otherwise idle
7
10
  class Fetcher < Concurrent::Actor::RestartingContext
8
11
  include Utils::ActorLogging
9
12
 
10
- FETCH_LIMIT = 10
13
+ attr_reader :queue, :wait, :ack_deadline, :free_processors,
14
+ :executing, :waiting_messages, :concurrency,
15
+ :scheduled_task
11
16
 
12
- attr_accessor :queue, :wait, :visibility_timeout, :free_processors,
13
- :executing, :waiting_messages, :concurrency
17
+ def initialize(queue_name, count, provider)
18
+ super()
14
19
 
15
- def initialize(queue, client, count)
16
- debug "Initializing Fetcher for queue #{queue}..."
17
- @queue = Toiler::Aws::Queue.new queue, client
20
+ debug "Initializing Fetcher for queue #{queue_name} and provider #{provider}..."
18
21
  @wait = Toiler.options[:wait] || 60
19
22
  @free_processors = count
20
- @batch = Toiler.worker_class_registry[queue].batch?
21
- @visibility_timeout = @queue.visibility_timeout
22
23
  @executing = false
23
24
  @waiting_messages = 0
24
25
  @concurrency = count
25
- debug "Finished initializing Fetcher for queue #{queue}"
26
- tell :poll_messages
26
+ @scheduled_task = nil
27
+ init_queue(queue_name, provider)
28
+ debug "Finished initializing Fetcher for queue #{queue_name} and provider #{provider}..."
29
+ tell :pull_messages
27
30
  end
28
31
 
29
32
  def default_executor
@@ -34,8 +37,9 @@ module Toiler
34
37
  @executing = true
35
38
  method, *args = msg
36
39
  send(method, *args)
37
- rescue StandardError => e
38
- error "Fetcher #{queue.name} raised exception #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}"
40
+ rescue StandardError, SystemStackError => e
41
+ # if we misbehave and cause a stack level too deep exception, we should be able to recover
42
+ error "Fetcher #{@queue.name} raised exception #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}"
39
43
  ensure
40
44
  @executing = false
41
45
  end
@@ -46,26 +50,26 @@ module Toiler
46
50
 
47
51
  private
48
52
 
49
- def batch?
50
- @batch
53
+ def init_queue(queue_name, provider)
54
+ if provider.nil? || provider.to_sym == :aws
55
+ @queue = Toiler::Aws::Queue.new queue_name, Toiler.aws_client
56
+ elsif provider.to_sym == :gcp
57
+ @queue = Toiler::Gcp::Queue.new queue_name, Toiler.gcp_client
58
+ else
59
+ raise StandardError, "unknown provider #{provider}"
60
+ end
61
+ @ack_deadline = @queue.ack_deadline
51
62
  end
52
63
 
53
64
  def processor_finished
54
- debug "Fetcher #{queue.name} received processor finished signal..."
65
+ debug "Fetcher #{@queue.name} received processor finished signal..."
55
66
  @free_processors += 1
56
- tell :poll_messages
57
- end
58
-
59
- def max_messages
60
- batch? ? FETCH_LIMIT : [FETCH_LIMIT, free_processors].min
67
+ tell :pull_messages
61
68
  end
62
69
 
63
- def poll_future(max_number_of_messages)
70
+ def pull_future(max_number_of_messages)
64
71
  Concurrent::Promises.future do
65
- queue.receive_messages attribute_names: %w[All],
66
- message_attribute_names: %w[All],
67
- wait_time_seconds: wait,
68
- max_number_of_messages: max_number_of_messages
72
+ @queue.receive_messages wait: @wait, max_messages: max_number_of_messages
69
73
  end
70
74
  end
71
75
 
@@ -73,48 +77,81 @@ module Toiler
73
77
  @waiting_messages -= messages
74
78
  end
75
79
 
76
- def poll_messages
77
- return unless should_poll?
80
+ def max_messages
81
+ # limit max messages to 10% of concurrency to always ensure we have
82
+ # 10 concurrent fetches and improved latency
83
+ [@queue.max_messages, (@concurrency * 0.1).ceil].min
84
+ end
85
+
86
+ def needed_messages
87
+ @free_processors - @waiting_messages
88
+ end
89
+
90
+ def pull_messages
91
+ if needed_messages < max_messages
92
+ # a pull is already scheduled and we dont fit a full batch, return
93
+ return unless @scheduled_task.nil?
94
+
95
+ free_percent = free_processors.to_f / concurrency
96
+ # wait time linear to the amount of free workers with a maximum of 5 seconds,
97
+ # when there are more free workers, we can theoretically wait more time, since
98
+ # we already have workers waiting for messages.
99
+ wait_time = 0.1 + (5 * free_percent)
100
+
101
+ # schedule a message pull if we cannot fill a batch
102
+ # this ensures we wait some time for more messages to arrive
103
+ @scheduled_task = Concurrent::ScheduledTask.execute(wait_time) do
104
+ tell [:do_pull_messages, true]
105
+ end
106
+ end
107
+
108
+ # we can fit a whole batch, if there was already a scheduled task
109
+ # we just let it run, it will only pull messages if there are more
110
+ # needed messages
111
+ do_pull_messages false
112
+ end
113
+
114
+ def do_pull_messages(clear_scheduled_task)
115
+ @scheduled_task = nil if clear_scheduled_task
78
116
 
79
- max_number_of_messages = max_messages
80
- return if waiting_messages > 0 && !full_batch?(max_number_of_messages)
117
+ return unless should_pull?
81
118
 
82
- @waiting_messages += max_number_of_messages
119
+ current_needed_messages = needed_messages
83
120
 
84
- debug "Fetcher #{queue.name} polling messages..."
85
- future = poll_future max_number_of_messages
121
+ current_needed_messages = max_messages if current_needed_messages >= max_messages
122
+
123
+ @waiting_messages += current_needed_messages
124
+
125
+ debug "Fetcher #{@queue.name} pulling messages..."
126
+ future = pull_future current_needed_messages
86
127
  future.on_rejection! do
87
- tell [:release_messages, max_number_of_messages]
88
- tell :poll_messages
128
+ tell [:release_messages, current_needed_messages]
129
+ tell :pull_messages
89
130
  end
90
131
  future.on_fulfillment! do |msgs|
91
132
  tell [:assign_messages, msgs] if !msgs.nil? && !msgs.empty?
92
- tell [:release_messages, max_number_of_messages]
93
- tell :poll_messages
133
+ tell [:release_messages, current_needed_messages]
134
+ tell :pull_messages
94
135
  end
95
136
 
96
- poll_messages if should_poll?
97
- end
98
-
99
- def should_poll?
100
- free_processors / 2 > waiting_messages
137
+ # defer method execution to avoid recursion
138
+ tell :pull_messages if should_pull?
101
139
  end
102
140
 
103
- def full_batch?(max_number_of_messages)
104
- max_number_of_messages == FETCH_LIMIT || max_number_of_messages >= concurrency * 0.1
141
+ def should_pull?
142
+ needed_messages.positive?
105
143
  end
106
144
 
107
145
  def processor_pool
108
- @processor_pool ||= Toiler.processor_pool queue.name
146
+ @processor_pool ||= Toiler.processor_pool @queue.name
109
147
  end
110
148
 
111
149
  def assign_messages(messages)
112
- messages = [messages] if batch?
113
150
  messages.each do |m|
114
- processor_pool.tell [:process, visibility_timeout, m]
151
+ processor_pool.tell [:process, @ack_deadline, m]
115
152
  @free_processors -= 1
116
153
  end
117
- debug "Fetcher #{queue.name} assigned #{messages.count} messages"
154
+ debug "Fetcher #{@queue.name} assigned #{messages.count} messages"
118
155
  end
119
156
  end
120
157
  end