karafka 1.0.1 → 1.1.0.alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -3
  3. data/Gemfile +1 -0
  4. data/Gemfile.lock +14 -32
  5. data/README.md +1 -1
  6. data/karafka.gemspec +2 -3
  7. data/lib/karafka.rb +2 -3
  8. data/lib/karafka/attributes_map.rb +3 -3
  9. data/lib/karafka/backends/inline.rb +2 -2
  10. data/lib/karafka/base_controller.rb +19 -69
  11. data/lib/karafka/base_responder.rb +10 -5
  12. data/lib/karafka/cli/info.rb +1 -2
  13. data/lib/karafka/cli/server.rb +6 -8
  14. data/lib/karafka/connection/{messages_consumer.rb → consumer.rb} +27 -12
  15. data/lib/karafka/connection/listener.rb +6 -13
  16. data/lib/karafka/connection/{messages_processor.rb → processor.rb} +3 -3
  17. data/lib/karafka/controllers/callbacks.rb +54 -0
  18. data/lib/karafka/controllers/includer.rb +1 -1
  19. data/lib/karafka/controllers/single_params.rb +2 -2
  20. data/lib/karafka/errors.rb +7 -0
  21. data/lib/karafka/fetcher.rb +11 -5
  22. data/lib/karafka/monitor.rb +2 -2
  23. data/lib/karafka/params/params.rb +3 -1
  24. data/lib/karafka/params/params_batch.rb +1 -1
  25. data/lib/karafka/patches/dry_configurable.rb +0 -2
  26. data/lib/karafka/patches/ruby_kafka.rb +34 -0
  27. data/lib/karafka/persistence/consumer.rb +25 -0
  28. data/lib/karafka/persistence/controller.rb +24 -9
  29. data/lib/karafka/process.rb +1 -1
  30. data/lib/karafka/responders/topic.rb +8 -1
  31. data/lib/karafka/schemas/config.rb +0 -10
  32. data/lib/karafka/schemas/consumer_group.rb +9 -8
  33. data/lib/karafka/schemas/consumer_group_topic.rb +1 -1
  34. data/lib/karafka/schemas/responder_usage.rb +1 -0
  35. data/lib/karafka/server.rb +6 -19
  36. data/lib/karafka/setup/config.rb +15 -34
  37. data/lib/karafka/setup/configurators/base.rb +1 -1
  38. data/lib/karafka/setup/configurators/water_drop.rb +11 -13
  39. data/lib/karafka/templates/karafka.rb.example +1 -1
  40. data/lib/karafka/version.rb +1 -1
  41. metadata +15 -28
  42. data/Rakefile +0 -7
  43. data/lib/karafka/setup/configurators/celluloid.rb +0 -19
data/lib/karafka/responders/topic.rb CHANGED
@@ -36,13 +36,20 @@ module Karafka
        @options[:registered] == true
      end

+     # @return [Boolean] do we want to use async producer. Defaults to false as the sync producer
+     #   is safer and introduces less problems
+     def async?
+       @options.key?(:async) ? @options[:async] : false
+     end
+
      # @return [Hash] hash with this topic attributes and options
      def to_h
        {
          name: name,
          multiple_usage: multiple_usage?,
          required: required?,
-         registered: registered?
+         registered: registered?,
+         async: async?
        }
      end
    end
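For reference, a minimal responder sketch using the new flag (assuming async is passed the same way as the existing required/multiple_usage options when registering a topic; the class and topic names are illustrative):

class ExampleResponder < ApplicationResponder
  # Deliver this topic through the async WaterDrop producer instead of the
  # default (and safer) sync one
  topic :example_topic, async: true

  def respond(event)
    respond_to :example_topic, event
  end
end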
data/lib/karafka/schemas/config.rb CHANGED
@@ -15,17 +15,7 @@ module Karafka
      required(:client_id).filled(:str?, format?: Karafka::Schemas::TOPIC_REGEXP)
      required(:consumer_mapper)
      required(:topic_mapper)
-
-     required(:celluloid).schema do
-       required(:shutdown_timeout).filled(:int?, gteq?: 0)
-     end
-
      optional(:backend).filled
-
-     optional(:connection_pool).schema do
-       required(:size).filled
-       optional(:timeout).filled(:int?)
-     end
    end
  end
end
data/lib/karafka/schemas/consumer_group.rb CHANGED
@@ -27,16 +27,17 @@ module Karafka

      required(:id).filled(:str?, format?: Karafka::Schemas::TOPIC_REGEXP)
      required(:seed_brokers).filled { each(:broker_schema?) }
-     required(:session_timeout).filled(:int?)
-     required(:pause_timeout).filled(:int?, gteq?: 0)
-     required(:offset_commit_interval).filled(:int?)
+     required(:session_timeout).filled { int? | float? }
+     required(:pause_timeout).filled { (int? | float?) & gteq?(0) }
+     required(:offset_commit_interval) { int? | float? }
      required(:offset_commit_threshold).filled(:int?)
      required(:offset_retention_time) { none?.not > int? }
-     required(:heartbeat_interval).filled(:int?, gteq?: 0)
-     required(:connect_timeout).filled(:int?, gt?: 0)
-     required(:socket_timeout).filled(:int?, gt?: 0)
-     required(:max_wait_time).filled(:int?, gteq?: 0)
-     required(:batch_consuming).filled(:bool?)
+     required(:heartbeat_interval).filled { (int? | float?) & gteq?(0) }
+     required(:connect_timeout).filled { (int? | float?) & gt?(0) }
+     required(:socket_timeout).filled { (int? | float?) & gt?(0) }
+     required(:min_bytes).filled(:int?, gt?: 0)
+     required(:max_wait_time).filled { (int? | float?) & gteq?(0) }
+     required(:batch_fetching).filled(:bool?)
      required(:topics).filled { each { schema(ConsumerGroupTopic) } }

      # Max wait time cannot exceed socket_timeout - wouldn't make sense
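The loosened rules above accept float values for the time-based settings. A minimal sketch of a setup block that now validates (it mirrors the karafka.rb template shown further down; the fractional values are illustrative):

class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka.seed_brokers = %w( 127.0.0.1:9092 )
    # Fractional timeouts pass validation now that (int? | float?) is allowed
    config.kafka.max_wait_time = 0.5
    config.kafka.pause_timeout = 2.5
  end
end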
data/lib/karafka/schemas/consumer_group_topic.rb CHANGED
@@ -11,7 +11,7 @@ module Karafka
      required(:parser).filled
      required(:max_bytes_per_partition).filled(:int?, gteq?: 0)
      required(:start_from_beginning).filled(:bool?)
-     required(:batch_processing).filled(:bool?)
+     required(:batch_consuming).filled(:bool?)
      required(:persistent).filled(:bool?)
    end
  end
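Per-topic settings follow the same rename, so a routing block that used batch_processing in 1.0 would now look roughly like this (a sketch; the controller and topic names are illustrative and the flag is assumed to be exposed by the routing DSL the same way the old one was):

consumer_groups.draw do
  consumer_group :example_group do
    topic :example_topic do
      controller ExampleController
      # Was `batch_processing true` in 1.0.x
      batch_consuming true
    end
  end
end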
data/lib/karafka/schemas/responder_usage.rb CHANGED
@@ -9,6 +9,7 @@ module Karafka
      required(:multiple_usage).filled(:bool?)
      required(:usage_count).filled(:int?, gteq?: 0)
      required(:registered).filled(eql?: true)
+     required(:async).filled(:bool?)

      rule(
        required_usage: %i[required usage_count]
data/lib/karafka/server.rb CHANGED
@@ -4,16 +4,15 @@ module Karafka
  # Karafka consuming server class
  class Server
    class << self
-     # We need to store reference to all the consumers in the main server thread,
-     # So we can have access to them later on and be able to stop them on exit
-     attr_reader :consumers
+     # Set of consuming threads. Each consumer thread contains a single consumer
+     attr_accessor :consumer_threads

      # Writer for list of consumer groups that we want to consume in our current process context
      attr_writer :consumer_groups

      # Method which runs app
      def run
-       @consumers = Concurrent::Array.new
+       @consumer_threads = Concurrent::Array.new
        bind_on_sigint
        bind_on_sigquit
        bind_on_sigterm
@@ -36,29 +35,17 @@ module Karafka

      # What should happen when we decide to quit with sigint
      def bind_on_sigint
-       process.on_sigint do
-         Karafka::App.stop!
-         consumers.map(&:stop)
-         Kernel.exit
-       end
+       process.on_sigint { Karafka::App.stop! }
      end

      # What should happen when we decide to quit with sigquit
      def bind_on_sigquit
-       process.on_sigquit do
-         Karafka::App.stop!
-         consumers.map(&:stop)
-         Kernel.exit
-       end
+       process.on_sigquit { Karafka::App.stop! }
      end

      # What should happen when we decide to quit with sigterm
      def bind_on_sigterm
-       process.on_sigterm do
-         Karafka::App.stop!
-         consumers.map(&:stop)
-         Kernel.exit
-       end
+       process.on_sigterm { Karafka::App.stop! }
      end

      # Starts Karafka with a supervision
data/lib/karafka/setup/config.rb CHANGED
@@ -34,42 +34,19 @@ module Karafka
    # - #incoming - for remapping from the incoming message to our internal format
    # - #outgoing - for remapping from internal topic name into outgoing message
    setting :topic_mapper, -> { Routing::TopicMapper }
-   # If batch_consuming is true, we will consume kafka messages in batches instead of 1 by 1
-   # @note Consuming does not equal processing, see batch_processing description for details
-   setting :batch_consuming, true
-   # If batch_processing is true, we will have access to #params_batch instead of #params.
+   # If batch_fetching is true, we will fetch kafka messages in batches instead of 1 by 1
+   # @note Fetching does not equal consuming, see batch_consuming description for details
+   setting :batch_fetching, true
+   # If batch_consuming is true, we will have access to #params_batch instead of #params.
    # #params_batch will contain params received from Kafka (may be more than 1) so we can
    # process them in batches
-   setting :batch_processing, false
+   setting :batch_consuming, false
    # Should we operate in a single controller instance across multiple batches of messages,
    # from the same partition or should we build a new instance for each incoming batch.
    # Disabling that can be useful when you want to build a new controller instance for each
    # incoming batch. It's disabled by default, not to create more objects that needed on
    # each batch
    setting :persistent, true
-   # This is configured automatically, don't overwrite it!
-   # Each consumer group requires separate thread, so number of threads should be equal to
-   # number of consumer groups
-   setting :concurrency, -> { ::Karafka::App.consumer_groups.count }
-
-   # option celluloid [Hash] - optional - celluloid configuration options
-   setting :celluloid do
-     # options shutdown_timeout [Integer] How many seconds should we wait for actors (listeners)
-     # before forcefully shutting them
-     setting :shutdown_timeout, 30
-   end
-
-   # Connection pool options are used for producer (Waterdrop) - by default it will adapt to
-   # number of active actors
-   setting :connection_pool do
-     # Connection pool size for producers. If you use sidekiq or any other multi threaded
-     # backend, you might want to tune it to match number of threads of your background
-     # processing engine
-     setting :size, -> { ::Karafka::App.consumer_groups.active.count }
-     # How long should we wait for a working resource from the pool before rising timeout
-     # With a proper connection pool size, this should never happen
-     setting :timeout, 5
-   end

    # option kafka [Hash] - optional - kafka configuration options
    setting :kafka do
@@ -78,17 +55,17 @@ module Karafka
      # option session_timeout [Integer] the number of seconds after which, if a client
      # hasn't contacted the Kafka cluster, it will be kicked out of the group.
      setting :session_timeout, 30
-     # Time that a given partition will be paused from processing messages, when message
-     # processing fails. It allows us to process other partitions, while the error is being
+     # Time that a given partition will be paused from fetching messages, when message
+     # consumption fails. It allows us to process other partitions, while the error is being
      # resolved and also "slows" things down, so it prevents from "eating" up all messages and
-     # processing them with failed code
+     # consuming them with failed code
      setting :pause_timeout, 10
      # option offset_commit_interval [Integer] the interval between offset commits,
      # in seconds.
      setting :offset_commit_interval, 10
      # option offset_commit_threshold [Integer] the number of messages that can be
      # processed before their offsets are committed. If zero, offset commits are
-     # not triggered by message processing.
+     # not triggered by message consumption.
      setting :offset_commit_threshold, 0
      # option heartbeat_interval [Integer] the interval between heartbeats; must be less
      # than the session window.
@@ -104,12 +81,15 @@ module Karafka
      setting :min_bytes, 1
      # option max_wait_time [Integer, Float] max_wait_time is the maximum number of seconds to
      # wait before returning data from a single message fetch. By setting this high you also
-     # increase the processing throughput and by setting it low you set a bound on latency.
+     # increase the fetching throughput - and by setting it low you set a bound on latency.
      # This configuration overrides `min_bytes`, so you'll _always_ get data back within the
      # time specified. The default value is one second. If you want to have at most five
      # seconds of latency, set `max_wait_time` to 5. You should make sure
      # max_wait_time * num brokers + heartbeat_interval is less than session_timeout.
      setting :max_wait_time, 1
+     # option automatically_mark_as_processed [Boolean] should we automatically mark received
+     # messages as processed after non-error consumption
+     setting :automatically_mark_as_processed, true
      # option reconnect_timeout [Integer] How long should we wait before trying to reconnect to
      # Kafka cluster that went down (in seconds)
      setting :reconnect_timeout, 5
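A sketch of opting out of the new automatic marking (the setting lives under the kafka scope, as shown above; the rest of the block just mirrors the template):

class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka.seed_brokers = %w( 127.0.0.1:9092 )
    # Messages will no longer be marked as processed automatically after
    # successful consumption, so offset management becomes your responsibility
    config.kafka.automatically_mark_as_processed = false
  end
end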
@@ -124,7 +104,8 @@ module Karafka
      # writing to a socket connection to a broker. After this timeout expires the connection
      # will be killed. Note that some Kafka operations are by definition long-running, such as
      # waiting for new messages to arrive in a partition, so don't set this value too low
-     setting :socket_timeout, 10
+     setting :socket_timeout, 30
+
      # SSL authentication related settings
      # option ca_cert [String] SSL CA certificate
      setting :ssl_ca_cert, nil
data/lib/karafka/setup/configurators/base.rb CHANGED
@@ -4,7 +4,7 @@ module Karafka
  module Setup
    # Configurators module is used to enclose all the external dependencies configurations
    class Configurators
-     # Karafka has come components that it relies on (like Celluloid or Sidekiq)
+     # Karafka has come components that it relies on (like Sidekiq)
      # We need to configure all of them only when the framework was set up.
      # Any class that descends from this one will be automatically invoked upon setup (after it)
      # @example Configure an Example class
data/lib/karafka/setup/configurators/water_drop.rb CHANGED
@@ -7,21 +7,19 @@ module Karafka
    class WaterDrop < Base
      # Sets up a WaterDrop settings
      def setup
-       dynamic_params = Connection::ConfigAdapter.client(nil)
-
        ::WaterDrop.setup do |water_config|
-         water_config.send_messages = true
-         water_config.raise_on_failure = true
-         water_config.connection_pool = config.connection_pool
+         water_config.deliver = true
+
+         Karafka::App.config.to_h.except(:kafka).each do |k, v|
+           key_assignment = :"#{k}="
+           next unless water_config.respond_to?(key_assignment)
+           water_config.public_send(key_assignment, v)
+         end

-         # Automigration of all the attributes that should be accepted by waterdrop
-         # based on what we use in karafka ruby-kafka initialization
-         dynamic_params.each do |key, value|
-           key_assignment = :"#{key}="
-           # We decide whether we should set it on a kafka scope of waterdrop config or on the
-           # main scope
-           scope = water_config.kafka.respond_to?(key_assignment) ? :kafka : :itself
-           water_config.public_send(scope).public_send(key_assignment, value)
+         Karafka::App.config.kafka.to_h.each do |k, v|
+           key_assignment = :"#{k}="
+           next unless water_config.kafka.respond_to?(key_assignment)
+           water_config.kafka.public_send(key_assignment, v)
          end
        end
      end
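In practice this means the producer no longer needs a separate configuration: any Karafka root or kafka-scoped setting with a matching WaterDrop writer is copied over during setup. A rough illustration (the WaterDrop reader names are assumed here, not taken from this diff):

class KarafkaApp < Karafka::App
  setup do |config|
    config.client_id = 'example_app'
    config.kafka.seed_brokers = %w( 127.0.0.1:9092 )
  end
end

# Assuming WaterDrop exposes matching readers:
WaterDrop.config.deliver            # => true (always forced by the configurator)
WaterDrop.config.client_id          # => "example_app"
WaterDrop.config.kafka.seed_brokers # => ["127.0.0.1:9092"]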
data/lib/karafka/templates/karafka.rb.example CHANGED
@@ -18,7 +18,7 @@ class KarafkaApp < Karafka::App
    config.kafka.seed_brokers = %w( 127.0.0.1:9092 )
    config.client_id = 'example_app'
    config.backend = :inline
-   config.batch_consuming = true
+   config.batch_fetching = true
  end

  consumer_groups.draw do
data/lib/karafka/version.rb CHANGED
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
    # Current Karafka version
-   VERSION = '1.0.1'
+   VERSION = '1.1.0.alpha1'
  end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
-   version: 1.0.1
+   version: 1.1.0.alpha1
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -10,7 +10,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2017-10-27 00:00:00.000000000 Z
+ date: 2017-10-30 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: activesupport
@@ -26,20 +26,6 @@ dependencies:
      - - ">="
        - !ruby/object:Gem::Version
          version: '5.0'
- - !ruby/object:Gem::Dependency
-   name: celluloid
-   requirement: !ruby/object:Gem::Requirement
-     requirements:
-     - - ">="
-       - !ruby/object:Gem::Version
-         version: '0'
-   type: :runtime
-   prerelease: false
-   version_requirements: !ruby/object:Gem::Requirement
-     requirements:
-     - - ">="
-       - !ruby/object:Gem::Version
-         version: '0'
  - !ruby/object:Gem::Dependency
    name: dry-configurable
    requirement: !ruby/object:Gem::Requirement
@@ -130,14 +116,14 @@ dependencies:
      requirements:
      - - ">="
        - !ruby/object:Gem::Version
-         version: '0.4'
+         version: '0.5'
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
      - - ">="
        - !ruby/object:Gem::Version
-         version: '0.4'
+         version: '0.5'
  - !ruby/object:Gem::Dependency
    name: thor
    requirement: !ruby/object:Gem::Requirement
@@ -156,16 +142,16 @@ dependencies:
    name: waterdrop
    requirement: !ruby/object:Gem::Requirement
      requirements:
-     - - "~>"
+     - - ">="
        - !ruby/object:Gem::Version
-         version: '0.4'
+         version: 1.0.alpha2
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
-     - - "~>"
+     - - ">="
        - !ruby/object:Gem::Version
-         version: '0.4'
+         version: 1.0.alpha2
  description: Framework used to simplify Apache Kafka based Ruby applications development
  email:
  - maciej@coditsu.io
@@ -190,7 +176,6 @@ files:
  - Gemfile.lock
  - MIT-LICENCE
  - README.md
- - Rakefile
  - bin/karafka
  - config/errors.yml
  - karafka.gemspec
@@ -208,9 +193,10 @@ files:
  - lib/karafka/cli/install.rb
  - lib/karafka/cli/server.rb
  - lib/karafka/connection/config_adapter.rb
+ - lib/karafka/connection/consumer.rb
  - lib/karafka/connection/listener.rb
- - lib/karafka/connection/messages_consumer.rb
- - lib/karafka/connection/messages_processor.rb
+ - lib/karafka/connection/processor.rb
+ - lib/karafka/controllers/callbacks.rb
  - lib/karafka/controllers/includer.rb
  - lib/karafka/controllers/responders.rb
  - lib/karafka/controllers/single_params.rb
@@ -226,6 +212,8 @@ files:
  - lib/karafka/params/params_batch.rb
  - lib/karafka/parsers/json.rb
  - lib/karafka/patches/dry_configurable.rb
+ - lib/karafka/patches/ruby_kafka.rb
+ - lib/karafka/persistence/consumer.rb
  - lib/karafka/persistence/controller.rb
  - lib/karafka/process.rb
  - lib/karafka/responders/builder.rb
@@ -245,7 +233,6 @@ files:
  - lib/karafka/server.rb
  - lib/karafka/setup/config.rb
  - lib/karafka/setup/configurators/base.rb
- - lib/karafka/setup/configurators/celluloid.rb
  - lib/karafka/setup/configurators/water_drop.rb
  - lib/karafka/status.rb
  - lib/karafka/templates/application_controller.rb.example
@@ -268,9 +255,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
        version: 2.3.0
  required_rubygems_version: !ruby/object:Gem::Requirement
    requirements:
-   - - ">="
+   - - ">"
      - !ruby/object:Gem::Version
-       version: '0'
+       version: 1.3.1
  requirements: []
  rubyforge_project:
  rubygems_version: 2.6.13
data/Rakefile DELETED
@@ -1,7 +0,0 @@
- # frozen_string_literal: true
-
- require 'rspec'
- require 'rspec/core/rake_task'
-
- RSpec::Core::RakeTask.new(:spec)
- task default: :spec
data/lib/karafka/setup/configurators/celluloid.rb DELETED
@@ -1,19 +0,0 @@
- # frozen_string_literal: true
-
- module Karafka
-   module Setup
-     class Configurators
-       # Class responsible for setting up Celluloid settings
-       class Celluloid < Base
-         # Sets up a Karafka logger as celluloid logger
-         def setup
-           ::Celluloid.logger = ::Karafka.logger
-           # This is just a precaution - it should automatically close the current
-           # connection and shutdown actor - but in case it didn't (hanged, etc)
-           # we will kill it after waiting for some time
-           ::Celluloid.shutdown_timeout = config.celluloid.shutdown_timeout
-         end
-       end
-     end
-   end
- end