karafka 1.4.0 → 2.0.10

Files changed (172)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +89 -18
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +365 -1
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +56 -112
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +61 -68
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +272 -0
  16. data/bin/karafka +10 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/certs/cert_chain.pem +26 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +59 -38
  23. data/docker-compose.yml +10 -3
  24. data/karafka.gemspec +18 -21
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +33 -0
  32. data/lib/karafka/admin.rb +63 -0
  33. data/lib/karafka/app.rb +15 -20
  34. data/lib/karafka/base_consumer.rb +197 -31
  35. data/lib/karafka/cli/info.rb +44 -10
  36. data/lib/karafka/cli/install.rb +22 -12
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -3
  39. data/lib/karafka/connection/client.rb +379 -89
  40. data/lib/karafka/connection/listener.rb +250 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -184
  49. data/lib/karafka/contracts/consumer_group_topic.rb +35 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger.rb +6 -10
  60. data/lib/karafka/instrumentation/logger_listener.rb +174 -0
  61. data/lib/karafka/instrumentation/monitor.rb +13 -61
  62. data/lib/karafka/instrumentation/notifications.rb +53 -0
  63. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  64. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  65. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  66. data/lib/karafka/instrumentation.rb +21 -0
  67. data/lib/karafka/licenser.rb +75 -0
  68. data/lib/karafka/messages/batch_metadata.rb +45 -0
  69. data/lib/karafka/messages/builders/batch_metadata.rb +39 -0
  70. data/lib/karafka/messages/builders/message.rb +39 -0
  71. data/lib/karafka/messages/builders/messages.rb +34 -0
  72. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  73. data/lib/karafka/messages/messages.rb +64 -0
  74. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  75. data/lib/karafka/messages/seek.rb +9 -0
  76. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  77. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  78. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  79. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  80. data/lib/karafka/pro/base_consumer.rb +107 -0
  81. data/lib/karafka/pro/contracts/base.rb +21 -0
  82. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  83. data/lib/karafka/pro/contracts/consumer_group_topic.rb +69 -0
  84. data/lib/karafka/pro/loader.rb +76 -0
  85. data/lib/karafka/pro/performance_tracker.rb +80 -0
  86. data/lib/karafka/pro/processing/coordinator.rb +85 -0
  87. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +38 -0
  88. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  89. data/lib/karafka/pro/processing/partitioner.rb +58 -0
  90. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  91. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  92. data/lib/karafka/pro/routing/topic_extensions.rb +74 -0
  93. data/lib/karafka/pro.rb +13 -0
  94. data/lib/karafka/process.rb +1 -0
  95. data/lib/karafka/processing/coordinator.rb +103 -0
  96. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  97. data/lib/karafka/processing/executor.rb +126 -0
  98. data/lib/karafka/processing/executors_buffer.rb +88 -0
  99. data/lib/karafka/processing/jobs/base.rb +55 -0
  100. data/lib/karafka/processing/jobs/consume.rb +47 -0
  101. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  102. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  103. data/lib/karafka/processing/jobs_builder.rb +29 -0
  104. data/lib/karafka/processing/jobs_queue.rb +144 -0
  105. data/lib/karafka/processing/partitioner.rb +22 -0
  106. data/lib/karafka/processing/result.rb +37 -0
  107. data/lib/karafka/processing/scheduler.rb +22 -0
  108. data/lib/karafka/processing/worker.rb +91 -0
  109. data/lib/karafka/processing/workers_batch.rb +27 -0
  110. data/lib/karafka/railtie.rb +127 -0
  111. data/lib/karafka/routing/builder.rb +26 -23
  112. data/lib/karafka/routing/consumer_group.rb +37 -17
  113. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  114. data/lib/karafka/routing/proxy.rb +9 -16
  115. data/lib/karafka/routing/router.rb +1 -1
  116. data/lib/karafka/routing/subscription_group.rb +53 -0
  117. data/lib/karafka/routing/subscription_groups_builder.rb +54 -0
  118. data/lib/karafka/routing/topic.rb +65 -24
  119. data/lib/karafka/routing/topics.rb +38 -0
  120. data/lib/karafka/runner.rb +51 -0
  121. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  122. data/lib/karafka/server.rb +67 -26
  123. data/lib/karafka/setup/config.rb +153 -175
  124. data/lib/karafka/status.rb +14 -5
  125. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  126. data/lib/karafka/templates/karafka.rb.erb +17 -55
  127. data/lib/karafka/time_trackers/base.rb +19 -0
  128. data/lib/karafka/time_trackers/pause.rb +92 -0
  129. data/lib/karafka/time_trackers/poll.rb +65 -0
  130. data/lib/karafka/version.rb +1 -1
  131. data/lib/karafka.rb +46 -16
  132. data.tar.gz.sig +0 -0
  133. metadata +145 -171
  134. metadata.gz.sig +0 -0
  135. data/.github/FUNDING.yml +0 -3
  136. data/MIT-LICENCE +0 -18
  137. data/certs/mensfeld.pem +0 -25
  138. data/lib/karafka/attributes_map.rb +0 -62
  139. data/lib/karafka/backends/inline.rb +0 -16
  140. data/lib/karafka/base_responder.rb +0 -226
  141. data/lib/karafka/cli/flow.rb +0 -48
  142. data/lib/karafka/code_reloader.rb +0 -67
  143. data/lib/karafka/connection/api_adapter.rb +0 -161
  144. data/lib/karafka/connection/batch_delegator.rb +0 -55
  145. data/lib/karafka/connection/builder.rb +0 -18
  146. data/lib/karafka/connection/message_delegator.rb +0 -36
  147. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  148. data/lib/karafka/consumers/callbacks.rb +0 -71
  149. data/lib/karafka/consumers/includer.rb +0 -64
  150. data/lib/karafka/consumers/responders.rb +0 -24
  151. data/lib/karafka/consumers/single_params.rb +0 -15
  152. data/lib/karafka/contracts/responder_usage.rb +0 -54
  153. data/lib/karafka/fetcher.rb +0 -42
  154. data/lib/karafka/helpers/class_matcher.rb +0 -88
  155. data/lib/karafka/helpers/config_retriever.rb +0 -46
  156. data/lib/karafka/helpers/inflector.rb +0 -26
  157. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  158. data/lib/karafka/params/batch_metadata.rb +0 -26
  159. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  160. data/lib/karafka/params/builders/params.rb +0 -38
  161. data/lib/karafka/params/builders/params_batch.rb +0 -25
  162. data/lib/karafka/params/params_batch.rb +0 -60
  163. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  164. data/lib/karafka/persistence/client.rb +0 -29
  165. data/lib/karafka/persistence/consumers.rb +0 -45
  166. data/lib/karafka/persistence/topics.rb +0 -48
  167. data/lib/karafka/responders/builder.rb +0 -36
  168. data/lib/karafka/responders/topic.rb +0 -55
  169. data/lib/karafka/routing/topic_mapper.rb +0 -53
  170. data/lib/karafka/serialization/json/serializer.rb +0 -31
  171. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  172. data/lib/karafka/templates/application_responder.rb.erb +0 -11
data/lib/karafka/setup/config.rb
@@ -12,210 +12,188 @@ module Karafka
  # enough and will still keep the code simple
  # @see Karafka::Setup::Configurators::Base for more details about configurators api
  class Config
- extend Dry::Configurable
+ extend ::Karafka::Core::Configurable

- # Contract for checking the config provided by the user
- CONTRACT = Karafka::Contracts::Config.new.freeze
+ # Defaults for kafka settings, that will be overwritten only if not present already
+ KAFKA_DEFAULTS = {
+ 'client.id': 'karafka'
+ }.freeze

- private_constant :CONTRACT
+ # Contains settings that should not be used in production but make life easier in dev
+ DEV_DEFAULTS = {
+ # Will create non-existing topics automatically.
+ # Note that the broker needs to be configured with `auto.create.topics.enable=true`
+ # While it is not recommended in prod, it simplifies work in dev
+ 'allow.auto.create.topics': 'true',
+ # We refresh the cluster state often as newly created topics in dev may not be detected
+ # fast enough. Fast enough means within reasonable time to provide decent user experience
+ # While it's only a one time thing for new topics, it can still be irritating to have to
+ # restart the process.
+ 'topic.metadata.refresh.interval.ms': 5_000
+ }.freeze
+
+ private_constant :KAFKA_DEFAULTS, :DEV_DEFAULTS

  # Available settings
+
+ # Namespace for Pro version related license management. If you use LGPL, no need to worry
+ # about any of this
+ setting :license do
+ # option token [String, false] - license token issued when you acquire a Pro license
+ # Leave false if using the LGPL version and all is going to work just fine :)
+ #
+ # @note By using the commercial components, you accept the LICENSE-COMM commercial license
+ # terms and conditions
+ setting :token, default: false
+ # option entity [String] for whom we did issue the license
+ setting :entity, default: ''
+ end
+
  # option client_id [String] kafka client_id - used to provide
  # default Kafka groups namespaces and identify that app in kafka
- setting :client_id
- # What backend do we want to use to process messages
- setting :backend, :inline
+ setting :client_id, default: 'karafka'
  # option logger [Instance] logger that we want to use
- setting :logger, ::Karafka::Instrumentation::Logger.new
+ setting :logger, default: ::Karafka::Instrumentation::Logger.new
  # option monitor [Instance] monitor that we will to use (defaults to Karafka::Monitor)
- setting :monitor, ::Karafka::Instrumentation::Monitor.new
+ setting :monitor, default: ::Karafka::Instrumentation::Monitor.new
  # Mapper used to remap consumer groups ids, so in case users migrate from other tools
  # or they need to maintain their own internal consumer group naming conventions, they
  # can easily do it, replacing the default client_id + consumer name pattern concept
- setting :consumer_mapper, Routing::ConsumerMapper.new
- # Mapper used to remap names of topics, so we can have a clean internal topic naming
- # despite using any Kafka provider that uses namespacing, etc
- # It needs to implement two methods:
- # - #incoming - for remapping from the incoming message to our internal format
- # - #outgoing - for remapping from internal topic name into outgoing message
- setting :topic_mapper, Routing::TopicMapper.new
- # Default serializer for converting whatever we want to send to kafka to json
- setting :serializer, Karafka::Serialization::Json::Serializer.new
+ setting :consumer_mapper, default: Routing::ConsumerMapper.new
+ # option [Boolean] should we reload consumers with each incoming batch thus effectively
+ # supporting code reload (if someone reloads code) or should we keep the persistence
+ setting :consumer_persistence, default: true
  # Default deserializer for converting incoming data into ruby objects
- setting :deserializer, Karafka::Serialization::Json::Deserializer.new
- # If batch_fetching is true, we will fetch kafka messages in batches instead of 1 by 1
- # @note Fetching does not equal consuming, see batch_consuming description for details
- setting :batch_fetching, true
- # If batch_consuming is true, we will have access to #params_batch instead of #params.
- # #params_batch will contain params received from Kafka (may be more than 1) so we can
- # process them in batches
- setting :batch_consuming, false
- # option shutdown_timeout [Integer, nil] the number of seconds after which Karafka no
- # longer wait for the consumers to stop gracefully but instead we force terminate
+ setting :deserializer, default: Karafka::Serialization::Json::Deserializer.new
+ # option [String] should we start with the earliest possible offset or latest
+ # This will set the `auto.offset.reset` value unless present in the kafka scope
+ setting :initial_offset, default: 'earliest'
+ # option [Boolean] should we leave offset management to the user
+ setting :manual_offset_management, default: false
+ # options max_messages [Integer] how many messages do we want to fetch from Kafka in one go
+ setting :max_messages, default: 100
+ # option [Integer] number of milliseconds we can wait while fetching data
+ setting :max_wait_time, default: 1_000
+ # option shutdown_timeout [Integer] the number of milliseconds after which Karafka no
+ # longer waits for the consumers to stop gracefully but instead we force terminate
  # everything.
- setting :shutdown_timeout, 60
-
- # option kafka [Hash] - optional - kafka configuration options
- setting :kafka do
- # Array with at least one host
- setting :seed_brokers, %w[kafka://127.0.0.1:9092]
- # option session_timeout [Integer] the number of seconds after which, if a client
- # hasn't contacted the Kafka cluster, it will be kicked out of the group.
- setting :session_timeout, 30
- # Time that a given partition will be paused from fetching messages, when message
- # consumption fails. It allows us to process other partitions, while the error is being
- # resolved and also "slows" things down, so it prevents from "eating" up all messages and
- # consuming them with failed code. Use `nil` if you want to pause forever and never retry.
- setting :pause_timeout, 10
- # option pause_max_timeout [Integer, nil] the maximum number of seconds to pause for,
- # or `nil` if no maximum should be enforced.
- setting :pause_max_timeout, nil
- # option pause_exponential_backoff [Boolean] whether to enable exponential backoff
- setting :pause_exponential_backoff, false
- # option offset_commit_interval [Integer] the interval between offset commits,
- # in seconds.
- setting :offset_commit_interval, 10
- # option offset_commit_threshold [Integer] the number of messages that can be
- # processed before their offsets are committed. If zero, offset commits are
- # not triggered by message consumption.
- setting :offset_commit_threshold, 0
- # option heartbeat_interval [Integer] the interval between heartbeats; must be less
- # than the session window.
- setting :heartbeat_interval, 10
- # option offset_retention_time [Integer] The length of the retention window, known as
- # offset retention time
- setting :offset_retention_time, nil
- # option fetcher_max_queue_size [Integer] max number of items in the fetch queue that
- # are stored for further processing. Note, that each item in the queue represents a
- # response from a single broker
- setting :fetcher_max_queue_size, 10
- # option max_bytes_per_partition [Integer] the maximum amount of data fetched
- # from a single partition at a time.
- setting :max_bytes_per_partition, 1_048_576
- # whether to consume messages starting at the beginning or to just consume new messages
- setting :start_from_beginning, true
- # option min_bytes [Integer] the minimum number of bytes to read before
- # returning messages from the server; if `max_wait_time` is reached, this
- # is ignored.
- setting :min_bytes, 1
- # option max_bytes [Integer] the maximum number of bytes to read before returning messages
- # from each broker.
- setting :max_bytes, 10_485_760
- # option max_wait_time [Integer, Float] max_wait_time is the maximum number of seconds to
- # wait before returning data from a single message fetch. By setting this high you also
- # increase the fetching throughput - and by setting it low you set a bound on latency.
- # This configuration overrides `min_bytes`, so you'll _always_ get data back within the
- # time specified. The default value is one second. If you want to have at most five
- # seconds of latency, set `max_wait_time` to 5. You should make sure
- # max_wait_time * num brokers + heartbeat_interval is less than session_timeout.
- setting :max_wait_time, 1
- # option automatically_mark_as_consumed [Boolean] should we automatically mark received
- # messages as consumed (processed) after non-error consumption
- setting :automatically_mark_as_consumed, true
- # option reconnect_timeout [Integer] How long should we wait before trying to reconnect to
- # Kafka cluster that went down (in seconds)
- setting :reconnect_timeout, 5
- # option connect_timeout [Integer] Sets the number of seconds to wait while connecting to
- # a broker for the first time. When ruby-kafka initializes, it needs to connect to at
- # least one host.
- setting :connect_timeout, 10
- # option socket_timeout [Integer] Sets the number of seconds to wait when reading from or
- # writing to a socket connection to a broker. After this timeout expires the connection
- # will be killed. Note that some Kafka operations are by definition long-running, such as
- # waiting for new messages to arrive in a partition, so don't set this value too low
- setting :socket_timeout, 30
- # option partitioner [Object, nil] the partitioner that should be used by the client
- setting :partitioner, nil
-
- # SSL authentication related settings
- # option ca_cert [String, nil] SSL CA certificate
- setting :ssl_ca_cert, nil
- # option ssl_ca_cert_file_path [String, nil] SSL CA certificate file path
- setting :ssl_ca_cert_file_path, nil
- # option ssl_ca_certs_from_system [Boolean] Use the CA certs from your system's default
- # certificate store
- setting :ssl_ca_certs_from_system, false
- # option ssl_verify_hostname [Boolean] Verify the hostname for client certs
- setting :ssl_verify_hostname, true
- # option ssl_client_cert [String, nil] SSL client certificate
- setting :ssl_client_cert, nil
- # option ssl_client_cert_key [String, nil] SSL client certificate password
- setting :ssl_client_cert_key, nil
- # option sasl_gssapi_principal [String, nil] sasl principal
- setting :sasl_gssapi_principal, nil
- # option sasl_gssapi_keytab [String, nil] sasl keytab
- setting :sasl_gssapi_keytab, nil
- # option sasl_plain_authzid [String] The authorization identity to use
- setting :sasl_plain_authzid, ''
- # option sasl_plain_username [String, nil] The username used to authenticate
- setting :sasl_plain_username, nil
- # option sasl_plain_password [String, nil] The password used to authenticate
- setting :sasl_plain_password, nil
- # option sasl_scram_username [String, nil] The username used to authenticate
- setting :sasl_scram_username, nil
- # option sasl_scram_password [String, nil] The password used to authenticate
- setting :sasl_scram_password, nil
- # option sasl_scram_mechanism [String, nil] Scram mechanism, either 'sha256' or 'sha512'
- setting :sasl_scram_mechanism, nil
- # option sasl_over_ssl [Boolean] whether to enforce SSL with SASL
- setting :sasl_over_ssl, true
- # option ssl_client_cert_chain [String, nil] client cert chain or nil if not used
- setting :ssl_client_cert_chain, nil
- # option ssl_client_cert_key_password [String, nil] the password required to read
- # the ssl_client_cert_key
- setting :ssl_client_cert_key_password, nil
- # @param sasl_oauth_token_provider [Object, nil] OAuthBearer Token Provider instance that
- # implements method token.
- setting :sasl_oauth_token_provider, nil
- end
+ setting :shutdown_timeout, default: 60_000
+ # option [Integer] number of threads in which we want to do parallel processing
+ setting :concurrency, default: 5
+ # option [Integer] how long should we wait upon processing error
+ setting :pause_timeout, default: 1_000
+ # option [Integer] what is the max timeout in case of an exponential backoff
+ setting :pause_max_timeout, default: 30_000
+ # option [Boolean] should we use exponential backoff
+ setting :pause_with_exponential_backoff, default: true
+ # option [::WaterDrop::Producer, nil]
+ # Unless configured, will be created once Karafka is configured based on user Karafka setup
+ setting :producer, default: nil
+
+ # rdkafka default options
+ # @see https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
+ setting :kafka, default: {}

- # Namespace for internal settings that should not be modified
- # It's a temporary step to "declassify" several things internally before we move to a
- # non global state
+ # Namespace for internal settings that should not be modified directly
  setting :internal do
- # option routing_builder [Karafka::Routing::Builder] builder instance
- setting :routing_builder, Routing::Builder.new
  # option status [Karafka::Status] app status
- setting :status, Status.new
+ setting :status, default: Status.new
  # option process [Karafka::Process] process status
  # @note In the future, we need to have a single process representation for all the karafka
  # instances
- setting :process, Process.new
- # option fetcher [Karafka::Fetcher] fetcher instance
- setting :fetcher, Fetcher.new
- # option configurators [Array<Object>] all configurators that we want to run after
- # the setup
- setting :configurators, [Configurators::WaterDrop.new]
+ setting :process, default: Process.new
+
+ setting :routing do
+ # option builder [Karafka::Routing::Builder] builder instance
+ setting :builder, default: Routing::Builder.new
+ # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
+ # group builder
+ setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
+ end
+
+ setting :processing do
+ # option scheduler [Object] scheduler we will be using
+ setting :scheduler, default: Processing::Scheduler.new
+ # option jobs_builder [Object] jobs builder we want to use
+ setting :jobs_builder, default: Processing::JobsBuilder.new
+ # option coordinator [Class] work coordinator we want to user for processing coordination
+ setting :coordinator_class, default: Processing::Coordinator
+ # option partitioner_class [Class] partitioner we use against a batch of data
+ setting :partitioner_class, default: Processing::Partitioner
+ end
+
+ # Karafka components for ActiveJob
+ setting :active_job do
+ # option dispatcher [Karafka::ActiveJob::Dispatcher] default dispatcher for ActiveJob
+ setting :dispatcher, default: ActiveJob::Dispatcher.new
+ # option job_options_contract [Karafka::Contracts::JobOptionsContract] contract for
+ # ensuring, that extra job options defined are valid
+ setting :job_options_contract, default: ActiveJob::JobOptionsContract.new
+ # option consumer [Class] consumer class that should be used to consume ActiveJob data
+ setting :consumer_class, default: ActiveJob::Consumer
+ end
  end

+ # This will load all the defaults that can be later overwritten.
+ # Thanks to that we have an initial state out of the box.
+ configure
+
  class << self
  # Configuring method
- # @yield Runs a block of code providing a config singleton instance to it
- # @yieldparam [Karafka::Setup::Config] Karafka config instance
- def setup
- configure { |config| yield(config) }
+ # @param block [Proc] block we want to execute with the config instance
+ def setup(&block)
+ configure(&block)
+ merge_kafka_defaults!(config)
+
+ Contracts::Config.new.validate!(config.to_h)
+
+ licenser = Licenser.new
+
+ # Tries to load our license gem and if present will try to load the correct license
+ licenser.prepare_and_verify(config.license)
+
+ configure_components
+
+ Karafka::App.initialized!
  end

- # Everything that should be initialized after the setup
- # Components are in karafka/config directory and are all loaded one by one
- # If you want to configure a next component, please add a proper file to config dir
- def setup_components
- config
- .internal
- .configurators
- .each { |configurator| configurator.call(config) }
+ private
+
+ # Propagates the kafka setting defaults unless they are already present
+ # This makes it easier to set some values that users usually don't change but still allows
+ # them to overwrite the whole hash if they want to
+ # @param config [Karafka::Core::Configurable::Node] config of this producer
+ def merge_kafka_defaults!(config)
+ KAFKA_DEFAULTS.each do |key, value|
+ next if config.kafka.key?(key)
+
+ config.kafka[key] = value
+ end
+
+ return if Karafka::App.env.production?
+
+ DEV_DEFAULTS.each do |key, value|
+ next if config.kafka.key?(key)
+
+ config.kafka[key] = value
+ end
  end

- # Validate config based on the config contract
- # @return [Boolean] true if configuration is valid
- # @raise [Karafka::Errors::InvalidConfigurationError] raised when configuration
- # doesn't match with the config contract
- def validate!
- validation_result = CONTRACT.call(config.to_h)
+ # Sets up all the components that are based on the user configuration
+ # @note At the moment it is only WaterDrop
+ def configure_components
+ config.producer ||= ::WaterDrop::Producer.new do |producer_config|
+ # In some cases WaterDrop updates the config and we don't want our consumer config to
+ # be polluted by those updates, that's why we copy
+ producer_config.kafka = config.kafka.dup
+ producer_config.logger = config.logger
+ end

- return true if validation_result.success?
+ return unless Karafka.pro?

- raise Errors::InvalidConfigurationError, validation_result.errors.to_h
+ # Runs the pro loader that includes all the pro components
+ require 'karafka/pro/loader'
+ Pro::Loader.setup(config)
  end
  end
  end
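
For orientation, here is a minimal, hedged sketch (not part of the changeset) of what a setup block looks like against the 2.0 settings above. The broker address and values are illustrative; the old ruby-kafka style options that lived under a nested kafka block are now passed as librdkafka keys in the single kafka hash, and time-based settings are expressed in milliseconds.

class ExampleKarafkaApp < Karafka::App
  setup do |config|
    config.client_id = 'example_app'
    # Replaces the removed kafka.seed_brokers array; keys follow librdkafka naming
    config.kafka = {
      'bootstrap.servers': '127.0.0.1:9092',
      # Illustrative librdkafka option; roughly what kafka.session_timeout (seconds) used to do
      'session.timeout.ms': 30_000
    }
    # Pausing on processing errors is now configured at the top level, in ms
    config.pause_timeout = 1_000
    config.pause_max_timeout = 30_000
    config.pause_with_exponential_backoff = true
    # Polling limits: messages per fetch and max wait in ms
    config.max_messages = 100
    config.max_wait_time = 1_000
    # Parallel processing threads and shutdown grace period (ms)
    config.concurrency = 5
    config.shutdown_timeout = 60_000
  end
end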
data/lib/karafka/status.rb
@@ -3,16 +3,22 @@
  module Karafka
  # App status monitor
  class Status
- # Available states and their transitions
+ # Available states and their transitions.
  STATES = {
  initializing: :initialize!,
  initialized: :initialized!,
  running: :run!,
- stopping: :stop!
+ stopping: :stop!,
+ stopped: :stopped!
  }.freeze

  private_constant :STATES

+ # By default we are in the initializing state
+ def initialize
+ initialize!
+ end
+
  STATES.each do |state, transition|
  define_method :"#{state}?" do
  @status == state
@@ -20,9 +26,12 @@ module Karafka

  define_method transition do
  @status = state
- # Trap context disallows to run certain things that we instrument
- # so the state changes are executed from a separate thread
- Thread.new { Karafka.monitor.instrument("app.#{state}") }.join
+
+ # Skip on creation (initializing)
+ # We skip as during this state we do not have yet a monitor
+ return if initializing?
+
+ Karafka.monitor.instrument("app.#{state}")
  end
  end
  end
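
Because STATES drives define_method, each entry produces a predicate and a bang transition. A small illustrative sketch (not from the diff) of the resulting API, including the new stopped state and the constructor that starts in initializing:

status = Karafka::Status.new
status.initializing? # => true, set by the new #initialize (no instrumentation for this state)
status.initialized!  # instruments 'app.initialized'
status.run!          # instruments 'app.running' via Karafka.monitor
status.running?      # => true
status.stop!         # instruments 'app.stopping'
status.stopped!      # instruments 'app.stopped', the newly added state
status.stopped?      # => true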
data/lib/karafka/templates/example_consumer.rb.erb
@@ -0,0 +1,16 @@
+ # frozen_string_literal: true
+
+ # Example consumer that prints messages payloads
+ class ExampleConsumer < ApplicationConsumer
+ def consume
+ messages.each { |message| puts message.payload }
+ end
+
+ # Run anything upon partition being revoked
+ # def revoked
+ # end
+
+ # Define here any teardown things you want when Karafka server stops
+ # def shutdown
+ # end
+ end
data/lib/karafka/templates/karafka.rb.erb
@@ -1,20 +1,6 @@
  # frozen_string_literal: true
+ <% unless rails? -%>

- <% if rails? -%>
- ENV['RAILS_ENV'] ||= 'development'
- ENV['KARAFKA_ENV'] = ENV['RAILS_ENV']
- require ::File.expand_path('../config/environment', __FILE__)
- Rails.application.eager_load!
-
- # This lines will make Karafka print to stdout like puma or unicorn
- if Rails.env.development?
- Rails.logger.extend(
- ActiveSupport::Logger.broadcast(
- ActiveSupport::Logger.new($stdout)
- )
- )
- end
- <% else -%>
  # This file is auto-generated during the install process.
  # If by any chance you've wanted a setup for Rails app, either run the `karafka:install`
  # command again or refer to the install templates available in the source codes
@@ -31,9 +17,7 @@ APP_LOADER.enable_reloading
  %w[
  lib
  app/consumers
- app/responders
- app/workers
- ].each(&APP_LOADER.method(:push_dir))
+ ].each { |dir| APP_LOADER.push_dir(dir) }

  APP_LOADER.setup
  APP_LOADER.eager_load
@@ -41,10 +25,12 @@ APP_LOADER.eager_load

  class KarafkaApp < Karafka::App
  setup do |config|
- config.kafka.seed_brokers = %w[kafka://127.0.0.1:9092]
+ config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }
  config.client_id = 'example_app'
  <% if rails? -%>
- config.logger = Rails.logger
+ # Recreate consumers with each batch. This will allow Rails code reload to work in the
+ # development mode. Otherwise Karafka process would not be aware of code changes
+ config.consumer_persistence = !Rails.env.development?
  <% end -%>
  end

@@ -52,41 +38,17 @@ class KarafkaApp < Karafka::App
  # interested in logging events for certain environments. Since instrumentation
  # notifications add extra boilerplate, if you want to achieve max performance,
  # listen to only what you really need for given environment.
- Karafka.monitor.subscribe(WaterDrop::Instrumentation::StdoutListener.new)
- Karafka.monitor.subscribe(Karafka::Instrumentation::StdoutListener.new)
- Karafka.monitor.subscribe(Karafka::Instrumentation::ProctitleListener.new)
-
- # Uncomment that in order to achieve code reload in development mode
- # Be aware, that this might have some side-effects. Please refer to the wiki
- # for more details on benefits and downsides of the code reload in the
- # development mode
- #
- # Karafka.monitor.subscribe(
- # Karafka::CodeReloader.new(
- # <%= rails? ? '*Rails.application.reloaders' : 'APP_LOADER' %>
- # )
- # )
-
- consumer_groups.draw do
- # topic :example do
- # consumer ExampleConsumer
- # end
+ Karafka.monitor.subscribe(Karafka::Instrumentation::LoggerListener.new)
+ # Karafka.monitor.subscribe(Karafka::Instrumentation::ProctitleListener.new)

- # consumer_group :bigger_group do
- # topic :test do
- # consumer TestConsumer
- # end
- #
- # topic :test2 do
- # consumer Test2Consumer
- # end
- # end
+ routes.draw do
+ <% if rails? -%>
+ # Uncomment this if you use Karafka with ActiveJob
+ # You ned to define the topic per each queue name you use
+ # active_job_topic :default
+ <% end -%>
+ topic :example do
+ consumer ExampleConsumer
+ end
  end
  end
-
- Karafka.monitor.subscribe('app.initialized') do
- # Put here all the things you want to do after the Karafka framework
- # initialization
- end
-
- KarafkaApp.boot!
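
The Rails branch of the template above only hints at the new ActiveJob integration, so here is a hedged sketch of how the pieces added in this release (the Karafka ActiveJob adapter and active_job_topic routing) could fit together; the job class and queue name are illustrative only.

# config/application.rb - route ActiveJob through the adapter shipped with Karafka 2.0
# config.active_job.queue_adapter = :karafka

# app/jobs/welcome_job.rb
class WelcomeJob < ActiveJob::Base
  queue_as :default

  def perform(user_id)
    puts "Welcoming user ##{user_id}"
  end
end

# karafka.rb - one active_job_topic per ActiveJob queue name you use
# routes.draw do
#   active_job_topic :default
# end

# Enqueued jobs are produced to the 'default' topic and consumed by the Karafka server
WelcomeJob.perform_later(42)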
data/lib/karafka/time_trackers/base.rb
@@ -0,0 +1,19 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ # Time trackers module.
+ #
+ # Time trackers are used to track time in context of having a time poll (amount of time
+ # available for processing) or a pausing engine (pause for a time period).
+ module TimeTrackers
+ # Base class for all the time-trackers.
+ class Base
+ private
+
+ # @return [Float] current time in milliseconds
+ def now
+ ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) * 1000
+ end
+ end
+ end
+ end
data/lib/karafka/time_trackers/pause.rb
@@ -0,0 +1,92 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module TimeTrackers
+ # Handles Kafka topic partition pausing and resuming with exponential back-offs.
+ class Pause < Base
+ attr_reader :count
+
+ # @param timeout [Integer] how long should we wait when anything went wrong (in ms)
+ # @param max_timeout [Integer, nil] if exponential is on, what is the max value we can reach
+ # exponentially on which we will stay
+ # @param exponential_backoff [Boolean] should we wait exponentially or with the same
+ # timeout value
+ # @return [Karafka::TimeTrackers::Pause]
+ # @example
+ # pause = Karafka::TimeTrackers::Pause.new(timeout: 1000)
+ # pause.expired? #=> true
+ # pause.paused? #=> false
+ # pause.pause
+ # sleep(1.1)
+ # pause.paused? #=> true
+ # pause.expired? #=> true
+ # pause.count #=> 1
+ # pause.pause
+ # pause.count #=> 1
+ # pause.paused? #=> true
+ # pause.expired? #=> false
+ # pause.resume
+ # pause.count #=> 2
+ # pause.paused? #=> false
+ # pause.reset
+ # pause.count #=> 0
+ def initialize(timeout:, max_timeout:, exponential_backoff:)
+ @started_at = nil
+ @count = 0
+ @timeout = timeout
+ @max_timeout = max_timeout
+ @exponential_backoff = exponential_backoff
+ super()
+ end
+
+ # Pauses the processing from now till the end of the interval (backoff or non-backoff)
+ # and records the count.
+ # @param timeout [Integer] timeout value in milliseconds that overwrites the default timeout
+ # @note Providing this value can be useful when we explicitly want to pause for a certain
+ # period of time, outside of any regular pausing logic
+ def pause(timeout = backoff_interval)
+ @started_at = now
+ @ends_at = @started_at + timeout
+ @count += 1
+ end
+
+ # Marks the pause as resumed.
+ def resume
+ @started_at = nil
+ @ends_at = nil
+ end
+
+ # Expires the pause, so it can be considered expired
+ def expire
+ @ends_at = nil
+ end
+
+ # @return [Boolean] are we paused from processing
+ def paused?
+ !@started_at.nil?
+ end
+
+ # @return [Boolean] did the pause expire
+ def expired?
+ @ends_at ? now >= @ends_at : true
+ end
+
+ # Resets the pause counter.
+ def reset
+ @count = 0
+ end
+
+ private
+
+ # Computers the exponential backoff
+ # @return [Integer] backoff in milliseconds
+ def backoff_interval
+ backoff_factor = @exponential_backoff ? 2**@count : 1
+
+ timeout = backoff_factor * @timeout
+
+ @max_timeout && timeout > @max_timeout ? @max_timeout : timeout
+ end
+ end
+ end
+ end
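
To make the backoff behaviour concrete, a short usage sketch consistent with the class above and the new pause defaults (1_000 ms base, 30_000 ms cap, exponential backoff enabled): each consecutive pause doubles the interval until the cap is reached, and reset returns to the base timeout.

pause = Karafka::TimeTrackers::Pause.new(
  timeout: 1_000,
  max_timeout: 30_000,
  exponential_backoff: true
)

pause.pause   # pauses for 1_000 ms (2**0 * timeout), count => 1
pause.resume
pause.pause   # pauses for 2_000 ms (2**1 * timeout), count => 2
pause.resume
pause.pause   # 4_000 ms, and so on, capped at the 30_000 ms max_timeout
pause.reset   # count => 0, the next pause starts again from 1_000 ms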