karafka 1.4.0 → 2.0.10

Files changed (172)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +89 -18
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +365 -1
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +56 -112
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +61 -68
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +272 -0
  16. data/bin/karafka +10 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/certs/cert_chain.pem +26 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +59 -38
  23. data/docker-compose.yml +10 -3
  24. data/karafka.gemspec +18 -21
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +33 -0
  32. data/lib/karafka/admin.rb +63 -0
  33. data/lib/karafka/app.rb +15 -20
  34. data/lib/karafka/base_consumer.rb +197 -31
  35. data/lib/karafka/cli/info.rb +44 -10
  36. data/lib/karafka/cli/install.rb +22 -12
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -3
  39. data/lib/karafka/connection/client.rb +379 -89
  40. data/lib/karafka/connection/listener.rb +250 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -184
  49. data/lib/karafka/contracts/consumer_group_topic.rb +35 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger.rb +6 -10
  60. data/lib/karafka/instrumentation/logger_listener.rb +174 -0
  61. data/lib/karafka/instrumentation/monitor.rb +13 -61
  62. data/lib/karafka/instrumentation/notifications.rb +53 -0
  63. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  64. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  65. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  66. data/lib/karafka/instrumentation.rb +21 -0
  67. data/lib/karafka/licenser.rb +75 -0
  68. data/lib/karafka/messages/batch_metadata.rb +45 -0
  69. data/lib/karafka/messages/builders/batch_metadata.rb +39 -0
  70. data/lib/karafka/messages/builders/message.rb +39 -0
  71. data/lib/karafka/messages/builders/messages.rb +34 -0
  72. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  73. data/lib/karafka/messages/messages.rb +64 -0
  74. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  75. data/lib/karafka/messages/seek.rb +9 -0
  76. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  77. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  78. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  79. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  80. data/lib/karafka/pro/base_consumer.rb +107 -0
  81. data/lib/karafka/pro/contracts/base.rb +21 -0
  82. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  83. data/lib/karafka/pro/contracts/consumer_group_topic.rb +69 -0
  84. data/lib/karafka/pro/loader.rb +76 -0
  85. data/lib/karafka/pro/performance_tracker.rb +80 -0
  86. data/lib/karafka/pro/processing/coordinator.rb +85 -0
  87. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +38 -0
  88. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  89. data/lib/karafka/pro/processing/partitioner.rb +58 -0
  90. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  91. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  92. data/lib/karafka/pro/routing/topic_extensions.rb +74 -0
  93. data/lib/karafka/pro.rb +13 -0
  94. data/lib/karafka/process.rb +1 -0
  95. data/lib/karafka/processing/coordinator.rb +103 -0
  96. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  97. data/lib/karafka/processing/executor.rb +126 -0
  98. data/lib/karafka/processing/executors_buffer.rb +88 -0
  99. data/lib/karafka/processing/jobs/base.rb +55 -0
  100. data/lib/karafka/processing/jobs/consume.rb +47 -0
  101. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  102. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  103. data/lib/karafka/processing/jobs_builder.rb +29 -0
  104. data/lib/karafka/processing/jobs_queue.rb +144 -0
  105. data/lib/karafka/processing/partitioner.rb +22 -0
  106. data/lib/karafka/processing/result.rb +37 -0
  107. data/lib/karafka/processing/scheduler.rb +22 -0
  108. data/lib/karafka/processing/worker.rb +91 -0
  109. data/lib/karafka/processing/workers_batch.rb +27 -0
  110. data/lib/karafka/railtie.rb +127 -0
  111. data/lib/karafka/routing/builder.rb +26 -23
  112. data/lib/karafka/routing/consumer_group.rb +37 -17
  113. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  114. data/lib/karafka/routing/proxy.rb +9 -16
  115. data/lib/karafka/routing/router.rb +1 -1
  116. data/lib/karafka/routing/subscription_group.rb +53 -0
  117. data/lib/karafka/routing/subscription_groups_builder.rb +54 -0
  118. data/lib/karafka/routing/topic.rb +65 -24
  119. data/lib/karafka/routing/topics.rb +38 -0
  120. data/lib/karafka/runner.rb +51 -0
  121. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  122. data/lib/karafka/server.rb +67 -26
  123. data/lib/karafka/setup/config.rb +153 -175
  124. data/lib/karafka/status.rb +14 -5
  125. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  126. data/lib/karafka/templates/karafka.rb.erb +17 -55
  127. data/lib/karafka/time_trackers/base.rb +19 -0
  128. data/lib/karafka/time_trackers/pause.rb +92 -0
  129. data/lib/karafka/time_trackers/poll.rb +65 -0
  130. data/lib/karafka/version.rb +1 -1
  131. data/lib/karafka.rb +46 -16
  132. data.tar.gz.sig +0 -0
  133. metadata +145 -171
  134. metadata.gz.sig +0 -0
  135. data/.github/FUNDING.yml +0 -3
  136. data/MIT-LICENCE +0 -18
  137. data/certs/mensfeld.pem +0 -25
  138. data/lib/karafka/attributes_map.rb +0 -62
  139. data/lib/karafka/backends/inline.rb +0 -16
  140. data/lib/karafka/base_responder.rb +0 -226
  141. data/lib/karafka/cli/flow.rb +0 -48
  142. data/lib/karafka/code_reloader.rb +0 -67
  143. data/lib/karafka/connection/api_adapter.rb +0 -161
  144. data/lib/karafka/connection/batch_delegator.rb +0 -55
  145. data/lib/karafka/connection/builder.rb +0 -18
  146. data/lib/karafka/connection/message_delegator.rb +0 -36
  147. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  148. data/lib/karafka/consumers/callbacks.rb +0 -71
  149. data/lib/karafka/consumers/includer.rb +0 -64
  150. data/lib/karafka/consumers/responders.rb +0 -24
  151. data/lib/karafka/consumers/single_params.rb +0 -15
  152. data/lib/karafka/contracts/responder_usage.rb +0 -54
  153. data/lib/karafka/fetcher.rb +0 -42
  154. data/lib/karafka/helpers/class_matcher.rb +0 -88
  155. data/lib/karafka/helpers/config_retriever.rb +0 -46
  156. data/lib/karafka/helpers/inflector.rb +0 -26
  157. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  158. data/lib/karafka/params/batch_metadata.rb +0 -26
  159. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  160. data/lib/karafka/params/builders/params.rb +0 -38
  161. data/lib/karafka/params/builders/params_batch.rb +0 -25
  162. data/lib/karafka/params/params_batch.rb +0 -60
  163. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  164. data/lib/karafka/persistence/client.rb +0 -29
  165. data/lib/karafka/persistence/consumers.rb +0 -45
  166. data/lib/karafka/persistence/topics.rb +0 -48
  167. data/lib/karafka/responders/builder.rb +0 -36
  168. data/lib/karafka/responders/topic.rb +0 -55
  169. data/lib/karafka/routing/topic_mapper.rb +0 -53
  170. data/lib/karafka/serialization/json/serializer.rb +0 -31
  171. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  172. data/lib/karafka/templates/application_responder.rb.erb +0 -11
data/lib/karafka/instrumentation/vendors/datadog/listener.rb
@@ -0,0 +1,232 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Instrumentation
+     # Namespace for vendor specific instrumentation
+     module Vendors
+       # Datadog specific instrumentation
+       module Datadog
+         # Listener that can be used to subscribe to Karafka to receive stats via StatsD
+         # and/or Datadog
+         #
+         # @note You need to set up the `dogstatsd-ruby` client and assign it
+         class Listener
+           include ::Karafka::Core::Configurable
+           extend Forwardable
+
+           def_delegators :config, :client, :rd_kafka_metrics, :namespace, :default_tags
+
+           # Value object for storing a single rdkafka metric publishing details
+           RdKafkaMetric = Struct.new(:type, :scope, :name, :key_location)
+
+           # Namespace under which the DD metrics should be published
+           setting :namespace, default: 'karafka'
+
+           # Datadog client that we should use to publish the metrics
+           setting :client
+
+           # Default tags we want to publish (for example hostname)
+           # Format as follows (example for hostname): `["host:#{Socket.gethostname}"]`
+           setting :default_tags, default: []
+
+           # All the rdkafka metrics we want to publish
+           #
+           # By default we publish quite a lot, so this can be tuned
+           # Note that the ones with `_d` come from Karafka, not rdkafka or Kafka
+           setting :rd_kafka_metrics, default: [
+             # Client metrics
+             RdKafkaMetric.new(:count, :root, 'messages.consumed', 'rxmsgs_d'),
+             RdKafkaMetric.new(:count, :root, 'messages.consumed.bytes', 'rxmsg_bytes'),
+
+             # Broker metrics
+             RdKafkaMetric.new(:count, :brokers, 'consume.attempts', 'txretries_d'),
+             RdKafkaMetric.new(:count, :brokers, 'consume.errors', 'txerrs_d'),
+             RdKafkaMetric.new(:count, :brokers, 'receive.errors', 'rxerrs_d'),
+             RdKafkaMetric.new(:count, :brokers, 'connection.connects', 'connects_d'),
+             RdKafkaMetric.new(:count, :brokers, 'connection.disconnects', 'disconnects_d'),
+             RdKafkaMetric.new(:gauge, :brokers, 'network.latency.avg', %w[rtt avg]),
+             RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p95', %w[rtt p95]),
+             RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p99', %w[rtt p99])
+           ].freeze
+
+           configure
+
+           # @param block [Proc] configuration block
+           def initialize(&block)
+             configure
+             setup(&block) if block
+           end
+
+           # @param block [Proc] configuration block
+           # @note We define this alias to be consistent with `WaterDrop#setup`
+           def setup(&block)
+             configure(&block)
+           end
+
+           # Hooks up to Karafka instrumentation for emitted statistics
+           #
+           # @param event [Karafka::Core::Monitoring::Event]
+           def on_statistics_emitted(event)
+             statistics = event[:statistics]
+
+             rd_kafka_metrics.each do |metric|
+               report_metric(metric, statistics)
+             end
+           end
+
+           # Increases the errors count by 1
+           #
+           # @param event [Karafka::Core::Monitoring::Event]
+           def on_error_occurred(event)
+             extra_tags = ["type:#{event[:type]}"]
+
+             if event.payload[:caller].respond_to?(:messages)
+               metadata = event.payload[:caller].messages.metadata
+
+               extra_tags += [
+                 "topic:#{metadata.topic}",
+                 "partition:#{metadata.partition}"
+               ]
+             end
+
+             count('error_occurred', 1, tags: default_tags + extra_tags)
+           end
+
+           # Reports how many messages we've polled and how much time we spent on it
+           #
+           # @param event [Karafka::Core::Monitoring::Event]
+           def on_connection_listener_fetch_loop_received(event)
+             time_taken = event[:time]
+             messages_count = event[:messages_buffer].size
+
+             histogram('listener.polling.time_taken', time_taken, tags: default_tags)
+             histogram('listener.polling.messages', messages_count, tags: default_tags)
+           end
+
+           # Here we report the majority of processing-related things, as we have access to
+           # the consumer
+           # @param event [Karafka::Core::Monitoring::Event]
+           def on_consumer_consumed(event)
+             messages = event.payload[:caller].messages
+             metadata = messages.metadata
+
+             tags = default_tags + [
+               "topic:#{metadata.topic}",
+               "partition:#{metadata.partition}"
+             ]
+
+             count('consumer.messages', messages.count, tags: tags)
+             count('consumer.batches', 1, tags: tags)
+             gauge('consumer.offset', metadata.last_offset, tags: tags)
+             histogram('consumer.consumed.time_taken', event[:time], tags: tags)
+             histogram('consumer.batch_size', messages.count, tags: tags)
+             histogram('consumer.processing_lag', metadata.processing_lag, tags: tags)
+             histogram('consumer.consumption_lag', metadata.consumption_lag, tags: tags)
+           end
+
+           # @param event [Karafka::Core::Monitoring::Event]
+           def on_consumer_revoked(event)
+             messages = event.payload[:caller].messages
+             metadata = messages.metadata
+
+             tags = default_tags + [
+               "topic:#{metadata.topic}",
+               "partition:#{metadata.partition}"
+             ]
+
+             count('consumer.revoked', 1, tags: tags)
+           end
+
+           # @param event [Karafka::Core::Monitoring::Event]
+           def on_consumer_shutdown(event)
+             messages = event.payload[:caller].messages
+             metadata = messages.metadata
+
+             tags = default_tags + [
+               "topic:#{metadata.topic}",
+               "partition:#{metadata.partition}"
+             ]
+
+             count('consumer.shutdown', 1, tags: tags)
+           end
+
+           # Worker related metrics
+           # @param event [Karafka::Core::Monitoring::Event]
+           def on_worker_process(event)
+             jq_stats = event[:jobs_queue].statistics
+
+             gauge('worker.total_threads', Karafka::App.config.concurrency, tags: default_tags)
+             histogram('worker.processing', jq_stats[:processing], tags: default_tags)
+             histogram('worker.enqueued_jobs', jq_stats[:enqueued], tags: default_tags)
+           end
+
+           # We report this metric before and after processing for higher accuracy
+           # Without this, the utilization would not be fully reflected
+           # @param event [Karafka::Core::Monitoring::Event]
+           def on_worker_processed(event)
+             jq_stats = event[:jobs_queue].statistics
+
+             histogram('worker.processing', jq_stats[:processing], tags: default_tags)
+           end
+
+           private
+
+           %i[
+             count
+             gauge
+             histogram
+             increment
+             decrement
+           ].each do |metric_type|
+             class_eval <<~METHODS, __FILE__, __LINE__ + 1
+               def #{metric_type}(key, *args)
+                 client.#{metric_type}(
+                   namespaced_metric(key),
+                   *args
+                 )
+               end
+             METHODS
+           end
+
+           # Wraps metric name in listener's namespace
+           # @param metric_name [String] RdKafkaMetric name
+           # @return [String]
+           def namespaced_metric(metric_name)
+             "#{namespace}.#{metric_name}"
+           end
+
+           # Reports a given metric statistics to Datadog
+           # @param metric [RdKafkaMetric] metric value object
+           # @param statistics [Hash] hash with all the statistics emitted
+           def report_metric(metric, statistics)
+             case metric.scope
+             when :root
+               public_send(
+                 metric.type,
+                 metric.name,
+                 statistics.fetch(*metric.key_location),
+                 tags: default_tags
+               )
+             when :brokers
+               statistics.fetch('brokers').each_value do |broker_statistics|
+                 # Skip bootstrap nodes
+                 # Bootstrap nodes have nodeid -1, other nodes have positive node ids
+                 next if broker_statistics['nodeid'] == -1
+
+                 public_send(
+                   metric.type,
+                   metric.name,
+                   broker_statistics.dig(*metric.key_location),
+                   tags: default_tags + ["broker:#{broker_statistics['nodename']}"]
+                 )
+               end
+             else
+               raise ArgumentError, metric.scope
+             end
+           end
+         end
+       end
+     end
+   end
+ end
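The listener above only collects settings; it still has to be instantiated and subscribed. A minimal wiring sketch (assuming the `dogstatsd-ruby` gem is available; the agent host and port are placeholders):

require 'socket'
require 'datadog/statsd'

listener = ::Karafka::Instrumentation::Vendors::Datadog::Listener.new do |config|
  # Placeholder agent location - point this at your StatsD/Datadog agent
  config.client = Datadog::Statsd.new('localhost', 8125)
  config.default_tags = ["host:#{Socket.gethostname}"]
end

# Subscribing routes the instrumentation events above (statistics, errors,
# consumer lifecycle) into the listener's on_* handlers
Karafka.monitor.subscribe(listener)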
data/lib/karafka/instrumentation.rb
@@ -0,0 +1,21 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # @note Since we can only have one statistics callbacks manager and one error callbacks
+   #   manager, we use WaterDrop's, which is already configured.
+   module Instrumentation
+     class << self
+       # Returns a manager for statistics callbacks
+       # @return [::WaterDrop::CallbacksManager]
+       def statistics_callbacks
+         ::WaterDrop::Instrumentation.statistics_callbacks
+       end
+
+       # Returns a manager for error callbacks
+       # @return [::WaterDrop::CallbacksManager]
+       def error_callbacks
+         ::WaterDrop::Instrumentation.error_callbacks
+       end
+     end
+   end
+ end
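Because these managers are the very objects WaterDrop uses, both gems funnel their librdkafka callbacks through one place. A hedged sketch of registering an extra raw-statistics hook (assuming WaterDrop's callbacks manager exposes an `#add(id, callable)` interface; the id string is an arbitrary choice):

# The callable receives each emitted librdkafka statistics hash
Karafka::Instrumentation.statistics_callbacks.add(
  'my_raw_stats_hook',
  ->(statistics) { Karafka.logger.debug("librdkafka stats for #{statistics['name']}") }
)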
data/lib/karafka/licenser.rb
@@ -0,0 +1,75 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Checks the license presence for pro and loads pro components when needed (if any)
+   class Licenser
+     # Location in the gem where we store the public key
+     PUBLIC_KEY_LOCATION = File.join(Karafka.gem_root, 'certs', 'karafka-pro.pem')
+
+     private_constant :PUBLIC_KEY_LOCATION
+
+     # Tries to prepare the license and verifies it
+     #
+     # @param license_config [Karafka::Core::Configurable::Node] config related to the licensing
+     def prepare_and_verify(license_config)
+       prepare(license_config)
+       verify(license_config)
+     end
+
+     private
+
+     # @param license_config [Karafka::Core::Configurable::Node] config related to the licensing
+     def prepare(license_config)
+       # If there is a token, no action needed
+       # We support a case where someone would put the token in directly instead of using one
+       # from the license gem. That's in case there are limitations to using external package
+       # sources, etc
+       return if license_config.token
+
+       begin
+         license_config.token || require('karafka-license')
+       rescue LoadError
+         return
+       end
+
+       license_config.token = Karafka::License.token
+     end
+
+     # Checks the license and sets up license details (if needed)
+     # @param license_config [Karafka::Core::Configurable::Node] config related to the licensing
+     def verify(license_config)
+       # If no license, it will just run LGPL components without anything extra
+       return unless license_config.token
+
+       public_key = OpenSSL::PKey::RSA.new(File.read(PUBLIC_KEY_LOCATION))
+
+       # We strip and remove whitespace in case someone copy-pasted it as a multi line string
+       formatted_token = license_config.token.strip.delete("\n").delete(' ')
+       decoded_token = Base64.decode64(formatted_token)
+
+       begin
+         data = public_key.public_decrypt(decoded_token)
+       rescue OpenSSL::OpenSSLError
+         data = nil
+       end
+
+       details = data ? JSON.parse(data) : raise_invalid_license_token(license_config)
+
+       license_config.entity = details.fetch('entity')
+     end
+
+     # Raises an error with info that the used token is invalid
+     # @param license_config [Karafka::Core::Configurable::Node]
+     def raise_invalid_license_token(license_config)
+       # We set it to false so the `Karafka.pro?` method behaves as expected
+       license_config.token = false
+
+       raise(
+         Errors::InvalidLicenseTokenError,
+         <<~MSG.tr("\n", ' ')
+           License key you provided is invalid.
+           Please reach us at contact@karafka.io or visit https://karafka.io to obtain a valid one.
+         MSG
+       )
+     end
+   end
+ end
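On the configuration side, the token reaches this class through the `license` config node. A sketch of the typical setup (assuming the standard `Karafka::App.setup` flow; the ENV variable name is an arbitrary choice):

class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = { 'bootstrap.servers': 'localhost:9092' }
    # Without a token Karafka simply runs the LGPL components;
    # with a valid one, the Pro components get loaded
    config.license.token = ENV['KARAFKA_PRO_LICENSE_TOKEN']
  end
end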
data/lib/karafka/messages/batch_metadata.rb
@@ -0,0 +1,45 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Messages
+     # Simple batch metadata object that stores all non-message information received from the
+     # Kafka cluster while fetching the data.
+     #
+     # @note This metadata object refers to per batch metadata, not `#message.metadata`
+     BatchMetadata = Struct.new(
+       :size,
+       :first_offset,
+       :last_offset,
+       :deserializer,
+       :partition,
+       :topic,
+       :created_at,
+       :scheduled_at,
+       :processed_at,
+       keyword_init: true
+     ) do
+       # This lag describes how long it took for a message to be consumed from the moment it
+       # was created
+       def consumption_lag
+         time_distance_in_ms(processed_at, created_at)
+       end
+
+       # This lag describes how long a batch had to wait before it was picked up by one of
+       # the workers
+       def processing_lag
+         time_distance_in_ms(processed_at, scheduled_at)
+       end
+
+       private
+
+       # Computes the time distance between two times in ms
+       #
+       # @param time1 [Time]
+       # @param time2 [Time]
+       # @return [Integer] distance between the two times in ms
+       def time_distance_in_ms(time1, time2)
+         ((time1 - time2) * 1_000).round
+       end
+     end
+   end
+ end
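Both lags come out in milliseconds. A usage sketch from consumer code (assuming the `ApplicationConsumer` base class generated by the installer; `messages.metadata` carries this struct):

class EventsConsumer < ApplicationConsumer
  def consume
    meta = messages.metadata

    # Time from the newest message's creation until processing started
    Karafka.logger.info("consumption lag: #{meta.consumption_lag}ms")

    # Time the batch waited in the jobs queue before a worker picked it up
    Karafka.logger.info("processing lag: #{meta.processing_lag}ms")
  end
end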
data/lib/karafka/messages/builders/batch_metadata.rb
@@ -0,0 +1,39 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Messages
+     module Builders
+       # Builder for creating a batch metadata object based on the batch information.
+       module BatchMetadata
+         class << self
+           # Creates metadata based on the Kafka batch data.
+           #
+           # @param messages [Array<Karafka::Messages::Message>] messages array
+           # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
+           # @param scheduled_at [Time] moment when the batch was scheduled for processing
+           # @return [Karafka::Messages::BatchMetadata] batch metadata object
+           #
+           # @note We do not set `processed_at` as this needs to be assigned when the batch is
+           #   picked up for processing.
+           def call(messages, topic, scheduled_at)
+             Karafka::Messages::BatchMetadata.new(
+               size: messages.count,
+               first_offset: messages.first.offset,
+               last_offset: messages.last.offset,
+               deserializer: topic.deserializer,
+               partition: messages.first.partition,
+               topic: topic.name,
+               # We go with the assumption that the creation time of the last message is the
+               # creation time of the whole batch
+               created_at: messages.last.timestamp,
+               # When this batch was built and scheduled for execution
+               scheduled_at: scheduled_at,
+               # This needs to be set to a correct value prior to processing starting
+               processed_at: nil
+             )
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/messages/builders/message.rb
@@ -0,0 +1,39 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Messages
+     # Builders encapsulate logic related to creating messages-related objects.
+     module Builders
+       # Builder of a single message based on a raw rdkafka message.
+       module Message
+         class << self
+           # @param kafka_message [Rdkafka::Consumer::Message] raw fetched message
+           # @param topic [Karafka::Routing::Topic] topic for which this message was fetched
+           # @param received_at [Time] moment when we've received the message
+           # @return [Karafka::Messages::Message] message object with payload and metadata
+           def call(kafka_message, topic, received_at)
+             # @see https://github.com/appsignal/rdkafka-ruby/issues/168
+             kafka_message.headers.transform_keys!(&:to_s)
+
+             metadata = Karafka::Messages::Metadata.new(
+               timestamp: kafka_message.timestamp,
+               headers: kafka_message.headers,
+               key: kafka_message.key,
+               offset: kafka_message.offset,
+               deserializer: topic.deserializer,
+               partition: kafka_message.partition,
+               topic: topic.name,
+               received_at: received_at
+             ).freeze
+
+             # Karafka messages cannot be frozen because of the lazy deserialization feature
+             Karafka::Messages::Message.new(
+               kafka_message.payload,
+               metadata
+             )
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/messages/builders/messages.rb
@@ -0,0 +1,34 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Messages
+     module Builders
+       # Builder for creating message batch instances.
+       module Messages
+         class << self
+           # Creates a messages batch with messages inside, based on the incoming messages
+           # and the topic from which they come.
+           #
+           # @param messages [Array<Karafka::Messages::Message>] karafka messages array
+           # @param topic [Karafka::Routing::Topic] topic for which we've received the messages
+           # @param received_at [Time] moment in time when the messages were received
+           # @return [Karafka::Messages::Messages] messages batch object
+           def call(messages, topic, received_at)
+             # We cannot freeze the batch metadata because it is altered with the processed_at
+             # time prior to the consumption. It is being frozen there
+             metadata = BatchMetadata.call(
+               messages,
+               topic,
+               received_at
+             )
+
+             Karafka::Messages::Messages.new(
+               messages,
+               metadata
+             ).freeze
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/{params/params.rb → messages/message.rb}
@@ -1,12 +1,12 @@
  # frozen_string_literal: true

  module Karafka
-   # Params namespace encapsulating all the logic that is directly related to params handling
-   module Params
+   # Messages namespace encapsulating all the logic that is directly related to messages handling
+   module Messages
      # It provides lazy loading not only until the first usage, but also allows us to skip
      # using deserializer until we execute our logic. That way we can operate with
      # heavy-deserialization data without slowing down the whole application.
-     class Params
+     class Message
        extend Forwardable

        attr_reader :raw_payload, :metadata
@@ -14,7 +14,7 @@ module Karafka
        def_delegators :metadata, *Metadata.members

        # @param raw_payload [Object] incoming payload before deserialization
-       # @param metadata [Karafka::Params::Metadata] message metadata object
+       # @param metadata [Karafka::Messages::Metadata] message metadata object
        def initialize(raw_payload, metadata)
          @raw_payload = raw_payload
          @metadata = metadata
@@ -33,21 +33,16 @@ module Karafka
          @payload
        end

-       # @return [Boolean] did given params payload were deserialized already
+       # @return [Boolean] did we deserialize payload already
        def deserialized?
          @deserialized
        end

        private

-       # @return [Object] tries de-serializes data
+       # @return [Object] deserialized data
        def deserialize
-         Karafka.monitor.instrument('params.params.deserialize', caller: self) do
-           metadata.deserializer.call(self)
-         end
-       rescue ::StandardError => e
-         Karafka.monitor.instrument('params.params.deserialize.error', caller: self, error: e)
-         raise e
+         metadata.deserializer.call(self)
        end
      end
    end
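The practical effect of the change above: deserialization now happens inline, still lazily, and only on the first `#payload` access; `#raw_payload` never triggers it. Roughly (a sketch, assuming a JSON deserializer is configured for the topic):

message.deserialized? # => false - nothing parsed yet
message.raw_payload   # => '{"id":1}' - raw string, no parsing involved
message.payload       # => {"id" => 1} - deserialized on first access, then memoized
message.deserialized? # => true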
data/lib/karafka/messages/messages.rb
@@ -0,0 +1,64 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Messages
+     # Messages batch represents a set of messages received from Kafka of a single topic partition.
+     class Messages
+       include Enumerable
+
+       attr_reader :metadata
+
+       # @param messages_array [Array<Karafka::Messages::Message>] array with karafka messages
+       # @param metadata [Karafka::Messages::BatchMetadata]
+       # @return [Karafka::Messages::Messages] lazy evaluated messages batch object
+       def initialize(messages_array, metadata)
+         @messages_array = messages_array
+         @metadata = metadata
+       end
+
+       # @param block [Proc] block we want to execute per each message
+       # @note Invocation of this method will not cause loading and deserializing of messages.
+       def each(&block)
+         @messages_array.each(&block)
+       end
+
+       # Runs deserialization of all the messages and returns them
+       # @return [Array<Karafka::Messages::Message>]
+       def deserialize!
+         each(&:payload)
+       end
+
+       # @return [Array<Object>] array with deserialized payloads. This method can be useful when
+       #   we don't care about metadata and just want to extract all the data payloads from the
+       #   batch
+       def payloads
+         map(&:payload)
+       end
+
+       # @return [Array<String>] array with raw, not deserialized payloads
+       def raw_payloads
+         map(&:raw_payload)
+       end
+
+       # @return [Karafka::Messages::Message] first message
+       def first
+         @messages_array.first
+       end
+
+       # @return [Karafka::Messages::Message] last message
+       def last
+         @messages_array.last
+       end
+
+       # @return [Integer] number of messages in the batch
+       def size
+         @messages_array.size
+       end
+
+       # @return [Array<Karafka::Messages::Message>] pure array with messages
+       def to_a
+         @messages_array
+       end
+     end
+   end
+ end
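This batch is what `#messages` returns inside a consumer, so typical access patterns look roughly as follows (a sketch; the topic and payload fields are illustrative):

class OrdersConsumer < ApplicationConsumer
  def consume
    # Iteration alone does not force deserialization
    messages.each do |message|
      puts "#{message.topic}/#{message.partition}@#{message.offset}"
    end

    # payloads deserializes everything at once, when metadata is not needed
    total = messages.payloads.sum { |payload| payload['amount'] }
    puts "batch of #{messages.size} orders, total: #{total}"
  end
end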
data/lib/karafka/{params → messages}/metadata.rb
@@ -1,18 +1,16 @@
  # frozen_string_literal: true

  module Karafka
-   module Params
-     # Single message / params metadata details that can be accessed without the need for the
-     # payload deserialization
+   module Messages
+     # Single message metadata details that can be accessed without the need of deserialization.
      Metadata = Struct.new(
-       :create_time,
+       :timestamp,
        :headers,
-       :is_control_record,
        :key,
        :offset,
        :deserializer,
        :partition,
-       :receive_time,
+       :received_at,
        :topic,
        keyword_init: true
      )
data/lib/karafka/messages/seek.rb
@@ -0,0 +1,9 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Messages
+     # "Fake" message that we use as an abstraction layer when seeking back.
+     # This allows us to encapsulate a seek with a simple abstraction
+     Seek = Struct.new(:topic, :partition, :offset)
+   end
+ end
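Internally this struct stands in for a real message wherever a topic/partition/offset triple is needed, for example when a consumer seeks back after a pause. A minimal illustration (construction only; dispatching it is the connection client's job):

# A seek "message" pointing at offset 42 of partition 0 of the 'events' topic
seek = Karafka::Messages::Seek.new('events', 0, 42)
seek.offset # => 42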
data/lib/karafka/patches/rdkafka/consumer.rb
@@ -0,0 +1,22 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Patches to external components
+   module Patches
+     # Rdkafka related patches
+     module Rdkafka
+       # Rdkafka::Consumer patches
+       module Consumer
+         # A method that allows us to get the native kafka consumer name
+         # @return [String] consumer instance name
+         # @note We need this to make sure that we allocate proper dispatched events only to
+         #   callback listeners that should publish them
+         def name
+           @name ||= ::Rdkafka::Bindings.rd_kafka_name(@native_kafka)
+         end
+       end
+     end
+   end
+ end
+
+ ::Rdkafka::Consumer.include ::Karafka::Patches::Rdkafka::Consumer