karafka 2.5.9 → 2.5.10.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/certs/expired.txt +83 -0
- data/config/locales/errors.yml +3 -0
- data/karafka.gemspec +2 -2
- data/lib/karafka/connection/client.rb +7 -1
- data/lib/karafka/connection/listener.rb +2 -1
- data/lib/karafka/helpers/interval_runner.rb +2 -2
- data/lib/karafka/instrumentation/assignments_tracker.rb +65 -2
- data/lib/karafka/instrumentation/logger_listener.rb +19 -0
- data/lib/karafka/instrumentation/notifications.rb +2 -0
- data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +1 -3
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +1 -1
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +13 -10
- data/lib/karafka/pro/loader.rb +2 -0
- data/lib/karafka/pro/setup/defaults_injector.rb +70 -0
- data/lib/karafka/pro/swarm/liveness_listener.rb +22 -10
- data/lib/karafka/processing/jobs_queue.rb +12 -4
- data/lib/karafka/processing/worker.rb +7 -2
- data/lib/karafka/processing/workers_pool.rb +158 -0
- data/lib/karafka/runner.rb +17 -17
- data/lib/karafka/server.rb +24 -6
- data/lib/karafka/setup/attributes_map.rb +2 -0
- data/lib/karafka/setup/contracts/config.rb +17 -0
- data/lib/karafka/setup/defaults_injector.rb +10 -0
- data/lib/karafka/swarm/liveness_listener.rb +7 -0
- data/lib/karafka/swarm/manager.rb +7 -7
- data/lib/karafka/version.rb +1 -1
- metadata +7 -6
- data/lib/karafka/processing/workers_batch.rb +0 -29
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 953c11ca73eda62bac76aaca94946b50b880a54e50756b0665da68155c9ad68d
+  data.tar.gz: a5d200ac66bc102d6dac680aaa03df1cc897113dc2af52f4684f3f7128608340
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 96d8087bda95065a1b56f5a525b9dd53c9ec71d01d9dd51f56403f7a14fd5ee406f2c40a366e1bb607495ebea8ce76ddd379df08e8e76594cdfae5905ff9c3d0
+  data.tar.gz: 393f0264db679e7b71eeb2fe6c419090d6ff46a43354789fcef2e1e7e6baec3c839583982129669d02a6eae1d245396f48c0dcfebe475946ca2aa97dadce90ed
data/CHANGELOG.md
CHANGED

@@ -1,5 +1,17 @@
 # Karafka Framework Changelog

+## 2.5.10 (Unreleased)
+- **[Feature]** Add `Processing::WorkersPool` with dynamic thread pool scaling via `#scale`, `nil` sentinel-based worker exit for downscaling, and `worker.scaling.up`/`worker.scaling.down` instrumentation events.
+- [Enhancement] Add per-partition generation tracking to `AssignmentsTracker` to distinguish first-time assignments from reassignments.
+- [Enhancement] Make liveness listeners fiber-safe.
+- [Fix] Include orphaned node detection in Pro swarm liveness listener.
+- [Fix] Report liveness on `connection.listener.before_fetch_loop` so nodes send an initial healthy report before the first consumption.
+- [Fix] Fire liveness events during `wait_pinging` so nodes continue reporting health status during shutdown with active LRJ jobs.
+- [Maintenance] Use namespaced topic naming format in all integration specs for consistent traceability.
+- [Fix] Fix `DataCollector::SPEC_HASH` producing non-deterministic hashes for pristine and poro specs by passing the original spec path via the `KARAFKA_SPEC_PATH` env var.
+- [Maintenance] Add `bin/tests_topics_hashes` script for looking up spec files by their topic name hash prefix.
+- [Change] Require `karafka-rdkafka` `>=` `0.26.1` to support upcoming features relying on low-level Rdkafka APIs.
+
 ## 2.5.9 (2026-03-30)
 - [Enhancement] Validate that `statistics.interval.ms` is not zero when dynamic multiplexing is enabled (Pro).
 - [Fix] Fix swarm liveness reporting to also use `on_connection_listener_fetch_loop` so nodes stay alive when `statistics.interval.ms` is disabled.
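The new `WorkersPool` events surface through the regular instrumentation bus, and the pool itself is what the Runner wires into `Karafka::Server.workers`. A minimal sketch of how both could be consumed, assuming the standard `Karafka.monitor.subscribe` API (the `:from`/`:to` payload keys come from the events added in this release; the runtime `scale` call is an illustration, not a documented public entry point):

    # Observing the scaling notifications added in 2.5.10
    Karafka.monitor.subscribe('worker.scaling.up') do |event|
      puts "workers scaled up from #{event[:from]} to #{event[:to]}"
    end

    Karafka.monitor.subscribe('worker.scaling.down') do |event|
      # :to is the requested target; workers exit asynchronously as they pick up nil sentinels
      puts "workers scaling down from #{event[:from]} towards #{event[:to]}"
    end

    # Illustrative runtime resize of the pool exposed on the server
    Karafka::Server.workers.scale(8) # synchronous growth to 8 worker threads
    Karafka::Server.workers.scale(2) # asynchronous shrink; #size converges towards 2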
data/certs/expired.txt
CHANGED

@@ -1,2 +1,85 @@
 # Expired/revoked license checksums (SHA256 of license token)
 # One checksum per line, lines starting with # are comments
+
+01603767339fd25cf874e6fe29315fb27b6f6bbdf2e769c3b88e6efcf4096d37
+024e2c943693c4ea9eea852eb02d32acc9cb34df21aea90af5ab39dcf3b1d8cf
+0371303ba704d8f256b40932aa7840a1adb1cdc6fd032ac902794320d0d35ff1
+051dc2d8176df8d3bde1595e01af786a3f58ccab4cc48743d624e68125d80bd6
+06e3d91c9881ebf9686f3492e53400adc2c6a5fd4a409f6775cd36094b24f181
+0ae8daf74b8ccd6c4828ed707edf0d05df3e7d328e91ddc973e160ae8b120787
+0b4420e202d9c6075bb108ac4bd5917e35e89aa7d1741afaab93ca07839d06fc
+11936f8e0637565a309157d1fd4e84a3c44e5a3f9f226435e7d38bc43e452db6
+122ecfd0493596239995b9e7425046737b4893e28e33f31815eb4727c3f607d9
+17c14c38e10d0d2bd4a9c847a2c23fd0726459337af1d9815b6e34d1fd9af8be
+227f48f0b22974c43f120de28d5ca33a77e58d33064f9a73748501ba700f1474
+24d59b10d982e5642dfda3ec54cd56dda9022927c89584bbe12d31f3df28e17d
+2634761cc75479548003f941346f0a51a6c535415696f386b40ae28f88f63efa
+284967207a93e9c989d05ad6e5bb0dbe3e0a8a871c3fba48324558f127166bda
+2e2b4ccd902e43feb418cc8b3aed5874ec194ae8495ded864aff917538d91f7a
+39aeae710122bf14729a05f24f5bd702d2a1127e5c7fb9bd90de0e20890575dc
+3ada575caaf0b8bdeca0686a93074de9ed830751e6f46251849475f70e84e408
+4367e57ca36a4b447f23745d76611514df61b56611225be4c8f9a90560bbace9
+47380207ca3041b14c7d4c81a69f820852069ecab4d80a068d39133f61a6e641
+4d49b497ece81604358afc8e9adc7d1163995c4c08446bcd5c0bbfe3d7ebc1e7
+4d97c95c46e51105705964aecbfa5219120295e7d0a172f5c098d12c3a5cbf7e
+60171be4b327d48b2445b00f1fca3d2a3554d06c69f63f3e97742b29030a0a69
+630907de3c6bf258bfc2c51d768d51ca446ebd49fb29b29a0b24d363d46d1a6e
+68db4a9700f6cc3494f8eac1e0df2471f11ef7312e7b58dae839b193e226fae5
+6a06fb85cee01a27a5e819e464b0b5fd84051d234540d11774bc17e90c91abd4
+6d79bcac25c0026dad0f1871ccae79fc07d696df2e0f080ce1d1038454545a05
+6e2ed71a0925d59815c49ea1e72dfd0ba28bdbb57f93d81fa34a31c50db53830
+711a3cced55062707b64f6d652e107d0f1318fd9f3762c732fad23d04884cdd2
+71c2d2ab89a11010c41c9938fbc2565297364b4e8378e78ddf58b22ea2c43ca2
+72b19e6e274b1f36799eea65b07f4849db44c82315647266608ebdb6fde0cad6
+731c0d75d9b47cca49d9d95fc8d291fda452383f920e70849373959892443329
+77b6974b96701d82a0670c79830543e345bf0a00d9e5b5a1bbf8443439d75987
+77e461a9a4534870cfb7cca350d56ed0a504643bb39eeb750b338179d05069a9
+7c06718c4314e28561f337b3ecc66fb012c2510f1ed5fcb82dbab5ba009a8f7d
+7ca5a6183ed9c97e4da2c7030f809bfb32da49319d106377bd0e6d7d52734141
+7ce4cade640a4f34e2909360c4c0ceb8a647b7bfead7a57880d8ce2bf6eea26c
+7d3a2cb0ab08f2f25324124973370da30210dab04abaeeed97192576238a03e0
+7dc790fd8bf18e663c2e4276f4362b0c14795945b508807ef03a92ece266cff8
+81e69732cc6e9c5266fffea08fd1c69fd12c1d841b979723a77533c5efef244e
+839f4a449e0d68acccc0f6a2d0209d2d137375d740ae6d0244dde17e63131fbb
+85d26e4fedf364e20ea33c1b0102d34cea19156243fe781ddf597f6677cf2de3
+8609d26341a5c4e908af8b63bcc6895d99d8cc13f0403dc7b36a2920bb8e048a
+8d3e09a94bd00710a49b186be832f74cc3340ec3d87e780e10159e5e8d048b81
+8e933669352c272935c310d7ce798b3ea2687611378b978e412491e7fddfce4d
+8f3190524285a5c0a0d1e24bdc2a5c417416c2cb92b9da26f0e492aaa4600bd7
+9221a49b42e05d39e1939cf9353805abea2042457c2247859e3274976ac2fbc6
+96c8dde1a7f938899f3f5bd764eff295f090e1c40635e7efd25d69ef55881db2
+9b0b791259e86740d4a2cab736802614938b7043479ac8770c570b1b37f38fb7
+a0db6c5e7c8714a2d33adff3b69d2d688d5a34e6d5ba871d1cbdfcba3edf1e89
+a7ce882d45a00ad82e653b60d14f1a1ebefdc9033d6cc60ed0a2ed507949a9a2
+ac2941c112f397543fde83cc1b0dcf70ec34d77e149d3e90ad07a0f1bc809933
+adea773fadaaaeadf0f20643e84ae6c40bcb55fb73ecee5894a41836453e4abb
+b1411e92ad939b59c898e78520c8ec82547950493a45004b747f12820aa7d390
+b3984622fbc6ba91a2180283a6ada7e1bbe01fb705dbaae37c831e9c893b8e00
+b5ed1ff168e86300735e59dc2786be6d535fc9e0f3c6c8a76bc4354cbbed4ae0
+b9e38e9c1b34db847a4b82ae35382b06e61a5872aa8c24f988d7d5246ded4648
+bdc4f155153a59c25c7c52932f77973487d6336e99eb0beeb90edf4a4120a94a
+bf6c5c58b02fa5e7c7dc845213ad943c5b6613f12ac6012be00b1484bb85aa04
+c011c3d168a159c135ef35bf55121edaf1a4cc94f663a1f461595776b0313160
+c311cb6cfb38a8b2e83b3080c1f87444a52f3f645be8337559a123aaacf1344a
+c87222a94f98c3a2191dda9254bd6ed031fd8ca190f75b40c67a0fcd4b56f795
+ca53af70cd7e67abbb9dbf0a8da6788a9b2192aa3a5a1dd8720aaa68a2511722
+d02da0cbd2c1d83432a2cee7984dd311249692373554002d1d543dd5007f6a58
+d13295e597120b88bf4c4f36de63d0aa3e5a68e3578c09a2e026bd1f6aaaf04f
+d862dd776d5b42a73a63574dc00187c587403101cd211ed348fc422f6460844e
+d9f25b56eae6a62b9383c068c2a7c22ef6b051f1cf4137164ba3dd1baf0bac61
+db1f2c50f3dc6956c498b06f67f82412a76f0b138265a55713076b2a4a35725f
+ded7405c91e9bb1994c58c728f83b705fdc5e816ce0cb3fdf16887127fff27f0
+dfe08da10ae5e74e9bbf648261860f3b94201a11ab4269cd4db7c73017452dde
+dff7372b5ee3136af892561507602423bbae42d12d3a65277ae2af1add666723
+e1a040c0c9a6c5e4603d9aebbc4201075238b47446348ab635b53704b3095a63
+e3b94496b65bb626e723637926e729a33be673fe0355d964306bde563ff6fbf7
+e577d74987f1d31c10fc4ec67dd74084486514f844668d7d01bd4cad2d8f09c4
+e7a91a5ce0e68b888026bbddb57c92c7d5ad61e9416c8188721eee0a8ba85bc1
+eda2ab2c99f769d646dd6fff564af9ec6f32e4462390e3b3246bf953cd7a9c70
+f031aa98b2375c6573f6f775f6000dc81e07b1034694742a900a9115eb124900
+f1ade1306fd64c2e32b15e505d87c0a9ec7a8392f5bea014fc4e70acd17f9935
+f397ccd272b6ee7b33595aa93a6e40b725af265b681efca05bb26f9a7766d6c2
+f54723b14f7cfc4e3c88b946cab881b907dd5ac8372b4600213241ab82778979
+f9cc7b7d9c7d81ba477a70749d166edbc2b44f1714096fce10118bb18fd9696c
+fb417a807d7595a4aff44cc79712e9bc887181e64eb9a98fe9f200b585856ae7
+fd5009f463fe0c38cfe66677ff9b8e5031cbc4ce354883c99c392109ae8ae3b8
data/config/locales/errors.yml
CHANGED

@@ -25,6 +25,9 @@ en:
       Decrease max_wait_time or increase node_report_timeout
     kafka_format: needs to be a filled hash
     key_must_be_a_symbol: All keys under the kafka settings scope need to be symbols
+    managed_key_not_supported: >
+      this kafka setting is managed internally by Karafka and should not be set directly.
+      Setting it manually may cause misbehaviours and other unexpected issues
     pause.timeout_max_timeout_vs_pause_max_timeout: pause.timeout must be less or equal to pause.max_timeout
     shutdown_timeout_vs_max_wait_time: shutdown_timeout must be more than max_wait_time
     worker_thread_priority_format: must be between -3 and 3
data/karafka.gemspec
CHANGED

@@ -21,8 +21,8 @@ Gem::Specification.new do |spec|
     without having to focus on things that are not your business domain.
   DESC

-  spec.add_dependency "karafka-core", ">= 2.5.
-  spec.add_dependency "karafka-rdkafka", ">= 0.
+  spec.add_dependency "karafka-core", ">= 2.5.13", "< 2.6.0"
+  spec.add_dependency "karafka-rdkafka", ">= 0.26.1"
   spec.add_dependency "waterdrop", ">= 2.8.14", "< 3.0.0"
   spec.add_dependency "zeitwerk", "~> 2.3"

data/lib/karafka/connection/client.rb
CHANGED

@@ -401,10 +401,14 @@ module Karafka
     # - OAUTHBEARER token refresh callbacks
     #
     # @param timeout [Integer] number of milliseconds to wait on events or 0 not to wait.
+    # @param safe [Boolean] when true, rescues Rdkafka::RdkafkaError so callers in
+    #   shutdown/quiet paths do not trigger a full listener reset. When shutting down, errors
+    #   at this layer are not relevant enough. We want to log them but we should not propagate
+    #   them any further.
     #
     # @note It is non-blocking when timeout 0 and will not wait if queue empty. It costs up to
     #   2ms when no callbacks are triggered.
-    def events_poll(timeout = 0)
+    def events_poll(timeout = 0, safe: false)
      kafka.events_poll(timeout)

      # Emit event for monitoring - happens once per tick_interval (default 5s)

@@ -414,6 +418,8 @@ module Karafka
        caller: self,
        subscription_group: @subscription_group
      )
+    rescue Rdkafka::RdkafkaError
+      safe ? nil : raise
    end

    # Returns pointer to the consumer group metadata. It is used only in the context of
data/lib/karafka/connection/listener.rb
CHANGED

@@ -50,7 +50,7 @@ module Karafka
      @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
      @partitioner = partitioner_class.new(subscription_group)
      @scheduler = scheduler
-      @events_poller = Helpers::IntervalRunner.new { @client.events_poll }
+      @events_poller = Helpers::IntervalRunner.new { |**opts| @client.events_poll(**opts) }
      # We keep one buffer for messages to preserve memory and not allocate extra objects
      # We can do this that way because we always first schedule jobs using messages before we
      # fetch another batch.

@@ -499,6 +499,7 @@ module Karafka
      until wait_until.call
        @client.ping
        @scheduler.on_manage
+        @events_poller.call(safe: true)

        after_ping.call
        sleep(0.2)
data/lib/karafka/helpers/interval_runner.rb
CHANGED

@@ -25,14 +25,14 @@ module Karafka
    end

    # Runs the requested code if it was not executed previously recently
-    def call
+    def call(...)
      now = monotonic_now

      return if now - @last_called_at < @interval

      @last_called_at = now

-      @block.call
+      @block.call(...)
    end

    # Runs the requested code bypassing any time frequencies
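Because `#call` now forwards its arguments to the wrapped block, the listener can push `safe: true` through the interval gate during `wait_pinging`. A tiny sketch of that forwarding inside a booted Karafka process (the interval still comes from `internal.tick_interval`; `do_poll` is a placeholder, not a Karafka method):

    runner = Karafka::Helpers::IntervalRunner.new { |**opts| do_poll(**opts) }
    runner.call(safe: true) # runs the block at most once per interval, forwarding safe: true
    runner.call(safe: true) # skipped if invoked again within the same interval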
data/lib/karafka/instrumentation/assignments_tracker.rb
CHANGED

@@ -14,10 +14,33 @@ module Karafka
    class AssignmentsTracker
      include Singleton

+      class << self
+        # @return [Hash{Karafka::Routing::Topic => Array<Integer>}]
+        # @see #current
+        def current
+          instance.current
+        end
+
+        # @return [Hash{Karafka::Routing::Topic => Hash{Integer => Integer}}]
+        # @see #generations
+        def generations
+          instance.generations
+        end
+
+        # @param topic [Karafka::Routing::Topic]
+        # @param partition [Integer]
+        # @return [Integer]
+        # @see #generation
+        def generation(topic, partition)
+          instance.generation(topic, partition)
+        end
+      end
+
      # Initializes the assignments tracker with empty assignments
      def initialize
        @mutex = Mutex.new
        @assignments = Hash.new { |hash, key| hash[key] = [] }
+        @generations = Hash.new { |h, k| h[k] = {} }
      end

      # Returns all the active/current assignments of this given process

@@ -44,10 +67,41 @@ module Karafka
        assignments.freeze
      end

-      #
+      # Returns the generation counts for all partitions that have ever been assigned
+      #
+      # @return [Hash{Karafka::Routing::Topic => Hash{Integer => Integer}}] topic to partition
+      #   generation mapping. Generation starts at 1 on first assignment and increments on each
+      #   reassignment. Revoked partitions remain in the hash with their last generation value.
+      #
+      # @note Returns a frozen deep copy to prevent external mutation
+      def generations
+        result = {}
+
+        @mutex.synchronize do
+          @generations.each do |topic, partitions|
+            result[topic] = partitions.dup.freeze
+          end
+        end
+
+        result.freeze
+      end
+
+      # Returns the generation count for a specific topic-partition
+      #
+      # @param topic [Karafka::Routing::Topic]
+      # @param partition [Integer]
+      # @return [Integer] generation count (0 if never assigned, 1+ otherwise)
+      def generation(topic, partition)
+        @mutex.synchronize do
+          @generations.dig(topic, partition) || 0
+        end
+      end
+
+      # Clears all the assignments and generations
      def clear
        @mutex.synchronize do
          @assignments.clear
+          @generations.clear
        end
      end

@@ -125,7 +179,16 @@ module Karafka
        event[:tpl].to_h.each do |topic, partitions|
          topic = sg.topics.find(topic)

-
+          partition_ids = []
+
+          partitions.each do |partition|
+            partition_id = partition.partition
+            partition_ids << partition_id
+            @generations[topic][partition_id] ||= 0
+            @generations[topic][partition_id] += 1
+          end
+
+          @assignments[topic] += partition_ids
          @assignments[topic].sort!
        end
      end
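With the new class-level delegators, generation data can be read straight off the singleton. A short sketch based on the accessors added above (`topic` stands for a `Karafka::Routing::Topic` instance; the numbers are illustrative):

    tracker = Karafka::Instrumentation::AssignmentsTracker

    tracker.generation(topic, 0) # => 0 before the first assignment, 1+ after each (re)assignment
    tracker.generations          # => { topic => { 0 => 2, 1 => 1 } } (frozen deep copy)
    tracker.current              # => { topic => [0, 1] } currently assigned partitions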
data/lib/karafka/instrumentation/logger_listener.rb
CHANGED

@@ -93,6 +93,25 @@ module Karafka
        MSG
      end

+      # Logs info about the workers pool scaling up
+      #
+      # @param event [Karafka::Core::Monitoring::Event] event details including payload
+      def on_worker_scaling_up(event)
+        from = event[:from]
+        to = event[:to]
+        info "Workers pool scaled up from #{from} to #{to} workers"
+      end
+
+      # Logs info about the workers pool scaling down.
+      # The actual size change happens asynchronously as workers pick up nil sentinels and exit.
+      #
+      # @param event [Karafka::Core::Monitoring::Event] event details including payload
+      def on_worker_scaling_down(event)
+        from = event[:from]
+        to = event[:to]
+        info "Workers pool scaling down from #{from} to #{to} workers"
+      end
+
      # Prints info about a consumer pause occurrence. Irrelevant if user or system initiated.
      #
      # @param event [Karafka::Core::Monitoring::Event] event details including payload
data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb
CHANGED

@@ -328,10 +328,8 @@ module Karafka

        # Sends minute based probing metrics
        def minute_probe
-          concurrency = Karafka::App.config.concurrency
-
          count("processes_count", 1, {})
-          count("threads_count",
+          count("threads_count", Karafka::Server.workers.size, {})
        end
      end
    end
data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb
CHANGED

@@ -164,7 +164,7 @@ module Karafka
        jq_stats = event[:jobs_queue].statistics

        tags = default_tags
-        gauge("worker.total_threads", Karafka::
+        gauge("worker.total_threads", Karafka::Server.workers.size, tags: tags)
        histogram("worker.processing", jq_stats[:busy], tags: tags)
        histogram("worker.enqueued_jobs", jq_stats[:enqueued], tags: tags)
      end
data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb
CHANGED

@@ -158,36 +158,39 @@ module Karafka
          @mutex.synchronize(&)
        end

-        # @return [Integer] object id of the current
-
-
+        # @return [Integer] object id of the current fiber
+        # @note We use fiber object id instead of thread object id to ensure fiber-safety.
+        #   Multiple fibers can run on the same thread, and using thread id would cause them
+        #   to overwrite each other's timestamps.
+        def fiber_id
+          Fiber.current.object_id
        end

-        # Update the polling tick time for current
+        # Update the polling tick time for current fiber
        def mark_polling_tick
          synchronize do
-            @pollings[
+            @pollings[fiber_id] = monotonic_now
          end
        end

-        # Clear current
+        # Clear current fiber polling time tracker
        def clear_polling_tick
          synchronize do
-            @pollings.delete(
+            @pollings.delete(fiber_id)
          end
        end

        # Update the processing tick time
        def mark_consumption_tick
          synchronize do
-            @consumptions[
+            @consumptions[fiber_id] = monotonic_now
          end
        end

-        # Clear current
+        # Clear current fiber consumption time tracker
        def clear_consumption_tick
          synchronize do
-            @consumptions.delete(
+            @consumptions.delete(fiber_id)
          end
        end

data/lib/karafka/pro/loader.rb
CHANGED

@@ -113,6 +113,8 @@ module Karafka
      def reconfigure(config)
        icfg = config.internal

+        Karafka::Setup::DefaultsInjector.singleton_class.prepend(Setup::DefaultsInjector)
+
        icfg.cli.contract = Cli::Contracts::Server.new

        # Use manager that supports multiplexing
data/lib/karafka/pro/setup/defaults_injector.rb
ADDED

@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+
+# Karafka Pro - Source Available Commercial Software
+# Copyright (c) 2017-present Maciej Mensfeld. All rights reserved.
+#
+# This software is NOT open source. It is source-available commercial software
+# requiring a paid license for use. It is NOT covered by LGPL.
+#
+# The author retains all right, title, and interest in this software,
+# including all copyrights, patents, and other intellectual property rights.
+# No patent rights are granted under this license.
+#
+# PROHIBITED:
+# - Use without a valid commercial license
+# - Redistribution, modification, or derivative works without authorization
+# - Reverse engineering, decompilation, or disassembly of this software
+# - Use as training data for AI/ML models or inclusion in datasets
+# - Scraping, crawling, or automated collection for any purpose
+#
+# PERMITTED:
+# - Reading, referencing, and linking for personal or commercial use
+# - Runtime retrieval by AI assistants, coding agents, and RAG systems
+#   for the purpose of providing contextual help to Karafka users
+#
+# Receipt, viewing, or possession of this software does not convey or
+# imply any license or right beyond those expressly stated above.
+#
+# License: https://karafka.io/docs/Pro-License-Comm/
+# Contact: contact@karafka.io
+
+module Karafka
+  module Pro
+    # Namespace for Pro setup components
+    module Setup
+      # Pro defaults injector that extends the OSS defaults with Pro-specific settings
+      module DefaultsInjector
+        # Pro-specific consumer kafka defaults
+        # These defaults are carefully tuned to work with Pro's internal statistics aggregation,
+        # the Web UI dashboard, and the performance tracker. They depend on Pro's extended
+        # instrumentation pipeline and should not be applied outside of Pro as they may cause
+        # incomplete or inconsistent metrics collection and other unexpected behaviours.
+        CONSUMER_KAFKA_DEFAULTS = {
+          "statistics.unassigned.include": false
+        }.freeze
+
+        private_constant :CONSUMER_KAFKA_DEFAULTS
+
+        # Pro actively manages these keys via its own DefaultsInjector so users are allowed
+        # to set them if needed.
+        #
+        # @return [Set<Symbol>] empty set since Pro handles these keys
+        def managed_keys
+          @managed_keys ||= Set.new
+        end
+
+        # Enriches consumer kafka config with Pro-specific defaults
+        # @param kafka_config [Hash] kafka scoped config
+        def consumer(kafka_config)
+          super
+
+          CONSUMER_KAFKA_DEFAULTS.each do |key, value|
+            next if kafka_config.key?(key)
+
+            kafka_config[key] = value
+          end
+        end
+      end
+    end
+  end
+end
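The loader above prepends this module to the OSS injector's singleton class, so `consumer` applies the OSS defaults via `super` and then layers the Pro ones on top. A self-contained sketch of that prepend-plus-super pattern (simplified names, not actual Karafka classes):

    module OssDefaults
      def consumer(config)
        config[:'oss.default'] ||= true
      end
    end

    module ProDefaults
      def consumer(config)
        super # apply the OSS defaults first
        config[:'pro.default'] ||= true # then add the Pro-specific ones
      end
    end

    class Injector
      extend OssDefaults
    end

    Injector.singleton_class.prepend(ProDefaults)

    config = {}
    Injector.consumer(config)
    config # => { :'oss.default' => true, :'pro.default' => true }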
data/lib/karafka/pro/swarm/liveness_listener.rb
CHANGED

@@ -73,6 +73,13 @@ module Karafka
          super()
        end

+        # Report status before the first fetch loop iteration so the supervisor gets an initial
+        # healthy report even if the first consumption takes longer than the report timeout.
+        # @param _event [Karafka::Core::Monitoring::Event]
+        def on_connection_listener_before_fetch_loop(_event)
+          report_status
+        end
+
        # Tick on each fetch and report liveness so it works even when statistics are disabled
        #
        # @param _event [Karafka::Core::Monitoring::Event]

@@ -143,42 +150,47 @@ module Karafka
          periodically do
            return unless node

+            Kernel.exit!(orphaned_exit_code) if node.orphaned?
+
            current_status = status

            current_status.positive? ? node.unhealthy(current_status) : node.healthy
          end
        end

-        # @return [Integer] object id of the current
-
-
+        # @return [Integer] object id of the current fiber
+        # @note We use fiber object id instead of thread object id to ensure fiber-safety.
+        #   Multiple fibers can run on the same thread, and using thread id would cause them
+        #   to overwrite each other's timestamps.
+        def fiber_id
+          Fiber.current.object_id
        end

-        # Update the polling tick time for current
+        # Update the polling tick time for current fiber
        def mark_polling_tick
          synchronize do
-            @pollings[
+            @pollings[fiber_id] = monotonic_now
          end
        end

-        # Clear current
+        # Clear current fiber polling time tracker
        def clear_polling_tick
          synchronize do
-            @pollings.delete(
+            @pollings.delete(fiber_id)
          end
        end

        # Update the processing tick time
        def mark_consumption_tick
          synchronize do
-            @consumptions[
+            @consumptions[fiber_id] = monotonic_now
          end
        end

-        # Clear current
+        # Clear current fiber consumption time tracker
        def clear_consumption_tick
          synchronize do
-            @consumptions.delete(
+            @consumptions.delete(fiber_id)
          end
        end

data/lib/karafka/processing/jobs_queue.rb
CHANGED

@@ -14,10 +14,13 @@ module Karafka
    # we use a single workers poll that can have granular scheduling.
    class JobsQueue
      include Helpers::ConfigImporter.new(
-        concurrency: %i[concurrency],
        tick_interval: %i[internal tick_interval]
      )

+      # Set via writer because of a circular dependency: the queue must exist before the pool
+      # (workers need the queue at construction), but the queue needs the pool for concurrency.
+      attr_writer :pool
+
      # @return [Karafka::Processing::JobsQueue]
      def initialize
        @queue = Queue.new

@@ -56,10 +59,15 @@ module Karafka
      #
      # @param job [Jobs::Base] job that we want to run
      def <<(job)
-        # We do not push the job if the queue is closed as it means that it would anyhow not be
-        # executed
        return if @queue.closed?

+        # nil is used by WorkersPool to signal a worker to exit during downscaling.
+        # Passed straight through to the raw queue, bypassing statistics and tracking.
+        unless job
+          @queue << job
+          return
+        end
+
        @mutex.synchronize do
          group = @in_processing[job.group_id]

@@ -70,7 +78,7 @@ module Karafka
        # Assume that moving to queue means being picked up immediately not to create stats
        # race conditions because of pop overhead. If there are workers available, we assume
        # work is going to be handled as we never reject enqueued jobs
-        if @statistics[:busy] <
+        if @statistics[:busy] < @pool.size
          @statistics[:busy] += 1
        else
          # If system is fully loaded, it means this job is indeed enqueued

data/lib/karafka/processing/worker.rb
CHANGED

@@ -27,10 +27,12 @@ module Karafka
      attr_reader :id

      # @param jobs_queue [JobsQueue]
+      # @param pool [WorkersPool] pool this worker belongs to (for deregistration on shutdown)
      # @return [Worker]
-      def initialize(jobs_queue)
+      def initialize(jobs_queue, pool)
        @id = SecureRandom.hex(6)
        @jobs_queue = jobs_queue
+        @pool = pool
        @non_wrapped_flow = worker_job_call_wrapper == false
      end

@@ -80,6 +82,9 @@ module Karafka
          end
        end
      else
+        # nil means either queue closed (full shutdown) or pool downscaling.
+        # Either way, deregister from pool so it reflects the actual worker count.
+        @pool.deregister(self)
        false
      end
      # We signal critical exceptions, notify and do not allow worker to fail

@@ -95,7 +100,7 @@ module Karafka
          type: "worker.process.error"
        )
      ensure
-        # job can be nil when the queue is being closed
+        # job can be nil when the queue is being closed or during pool downscaling
        if job
          @jobs_queue.complete(job)
          job.finish!

data/lib/karafka/processing/workers_pool.rb
ADDED

@@ -0,0 +1,158 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Processing
+    # Dynamic thread pool that manages worker threads.
+    # Supports scaling at runtime via {#scale}.
+    #
+    # All public methods that read or mutate `@workers` are synchronized via `@mutex`.
+    # `@size` is always updated under `@mutex` but can be read without locking for performance
+    # (integer assignment is atomic in MRI).
+    class WorkersPool
+      include Helpers::ConfigImporter.new(
+        concurrency: %i[concurrency],
+        worker_thread_priority: %i[worker_thread_priority],
+        monitor: %i[monitor]
+      )
+
+      # @return [Integer] current number of workers registered in the pool.
+      #   Reflects the actual thread count, not a target. After a downscale request this value
+      #   converges towards the target as workers pick up nil sentinels and deregister.
+      #   Updated atomically under mutex, safe to read without locking.
+      attr_reader :size
+
+      # Jobs queue reference, set by the Runner after both pool and queue are created.
+      # Must be assigned before calling {#scale}.
+      attr_writer :jobs_queue
+
+      # Initializes an empty pool with zero workers.
+      # Workers are not started until {#scale} is called, allowing the pool to be created early
+      # (e.g. in Server.run) before the jobs queue exists.
+      #
+      # @return [WorkersPool]
+      def initialize
+        @jobs_queue = nil
+        @workers = []
+        @size = 0
+        @mutex = Mutex.new
+        # Monotonically increasing index for naming worker threads. Indices are never reused
+        # after a worker exits, so thread names remain unique across the lifetime of the process
+        # and make it easy to correlate log entries with specific worker generations.
+        @next_index = 0
+      end
+
+      # Scale pool towards `target` workers (minimum 1).
+      #
+      # **Scaling up** is synchronous -- new worker threads are spawned and registered before this
+      # method returns. {#size} reflects the new count immediately.
+      #
+      # **Scaling down** is asynchronous -- nil sentinels are enqueued and workers exit when they
+      # pick one up. {#size} decreases gradually as workers deregister themselves. Callers that
+      # need to know when downsizing is complete should poll {#size} or listen for the
+      # `worker.scaling.down` instrumentation event (whose `:to` payload reports the *target*,
+      # not the current count).
+      #
+      # The entire read-decide-act cycle is synchronized to prevent stale reads.
+      # Instrumentation runs outside the mutex to avoid holding the lock during user callbacks.
+      #
+      # @param target [Integer] desired number of workers
+      def scale(target)
+        raise(Karafka::Errors::BaseError, "jobs_queue must be set before scaling") unless @jobs_queue
+
+        target = [target, 1].max
+        event = nil
+
+        @mutex.synchronize do
+          current = @workers.size
+          delta = target - current
+
+          if delta.positive?
+            event = grow(delta)
+          elsif delta.negative?
+            event = shrink(delta.abs)
+          end
+        end
+
+        return unless event
+
+        monitor.instrument(*event)
+      end
+
+      # @return [Boolean] true if all workers have stopped
+      def stopped?
+        snapshot.none?(&:alive?)
+      end
+
+      # @return [Array<Worker>] workers that are still alive
+      def alive
+        snapshot.select(&:alive?)
+      end
+
+      # Forcefully terminate all worker threads.
+      def terminate
+        snapshot.each(&:terminate)
+      end
+
+      # Wait for all current workers to finish.
+      def join
+        snapshot.each(&:join)
+      end
+
+      # Called by a worker when it exits (queue closed or pool downscaling).
+      # Thread-safe -- worker threads call this from their own thread.
+      #
+      # @param worker [Worker] worker to remove from the pool
+      def deregister(worker)
+        @mutex.synchronize do
+          @workers.delete(worker)
+          @size = @workers.size
+        end
+      end
+
+      private
+
+      # @return [Array<Worker>] snapshot of workers taken under mutex
+      def snapshot
+        @mutex.synchronize { @workers.dup }
+      end
+
+      # Add `count` workers and start their threads immediately.
+      # Must be called under `@mutex` (from {#scale}) or during construction (no contention).
+      #
+      # @param count [Integer] number of workers to add
+      # @return [Array] instrumentation event args to be emitted outside the mutex
+      def grow(count)
+        from = @workers.size
+
+        count.times do
+          worker = Worker.new(@jobs_queue, self)
+          @workers << worker
+          worker.async_call("karafka.worker##{@next_index}", worker_thread_priority)
+          @next_index += 1
+        end
+
+        @size = @workers.size
+
+        ["worker.scaling.up", { workers_pool: self, from: from, to: @size }]
+      end
+
+      # Push nil into the queue to signal workers to exit.
+      # Whichever workers pick them up will deregister and stop.
+      # Must be called under `@mutex` (from {#scale}).
+      #
+      # @param count [Integer] number of workers to remove
+      # @return [Array, nil] instrumentation event args or nil if no-op
+      # @note Never shrinks below 1 worker.
+      def shrink(count)
+        effective = [count, @workers.size - 1].min
+        return if effective <= 0
+
+        from = @workers.size
+        effective.times { @jobs_queue << nil }
+        to = from - effective
+
+        ["worker.scaling.down", { workers_pool: self, from: from, to: to }]
+      end
+    end
+  end
+end
data/lib/karafka/runner.rb
CHANGED

@@ -4,20 +4,20 @@ module Karafka
  # Class used to run the Karafka listeners in separate threads
  class Runner
    include Helpers::ConfigImporter.new(
-      worker_thread_priority: %i[worker_thread_priority],
      manager: %i[internal connection manager],
-      conductor: %i[internal connection conductor]
-      jobs_queue_class: %i[internal processing jobs_queue_class]
+      conductor: %i[internal connection conductor]
    )

    # Starts listening on all the listeners asynchronously and handles the jobs queue closing
    # after listeners are done with their work.
    def call
-
-
-
+      jobs_queue = Karafka::Server.jobs_queue
+      workers = Karafka::Server.workers
+
+      # Wire up the circular dependency between pool and queue
+      workers.jobs_queue = jobs_queue
+      jobs_queue.pool = workers

-      workers = Processing::WorkersBatch.new(jobs_queue)
      listeners = Connection::ListenersBatch.new(jobs_queue)

      # We mark it prior to delegating to the manager as manager will have to start at least one

@@ -27,17 +27,12 @@ module Karafka
      # Register all the listeners so they can be started and managed
      manager.register(listeners)

-      workers.each_with_index do |worker, i|
-        worker.async_call(
-          "karafka.worker##{i}",
-          worker_thread_priority
-        )
-      end
-
      # We aggregate threads here for a supervised shutdown process
-      Karafka::Server.workers = workers
      Karafka::Server.listeners = listeners
-
+
+      # Start worker threads after listeners are created so a failure in the boot steps above
+      # does not leave live worker threads blocked on an open queue.
+      workers.scale(Karafka::App.config.concurrency)

      until manager.done?
        conductor.wait

@@ -56,7 +51,7 @@ module Karafka
    # with everything. One thing worth keeping in mind though: It is the end user responsibility
    # to handle the shutdown detection in their long-running processes. Otherwise if timeout
    # is exceeded, there will be a forced shutdown.
-      workers.
+      workers.join
    # If anything crashes here, we need to raise the error and crush the runner because it means
    # that something terrible happened
    rescue => e

@@ -67,6 +62,11 @@ module Karafka
        type: "runner.call.error"
      )
      Karafka::App.stop!
+
+      # Clean up workers so we don't leak threads blocked on the queue
+      jobs_queue.close
+      workers.join
+
      raise e
    end
  end
data/lib/karafka/server.rb
CHANGED

@@ -10,7 +10,8 @@ module Karafka
    shutdown_timeout: %i[shutdown_timeout],
    forceful_exit_code: %i[internal forceful_exit_code],
    forceful_shutdown_wait: %i[internal forceful_shutdown_wait],
-    process: %i[internal process]
+    process: %i[internal process],
+    jobs_queue_class: %i[internal processing jobs_queue_class]
  )

  class << self

@@ -39,8 +40,7 @@ module Karafka

    # Method which runs app
    def run
-
-      self.workers = []
+      prepare

      # We need to validate this prior to running because it may be executed also from the
      # embedded

@@ -93,6 +93,7 @@ module Karafka
    # @note We don't need to sleep because Karafka::Runner is locking and waiting to finish loop
    #   (and it won't happen until we explicitly want to stop)
    def start
+      prepare
      Karafka::Runner.new.call
    end

@@ -117,7 +118,7 @@ module Karafka
      # We divide it by 1000 because we use time in ms.
      ((timeout / 1_000) * (1 / supervision_sleep)).to_i.times do
        all_listeners_stopped = listeners.all?(&:stopped?)
-        all_workers_stopped = workers.
+        all_workers_stopped = workers.stopped?

        return if all_listeners_stopped && all_workers_stopped

@@ -127,7 +128,7 @@ module Karafka
      raise Errors::ForcefulShutdownError
    rescue Errors::ForcefulShutdownError => e
      active_listeners = listeners.select(&:active?)
-      alive_workers = workers.
+      alive_workers = workers.alive

      # Collect details about subscription groups that still have jobs in processing
      in_processing = jobs_queue ? jobs_queue.in_processing : {}

@@ -143,7 +144,7 @@ module Karafka
      )

      # We're done waiting, lets kill them!
-      workers.
+      workers.terminate
      listeners.active.each(&:terminate)

      # We always need to shutdown clients to make sure we do not force the GC to close consumer.

@@ -186,9 +187,26 @@ module Karafka
      WaterDrop::ConnectionPool.close

      Karafka::App.terminate!
+
+      # Allow prepare to run again if the server is restarted (e.g. reset_status in tests,
+      # or embedded re-start).
+      @prepared = false
    end
  end

+  # Initializes listeners, jobs queue and workers pool.
+  # Called from both {.run} (standalone) and {.start} (embedded). Guarded so it runs only
+  # once even when {.run} delegates to {.start}.
+  def prepare
+    return if @prepared
+
+    @prepared = true
+
+    self.listeners = []
+    self.jobs_queue = jobs_queue_class.new
+    self.workers = Processing::WorkersPool.new
+  end
+
  # Quiets the Karafka server.
  #
  # Karafka will stop processing but won't quit the consumer group, so no rebalance will be
data/lib/karafka/setup/attributes_map.rb
CHANGED

@@ -125,6 +125,7 @@ module Karafka
      sasl.oauthbearer.metadata.authentication.type
      sasl.oauthbearer.method
      sasl.oauthbearer.scope
+      sasl.oauthbearer.sub.claim.name
      sasl.oauthbearer.token.endpoint.url
      sasl.password
      sasl.username

@@ -283,6 +284,7 @@ module Karafka
      sasl.oauthbearer.metadata.authentication.type
      sasl.oauthbearer.method
      sasl.oauthbearer.scope
+      sasl.oauthbearer.sub.claim.name
      sasl.oauthbearer.token.endpoint.url
      sasl.password
      sasl.username
data/lib/karafka/setup/contracts/config.rb
CHANGED

@@ -170,6 +170,23 @@ module Karafka
        detected_errors
      end

+      # Certain kafka settings are managed internally by Karafka and should not be set
+      # directly. Setting them manually may cause misbehaviours and other unexpected issues.
+      virtual do |data, errors|
+        next unless errors.empty?
+
+        managed_keys = Karafka::Setup::DefaultsInjector.managed_keys
+        detected_errors = []
+
+        data.fetch(:kafka).each_key do |key|
+          next unless managed_keys.include?(key)
+
+          detected_errors << [[:kafka, key], :managed_key_not_supported]
+        end
+
+        detected_errors
+      end
+
      virtual do |data, errors|
        next unless errors.empty?

data/lib/karafka/setup/defaults_injector.rb
CHANGED

@@ -52,6 +52,16 @@ module Karafka
    )

    class << self
+      # Kafka settings that are managed internally by Karafka and should not be set directly
+      # by users. Setting them manually may cause misbehaviours and other unexpected issues.
+      #
+      # @return [Set<Symbol>] set of managed kafka setting keys
+      def managed_keys
+        @managed_keys ||= Set[
+          :"statistics.unassigned.include"
+        ]
+      end
+
      # Propagates the kafka setting defaults unless they are already present for consumer config
      # This makes it easier to set some values that users usually don't change but still allows
      # them to overwrite the whole hash if they want to
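Together with the contract addition above, an OSS user who sets `statistics.unassigned.include` by hand now fails config validation with `managed_key_not_supported`, while the value is still injected automatically as a default. A hedged sketch of what that looks like in a setup block (standard `Karafka::App.setup` API; exact error surfacing may differ):

    Karafka::App.setup do |config|
      config.kafka = {
        'bootstrap.servers': 'localhost:9092',
        # Managed internally by Karafka; declaring it here is now rejected by the
        # kafka config contract with the :managed_key_not_supported error
        'statistics.unassigned.include': false
      }
    end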
data/lib/karafka/swarm/liveness_listener.rb
CHANGED

@@ -19,6 +19,13 @@ module Karafka
      @mutex = Mutex.new
    end

+    # Report liveness before the first fetch loop iteration so the supervisor gets an initial
+    # healthy report even if the first consumption takes longer than the report timeout.
+    # @param _event [Karafka::Core::Monitoring::Event]
+    def on_connection_listener_before_fetch_loop(_event)
+      report_liveness
+    end
+
    # Report from the fetch loop at the top of each iteration
    # @param _event [Karafka::Core::Monitoring::Event]
    def on_connection_listener_fetch_loop(_event)
data/lib/karafka/swarm/manager.rb
CHANGED

@@ -19,12 +19,12 @@ module Karafka
      node_restart_timeout: %i[internal swarm node_restart_timeout]
    )

-    # Status we issue when we decide to shutdown
+    # Status we issue when we decide to shutdown a non-reporting node
    # We use -1 because nodes are expected to report 0+ statuses and we can use negative numbers
    # for non-node based statuses
-
+    NOT_REPORTING_SHUTDOWN_STATUS = -1

-    private_constant :
+    private_constant :NOT_REPORTING_SHUTDOWN_STATUS

    # @return [Array<Node>] All nodes that manager manages
    attr_reader :nodes

@@ -84,7 +84,7 @@ module Karafka
        if node.alive?
          next if terminate_if_hanging(statuses, node)
          next if stop_if_not_healthy(statuses, node)
-          next if
+          next if stop_if_not_reporting(statuses, node)
        else
          next if cleanup_one(statuses, node)
          next if restart_after_timeout(statuses, node)

@@ -144,12 +144,12 @@ module Karafka
      end
    end

-    # If node stopped
+    # If node stopped reporting, starts the stopping procedure.
    #
    # @param statuses [Hash] hash with statuses transitions with times
    # @param node [Swarm::Node] node we're checking
    # @return [Boolean] should it be the last action taken on this node in this run
-    def
+    def stop_if_not_reporting(statuses, node)
      # Do nothing if already stopping
      return true if statuses.key?(:stop)
      # Do nothing if we've received status update recently enough

@@ -160,7 +160,7 @@ module Karafka
      "swarm.manager.stopping",
      caller: self,
      node: node,
-      status:
+      status: NOT_REPORTING_SHUTDOWN_STATUS
    ) do
      node.stop
      statuses[:stop] = monotonic_now
data/lib/karafka/version.rb
CHANGED
metadata
CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.5.
+  version: 2.5.10.rc1
 platform: ruby
 authors:
 - Maciej Mensfeld

@@ -15,7 +15,7 @@ dependencies:
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 2.5.
+      version: 2.5.13
   - - "<"
     - !ruby/object:Gem::Version
       version: 2.6.0

@@ -25,7 +25,7 @@ dependencies:
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 2.5.
+      version: 2.5.13
   - - "<"
     - !ruby/object:Gem::Version
       version: 2.6.0

@@ -35,14 +35,14 @@ dependencies:
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 0.
+      version: 0.26.1
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-      version: 0.
+        version: 0.26.1
 - !ruby/object:Gem::Dependency
   name: waterdrop
   requirement: !ruby/object:Gem::Requirement

@@ -456,6 +456,7 @@ files:
 - lib/karafka/pro/scheduled_messages/setup/config.rb
 - lib/karafka/pro/scheduled_messages/state.rb
 - lib/karafka/pro/scheduled_messages/tracker.rb
+- lib/karafka/pro/setup/defaults_injector.rb
 - lib/karafka/pro/swarm/liveness_listener.rb
 - lib/karafka/process.rb
 - lib/karafka/processing/coordinator.rb

@@ -486,7 +487,7 @@ files:
 - lib/karafka/processing/strategies/mom.rb
 - lib/karafka/processing/strategy_selector.rb
 - lib/karafka/processing/worker.rb
-- lib/karafka/processing/
+- lib/karafka/processing/workers_pool.rb
 - lib/karafka/railtie.rb
 - lib/karafka/routing/activity_manager.rb
 - lib/karafka/routing/builder.rb
data/lib/karafka/processing/workers_batch.rb
DELETED

@@ -1,29 +0,0 @@
-# frozen_string_literal: true
-
-module Karafka
-  module Processing
-    # Abstraction layer around workers batch.
-    class WorkersBatch
-      include Enumerable
-      include Helpers::ConfigImporter.new(
-        concurrency: %i[concurrency]
-      )
-
-      # @param jobs_queue [JobsQueue]
-      # @return [WorkersBatch]
-      def initialize(jobs_queue)
-        @batch = Array.new(concurrency) { Processing::Worker.new(jobs_queue) }
-      end
-
-      # Iterates over available workers and yields each worker
-      def each(&)
-        @batch.each(&)
-      end
-
-      # @return [Integer] number of workers in the batch
-      def size
-        @batch.size
-      end
-    end
-  end
-end