waterdrop 2.8.5 → 2.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +10 -2
- data/.github/workflows/push.yml +1 -1
- data/.github/workflows/trigger-wiki-refresh.yml +30 -0
- data/.github/workflows/verify-action-pins.yml +1 -1
- data/.rspec +1 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +12 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +25 -7
- data/README.md +1 -0
- data/bin/integrations +242 -0
- data/config/locales/errors.yml +1 -0
- data/lib/waterdrop/config.rb +6 -0
- data/lib/waterdrop/contracts/config.rb +3 -0
- data/lib/waterdrop/instrumentation/idle_disconnector_listener.rb +109 -0
- data/lib/waterdrop/instrumentation/logger_listener.rb +12 -0
- data/lib/waterdrop/instrumentation/notifications.rb +2 -0
- data/lib/waterdrop/producer/status.rb +7 -4
- data/lib/waterdrop/producer.rb +117 -0
- data/lib/waterdrop/version.rb +1 -1
- data/renovate.json +12 -0
- data/waterdrop.gemspec +1 -1
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 96b89ee79adb2be90c97e5e87f79d79ad86ceb7d3652c0220d32030e605afadf
|
4
|
+
data.tar.gz: fbdc38cc1c913e3dd433fa4448848f9f3f3799552d2c18741dd207849f59750d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 253cdaf4f5ba5e0f92e723d0a8214dd36b05a9b11986c005a63399bce38f193a90922281cf150f434ea4f0aaa6e6950dbb02a7df2e4134e0508ed474b0a68c04
|
7
|
+
data.tar.gz: 07043a71104f7a818dcb0a4261ee559b62aadb37ad28696ae5ca6d8c0c66c20de5f7d7ad3ab7bfd68ef0b9f91b1f55d67cc1319e37c009a0023beea6fa49905d
|
data/.github/workflows/ci.yml
CHANGED
@@ -20,6 +20,8 @@ jobs:
|
|
20
20
|
timeout-minutes: 15
|
21
21
|
runs-on: ubuntu-latest
|
22
22
|
needs: diffend
|
23
|
+
env:
|
24
|
+
BUNDLE_FORCE_RUBY_PLATFORM: ${{ matrix.force_ruby_platform }}
|
23
25
|
strategy:
|
24
26
|
fail-fast: false
|
25
27
|
matrix:
|
@@ -29,6 +31,9 @@ jobs:
|
|
29
31
|
- '3.3'
|
30
32
|
- '3.2'
|
31
33
|
- '3.1'
|
34
|
+
force_ruby_platform:
|
35
|
+
- true
|
36
|
+
- false
|
32
37
|
include:
|
33
38
|
- ruby: '3.4'
|
34
39
|
coverage: 'true'
|
@@ -46,7 +51,7 @@ jobs:
|
|
46
51
|
ruby -i -ne 'puts $_ unless /^\s*ffi \(.*-.*\)$/' Gemfile.lock
|
47
52
|
|
48
53
|
- name: Set up Ruby
|
49
|
-
uses: ruby/setup-ruby@
|
54
|
+
uses: ruby/setup-ruby@2a7b30092b0caf9c046252510f9273b4875f3db9 # v1.254.0
|
50
55
|
with:
|
51
56
|
ruby-version: ${{matrix.ruby}}
|
52
57
|
bundler-cache: true
|
@@ -75,6 +80,9 @@ jobs:
|
|
75
80
|
GITHUB_COVERAGE: ${{matrix.coverage}}
|
76
81
|
run: bundle exec rspec
|
77
82
|
|
83
|
+
- name: Run integration tests
|
84
|
+
run: ./bin/integrations
|
85
|
+
|
78
86
|
- name: Check Kafka logs for unexpected warnings
|
79
87
|
run: bin/verify_kafka_warnings
|
80
88
|
|
@@ -91,7 +99,7 @@ jobs:
|
|
91
99
|
with:
|
92
100
|
fetch-depth: 0
|
93
101
|
- name: Set up Ruby
|
94
|
-
uses: ruby/setup-ruby@
|
102
|
+
uses: ruby/setup-ruby@2a7b30092b0caf9c046252510f9273b4875f3db9 # v1.254.0
|
95
103
|
with:
|
96
104
|
ruby-version: 3.4
|
97
105
|
- name: Install latest bundler
|
data/.github/workflows/push.yml
CHANGED
data/.github/workflows/trigger-wiki-refresh.yml
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
name: Trigger Wiki Refresh
|
2
|
+
|
3
|
+
on:
|
4
|
+
release:
|
5
|
+
types: [published]
|
6
|
+
push:
|
7
|
+
branches: [master]
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
trigger-wiki-refresh:
|
11
|
+
runs-on: ubuntu-latest
|
12
|
+
environment: wiki-trigger
|
13
|
+
if: github.repository_owner == 'karafka'
|
14
|
+
steps:
|
15
|
+
- name: Trigger wiki refresh
|
16
|
+
uses: peter-evans/repository-dispatch@ff45666b9427631e3450c54a1bcbee4d9ff4d7c0 # v3.0.0
|
17
|
+
with:
|
18
|
+
token: ${{ secrets.WIKI_REPO_TOKEN }}
|
19
|
+
repository: karafka/wiki
|
20
|
+
event-type: sync-trigger
|
21
|
+
client-payload: |
|
22
|
+
{
|
23
|
+
"repository": "${{ github.repository }}",
|
24
|
+
"event_name": "${{ github.event_name }}",
|
25
|
+
"release_tag": "${{ github.event.release.tag_name || '' }}",
|
26
|
+
"release_name": "${{ github.event.release.name || '' }}",
|
27
|
+
"commit_sha": "${{ github.sha }}",
|
28
|
+
"commit_message": "Trigger Wiki Refresh",
|
29
|
+
"triggered_by": "${{ github.actor }}"
|
30
|
+
}
|
data/.rspec
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.4.
|
1
|
+
3.4.5
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,17 @@
|
|
1
1
|
# WaterDrop changelog
|
2
2
|
|
3
|
+
## 2.8.6 (2025-08-18)
|
4
|
+
- [Feature] Add `idle_disconnect_timeout` config option to automatically disconnect idle producers after a configurable timeout period.
|
5
|
+
- [Feature] Add support for [async](https://github.com/socketry/async) gems ecosystem with proper fiber yielding during blocking operations.
|
6
|
+
- [Feature] Add integration testing infrastructure with `bin/integrations` runner for testing external ecosystem compatibility.
|
7
|
+
- [Enhancement] Introduce the `WaterDrop::Producer#disconnect` so users can write custom logic to save on connections when the producer is only used from time to time.
|
8
|
+
- [Enhancement] Introduce `WaterDrop::Producer#inspect` that is mutex-safe.
|
9
|
+
- [Enhancement] Raise errors on detected Ruby warnings.
|
10
|
+
- [Enhancement] Optimize producer for Ruby shapes.
|
11
|
+
- [Enhancement] Add integration spec to validate fiber yielding behavior with async gems.
|
12
|
+
- [Change] Require `karafka-rdkafka` `>=` `0.20.0`.
|
13
|
+
- [Change] Add new CI action to trigger auto-doc refresh.
|
14
|
+
|
3
15
|
## 2.8.5 (2025-06-23)
|
4
16
|
- [Enhancement] Normalize topic + partition logs format (single place).
|
5
17
|
- [Fix] A producer is not idempotent unless the enable.idempotence config is `true` (ferrous26).
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
waterdrop (2.8.
|
4
|
+
waterdrop (2.8.6)
|
5
5
|
karafka-core (>= 2.4.9, < 3.0.0)
|
6
|
-
karafka-rdkafka (>= 0.
|
6
|
+
karafka-rdkafka (>= 0.20.0)
|
7
7
|
zeitwerk (~> 2.3)
|
8
8
|
|
9
9
|
GEM
|
@@ -23,16 +23,32 @@ GEM
|
|
23
23
|
ffi (1.17.2-x86_64-darwin)
|
24
24
|
ffi (1.17.2-x86_64-linux-gnu)
|
25
25
|
ffi (1.17.2-x86_64-linux-musl)
|
26
|
-
karafka-core (2.
|
27
|
-
karafka-rdkafka (>= 0.
|
26
|
+
karafka-core (2.5.2)
|
27
|
+
karafka-rdkafka (>= 0.19.2, < 0.21.0)
|
28
28
|
logger (>= 1.6.0)
|
29
|
-
karafka-rdkafka (0.
|
29
|
+
karafka-rdkafka (0.21.0.rc1)
|
30
30
|
ffi (~> 1.15)
|
31
|
+
logger
|
32
|
+
mini_portile2 (~> 2.6)
|
33
|
+
rake (> 12)
|
34
|
+
karafka-rdkafka (0.21.0.rc1-arm64-darwin)
|
35
|
+
ffi (~> 1.15)
|
36
|
+
logger
|
37
|
+
mini_portile2 (~> 2.6)
|
38
|
+
rake (> 12)
|
39
|
+
karafka-rdkafka (0.21.0.rc1-x86_64-linux-gnu)
|
40
|
+
ffi (~> 1.15)
|
41
|
+
logger
|
42
|
+
mini_portile2 (~> 2.6)
|
43
|
+
rake (> 12)
|
44
|
+
karafka-rdkafka (0.21.0.rc1-x86_64-linux-musl)
|
45
|
+
ffi (~> 1.15)
|
46
|
+
logger
|
31
47
|
mini_portile2 (~> 2.6)
|
32
48
|
rake (> 12)
|
33
49
|
logger (1.7.0)
|
34
50
|
mini_portile2 (2.8.9)
|
35
|
-
ostruct (0.6.
|
51
|
+
ostruct (0.6.3)
|
36
52
|
rake (13.3.0)
|
37
53
|
rspec (3.13.1)
|
38
54
|
rspec-core (~> 3.13.0)
|
@@ -53,6 +69,7 @@ GEM
|
|
53
69
|
simplecov_json_formatter (~> 0.1)
|
54
70
|
simplecov-html (0.13.1)
|
55
71
|
simplecov_json_formatter (0.1.4)
|
72
|
+
warning (1.5.0)
|
56
73
|
zeitwerk (2.6.18)
|
57
74
|
|
58
75
|
PLATFORMS
|
@@ -73,8 +90,9 @@ DEPENDENCIES
|
|
73
90
|
ostruct
|
74
91
|
rspec
|
75
92
|
simplecov
|
93
|
+
warning
|
76
94
|
waterdrop!
|
77
95
|
zeitwerk (~> 2.6.18)
|
78
96
|
|
79
97
|
BUNDLED WITH
|
80
|
-
2.
|
98
|
+
2.7.0
|
data/README.md
CHANGED
@@ -15,6 +15,7 @@ It:
|
|
15
15
|
- Supports producing to multiple clusters
|
16
16
|
- Supports multiple delivery policies
|
17
17
|
- Supports per-topic configuration alterations (variants)
|
18
|
+
- Works with [async](https://github.com/socketry/async) gems ecosystem
|
18
19
|
- Works with Kafka `1.0+` and Ruby `3.1+`
|
19
20
|
- Works with and without Karafka
|
20
21
|
|
data/bin/integrations
ADDED
@@ -0,0 +1,242 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Runner to run integration specs
|
4
|
+
|
5
|
+
# All integration specs run with their own bundler context to avoid dependency conflicts.
|
6
|
+
# All WaterDrop integration specs are pristine by default since they use isolated Gemfiles.
|
7
|
+
raise 'This code needs to be executed WITHOUT bundle exec' if Kernel.const_defined?(:Bundler)
|
8
|
+
|
9
|
+
require 'open3'
|
10
|
+
require 'fileutils'
|
11
|
+
require 'pathname'
|
12
|
+
require 'tmpdir'
|
13
|
+
|
14
|
+
ROOT_PATH = Pathname.new(File.expand_path(File.join(File.dirname(__FILE__), '../')))
|
15
|
+
|
16
|
+
# How many bytes do we want to keep from the stdout in the buffer for when we need to print it
|
17
|
+
MAX_BUFFER_OUTPUT = 307_200
|
18
|
+
|
19
|
+
# Abstraction around a single test scenario execution process
|
20
|
+
class Scenario
|
21
|
+
# How long a scenario can run before we kill it
|
22
|
+
# This is a fail-safe just in case something would hang
|
23
|
+
MAX_RUN_TIME = 5 * 60 # 5 minutes tops
|
24
|
+
|
25
|
+
# Expected exit codes for each integration test
|
26
|
+
# All WaterDrop integration tests should exit with 0 on success, 1 on failure
|
27
|
+
EXIT_CODES = {
|
28
|
+
default: [0]
|
29
|
+
}.freeze
|
30
|
+
|
31
|
+
private_constant :MAX_RUN_TIME, :EXIT_CODES
|
32
|
+
|
33
|
+
# Creates scenario instance and runs in the background process
|
34
|
+
#
|
35
|
+
# @param path [String] path to the scenarios file
|
36
|
+
def initialize(path)
|
37
|
+
@path = path
|
38
|
+
# First chunk read from stdout (at most MAX_BUFFER_OUTPUT bytes)
|
39
|
+
@stdout_head = ''
|
40
|
+
# Most recent stdout output (trimmed to the last MAX_BUFFER_OUTPUT characters)
|
41
|
+
@stdout_tail = ''
|
42
|
+
end
|
43
|
+
|
44
|
+
# Starts running given scenario in a separate process
|
45
|
+
def start
|
46
|
+
@stdin, @stdout, @stderr, @wait_thr = Open3.popen3(init_and_build_cmd)
|
47
|
+
@started_at = current_time
|
48
|
+
end
|
49
|
+
|
50
|
+
# @return [String] integration spec name
|
51
|
+
def name
|
52
|
+
@path.gsub("#{ROOT_PATH}/spec/integrations/", '')
|
53
|
+
end
|
54
|
+
|
55
|
+
|
56
|
+
# @return [Boolean] did this scenario finish or is it still running
|
57
|
+
def finished?
|
58
|
+
# If the thread is running too long, kill it
|
59
|
+
if current_time - @started_at > MAX_RUN_TIME
|
60
|
+
begin
|
61
|
+
Process.kill('TERM', pid)
|
62
|
+
# It may finish right after we want to kill it, that's why we ignore this
|
63
|
+
rescue Errno::ESRCH
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
# We read it so it won't grow as we use our default logger that prints to both test.log and
|
68
|
+
# to stdout. Otherwise after reaching the buffer size, it would hang
|
69
|
+
buffer = ''
|
70
|
+
@stdout.read_nonblock(MAX_BUFFER_OUTPUT, buffer, exception: false)
|
71
|
+
@stdout_head = buffer if @stdout_head.empty?
|
72
|
+
@stdout_tail << buffer
|
73
|
+
@stdout_tail = @stdout_tail[-MAX_BUFFER_OUTPUT..-1] || @stdout_tail
|
74
|
+
|
75
|
+
!@wait_thr.alive?
|
76
|
+
end
|
77
|
+
|
78
|
+
# @return [Boolean] did this scenario finish successfully or not
|
79
|
+
def success?
|
80
|
+
expected_exit_codes = EXIT_CODES[name] || EXIT_CODES[:default]
|
81
|
+
|
82
|
+
expected_exit_codes.include?(exit_code)
|
83
|
+
end
|
84
|
+
|
85
|
+
# @return [Integer] pid of the process of this scenario
|
86
|
+
def pid
|
87
|
+
@wait_thr.pid
|
88
|
+
end
|
89
|
+
|
90
|
+
# @return [Integer] exit code of the process running given scenario
|
91
|
+
def exit_code
|
92
|
+
# There may be no exit status if we killed the thread
|
93
|
+
@wait_thr.value&.exitstatus || 123
|
94
|
+
end
|
95
|
+
|
96
|
+
# @return [String] exit status of the process
|
97
|
+
def exit_status
|
98
|
+
@wait_thr.value.to_s
|
99
|
+
end
|
100
|
+
|
101
|
+
# Prints a status report when scenario is finished and stdout if it failed
|
102
|
+
def report
|
103
|
+
if success?
|
104
|
+
print "\e[#{32}m#{'.'}\e[0m"
|
105
|
+
else
|
106
|
+
buffer = ''
|
107
|
+
|
108
|
+
@stderr.read_nonblock(MAX_BUFFER_OUTPUT, buffer, exception: false)
|
109
|
+
|
110
|
+
puts
|
111
|
+
puts "\e[#{31}m#{'[FAILED]'}\e[0m #{name}"
|
112
|
+
puts "Time taken: #{current_time - @started_at} seconds"
|
113
|
+
puts "Exit code: #{exit_code}"
|
114
|
+
puts "Exit status: #{exit_status}"
|
115
|
+
puts @stdout_head
|
116
|
+
puts '...'
|
117
|
+
puts @stdout_tail
|
118
|
+
puts buffer
|
119
|
+
puts
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
# @return [Float] number of seconds that a given spec took to run
|
124
|
+
def time_taken
|
125
|
+
@finished_at - @started_at
|
126
|
+
end
|
127
|
+
|
128
|
+
# Close all the files that are open, so they do not pile up
|
129
|
+
def close
|
130
|
+
@finished_at = current_time
|
131
|
+
@stdin.close
|
132
|
+
@stdout.close
|
133
|
+
@stderr.close
|
134
|
+
end
|
135
|
+
|
136
|
+
private
|
137
|
+
|
138
|
+
# Sets up a proper environment for a given spec to run and returns the run command
|
139
|
+
# All WaterDrop integration specs run in pristine mode with isolated Gemfiles
|
140
|
+
# @return [String] run command
|
141
|
+
def init_and_build_cmd
|
142
|
+
scenario_dir = File.dirname(@path)
|
143
|
+
# We copy the spec into a temp dir, not to pollute the spec location with logs, etc
|
144
|
+
temp_dir = Dir.mktmpdir
|
145
|
+
file_name = File.basename(@path)
|
146
|
+
|
147
|
+
FileUtils.cp_r("#{scenario_dir}/.", temp_dir)
|
148
|
+
|
149
|
+
<<~CMD
|
150
|
+
cd #{temp_dir} &&
|
151
|
+
WATERDROP_GEM_DIR=#{ROOT_PATH} \
|
152
|
+
bundle install &&
|
153
|
+
BUNDLE_AUTO_INSTALL=true \
|
154
|
+
WATERDROP_GEM_DIR=#{ROOT_PATH} \
|
155
|
+
bundle exec ruby #{file_name}
|
156
|
+
CMD
|
157
|
+
end
|
158
|
+
|
159
|
+
# @return [Float] current machine time
|
160
|
+
def current_time
|
161
|
+
Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
# Load all the specs
|
166
|
+
specs = Dir[ROOT_PATH.join('spec/integrations/**/*_spec.rb')]
|
167
|
+
|
168
|
+
FILTER_TYPE = ARGV[0] == '--exclude' ? 'exclude' : 'include'
|
169
|
+
|
170
|
+
# Remove the exclude flag
|
171
|
+
ARGV.shift if FILTER_TYPE == '--exclude'
|
172
|
+
|
173
|
+
# If filters is provided, apply
|
174
|
+
# Allows to provide several filters one after another and applies all of them
|
175
|
+
ARGV.each do |filter|
|
176
|
+
specs.delete_if do |name|
|
177
|
+
case FILTER_TYPE
|
178
|
+
when 'include'
|
179
|
+
!name.include?(filter)
|
180
|
+
when 'exclude'
|
181
|
+
name.include?(filter)
|
182
|
+
else
|
183
|
+
raise 'Invalid filter type'
|
184
|
+
end
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
# Randomize order
|
189
|
+
seed = (ENV['SPECS_SEED'] || rand(0..10_000)).to_i
|
190
|
+
|
191
|
+
puts "Random seed: #{seed}"
|
192
|
+
|
193
|
+
scenarios = specs
|
194
|
+
.shuffle(random: Random.new(seed))
|
195
|
+
.map { |integration| Scenario.new(integration) }
|
196
|
+
|
197
|
+
raise ArgumentError, "No integration specs with filters: #{ARGV.join(', ')}" if scenarios.empty?
|
198
|
+
|
199
|
+
puts "Running #{scenarios.size} scenarios"
|
200
|
+
|
201
|
+
finished_scenarios = []
|
202
|
+
|
203
|
+
scenarios.each do |scenario|
|
204
|
+
scenario.start
|
205
|
+
|
206
|
+
# Wait for this scenario to finish before moving to the next one
|
207
|
+
until scenario.finished?
|
208
|
+
sleep(0.1)
|
209
|
+
end
|
210
|
+
|
211
|
+
scenario.report
|
212
|
+
scenario.close
|
213
|
+
finished_scenarios << scenario
|
214
|
+
end
|
215
|
+
|
216
|
+
# Report longest scenarios
|
217
|
+
puts
|
218
|
+
puts "\nLongest scenarios:\n\n"
|
219
|
+
|
220
|
+
finished_scenarios.sort_by(&:time_taken).reverse.first(10).each do |long_scenario|
|
221
|
+
puts "[#{'%6.2f' % long_scenario.time_taken}] #{long_scenario.name}"
|
222
|
+
end
|
223
|
+
|
224
|
+
failed_scenarios = finished_scenarios.reject(&:success?)
|
225
|
+
|
226
|
+
if failed_scenarios.empty?
|
227
|
+
puts
|
228
|
+
else
|
229
|
+
# Report once more on the failed jobs
|
230
|
+
# This will only list scenarios that failed without printing their stdout here.
|
231
|
+
puts
|
232
|
+
puts "\nFailed scenarios:\n\n"
|
233
|
+
|
234
|
+
failed_scenarios.each do |scenario|
|
235
|
+
puts "\e[#{31}m#{'[FAILED]'}\e[0m #{scenario.name}"
|
236
|
+
end
|
237
|
+
|
238
|
+
puts
|
239
|
+
|
240
|
+
# Exit with 1 if not all scenarios were successful
|
241
|
+
exit 1
|
242
|
+
end
|
data/config/locales/errors.yml
CHANGED
@@ -19,6 +19,7 @@ en:
|
|
19
19
|
max_attempts_on_transaction_command_format: must be an integer that is equal or bigger than 1
|
20
20
|
reload_on_transaction_fatal_error_format: must be boolean
|
21
21
|
oauth.token_provider_listener_format: 'must be false or respond to #on_oauthbearer_token_refresh'
|
22
|
+
idle_disconnect_timeout_format: 'must be an integer that is equal to 0 or bigger than 30 000 (30 seconds)'
|
22
23
|
|
23
24
|
variant:
|
24
25
|
missing: must be present
|
data/lib/waterdrop/config.rb
CHANGED
@@ -76,6 +76,12 @@ module WaterDrop
|
|
76
76
|
# to keep going or should we stop. Since we will open a new instance and the failed transaction
|
77
77
|
# anyhow rolls back, we should be able to safely reload.
|
78
78
|
setting :reload_on_transaction_fatal_error, default: true
|
79
|
+
# option [Integer] Idle disconnect timeout in milliseconds. When set to 0, idle disconnection
|
80
|
+
# is disabled. When set to a positive value, WaterDrop will automatically disconnect
|
81
|
+
# producers that haven't sent any messages for the specified time period. This helps preserve
|
82
|
+
# TCP connections in low-intensity scenarios. Minimum value is 30 seconds (30_000 ms) to
|
83
|
+
# prevent overly aggressive disconnections.
|
84
|
+
setting :idle_disconnect_timeout, default: 0
|
79
85
|
|
80
86
|
# option [Boolean] should we send messages. Setting this to false can be really useful when
|
81
87
|
# testing and or developing because when set to false, won't actually ping Kafka but will
|
data/lib/waterdrop/contracts/config.rb
CHANGED
@@ -27,6 +27,9 @@ module WaterDrop
|
|
27
27
|
required(:wait_backoff_on_transaction_command) { |val| val.is_a?(Numeric) && val >= 0 }
|
28
28
|
required(:max_attempts_on_transaction_command) { |val| val.is_a?(Integer) && val >= 1 }
|
29
29
|
required(:reload_on_transaction_fatal_error) { |val| [true, false].include?(val) }
|
30
|
+
required(:idle_disconnect_timeout) do |val|
|
31
|
+
val.is_a?(Integer) && (val.zero? || val >= 30_000)
|
32
|
+
end
|
30
33
|
|
31
34
|
nested(:oauth) do
|
32
35
|
required(:token_provider_listener) do |val|
|
data/lib/waterdrop/instrumentation/idle_disconnector_listener.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module WaterDrop
|
4
|
+
module Instrumentation
|
5
|
+
# Idle disconnector listener that monitors producer activity and automatically disconnects
|
6
|
+
# idle producers to preserve TCP connections
|
7
|
+
#
|
8
|
+
# This listener subscribes to statistics.emitted events and tracks the txmsgs (transmitted
|
9
|
+
# messages) count. If the producer doesn't send any messages for a configurable timeout
|
10
|
+
# period, it will automatically disconnect the producer.
|
11
|
+
#
|
12
|
+
# @note We do not have to worry about the running transactions or buffer being used because
|
13
|
+
# the disconnect is graceful and will not disconnect unless it is allowed to. This is why
|
14
|
+
# we can simplify things and take interest only in txmsgs.
|
15
|
+
#
|
16
|
+
# @note For convenience, WaterDrop provides a config shortcut. Instead of manually subscribing
|
17
|
+
# this listener, you can simply set `config.idle_disconnect_timeout` in your producer config.
|
18
|
+
#
|
19
|
+
# @example Using config shortcut (recommended)
|
20
|
+
# WaterDrop::Producer.new do |config|
|
21
|
+
# config.idle_disconnect_timeout = 5 * 60 * 1000 # 5 minutes
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# @example Manual listener usage with 5 minute timeout
|
25
|
+
# producer.monitor.subscribe(
|
26
|
+
# WaterDrop::Instrumentation::IdleDisconnectorListener.new(
|
27
|
+
# producer,
|
28
|
+
# disconnect_timeout: 5 * 60 * 1000)
|
29
|
+
# )
|
30
|
+
#
|
31
|
+
# @example Usage with custom timeout
|
32
|
+
# idle_disconnector = WaterDrop::Instrumentation::IdleDisconnectorListener.new(
|
33
|
+
# producer,
|
34
|
+
# disconnect_timeout: 10 * 60 * 1000
|
35
|
+
# )
|
36
|
+
# producer.monitor.subscribe(idle_disconnector)
|
37
|
+
class IdleDisconnectorListener
|
38
|
+
include ::Karafka::Core::Helpers::Time
|
39
|
+
|
40
|
+
# @param producer [WaterDrop::Producer] the producer instance to monitor
|
41
|
+
# @param disconnect_timeout [Integer] timeout in milliseconds before disconnecting
|
42
|
+
# (default: 5 minutes). Be aware that if you set it to a value lower than statistics
|
43
|
+
# publishing interval (5 seconds by default) it may be too aggressive in closing
|
44
|
+
def initialize(producer, disconnect_timeout: 5 * 60 * 1_000)
|
45
|
+
@producer = producer
|
46
|
+
@disconnect_timeout = disconnect_timeout
|
47
|
+
# We set this initially to -1 so any statistics change triggers a change to prevent an
|
48
|
+
# early shutdown
|
49
|
+
@last_txmsgs = -1
|
50
|
+
@last_activity_time = monotonic_now
|
51
|
+
end
|
52
|
+
|
53
|
+
# This method is called automatically when the listener is subscribed to the monitor
|
54
|
+
# using producer.monitor.subscribe(listener_instance)
|
55
|
+
#
|
56
|
+
# @param event [Hash] the statistics event containing producer statistics
|
57
|
+
def on_statistics_emitted(event)
|
58
|
+
call(event[:statistics])
|
59
|
+
end
|
60
|
+
|
61
|
+
private
|
62
|
+
|
63
|
+
# Handles statistics.emitted events to monitor message transmission activity
|
64
|
+
# @param statistics [Hash] producer librdkafka statistics
|
65
|
+
def call(statistics)
|
66
|
+
current_txmsgs = statistics.fetch('txmsgs', 0)
|
67
|
+
current_time = monotonic_now
|
68
|
+
|
69
|
+
# Update activity if messages changed
|
70
|
+
if current_txmsgs != @last_txmsgs
|
71
|
+
@last_txmsgs = current_txmsgs
|
72
|
+
@last_activity_time = current_time
|
73
|
+
|
74
|
+
return
|
75
|
+
end
|
76
|
+
|
77
|
+
# Check for timeout and attempt disconnect
|
78
|
+
return unless (current_time - @last_activity_time) >= @disconnect_timeout
|
79
|
+
|
80
|
+
if @producer.disconnectable?
|
81
|
+
# Since the statistics operations happen from the rdkafka native thread, we cannot close
|
82
|
+
# it from itself as you cannot join on yourself as it would cause a deadlock. We spawn
|
83
|
+
# a thread to do this
|
84
|
+
# We do an early check if producer is in a viable state for a disconnect so in case its
|
85
|
+
# internal state would prevent us from disconnecting, we won't be spamming with new
|
86
|
+
# thread creation
|
87
|
+
Thread.new do
|
88
|
+
@producer.disconnect
|
89
|
+
rescue StandardError => e
|
90
|
+
@producer.monitor.instrument(
|
91
|
+
'error.occurred',
|
92
|
+
producer_id: @producer.id,
|
93
|
+
error: e,
|
94
|
+
type: 'producer.disconnect.error'
|
95
|
+
)
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
# We change this always because:
|
100
|
+
# - if we were able to disconnect, this should give us time before any potential future
|
101
|
+
# attempts. While they should not happen because events won't be published on a
|
102
|
+
# disconnected producer, this may still with frequent events be called post disconnect
|
103
|
+
# - if we were not able to disconnect, it means that there was something in the producer
|
104
|
+
# state that prevents it, and we consider this as activity as well
|
105
|
+
@last_activity_time = current_time
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
data/lib/waterdrop/instrumentation/logger_listener.rb
CHANGED
@@ -132,6 +132,18 @@ module WaterDrop
|
|
132
132
|
info(event, 'Closing producer')
|
133
133
|
end
|
134
134
|
|
135
|
+
# @param event [Dry::Events::Event] event that happened with the details
|
136
|
+
def on_producer_disconnecting(event)
|
137
|
+
info(event, 'Disconnecting producer')
|
138
|
+
end
|
139
|
+
|
140
|
+
# @param event [Dry::Events::Event] event that happened with the details
|
141
|
+
# @note While this says "Disconnected producer", it produces a nice message with time taken:
|
142
|
+
# "Disconnected producer took 5 ms" indicating it happened in the past.
|
143
|
+
def on_producer_disconnected(event)
|
144
|
+
info(event, 'Disconnected producer')
|
145
|
+
end
|
146
|
+
|
135
147
|
# @param event [Dry::Events::Event] event that happened with the details
|
136
148
|
def on_producer_reloaded(event)
|
137
149
|
info(event, 'Producer successfully reloaded')
|
data/lib/waterdrop/producer/status.rb
CHANGED
@@ -9,6 +9,8 @@ module WaterDrop
|
|
9
9
|
initial
|
10
10
|
configured
|
11
11
|
connected
|
12
|
+
disconnecting
|
13
|
+
disconnected
|
12
14
|
closing
|
13
15
|
closed
|
14
16
|
].freeze
|
@@ -22,11 +24,12 @@ module WaterDrop
|
|
22
24
|
end
|
23
25
|
|
24
26
|
# @return [Boolean] true if producer is in an active state. Active means, that we can start
|
25
|
-
# sending messages.
|
26
|
-
# which means, that producer is configured, but connection with Kafka is
|
27
|
-
#
|
27
|
+
# sending messages. Active states are connected (connection established), configured
|
28
|
+
# which means, that producer is configured, but connection with Kafka is not yet
|
29
|
+
# established or disconnected, meaning it was working but user disconnected for his own
|
30
|
+
# reasons though sending could reconnect and continue.
|
28
31
|
def active?
|
29
|
-
connected? || configured?
|
32
|
+
connected? || configured? || disconnecting? || disconnected?
|
30
33
|
end
|
31
34
|
|
32
35
|
# @return [String] current status as a string
|
data/lib/waterdrop/producer.rb
CHANGED
@@ -50,6 +50,12 @@ module WaterDrop
|
|
50
50
|
@connecting_mutex = Mutex.new
|
51
51
|
@operating_mutex = Mutex.new
|
52
52
|
@transaction_mutex = Mutex.new
|
53
|
+
@id = nil
|
54
|
+
@monitor = nil
|
55
|
+
@contract = nil
|
56
|
+
@default_variant = nil
|
57
|
+
@client = nil
|
58
|
+
@closing_thread_id = nil
|
53
59
|
|
54
60
|
@status = Status.new
|
55
61
|
@messages = []
|
@@ -73,6 +79,18 @@ module WaterDrop
|
|
73
79
|
@monitor = @config.monitor
|
74
80
|
@contract = Contracts::Message.new(max_payload_size: @config.max_payload_size)
|
75
81
|
@default_variant = Variant.new(self, default: true)
|
82
|
+
|
83
|
+
return @status.configured! if @config.idle_disconnect_timeout.zero?
|
84
|
+
|
85
|
+
# Setup idle disconnect listener if configured so we preserve tcp connections on rarely
|
86
|
+
# used producers
|
87
|
+
disconnector = Instrumentation::IdleDisconnectorListener.new(
|
88
|
+
self,
|
89
|
+
disconnect_timeout: @config.idle_disconnect_timeout
|
90
|
+
)
|
91
|
+
|
92
|
+
@monitor.subscribe(disconnector)
|
93
|
+
|
76
94
|
@status.configured!
|
77
95
|
end
|
78
96
|
|
@@ -178,6 +196,74 @@ module WaterDrop
|
|
178
196
|
@middleware ||= config.middleware
|
179
197
|
end
|
180
198
|
|
199
|
+
# Disconnects the producer from Kafka while keeping it configured for potential reconnection
|
200
|
+
#
|
201
|
+
# This method safely disconnects the underlying Kafka client while preserving the producer's
|
202
|
+
# configuration. Unlike `#close`, this allows the producer to be reconnected later by calling
|
203
|
+
# methods that require the client. The disconnection will only proceed if certain safety
|
204
|
+
# conditions are met.
|
205
|
+
#
|
206
|
+
# This API can be used to preserve connections on low-intensity producer instances, etc.
|
207
|
+
#
|
208
|
+
# @return [Boolean] true if disconnection was successful, false if disconnection was not
|
209
|
+
# possible due to safety conditions (active transactions, ongoing operations, pending
|
210
|
+
# messages in buffer, or if already disconnected)
|
211
|
+
#
|
212
|
+
# @note This method will refuse to disconnect if:
|
213
|
+
# - There are pending messages in the internal buffer
|
214
|
+
# - There are operations currently in progress
|
215
|
+
# - A transaction is currently active
|
216
|
+
# - The client is not currently connected
|
217
|
+
# - Required mutexes are locked by other operations
|
218
|
+
#
|
219
|
+
# @note After successful disconnection, the producer status changes to disconnected but
|
220
|
+
# remains configured, allowing for future reconnection when client access is needed.
|
221
|
+
def disconnect
|
222
|
+
return false unless disconnectable?
|
223
|
+
|
224
|
+
# Use the same mutex pattern as the regular close method to prevent race conditions
|
225
|
+
@transaction_mutex.synchronize do
|
226
|
+
@operating_mutex.synchronize do
|
227
|
+
@buffer_mutex.synchronize do
|
228
|
+
return false unless @client
|
229
|
+
return false unless @status.connected?
|
230
|
+
return false unless @messages.empty?
|
231
|
+
return false unless @operations_in_progress.value.zero?
|
232
|
+
|
233
|
+
@status.disconnecting!
|
234
|
+
@monitor.instrument('producer.disconnecting', producer_id: id)
|
235
|
+
|
236
|
+
@monitor.instrument('producer.disconnected', producer_id: id) do
|
237
|
+
# Close the client
|
238
|
+
@client.close
|
239
|
+
@client = nil
|
240
|
+
|
241
|
+
# Reset connection status but keep producer configured
|
242
|
+
@status.disconnected!
|
243
|
+
end
|
244
|
+
|
245
|
+
true
|
246
|
+
end
|
247
|
+
end
|
248
|
+
end
|
249
|
+
end
|
250
|
+
|
251
|
+
# Is the producer in a state from which we can disconnect
|
252
|
+
#
|
253
|
+
# @return [Boolean] is producer in a state that potentially allows for a disconnect
|
254
|
+
#
|
255
|
+
# @note This is a best effort method. The proper checks happen also when disconnecting behind
|
256
|
+
# all the needed mutexes
|
257
|
+
def disconnectable?
|
258
|
+
return false unless @client
|
259
|
+
return false unless @status.connected?
|
260
|
+
return false unless @messages.empty?
|
261
|
+
return false if @transaction_mutex.locked?
|
262
|
+
return false if @operating_mutex.locked?
|
263
|
+
|
264
|
+
true
|
265
|
+
end
|
266
|
+
|
181
267
|
# Flushes the buffers in a sync way and closes the producer
|
182
268
|
# @param force [Boolean] should we force closing even with outstanding messages after the
|
183
269
|
# max wait timeout
|
@@ -260,6 +346,37 @@ module WaterDrop
|
|
260
346
|
close(force: true)
|
261
347
|
end
|
262
348
|
|
349
|
+
# @return [String] mutex-safe inspect details
|
350
|
+
def inspect
|
351
|
+
# Basic info that's always safe to access
|
352
|
+
parts = []
|
353
|
+
parts << "id=#{@id.inspect}"
|
354
|
+
parts << "status=#{@status}" if @status
|
355
|
+
|
356
|
+
# Try to get buffer info safely
|
357
|
+
if @buffer_mutex.try_lock
|
358
|
+
begin
|
359
|
+
parts << "buffer_size=#{@messages.size}"
|
360
|
+
ensure
|
361
|
+
@buffer_mutex.unlock
|
362
|
+
end
|
363
|
+
else
|
364
|
+
parts << 'buffer_size=busy'
|
365
|
+
end
|
366
|
+
|
367
|
+
# Check if client is connected without triggering connection
|
368
|
+
parts << if @status.connected?
|
369
|
+
'connected=true'
|
370
|
+
else
|
371
|
+
'connected=false'
|
372
|
+
end
|
373
|
+
|
374
|
+
parts << "operations=#{@operations_in_progress.value}"
|
375
|
+
parts << 'in_transaction=true' if @transaction_mutex.locked?
|
376
|
+
|
377
|
+
"#<#{self.class.name}:#{format('%#x', object_id)} #{parts.join(' ')}>"
|
378
|
+
end
|
379
|
+
|
263
380
|
private
|
264
381
|
|
265
382
|
# Ensures that we don't run any operations when the producer is not configured or when it
|
data/lib/waterdrop/version.rb
CHANGED
data/renovate.json
CHANGED
@@ -7,12 +7,24 @@
|
|
7
7
|
"enabled": true,
|
8
8
|
"pinDigests": true
|
9
9
|
},
|
10
|
+
"includePaths": [
|
11
|
+
"Gemfile",
|
12
|
+
"waterdrop.gemspec",
|
13
|
+
"spec/integrations/**/Gemfile"
|
14
|
+
],
|
10
15
|
"packageRules": [
|
11
16
|
{
|
12
17
|
"matchManagers": [
|
13
18
|
"github-actions"
|
14
19
|
],
|
15
20
|
"minimumReleaseAge": "7 days"
|
21
|
+
},
|
22
|
+
{
|
23
|
+
"matchFileNames": [
|
24
|
+
"spec/integrations/**/Gemfile"
|
25
|
+
],
|
26
|
+
"groupName": "integration test dependencies",
|
27
|
+
"commitMessageTopic": "integration test dependencies"
|
16
28
|
}
|
17
29
|
]
|
18
30
|
}
|
data/waterdrop.gemspec
CHANGED
@@ -17,7 +17,7 @@ Gem::Specification.new do |spec|
|
|
17
17
|
spec.licenses = %w[LGPL-3.0-only Commercial]
|
18
18
|
|
19
19
|
spec.add_dependency 'karafka-core', '>= 2.4.9', '< 3.0.0'
|
20
|
-
spec.add_dependency 'karafka-rdkafka', '>= 0.
|
20
|
+
spec.add_dependency 'karafka-rdkafka', '>= 0.20.0'
|
21
21
|
spec.add_dependency 'zeitwerk', '~> 2.3'
|
22
22
|
|
23
23
|
spec.required_ruby_version = '>= 3.1.0'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: waterdrop
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.8.
|
4
|
+
version: 2.8.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maciej Mensfeld
|
@@ -35,14 +35,14 @@ dependencies:
|
|
35
35
|
requirements:
|
36
36
|
- - ">="
|
37
37
|
- !ruby/object:Gem::Version
|
38
|
-
version: 0.
|
38
|
+
version: 0.20.0
|
39
39
|
type: :runtime
|
40
40
|
prerelease: false
|
41
41
|
version_requirements: !ruby/object:Gem::Requirement
|
42
42
|
requirements:
|
43
43
|
- - ">="
|
44
44
|
- !ruby/object:Gem::Version
|
45
|
-
version: 0.
|
45
|
+
version: 0.20.0
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
47
|
name: zeitwerk
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
@@ -72,6 +72,7 @@ files:
|
|
72
72
|
- ".github/ISSUE_TEMPLATE/feature_request.md"
|
73
73
|
- ".github/workflows/ci.yml"
|
74
74
|
- ".github/workflows/push.yml"
|
75
|
+
- ".github/workflows/trigger-wiki-refresh.yml"
|
75
76
|
- ".github/workflows/verify-action-pins.yml"
|
76
77
|
- ".gitignore"
|
77
78
|
- ".rspec"
|
@@ -83,6 +84,7 @@ files:
|
|
83
84
|
- LICENSE
|
84
85
|
- README.md
|
85
86
|
- Rakefile
|
87
|
+
- bin/integrations
|
86
88
|
- bin/verify_kafka_warnings
|
87
89
|
- bin/verify_topics_naming
|
88
90
|
- config/locales/errors.yml
|
@@ -103,6 +105,7 @@ files:
|
|
103
105
|
- lib/waterdrop/instrumentation/callbacks/error.rb
|
104
106
|
- lib/waterdrop/instrumentation/callbacks/oauthbearer_token_refresh.rb
|
105
107
|
- lib/waterdrop/instrumentation/callbacks/statistics.rb
|
108
|
+
- lib/waterdrop/instrumentation/idle_disconnector_listener.rb
|
106
109
|
- lib/waterdrop/instrumentation/logger_listener.rb
|
107
110
|
- lib/waterdrop/instrumentation/monitor.rb
|
108
111
|
- lib/waterdrop/instrumentation/notifications.rb
|
@@ -147,7 +150,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
147
150
|
- !ruby/object:Gem::Version
|
148
151
|
version: '0'
|
149
152
|
requirements: []
|
150
|
-
rubygems_version: 3.6.
|
153
|
+
rubygems_version: 3.6.9
|
151
154
|
specification_version: 4
|
152
155
|
summary: Kafka messaging made easy!
|
153
156
|
test_files: []
|