cdc-concurrent 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/README.md +123 -1
- data/benchmark/README.md +165 -0
- data/benchmark/processor_pool_benchmark.rb +470 -0
- data/lib/cdc/concurrent/configuration.rb +15 -4
- data/lib/cdc/concurrent/processor_extensions.rb +21 -6
- data/lib/cdc/concurrent/processor_pool.rb +71 -9
- data/lib/cdc/concurrent/result_collector.rb +14 -5
- data/lib/cdc/concurrent/router.rb +16 -5
- data/lib/cdc/concurrent/runtime.rb +31 -11
- data/lib/cdc/concurrent/transaction_pool.rb +27 -9
- data/lib/cdc/concurrent/version.rb +1 -1
- data/sig/cdc/concurrent/processor_extensions.rbs +18 -5
- data/sig/cdc/concurrent/processor_pool.rbs +13 -13
- data/sig/cdc/concurrent/result_collector.rbs +2 -2
- data/sig/cdc/concurrent/router.rbs +7 -4
- data/sig/cdc/concurrent/runtime.rbs +11 -9
- data/sig/cdc/concurrent/transaction_pool.rbs +4 -4
- data/sig/cdc/concurrent/version.rbs +1 -1
- data/sig/cdc/concurrent.rbs +7 -0
- metadata +9 -3
- data/sig/shims/async.rbs +0 -20
- data/sig/shims/cdc_core.rbs +0 -31
|
@@ -0,0 +1,470 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "etc"
|
|
5
|
+
require "json"
|
|
6
|
+
require "socket"
|
|
7
|
+
require "time"
|
|
8
|
+
require "cdc_concurrent"
|
|
9
|
+
require "cdc/core"
|
|
10
|
+
|
|
11
|
+
# Reproducible processor-pool benchmark entrypoint.
|
|
12
|
+
module CDCConcurrentBenchmark # rubocop:disable Metrics/ModuleLength
|
|
13
|
+
module_function
|
|
14
|
+
|
|
15
|
+
Config = Data.define(
|
|
16
|
+
:iterations,
|
|
17
|
+
:warmup,
|
|
18
|
+
:concurrency_counts,
|
|
19
|
+
:trials,
|
|
20
|
+
:min_duration,
|
|
21
|
+
:workload,
|
|
22
|
+
:batch_size,
|
|
23
|
+
:io_sleep
|
|
24
|
+
)
|
|
25
|
+
Trial = Data.define(:elapsed, :passes, :effective_events)
|
|
26
|
+
|
|
27
|
+
VALID_WORKLOADS = %w[tiny io batch].freeze
|
|
28
|
+
|
|
29
|
+
# Reads an integer setting from the environment.
#
# The value is parsed strictly as base 10. A zero-padded value such as "010"
# therefore means ten instead of being read as octal, and radix-prefixed
# forms such as "0x10" or "0b11" are rejected rather than silently accepted.
#
# @param name [String] Environment variable to read.
# @param default [Integer] Value used when the variable is not set.
# @return [Integer] The parsed value.
#
# Prints a message to stderr and exits with status 1 when the value is not a
# base-10 integer.
def integer_env(name, default)
  value = ENV.fetch(name, default.to_s)
  # Explicit base: Integer(value) alone would honor 0/0x/0b/0o prefixes.
  Integer(value, 10)
rescue ArgumentError
  warn "#{name} must be an integer; got #{value.inspect}"
  exit 1
end
|
|
36
|
+
|
|
37
|
+
def positive_integer_env(name, default)
|
|
38
|
+
value = integer_env(name, default)
|
|
39
|
+
return value if value.positive?
|
|
40
|
+
|
|
41
|
+
warn "#{name} must be greater than zero; got #{value.inspect}"
|
|
42
|
+
exit 1
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def nonnegative_integer_env(name, default)
|
|
46
|
+
value = integer_env(name, default)
|
|
47
|
+
return value if value >= 0
|
|
48
|
+
|
|
49
|
+
warn "#{name} must be zero or greater; got #{value.inspect}"
|
|
50
|
+
exit 1
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# Reads a floating-point setting from the environment.
#
# Only finite numbers are accepted. Float() turns an out-of-range literal
# such as "1e999" into Infinity, which would pass a "greater than zero"
# check and then be used as a duration, so such values are rejected here.
#
# @param name [String] Environment variable to read.
# @param default [Numeric] Value used when the variable is not set.
# @return [Float] The parsed, finite value.
#
# Prints a message to stderr and exits with status 1 when the value is not
# numeric or is not finite.
def float_env(name, default)
  value = ENV.fetch(name, default.to_s)
  number = Float(value)
  return number if number.finite?

  warn "#{name} must be a finite number; got #{value.inspect}"
  exit 1
rescue ArgumentError
  warn "#{name} must be numeric; got #{value.inspect}"
  exit 1
end
|
|
60
|
+
|
|
61
|
+
def positive_float_env(name, default)
|
|
62
|
+
value = float_env(name, default)
|
|
63
|
+
return value if value.positive?
|
|
64
|
+
|
|
65
|
+
warn "#{name} must be greater than zero; got #{value.inspect}"
|
|
66
|
+
exit 1
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def nonnegative_float_env(name, default)
|
|
70
|
+
value = float_env(name, default)
|
|
71
|
+
return value if value >= 0
|
|
72
|
+
|
|
73
|
+
warn "#{name} must be zero or greater; got #{value.inspect}"
|
|
74
|
+
exit 1
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# Resolves the list of concurrency levels to benchmark.
#
# When BENCHMARK_CONCURRENCY_COUNTS is set it is read as a comma-separated
# list of positive integers. Otherwise a single-element list is built from
# BENCHMARK_CONCURRENCY (default 100).
#
# Entries are parsed strictly as base 10, so "010" means ten rather than
# octal eight and radix-prefixed entries such as "0x10" are rejected.
#
# @return [Array<Integer>] Concurrency levels, in the order given.
#
# Prints a message to stderr and exits with status 1 when the list is empty,
# contains a non-integer entry, or contains a value that is not positive.
def concurrency_counts_env
  value = ENV.fetch("BENCHMARK_CONCURRENCY_COUNTS", nil)
  return [positive_integer_env("BENCHMARK_CONCURRENCY", 100)] unless value

  # Explicit base: Integer(entry) alone would honor 0/0x/0b/0o prefixes.
  counts = value.split(",").map { |entry| Integer(entry.strip, 10) }
  return counts if counts.any? && counts.all?(&:positive?)

  warn "BENCHMARK_CONCURRENCY_COUNTS must contain positive integers; got #{value.inspect}"
  exit 1
rescue ArgumentError
  warn "BENCHMARK_CONCURRENCY_COUNTS must be a comma-separated integer list; got #{value.inspect}"
  exit 1
end
|
|
90
|
+
|
|
91
|
+
def workload_env
|
|
92
|
+
workload = ENV.fetch("BENCHMARK_WORKLOAD", "io")
|
|
93
|
+
|
|
94
|
+
return workload if VALID_WORKLOADS.include?(workload)
|
|
95
|
+
|
|
96
|
+
warn "BENCHMARK_WORKLOAD must be one of #{VALID_WORKLOADS.join(", ")}; got #{workload.inspect}"
|
|
97
|
+
exit 1
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
def monotonic
|
|
101
|
+
Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def config
|
|
105
|
+
Config.new(
|
|
106
|
+
iterations: positive_integer_env("BENCHMARK_ITERATIONS", 1_000),
|
|
107
|
+
warmup: nonnegative_integer_env("BENCHMARK_WARMUP", 100),
|
|
108
|
+
concurrency_counts: concurrency_counts_env,
|
|
109
|
+
trials: positive_integer_env("BENCHMARK_TRIALS", 5),
|
|
110
|
+
min_duration: positive_float_env("BENCHMARK_MIN_DURATION", 0.1),
|
|
111
|
+
workload: workload_env,
|
|
112
|
+
batch_size: positive_integer_env("BENCHMARK_BATCH_SIZE", 100),
|
|
113
|
+
io_sleep: nonnegative_float_env("BENCHMARK_IO_SLEEP", 0.001)
|
|
114
|
+
)
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
def change_event(counter)
|
|
118
|
+
CDC::Core::ChangeEvent.new(
|
|
119
|
+
operation: :update,
|
|
120
|
+
schema: "public",
|
|
121
|
+
table: "benchmark_events",
|
|
122
|
+
old_values: { "counter" => counter - 1 },
|
|
123
|
+
new_values: { "counter" => counter },
|
|
124
|
+
transaction_id: counter
|
|
125
|
+
)
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
def event
|
|
129
|
+
change_event(42)
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
def batch_event(settings)
|
|
133
|
+
Array.new(settings.batch_size) { |index| change_event(index + 1) }
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
# Minimal concurrent-safe processor.
|
|
137
|
+
#
|
|
138
|
+
# This intentionally benchmarks the overhead of the processor pool itself.
|
|
139
|
+
class TinyProcessor < CDC::Core::Processor
|
|
140
|
+
concurrent_safe!
|
|
141
|
+
|
|
142
|
+
def process(event)
|
|
143
|
+
payload = {
|
|
144
|
+
operation: event.operation,
|
|
145
|
+
schema: event.schema,
|
|
146
|
+
table: event.table,
|
|
147
|
+
changed: event.new_values.keys
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
CDC::Core::ProcessorResult.success(payload)
|
|
151
|
+
end
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
# I/O-like processor.
|
|
155
|
+
#
|
|
156
|
+
# The sleep call cooperates with Async's fiber scheduler inside the pool.
|
|
157
|
+
class IoProcessor < CDC::Core::Processor
|
|
158
|
+
concurrent_safe!
|
|
159
|
+
|
|
160
|
+
def initialize(sleep_seconds:)
|
|
161
|
+
@sleep_seconds = sleep_seconds
|
|
162
|
+
super()
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
def process(event)
|
|
166
|
+
sleep @sleep_seconds
|
|
167
|
+
|
|
168
|
+
CDC::Core::ProcessorResult.success(
|
|
169
|
+
{
|
|
170
|
+
operation: event.operation,
|
|
171
|
+
table: event.table,
|
|
172
|
+
waited_seconds: @sleep_seconds
|
|
173
|
+
}
|
|
174
|
+
)
|
|
175
|
+
end
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
# Batch I/O-like processor.
|
|
179
|
+
#
|
|
180
|
+
# This models one runtime dispatch that fans out over a group of CDC events.
|
|
181
|
+
class BatchIoProcessor < CDC::Core::Processor
|
|
182
|
+
concurrent_safe!
|
|
183
|
+
|
|
184
|
+
def initialize(sleep_seconds:)
|
|
185
|
+
@sleep_seconds = sleep_seconds
|
|
186
|
+
super()
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
def process(events)
|
|
190
|
+
sleep @sleep_seconds
|
|
191
|
+
|
|
192
|
+
CDC::Core::ProcessorResult.success(
|
|
193
|
+
{
|
|
194
|
+
count: events.length,
|
|
195
|
+
tables: events.map(&:table).uniq,
|
|
196
|
+
waited_seconds: @sleep_seconds
|
|
197
|
+
}
|
|
198
|
+
)
|
|
199
|
+
end
|
|
200
|
+
end
|
|
201
|
+
|
|
202
|
+
def processor_for(settings)
|
|
203
|
+
case settings.workload
|
|
204
|
+
when "tiny"
|
|
205
|
+
TinyProcessor.new
|
|
206
|
+
when "io"
|
|
207
|
+
IoProcessor.new(sleep_seconds: settings.io_sleep)
|
|
208
|
+
when "batch"
|
|
209
|
+
BatchIoProcessor.new(sleep_seconds: settings.io_sleep)
|
|
210
|
+
else
|
|
211
|
+
raise "unsupported workload: #{settings.workload}"
|
|
212
|
+
end
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
def sample_event_for(settings)
|
|
216
|
+
settings.workload == "batch" ? batch_event(settings) : event
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
def effective_events_per_pass(settings)
|
|
220
|
+
settings.workload == "batch" ? settings.iterations * settings.batch_size : settings.iterations
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
def run_trial(settings)
|
|
224
|
+
passes = 0
|
|
225
|
+
started_at = monotonic
|
|
226
|
+
|
|
227
|
+
loop do
|
|
228
|
+
yield
|
|
229
|
+
passes += 1
|
|
230
|
+
break if monotonic - started_at >= settings.min_duration
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
trial_for(settings, passes, monotonic - started_at)
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
def trial_for(settings, passes, elapsed)
|
|
237
|
+
Trial.new(
|
|
238
|
+
elapsed: elapsed,
|
|
239
|
+
passes: passes,
|
|
240
|
+
effective_events: passes * effective_events_per_pass(settings)
|
|
241
|
+
)
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
def run_trials(settings, &)
|
|
245
|
+
Array.new(settings.trials) { run_trial(settings, &) }
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
def serial_trials(settings, sample_event)
|
|
249
|
+
processor = processor_for(settings)
|
|
250
|
+
settings.warmup.times { processor.process(sample_event) }
|
|
251
|
+
|
|
252
|
+
run_trials(settings) do
|
|
253
|
+
settings.iterations.times do
|
|
254
|
+
result = processor.process(sample_event)
|
|
255
|
+
raise "serial processor failed" unless result.success?
|
|
256
|
+
end
|
|
257
|
+
end
|
|
258
|
+
end
|
|
259
|
+
|
|
260
|
+
def repeated_process_trials(settings, sample_event, concurrency)
|
|
261
|
+
with_pool(settings, concurrency) do |pool|
|
|
262
|
+
settings.warmup.times { pool.process(sample_event) }
|
|
263
|
+
|
|
264
|
+
run_trials(settings) do
|
|
265
|
+
settings.iterations.times do
|
|
266
|
+
result = pool.process(sample_event)
|
|
267
|
+
raise "pool.process failed" unless result.success?
|
|
268
|
+
end
|
|
269
|
+
end
|
|
270
|
+
end
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
def process_many_trials(settings, sample_event, concurrency)
|
|
274
|
+
with_pool(settings, concurrency) do |pool|
|
|
275
|
+
warmup_items = Array.new(settings.warmup) { sample_event }
|
|
276
|
+
benchmark_items = Array.new(settings.iterations) { sample_event }
|
|
277
|
+
|
|
278
|
+
pool.process_many(warmup_items)
|
|
279
|
+
run_trials(settings) do
|
|
280
|
+
results = pool.process_many(benchmark_items)
|
|
281
|
+
raise "pool.process_many failed" unless results.all?(&:success?)
|
|
282
|
+
end
|
|
283
|
+
end
|
|
284
|
+
end
|
|
285
|
+
|
|
286
|
+
def with_pool(settings, concurrency)
|
|
287
|
+
pool = CDC::Concurrent::ProcessorPool.new(
|
|
288
|
+
processor: processor_for(settings),
|
|
289
|
+
concurrency: concurrency
|
|
290
|
+
)
|
|
291
|
+
|
|
292
|
+
yield pool
|
|
293
|
+
ensure
|
|
294
|
+
pool&.shutdown
|
|
295
|
+
end
|
|
296
|
+
|
|
297
|
+
def report(settings, serial)
|
|
298
|
+
{
|
|
299
|
+
benchmark: "processor_pool",
|
|
300
|
+
gem: "cdc-concurrent",
|
|
301
|
+
timestamp: Time.now.utc.iso8601,
|
|
302
|
+
**report_body(settings, serial)
|
|
303
|
+
}
|
|
304
|
+
end
|
|
305
|
+
|
|
306
|
+
def report_body(settings, serial)
|
|
307
|
+
{
|
|
308
|
+
environment: environment,
|
|
309
|
+
config: config_report(settings),
|
|
310
|
+
workload_options: workload_options(settings),
|
|
311
|
+
serial: summarize_trials(serial),
|
|
312
|
+
concurrency_sweep: concurrency_sweep(settings, serial),
|
|
313
|
+
interpretation: interpretation
|
|
314
|
+
}
|
|
315
|
+
end
|
|
316
|
+
|
|
317
|
+
def config_report(settings)
|
|
318
|
+
{
|
|
319
|
+
iterations: settings.iterations,
|
|
320
|
+
warmup: settings.warmup,
|
|
321
|
+
trials: settings.trials,
|
|
322
|
+
min_duration_seconds: settings.min_duration,
|
|
323
|
+
concurrency_counts: settings.concurrency_counts,
|
|
324
|
+
workload: settings.workload
|
|
325
|
+
}
|
|
326
|
+
end
|
|
327
|
+
|
|
328
|
+
def environment
|
|
329
|
+
{
|
|
330
|
+
ruby: RUBY_DESCRIPTION,
|
|
331
|
+
ruby_engine: defined?(RUBY_ENGINE) ? RUBY_ENGINE : "ruby",
|
|
332
|
+
ruby_engine_version: defined?(RUBY_ENGINE_VERSION) ? RUBY_ENGINE_VERSION : RUBY_VERSION,
|
|
333
|
+
platform: RUBY_PLATFORM,
|
|
334
|
+
hostname: Socket.gethostname,
|
|
335
|
+
cpu_count: Etc.nprocessors,
|
|
336
|
+
uname: Etc.respond_to?(:uname) ? Etc.uname : {}
|
|
337
|
+
}
|
|
338
|
+
end
|
|
339
|
+
|
|
340
|
+
# Builds the workload-specific settings reported alongside the results.
#
# Only the options relevant to the selected workload are included: nothing
# for "tiny", the sleep duration for "io", and both the batch size and the
# sleep duration for "batch".
#
# @param settings [Config] Benchmark settings; reads workload, batch_size
#   and io_sleep.
# @return [Hash] Options for the selected workload.
# @raise [RuntimeError] If the workload is not one of the supported names,
#   matching processor_for instead of silently returning nil.
def workload_options(settings)
  case settings.workload
  when "tiny"
    {}
  when "io"
    { io_sleep_seconds: settings.io_sleep }
  when "batch"
    { batch_size: settings.batch_size, io_sleep_seconds: settings.io_sleep }
  else
    raise "unsupported workload: #{settings.workload}"
  end
end
|
|
350
|
+
|
|
351
|
+
def concurrency_sweep(settings, serial)
|
|
352
|
+
sample_event = sample_event_for(settings)
|
|
353
|
+
|
|
354
|
+
settings.concurrency_counts.map do |concurrency|
|
|
355
|
+
repeated = repeated_process_trials(settings, sample_event, concurrency)
|
|
356
|
+
many = process_many_trials(settings, sample_event, concurrency)
|
|
357
|
+
|
|
358
|
+
concurrency_report(concurrency, serial, repeated, many)
|
|
359
|
+
end
|
|
360
|
+
end
|
|
361
|
+
|
|
362
|
+
def concurrency_report(concurrency, serial, repeated, many)
|
|
363
|
+
{
|
|
364
|
+
concurrency: concurrency,
|
|
365
|
+
repeated_process: concurrent_summary(serial, repeated),
|
|
366
|
+
process_many: concurrent_summary(serial, many)
|
|
367
|
+
}
|
|
368
|
+
end
|
|
369
|
+
|
|
370
|
+
def concurrent_summary(serial, trials)
|
|
371
|
+
throughput_ratio = ratio(
|
|
372
|
+
serial_value: median(throughputs(serial)),
|
|
373
|
+
concurrent_value: median(throughputs(trials))
|
|
374
|
+
)
|
|
375
|
+
summary = summarize_trials(trials)
|
|
376
|
+
|
|
377
|
+
summary.merge(
|
|
378
|
+
ratio_to_serial_median_events_per_second: throughput_ratio,
|
|
379
|
+
interpretation: interpretation_for(throughput_ratio)
|
|
380
|
+
)
|
|
381
|
+
end
|
|
382
|
+
|
|
383
|
+
def summarize_trials(trials)
|
|
384
|
+
elapsed = trials.map(&:elapsed)
|
|
385
|
+
throughput = throughputs(trials)
|
|
386
|
+
|
|
387
|
+
{
|
|
388
|
+
trials: trials.length,
|
|
389
|
+
elapsed_seconds: distribution(elapsed),
|
|
390
|
+
events_per_second: distribution(throughput),
|
|
391
|
+
effective_events: distribution(trials.map(&:effective_events)),
|
|
392
|
+
passes: distribution(trials.map(&:passes)),
|
|
393
|
+
raw_trials: trials.map { |trial| trial_report(trial) }
|
|
394
|
+
}
|
|
395
|
+
end
|
|
396
|
+
|
|
397
|
+
def trial_report(trial)
|
|
398
|
+
{
|
|
399
|
+
elapsed_seconds: trial.elapsed.round(6),
|
|
400
|
+
effective_events: trial.effective_events,
|
|
401
|
+
passes: trial.passes,
|
|
402
|
+
events_per_second: (trial.effective_events / trial.elapsed).round(2)
|
|
403
|
+
}
|
|
404
|
+
end
|
|
405
|
+
|
|
406
|
+
def distribution(values)
|
|
407
|
+
sorted = values.sort
|
|
408
|
+
|
|
409
|
+
{
|
|
410
|
+
min: format_stat(sorted.first),
|
|
411
|
+
median: format_stat(median(sorted)),
|
|
412
|
+
max: format_stat(sorted.last),
|
|
413
|
+
p95: format_stat(percentile(sorted, 95))
|
|
414
|
+
}
|
|
415
|
+
end
|
|
416
|
+
|
|
417
|
+
def median(values)
|
|
418
|
+
sorted = values.sort
|
|
419
|
+
mid = sorted.length / 2
|
|
420
|
+
|
|
421
|
+
return sorted[mid] if sorted.length.odd?
|
|
422
|
+
|
|
423
|
+
(sorted[mid - 1] + sorted[mid]) / 2.0
|
|
424
|
+
end
|
|
425
|
+
|
|
426
|
+
# Picks the value at the given percentile from an already-sorted list.
#
# The rank is the percentile's fractional position across the index range
# 0..(length - 1), rounded up to the next whole index.
#
# The multiplication happens before the division so that whole-number ranks
# stay exact. Dividing first (percentile / 100.0) introduces a rounding
# error that can push the product just above an integer, e.g.
# 0.07 * 100 == 7.000000000000001, and ceil would then select one element
# too far.
#
# @param sorted_values [Array<Numeric>] Values in ascending order.
# @param percentile [Numeric] Percentile to select, from 0 to 100.
# @return [Numeric, nil] The selected value, or nil for an empty list.
def percentile(sorted_values, percentile)
  index = (percentile * (sorted_values.length - 1) / 100.0).ceil
  sorted_values[index]
end
|
|
430
|
+
|
|
431
|
+
def format_stat(value)
|
|
432
|
+
value.is_a?(Integer) ? value : value.round(6)
|
|
433
|
+
end
|
|
434
|
+
|
|
435
|
+
def throughputs(trials)
|
|
436
|
+
trials.map { |trial| trial.effective_events / trial.elapsed }
|
|
437
|
+
end
|
|
438
|
+
|
|
439
|
+
def ratio(serial_value:, concurrent_value:)
|
|
440
|
+
(concurrent_value / serial_value).round(4)
|
|
441
|
+
end
|
|
442
|
+
|
|
443
|
+
def interpretation_for(value)
|
|
444
|
+
if value > 1
|
|
445
|
+
"concurrent faster"
|
|
446
|
+
elsif value == 1
|
|
447
|
+
"concurrent equal to serial"
|
|
448
|
+
else
|
|
449
|
+
"serial faster"
|
|
450
|
+
end
|
|
451
|
+
end
|
|
452
|
+
|
|
453
|
+
def interpretation
|
|
454
|
+
{
|
|
455
|
+
"ratio > 1.0" => "concurrent median throughput is higher",
|
|
456
|
+
"ratio = 1.0" => "concurrent and serial median throughput are equivalent",
|
|
457
|
+
"ratio < 1.0" => "serial median throughput is higher"
|
|
458
|
+
}
|
|
459
|
+
end
|
|
460
|
+
|
|
461
|
+
def run
|
|
462
|
+
settings = config
|
|
463
|
+
sample_event = sample_event_for(settings)
|
|
464
|
+
serial = serial_trials(settings, sample_event)
|
|
465
|
+
|
|
466
|
+
puts JSON.pretty_generate(report(settings, serial))
|
|
467
|
+
end
|
|
468
|
+
end
|
|
469
|
+
|
|
470
|
+
CDCConcurrentBenchmark.run
|
|
@@ -2,13 +2,24 @@
|
|
|
2
2
|
|
|
3
3
|
module CDC
|
|
4
4
|
module Concurrent
|
|
5
|
-
# Immutable configuration for concurrent
|
|
5
|
+
# Immutable configuration for cdc-concurrent runtime pools.
|
|
6
|
+
#
|
|
7
|
+
# A Configuration instance captures the execution limits shared by
|
|
8
|
+
# ProcessorPool, TransactionPool, and Runtime. Instances are frozen so pool
|
|
9
|
+
# behavior cannot change while work is being processed.
|
|
6
10
|
class Configuration
|
|
11
|
+
# @return [Integer] Maximum number of Async tasks allowed to run concurrently.
|
|
12
|
+
# @return [Float, nil] Optional per-event processing timeout in seconds.
|
|
13
|
+
# @return [Boolean] Whether batch results should be returned in input order.
|
|
7
14
|
attr_reader :concurrency, :timeout, :preserve_order
|
|
8
15
|
|
|
9
|
-
#
|
|
10
|
-
#
|
|
11
|
-
# @param
|
|
16
|
+
# Builds a frozen runtime configuration.
|
|
17
|
+
#
|
|
18
|
+
# @param concurrency [Integer] Maximum number of Async tasks allowed to run at once.
|
|
19
|
+
# @param timeout [Float, nil] Optional per-event processing timeout in seconds.
|
|
20
|
+
# @param preserve_order [Boolean] Whether batch results should preserve input order.
|
|
21
|
+
# @raise [ArgumentError] If concurrency is not a positive Integer.
|
|
22
|
+
# @return [void] Does not return a useful value.
|
|
12
23
|
def initialize(concurrency: 100, timeout: nil, preserve_order: true)
|
|
13
24
|
raise ArgumentError, "concurrency must be an Integer" unless concurrency.is_a?(Integer)
|
|
14
25
|
raise ArgumentError, "concurrency must be greater than zero" unless concurrency.positive?
|
|
@@ -4,21 +4,36 @@ module CDC
|
|
|
4
4
|
# Optional concurrent runtime adapter for cdc-core processors.
|
|
5
5
|
module Concurrent
|
|
6
6
|
# Adds concurrent-safe declarations to CDC::Core::Processor subclasses.
|
|
7
|
+
#
|
|
8
|
+
# cdc-concurrent requires processors to explicitly opt in before they can be
|
|
9
|
+
# executed by Async-based runtime pools. The declaration is intentionally
|
|
10
|
+
# separate from ractor_safe! so processor authors can describe the execution
|
|
11
|
+
# model they support.
|
|
7
12
|
module ProcessorExtensions
|
|
8
|
-
# Class
|
|
13
|
+
# Class-level declarations added to CDC::Core::Processor.
|
|
9
14
|
module ClassMethods
|
|
10
|
-
#
|
|
11
|
-
|
|
15
|
+
# Marks the processor class as safe for cdc-concurrent execution.
|
|
16
|
+
#
|
|
17
|
+
# This declaration means processor instances may be executed by the
|
|
18
|
+
# Async task fan-out runtime. It does not imply Ractor safety.
|
|
19
|
+
#
|
|
20
|
+
# @return [true] Always returns true after setting the declaration flag.
|
|
21
|
+
def concurrent_safe! # rubocop:disable Naming/PredicateMethod
|
|
12
22
|
@concurrent_safe = true
|
|
23
|
+
true
|
|
13
24
|
end
|
|
14
25
|
|
|
15
|
-
#
|
|
26
|
+
# Reports whether the processor class declared concurrent_safe!.
|
|
27
|
+
#
|
|
28
|
+
# @return [Boolean] True when this processor class opted into concurrent execution.
|
|
16
29
|
def concurrent_safe?
|
|
17
30
|
@concurrent_safe == true
|
|
18
31
|
end
|
|
19
32
|
end
|
|
20
33
|
|
|
21
|
-
#
|
|
34
|
+
# Reports whether this processor instance is safe for concurrent execution.
|
|
35
|
+
#
|
|
36
|
+
# @return [Boolean] True when the processor class declared concurrent_safe!.
|
|
22
37
|
def concurrent_safe?
|
|
23
38
|
self.class.instance_variable_get(:@concurrent_safe) == true
|
|
24
39
|
end
|
|
@@ -26,7 +41,7 @@ module CDC
|
|
|
26
41
|
|
|
27
42
|
# Installs concurrent-safe declarations on CDC::Core::Processor.
|
|
28
43
|
#
|
|
29
|
-
# @return [void]
|
|
44
|
+
# @return [void] Does not return a useful value.
|
|
30
45
|
def self.install_processor_extensions!
|
|
31
46
|
CDC::Core::Processor.extend(ProcessorExtensions::ClassMethods)
|
|
32
47
|
CDC::Core::Processor.include(ProcessorExtensions)
|
|
@@ -3,11 +3,62 @@
|
|
|
3
3
|
module CDC
|
|
4
4
|
module Concurrent
|
|
5
5
|
# Executes one concurrent-safe processor using Async tasks.
|
|
6
|
+
#
|
|
7
|
+
# ARCHITECTURAL NOTE
|
|
8
|
+
#
|
|
9
|
+
# cdc-concurrent implements the same fan-out / fan-in execution pattern used
|
|
10
|
+
# by cdc-parallel. The runtime differs, but the processor contract and result
|
|
11
|
+
# contract remain the same.
|
|
12
|
+
#
|
|
13
|
+
# events
|
|
14
|
+
# |
|
|
15
|
+
# v
|
|
16
|
+
# fan-out
|
|
17
|
+
# |
|
|
18
|
+
# +----> Async task
|
|
19
|
+
# +----> Async task
|
|
20
|
+
# +----> Async task
|
|
21
|
+
# |
|
|
22
|
+
# v
|
|
23
|
+
# fan-in
|
|
24
|
+
# |
|
|
25
|
+
# v
|
|
26
|
+
# ProcessorResult array
|
|
27
|
+
#
|
|
28
|
+
# Fan-out:
|
|
29
|
+
#
|
|
30
|
+
# * Events are dispatched into Async tasks.
|
|
31
|
+
# * Async::Semaphore bounds the number of concurrently running tasks.
|
|
32
|
+
# * Multiple events may make progress concurrently under Ruby's scheduler.
|
|
33
|
+
#
|
|
34
|
+
# Fan-in:
|
|
35
|
+
#
|
|
36
|
+
# * Tasks append indexed ProcessorResult values into a shared collection.
|
|
37
|
+
# * Results may complete out of execution order.
|
|
38
|
+
# * When preserve_order is enabled, ProcessorPool sorts by submission index so
|
|
39
|
+
# the returned array matches the input order.
|
|
40
|
+
#
|
|
41
|
+
# Relationship to cdc-parallel:
|
|
42
|
+
#
|
|
43
|
+
# * cdc-concurrent performs fan-out using Async tasks and cooperative
|
|
44
|
+
# concurrency.
|
|
45
|
+
# * cdc-parallel performs fan-out using pre-warmed Ractor workers and true
|
|
46
|
+
# parallel execution.
|
|
47
|
+
# * Both runtimes preserve the same processor contract and return
|
|
48
|
+
# CDC::Core::ProcessorResult objects.
|
|
49
|
+
#
|
|
50
|
+
# Processor authors should be able to switch runtimes without changing
|
|
51
|
+
# processor behavior when their processor satisfies the selected runtime's
|
|
52
|
+
# safety declaration.
|
|
6
53
|
class ProcessorPool
|
|
7
|
-
#
|
|
8
|
-
#
|
|
9
|
-
# @param
|
|
10
|
-
# @param
|
|
54
|
+
# Builds an Async-backed processor pool.
|
|
55
|
+
#
|
|
56
|
+
# @param processor [CDC::Core::Processor] Processor instance that declares concurrent_safe!.
|
|
57
|
+
# @param concurrency [Integer] Maximum number of Async tasks allowed to run at once.
|
|
58
|
+
# @param timeout [Float, nil] Optional per-event processing timeout in seconds.
|
|
59
|
+
# @param preserve_order [Boolean] Whether process_many should return results in input order.
|
|
60
|
+
# @raise [UnsafeProcessorError] If the processor does not declare concurrent_safe!.
|
|
61
|
+
# @return [void] Does not return a useful value.
|
|
11
62
|
def initialize(processor:, concurrency: 100, timeout: nil, preserve_order: true)
|
|
12
63
|
validate_processor!(processor)
|
|
13
64
|
|
|
@@ -16,16 +67,25 @@ module CDC
|
|
|
16
67
|
@shutdown = false
|
|
17
68
|
end
|
|
18
69
|
|
|
19
|
-
#
|
|
20
|
-
#
|
|
70
|
+
# Processes one event synchronously through the Async runtime.
|
|
71
|
+
#
|
|
72
|
+
# @param event [CDC::Core::ChangeEvent] Event to process.
|
|
73
|
+
# @raise [ShutdownError] If the pool has already been shut down.
|
|
74
|
+
# @return [CDC::Core::ProcessorResult] Normalized processor result.
|
|
21
75
|
def process(event)
|
|
22
76
|
raise ShutdownError, "processor pool has been shut down" if @shutdown
|
|
23
77
|
|
|
24
78
|
process_one(event)
|
|
25
79
|
end
|
|
26
80
|
|
|
27
|
-
#
|
|
28
|
-
#
|
|
81
|
+
# Processes many events through bounded Async fan-out.
|
|
82
|
+
#
|
|
83
|
+
# When preserve_order is true, the returned array matches the order of the
|
|
84
|
+
# supplied events even if individual tasks complete out of order.
|
|
85
|
+
#
|
|
86
|
+
# @param events [Array<CDC::Core::ChangeEvent>] Events to process.
|
|
87
|
+
# @raise [ShutdownError] If the pool has already been shut down.
|
|
88
|
+
# @return [Array<CDC::Core::ProcessorResult>] Frozen array of normalized results.
|
|
29
89
|
def process_many(events)
|
|
30
90
|
raise ShutdownError, "processor pool has been shut down" if @shutdown
|
|
31
91
|
return empty_results if events.empty?
|
|
@@ -39,7 +99,9 @@ module CDC
|
|
|
39
99
|
indexed_results.map(&:last).freeze
|
|
40
100
|
end
|
|
41
101
|
|
|
42
|
-
#
|
|
102
|
+
# Prevents new work from being submitted to the pool.
|
|
103
|
+
#
|
|
104
|
+
# @return [void] Does not return a useful value.
|
|
43
105
|
def shutdown
|
|
44
106
|
@shutdown = true
|
|
45
107
|
end
|
|
@@ -2,10 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
module CDC
|
|
4
4
|
module Concurrent
|
|
5
|
-
# Normalizes values returned by
|
|
5
|
+
# Normalizes values returned by Async tasks into ProcessorResult objects.
|
|
6
|
+
#
|
|
7
|
+
# ResultCollector keeps runtime pools tolerant of processors that either
|
|
8
|
+
# return a CDC::Core::ProcessorResult directly or return a plain Ruby value.
|
|
9
|
+
# Plain values are wrapped as successful ProcessorResult instances, while
|
|
10
|
+
# raised errors are represented as failure results.
|
|
6
11
|
class ResultCollector
|
|
7
|
-
#
|
|
8
|
-
#
|
|
12
|
+
# Normalizes a processor return value into a ProcessorResult.
|
|
13
|
+
#
|
|
14
|
+
# @param value [Object] Value returned by a processor or an existing ProcessorResult.
|
|
15
|
+
# @return [CDC::Core::ProcessorResult] Existing ProcessorResult or success result wrapping the value.
|
|
9
16
|
def self.normalize(value)
|
|
10
17
|
return value if value.is_a?(CDC::Core::ProcessorResult)
|
|
11
18
|
|
|
@@ -14,8 +21,10 @@ module CDC
|
|
|
14
21
|
failure(e)
|
|
15
22
|
end
|
|
16
23
|
|
|
17
|
-
#
|
|
18
|
-
#
|
|
24
|
+
# Wraps an exception as a failed ProcessorResult.
|
|
25
|
+
#
|
|
26
|
+
# @param error [Exception] Error raised while processing an event.
|
|
27
|
+
# @return [CDC::Core::ProcessorResult] Failure result containing the supplied error.
|
|
19
28
|
def self.failure(error)
|
|
20
29
|
CDC::Core::ProcessorResult.failure(error)
|
|
21
30
|
end
|